Repository: modin-project/modin
Branch: main
Commit: 7ca200b08597
Files: 681
Total size: 6.8 MB

Directory structure:
gitextract_eudtie4f/

├── .gitattributes
├── .github/
│   ├── ISSUE_TEMPLATE/
│   │   ├── bug-report.yaml
│   │   ├── feature_request.md
│   │   └── question.md
│   ├── PULL_REQUEST_TEMPLATE.md
│   ├── actions/
│   │   ├── mamba-env/
│   │   │   └── action.yml
│   │   ├── python-only/
│   │   │   └── action.yml
│   │   ├── run-core-tests/
│   │   │   ├── action.yml
│   │   │   ├── group_1/
│   │   │   │   └── action.yml
│   │   │   ├── group_2/
│   │   │   │   └── action.yml
│   │   │   ├── group_3/
│   │   │   │   └── action.yml
│   │   │   └── group_4/
│   │   │       └── action.yml
│   │   └── upload-coverage/
│   │       └── action.yml
│   ├── dependabot.yaml
│   ├── stale.yml
│   └── workflows/
│       ├── ci-notebooks.yml
│       ├── ci-required.yml
│       ├── ci.yml
│       ├── codeql/
│       │   └── codeql-config.yml
│       ├── codeql.yml
│       ├── fuzzydata-test.yml
│       ├── publish-to-pypi.yml
│       ├── push-to-main.yml
│       └── sql_server/
│           └── set_up_sql_server.sh
├── .gitignore
├── .readthedocs.yaml
├── CODEOWNERS
├── CODE_OF_CONDUCT.md
├── LICENSE
├── LICENSE_HEADER
├── MANIFEST.in
├── NOTICE
├── README.md
├── asv_bench/
│   ├── README.md
│   ├── asv.conf.dask.json
│   ├── asv.conf.json
│   ├── asv.conf.unidist.json
│   ├── benchmarks/
│   │   ├── __init__.py
│   │   ├── benchmarks.py
│   │   ├── io/
│   │   │   ├── __init__.py
│   │   │   ├── csv.py
│   │   │   └── parquet.py
│   │   ├── scalability/
│   │   │   ├── __init__.py
│   │   │   └── scalability_benchmarks.py
│   │   └── utils/
│   │       ├── __init__.py
│   │       ├── common.py
│   │       ├── compatibility.py
│   │       └── data_shapes.py
│   └── test/
│       ├── __init__.py
│       └── test_utils.py
├── ci/
│   └── teamcity/
│       ├── Dockerfile.teamcity-ci
│       ├── build-docker.py
│       └── comment_on_pr.py
├── codecov.yml
├── contributing/
│   ├── contributing.md
│   └── pre-commit
├── docker/
│   └── Dockerfile
├── docs/
│   ├── _static/
│   │   └── custom.js
│   ├── _templates/
│   │   └── layout.html
│   ├── conf.py
│   ├── contact.rst
│   ├── development/
│   │   ├── architecture.rst
│   │   ├── contributing.rst
│   │   ├── index.rst
│   │   ├── partition_api.rst
│   │   ├── using_pandas_on_dask.rst
│   │   ├── using_pandas_on_mpi.rst
│   │   ├── using_pandas_on_python.rst
│   │   └── using_pandas_on_ray.rst
│   ├── ecosystem.rst
│   ├── flow/
│   │   └── modin/
│   │       ├── config.rst
│   │       ├── core/
│   │       │   ├── dataframe/
│   │       │   │   ├── algebra.rst
│   │       │   │   ├── base/
│   │       │   │   │   ├── dataframe.rst
│   │       │   │   │   ├── index.rst
│   │       │   │   │   └── partitioning/
│   │       │   │   │       └── axis_partition.rst
│   │       │   │   ├── index.rst
│   │       │   │   └── pandas/
│   │       │   │       ├── dataframe.rst
│   │       │   │       ├── index.rst
│   │       │   │       ├── metadata/
│   │       │   │       │   ├── dtypes.rst
│   │       │   │       │   └── index.rst
│   │       │   │       └── partitioning/
│   │       │   │           ├── axis_partition.rst
│   │       │   │           ├── partition.rst
│   │       │   │           └── partition_manager.rst
│   │       │   ├── execution/
│   │       │   │   ├── dask/
│   │       │   │   │   └── implementations/
│   │       │   │   │       └── pandas_on_dask/
│   │       │   │   │           ├── dataframe.rst
│   │       │   │   │           ├── index.rst
│   │       │   │   │           └── partitioning/
│   │       │   │   │               ├── partition.rst
│   │       │   │   │               ├── partition_manager.rst
│   │       │   │   │               └── virtual_partition.rst
│   │       │   │   ├── dispatching.rst
│   │       │   │   ├── python/
│   │       │   │   │   └── implementations/
│   │       │   │   │       └── pandas_on_python/
│   │       │   │   │           ├── dataframe.rst
│   │       │   │   │           ├── index.rst
│   │       │   │   │           └── partitioning/
│   │       │   │   │               ├── axis_partition.rst
│   │       │   │   │               ├── partition.rst
│   │       │   │   │               └── partition_manager.rst
│   │       │   │   ├── ray/
│   │       │   │   │   ├── generic.rst
│   │       │   │   │   └── implementations/
│   │       │   │   │       └── pandas_on_ray/
│   │       │   │   │           ├── dataframe.rst
│   │       │   │   │           ├── index.rst
│   │       │   │   │           └── partitioning/
│   │       │   │   │               ├── axis_partition.rst
│   │       │   │   │               ├── partition.rst
│   │       │   │   │               └── partition_manager.rst
│   │       │   │   └── unidist/
│   │       │   │       ├── generic.rst
│   │       │   │       └── implementations/
│   │       │   │           └── pandas_on_unidist/
│   │       │   │               ├── dataframe.rst
│   │       │   │               ├── index.rst
│   │       │   │               └── partitioning/
│   │       │   │                   ├── axis_partition.rst
│   │       │   │                   ├── partition.rst
│   │       │   │                   └── partition_manager.rst
│   │       │   ├── io/
│   │       │   │   └── index.rst
│   │       │   └── storage_formats/
│   │       │       ├── base/
│   │       │       │   └── query_compiler.rst
│   │       │       ├── index.rst
│   │       │       └── pandas/
│   │       │           ├── index.rst
│   │       │           ├── parsers.rst
│   │       │           └── query_compiler.rst
│   │       ├── distributed/
│   │       │   └── dataframe/
│   │       │       └── pandas.rst
│   │       ├── experimental/
│   │       │   ├── batch.rst
│   │       │   ├── core/
│   │       │   │   └── io/
│   │       │   │       └── index.rst
│   │       │   ├── index.rst
│   │       │   ├── pandas.rst
│   │       │   ├── range_partitioning_groupby.rst
│   │       │   ├── reshuffling_groupby.rst
│   │       │   ├── sklearn.rst
│   │       │   └── xgboost.rst
│   │       ├── pandas/
│   │       │   ├── base.rst
│   │       │   ├── dataframe.rst
│   │       │   └── series.rst
│   │       └── utils.rst
│   ├── getting_started/
│   │   ├── examples.rst
│   │   ├── faq.rst
│   │   ├── installation.rst
│   │   ├── quickstart.rst
│   │   ├── troubleshooting.rst
│   │   ├── using_modin/
│   │   │   ├── using_modin.rst
│   │   │   ├── using_modin_cluster.rst
│   │   │   └── using_modin_locally.rst
│   │   └── why_modin/
│   │       ├── modin_vs_dask_vs_koalas.rst
│   │       ├── out_of_core.rst
│   │       ├── pandas.rst
│   │       └── why_modin.rst
│   ├── index.rst
│   ├── release-procedure.md
│   ├── release_notes/
│   │   ├── release_notes-0.14.0.rst
│   │   ├── release_notes-0.15.0.rst
│   │   ├── release_notes-0.16.0.rst
│   │   └── release_notes-template.rst
│   ├── requirements-doc.txt
│   ├── supported_apis/
│   │   ├── dataframe_supported.rst
│   │   ├── defaulting_to_pandas.rst
│   │   ├── index.rst
│   │   ├── io_supported.rst
│   │   ├── older_pandas_compat.rst
│   │   ├── series_supported.rst
│   │   └── utilities_supported.rst
│   └── usage_guide/
│       ├── advanced_usage/
│       │   ├── batch.rst
│       │   ├── index.rst
│       │   ├── modin_engines.rst
│       │   ├── modin_logging.rst
│       │   ├── modin_metrics.rst
│       │   ├── modin_xgboost.rst
│       │   ├── progress_bar.rst
│       │   └── spreadsheets_api.rst
│       ├── benchmarking.rst
│       ├── examples/
│       │   └── index.rst
│       ├── index.rst
│       ├── integrations.rst
│       └── optimization_notes/
│           ├── index.rst
│           └── range_partitioning_ops.rst
├── environment-dev.yml
├── examples/
│   ├── data/
│   │   ├── boston_housing.csv
│   │   ├── census_1k.csv
│   │   ├── nyc-taxi_1k.csv
│   │   ├── plasticc_test_set_1k.csv
│   │   ├── plasticc_test_set_metadata_1k.csv
│   │   ├── plasticc_training_set_1k.csv
│   │   └── plasticc_training_set_metadata_1k.csv
│   ├── docker/
│   │   └── modin-ray/
│   │       ├── Dockerfile
│   │       ├── build-docker-image.sh
│   │       ├── census.py
│   │       ├── nyc-taxi.py
│   │       ├── plasticc.py
│   │       └── taxi.pstat
│   ├── jupyter/
│   │   ├── Modin_Taxi.ipynb
│   │   ├── Pandas_Taxi.ipynb
│   │   └── integrations/
│   │       ├── NLTK.ipynb
│   │       ├── altair.ipynb
│   │       ├── bokeh.ipynb
│   │       ├── huggingface.ipynb
│   │       ├── matplotlib.ipynb
│   │       ├── plotly.ipynb
│   │       ├── seaborn.ipynb
│   │       ├── sklearn.ipynb
│   │       ├── statsmodels.ipynb
│   │       ├── tensorflow.ipynb
│   │       └── xgboost.ipynb
│   ├── modin-scikit-learn-example.ipynb
│   ├── quickstart.ipynb
│   ├── spreadsheet/
│   │   ├── requirements.txt
│   │   └── tutorial.ipynb
│   └── tutorial/
│       ├── README.md
│       └── jupyter/
│           ├── README.md
│           └── execution/
│               ├── pandas_on_dask/
│               │   ├── Dockerfile
│               │   ├── cluster/
│               │   │   └── exercise_5.ipynb
│               │   ├── local/
│               │   │   ├── exercise_1.ipynb
│               │   │   ├── exercise_2.ipynb
│               │   │   ├── exercise_3.ipynb
│               │   │   └── exercise_4.ipynb
│               │   ├── requirements.txt
│               │   └── test/
│               │       └── test_notebooks.py
│               ├── pandas_on_ray/
│               │   ├── Dockerfile
│               │   ├── cluster/
│               │   │   ├── README.md
│               │   │   ├── exercise_5.py
│               │   │   └── modin-cluster.yaml
│               │   ├── local/
│               │   │   ├── exercise_1.ipynb
│               │   │   ├── exercise_2.ipynb
│               │   │   ├── exercise_3.ipynb
│               │   │   └── exercise_4.ipynb
│               │   ├── requirements.txt
│               │   └── test/
│               │       └── test_notebooks.py
│               ├── pandas_on_unidist/
│               │   ├── Dockerfile
│               │   ├── README.md
│               │   ├── jupyter_unidist_env.yml
│               │   ├── local/
│               │   │   ├── exercise_1.ipynb
│               │   │   ├── exercise_2.ipynb
│               │   │   ├── exercise_3.ipynb
│               │   │   └── exercise_4.ipynb
│               │   ├── setup_kernel.py
│               │   └── test/
│               │       └── test_notebooks.py
│               └── test/
│                   └── utils.py
├── modin/
│   ├── __init__.py
│   ├── __main__.py
│   ├── _version.py
│   ├── config/
│   │   ├── __init__.py
│   │   ├── __main__.py
│   │   ├── envvars.py
│   │   └── pubsub.py
│   ├── conftest.py
│   ├── core/
│   │   ├── __init__.py
│   │   ├── computation/
│   │   │   ├── __init__.py
│   │   │   ├── align.py
│   │   │   ├── check.py
│   │   │   ├── common.py
│   │   │   ├── engines.py
│   │   │   ├── eval.py
│   │   │   ├── expr.py
│   │   │   ├── ops.py
│   │   │   ├── parsing.py
│   │   │   └── scope.py
│   │   ├── dataframe/
│   │   │   ├── __init__.py
│   │   │   ├── algebra/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── binary.py
│   │   │   │   ├── default2pandas/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── binary.py
│   │   │   │   │   ├── cat.py
│   │   │   │   │   ├── dataframe.py
│   │   │   │   │   ├── datetime.py
│   │   │   │   │   ├── default.py
│   │   │   │   │   ├── groupby.py
│   │   │   │   │   ├── list.py
│   │   │   │   │   ├── resample.py
│   │   │   │   │   ├── rolling.py
│   │   │   │   │   ├── series.py
│   │   │   │   │   ├── str.py
│   │   │   │   │   └── struct.py
│   │   │   │   ├── fold.py
│   │   │   │   ├── groupby.py
│   │   │   │   ├── map.py
│   │   │   │   ├── operator.py
│   │   │   │   ├── reduce.py
│   │   │   │   └── tree_reduce.py
│   │   │   ├── base/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── dataframe/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── dataframe.py
│   │   │   │   │   └── utils.py
│   │   │   │   ├── interchange/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── dataframe_protocol/
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── dataframe.py
│   │   │   │   │       └── utils.py
│   │   │   │   └── partitioning/
│   │   │   │       ├── __init__.py
│   │   │   │       └── axis_partition.py
│   │   │   └── pandas/
│   │   │       ├── __init__.py
│   │   │       ├── dataframe/
│   │   │       │   ├── __init__.py
│   │   │       │   ├── dataframe.py
│   │   │       │   └── utils.py
│   │   │       ├── interchange/
│   │   │       │   ├── __init__.py
│   │   │       │   └── dataframe_protocol/
│   │   │       │       ├── __init__.py
│   │   │       │       ├── buffer.py
│   │   │       │       ├── column.py
│   │   │       │       ├── dataframe.py
│   │   │       │       ├── exception.py
│   │   │       │       └── from_dataframe.py
│   │   │       ├── metadata/
│   │   │       │   ├── __init__.py
│   │   │       │   ├── dtypes.py
│   │   │       │   └── index.py
│   │   │       ├── partitioning/
│   │   │       │   ├── __init__.py
│   │   │       │   ├── axis_partition.py
│   │   │       │   ├── partition.py
│   │   │       │   └── partition_manager.py
│   │   │       └── utils.py
│   │   ├── execution/
│   │   │   ├── __init__.py
│   │   │   ├── dask/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── common/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── engine_wrapper.py
│   │   │   │   │   └── utils.py
│   │   │   │   └── implementations/
│   │   │   │       ├── __init__.py
│   │   │   │       └── pandas_on_dask/
│   │   │   │           ├── __init__.py
│   │   │   │           ├── dataframe/
│   │   │   │           │   ├── __init__.py
│   │   │   │           │   └── dataframe.py
│   │   │   │           ├── io/
│   │   │   │           │   ├── __init__.py
│   │   │   │           │   └── io.py
│   │   │   │           └── partitioning/
│   │   │   │               ├── __init__.py
│   │   │   │               ├── partition.py
│   │   │   │               ├── partition_manager.py
│   │   │   │               └── virtual_partition.py
│   │   │   ├── dispatching/
│   │   │   │   ├── __init__.py
│   │   │   │   └── factories/
│   │   │   │       ├── __init__.py
│   │   │   │       ├── dispatcher.py
│   │   │   │       └── factories.py
│   │   │   ├── modin_aqp.py
│   │   │   ├── python/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── common/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── engine_wrapper.py
│   │   │   │   └── implementations/
│   │   │   │       ├── __init__.py
│   │   │   │       └── pandas_on_python/
│   │   │   │           ├── __init__.py
│   │   │   │           ├── dataframe/
│   │   │   │           │   ├── __init__.py
│   │   │   │           │   └── dataframe.py
│   │   │   │           ├── io/
│   │   │   │           │   ├── __init__.py
│   │   │   │           │   └── io.py
│   │   │   │           └── partitioning/
│   │   │   │               ├── __init__.py
│   │   │   │               ├── partition.py
│   │   │   │               ├── partition_manager.py
│   │   │   │               └── virtual_partition.py
│   │   │   ├── ray/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── common/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── deferred_execution.py
│   │   │   │   │   ├── engine_wrapper.py
│   │   │   │   │   └── utils.py
│   │   │   │   ├── generic/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── io/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── io.py
│   │   │   │   │   └── partitioning/
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       └── partition_manager.py
│   │   │   │   └── implementations/
│   │   │   │       ├── __init__.py
│   │   │   │       └── pandas_on_ray/
│   │   │   │           ├── __init__.py
│   │   │   │           ├── dataframe/
│   │   │   │           │   ├── __init__.py
│   │   │   │           │   └── dataframe.py
│   │   │   │           ├── io/
│   │   │   │           │   ├── __init__.py
│   │   │   │           │   └── io.py
│   │   │   │           └── partitioning/
│   │   │   │               ├── __init__.py
│   │   │   │               ├── partition.py
│   │   │   │               ├── partition_manager.py
│   │   │   │               └── virtual_partition.py
│   │   │   ├── unidist/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── common/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── engine_wrapper.py
│   │   │   │   │   └── utils.py
│   │   │   │   ├── generic/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── io/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── io.py
│   │   │   │   │   └── partitioning/
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       └── partition_manager.py
│   │   │   │   └── implementations/
│   │   │   │       ├── __init__.py
│   │   │   │       └── pandas_on_unidist/
│   │   │   │           ├── __init__.py
│   │   │   │           ├── dataframe/
│   │   │   │           │   ├── __init__.py
│   │   │   │           │   └── dataframe.py
│   │   │   │           ├── io/
│   │   │   │           │   ├── __init__.py
│   │   │   │           │   └── io.py
│   │   │   │           └── partitioning/
│   │   │   │               ├── __init__.py
│   │   │   │               ├── partition.py
│   │   │   │               ├── partition_manager.py
│   │   │   │               └── virtual_partition.py
│   │   │   └── utils.py
│   │   ├── io/
│   │   │   ├── __init__.py
│   │   │   ├── column_stores/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── column_store_dispatcher.py
│   │   │   │   ├── feather_dispatcher.py
│   │   │   │   ├── hdf_dispatcher.py
│   │   │   │   └── parquet_dispatcher.py
│   │   │   ├── file_dispatcher.py
│   │   │   ├── io.py
│   │   │   ├── sql/
│   │   │   │   ├── __init__.py
│   │   │   │   └── sql_dispatcher.py
│   │   │   └── text/
│   │   │       ├── __init__.py
│   │   │       ├── csv_dispatcher.py
│   │   │       ├── excel_dispatcher.py
│   │   │       ├── fwf_dispatcher.py
│   │   │       ├── json_dispatcher.py
│   │   │       ├── text_file_dispatcher.py
│   │   │       └── utils.py
│   │   └── storage_formats/
│   │       ├── __init__.py
│   │       ├── base/
│   │       │   ├── __init__.py
│   │       │   ├── doc_utils.py
│   │       │   ├── query_compiler.py
│   │       │   └── query_compiler_calculator.py
│   │       └── pandas/
│   │           ├── __init__.py
│   │           ├── aggregations.py
│   │           ├── groupby.py
│   │           ├── merge.py
│   │           ├── native_query_compiler.py
│   │           ├── parsers.py
│   │           ├── query_compiler.py
│   │           ├── query_compiler_caster.py
│   │           └── utils.py
│   ├── db_conn.py
│   ├── distributed/
│   │   ├── __init__.py
│   │   └── dataframe/
│   │       ├── __init__.py
│   │       └── pandas/
│   │           ├── __init__.py
│   │           └── partitions.py
│   ├── error_message.py
│   ├── experimental/
│   │   ├── __init__.py
│   │   ├── batch/
│   │   │   ├── __init__.py
│   │   │   └── pipeline.py
│   │   ├── core/
│   │   │   ├── __init__.py
│   │   │   ├── execution/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── dask/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── implementations/
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       └── pandas_on_dask/
│   │   │   │   │           └── __init__.py
│   │   │   │   ├── ray/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── implementations/
│   │   │   │   │       └── __init__.py
│   │   │   │   └── unidist/
│   │   │   │       ├── __init__.py
│   │   │   │       └── implementations/
│   │   │   │           ├── __init__.py
│   │   │   │           └── pandas_on_unidist/
│   │   │   │               └── __init__.py
│   │   │   ├── io/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── glob/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── glob_dispatcher.py
│   │   │   │   ├── sql/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── sql_dispatcher.py
│   │   │   │   │   └── utils.py
│   │   │   │   └── text/
│   │   │   │       ├── __init__.py
│   │   │   │       ├── csv_glob_dispatcher.py
│   │   │   │       └── custom_text_dispatcher.py
│   │   │   └── storage_formats/
│   │   │       ├── __init__.py
│   │   │       └── pandas/
│   │   │           ├── __init__.py
│   │   │           └── parsers.py
│   │   ├── fuzzydata/
│   │   │   └── __init__.py
│   │   ├── pandas/
│   │   │   ├── __init__.py
│   │   │   └── io.py
│   │   ├── sklearn/
│   │   │   ├── __init__.py
│   │   │   └── model_selection/
│   │   │       ├── __init__.py
│   │   │       └── train_test_split.py
│   │   ├── spreadsheet/
│   │   │   ├── __init__.py
│   │   │   └── general.py
│   │   ├── torch/
│   │   │   ├── __init__.py
│   │   │   └── datasets.py
│   │   └── xgboost/
│   │       ├── __init__.py
│   │       ├── utils.py
│   │       ├── xgboost.py
│   │       └── xgboost_ray.py
│   ├── logging/
│   │   ├── __init__.py
│   │   ├── class_logger.py
│   │   ├── config.py
│   │   ├── logger_decorator.py
│   │   └── metrics.py
│   ├── numpy/
│   │   ├── __init__.py
│   │   ├── arr.py
│   │   ├── array_creation.py
│   │   ├── array_shaping.py
│   │   ├── constants.py
│   │   ├── indexing.py
│   │   ├── linalg.py
│   │   ├── logic.py
│   │   ├── math.py
│   │   ├── trigonometry.py
│   │   └── utils.py
│   ├── pandas/
│   │   ├── __init__.py
│   │   ├── accessor.py
│   │   ├── api/
│   │   │   ├── __init__.py
│   │   │   └── extensions/
│   │   │       ├── __init__.py
│   │   │       └── extensions.py
│   │   ├── arrays/
│   │   │   └── __init__.py
│   │   ├── base.py
│   │   ├── dataframe.py
│   │   ├── errors/
│   │   │   └── __init__.py
│   │   ├── general.py
│   │   ├── groupby.py
│   │   ├── indexing.py
│   │   ├── io.py
│   │   ├── iterator.py
│   │   ├── plotting.py
│   │   ├── resample.py
│   │   ├── series.py
│   │   ├── series_utils.py
│   │   ├── testing/
│   │   │   └── __init__.py
│   │   ├── utils.py
│   │   └── window.py
│   ├── polars/
│   │   ├── __init__.py
│   │   ├── base.py
│   │   ├── dataframe.py
│   │   ├── groupby.py
│   │   ├── lazyframe.py
│   │   └── series.py
│   ├── tests/
│   │   ├── __init__.py
│   │   ├── config/
│   │   │   ├── __init__.py
│   │   │   ├── docs_module/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── classes.py
│   │   │   │   └── functions.py
│   │   │   ├── docs_module_with_just_base/
│   │   │   │   ├── __init__.py
│   │   │   │   └── classes.py
│   │   │   ├── test_envvars.py
│   │   │   └── test_parameter.py
│   │   ├── core/
│   │   │   ├── __init__.py
│   │   │   ├── storage_formats/
│   │   │   │   ├── base/
│   │   │   │   │   └── test_internals.py
│   │   │   │   ├── cudf/
│   │   │   │   │   ├── test_gpu_managers.py
│   │   │   │   │   └── test_internals.py
│   │   │   │   └── pandas/
│   │   │   │       └── test_internals.py
│   │   │   └── test_dispatcher.py
│   │   ├── experimental/
│   │   │   ├── __init__.py
│   │   │   ├── spreadsheet/
│   │   │   │   ├── __init__.py
│   │   │   │   └── test_general.py
│   │   │   ├── test_fuzzydata.py
│   │   │   ├── test_io_exp.py
│   │   │   ├── test_pipeline.py
│   │   │   ├── torch/
│   │   │   │   ├── __init__.py
│   │   │   │   └── test_dataloader.py
│   │   │   └── xgboost/
│   │   │       ├── __init__.py
│   │   │       ├── test_default.py
│   │   │       ├── test_dmatrix.py
│   │   │       └── test_xgboost.py
│   │   ├── interchange/
│   │   │   ├── __init__.py
│   │   │   └── dataframe_protocol/
│   │   │       ├── __init__.py
│   │   │       ├── base/
│   │   │       │   ├── __init__.py
│   │   │       │   ├── test_sanity.py
│   │   │       │   └── test_utils.py
│   │   │       ├── pandas/
│   │   │       │   ├── __init__.py
│   │   │       │   └── test_protocol.py
│   │   │       └── test_general.py
│   │   ├── numpy/
│   │   │   ├── __init__.py
│   │   │   ├── test_array.py
│   │   │   ├── test_array_arithmetic.py
│   │   │   ├── test_array_axis_functions.py
│   │   │   ├── test_array_creation.py
│   │   │   ├── test_array_indexing.py
│   │   │   ├── test_array_linalg.py
│   │   │   ├── test_array_logic.py
│   │   │   ├── test_array_math.py
│   │   │   ├── test_array_shaping.py
│   │   │   └── utils.py
│   │   ├── pandas/
│   │   │   ├── __init__.py
│   │   │   ├── conftest.py
│   │   │   ├── data/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── airline.sas7bdat
│   │   │   │   ├── blah.csv
│   │   │   │   ├── every_other_row_nan.xlsx
│   │   │   │   ├── excel_sheetname_title.xlsx
│   │   │   │   ├── hdfs.parquet/
│   │   │   │   │   ├── part-00000-a7bff54c-2ff4-4654-9783-626542bd3a90-c000.snappy.parquet
│   │   │   │   │   ├── part-00001-a7bff54c-2ff4-4654-9783-626542bd3a90-c000.snappy.parquet
│   │   │   │   │   └── part-00002-a7bff54c-2ff4-4654-9783-626542bd3a90-c000.snappy.parquet
│   │   │   │   ├── issue5159.parquet/
│   │   │   │   │   └── part-0000.snappy.parquet/
│   │   │   │   │       ├── par=a/
│   │   │   │   │       │   └── 44c5b23d806c4dc8a97d70c4fb2219f5-0.parquet
│   │   │   │   │       └── par=b/
│   │   │   │   │           └── 44c5b23d806c4dc8a97d70c4fb2219f5-0.parquet
│   │   │   │   ├── issue_1930.csv
│   │   │   │   ├── issue_2074.csv
│   │   │   │   ├── issue_2239.csv
│   │   │   │   ├── issue_3119.csv
│   │   │   │   ├── issue_4543.csv
│   │   │   │   ├── issue_976.csv
│   │   │   │   ├── modin_error_book.xlsx
│   │   │   │   ├── multiple_csv/
│   │   │   │   │   ├── test_data0.csv
│   │   │   │   │   └── test_data1.csv
│   │   │   │   ├── newlines.csv
│   │   │   │   ├── test_border_rows.xlsx
│   │   │   │   ├── test_categories.csv
│   │   │   │   ├── test_categories.json
│   │   │   │   ├── test_data.feather
│   │   │   │   ├── test_data.fwf
│   │   │   │   ├── test_data.json
│   │   │   │   ├── test_data.parquet
│   │   │   │   ├── test_data_dir.parquet/
│   │   │   │   │   ├── part_0.parquet
│   │   │   │   │   ├── part_1.parquet
│   │   │   │   │   ├── part_10.parquet
│   │   │   │   │   ├── part_11.parquet
│   │   │   │   │   ├── part_12.parquet
│   │   │   │   │   ├── part_13.parquet
│   │   │   │   │   ├── part_14.parquet
│   │   │   │   │   ├── part_15.parquet
│   │   │   │   │   ├── part_2.parquet
│   │   │   │   │   ├── part_3.parquet
│   │   │   │   │   ├── part_4.parquet
│   │   │   │   │   ├── part_5.parquet
│   │   │   │   │   ├── part_6.parquet
│   │   │   │   │   ├── part_7.parquet
│   │   │   │   │   ├── part_8.parquet
│   │   │   │   │   └── part_9.parquet
│   │   │   │   ├── test_delim.csv
│   │   │   │   ├── test_different_columns_in_rows.json
│   │   │   │   ├── test_empty_rows.xlsx
│   │   │   │   ├── test_emptyline.xlsx
│   │   │   │   ├── test_null_col.csv
│   │   │   │   ├── test_time_parsing.csv
│   │   │   │   └── test_usecols.csv
│   │   │   ├── dataframe/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── test_binary.py
│   │   │   │   ├── test_default.py
│   │   │   │   ├── test_indexing.py
│   │   │   │   ├── test_iter.py
│   │   │   │   ├── test_join_sort.py
│   │   │   │   ├── test_map_metadata.py
│   │   │   │   ├── test_pickle.py
│   │   │   │   ├── test_reduce.py
│   │   │   │   ├── test_udf.py
│   │   │   │   └── test_window.py
│   │   │   ├── extensions/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── conftest.py
│   │   │   │   ├── test_api_reexport.py
│   │   │   │   ├── test_base_extensions.py
│   │   │   │   ├── test_dataframe_extensions.py
│   │   │   │   ├── test_groupby_extensions.py
│   │   │   │   ├── test_pd_extensions.py
│   │   │   │   └── test_series_extensions.py
│   │   │   ├── integrations/
│   │   │   │   ├── __init__.py
│   │   │   │   └── test_lazy_import.py
│   │   │   ├── internals/
│   │   │   │   ├── __init__.py
│   │   │   │   └── test_benchmark_mode.py
│   │   │   ├── native_df_interoperability/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── conftest.py
│   │   │   │   ├── test_binary.py
│   │   │   │   ├── test_compiler_caster.py
│   │   │   │   ├── test_copy_on_write.py
│   │   │   │   ├── test_default.py
│   │   │   │   ├── test_default_to_pandas_without_warnings.py
│   │   │   │   ├── test_general.py
│   │   │   │   ├── test_indexing.py
│   │   │   │   ├── test_iter.py
│   │   │   │   ├── test_join_sort.py
│   │   │   │   ├── test_map_metadata.py
│   │   │   │   ├── test_pickle.py
│   │   │   │   ├── test_window.py
│   │   │   │   └── utils.py
│   │   │   ├── test_api.py
│   │   │   ├── test_backend.py
│   │   │   ├── test_concat.py
│   │   │   ├── test_expanding.py
│   │   │   ├── test_general.py
│   │   │   ├── test_groupby.py
│   │   │   ├── test_io.py
│   │   │   ├── test_repartition.py
│   │   │   ├── test_reshape.py
│   │   │   ├── test_rolling.py
│   │   │   ├── test_series.py
│   │   │   └── utils.py
│   │   ├── polars/
│   │   │   └── test_dataframe.py
│   │   ├── test_dataframe_api_standard.py
│   │   ├── test_docstring_urls.py
│   │   ├── test_envvar_catcher.py
│   │   ├── test_envvar_npartitions.py
│   │   ├── test_executions_api.py
│   │   ├── test_headers.py
│   │   ├── test_logging.py
│   │   ├── test_metrics.py
│   │   ├── test_partition_api.py
│   │   └── test_utils.py
│   └── utils.py
├── modin-autoimport-pandas.pth
├── mypy.ini
├── requirements/
│   ├── env_unidist_linux.yml
│   ├── env_unidist_win.yml
│   └── requirements-no-engine.yml
├── requirements-dev.txt
├── scripts/
│   ├── __init__.py
│   ├── doc_checker.py
│   ├── release.py
│   └── test/
│       ├── __init__.py
│       ├── examples.py
│       └── test_doc_checker.py
├── setup.cfg
├── setup.py
├── stress_tests/
│   ├── kaggle/
│   │   ├── kaggle10.py
│   │   ├── kaggle12.py
│   │   ├── kaggle13.py
│   │   ├── kaggle14.py
│   │   ├── kaggle17.py
│   │   ├── kaggle18.py
│   │   ├── kaggle19.py
│   │   ├── kaggle20.py
│   │   ├── kaggle22.py
│   │   ├── kaggle3.py
│   │   ├── kaggle4.py
│   │   ├── kaggle5.py
│   │   ├── kaggle6.py
│   │   ├── kaggle7.py
│   │   ├── kaggle8.py
│   │   └── kaggle9.py
│   ├── run_stress_tests.sh
│   └── test_kaggle_ipynb.py
└── versioneer.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitattributes
================================================
* text=auto
modin/_version.py export-subst


================================================
FILE: .github/ISSUE_TEMPLATE/bug-report.yaml
================================================
name: Bug report
description: Report incorrect behavior in the Modin library
title: 'BUG: '
labels: ['bug 🦗', 'Triage 🩹']

body:
  - type: checkboxes
    id: checks
    attributes:
      label: Modin version checks
      options:
        - label: >
            I have checked that this issue has not already been reported.
          required: true
        - label: >
            I have confirmed this bug exists on the latest released version of Modin.
          required: true
        - label: >
            I have confirmed this bug exists on the main branch of Modin. (In order to do this you
            can follow [this guide](https://modin.readthedocs.io/en/stable/getting_started/installation.html#installing-from-the-github-main-branch).)
  - type: textarea
    id: example
    attributes:
      label: Reproducible Example
      description: >
        Please follow [this guide](https://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports) on how to
        provide a minimal, copy-pastable example.
      placeholder: >
        import modin.pandas as pd

        df = pd.DataFrame(range(5))

        ...
      render: python
    validations:
      required: true
  - type: textarea
    id: problem
    attributes:
      label: Issue Description
      description: >
        Please provide a description of the issue shown in the reproducible example.
    validations:
      required: true
  - type: textarea
    id: expected-behavior
    attributes:
      label: Expected Behavior
      description: >
        Please describe or show a code example of the expected behavior.
    validations:
      required: true
  - type: textarea
    id: logs
    attributes:
      label: Error Logs
      description: >
        Please paste the output of any relevant error logs.
      value: >
        <details>


        ```python-traceback


        Replace this line with the error backtrace (if applicable).


        ```


        </details>
  - type: textarea
    id: version
    attributes:
      label: Installed Versions
      description: >
        Please paste the output of ``pd.show_versions()``
      value: >
        <details>


        Replace this line with the output of pd.show_versions()


        </details>
    validations:
      required: true


================================================
FILE: .github/ISSUE_TEMPLATE/feature_request.md
================================================
---
name: Feature request
about: Request a new API or feature implementation
title: ''
labels: 'new feature/request 💬, Triage 🩹'
assignees: ''

---

**Is your feature request related to a problem? Please describe.**
A clear and concise description of what the problem is. What kind of performance improvements would you like to see with this new API?


================================================
FILE: .github/ISSUE_TEMPLATE/question.md
================================================
---
name: Question
about: You want to ask a question
title: ''
labels: 'question ❓, Triage 🩹'
assignees: ''

---


================================================
FILE: .github/PULL_REQUEST_TEMPLATE.md
================================================
<!--
Thank you for your contribution!
Please review the contributing docs: https://modin.readthedocs.io/en/latest/development/contributing.html
if you have questions about contributing.
-->

## What do these changes do?

<!-- Please give a short brief about these changes. -->

- [x] first commit message and PR title follow format outlined [here](https://modin.readthedocs.io/en/latest/development/contributing.html#commit-message-formatting)
  > **_NOTE:_**  If you edit the PR title to match this format, you need to add another commit (even if it's empty) or amend your last commit for the CI job that checks the PR title to pick up the new PR title.
- [ ] passes `flake8 modin/ asv_bench/benchmarks scripts/doc_checker.py`
- [ ] passes `black --check modin/ asv_bench/benchmarks scripts/doc_checker.py`
- [ ] signed commit with `git commit -s` <!-- you can amend your commit with a signature via `git commit -amend -s` -->
- [ ] Resolves #? <!-- issue must be created for each patch -->
- [ ] tests added and passing
- [ ] module layout described at `docs/development/architecture.rst` is up-to-date <!-- if you have added, renamed or removed files or directories please update the documentation accordingly -->


================================================
FILE: .github/actions/mamba-env/action.yml
================================================
name: "Install environment using Mamba"
description: "Prepare the environment to run Modin"
inputs:
  python-version:
    description: "Python version to install"
    default: "3.9"
  environment-file:
    description: "Conda environment yml"
    required: true
  activate-environment:
    description: "Conda environment to activate"
    default: "modin"

runs:
  using: "composite"
  steps:
    - name: Get current week
      id: get-week
      # use current week as cache key to periodically refresh the cache,
      # as cache is based on requirements, but dependencies push
      # updated versions at some irregular pace
      run: echo "thisweek=$(/bin/date -u '+%Y.w%W')" >> $GITHUB_OUTPUT
      shell: bash
    - name: Cache conda
      id: cache-conda
      uses: actions/cache@v4
      with:
        path: |
          ~/conda_pkgs_dir
          ~/.cache/pip
        key:
          ${{ runner.os }}-conda-${{ steps.get-week.outputs.thisweek }}-${{ hashFiles(inputs.environment-file) }}
    - uses: conda-incubator/setup-miniconda@v3
      with:
        miniforge-variant: Miniforge3
        miniforge-version: latest
        use-mamba: true
        activate-environment: ${{ inputs.activate-environment }}
        environment-file: ${{ inputs.environment-file }}
        python-version: ${{ inputs.python-version }}
        channel-priority: strict
        # we set use-only-tar-bz2 to false in order for conda to properly find new packages to be installed
        # for more info see https://github.com/conda-incubator/setup-miniconda/issues/264
        use-only-tar-bz2: false
    - shell: bash -l {0}
      run: |
        conda run -n ${{ inputs.activate-environment }} pip install .
        conda list -n ${{ inputs.activate-environment }}


================================================
FILE: .github/actions/python-only/action.yml
================================================
name: "Install Python only"
description: "Prepare the environment to run simple tasks"
inputs:
  python-version:
    description: "Python version to install"
    default: "3.9"

runs:
  using: "composite"
  steps:
    - uses: actions/setup-python@v5
      with:
        python-version: ${{ inputs.python-version }}
        architecture: "x64"
        cache: 'pip'


================================================
FILE: .github/actions/run-core-tests/action.yml
================================================
name: "Run core Modin tests"
description: "Run core Modin tests like dataframe or groupby"
inputs:
  runner:
    description: "Runner for tests"
    default: "python -m pytest"
  parallel:
    description: "How to run tests in parallel"
    default: "-n 2"

runs:
  using: "composite"
  steps:
    - uses: ./.github/actions/run-core-tests/group_1
      with:
        runner: ${{ inputs.runner }}
        parallel: ${{ inputs.parallel }}
    - uses: ./.github/actions/run-core-tests/group_2
      with:
        runner: ${{ inputs.runner }}
        parallel: ${{ inputs.parallel }}
    - uses: ./.github/actions/run-core-tests/group_3
      with:
        runner: ${{ inputs.runner }}
        parallel: ${{ inputs.parallel }}
    - uses: ./.github/actions/run-core-tests/group_4
      with:
        runner: ${{ inputs.runner }}
        parallel: ${{ inputs.parallel }}


================================================
FILE: .github/actions/run-core-tests/group_1/action.yml
================================================
name: "Run core Modin tests - group 1"
description: "Run core Modin tests like dataframe or groupby"
inputs:
  runner:
    description: "Runner for tests"
    default: "python -m pytest"
  parallel:
    description: "How to run tests in parallel"
    default: "-n 2"

runs:
  using: "composite"
  steps:
      - run: |
          echo "::group::Running dataframe tests (group 1)..."
          ${{ inputs.runner }} ${{ inputs.parallel }} modin/tests/pandas/dataframe/test_binary.py \
                                                      modin/tests/pandas/dataframe/test_default.py \
                                                      modin/tests/pandas/dataframe/test_indexing.py \
                                                      modin/tests/pandas/dataframe/test_iter.py
          echo "::endgroup::"
        shell: bash -l {0}


================================================
FILE: .github/actions/run-core-tests/group_2/action.yml
================================================
name: "Run core Modin tests - group 2"
description: "Run core Modin tests like dataframe or groupby"
inputs:
  runner:
    description: "Runner for tests"
    default: "python -m pytest"
  parallel:
    description: "How to run tests in parallel"
    default: "-n 2"

runs:
  using: "composite"
  steps:
      - run: |
          echo "::group::Running dataframe tests (group 2)..."
          ${{ inputs.runner }} ${{ inputs.parallel }} modin/tests/pandas/dataframe/test_join_sort.py \
                                                      modin/tests/pandas/dataframe/test_reduce.py \
                                                      modin/tests/pandas/dataframe/test_udf.py \
                                                      modin/tests/pandas/dataframe/test_window.py \
                                                      modin/tests/pandas/dataframe/test_pickle.py \
                                                      modin/tests/pandas/test_repartition.py \
                                                      modin/tests/pandas/test_backend.py
          echo "::endgroup::"
        shell: bash -l {0}


================================================
FILE: .github/actions/run-core-tests/group_3/action.yml
================================================
name: "Run core Modin tests - group 3"
description: "Run core Modin tests like dataframe or groupby"
inputs:
  runner:
    description: "Runner for tests"
    default: "python -m pytest"
  parallel:
    description: "How to run tests in parallel"
    default: "-n 2"

runs:
  using: "composite"
  steps:
      - run: |
          echo "::group::Running tests (group 3)..."
          ${{ inputs.runner }} ${{ inputs.parallel }} modin/tests/pandas/test_series.py \
                                                      modin/tests/pandas/dataframe/test_map_metadata.py
          echo "::endgroup::"
        shell: bash -l {0}
      - run: |
          echo "::group::Running range-partitioning tests (group 3)..."
          MODIN_RANGE_PARTITIONING=1 ${{ inputs.runner }} ${{ inputs.parallel }} modin/tests/pandas/test_groupby.py
          MODIN_RANGE_PARTITIONING=1 ${{ inputs.runner }} ${{ inputs.parallel }} modin/tests/pandas/test_series.py -k "test_unique or test_nunique or drop_duplicates or test_resample"
          MODIN_RANGE_PARTITIONING=1 ${{ inputs.runner }} ${{ inputs.parallel }} modin/tests/pandas/test_general.py -k "test_unique"
          MODIN_RANGE_PARTITIONING=1 ${{ inputs.runner }} ${{ inputs.parallel }} modin/tests/pandas/dataframe/test_map_metadata.py -k "drop_duplicates"
          MODIN_RANGE_PARTITIONING=1 ${{ inputs.runner }} ${{ inputs.parallel }} modin/tests/pandas/dataframe/test_join_sort.py -k "merge"
          MODIN_RANGE_PARTITIONING=1 ${{ inputs.runner }} ${{ inputs.parallel }} modin/tests/pandas/dataframe/test_default.py -k "resample"
          echo "::endgroup::"
        shell: bash -l {0}


================================================
FILE: .github/actions/run-core-tests/group_4/action.yml
================================================
name: "Run core Modin tests - group 4"
description: "Run core Modin tests like dataframe or groupby"
inputs:
  runner:
    description: "Runner for tests"
    default: "python -m pytest"
  parallel:
    description: "How to run tests in parallel"
    default: "-n 2"

runs:
  using: "composite"
  steps:
      - run: |
          echo "::group::Running tests (group 4)..."
          ${{ inputs.runner }} ${{ inputs.parallel }} modin/tests/pandas/test_rolling.py \
                                                      modin/tests/pandas/test_expanding.py \
                                                      modin/tests/pandas/test_groupby.py \
                                                      modin/tests/pandas/test_reshape.py \
                                                      modin/tests/pandas/test_general.py
          echo "::endgroup::"
        shell: bash -l {0}
      - run: |
          echo "::group::Running concat tests (group 4)..."
          ${{ inputs.runner }} modin/tests/pandas/test_concat.py # Ray and Dask versions fail with -n 2
          echo "::endgroup::"
        shell: bash -l {0}


================================================
FILE: .github/actions/upload-coverage/action.yml
================================================
name: Upload Coverage
description: Upload coverage files

runs:
  using: "composite"

  steps:
    - run: |
        COVERAGE_UUID=$(python3 -c "import uuid; print(uuid.uuid4())")
        mv .coverage .coverage.${COVERAGE_UUID}
        echo "COVERAGE_UUID=${COVERAGE_UUID}" >> $GITHUB_ENV
      id: coverage-uuid
      shell: bash
    - uses: actions/upload-artifact@v4
      with:
        name: coverage-data-${{ env.COVERAGE_UUID }}
        path: .coverage*
        include-hidden-files: true


================================================
FILE: .github/dependabot.yaml
================================================
version: 2
updates:
  - package-ecosystem: "github-actions"
    directory: "/"
    schedule:
      interval: "monthly"
    groups:
      github-actions:
        patterns:
          - "*"


================================================
FILE: .github/stale.yml
================================================
# Number of days of inactivity before an Issue or Pull Request becomes stale
daysUntilStale: 365

# Number of days of inactivity before an Issue or Pull Request with the stale label is closed.
# Set to false to disable. If disabled, issues still need to be closed manually, but will remain marked as stale.
daysUntilClose: 7

# Only issues or pull requests with all of these labels are checked for staleness. Defaults to `[]` (disabled)
onlyLabels: []

# Issues or Pull Requests with these labels will never be considered stale. Set to `[]` to disable
exemptLabels: []

# Set to true to ignore issues in a project (defaults to false)
exemptProjects: false

# Set to true to ignore issues in a milestone (defaults to false)
exemptMilestones: false

# Set to true to ignore issues with an assignee (defaults to false)
exemptAssignees: false

# Label to use when marking as stale
staleLabel: stale

# Comment to post when marking as stale. Set to `false` to disable
markComment: >
  This issue has been automatically marked as stale because it has not had
  recent activity. It will be closed if no further activity occurs within the next
  7 days. Thank you for your contributions.

# Comment to post when removing the stale label.
# unmarkComment: >
#   Your comment here.

# Comment to post when closing a stale Issue or Pull Request.
closeComment: >
  Closing as stale.


================================================
FILE: .github/workflows/ci-notebooks.yml
================================================
name: ci-notebooks
on:
  pull_request:
    paths:
      - modin/**
      - examples/tutorial/**
      - .github/workflows/ci-notebooks.yml
      - setup.cfg
      - setup.py
      - requirements/env_unidist_linux.yml
concurrency:
  # Cancel other jobs in the same branch. We don't care whether CI passes
  # on old commits.
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}
env:
  MODIN_GITHUB_CI: true
jobs:
  test-tutorial-notebooks:
    defaults:
      run:
        shell: bash -l {0}
    name: test tutorial notebooks
    runs-on: ubuntu-latest
    strategy:
      matrix:
        execution: [pandas_on_ray, pandas_on_dask, pandas_on_unidist]
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/python-only
        if: matrix.execution != 'pandas_on_unidist'
      - uses: ./.github/actions/mamba-env
        with:
          environment-file: requirements/env_unidist_linux.yml
          activate-environment: modin_on_unidist
        if: matrix.execution == 'pandas_on_unidist'
      - name: Cache datasets
        uses: actions/cache@v4
        with:
          path: taxi.csv
          # update cache only if notebooks require it to be changed
          key: taxi-csv-dataset-${{ hashFiles('examples/tutorial/jupyter/**') }}
      # replace modin with . in the tutorial requirements file for `pandas_on_ray` and
      # `pandas_on_dask` since we need Modin built from sources
      - run: sed -i 's/modin/./g' examples/tutorial/jupyter/execution/${{ matrix.execution }}/requirements.txt
        if: matrix.execution != 'pandas_on_unidist'
      # install dependencies required for notebooks execution for `pandas_on_ray` and `pandas_on_dask`
      # Override modin-spreadsheet install for now
      - run: |
          pip install -r examples/tutorial/jupyter/execution/${{ matrix.execution }}/requirements.txt
          pip install git+https://github.com/modin-project/modin-spreadsheet.git@49ffd89f683f54c311867d602c55443fb11bf2a5
        if: matrix.execution != 'pandas_on_unidist'
      # Build Modin from sources for `pandas_on_unidist`
      - run: pip install -e .
        if: matrix.execution == 'pandas_on_unidist'
      # install test dependencies
      # NOTE: If you are changing the set of packages installed here, make sure that
      # the dev requirements match them.
      - run: pip install pytest pytest-cov black flake8 flake8-print flake8-no-implicit-concat
        if: matrix.execution != 'pandas_on_unidist'
      - run: pip install flake8-print jupyter nbformat nbconvert
        if: matrix.execution == 'pandas_on_unidist'
      - run: pip list
        if: matrix.execution != 'pandas_on_unidist'
      - run: |
          conda info
          conda list
        if: matrix.execution == 'pandas_on_unidist'
      # setup kernel configuration for `pandas_on_unidist` execution with mpi backend
      - run: python examples/tutorial/jupyter/execution/${{ matrix.execution }}/setup_kernel.py
        if: matrix.execution == 'pandas_on_unidist'
      - run: jupyter kernelspec list
      - run: |
          black --check --diff examples/tutorial/jupyter/execution/${{ matrix.execution }}/test/test_notebooks.py
          black --check --diff examples/tutorial/jupyter/execution/test/utils.py
      - run: |
          flake8 --enable=T examples/tutorial/jupyter/execution/${{ matrix.execution }}/test/test_notebooks.py
          flake8 --enable=T examples/tutorial/jupyter/execution/test/utils.py
      - run: python -m pytest examples/tutorial/jupyter/execution/${{ matrix.execution }}/test/test_notebooks.py


================================================
FILE: .github/workflows/ci-required.yml
================================================
name: ci-required
on: pull_request
concurrency:
  # Cancel other jobs in the same branch. We don't care whether CI passes
  # on old commits.
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}
env:
  MODIN_GITHUB_CI: true

jobs:

  check-pr-title:
    runs-on: ubuntu-latest
    steps:
    - uses: Slashgear/action-check-pr-title@v4.3.0
      with:
        # NOTE: If you change the allowed prefixes here, update
        # the documentation about them in /docs/development/contributing.rst
        regexp: '^(?:FEAT|DOCS|FIX|REFACTOR|TEST|PERF)-#\d+:'

  build-docs:
    name: build docs
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 1
      - uses: actions/setup-python@v5
        with:
          python-version: "3.9"
          architecture: "x64"
          cache: "pip"
          cache-dependency-path: '**/requirements-doc.txt'
      - run: pip install -r docs/requirements-doc.txt
      - run: cd docs && sphinx-build -T -E -W -b html . build

  lint-pydocstyle:
    name: lint (pydocstyle)
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/python-only
      # The `numpydoc` version here MUST match the versions in the dev requirements files.
      - run: pip install pytest pytest-cov pydocstyle numpydoc==1.6.0
      - run: python -m pytest scripts/test
      - run: pip install -e ".[all]"
      - run: |
          python scripts/doc_checker.py --add-ignore=D101,D102,D103,D105 --disable-numpydoc \
            modin/pandas/dataframe.py modin/pandas/series.py \
            modin/pandas/groupby.py \
            modin/pandas/series_utils.py modin/pandas/general.py \
            modin/pandas/plotting.py modin/pandas/utils.py \
            modin/pandas/iterator.py modin/pandas/indexing.py
      - run: python scripts/doc_checker.py modin/core/dataframe
      - run: python scripts/doc_checker.py modin/core/execution/dask
      - run: |
          python scripts/doc_checker.py \
            modin/pandas/accessor.py modin/pandas/general.py \
            modin/pandas/groupby.py modin/pandas/indexing.py \
            modin/pandas/iterator.py modin/pandas/plotting.py \
            modin/pandas/series_utils.py modin/pandas/utils.py \
            modin/pandas/base.py \
            modin/pandas/io.py \
            asv_bench/benchmarks/utils \
            asv_bench/benchmarks/__init__.py asv_bench/benchmarks/io/__init__.py \
            asv_bench/benchmarks/scalability/__init__.py \
            modin/core/io \
            modin/pandas/series.py \
            modin/core/execution/python \
            modin/pandas/dataframe.py \
            modin/config/__init__.py \
            modin/config/__main__.py \
            modin/config/envvars.py \
            modin/config/pubsub.py
      - run: python scripts/doc_checker.py modin/distributed
      - run: python scripts/doc_checker.py modin/utils.py
      - run: python scripts/doc_checker.py modin/experimental/sklearn
      - run: |
          python scripts/doc_checker.py modin/experimental/xgboost/__init__.py \
            modin/experimental/xgboost/utils.py modin/experimental/xgboost/xgboost.py \
            modin/experimental/xgboost/xgboost_ray.py
      - run: python scripts/doc_checker.py modin/core/execution/ray
      - run: |
          python scripts/doc_checker.py modin/core/execution/dispatching/factories/factories.py \
            modin/core/execution/dispatching/factories/dispatcher.py
      - run: python scripts/doc_checker.py scripts/doc_checker.py
      - run: |
          python scripts/doc_checker.py modin/experimental/pandas/io.py \
            modin/experimental/pandas/__init__.py
      - run: python scripts/doc_checker.py modin/core/storage_formats/base
      - run: python scripts/doc_checker.py modin/core/storage_formats/pandas
      - run: python scripts/doc_checker.py modin/experimental/batch/pipeline.py
      - run: python scripts/doc_checker.py modin/logging

  lint-black-isort:
    name: lint (black and isort)
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/python-only
      - run: pip install "black>=24.1.0" "isort>=5.12"
      # NOTE: keep the black command here in sync with the pre-commit hook in
      # /contributing/pre-commit
      - run: black --check --diff modin/ asv_bench/benchmarks scripts/doc_checker.py
      - run: isort . --check-only


================================================
FILE: .github/workflows/ci.yml
================================================
name: ci
on:
  pull_request:
    paths:
      # NOTE: keep these paths in sync with the paths that trigger the
      # fuzzydata Github Actions in .github/workflows/fuzzydata-test.yml
      - .github/workflows/**
      - .github/actions/**
      - '!.github/workflows/push-to-main.yml'
      - asv_bench/**
      - modin/**
      - requirements/**
      - scripts/**
      - environment-dev.yml
      - requirements-dev.txt
      - setup.cfg
      - setup.py
      - versioneer.py
  push:
  schedule:
    - cron: "30 2 * * WED"
    - cron: "30 2 * * THU"
concurrency:
  # Cancel other jobs in the same branch. We don't care whether CI passes
  # on old commits.
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}
env:
  MODIN_GITHUB_CI: true

jobs:
  python-filter:
    runs-on: ubuntu-latest
    outputs:
      python-version: ${{ steps.choose.outputs.python-version }}
    steps:
    - id: choose
      run: |
        if [[ "${{ github.event.schedule }}" = "30 2 * * WED" ]]
        then
          echo "python-version=3.10" >> "$GITHUB_OUTPUT"
        elif [[ "${{ github.event.schedule }}" = "30 2 * * THU" ]]
        then
          echo "python-version=3.11" >> "$GITHUB_OUTPUT"
        else
          echo "python-version=3.9" >> "$GITHUB_OUTPUT"
        fi

  lint-mypy:
    needs: [python-filter]
    name: lint (mypy)
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/python-only
        with:
          python-version: ${{ needs.python-filter.outputs.python-version }}
      - run: pip install -r requirements-dev.txt
      - run: mypy --config-file mypy.ini

  lint-flake8:
    needs: [python-filter]
    name: lint (flake8)
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/python-only
        with:
          python-version: ${{ needs.python-filter.outputs.python-version }}
      # NOTE: If you are changing the set of packages installed here, make sure that
      # the dev requirements match them.
      - run: pip install flake8 flake8-print flake8-no-implicit-concat
      # NOTE: keep the flake8 command here in sync with the pre-commit hook in
      # /contributing/pre-commit
      - run: flake8 modin/ asv_bench/benchmarks scripts/doc_checker.py

  test-api-and-no-engine:
    needs: [python-filter]
    name: Test API, headers and no-engine mode
    runs-on: ubuntu-latest
    defaults:
      run:
        shell: bash -l {0}
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/mamba-env
        with:
          environment-file: requirements/requirements-no-engine.yml
          python-version: ${{ needs.python-filter.outputs.python-version }}
      - run: python -m pytest modin/tests/pandas/test_api.py
      - run: python -m pytest modin/tests/test_executions_api.py
      - run: python -m pytest modin/tests/test_headers.py
      - run: python -m pytest modin/tests/core/test_dispatcher.py::test_add_option
      - uses: ./.github/actions/upload-coverage

  test-clean-install:
    needs: [lint-flake8, python-filter]
    strategy:
      matrix:
        os:
          - ubuntu
          - windows
    runs-on: ${{ matrix.os }}-latest
    defaults:
      run:
        shell: bash -l {0}
    name: test-clean-install-${{ matrix.os }}
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/python-only
        with:
          python-version: ${{ needs.python-filter.outputs.python-version }}
      - run: python -m pip install -e ".[all]"
      - name: Ensure Ray and Dask engines start up
        run: |
          MODIN_ENGINE=dask python -c "import modin.pandas as pd; print(pd.DataFrame([1,2,3]))"
          MODIN_ENGINE=ray python -c "import modin.pandas as pd; print(pd.DataFrame([1,2,3]))"
      - name: Ensure MPI engine starts up
        # Install a working MPI implementation beforehand so mpi4py can link to it
        run: |
          sudo apt-get update
          sudo apt-get install software-properties-common

          sudo add-apt-repository "deb http://archive.ubuntu.com/ubuntu jammy main universe restricted multiverse"
          sudo add-apt-repository "deb http://archive.ubuntu.com/ubuntu jammy-updates main universe restricted multiverse"
          sudo add-apt-repository "deb http://security.ubuntu.com/ubuntu jammy-security main universe restricted multiverse"
          sudo apt-get update

          sudo apt-get install libmpich-dev=4.0-3 libmpich12=4.0-3 mpich=4.0-3
          python -m pip install -e ".[mpi]"
          # mpi4py 4.1 does not work with the mpich versions above.
          # TODO(https://github.com/modin-project/modin/issues/7615): figure out
          # the correct libmpich versions for mpi4py >= 4.1
          python -m pip install "mpi4py<4.1"
          MODIN_ENGINE=unidist UNIDIST_BACKEND=mpi mpiexec -n 1 python -c "import modin.pandas as pd; print(pd.DataFrame([1,2,3]))"
        if: matrix.os == 'ubuntu'

  test-internals:
    needs: [lint-flake8, python-filter]
    runs-on: ubuntu-latest
    defaults:
      run:
        shell: bash -l {0}
    name: test-internals
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/mamba-env
        with:
          environment-file: environment-dev.yml
          python-version: ${{ needs.python-filter.outputs.python-version }}
      - name: Internals tests
        run: python -m pytest modin/tests/core/test_dispatcher.py
      - run: python -m pytest modin/tests/config
      - run: python -m pytest modin/tests/test_envvar_catcher.py
      - run: python -m pytest modin/tests/core/storage_formats/base/test_internals.py
      - run: python -m pytest modin/tests/core/storage_formats/pandas/test_internals.py
      - run: python -m pytest modin/tests/test_envvar_npartitions.py
      - run: python -m pytest modin/tests/test_utils.py
      - run: python -m pytest asv_bench/test/test_utils.py
      - run: python -m pytest modin/tests/interchange/dataframe_protocol/base
      - run: python -m pytest modin/tests/test_dataframe_api_standard.py
      - run: python -m pytest modin/tests/test_logging.py
      - run: python -m pytest modin/tests/test_metrics.py
      - run: python -m pytest modin/tests/pandas/extensions
      - uses: ./.github/actions/upload-coverage

  test-defaults:
    needs: [lint-flake8, python-filter]
    runs-on: ubuntu-latest
    defaults:
      run:
        shell: bash -l {0}
    strategy:
      matrix:
        execution: [BaseOnPython]
    env:
      MODIN_TEST_DATASET_SIZE: "small"
    name: Test ${{ matrix.execution }} execution, Python ${{ needs.python-filter.outputs.python-version }}
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/mamba-env
        with:
          environment-file: environment-dev.yml
          python-version: ${{ needs.python-filter.outputs.python-version }}
      - name: Install HDF5
        run: sudo apt update && sudo apt install -y libhdf5-dev
      - name: xgboost tests
        run: |
          # TODO(https://github.com/modin-project/modin/issues/5194): Uncap xgboost
          # when we use collective instead of rabit.
          # Per the thread https://github.com/conda-forge/miniforge/issues/513,
          # remove unused conda packages and caches to avoid `Found incorrect
          # download: joblib` error from mamba.
          mamba clean --all
          mamba install "xgboost>=1.7.1,<2.0.0" scikit-learn -c conda-forge
          python -m pytest modin/tests/experimental/xgboost/test_default.py --execution=${{ matrix.execution }}
      - run: python -m pytest -n 2 modin/tests/core/storage_formats/base/test_internals.py --execution=${{ matrix.execution }}
      - uses: ./.github/actions/run-core-tests
        with:
          runner: python -m pytest --execution=${{ matrix.execution }}
      - uses: ./.github/actions/upload-coverage

  test-asv-benchmarks:
    if: github.event_name == 'pull_request'
    needs: [lint-flake8]
    runs-on: ubuntu-latest
    defaults:
      run:
        shell: bash -l {0}
    env:
      MODIN_ENGINE: ray
      MODIN_MEMORY: 1000000000
      MODIN_TEST_DATASET_SIZE: small
    name: test-asv-benchmarks
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 1
      - uses: conda-incubator/setup-miniconda@v3
        with:
          auto-activate-base: true
          activate-environment: ""
          miniforge-variant: Miniforge3
          miniforge-version: latest
          use-mamba: true
      - name: Running benchmarks
        run: |
          git remote add upstream https://github.com/modin-project/modin.git
          git fetch upstream
          if git diff upstream/main --name-only | grep -q "^asv_bench/"; then
              cd asv_bench

              mamba env create -f ../environment-dev.yml
              conda activate modin
              pip install ..

              asv machine --yes

              # check Modin on Ray
              asv run --quick --dry-run --python=same --strict --show-stderr --launch-method=spawn \
                -b ^benchmarks -b ^io -b ^scalability | tee benchmarks.log

              # check pure pandas
              MODIN_ASV_USE_IMPL=pandas asv run --quick --dry-run --python=same --strict --show-stderr --launch-method=spawn \
                -b ^benchmarks -b ^io | tee -a benchmarks.log
          else
              echo "Benchmarks did not run, no changes detected"
          fi
        if: always()

      - name: Publish benchmarks artifact
        uses: actions/upload-artifact@v4
        with:
          name: Benchmarks log
          path: asv_bench/benchmarks.log
          include-hidden-files: true
        if: failure()

  execution-filter:
    # Choose which executions we want to run all tests for on a pull request.
    # We always test 'native' and 'python' executions completely because they
    # are fast, but we only test ray, dask, and unidist, if we think this pull
    # request is affecting how we execute with those engines specifically.
    runs-on: ubuntu-latest
    outputs:
      ray: ${{ steps.filter.outputs.ray }}
      dask: ${{ steps.filter.outputs.dask }}
      unidist: ${{ steps.filter.outputs.unidist }}
      engines: ${{ steps.engines.outputs.engines }}
      # The 'experimental' filter is defined in the paths-filter step below
      # (id: filter); there is no step with id 'experimental'.
      experimental: ${{ steps.filter.outputs.experimental }}
    steps:
    - uses: actions/checkout@v4
    - uses: dorny/paths-filter@v3
      id: filter
      with:
        filters: |
          shared: &shared
            - 'modin/core/execution/dispatching/**'
          ray:
            - *shared
            - 'modin/core/execution/ray/**'
          dask:
            - *shared
            - 'modin/core/execution/dask/**'
          unidist:
            - *shared
            - 'modin/core/execution/unidist/**'
          experimental:
            - 'modin/experimental/**'
    - uses: actions/setup-python@v5
    # Build the JSON list of engines for the test-all matrix: 'python' and
    # 'native' always, plus 'ray' and/or 'dask' when their filter matched.
    - id: engines
      run: |
        python -c "import sys, json; print('engines=' + json.dumps(['python', 'native'] + (sys.argv[1] == 'true' and ['ray'] or []) + (sys.argv[2] == 'true' and ['dask'] or []) ))" \
              "${{ steps.filter.outputs.ray }}" "${{ steps.filter.outputs.dask }}" >> $GITHUB_OUTPUT

  test-all-unidist:
    # Full test run for the unidist engine on the MPI backend. Runs on every
    # push; on pull requests it runs only when the execution filter reports
    # changes that affect unidist.
    needs: [lint-flake8, execution-filter, python-filter]
    if: github.event_name == 'push' || needs.execution-filter.outputs.unidist == 'true'
    runs-on: ubuntu-latest
    defaults:
      run:
        shell: bash -l {0}
    strategy:
      matrix:
        python-version: [ "${{ needs.python-filter.outputs.python-version }}" ]
        unidist-backend: ["mpi"]
    env:
      MODIN_ENGINE: "Unidist"
      UNIDIST_BACKEND: ${{matrix.unidist-backend}}
      # Only test reading from SQL server and postgres on ubuntu for now.
      # Eventually, we should test on Windows, too, but we will have to set up
      # the servers differently.
      MODIN_TEST_READ_FROM_SQL_SERVER: true
      MODIN_TEST_READ_FROM_POSTGRES: true
    name: test-ubuntu (engine unidist ${{matrix.unidist-backend}}, python ${{matrix.python-version}})
    services:
      # moto server; the steps below refer to it as the mock s3 filesystem.
      moto:
        image: motoserver/moto:5.0.13
        ports:
          - 5000:5000
        env:
          AWS_ACCESS_KEY_ID: foobar_key
          AWS_SECRET_ACCESS_KEY: foobar_secret
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/mamba-env
        with:
          environment-file: requirements/env_unidist_linux.yml
          activate-environment: modin_on_unidist
          python-version: ${{matrix.python-version}}
      - name: Install HDF5
        run: sudo apt update && sudo apt install -y libhdf5-dev
      - name: Set up postgres
        # Locally, specifying port 2345:5432 works, but 2345:2345 and 5432:5432 do not. This solution is from
        # https://stackoverflow.com/questions/36415654/cant-connect-docker-postgresql-9-3
        run: |
          sudo docker pull postgres
          sudo docker run --name some-postgres -e POSTGRES_USER=sa -e POSTGRES_PASSWORD=Strong.Pwd-123 -e POSTGRES_DB=postgres -d -p 2345:5432 postgres
      # All pytest invocations in this job are launched through `mpiexec -n 1`.
      - run: mpiexec -n 1 python -m pytest modin/tests/pandas/internals/test_benchmark_mode.py
      - run: mpiexec -n 1 python -m pytest modin/tests/test_partition_api.py
      - uses: ./.github/actions/run-core-tests
        with:
          runner: mpiexec -n 1 python -m pytest
          # Pass an empty `parallel` value to the core-tests action.
          parallel: ""
      - run: mpiexec -n 1 python -m pytest modin/tests/numpy
      - run: chmod +x ./.github/workflows/sql_server/set_up_sql_server.sh
      - run: ./.github/workflows/sql_server/set_up_sql_server.sh
      # need an extra argument "genv" to set environment variables for mpiexec. We need
      # these variables to test writing to the mock s3 filesystem.
      - uses: nick-fields/retry@v3
        # to avoid issues with non-stable `to_csv` tests for unidist on MPI backend.
        # for details see: https://github.com/modin-project/modin/pull/6776
        with:
          timeout_minutes: 15
          max_attempts: 3
          # The command is run explicitly inside the `modin_on_unidist` conda
          # environment via `conda run`.
          command: |
            conda run --no-capture-output -n modin_on_unidist mpiexec -n 1 -genv AWS_ACCESS_KEY_ID foobar_key \
              -genv AWS_SECRET_ACCESS_KEY foobar_secret python -m pytest modin/tests/pandas/test_io.py --verbose
      - run: |
          mpiexec -n 1 -genv AWS_ACCESS_KEY_ID foobar_key -genv AWS_SECRET_ACCESS_KEY foobar_secret \
            python -m pytest modin/tests/experimental/test_io_exp.py
      - run: mpiexec -n 1 python -m pytest modin/tests/interchange/dataframe_protocol/test_general.py
      - run: mpiexec -n 1 python -m pytest modin/tests/interchange/dataframe_protocol/pandas/test_protocol.py
      - run: |
          python -m pip install lazy_import
          mpiexec -n 1 python -m pytest modin/tests/pandas/integrations/
      - uses: ./.github/actions/upload-coverage

  test-all:
    # Full test matrix over OS x engine x test group. The 'python' and 'native'
    # engines run everything in a single job (group_1); the other engines split
    # the tests across group_1..group_4.
    needs: [lint-flake8, execution-filter, python-filter]
    strategy:
      matrix:
        os:
          - ubuntu
          - windows
        python-version: [ "${{ needs.python-filter.outputs.python-version }}" ]
        # On push, run the tests for all engines. Otherwise, for pull requests,
        # only run tests for engines that depend on files changed in this PR.
        engine: ${{ fromJSON( (github.event_name == 'push' && '["python", "ray", "dask", "native"]') || needs.execution-filter.outputs.engines ) }}
        test_task:
          - group_1
          - group_2
          - group_3
          - group_4
        exclude: # python and native engines only have one task group that contains all the tests
          - engine: "python"
            test_task: "group_2"
          - engine: "native"
            test_task: "group_2"
          - engine: "python"
            test_task: "group_3"
          - engine: "native"
            test_task: "group_3"
          - engine: "python"
            test_task: "group_4"
          - engine: "native"
            test_task: "group_4"
    runs-on: ${{ matrix.os }}-latest
    defaults:
      run:
        shell: bash -l {0}
    env:
      MODIN_ENGINE: ${{matrix.engine}}
      # Only test reading from SQL server and postgres on ubuntu for now.
      # Eventually, we should test on Windows, too, but we will have to set up
      # the servers differently.
      MODIN_TEST_READ_FROM_SQL_SERVER: ${{ matrix.os == 'ubuntu' }}
      MODIN_TEST_READ_FROM_POSTGRES: ${{ matrix.os == 'ubuntu' }}
    name: test-${{ matrix.os }} (engine ${{matrix.engine}}, python ${{matrix.python-version}}, ${{matrix.test_task}})
    services:
      # Using workaround https://github.com/actions/runner/issues/822#issuecomment-1524826092
      moto:
        # we only need moto service on Ubuntu and for group_4 task, or for native or python engine.
        image: ${{ (matrix.os == 'ubuntu' && (matrix.engine == 'python' || matrix.engine == 'native' || matrix.test_task == 'group_4')) && 'motoserver/moto:5.0.13' || '' }}
        ports:
          - 5000:5000
        env:
          AWS_ACCESS_KEY_ID: foobar_key
          AWS_SECRET_ACCESS_KEY: foobar_secret
    steps:
      - name: Set native storage format
        run: echo "MODIN_STORAGE_FORMAT=Native" >> $GITHUB_ENV
        if: matrix.engine == 'native'
      - name: Limit ray memory
        run: echo "MODIN_MEMORY=1000000000" >> $GITHUB_ENV
        if: matrix.os == 'ubuntu' && matrix.engine == 'ray'
      - name: Tell Modin to use existing ray cluster
        run: echo "MODIN_RAY_CLUSTER=True" >> $GITHUB_ENV
        if: matrix.os == 'windows' && matrix.engine == 'ray'
      - uses: actions/checkout@v4
      - uses: ./.github/actions/mamba-env
        with:
          environment-file: environment-dev.yml
          python-version: ${{matrix.python-version}}
      - name: Start local ray cluster
        # Try a few times to start ray to work around
        # https://github.com/modin-project/modin/issues/4562
        uses: nick-fields/retry@v3
        with:
          timeout_minutes: 5
          max_attempts: 5
          command: ray start --head --port=6379 --object-store-memory=1000000000
        if: matrix.os == 'windows' && matrix.engine == 'ray'
      - name: Install HDF5
        run: sudo apt update && sudo apt install -y libhdf5-dev
        if: matrix.os == 'ubuntu'
      - name: Set up postgres
        # Locally, specifying port 2345:5432 works, but 2345:2345 and 5432:5432 do not. This solution is from
        # https://stackoverflow.com/questions/36415654/cant-connect-docker-postgresql-9-3
        run: |
          sudo docker pull postgres
          sudo docker run --name some-postgres -e POSTGRES_USER=sa -e POSTGRES_PASSWORD=Strong.Pwd-123 -e POSTGRES_DB=postgres -d -p 2345:5432 postgres
        if: matrix.os == 'ubuntu'

    # BEGIN partitioned execution tests. We run these tests along with group 1,
    # or if we are on the "python" engine, which only has a single group. We
    # skip these tests on the "native" engine, which does not use partitions.

      - run: python -m pytest modin/tests/pandas/internals/test_benchmark_mode.py
        if: matrix.engine != 'native' && (matrix.engine == 'python' || matrix.test_task == 'group_1')
      - run: python -m pytest modin/tests/test_partition_api.py
        # Skip this test for python because we do not define unwrap_partitions()
        # for python execution.
        if: matrix.engine != 'native' && matrix.engine != 'python' && matrix.test_task == 'group_1'
      - name: xgboost tests
        run: |
          # TODO(https://github.com/modin-project/modin/issues/5194): Uncap xgboost
          # when we use collective instead of rabit.
          mamba install "xgboost>=1.7.1,<2.0.0" scikit-learn -c conda-forge
          python -m pytest -n 2 \
                  modin/tests/experimental/xgboost/test_default.py \
                  modin/tests/experimental/xgboost/test_xgboost.py \
                  modin/tests/experimental/xgboost/test_dmatrix.py
        if: matrix.engine != 'native' && matrix.os != 'windows' && (matrix.engine == 'python' || matrix.test_task == 'group_1')
      - run: python -m pytest -n 2 modin/tests/experimental/test_pipeline.py
        if: matrix.engine != 'native'  && (matrix.engine == 'python' || matrix.test_task == 'group_1')


    # END partitioned execution tests.


    # BEGIN test groups.
    # Run all the tests in the corresponding group for this instance of the
    # test matrix. For example, if we are in the matrix's 'group_4', run the
    # tests for 'group_4'. For each of 'native' and 'python' engines,  we run
    # all tests in a single job, so we ignore the grouping.

      - uses: ./.github/actions/run-core-tests/group_1
        with:
          # When running with Ray engine on Windows using 2 pytest workers tests are failing in CI.
          # See https://github.com/modin-project/modin/issues/7387.
          parallel: ${{ matrix.engine == 'ray' && matrix.os == 'windows' && '-n 1' || '-n 2' }}
        if: matrix.engine == 'python' || matrix.engine == 'native' || matrix.test_task == 'group_1'
      - uses: ./.github/actions/run-core-tests/group_2
        with:
          # When running with Ray engine on Windows using 2 pytest workers tests are failing in CI.
          # See https://github.com/modin-project/modin/issues/7387.
          parallel: ${{ matrix.engine == 'ray' && matrix.os == 'windows' && '-n 1' || '-n 2' }}
        if: matrix.engine == 'python' || matrix.engine == 'native' || matrix.test_task == 'group_2'
      - uses: ./.github/actions/run-core-tests/group_3
        with:
          # When running with Ray engine on Windows using 2 pytest workers tests are failing in CI.
          # See https://github.com/modin-project/modin/issues/7387.
          parallel: ${{ matrix.engine == 'ray' && matrix.os == 'windows' && '-n 1' || '-n 2' }}
        if: matrix.engine == 'python' || matrix.engine == 'native' || matrix.test_task == 'group_3'
      - uses: ./.github/actions/run-core-tests/group_4
        with:
          # When running with Ray engine on Windows using 2 pytest workers tests are failing in CI.
          # See https://github.com/modin-project/modin/issues/7387.
          parallel: ${{ matrix.engine == 'ray' && matrix.os == 'windows' && '-n 1' || '-n 2' }}
        if: matrix.engine == 'python' || matrix.engine == 'native' || matrix.test_task == 'group_4'
      - run: python -m pytest -n 2 modin/tests/numpy
        # Native execution does not support the modin Numpy API.
        if: matrix.engine == 'python' || matrix.test_task == 'group_4'

    # END test groups.


    # BEGIN some tests that we run along with group 4 for engines other than
    # 'native' and 'python'. 'native' and 'python' jobs will run these tests
    # along with all other tests in a single group.

      - run: chmod +x ./.github/workflows/sql_server/set_up_sql_server.sh
        if: matrix.os == 'ubuntu' && (matrix.engine == 'python' || matrix.engine == 'native' || matrix.test_task == 'group_4')
      - run: ./.github/workflows/sql_server/set_up_sql_server.sh
        if: matrix.os == 'ubuntu' && (matrix.engine == 'python' || matrix.engine == 'native' || matrix.test_task == 'group_4')
      # Do not add parallelism (`-n` argument) here - it will cause mock S3 service to fail.
      - run: python -m pytest modin/tests/pandas/test_io.py --verbose
        timeout-minutes: 60
        if: matrix.engine == 'python' || matrix.engine == 'native' || matrix.test_task == 'group_4'
      - run: python -m pytest modin/tests/experimental/test_io_exp.py
        if: matrix.engine == 'python' || matrix.engine == 'native' || matrix.test_task == 'group_4'
      - run: python -m pytest modin/tests/interchange/dataframe_protocol/test_general.py
        if: matrix.engine == 'python' || matrix.engine == 'native' || matrix.test_task == 'group_4'
      - run: python -m pytest modin/tests/interchange/dataframe_protocol/pandas/test_protocol.py
        if: matrix.engine == 'python' || matrix.engine == 'native' || matrix.test_task == 'group_4'
      - run: python -m pytest modin/tests/polars/test_dataframe.py
        # Gate this step like the other steps of this section; without the
        # condition it is repeated in every test group of every engine.
        if: matrix.engine == 'python' || matrix.engine == 'native' || matrix.test_task == 'group_4'
      - run: |
          python -m pip install lazy_import
          python -m pytest modin/tests/pandas/integrations/
        if: matrix.engine == 'python' || matrix.engine == 'native' || matrix.test_task == 'group_4'


    # END tests that run on group 4, or in the single group for 'native' and
    # 'python' engines.

      - uses: ./.github/actions/upload-coverage
      - name: Stop local ray cluster
        run: ray stop
        if: matrix.os == 'windows' && matrix.engine == 'ray'

  test-sanity:
    # The "sanity" tests run on each pull request to test that a subset of the
    # full tests work with the slower engines (ray, dask, and unidist-MPI).
    needs: [lint-flake8, execution-filter, python-filter]
    # If we don't need to run any sanity tests, the job matrix that we generate
    # here gives a single job with all the matrix fields empty (that is, os,
    # execution, etc. are not set, so we treat them as "").
    # So, if the matrix is going to be empty, we need to skip this job
    # completely. This bizarre behavior is not in the official documentation
    # of GitHub Actions matrices, but someone does mention it here:
    # https://stackoverflow.com/a/77118991
    # The condition below therefore only lets the job run on pull requests
    # where at least one of ray, dask, or unidist is NOT getting the full run.
    if: |
      github.event_name == 'pull_request' &&
      (
        needs.execution-filter.outputs.ray != 'true' ||
        needs.execution-filter.outputs.dask != 'true' ||
        needs.execution-filter.outputs.unidist != 'true'
      )
    strategy:
      matrix:
        os:
          - ubuntu
          - windows
        python-version: [ "${{ needs.python-filter.outputs.python-version }}" ]
        # Single-valued dimensions that carry the execution-filter outputs
        # into the matrix so that `exclude` below can match on them.
        running-all-ray-tests: [ "${{ needs.execution-filter.outputs.ray }}" ]
        running-all-dask-tests: [ "${{needs.execution-filter.outputs.dask}}" ]
        running-all-unidist-tests: [ "${{needs.execution-filter.outputs.unidist}}" ]
        execution: [ray, dask, unidist]
        # If we're going to run all ray tests because we've detected a
        # change to the ray engine, we don't need to run these sanity tests
        # on ray. Likewise for dask and unidist.
        exclude:
          - running-all-ray-tests: 'true'
            execution: ray
          - running-all-dask-tests: 'true'
            execution: dask
          - running-all-unidist-tests: 'true'
            execution: unidist
    runs-on: ${{ matrix.os }}-latest
    defaults:
      run:
        shell: bash -l {0}
    env:
      MODIN_ENGINE: ${{ matrix.execution }}
      UNIDIST_BACKEND: "mpi"
      # '-n 2' everywhere except for unidist and on Windows, where it is empty.
      PARALLEL: ${{ matrix.execution != 'unidist' && matrix.os != 'windows' && '-n 2' || '' }}
      # ray and dask use plain pytest; unidist runs pytest under `mpiexec`
      # with the AWS credentials passed via `-genv`. Any other execution value
      # yields 'UNKNOWN_PYTEST_COMMAND'.
      PYTEST_COMMAND: >-
        ${{
          (
            (matrix.execution == 'ray' || matrix.execution == 'dask') &&
            'python -m pytest'
          ) ||
          (
            matrix.execution == 'unidist' &&
            'mpiexec -n 1 -genv AWS_ACCESS_KEY_ID foobar_key -genv AWS_SECRET_ACCESS_KEY foobar_secret python -m pytest'
          ) ||
          'UNKNOWN_PYTEST_COMMAND'
        }}
    name: test-${{ matrix.os }}-sanity (engine ${{ matrix.execution }}, python ${{matrix.python-version}})
    services:
      moto:
        # The image is left empty on Windows.
        image: ${{ matrix.os != 'windows' && 'motoserver/moto:5.0.13' || '' }}
        ports:
          - 5000:5000
        env:
          AWS_ACCESS_KEY_ID: foobar_key
          AWS_SECRET_ACCESS_KEY: foobar_secret
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/mamba-env
        with:
          # unidist uses an OS-specific environment file and the
          # 'modin_on_unidist' environment; ray and dask use environment-dev.yml
          # and the 'modin' environment.
          environment-file: ${{ matrix.os == 'ubuntu' && matrix.execution == 'unidist' && 'requirements/env_unidist_linux.yml' || matrix.os == 'windows' && matrix.execution == 'unidist' && 'requirements/env_unidist_win.yml' || 'environment-dev.yml' }}
          activate-environment: ${{ matrix.execution == 'unidist' && 'modin_on_unidist' || 'modin' }}
          python-version: ${{matrix.python-version}}
      - name: Install HDF5
        run: sudo apt update && sudo apt install -y libhdf5-dev
        if: matrix.os != 'windows'
      - name: Limit ray memory
        run: echo "MODIN_MEMORY=1000000000" >> $GITHUB_ENV
        if: matrix.os != 'windows' && matrix.execution == 'ray'
      - name: Tell Modin to use existing ray cluster
        run: echo "MODIN_RAY_CLUSTER=True" >> $GITHUB_ENV
        if: matrix.os == 'windows' && matrix.execution == 'ray'
      - name: Start local ray cluster
        # Try a few times to start ray to work around
        # https://github.com/modin-project/modin/issues/4562
        uses: nick-fields/retry@v3
        with:
          timeout_minutes: 5
          max_attempts: 5
          command: ray start --head --port=6379 --object-store-memory=1000000000
        if: matrix.os == 'windows' && matrix.execution == 'ray'
      - run: MODIN_BENCHMARK_MODE=True $PYTEST_COMMAND modin/tests/pandas/internals/test_benchmark_mode.py
      - run: $PYTEST_COMMAND $PARALLEL modin/tests/test_partition_api.py
      - run: $PYTEST_COMMAND modin/tests/pandas/extensions
      # The experimental tests below run only when the execution filter
      # reports the 'experimental' output as 'true'.
      - name: xgboost tests
        run: |
          # TODO(https://github.com/modin-project/modin/issues/5194): Uncap xgboost
          # when we use collective instead of rabit.
          mamba install "xgboost>=1.7.1,<2.0.0" scikit-learn -c conda-forge
          $PYTEST_COMMAND $PARALLEL \
                  modin/tests/experimental/xgboost/test_default.py \
                  modin/tests/experimental/xgboost/test_xgboost.py \
                  modin/tests/experimental/xgboost/test_dmatrix.py
        if: matrix.os != 'windows' && needs.execution-filter.outputs.experimental == 'true'
      - run: $PYTEST_COMMAND $PARALLEL modin/tests/experimental/test_pipeline.py
        if: matrix.os != 'windows' && matrix.execution != 'unidist' && needs.execution-filter.outputs.experimental == 'true'
      - name: "test DF: binary, default, iter"
        run: |
          $PYTEST_COMMAND $PARALLEL \
                  modin/tests/pandas/dataframe/test_binary.py \
                  modin/tests/pandas/dataframe/test_default.py \
                  modin/tests/pandas/dataframe/test_iter.py
        if: matrix.os != 'windows'
      - name: "test DF: reduce, udf, window, pickle"
        run: |
          $PYTEST_COMMAND $PARALLEL \
                  modin/tests/pandas/dataframe/test_reduce.py \
                  modin/tests/pandas/dataframe/test_udf.py \
                  modin/tests/pandas/dataframe/test_window.py \
                  modin/tests/pandas/dataframe/test_pickle.py
        if: matrix.os != 'windows'
      # ray runs test_series.py and test_map_metadata.py completely; the other
      # engines deselect the tests marked `exclude_in_sanity`.
      - run: $PYTEST_COMMAND modin/tests/pandas/test_series.py
        if: matrix.execution == 'ray'
      - run: $PYTEST_COMMAND -m "not exclude_in_sanity" modin/tests/pandas/test_series.py
        if: matrix.execution != 'ray'
      - run: $PYTEST_COMMAND modin/tests/pandas/dataframe/test_map_metadata.py
        if: matrix.execution == 'ray'
      - run: $PYTEST_COMMAND -m "not exclude_in_sanity" modin/tests/pandas/dataframe/test_map_metadata.py
        if: matrix.execution != 'ray'
      - name: "test rolling, expanding, reshape, general, concat"
        run: |
          $PYTEST_COMMAND $PARALLEL \
                  modin/tests/pandas/test_rolling.py \
                  modin/tests/pandas/test_expanding.py \
                  modin/tests/pandas/test_reshape.py \
                  modin/tests/pandas/test_general.py \
                  modin/tests/pandas/test_concat.py
        if: matrix.os != 'windows'
      - run: $PYTEST_COMMAND $PARALLEL modin/tests/numpy
      - run: $PYTEST_COMMAND -m "not exclude_in_sanity" modin/tests/pandas/test_io.py --verbose
        if: matrix.execution != 'unidist'
      - uses: nick-fields/retry@v3
        # to avoid issues with non-stable `to_csv` tests for unidist on MPI backend.
        # for details see: https://github.com/modin-project/modin/pull/6776
        with:
          timeout_minutes: 15
          max_attempts: 3
          command: conda run --no-capture-output -n modin_on_unidist $PYTEST_COMMAND -m "not exclude_in_sanity" modin/tests/pandas/test_io.py --verbose
        if: matrix.execution == 'unidist'
      - run: $PYTEST_COMMAND modin/tests/experimental/test_io_exp.py
      - run: $PYTEST_COMMAND $PARALLEL modin/tests/interchange/dataframe_protocol/test_general.py
      - run: $PYTEST_COMMAND $PARALLEL modin/tests/interchange/dataframe_protocol/pandas/test_protocol.py
      - name: Stop local ray cluster
        run: ray stop
        if: matrix.os == 'windows' && matrix.execution == 'ray'
      - uses: ./.github/actions/upload-coverage

  test-experimental:
    # Runs a subset of the pandas tests on the python engine with
    # MODIN_EXPERIMENTAL enabled.
    name: test experimental
    needs:
      - lint-flake8
      - python-filter
    runs-on: ubuntu-latest
    defaults:
      run:
        shell: bash -l {0}
    env:
      MODIN_EXPERIMENTAL: "True"
      MODIN_ENGINE: "python"
    services:
      moto:
        image: motoserver/moto:5.0.13
        env:
          AWS_ACCESS_KEY_ID: foobar_key
          AWS_SECRET_ACCESS_KEY: foobar_secret
        ports:
          - 5000:5000
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/mamba-env
        with:
          python-version: ${{ needs.python-filter.outputs.python-version }}
          environment-file: environment-dev.yml
      - name: Install HDF5
        run: sudo apt update && sudo apt install -y libhdf5-dev
      # These two test modules are run with two pytest workers.
      - run: python -m pytest -n 2 modin/tests/pandas/dataframe/test_map_metadata.py
      - run: python -m pytest -n 2 modin/tests/pandas/test_series.py
      # Keep this one serial: adding the `-n` argument makes the mock S3
      # service fail.
      - run: python -m pytest modin/tests/pandas/test_io.py --verbose
      - uses: ./.github/actions/upload-coverage

  test-spreadsheet:
    # Runs the experimental spreadsheet tests on the ray and dask engines.
    name: test-spreadsheet (engine ${{matrix.engine}}, python ${{matrix.python-version}})
    needs:
      - lint-flake8
      - python-filter
    runs-on: ubuntu-latest
    strategy:
      matrix:
        engine:
          - "ray"
          - "dask"
        python-version:
          - "${{ needs.python-filter.outputs.python-version }}"
    defaults:
      run:
        shell: bash -l {0}
    env:
      MODIN_ENGINE: ${{matrix.engine}}
      MODIN_EXPERIMENTAL: "True"
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/mamba-env
        with:
          python-version: ${{matrix.python-version}}
          environment-file: environment-dev.yml
      - run: python -m pytest modin/tests/experimental/spreadsheet/test_general.py

  test-native-dataframe-interoperability:
    # Runs the native_df_interoperability test directory on the Python engine
    # for push and pull_request events.
    needs: [ lint-flake8]
    if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' }}
    runs-on: ubuntu-latest
    defaults:
      run:
        shell: bash -l {0}
    strategy:
      matrix:
        python-version: ["3.9"]
    env:
      # Test interoperability between PandasOnPython dataframes/series and
      # native dataframes/series.
      MODIN_ENGINE: "Python"
    # The opening parenthesis was missing from this display name.
    name: test-native-dataframe-interoperability (python ${{matrix.python-version}})
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/mamba-env
        with:
          environment-file: environment-dev.yml
          python-version: ${{matrix.python-version}}
      - run: python -m pytest modin/tests/pandas/native_df_interoperability/ -n 2
      - uses: ./.github/actions/upload-coverage

  merge-coverage-artifacts:
    # Merges every `coverage-data-*` artifact into one `coverage-data`
    # artifact and deletes the merged inputs.
    runs-on: ubuntu-latest
    # we need to run it regardless of some job being skipped, like in PR
    if: always()
    needs:
      - test-internals
      - test-api-and-no-engine
      - test-defaults
      - test-all-unidist
      - test-all
      - test-experimental
      - test-sanity
      - test-native-dataframe-interoperability
    defaults:
      run:
        shell: bash -l {0}
    steps:
      - name: Merge Artifacts
        uses: actions/upload-artifact/merge@v4
        with:
          pattern: coverage-data-*
          name: coverage-data
          delete-merged: true
          include-hidden-files: true

  upload-coverage:
    # Downloads the merged coverage data, combines it, renders an XML report
    # and hands it to the Codecov action.
    runs-on: ubuntu-latest
    # we need to run it regardless of some job being skipped, like in PR
    if: always()
    needs:
      - merge-coverage-artifacts
      - python-filter
    defaults:
      run:
        shell: bash -l {0}
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/python-only
        with:
          python-version: ${{ needs.python-filter.outputs.python-version }}
      - name: Download coverage data
        uses: actions/download-artifact@v4
        with:
          name: coverage-data
      - run: pip install coverage
      - name: Combine coverage
        run: python -m coverage combine
      - name: Generate coverage report in xml format
        run: python -m coverage xml
      - uses: codecov/codecov-action@v4
        with:
          # this token is available at https://app.codecov.io/account/github/modin-project/
          token: ${{ secrets.CODECOV_TOKEN }}
          # do not care about uploads in PR
          fail_ci_if_error: ${{ github.event_name == 'push' }}


================================================
FILE: .github/workflows/codeql/codeql-config.yml
================================================
# CodeQL configuration referenced by the `config-file` input of the CodeQL
# workflow; `paths` lists the files to include.
name: "Modin CodeQL config"

paths:
  - "modin/**"


================================================
FILE: .github/workflows/codeql.yml
================================================
# CodeQL analysis of the Python sources, for pushes to `main` and pull
# requests that target `main`.
name: "CodeQL"

on:
  pull_request:
    branches:
      - "main"
  push:
    branches:
      - "main"

concurrency:
  # Cancel other jobs in the same branch. We don't care whether CI passes
  # on old commits.
  cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}
  group: ${{ github.workflow }}-${{ github.ref }}

env:
  MODIN_GITHUB_CI: true

jobs:
  analyze:
    name: Analyze
    runs-on: ubuntu-latest
    permissions:
      security-events: write
      contents: read
      actions: read

    strategy:
      fail-fast: false
      matrix:
        language:
          - python

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Initialize CodeQL
        uses: github/codeql-action/init@v3
        with:
          config-file: ./.github/workflows/codeql/codeql-config.yml
          queries: +security-and-quality
          languages: ${{ matrix.language }}

      - name: Autobuild
        uses: github/codeql-action/autobuild@v3

      - name: Perform CodeQL Analysis
        uses: github/codeql-action/analyze@v3
        with:
          category: "/language:${{ matrix.language }}"


================================================
FILE: .github/workflows/fuzzydata-test.yml
================================================
# Runs the fuzzydata test on pull requests for the ray and dask engines and
# uploads the files the run leaves under /tmp as an artifact.
name: fuzzy

on:
  pull_request:
    # NOTE: keep these paths in sync with the paths that trigger the CI Github
    # Actions in .github/workflows/ci.yml
    # The order matters: the `!` pattern has to follow the pattern it negates.
    paths:
      - .github/workflows/**
      - '!.github/workflows/push-to-main.yml'
      - asv_bench/**
      - modin/**
      - requirements/**
      - scripts/**
      - environment-dev.yml
      - requirements-dev.txt
      - setup.cfg
      - setup.py
      - versioneer.py

concurrency:
  # Cancel other jobs in the same branch. We don't care whether CI passes
  # on old commits.
  cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}
  group: ${{ github.workflow }}-${{ github.ref }}

env:
  MODIN_GITHUB_CI: true

jobs:
  test-fuzzydata:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        engine:
          - "ray"
          - "dask"
        python-version:
          - "3.9"
    defaults:
      run:
        shell: bash -l {0}
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/mamba-env
        with:
          python-version: ${{matrix.python-version}}
          environment-file: environment-dev.yml
      - name: test-fuzzydata (engine ${{matrix.engine}}, python ${{matrix.python-version}})
        env:
          MODIN_ENGINE: ${{matrix.engine}}
        run: python -m pytest modin/tests/experimental/test_fuzzydata.py -Wignore::UserWarning --log-file=/tmp/fuzzydata-test-wf-${{matrix.engine}}/run.log --log-file-level=INFO
      # Upload the output whether the test step passed or failed.
      - uses: actions/upload-artifact@v4
        if: success() || failure()
        with:
          name: fuzzydata-test-workflow-${{matrix.engine}}
          # Must match output dir in test_fuzzydata.py
          path: /tmp/fuzzydata-test-wf-${{matrix.engine}}/*
          include-hidden-files: true
          if-no-files-found: error


================================================
FILE: .github/workflows/publish-to-pypi.yml
================================================
# Builds the Modin sdist and wheel on a weekly schedule, on tag pushes and on
# manual dispatch. Only `push` events check out the latest tag and publish to
# PyPI; other triggers stop after uploading the build artifact.
name: Publish Modin wheel to PyPI

on:
  workflow_dispatch:
  push:
    tags:
      - '*'
  schedule:
    - cron: "42 0 * * WED"

jobs:
  build-n-publish:
    name: Build and publish Modin wheel to PyPI
    runs-on: ubuntu-latest
    environment: release
    permissions:
      # IMPORTANT: this permission is mandatory for trusted publishing
      id-token: write

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-tags: true
          fetch-depth: 0
      - name: Checkout latest git tag
        if: github.event_name == 'push'
        run: git checkout $(git describe --tags "$(git rev-list --tags --max-count=1)")
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.9.x"

      - name: Install/update tools
        run: python3 -m pip install --upgrade build wheel
      - name: Build a pure Python wheel
        run: python3 setup.py sdist bdist_wheel

      - uses: actions/upload-artifact@v4
        with:
          path: ./dist/
          name: modin-wheel-and-source-tarball
          include-hidden-files: true

      - name: Publish Modin wheel to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
        if: github.event_name == 'push'


================================================
FILE: .github/workflows/push-to-main.yml
================================================
name: push-to-main
on:
  push:
    branches:
      - main
concurrency:
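  # Cancel in-progress runs of this workflow only for pull-request refs
  # ('refs/pull/...'); we don't care whether CI passes on old PR commits.
  # NOTE(review): this workflow is triggered only by pushes to main, so the
  # condition below appears to never cancel anything here -- confirm intended.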
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}
env:
  MODIN_GITHUB_CI: true
jobs:
  test-ray-master:
    runs-on: ubuntu-latest
    defaults:
      run:
        # `shell: bash -l {0}` - special way to activate modin environment
        shell: bash -l {0}
    services:
      moto:
        image: motoserver/moto:5.0.13
        ports:
          - 5000:5000
        env:
          AWS_ACCESS_KEY_ID: foobar_key
          AWS_SECRET_ACCESS_KEY: foobar_secret
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/mamba-env
        with:
          environment-file: environment-dev.yml
      - name: install Ray nightly build
        # Use --force-reinstall to always reinstall ray and its dependencies.
        # botocore isn't compatible with urllib3>=2; see #6094 for details
        run: pip install --force-reinstall "urllib3<2" https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp39-cp39-manylinux2014_x86_64.whl
      - name: Conda environment
        run: |
          conda info
          conda list
      - run: sudo apt update && sudo apt install -y libhdf5-dev
      - name: Run parallelizable Modin Tests
        run: >
          python -m pytest -n 2
          modin/tests/pandas/dataframe/test_binary.py
          modin/tests/pandas/dataframe/test_default.py
          modin/tests/pandas/dataframe/test_indexing.py
          modin/tests/pandas/dataframe/test_iter.py
          modin/tests/pandas/dataframe/test_join_sort.py
          modin/tests/pandas/dataframe/test_map_metadata.py
          modin/tests/pandas/dataframe/test_reduce.py
          modin/tests/pandas/dataframe/test_udf.py
          modin/tests/pandas/dataframe/test_window.py
          modin/tests/pandas/test_series.py
          modin/tests/numpy/test_array.py
          modin/tests/numpy/test_array_creation.py
          modin/tests/numpy/test_array_arithmetic.py
          modin/tests/numpy/test_array_axis_functions.py
          modin/tests/numpy/test_array_logic.py
          modin/tests/numpy/test_array_linalg.py
          modin/tests/numpy/test_array_indexing.py
          modin/tests/numpy/test_array_math.py
          modin/tests/numpy/test_array_shaping.py
          modin/tests/pandas/test_rolling.py
          modin/tests/pandas/test_expanding.py
          modin/tests/pandas/test_concat.py
          modin/tests/pandas/test_groupby.py
          modin/tests/pandas/test_reshape.py
          modin/tests/pandas/test_general.py
      - name: Run non-parallelizable Modin Tests
        run: >
          python -m pytest
          modin/tests/pandas/test_io.py
          modin/tests/experimental/test_io_exp.py

  test-docs:
    runs-on: ubuntu-latest
    defaults:
      run:
        shell: bash -l {0}
    name: test docs
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/mamba-env
        with:
          environment-file: environment-dev.yml
      - run: sudo apt update && sudo apt install -y libhdf5-dev
      - name: Docstring URL validity check
        run: python -m pytest modin/tests/test_docstring_urls.py


================================================
FILE: .github/workflows/sql_server/set_up_sql_server.sh
================================================
# This script sets up a SQL server listening at 0.0.0.0:1433.

# If any step fails, we can't set up a valid SQL server for unit tests.
set -e

# Pull the 2019 SQL server docker container image by following:
# https://docs.microsoft.com/en-us/sql/linux/quickstart-install-connect-docker?view=sql-server-ver15&pivots=cs1-powershell#pullandrun2019
sudo docker pull mcr.microsoft.com/mssql/server:2019-latest
# Start the container detached (-d) as "example_sql_server", accepting the EULA,
# setting the SA password, and publishing container port 1433 on host port 1433.
sudo docker run -d --name example_sql_server -e 'ACCEPT_EULA=Y' -e 'SA_PASSWORD=Strong.Pwd-123' -p 1433:1433 mcr.microsoft.com/mssql/server:2019-latest


# Wait 10 seconds because if we don't the server typically will not be ready
# to accept connections by the time we want to make them.
sleep 10


================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
scripts/gh-users-cache.json

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/
docs/flow/modin/configs_help.csv

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839

# User-specific stuff
.idea/
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/**/usage.statistics.xml
.idea/**/dictionaries
.idea/**/shelf
*.DS_Store

# Sensitive or high-churn files
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
.idea/**/dbnavigator.xml

# Gradle
.idea/**/gradle.xml
.idea/**/libraries

# vscode settings
.vscode/

# CMake
cmake-build-*/

# Mongo Explorer plugin
.idea/**/mongoSettings.xml

# File-based project format
*.iws

# IntelliJ
out/

# mpeltonen/sbt-idea plugin
.idea_modules/

# JIRA plugin
atlassian-ide-plugin.xml

# Cursive Clojure plugin
.idea/replstate.xml

# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties

# Editor-based Rest Client
.idea/httpRequests

# Cscope and Tags
tags
cscope.files
cscope.out

# PYTest Benchmarks
.benchmarks/

# Dask workspace
dask-worker-space/
node_modules

# Asv stuff
asv_bench/.asv/
asv_bench/modin/

# Sublime stuff
*.sublime-workspace
*.sublime-project


================================================
FILE: .readthedocs.yaml
================================================
# .readthedocs.yaml
# Read the Docs configuration file
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details

# Required
version: 2

# Set the version of Python and other tools you might need
build:
  os: ubuntu-20.04
  tools:
    python: "3.9"

# Build documentation in the docs/ directory with Sphinx
sphinx:
   configuration: docs/conf.py

formats: all

python:
   install:
   - requirements: docs/requirements-doc.txt


================================================
FILE: CODEOWNERS
================================================
# These owners will be the default owners for everything in
# the repo unless a later match takes precedence,
*    @modin-project/modin-core @devin-petersohn @mvashishtha @RehanSD @YarShev @vnlitvinov @anmyachev @dchigarev


================================================
FILE: CODE_OF_CONDUCT.md
================================================
# Contributor Covenant Code of Conduct

## Our Pledge

In the interest of fostering an open and welcoming environment, we as
contributors and maintainers pledge to making participation in our project and
our community a harassment-free experience for everyone, regardless of age, body
size, disability, ethnicity, sex characteristics, gender identity and expression,
level of experience, education, socio-economic status, nationality, personal
appearance, race, religion, or sexual identity and orientation.

## Our Standards

Examples of behavior that contributes to creating a positive environment
include:

* Using welcoming and inclusive language
* Being respectful of differing viewpoints and experiences
* Gracefully accepting constructive criticism
* Focusing on what is best for the community
* Showing empathy towards other community members

Examples of unacceptable behavior by participants include:

* The use of sexualized language or imagery and unwelcome sexual attention or
 advances
* Trolling, insulting/derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or electronic
 address, without explicit permission
* Other conduct which could reasonably be considered inappropriate in a
 professional setting

## Our Responsibilities

Project maintainers are responsible for clarifying the standards of acceptable
behavior and are expected to take appropriate and fair corrective action in
response to any instances of unacceptable behavior.

Project maintainers have the right and responsibility to remove, edit, or
reject comments, commits, code, wiki edits, issues, and other contributions
that are not aligned to this Code of Conduct, or to ban temporarily or
permanently any contributor for other behaviors that they deem inappropriate,
threatening, offensive, or harmful.

## Scope

This Code of Conduct applies both within project spaces and in public spaces
when an individual is representing the project or its community. Examples of
representing a project or community include using an official project e-mail
address, posting via an official social media account, or acting as an appointed
representative at an online or offline event. Representation of a project may be
further defined and clarified by project maintainers.

## Enforcement

Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported by contacting the project team at conduct@gr-oss.io. All
complaints will be reviewed and investigated and will result in a response that
is deemed necessary and appropriate to the circumstances. The project team is
obligated to maintain confidentiality with regard to the reporter of an incident.
Further details of specific enforcement policies may be posted separately.

Project maintainers who do not follow or enforce the Code of Conduct in good
faith may face temporary or permanent repercussions as determined by other
members of the project's leadership.

## Attribution

This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html

[homepage]: https://www.contributor-covenant.org

For answers to common questions about this code of conduct, see
https://www.contributor-covenant.org/faq


================================================
FILE: LICENSE
================================================
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


# Certain code used and distributed in this package is forked from pandas
# (https://github.com/pandas-dev/pandas). The pandas LICENSE
# below applies to those certain forked components in this project:

BSD 3-Clause License

Copyright (c) 2008-2011, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team
All rights reserved.

Copyright (c) 2011-2025, Open source contributors.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice, this
  list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright notice,
  this list of conditions and the following disclaimer in the documentation
  and/or other materials provided with the distribution.

* Neither the name of the copyright holder nor the names of its
  contributors may be used to endorse or promote products derived from
  this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

================================================
FILE: LICENSE_HEADER
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.


================================================
FILE: MANIFEST.in
================================================
include versioneer.py
include modin/_version.py
include modin/tests/pandas/data/*.csv


================================================
FILE: NOTICE
================================================
Modin

Copyright (c) 2018-2024 Modin Developers.


================================================
FILE: README.md
================================================
<p align="center"><a href="https://modin.readthedocs.io"><img width=77% alt="" src="https://github.com/modin-project/modin/raw/7c009c747caa90554607e30b9ac2bd1b190b8c7d/docs/img/MODIN_ver2_hrz.png?raw=true"></a></p>
<h2 align="center">Scale your pandas workflows by changing one line of code</h2>

<div align="center">

| <h3>Dev Community & Support</h3> | <h3>Forums</h3> | <h3>Socials</h3> | <h3>Docs</h3> |
|:---: | :---: | :---: | :---: |
| [![Slack](https://img.shields.io/badge/Slack-4A154B?style=for-the-badge&logo=slack&logoColor=white)](https://join.slack.com/t/modin-project/shared_invite/zt-yvk5hr3b-f08p_ulbuRWsAfg9rMY3uA) | [![Stack Overflow](https://img.shields.io/badge/-Stackoverflow-FE7A16?style=for-the-badge&logo=stack-overflow&logoColor=white)](https://stackoverflow.com/questions/tagged/modin) | <a href="https://twitter.com/modin_project"><img alt="Twitter Follow" src="https://img.shields.io/twitter/follow/modin_project?style=social" height=28 align="center"></a> | <a href="https://modin.readthedocs.io/en/latest/?badge=latest"><img alt="" src="https://readthedocs.org/projects/modin/badge/?version=latest" height=28 align="center"></a> |

</div>

<p align="center">
<a href="https://pepy.tech/project/modin"><img src="https://static.pepy.tech/personalized-badge/modin?period=total&units=international_system&left_color=black&right_color=blue&left_text=Downloads" align="center"></a>
<a href="https://codecov.io/gh/modin-project/modin"><img src="https://codecov.io/gh/modin-project/modin/branch/main/graph/badge.svg" align="center"/></a>
<a href="https://github.com/modin-project/modin/actions/workflows/push-to-main.yml?query=event%3Apush"><img src="https://github.com/modin-project/modin/actions/workflows/push-to-main.yml/badge.svg?branch=main" align="center"></a>
<a href="https://github.com/modin-project/modin/actions/workflows/ci.yml?query=event%3Apush"><img src="https://github.com/modin-project/modin/actions/workflows/ci.yml/badge.svg?branch=main" align="center"></a>
<a href="https://pypi.org/project/modin/"><img src="https://badge.fury.io/py/modin.svg" alt="PyPI version" align="center"></a>
<a href="https://modin.org/modin-bench/#/"><img src="https://img.shields.io/badge/benchmarked%20by-asv-blue.svg" align="center"></a>
</p>

### What is Modin?

Modin is a drop-in replacement for [pandas](https://github.com/pandas-dev/pandas). While pandas is
single-threaded, Modin lets you instantly speed up your workflows by scaling pandas so it uses all of your
cores. Modin works especially well on larger datasets, where pandas becomes painfully slow or runs
[out of memory](https://modin.readthedocs.io/en/latest/getting_started/why_modin/out_of_core.html).
Modin also comes with [additional APIs](https://modin.readthedocs.io/en/latest/usage_guide/advanced_usage/index.html#additional-apis)
that improve the user experience.

By simply replacing the import statement, Modin offers users effortless speed and scale for their pandas workflows:

<img src="https://github.com/modin-project/modin/raw/main/docs/img/Import.gif" style="display: block;margin-left: auto;margin-right: auto;" width="100%"></img>

In the GIFs below, Modin (left) and pandas (right) perform *the same pandas operations* on a 2GB dataset. The only difference between the two notebook examples is the import statement. 

<table class="tg">
<thead>
  <tr>
    <th class="tg-0lax" style="text-align: center;"><img src="https://github.com/modin-project/modin/raw/7c009c747caa90554607e30b9ac2bd1b190b8c7d/docs/img/MODIN_ver2_hrz.png?raw=True" height="35px"></th>
    <th class="tg-0lax" style="text-align: center;"><img src="https://pandas.pydata.org/static/img/pandas.svg" height="50px"></img></th>
  </tr>
</thead>
<tbody>
  <tr>
    <td class="tg-0lax"><img src="https://github.com/modin-project/modin/raw/7c009c747caa90554607e30b9ac2bd1b190b8c7d/docs/img/Modin.gif"></img></td>
    <td class="tg-0lax"><img src="https://github.com/modin-project/modin/raw/7c009c747caa90554607e30b9ac2bd1b190b8c7d/docs/img/Pandas.gif"></img></td>
  </tr>
</tbody>
</table>

The charts below show the speedup you get by replacing pandas with Modin based on the examples above. The example notebooks can be found [here](examples/jupyter). To learn more about the speedups you could get with Modin, check out our [10-minute quickstart guide](https://modin.readthedocs.io/en/latest/getting_started/quickstart.html) and try out some examples on your own!

<img src="https://github.com/modin-project/modin/raw/7c009c747caa90554607e30b9ac2bd1b190b8c7d/docs/img/Modin_Speedup.svg" style="display: block;margin-left: auto;margin-right: auto;" width="100%"></img>

### Installation

#### From PyPI

Modin can be installed with `pip` on Linux, Windows and MacOS:

```bash
pip install "modin[all]" # (Recommended) Install Modin with Ray and Dask engines.
```

If you want to install Modin with a specific engine, we recommend:

```bash
pip install "modin[ray]" # Install Modin dependencies and Ray.
pip install "modin[dask]" # Install Modin dependencies and Dask.
pip install "modin[mpi]" # Install Modin dependencies and MPI through unidist.
```

For Modin on MPI through unidist (as of unidist 0.5.0) to work fully,
a working MPI implementation must be installed beforehand.
Otherwise, installation of `modin[mpi]` may fail. Refer to the
[Installing with pip](https://unidist.readthedocs.io/en/latest/installation.html#installing-with-pip)
section of the unidist documentation for more details about installation.

**Note:** Since Modin 0.30.0 we use a reduced set of Ray dependencies: `ray` instead of `ray[default]`.
This means that the dashboard and cluster launcher are no longer installed by default.
If you need those, consider installing `ray[default]` along with `modin[ray]`.

Modin automatically detects which engine(s) you have installed and uses that for scheduling computation.

#### From conda-forge

Installing from [conda forge](https://github.com/conda-forge/modin-feedstock) using `modin-all`
will install Modin and three engines: [Ray](https://github.com/ray-project/ray), [Dask](https://github.com/dask/dask) and
[MPI through unidist](https://github.com/modin-project/unidist).

```bash
conda install -c conda-forge modin-all
```

Each engine can also be installed individually (and also as a combination of several engines):

```bash
conda install -c conda-forge modin-ray  # Install Modin dependencies and Ray.
conda install -c conda-forge modin-dask # Install Modin dependencies and Dask.
conda install -c conda-forge modin-mpi # Install Modin dependencies and MPI through unidist.
```

**Note:** Since Modin 0.30.0 we use a reduced set of Ray dependencies: `ray-core` instead of `ray-default`.
This means that the dashboard and cluster launcher are no longer installed by default.
If you need those, consider installing `ray-default` along with `modin-ray`.

Refer to the
[Installing with conda](https://unidist.readthedocs.io/en/latest/installation.html#installing-with-conda)
section of the unidist documentation for more details on how to install a specific MPI implementation to run on.

To speed up conda installation, we recommend using the libmamba solver. To do this, install it in the base environment:

```bash
conda install -n base conda-libmamba-solver
```

and then use it during installation, either like this:

```bash
conda install -c conda-forge modin-ray --experimental-solver=libmamba
```

or, starting from conda 22.11 and libmamba solver 22.12, like this:

```bash
conda install -c conda-forge modin-ray --solver=libmamba
```

#### Choosing a Compute Engine

If you want to choose a specific compute engine to run on, you can set the environment
variable `MODIN_ENGINE` and Modin will do computation with that engine:

```bash
export MODIN_ENGINE=ray  # Modin will use Ray
export MODIN_ENGINE=dask  # Modin will use Dask
export MODIN_ENGINE=unidist # Modin will use Unidist
```

If you want to choose the Unidist engine, you should set the additional environment
variable `UNIDIST_BACKEND`. Currently, Modin only supports MPI through unidist:

```bash
export UNIDIST_BACKEND=mpi # Unidist will use MPI backend
```

This can also be done within a notebook/interpreter before you import Modin:

```python
import modin.config as modin_cfg
import unidist.config as unidist_cfg

modin_cfg.Engine.put("ray")  # Modin will use Ray
modin_cfg.Engine.put("dask")  # Modin will use Dask

modin_cfg.Engine.put('unidist') # Modin will use Unidist
unidist_cfg.Backend.put('mpi') # Unidist will use MPI backend
```

_Note: You should not change the engine after your first operation with Modin as it will result in undefined behavior._

#### Which engine should I use?

On Linux, macOS, and Windows you can install and use any of Ray, Dask, or MPI through unidist. No prior knowledge is required
to use any of these engines, as Modin abstracts away all of the complexity, so feel
free to pick whichever you like!

### Pandas API Coverage

<p align="center">

| pandas Object     | Modin's Ray Engine Coverage                                                          | Modin's Dask Engine Coverage | Modin's Unidist Engine Coverage |
|-------------------|:------------------------------------------------------------------------------------:|:---------------:|:---------------:|
| `pd.DataFrame`    | <img src=https://img.shields.io/badge/api%20coverage-90.8%25-hunter.svg> | <img src=https://img.shields.io/badge/api%20coverage-90.8%25-hunter.svg> | <img src=https://img.shields.io/badge/api%20coverage-90.8%25-hunter.svg> |
| `pd.Series`       | <img src=https://img.shields.io/badge/api%20coverage-88.05%25-green.svg> | <img src=https://img.shields.io/badge/api%20coverage-88.05%25-green.svg> | <img src=https://img.shields.io/badge/api%20coverage-88.05%25-green.svg> |
| `pd.read_csv`     | ✅                                               | ✅ | ✅ |
| `pd.read_table`   | ✅                                               | ✅ | ✅ |
| `pd.read_parquet` | ✅                                               | ✅ | ✅ |
| `pd.read_sql`     | ✅                                               | ✅ | ✅ |
| `pd.read_feather` | ✅                                               | ✅ | ✅ |
| `pd.read_excel`   | ✅                                               | ✅ | ✅ |
| `pd.read_json`    | [✳️](https://github.com/modin-project/modin/issues/554)                                         | [✳️](https://github.com/modin-project/modin/issues/554) | [✳️](https://github.com/modin-project/modin/issues/554) |
| `pd.read_<other>` | [✴️](https://modin.readthedocs.io/en/latest/supported_apis/io_supported.html) | [✴️](https://modin.readthedocs.io/en/latest/supported_apis/io_supported.html) | [✴️](https://modin.readthedocs.io/en/latest/supported_apis/io_supported.html) |

</p>
Some pandas APIs are easier to implement than others, so if something is missing feel
free to open an issue!

### More about Modin

For the complete documentation on Modin, visit our [ReadTheDocs](https://modin.readthedocs.io/en/latest/index.html) page.

#### Scale your pandas workflow by changing a single line of code.

_Note: In local mode (without a cluster), Modin will create and manage a local (Dask or Ray) cluster for the execution._

To use Modin, you do not need to specify how to distribute the data, or even know how many
cores your system has. In fact, you can continue using your previous
pandas notebooks while experiencing a considerable speedup from Modin, even on a single
machine. Once you've changed your import statement, you're ready to use Modin just like
you would with pandas!

#### Faster pandas, even on your laptop

<img align="right" style="display:inline;" height="350" width="300" src="https://github.com/modin-project/modin/raw/7c009c747caa90554607e30b9ac2bd1b190b8c7d/docs/img/read_csv_benchmark.png?raw=true">

The `modin.pandas` DataFrame is an extremely light-weight parallel DataFrame.
Modin transparently distributes the data and computation so that you can continue using the same pandas API
while working with more data faster. Because it is so light-weight,
Modin provides speed-ups of up to 4x on a laptop with 4 physical cores.

In pandas, you are only able to use one core at a time when you are doing computation of
any kind. With Modin, you are able to use all of the CPU cores on your machine. Even with a
traditionally synchronous task like `read_csv`, we see large speedups by efficiently
distributing the work across your entire machine.

```python
import modin.pandas as pd

df = pd.read_csv("my_dataset.csv")
```

#### Modin can handle the datasets that pandas can't 

Often data scientists have to switch between different tools
for operating on datasets of different sizes. Processing large dataframes with pandas
is slow, and pandas does not support working with dataframes that are too large to fit
into the available memory. As a result, pandas workflows that work well
for prototyping on a few MBs of data do not scale to tens or hundreds of GBs (depending on the size
of your machine). Modin supports operating on data that does not fit in memory, so that you can comfortably
work with hundreds of GBs without worrying about substantial slowdown or memory errors.
With [cluster](https://modin.readthedocs.io/en/latest/getting_started/using_modin/using_modin_cluster.html)
and [out of core](https://modin.readthedocs.io/en/latest/getting_started/why_modin/out_of_core.html)
support, Modin is a DataFrame library with both great single-node performance and high
scalability in a cluster.

#### Modin Architecture

We designed [Modin's architecture](https://modin.readthedocs.io/en/latest/development/architecture.html)
to be modular so we can plug in different components as they develop and improve:

<img src="https://github.com/modin-project/modin/raw/7c009c747caa90554607e30b9ac2bd1b190b8c7d/docs/img/modin_architecture.png" alt="Modin's architecture" width="75%"></img>

### Other Resources

#### Getting Started with Modin

- [Documentation](https://modin.readthedocs.io/en/latest/)
- [10-min Quickstart Guide](https://modin.readthedocs.io/en/latest/getting_started/quickstart.html)
- [Examples and Tutorials](https://modin.readthedocs.io/en/latest/getting_started/examples.html)
- [Videos and Blogposts](https://modin.readthedocs.io/en/latest/getting_started/examples.html#talks-podcasts)
- [Benchmarking Modin](https://modin.readthedocs.io/en/latest/usage_guide/benchmarking.html)

#### Modin Community

- [Slack](https://join.slack.com/t/modin-project/shared_invite/zt-yvk5hr3b-f08p_ulbuRWsAfg9rMY3uA)
- [Twitter](https://twitter.com/modin_project)
- [Mailing List](https://groups.google.com/g/modin-dev)
- [GitHub Issues](https://github.com/modin-project/modin/issues)
- [StackOverflow](https://stackoverflow.com/questions/tagged/modin)

#### Learn More about Modin

- [Frequently Asked Questions (FAQs)](https://modin.readthedocs.io/en/latest/getting_started/faq.html)
- [Troubleshooting Guide](https://modin.readthedocs.io/en/latest/getting_started/troubleshooting.html)
- [Development Guide](https://modin.readthedocs.io/en/latest/development/index.html)
- Modin is built on many years of research and development at UC Berkeley. Check out these selected papers to learn more about how Modin works:
  - [Flexible Rule-Based Decomposition and Metadata Independence in Modin](https://people.eecs.berkeley.edu/~totemtang/paper/Modin.pdf) (VLDB 2021)
  - [Dataframe Systems: Theory, Architecture, and Implementation](https://www2.eecs.berkeley.edu/Pubs/TechRpts/2021/EECS-2021-193.pdf) (PhD Dissertation 2021)
  - [Towards Scalable Dataframe Systems](https://arxiv.org/pdf/2001.00888.pdf) (VLDB 2020)

#### Getting Involved

***`modin.pandas` is currently under active development. Requests and contributions are welcome!***

For more information on how to contribute to Modin, check out the
[Modin Contribution Guide](https://modin.readthedocs.io/en/latest/development/contributing.html).

### License

[Apache License 2.0](LICENSE)


================================================
FILE: asv_bench/README.md
================================================
# Modin ASV benchmarks

## Here are some scenarios in which [ASV](https://asv.readthedocs.io/en/stable/index.html) can be used:

* Check the impact of the new patch on the performance of a certain set of operations:

  `asv continuous -f 1.05 src/main HEAD -b TimeGroupBy --launch-method=spawn`

* Check for presence of errors inside of benchmarks after changing them or writing new ones:

  `asv run --quick --show-stderr --python=same --launch-method=spawn`

* Run entire benchmark suite to get the current times:

  `asv run --launch-method=spawn`

* Check the range of commits for performance degradation:

  ```
  asv run [start_hash]..[end_hash] --launch-method=spawn
  asv publish
  asv preview
  ```

For more consistent results, you may need to use the following parameters, which
are described in the [ASV docs](https://asv.readthedocs.io/en/stable/benchmarks.html?highlight=sample_time#timing-benchmarks):

* `-a sample_time=1`
* `-a warmup_time=1`

### Notes about using Modin on Ray with Asv:

* `--launch-method=forkserver` is not working;
* Each set of parameters for each test is launched in its own process, which brings
  a large overhead, since for each process redis server and other necessary processes
  from ray initialization are started and destroyed.

## Adding new benchmark

Basic information on writing benchmarks is present [in ASV documentation](https://asv.readthedocs.io/en/stable/writing_benchmarks.html)

Benchmarks from `benchmarks/benchmarks.py`, `benchmarks/scalability/scalability_benchmarks.py` or `benchmarks/io/csv.py`
could be used as a starting point.

Requirements:
* the benchmark should be able to run both on Modin and on Pandas when the appropriate value
of the environment variable `MODIN_ASV_USE_IMPL` is selected.
* the size of the benchmark dataset should depend on the environment variable `MODIN_TEST_DATASET_SIZE`.

## Changing existing benchmark

It should be remembered that the hash calculated from the benchmark source code is used to display the results.
When changing the benchmark, the old results will no longer be displayed in the dashboard. In general, this is the correct
behavior so as not to get a situation when incomparable numbers are displayed in the dashboard.
But it should be noted that there could be changes in the source code when it is still correct to compare
the "before" and "after" versions, for example, name of a variable changed, comment added, etc.
In this case you must either run a new version of the benchmark for all the commits ever accounted for or manually change
the hash in the corresponding result files.

## Pipeline for displaying results in a dashboard

Step 1: checking benchmarks for validity, runs in PRs CI.
  During the test, the benchmarks are run once on small data.
  The implementation can be found in `test-asv-benchmarks` job of [ci.yml](https://github.com/modin-project/modin/blob/main/.github/workflows/ci.yml)

Step 2: running benchmarks with saving the results in [modin-bench@master](https://github.com/modin-project/modin-bench).
  The launch takes place on internal server using specific TeamCity configuration.
  The description of the server can be found in the ["Benchmark list"](https://modin.org/modin-bench/#summarylist?sort=0&dir=asc) tab,
  on the left when you hover the mouse over the machine name. 
  This step starts as scheduled (now every half hour), subject to the presence of new commits in the Modin `main` branch.
  Command to run benchmarks: `asv run HASHFILE:hashfile.txt --show-stderr --machine xeon-e5 --launch-method=spawn`.
  The file `hashfile.txt` contains the latest Modin commit hash.
  Writing to `modin-bench@master` triggers step 3 of the pipeline.

Step 3: converting the results to html representation, which is saved in [modin-bench@gh-pages](https://github.com/modin-project/modin-bench)
  The implementation can be found in `deploy-gh-pages` job of [push.yml](https://github.com/modin-project/modin-bench/blob/master/.github/workflows/push.yml)

Basic actions for step 2:
* setup environment variable:
  * export MODIN_TEST_DATASET=Big
  * export MODIN_CPUS=44
* setup git client
* prepare json file with machine description
  * This file should be placed in the user's home directory.
  * ASV does not always automatically create the file with the description of the machine correctly (e.g. due to being run in a container).
  It is recommended to create a file using [asv machine](https://asv.readthedocs.io/en/stable/commands.html?highlight=machine%20description#asv-machine) command, and manually check the result.
  [Example](https://github.com/modin-project/modin-bench/blob/master/results/xeon-e5/machine.json)
* copy old result to folder where new result will appear
  (conflict resolution will be performed by ASV itself instead of git)
* push performance result to modin-bench repository


================================================
FILE: asv_bench/asv.conf.dask.json
================================================
{
    // The version of the config file format.  Do not change, unless
    // you know what you are doing.
    "version": 1,

    // The name of the project being benchmarked
    "project": "modin",

    // The project's homepage
    "project_url": "https://modin.readthedocs.io/",

    // The URL or local path of the source code repository for the
    // project being benchmarked
    "repo": "..",

    // List of branches to benchmark. If not provided, defaults to "master"
    // (for git) or "default" (for mercurial).
    "branches": ["main"],

    // Customizable commands for building, installing, and
    // uninstalling the project. See asv.conf.json documentation.
    "install_command": ["in-dir={env_dir} python -mpip install {wheel_file}[dask]"],

    // The tool to use to create environments.  May be "conda",
    // "virtualenv" or other value depending on the plugins in use.
    // If missing or the empty string, the tool will be automatically
    // determined by looking for tools on the PATH environment
    // variable.
    "environment_type": "conda",

    // timeout in seconds for installing any dependencies in environment
    // defaults to 10 min
    "install_timeout": 6000,

    // the base URL to show a commit for the project.
    "show_commit_url": "https://github.com/modin-project/modin/commit/",

    // The Pythons you'd like to test against.  If not provided, defaults
    // to the current version of Python used to run `asv`.
    "pythons": ["3.9"],

    // The list of conda channel names to be searched for benchmark
    // dependency packages in the specified order
    "conda_channels": ["conda-forge", "defaults"],

    // The directory (relative to the current directory) to cache the Python
    // environments in.  If not provided, defaults to "env"
    "env_dir": ".asv/env",

    // The directory (relative to the current directory) that raw benchmark
    // results are stored in.  If not provided, defaults to "results".
    "results_dir": ".asv/results",

    // The directory (relative to the current directory) that the html tree
    // should be written to.  If not provided, defaults to "html".
    "html_dir": ".asv/html",
}


================================================
FILE: asv_bench/asv.conf.json
================================================
{
    // The version of the config file format.  Do not change, unless
    // you know what you are doing.
    "version": 1,

    // The name of the project being benchmarked
    "project": "modin",

    // The project's homepage
    "project_url": "https://modin.readthedocs.io/",

    // The URL or local path of the source code repository for the
    // project being benchmarked
    "repo": "..",

    // List of branches to benchmark. If not provided, defaults to "master"
    // (for git) or "default" (for mercurial).
    "branches": ["main"],

    // Customizable commands for building, installing, and
    // uninstalling the project. See asv.conf.json documentation.
    "install_command": ["in-dir={env_dir} python -mpip install {wheel_file}[ray]"],

    // The tool to use to create environments.  May be "conda",
    // "virtualenv" or other value depending on the plugins in use.
    // If missing or the empty string, the tool will be automatically
    // determined by looking for tools on the PATH environment
    // variable.
    "environment_type": "conda",

    // timeout in seconds for installing any dependencies in environment
    // defaults to 10 min
    "install_timeout": 6000,

    // the base URL to show a commit for the project.
    "show_commit_url": "https://github.com/modin-project/modin/commit/",

    // The Pythons you'd like to test against.  If not provided, defaults
    // to the current version of Python used to run `asv`.
    "pythons": ["3.9"],

    // The list of conda channel names to be searched for benchmark
    // dependency packages in the specified order
    "conda_channels": ["conda-forge", "defaults"],

    // The directory (relative to the current directory) to cache the Python
    // environments in.  If not provided, defaults to "env"
    "env_dir": ".asv/env",

    // The directory (relative to the current directory) that raw benchmark
    // results are stored in.  If not provided, defaults to "results".
    "results_dir": ".asv/results",

    // The directory (relative to the current directory) that the html tree
    // should be written to.  If not provided, defaults to "html".
    "html_dir": ".asv/html",
}


================================================
FILE: asv_bench/asv.conf.unidist.json
================================================
{
    // The version of the config file format.  Do not change, unless
    // you know what you are doing.
    "version": 1,

    // The name of the project being benchmarked
    "project": "modin",

    // The project's homepage
    "project_url": "https://modin.readthedocs.io/",

    // The URL or local path of the source code repository for the
    // project being benchmarked
    "repo": "..",

    // List of branches to benchmark. If not provided, defaults to "master"
    // (for git) or "default" (for mercurial).
    "branches": ["main"],

    // Customizable commands for building, installing, and
    // uninstalling the project. See asv.conf.json documentation.
    "install_command": ["in-dir={env_dir} python -mpip install {wheel_file}[unidist]"],

    // The tool to use to create environments.  May be "conda",
    // "virtualenv" or other value depending on the plugins in use.
    // If missing or the empty string, the tool will be automatically
    // determined by looking for tools on the PATH environment
    // variable.
    "environment_type": "conda",

    // timeout in seconds for installing any dependencies in environment
    // defaults to 10 min
    "install_timeout": 6000,

    // the base URL to show a commit for the project.
    "show_commit_url": "https://github.com/modin-project/modin/commit/",

    // The Pythons you'd like to test against.  If not provided, defaults
    // to the current version of Python used to run `asv`.
    "pythons": ["3.9"],

    // The list of conda channel names to be searched for benchmark
    // dependency packages in the specified order
    "conda_channels": ["conda-forge", "defaults"],

    // The directory (relative to the current directory) to cache the Python
    // environments in.  If not provided, defaults to "env"
    "env_dir": ".asv/env",

    // The directory (relative to the current directory) that raw benchmark
    // results are stored in.  If not provided, defaults to "results".
    "results_dir": ".asv/results",

    // The directory (relative to the current directory) that the html tree
    // should be written to.  If not provided, defaults to "html".
    "html_dir": ".asv/html",
}


================================================
FILE: asv_bench/benchmarks/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Modin benchmarks."""


================================================
FILE: asv_bench/benchmarks/benchmarks.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""General Modin benchmarks."""

# Define the `MODIN_CPUS` env var to control the number of partitions.
# It must be defined before `modin.pandas` is imported (when set via `os.environ`).

# Define the `MODIN_ASV_USE_IMPL` env var to choose which library is used for
# the performance measurements.

import math

import numpy as np

from .utils import (
    GROUPBY_NGROUPS,
    IMPL,
    RAND_HIGH,
    RAND_LOW,
    execute,
    gen_nan_data,
    generate_dataframe,
    get_benchmark_shapes,
    random_booleans,
    random_columns,
    random_string,
    translator_groupby_ngroups,
)


class BaseTimeGroupBy:
    """Shared fixture for the groupby benchmarks."""

    def setup(self, shape, ngroups=5, groupby_ncols=1):
        """
        Build the integer dataframe and the list of columns to group on.

        ``ngroups`` is first run through ``translator_groupby_ngroups`` together
        with ``shape``; the translated value is forwarded to
        ``generate_dataframe`` as ``count_groups``.
        """
        group_count = translator_groupby_ngroups(ngroups, shape)
        frame, key_columns = generate_dataframe(
            "int", *shape, RAND_LOW, RAND_HIGH, groupby_ncols, count_groups=group_count
        )
        self.df = frame
        self.groupby_columns = key_columns


class TimeGroupByMultiColumn(BaseTimeGroupBy):
    """Groupby benchmarks that group on several key columns at once."""

    param_names = ["shape", "ngroups", "groupby_ncols"]
    params = [
        get_benchmark_shapes("TimeGroupByMultiColumn"),
        GROUPBY_NGROUPS,
        [6],
    ]

    def time_groupby_agg_quan(self, *args, **kwargs):
        """Time ``agg("quantile")`` on the grouped frame."""
        grouped = self.df.groupby(by=self.groupby_columns)
        execute(grouped.agg("quantile"))

    def time_groupby_agg_mean(self, *args, **kwargs):
        """Time a per-group mean computed through ``apply`` with a lambda."""
        grouped = self.df.groupby(by=self.groupby_columns)
        execute(grouped.apply(lambda group: group.mean()))


class TimeGroupByDefaultAggregations(BaseTimeGroupBy):
    """Benchmarks for the built-in groupby reductions (count/size/sum/mean)."""

    param_names = ["shape", "ngroups"]
    params = [
        get_benchmark_shapes("TimeGroupByDefaultAggregations"),
        GROUPBY_NGROUPS,
    ]

    def time_groupby_count(self, *args, **kwargs):
        """Time ``groupby(...).count()``."""
        grouped = self.df.groupby(by=self.groupby_columns)
        execute(grouped.count())

    def time_groupby_size(self, *args, **kwargs):
        """Time ``groupby(...).size()``."""
        grouped = self.df.groupby(by=self.groupby_columns)
        execute(grouped.size())

    def time_groupby_sum(self, *args, **kwargs):
        """Time ``groupby(...).sum()``."""
        grouped = self.df.groupby(by=self.groupby_columns)
        execute(grouped.sum())

    def time_groupby_mean(self, *args, **kwargs):
        """Time ``groupby(...).mean()``."""
        grouped = self.df.groupby(by=self.groupby_columns)
        execute(grouped.mean())


class TimeGroupByDictionaryAggregation(BaseTimeGroupBy):
    """Benchmark ``groupby(...).agg`` driven by a column -> function mapping."""

    param_names = ["shape", "ngroups", "operation_type"]
    params = [
        get_benchmark_shapes("TimeGroupByDictionaryAggregation"),
        GROUPBY_NGROUPS,
        ["reduce", "aggregation"],
    ]
    operations = {
        "reduce": ["sum", "count", "prod"],
        "aggregation": ["quantile", "std", "median"],
    }

    def setup(self, shape, ngroups, operation_type):
        """
        Prepare the frame and the aggregation mapping.

        Columns at positions 1..3 are aggregated; the function names of the
        selected ``operation_type`` are assigned to them round-robin.
        """
        super().setup(shape, ngroups)
        self.cols_to_agg = self.df.columns[1:4]
        func_names = self.operations[operation_type]
        n_funcs = len(func_names)
        mapping = {}
        for position, column in enumerate(self.cols_to_agg):
            mapping[column] = func_names[position % n_funcs]
        self.agg_dict = mapping

    def time_groupby_dict_agg(self, *args, **kwargs):
        """Time the dictionary-based aggregation."""
        grouped = self.df.groupby(by=self.groupby_columns)
        execute(grouped.agg(self.agg_dict))


class TimeJoin:
    """Benchmark ``DataFrame.join`` of two integer frames on their index."""

    param_names = ["shapes", "how", "sort"]
    params = [
        get_benchmark_shapes("TimeJoin"),
        ["left", "inner"],
        [False],
    ]

    def setup(self, shapes, how, sort):
        """Generate the left and right frames from the two given shapes."""
        left = generate_dataframe("int", *shapes[0], RAND_LOW, RAND_HIGH)
        right = generate_dataframe("int", *shapes[1], RAND_LOW, RAND_HIGH)
        self.df1, self.df2 = left, right

    def time_join(self, shapes, how, sort):
        """Time the index-based join."""
        # Joining on the index keeps the shape of the result predictable.
        join_options = {"how": how, "lsuffix": "left_", "sort": sort}
        execute(self.df1.join(self.df2, **join_options))


class TimeJoinStringIndex:
    """
    Benchmark ``DataFrame.join`` against frames indexed by string labels.

    Unlike the sibling join/merge benchmarks, ``shapes`` here is a single
    shape: it is unpacked directly into ``generate_dataframe`` and its first
    element is used as the row count.
    """

    param_names = ["shapes", "sort"]
    params = [
        get_benchmark_shapes("TimeJoinStringIndex"),
        [True, False],
    ]

    def setup(self, shapes, sort):
        """Build the keyed left frame and the three string-indexed right frames."""
        # The level/code arithmetic below relies on the row count being a
        # multiple of 100.
        assert shapes[0] % 100 == 0, "implementation restriction"
        # Ten first-level labels and `shapes[0] // 100` second-level labels.
        level1 = IMPL.Index([f"i-{i}" for i in range(10)], dtype=object).values
        level2 = IMPL.Index(
            [f"i-{i}" for i in range(shapes[0] // 100)], dtype=object
        ).values
        # Pair every first-level label with every second-level label, giving
        # 10 * (shapes[0] // 100) index entries.
        codes1 = np.arange(10).repeat(shapes[0] // 100)
        codes2 = np.tile(np.arange(shapes[0] // 100), 10)
        index2 = IMPL.MultiIndex(levels=[level1, level2], codes=[codes1, codes2])
        self.df_multi = IMPL.DataFrame(
            np.random.randn(len(index2), 4), index=index2, columns=["A", "B", "C", "D"]
        )

        # Repeat the label sequences ten times so each key array holds
        # `shapes[0]` values.
        self.key1 = np.tile(level1.take(codes1), 10)
        self.key2 = np.tile(level2.take(codes2), 10)
        self.df = generate_dataframe("int", *shapes, RAND_LOW, RAND_HIGH)
        # just to keep source shape
        # (two columns are dropped and two key columns are added back;
        # assumes "key1"/"key2" are not already column names - TODO confirm)
        self.df = self.df.drop(columns=self.df.columns[-2:])
        self.df["key1"] = self.key1
        self.df["key2"] = self.key2
        execute(self.df)

        # Right-hand frames indexed by a single level each.
        self.df_key1 = IMPL.DataFrame(
            np.random.randn(len(level1), 4), index=level1, columns=["A", "B", "C", "D"]
        )
        self.df_key2 = IMPL.DataFrame(
            np.random.randn(len(level2), 4), index=level2, columns=["A", "B", "C", "D"]
        )

    def time_join_dataframe_index_multi(self, shapes, sort):
        """Time joining on both key columns against the MultiIndex frame."""
        execute(self.df.join(self.df_multi, on=["key1", "key2"], sort=sort))

    def time_join_dataframe_index_single_key_bigger(self, shapes, sort):
        """Time joining on ``key2`` against the frame indexed by ``level2``."""
        execute(self.df.join(self.df_key2, on="key2", sort=sort))

    def time_join_dataframe_index_single_key_small(self, shapes, sort):
        """Time joining on ``key1`` against the ten-row frame indexed by ``level1``."""
        execute(self.df.join(self.df_key1, on="key1", sort=sort))


class TimeMergeDefault:
    """Benchmark the module-level ``merge`` without explicit join keys."""

    param_names = ["shapes", "how", "sort"]
    params = [
        get_benchmark_shapes("TimeMergeDefault"),
        ["left", "inner"],
        [True, False],
    ]

    def setup(self, shapes, how, sort):
        """Generate the left and right frames from the two given shapes."""
        left = generate_dataframe("int", *shapes[0], RAND_LOW, RAND_HIGH)
        right = generate_dataframe("int", *shapes[1], RAND_LOW, RAND_HIGH)
        self.df1, self.df2 = left, right

    def time_merge(self, shapes, how, sort):
        """Time ``merge`` with only ``how`` and ``sort`` specified."""
        merged = IMPL.merge(self.df1, self.df2, how=how, sort=sort)
        execute(merged)


class TimeMerge:
    """Benchmark index-based merges and merges with an empty operand."""

    param_names = ["shapes", "how", "sort"]
    params = [
        get_benchmark_shapes("TimeMerge"),
        ["left", "inner"],
        [True, False],
    ]

    def setup(self, shapes, how, sort):
        """Generate the left and right frames from the two given shapes."""
        left = generate_dataframe("int", *shapes[0], RAND_LOW, RAND_HIGH)
        right = generate_dataframe("int", *shapes[1], RAND_LOW, RAND_HIGH)
        self.df1, self.df2 = left, right

    def time_merge(self, shapes, how, sort):
        """Time ``DataFrame.merge`` on the indices of both frames."""
        # Merging by index keeps the shape of the result predictable.
        merged = self.df1.merge(
            self.df2, left_index=True, right_index=True, how=how, sort=sort
        )
        execute(merged)

    def time_merge_dataframe_empty_right(self, shapes, how, sort):
        """Time ``merge`` when the right operand has no rows."""
        # Taking an empty slice with `iloc` should be very fast, so its
        # contribution to the measured merge time should be negligible.
        empty_right = self.df2.iloc[:0]
        execute(IMPL.merge(self.df1, empty_right, how=how, sort=sort))

    def time_merge_dataframe_empty_left(self, shapes, how, sort):
        """Time ``merge`` when the left operand has no rows."""
        # Taking an empty slice with `iloc` should be very fast, so its
        # contribution to the measured merge time should be negligible.
        empty_left = self.df1.iloc[:0]
        execute(IMPL.merge(empty_left, self.df2, how=how, sort=sort))


class TimeMergeCategoricals:
    """Benchmark ``merge`` on frames carrying object or categorical columns."""

    param_names = ["shapes", "data_type"]
    params = [
        get_benchmark_shapes("MergeCategoricals"),
        ["object", "category"],
    ]

    def setup(self, shapes, data_type):
        """
        Create two random two-column frames sharing the integer key ``X``.

        When ``data_type`` is ``"category"`` the string columns are converted
        to categoricals and both frames are executed before timing starts.
        """
        assert len(shapes) == 2
        assert shapes[1] == 2
        column_size = (shapes[0],)

        left_columns = {
            "X": np.random.choice(range(0, 10), size=column_size),
            "Y": np.random.choice(["one", "two", "three"], size=column_size),
        }
        self.left = IMPL.DataFrame(left_columns)

        right_columns = {
            "X": np.random.choice(range(0, 10), size=column_size),
            "Z": np.random.choice(["jjj", "kkk", "sss"], size=column_size),
        }
        self.right = IMPL.DataFrame(right_columns)

        if data_type != "category":
            return

        left_categorical = self.left["Y"].astype("category")
        self.left = self.left.assign(Y=left_categorical)
        execute(self.left)
        right_categorical = self.right["Z"].astype("category")
        self.right = self.right.assign(Z=right_categorical)
        execute(self.right)

    def time_merge_categoricals(self, shapes, data_type):
        """Time merging the two frames on ``X``."""
        merged = IMPL.merge(self.left, self.right, on="X")
        execute(merged)


class TimeConcat:
    """Benchmark ``concat`` of two integer frames."""

    param_names = ["shapes", "how", "axis", "ignore_index"]
    params = [
        get_benchmark_shapes("TimeConcat"),
        ["inner", "outer"],
        [0, 1],
        [True, False],
    ]

    def setup(self, shapes, how, axis, ignore_index):
        """Generate the two frames to concatenate from the given shapes."""
        first = generate_dataframe("int", *shapes[0], RAND_LOW, RAND_HIGH)
        second = generate_dataframe("int", *shapes[1], RAND_LOW, RAND_HIGH)
        self.df1, self.df2 = first, second

    def time_concat(self, shapes, how, axis, ignore_index):
        """Time ``concat`` along ``axis`` using ``how`` as the join mode."""
        frames = [self.df1, self.df2]
        combined = IMPL.concat(
            frames, axis=axis, join=how, ignore_index=ignore_index
        )
        execute(combined)


class TimeBinaryOp:
    """Benchmark a dataframe-with-dataframe binary operation."""

    param_names = ["shapes", "binary_op", "axis"]
    params = [
        get_benchmark_shapes("TimeBinaryOp"),
        ["mul"],
        [0, 1],
    ]

    def setup(self, shapes, binary_op, axis):
        """Generate both operands and look up the method named ``binary_op``."""
        lhs = generate_dataframe("int", *shapes[0], RAND_LOW, RAND_HIGH)
        rhs = generate_dataframe("int", *shapes[1], RAND_LOW, RAND_HIGH)
        self.df1, self.df2 = lhs, rhs
        # The bound method is resolved here so the lookup is not timed.
        self.op = getattr(lhs, binary_op)

    def time_binary_op(self, shapes, binary_op, axis):
        """Time the operation of the first frame with the second along ``axis``."""
        result = self.op(self.df2, axis=axis)
        execute(result)


class TimeBinaryOpSeries:
    """Benchmark a series-with-series binary operation."""

    param_names = ["shapes", "binary_op"]
    params = [
        get_benchmark_shapes("TimeBinaryOpSeries"),
        ["mul"],
    ]

    def setup(self, shapes, binary_op):
        """
        Take the first column of two generated frames as the operands.

        Both series are executed before timing starts.
        """
        left_frame = generate_dataframe("int", *shapes[0], RAND_LOW, RAND_HIGH)
        right_frame = generate_dataframe("int", *shapes[1], RAND_LOW, RAND_HIGH)
        left_series = left_frame[left_frame.columns[0]]
        right_series = right_frame[right_frame.columns[0]]
        self.series1, self.series2 = left_series, right_series
        self.op = getattr(left_series, binary_op)
        for operand in (left_series, right_series):
            execute(operand)

    def time_binary_op_series(self, shapes, binary_op):
        """Time the operation of the first series with the second."""
        result = self.op(self.series2)
        execute(result)


class BaseTimeSetItem:
    """Common setup for benchmarks that write an item into a frame."""

    param_names = ["shape", "item_length", "loc", "is_equal_indices"]

    @staticmethod
    def get_loc(df, loc, axis, item_length):
        """
        Resolve a named location into a ``(label(s), position(s))`` pair.

        Parameters
        ----------
        df : object
            Frame whose ``axes[axis]`` is inspected.
        loc : {"zero", "middle", "last"}
            Named starting position along the axis.
        axis : int
            Axis to pick the labels from.
        item_length : int
            Requested number of consecutive positions; clipped at the axis end.

        Returns
        -------
        tuple
            ``(label, position)`` scalars when exactly one position is selected,
            otherwise ``(labels, positions)`` where ``positions`` is a NumPy array.
        """
        labels = df.axes[axis]
        axis_len = len(labels)
        named_starts = {
            "zero": 0,
            "middle": axis_len // 2,
            "last": axis_len - 1,
        }
        start = named_starts[loc]
        stop = min(axis_len, start + item_length)
        positions = np.arange(start, stop)
        if len(positions) == 1:
            return labels[start], start
        return labels[positions], positions

    def setup(self, shape, item_length, loc, is_equal_indices):
        """Create the target frame and the item (plus its raw array) to assign."""
        generated = generate_dataframe("int", *shape, RAND_LOW, RAND_HIGH)
        self.df = generated.copy()
        self.loc, self.iloc = self.get_loc(
            self.df, loc, item_length=item_length, axis=1
        )

        self.item = self.df[self.loc] + 1
        self.item_raw = self.item.to_numpy()
        if is_equal_indices:
            return
        # Give the item a reversed index so it no longer equals the frame's one.
        self.item.index = reversed(self.item.index)


class TimeSetItem(BaseTimeSetItem):
    """Benchmark ``df[loc] = item`` for both the frame-like and raw-array item."""

    params = [
        get_benchmark_shapes("TimeSetItem"),
        [1],
        ["zero", "middle", "last"],
        [True, False],
    ]

    def time_setitem_qc(self, *args, **kwargs):
        """Time assigning ``self.item`` at ``self.loc``."""
        frame = self.df
        frame[self.loc] = self.item
        execute(frame)

    def time_setitem_raw(self, *args, **kwargs):
        """Time assigning the ``to_numpy()`` form of the item at ``self.loc``."""
        frame = self.df
        frame[self.loc] = self.item_raw
        execute(frame)


class TimeInsert(BaseTimeSetItem):
    """Benchmark ``insert`` of a new, randomly named column at ``self.iloc``."""

    params = [
        get_benchmark_shapes("TimeInsert"),
        [1],
        ["zero", "middle", "last"],
        [True, False],
    ]

    def time_insert_qc(self, *args, **kwargs):
        """Time inserting ``self.item`` as a new column."""
        column_name = random_string()
        self.df.insert(loc=self.iloc, column=column_name, value=self.item)
        execute(self.df)

    def time_insert_raw(self, *args, **kwargs):
        """Time inserting the ``to_numpy()`` form of the item as a new column."""
        column_name = random_string()
        self.df.insert(loc=self.iloc, column=column_name, value=self.item_raw)
        execute(self.df)


class TimeArithmetic:
    """Benchmark reductions and element-wise operations on one generated frame."""

    params = [
        get_benchmark_shapes("TimeArithmetic"),
        [0, 1],
    ]
    param_names = ["shape", "axis"]

    def setup(self, shape, axis):
        """Generate the ``"int"`` frame shared by all timing methods."""
        self.df = generate_dataframe("int", *shape, RAND_LOW, RAND_HIGH)

    def time_sum(self, shape, axis):
        """Time ``sum`` along ``axis``."""
        result = self.df.sum(axis=axis)
        execute(result)

    def time_count(self, shape, axis):
        """Time ``count`` along ``axis``."""
        result = self.df.count(axis=axis)
        execute(result)

    def time_median(self, shape, axis):
        """Time ``median`` along ``axis``."""
        result = self.df.median(axis=axis)
        execute(result)

    def time_nunique(self, shape, axis):
        """Time ``nunique`` along ``axis``."""
        result = self.df.nunique(axis=axis)
        execute(result)

    def time_apply(self, shape, axis):
        """Time ``apply`` of a summing lambda along ``axis``."""
        result = self.df.apply(lambda part: part.sum(), axis=axis)
        execute(result)

    def time_mean(self, shape, axis):
        """Time ``mean`` along ``axis``."""
        result = self.df.mean(axis=axis)
        execute(result)

    def time_mode(self, shape, axis):
        """Time ``mode`` along ``axis``."""
        result = self.df.mode(axis=axis)
        execute(result)

    def time_add(self, shape, axis):
        """Time adding the scalar ``2``."""
        result = self.df.add(2, axis=axis)
        execute(result)

    def time_mul(self, shape, axis):
        """Time multiplying by the scalar ``2``."""
        result = self.df.mul(2, axis=axis)
        execute(result)

    def time_mod(self, shape, axis):
        """Time the modulo by the scalar ``2``."""
        result = self.df.mod(2, axis=axis)
        execute(result)

    def time_abs(self, shape, axis):
        """Time ``abs``; ``axis`` is not used by this operation."""
        result = self.df.abs()
        execute(result)

    def time_aggregate(self, shape, axis):
        """Time ``aggregate`` with a summing lambda along ``axis``."""
        result = self.df.aggregate(lambda part: part.sum(), axis=axis)
        execute(result)

    def time_is_in(self, shape, axis):
        """Time ``isin([0, 2])``; ``axis`` is not used by this operation."""
        result = self.df.isin([0, 2])
        execute(result)

    def time_transpose(self, shape, axis):
        """Time ``transpose``; ``axis`` is not used by this operation."""
        result = self.df.transpose()
        execute(result)


class TimeSortValues:
    """Benchmark ``sort_values`` over a random selection of columns."""

    params = [
        get_benchmark_shapes("TimeSortValues"),
        [1, 2, 10, 100],
        [False, True],
    ]
    param_names = ["shape", "columns_number", "ascending_list"]

    def setup(self, shape, columns_number, ascending_list):
        """Pick the sort columns and the ``ascending`` argument."""
        self.df = generate_dataframe("int", *shape, RAND_LOW, RAND_HIGH)
        self.columns = random_columns(self.df.columns, columns_number)
        if ascending_list:
            # Request `columns_number` random flags.
            self.ascending = random_booleans(columns_number)
        else:
            # Use a single random flag, converted to a plain bool.
            self.ascending = bool(random_booleans(1)[0])

    def time_sort_values(self, shape, columns_number, ascending_list):
        """Time sorting by the chosen columns."""
        ordered = self.df.sort_values(self.columns, ascending=self.ascending)
        execute(ordered)


class TimeDrop:
    """Benchmark ``drop`` of leading labels along an axis."""

    params = [
        get_benchmark_shapes("TimeDrop"),
        [0, 1],
        [1, 0.8],
    ]
    param_names = ["shape", "axis", "drop_ncols"]

    def setup(self, shape, axis, drop_ncols):
        """Select the labels to drop.

        A float ``drop_ncols`` is a fraction of the axis length; any other
        value is used directly as the number of labels.
        """
        self.df = generate_dataframe("int", *shape, RAND_LOW, RAND_HIGH)
        if isinstance(drop_ncols, float):
            drop_count = int(len(self.df.axes[axis]) * drop_ncols)
        else:
            drop_count = drop_ncols
        axis_labels = self.df.axes[axis]
        self.labels = axis_labels[:drop_count]

    def time_drop(self, shape, axis, drop_ncols):
        """Time dropping the selected labels along ``axis``."""
        remaining = self.df.drop(self.labels, axis=axis)
        execute(remaining)


class TimeHead:
    """Benchmark ``head`` with an absolute or fractional row count."""

    params = [
        get_benchmark_shapes("TimeHead"),
        [5, 0.8],
    ]
    param_names = ["shape", "head_count"]

    def setup(self, shape, head_count):
        """Resolve ``head_count``; a float is a fraction of the row count."""
        self.df = generate_dataframe("int", *shape, RAND_LOW, RAND_HIGH)
        if isinstance(head_count, float):
            self.head_count = int(head_count * len(self.df.index))
        else:
            self.head_count = head_count

    def time_head(self, shape, head_count):
        """Time taking the first ``self.head_count`` rows."""
        top_rows = self.df.head(self.head_count)
        execute(top_rows)


class TimeTail:
    """Benchmark ``tail`` with an absolute or fractional row count."""

    params = [
        get_benchmark_shapes("TimeTail"),
        [5, 0.8],
    ]
    param_names = ["shape", "tail_count"]

    def setup(self, shape, tail_count):
        """Resolve ``tail_count``; a float is a fraction of the row count."""
        self.df = generate_dataframe("int", *shape, RAND_LOW, RAND_HIGH)
        if isinstance(tail_count, float):
            self.tail_count = int(tail_count * len(self.df.index))
        else:
            self.tail_count = tail_count

    def time_tail(self, shape, tail_count):
        """Time taking the last ``self.tail_count`` rows."""
        bottom_rows = self.df.tail(self.tail_count)
        execute(bottom_rows)


class TimeExplode:
    """Benchmark ``explode`` on the ``"col1"`` column."""

    params = [
        get_benchmark_shapes("TimeExplode"),
    ]
    param_names = ["shape"]

    def setup(self, shape):
        """Generate the frame, requesting ``gen_unique_key=True``."""
        frame = generate_dataframe(
            "int", *shape, RAND_LOW, RAND_HIGH, gen_unique_key=True
        )
        self.df = frame

    def time_explode(self, shape):
        """Time exploding ``"col1"``."""
        exploded = self.df.explode("col1")
        execute(exploded)


class TimeFillnaSeries:
    """Benchmark ``fillna`` on the data returned by ``gen_nan_data``.

    The fill value is a scalar, a dict or a Series depending on ``value_type``;
    ``limit`` is either ``None`` or a fraction of the number of rows.
    """

    param_names = ["value_type", "shape", "limit"]
    params = [
        ["scalar", "dict", "Series"],
        get_benchmark_shapes("TimeFillnaSeries"),
        [None, 0.8],
    ]

    def setup(self, value_type, shape, limit):
        """Prepare the data and the keyword arguments passed to ``fillna``.

        Raises
        ------
        ValueError
            If ``value_type`` is not one of the supported kinds.
        """
        self.series = gen_nan_data(*shape)

        if value_type == "scalar":
            self.value = 18.19
        elif value_type == "dict":
            self.value = {k: k * 1.23 for k in range(shape[0])}
        elif value_type == "Series":
            self.value = IMPL.Series(
                [k * 1.23 for k in range(shape[0])], index=IMPL.RangeIndex(shape[0])
            )
        else:
            # Raise explicitly rather than `assert False`: assertions are removed
            # under `python -O`, which would leave `self.value` unset.
            raise ValueError(f"value_type: {value_type} isn't supported")
        # A truthy `limit` is a fraction of the row count; turn it into an int.
        limit = int(limit * shape[0]) if limit else None
        self.kw = {"value": self.value, "limit": limit}

    def time_fillna(self, value_type, shape, limit):
        """Time ``fillna`` returning a new object."""
        execute(self.series.fillna(**self.kw))

    def time_fillna_inplace(self, value_type, shape, limit):
        """Time ``fillna`` with ``inplace=True``."""
        self.series.fillna(inplace=True, **self.kw)
        execute(self.series)


class TimeFillnaDataFrame:
    """Benchmark ``fillna`` on the frame returned by ``gen_nan_data``.

    The fill value is a scalar, a dict, a Series or a DataFrame depending on
    ``value_type``; ``limit`` is either ``None`` or a fraction of the row count.
    """

    param_names = ["value_type", "shape", "limit"]
    params = [
        ["scalar", "dict", "DataFrame", "Series"],
        get_benchmark_shapes("TimeFillnaDataFrame"),
        [None, 0.8],
    ]

    def setup(self, value_type, shape, limit):
        """Prepare the frame and the keyword arguments passed to ``fillna``.

        Raises
        ------
        ValueError
            If ``value_type`` is not one of the supported kinds.
        """
        self.df = gen_nan_data(*shape)
        columns = self.df.columns

        if value_type == "scalar":
            self.value = 18.19
        elif value_type == "dict":
            # One fill value per column label.
            self.value = {k: i * 1.23 for i, k in enumerate(columns)}
        elif value_type == "Series":
            # Same per-column values, indexed by the column labels.
            self.value = IMPL.Series(
                [i * 1.23 for i in range(len(columns))], index=columns
            )
        elif value_type == "DataFrame":
            # A frame with `shape[0]` rows and the same columns as `self.df`.
            self.value = IMPL.DataFrame(
                {
                    k: [i + j * 1.23 for j in range(shape[0])]
                    for i, k in enumerate(columns)
                },
                index=IMPL.RangeIndex(shape[0]),
                columns=columns,
            )
        else:
            # Raise explicitly rather than `assert False`: assertions are removed
            # under `python -O`, which would leave `self.value` unset.
            raise ValueError(f"value_type: {value_type} isn't supported")
        # A truthy `limit` is a fraction of the row count; turn it into an int.
        limit = int(limit * shape[0]) if limit else None
        self.kw = {"value": self.value, "limit": limit}

    def time_fillna(self, value_type, shape, limit):
        """Time ``fillna`` returning a new object."""
        execute(self.df.fillna(**self.kw))

    def time_fillna_inplace(self, value_type, shape, limit):
        """Time ``fillna`` with ``inplace=True``."""
        self.df.fillna(inplace=True, **self.kw)
        execute(self.df)


class BaseTimeValueCounts:
    """Common setup for the ``value_counts`` benchmarks."""

    def setup(self, shape, ngroups=5, subset=1):
        """Generate the frame and store it together with its subset columns.

        ``ngroups`` is first passed through ``translator_groupby_ngroups``;
        ``subset`` is forwarded to the generator as ``groupby_ncols``.
        """
        group_count = translator_groupby_ngroups(ngroups, shape)
        generated = generate_dataframe(
            "int",
            *shape,
            RAND_LOW,
            RAND_HIGH,
            groupby_ncols=subset,
            count_groups=group_count,
        )
        # The generator result unpacks into exactly two values here.
        self.df, self.subset = generated


class TimeValueCountsFrame(BaseTimeValueCounts):
    """Benchmark ``value_counts`` on a frame restricted to ``self.subset``."""

    params = [
        get_benchmark_shapes("TimeValueCountsFrame"),
        GROUPBY_NGROUPS,
        [2, 10],
    ]
    param_names = ["shape", "ngroups", "subset"]

    def time_value_counts(self, *args, **kwargs):
        """Time counting the value combinations of the subset columns."""
        counts = self.df.value_counts(subset=self.subset)
        execute(counts)


class TimeValueCountsSeries(BaseTimeValueCounts):
    """Benchmark ``value_counts`` on a single column, optionally with ``bins``."""

    params = [
        get_benchmark_shapes("TimeValueCountsSeries"),
        GROUPBY_NGROUPS,
        [None, 3],
    ]
    param_names = ["shape", "ngroups", "bins"]

    def setup(self, shape, ngroups, bins):
        """Run the base setup, then keep only the first subset column."""
        super().setup(ngroups=ngroups, shape=shape)
        first_subset_label = self.subset[0]
        self.df = self.df[first_subset_label]

    def time_value_counts(self, shape, ngroups, bins):
        """Time ``value_counts`` with the given ``bins``."""
        counts = self.df.value_counts(bins=bins)
        execute(counts)


class TimeIndexing:
    """Benchmark row indexing through ``iloc``/``loc`` with several indexer kinds."""

    param_names = ["shape", "indexer_type"]
    params = [
        get_benchmark_shapes("TimeIndexing"),
        [
            "bool_array",
            "bool_series",
            "scalar",
            "slice",
            "continuous_slice",
            "numpy_array_take_all_values",
            "python_list_take_10_values",
            "function",
        ],
    ]

    # Maps every `indexer_type` to a factory building the indexer for a frame.
    indexer_getters = {
        # Alternating False/True mask with exactly `len(df)` elements, so that
        # it also has the right length for frames with an odd number of rows.
        "bool_array": lambda df: np.arange(len(df)) % 2 == 1,
        # This boolean-Series is a projection of the source frame, it shouldn't
        # be reimported or triggered to execute:
        "bool_series": lambda df: df.iloc[:, 0] > 50,
        "scalar": lambda df: len(df) // 2,
        "slice": lambda df: slice(0, len(df), 2),
        "continuous_slice": lambda df: slice(len(df) // 2),
        "numpy_array_take_all_values": lambda df: np.arange(len(df)),
        "python_list_take_10_values": lambda df: list(range(min(10, len(df)))),
        # The indexer itself is a callable receiving the frame.
        "function": lambda df: (lambda df: df.index[::-2]),
    }

    def setup(self, shape, indexer_type):
        """Generate the frame and build the indexer selected by ``indexer_type``."""
        self.df = generate_dataframe("int", *shape, RAND_LOW, RAND_HIGH)

        self.indexer = self.indexer_getters[indexer_type](self.df)
        if isinstance(self.indexer, (IMPL.Series, IMPL.DataFrame)):
            # HACK: Triggering `dtypes` meta-data computation in advance,
            # so it won't affect the `loc/iloc` time:
            self.indexer.dtypes

    def time_iloc(self, shape, indexer_type):
        """Time positional indexing with the prepared indexer."""
        # Pandas doesn't implement `df.iloc[series boolean_mask]` and raises an exception on it.
        # Replacing this with the semantically equivalent construction:
        if indexer_type != "bool_series":
            execute(self.df.iloc[self.indexer])
        else:
            execute(self.df[self.indexer])

    def time_loc(self, shape, indexer_type):
        """Time label-based indexing with the prepared indexer."""
        execute(self.df.loc[self.indexer])


class TimeIndexingColumns:
    """Benchmark selecting the first two columns by position and by label."""

    params = [get_benchmark_shapes("TimeIndexing")]
    param_names = ["shape"]

    def setup(self, shape):
        """Prepare the positional indexer and the matching list of labels."""
        self.df = generate_dataframe("int", *shape, RAND_LOW, RAND_HIGH)
        self.numeric_indexer = [0, 1]
        selected_labels = self.df.columns[self.numeric_indexer]
        self.labels_indexer = selected_labels.tolist()

    def time_iloc(self, shape):
        """Time ``iloc[:, positions]``."""
        selection = self.df.iloc[:, self.numeric_indexer]
        execute(selection)

    def time_loc(self, shape):
        """Time ``loc[:, labels]``."""
        selection = self.df.loc[:, self.labels_indexer]
        execute(selection)

    def time___getitem__(self, shape):
        """Time ``df[labels]``."""
        selection = self.df[self.labels_indexer]
        execute(selection)


class TimeMultiIndexing:
    """Benchmark ``loc`` slicing on a frame with MultiIndex rows and columns."""

    params = [get_benchmark_shapes("TimeMultiIndexing")]
    param_names = ["shape"]

    def setup(self, shape):
        """Replace both axes with two-level MultiIndexes and sort the columns."""
        frame = generate_dataframe("int", *shape, RAND_LOW, RAND_HIGH)

        # Each MultiIndex is the product of half of the original labels with
        # two string labels.
        half_rows = shape[0] // 2
        row_index = IMPL.MultiIndex.from_product(
            [frame.index[:half_rows], ["bar", "foo"]]
        )
        half_cols = shape[1] // 2
        col_index = IMPL.MultiIndex.from_product(
            [frame.columns[:half_cols], ["buz", "fuz"]]
        )

        frame.index = row_index
        frame.columns = col_index

        self.df = frame.sort_index(axis=1)

    def time_multiindex_loc(self, shape):
        """Time slicing from the third to the second-to-last label on both axes."""
        row_start, row_stop = self.df.index[2], self.df.index[-2]
        col_start, col_stop = self.df.columns[2], self.df.columns[-2]
        execute(self.df.loc[row_start:row_stop, col_start:col_stop])


class TimeResetIndex:
    """Benchmark ``reset_index`` on a default or a two-level index."""

    params = [
        get_benchmark_shapes("TimeResetIndex"),
        [False, True],
        [None, "level_1"],
    ]
    param_names = ["shape", "drop", "level"]

    def setup(self, shape, drop, level):
        """Generate the frame; install a named MultiIndex when ``level`` is set."""
        self.df = generate_dataframe("int", *shape, RAND_LOW, RAND_HIGH)

        if not level:
            return
        half_rows = shape[0] // 2
        self.df.index = IMPL.MultiIndex.from_product(
            [self.df.index[:half_rows], ["bar", "foo"]],
            names=["level_1", "level_2"],
        )

    def time_reset_index(self, shape, drop, level):
        """Time ``reset_index`` with the given ``drop`` and ``level``."""
        flattened = self.df.reset_index(drop=drop, level=level)
        execute(flattened)


class TimeAstype:
    """Benchmark ``astype`` applied to one column or to the whole frame."""

    params = [
        get_benchmark_shapes("TimeAstype"),
        ["float64", "category"],
        ["one", "all"],
    ]
    param_names = ["shape", "dtype", "astype_ncolumns"]

    def setup(self, shape, dtype, astype_ncolumns):
        """Build the ``astype`` argument.

        Raises
        ------
        ValueError
            If ``astype_ncolumns`` is neither ``"one"`` nor ``"all"``.
        """
        self.df = generate_dataframe("int", *shape, RAND_LOW, RAND_HIGH)
        if astype_ncolumns not in ("all", "one"):
            raise ValueError(f"astype_ncolumns: {astype_ncolumns} isn't supported")
        if astype_ncolumns == "all":
            # A bare dtype is passed for the whole frame.
            self.astype_arg = dtype
        else:
            # A mapping restricts the cast to the "col1" column.
            self.astype_arg = {"col1": dtype}

    def time_astype(self, shape, dtype, astype_ncolumns):
        """Time the cast described by ``self.astype_arg``."""
        converted = self.df.astype(self.astype_arg)
        execute(converted)


class TimeDescribe:
    """Benchmark ``describe`` on a generated ``"int"`` frame."""

    params = [
        get_benchmark_shapes("TimeDescribe"),
    ]
    param_names = ["shape"]

    def setup(self, shape):
        """Generate the frame to describe."""
        self.df = generate_dataframe("int", *shape, RAND_LOW, RAND_HIGH)

    def time_describe(self, shape):
        """Time ``describe``."""
        summary = self.df.describe()
        execute(summary)


class TimeProperties:
    """Benchmark access to the ``shape``, ``columns`` and ``index`` attributes."""

    params = [
        get_benchmark_shapes("TimeProperties"),
    ]
    param_names = ["shape"]

    def setup(self, shape):
        """Generate the frame whose attributes are read."""
        self.df = generate_dataframe("int", *shape, RAND_LOW, RAND_HIGH)

    def time_shape(self, shape):
        """Time reading ``shape``; the value is returned."""
        frame_shape = self.df.shape
        return frame_shape

    def time_columns(self, shape):
        """Time reading ``columns``; the value is returned."""
        frame_columns = self.df.columns
        return frame_columns

    def time_index(self, shape):
        """Time reading ``index``; the value is returned."""
        frame_index = self.df.index
        return frame_index


class TimeIndexingNumericSeries:
    """Benchmark ``[]``, ``iloc`` and ``loc`` access on a numerically indexed series.

    NOTE(review): the timing methods name their second parameter ``index`` while
    ``param_names`` and ``setup`` call that position ``dtype`` - presumably the
    benchmark runner passes parameters positionally; confirm before renaming.
    """

    params = [
        get_benchmark_shapes("TimeIndexingNumericSeries"),
        (np.int64, np.uint64, np.float64),
        ("unique_monotonic_inc", "nonunique_monotonic_inc"),
    ]
    param_names = ["shape", "dtype", "index_structure"]

    def setup(self, shape, dtype, index_structure):
        """Create the series plus the scalar, array and list indexers."""
        row_count = shape[0]
        hundredth = row_count // 100
        unique_index = IMPL.Index(range(row_count), dtype=dtype)
        # Same length as the unique index, but the value `hundredth - 1`
        # appears twice in a row.
        nonunique_values = (
            list(range(hundredth))
            + [hundredth - 1]
            + list(range(hundredth, row_count - 1))
        )
        nonunique_index = IMPL.Index(nonunique_values, dtype=dtype)
        indices = {
            "unique_monotonic_inc": unique_index,
            "nonunique_monotonic_inc": nonunique_index,
        }
        self.data = IMPL.Series(
            np.random.rand(row_count), index=indices[index_structure]
        )
        self.array = np.arange(row_count // 2)
        self.index_to_query = row_count // 2
        self.array_list = self.array.tolist()
        execute(self.data)

    def time_getitem_scalar(self, shape, index, index_structure):
        """Time ``data[scalar]``."""
        # not calling execute as execute function fails for scalar
        self.data[self.index_to_query]

    def time_getitem_slice(self, shape, index, index_structure):
        """Time ``data[:scalar]``."""
        selection = self.data[: self.index_to_query]
        execute(selection)

    def time_getitem_list_like(self, shape, index, index_structure):
        """Time ``data[[scalar]]``."""
        selection = self.data[[self.index_to_query]]
        execute(selection)

    def time_getitem_array(self, shape, index, index_structure):
        """Time ``data[ndarray]``."""
        selection = self.data[self.array]
        execute(selection)

    def time_getitem_lists(self, shape, index, index_structure):
        """Time ``data[list]``."""
        selection = self.data[self.array_list]
        execute(selection)

    def time_iloc_array(self, shape, index, index_structure):
        """Time ``data.iloc[ndarray]``."""
        selection = self.data.iloc[self.array]
        execute(selection)

    def time_iloc_list_like(self, shape, index, index_structure):
        """Time ``data.iloc[[scalar]]``."""
        selection = self.data.iloc[[self.index_to_query]]
        execute(selection)

    def time_iloc_scalar(self, shape, index, index_structure):
        """Time ``data.iloc[scalar]``."""
        # not calling execute as execute function fails for scalar
        self.data.iloc[self.index_to_query]

    def time_iloc_slice(self, shape, index, index_structure):
        """Time ``data.iloc[:scalar]``."""
        selection = self.data.iloc[: self.index_to_query]
        execute(selection)

    def time_loc_array(self, shape, index, index_structure):
        """Time ``data.loc[ndarray]``."""
        selection = self.data.loc[self.array]
        execute(selection)

    def time_loc_list_like(self, shape, index, index_structure):
        """Time ``data.loc[[scalar]]``."""
        selection = self.data.loc[[self.index_to_query]]
        execute(selection)

    def time_loc_scalar(self, shape, index, index_structure):
        """Time ``data.loc[scalar]``; the result is not passed to ``execute``."""
        self.data.loc[self.index_to_query]

    def time_loc_slice(self, shape, index, index_structure):
        """Time ``data.loc[:scalar]``."""
        selection = self.data.loc[: self.index_to_query]
        execute(selection)


class TimeReindex:
    """Benchmark ``reindex`` on datetime-, column- and MultiIndex-labelled data."""

    params = [get_benchmark_shapes("TimeReindex")]
    param_names = ["shape"]

    def setup(self, shape):
        """Build the frames, series and target indexes used by the benchmarks."""
        rows, cols = shape
        rng = IMPL.date_range(start="1/1/1970", periods=rows, freq="1min")

        # Datetime-indexed frame with an extra string column.
        dated_values = np.random.rand(rows, cols)
        self.df = IMPL.DataFrame(dated_values, index=rng, columns=range(cols))
        self.df["foo"] = "bar"
        self.rng_subset = IMPL.Index(rng[::2])

        # Integer-labelled frame used for the column reindex.
        plain_values = np.random.rand(rows, cols)
        self.df2 = IMPL.DataFrame(
            index=range(rows), data=plain_values, columns=range(cols)
        )

        # Two-level string MultiIndex: outer labels repeated, inner labels tiled.
        outer_labels = IMPL.Index(
            [f"i-{i}" for i in range(rows // 10)], dtype=object
        ).values
        level1 = outer_labels.repeat(10)
        inner_labels = IMPL.Index([f"i-{i}" for i in range(10)], dtype=object).values
        level2 = np.tile(inner_labels, rows // 10)
        index = IMPL.MultiIndex.from_arrays([level1, level2])
        self.s = IMPL.Series(np.random.randn(rows), index=index)
        self.s_subset = self.s[::2]
        self.s_subset_no_cache = self.s[::2].copy()

        # MultiIndex combining a tenth of the dates with the integers 0..9.
        mi = IMPL.MultiIndex.from_product([rng[: len(rng) // 10], range(10)])
        self.s2 = IMPL.Series(np.random.randn(len(mi)), index=mi)
        self.s2_subset = self.s2[::2].copy()

        prepared = (
            self.df,
            self.df2,
            self.s,
            self.s_subset,
            self.s2,
            self.s2_subset,
            self.s_subset_no_cache,
        )
        for obj in prepared:
            execute(obj)

    def time_reindex_dates(self, shape):
        """Time reindexing the dated frame to every second timestamp."""
        reindexed = self.df.reindex(self.rng_subset)
        execute(reindexed)

    def time_reindex_columns(self, shape):
        """Time reindexing ``df2`` to the columns ``self.df.columns[1:5]``."""
        wanted_columns = self.df.columns[1:5]
        execute(self.df2.reindex(columns=wanted_columns))

    def time_reindex_multiindex_with_cache(self, shape):
        """Time reindexing ``s`` to the index of its subset."""
        # MultiIndex._values gets cached (pandas specific)
        target = self.s_subset.index
        execute(self.s.reindex(target))

    def time_reindex_multiindex_no_cache(self, shape):
        """Time reindexing ``s`` to a fresh copy of the subset's index."""
        # Copy to avoid MultiIndex._values getting cached (pandas specific)
        target = self.s_subset_no_cache.index.copy()
        execute(self.s.reindex(target))

    def time_reindex_multiindex_no_cache_dates(self, shape):
        """Time reindexing ``s2_subset`` to a fresh copy of the index of ``s2``."""
        # Copy to avoid MultiIndex._values getting cached (pandas specific)
        target = self.s2.index.copy()
        execute(self.s2_subset.reindex(target))


class TimeReindexMethod:
    """Benchmark ``reindex`` with a fill ``method`` on a date or period index."""

    param_names = ["shape", "method", "constructor"]
    params = [
        get_benchmark_shapes("TimeReindexMethod"),
        ["pad", "backfill"],
        [IMPL.date_range, IMPL.period_range],
    ]

    def setup(self, shape, method, constructor):
        """Build the full index and a series holding every second element."""
        length = shape[0]
        self.idx = constructor("1/1/2000", periods=length, freq="1min")
        full_series = IMPL.Series(np.random.randn(length), index=self.idx)
        self.ts = full_series[::2]
        execute(self.ts)

    def time_reindex_method(self, shape, method, constructor):
        """Time reindexing back to the full index using ``method``."""
        restored = self.ts.reindex(self.idx, method=method)
        execute(restored)


class TimeFillnaMethodSeries:
    """Benchmark ``fillna(method=...)`` on a reindexed series and its float32 cast."""

    param_names = ["shape", "method"]
    params = [get_benchmark_shapes("TimeFillnaMethodSeries"), ["pad", "backfill"]]

    def setup(self, shape, method):
        """Take every second element of a series and reindex it to the full index."""
        length = shape[0]
        self.idx = IMPL.date_range("1/1/2000", periods=length, freq="1min")
        sparse_ts = IMPL.Series(np.random.randn(length), index=self.idx)[::2]
        self.ts_reindexed = sparse_ts.reindex(self.idx)
        self.ts_float32 = self.ts_reindexed.astype("float32")
        for prepared in (self.ts_reindexed, self.ts_float32):
            execute(prepared)

    def time_reindexed(self, shape, method):
        """Time ``fillna`` on the reindexed series."""
        filled = self.ts_reindexed.fillna(method=method)
        execute(filled)

    def time_float_32(self, shape, method):
        """Time ``fillna`` on the float32 series."""
        filled = self.ts_float32.fillna(method=method)
        execute(filled)


class TimeFillnaMethodDataframe:
    """Benchmark ``fillna(method=...)`` on a reindexed frame and its float32 cast."""

    param_names = ["shape", "method"]
    params = [get_benchmark_shapes("TimeFillnaMethodDataframe"), ["pad", "backfill"]]

    def setup(self, shape, method):
        """Take every second row of a frame and reindex it to the full index."""
        self.idx = IMPL.date_range("1/1/2000", periods=shape[0], freq="1min")
        full_frame = IMPL.DataFrame(np.random.randn(*shape), index=self.idx)
        sparse_frame = full_frame[::2]
        self.df_ts_reindexed = sparse_frame.reindex(self.idx)
        self.df_ts_float32 = self.df_ts_reindexed.astype("float32")
        for prepared in (self.df_ts_reindexed, self.df_ts_float32):
            execute(prepared)

    def time_reindexed(self, shape, method):
        """Time ``fillna`` on the reindexed frame."""
        filled = self.df_ts_reindexed.fillna(method=method)
        execute(filled)

    def time_float_32(self, shape, method):
        """Time ``fillna`` on the float32 frame."""
        filled = self.df_ts_float32.fillna(method=method)
        execute(filled)


class TimeLevelAlign:
    """Benchmark ``align``/``reindex`` on one level of a three-level MultiIndex."""

    param_names = ["shapes"]
    params = [get_benchmark_shapes("TimeLevelAlign")]

    def setup(self, shapes):
        """Build a MultiIndex-ed frame and a plain frame from the two shapes."""
        requested_rows, cols = shapes[0]
        side = round(math.sqrt(requested_rows))
        # the new number of rows may differ from the requested (slightly, so ok)
        block_rows = side * side

        levels = [np.arange(10), np.arange(side), np.arange(side)]
        outer_codes = np.arange(10).repeat(block_rows)
        middle_codes = np.tile(np.arange(side).repeat(side), 10)
        inner_codes = np.tile(np.tile(np.arange(side), side), 10)
        self.index = IMPL.MultiIndex(
            levels=levels,
            codes=[outer_codes, middle_codes, inner_codes],
        )

        values = np.random.randn(len(self.index), cols)
        self.df1 = IMPL.DataFrame(values, index=self.index)
        self.df2 = IMPL.DataFrame(np.random.randn(*shapes[1]))
        for prepared in (self.df1, self.df2):
            execute(prepared)

    def time_align_level(self, shapes):
        """Time ``align`` of the two frames on level 1."""
        aligned = self.df1.align(self.df2, level=1, copy=False)
        left, right = aligned
        for part in (left, right):
            execute(part)

    def time_reindex_level(self, shapes):
        """Time ``reindex`` of ``df2`` to the MultiIndex on level 1."""
        # `reindex` returns the same result here as `align`.
        # Approximately the same performance is expected.
        reindexed = self.df2.reindex(self.index, level=1)
        execute(reindexed)


class TimeDropDuplicatesDataframe:
    """Benchmark ``drop_duplicates`` over the key columns of a frame."""

    param_names = ["shape"]
    params = [get_benchmark_shapes("TimeDropDuplicatesDataframe")]

    def setup(self, shape):
        """Build a frame whose key columns repeat each label ten times."""
        rows, cols = shape
        repeats = 10
        unique_count = rows // 10
        # dataframe would have cols-1 keys(strings) and one value(int) column
        data = {
            "key" + str(number): IMPL.Index(
                [f"i-{i}" for i in range(unique_count)], dtype=object
            ).values.repeat(repeats)
            for number in range(1, cols)
        }
        data["value"] = np.random.randn(unique_count * repeats)
        self.df = IMPL.DataFrame(data)
        execute(self.df)

    def time_drop_dups(self, shape):
        """Time ``drop_duplicates`` on all columns but the last."""
        key_columns = self.df.columns[:-1]
        execute(self.df.drop_duplicates(key_columns))

    def time_drop_dups_inplace(self, shape):
        """Time the same operation with ``inplace=True``."""
        key_columns = self.df.columns[:-1]
        self.df.drop_duplicates(key_columns, inplace=True)
        execute(self.df)


class TimeDropDuplicatesSeries:
    """Benchmark ``drop_duplicates`` on a series of tiled string labels."""

    param_names = ["shape"]
    params = [get_benchmark_shapes("TimeDropDuplicatesSeries")]

    def setup(self, shape):
        """Tile ``rows // 10`` distinct labels ten times into a series."""
        rows = shape[0]
        distinct_labels = IMPL.Index(
            [f"i-{i}" for i in range(rows // 10)], dtype=object
        ).values
        self.series = IMPL.Series(np.tile(distinct_labels, 10))
        execute(self.series)

    def time_drop_dups(self, shape):
        """Time ``drop_duplicates`` returning a new series."""
        deduplicated = self.series.drop_duplicates()
        execute(deduplicated)

    def time_drop_dups_string(self, shape):
        """Time ``drop_duplicates`` with ``inplace=True``."""
        self.series.drop_duplicates(inplace=True)
        execute(self.series)


class TimeDatetimeAccessor:
    """Benchmark the ``dt`` accessor on a series built from ``timedelta_range``."""

    param_names = ["shape"]
    params = [get_benchmark_shapes("TimeDatetimeAccessor")]

    def setup(self, shape):
        """Create an hourly timedelta series with ``shape[0]`` elements."""
        deltas = IMPL.timedelta_range("1 days", periods=shape[0], freq="h")
        self.series = IMPL.Series(deltas)
        execute(self.series)

    def time_dt_accessor(self, shape):
        """Time obtaining the ``dt`` accessor itself."""
        accessor = self.series.dt
        execute(accessor)

    def time_timedelta_days(self, shape):
        """Time ``dt.days``."""
        days = self.series.dt.days
        execute(days)

    def time_timedelta_seconds(self, shape):
        """Time ``dt.seconds``."""
        seconds = self.series.dt.seconds
        execute(seconds)


class BaseCategories:
    """Common setup producing a categorical series in ``self.ts``."""

    def setup(self, shape):
        """Draw ``rows`` random labels out of ``rows // 10`` and cast to category."""
        rows = shape[0]
        codes = np.random.randint(0, rows // 10, size=rows)
        labels = [f"s{code:04d}" for code in codes]
        self.ts = IMPL.Series(labels).astype("category")
        execute(self.ts)


class TimeSetCategories(BaseCategories):
    """Benchmark ``cat.set_categories`` with every second existing category."""

    param_names = ["shape"]
    params = [get_benchmark_shapes("TimeSetCategories")]

    def time_set_categories(self, shape):
        """Time restricting the categories to every second one."""
        kept_categories = self.ts.cat.categories[::2]
        execute(self.ts.cat.set_categories(kept_categories))


class TimeRemoveCategories(BaseCategories):
    """Benchmark ``cat.remove_categories`` with every second existing category."""

    param_names = ["shape"]
    params = [get_benchmark_shapes("TimeRemoveCategories")]

    def time_remove_categories(self, shape):
        """Time removing every second category."""
        removed_categories = self.ts.cat.categories[::2]
        execute(self.ts.cat.remove_categories(removed_categories))


class BaseReshape:
    """Common setup producing a frame with a two-level integer MultiIndex."""

    def setup(self, shape):
        """Build the MultiIndex from a repeated level and a rolled, tiled level."""
        rows, cols = shape
        repeats = 10
        group_ids = np.arange(rows // repeats)
        first_level = group_ids.repeat(repeats)
        second_level = np.roll(np.tile(np.arange(rows // repeats), repeats), 25)
        index = IMPL.MultiIndex.from_arrays([first_level, second_level])
        values = np.random.randn(rows, cols)
        self.df = IMPL.DataFrame(values, index=index)
        execute(self.df)


class TimeStack(BaseReshape):
    """Benchmark ``stack`` on a previously unstacked frame."""

    param_names = ["shape"]
    params = [get_benchmark_shapes("TimeStack")]

    def setup(self, shape):
        """Run the base setup and unstack level 1 ahead of the measurement."""
        super().setup(shape)
        unstacked = self.df.unstack(1)
        self.udf = unstacked
        execute(self.udf)

    def time_stack(self, shape):
        """Time ``stack`` on the unstacked frame."""
        stacked = self.udf.stack()
        execute(stacked)


class TimeUnstack(BaseReshape):
    """Benchmark ``unstack`` of level 1."""

    param_names = ["shape"]
    params = [get_benchmark_shapes("TimeUnstack")]

    def time_unstack(self, shape):
        """Time ``unstack(1)``."""
        unstacked = self.df.unstack(1)
        execute(unstacked)


class TimeReplace:
    """Benchmark ``replace`` with a dict mapping integers to timestamps."""

    param_names = ["shape"]
    params = [get_benchmark_shapes("TimeReplace")]

    def setup(self, shape):
        """Build the replacement mapping and a frame of random integers."""
        rows, cols = shape
        make_timestamp = getattr(IMPL, "Timestamp")
        self.to_replace = {value: make_timestamp(value) for value in range(rows)}
        random_values = np.random.randint(rows, size=(rows, cols))
        self.df = IMPL.DataFrame(random_values)
        execute(self.df)

    def time_replace(self, shape):
        """Time ``replace`` with the prepared mapping."""
        replaced = self.df.replace(self.to_replace)
        execute(replaced)


class TimeGroups:
    """Benchmark the ``groups`` and ``indices`` attributes of a series groupby."""

    param_names = ["shape"]
    params = [get_benchmark_shapes("TimeGroups")]

    def setup(self, shape):
        """Create a series of random integers drawn from ``[0, 100)``."""
        values = np.random.randint(0, 100, size=shape[0])
        self.series = IMPL.Series(values)
        execute(self.series)

    # returns a pretty dict thus not calling execute
    def time_series_groups(self, shape):
        """Time ``groups`` of the series grouped by itself."""
        grouped = self.series.groupby(self.series)
        grouped.groups

    # returns a dict thus not calling execute
    def time_series_indices(self, shape):
        """Time ``indices`` of the series grouped by itself."""
        grouped = self.series.groupby(self.series)
        grouped.indices


class TimeRepr:
    """Benchmark ``repr`` of a frame of random values."""

    param_names = ["shape"]
    params = [get_benchmark_shapes("TimeRepr")]

    def setup(self, shape):
        """Create the frame from ``np.random.randn(*shape)``."""
        values = np.random.randn(*shape)
        self.df = IMPL.DataFrame(values)
        execute(self.df)

    # returns a string thus not calling execute
    def time_repr(self, shape):
        """Time building the string representation."""
        repr(self.df)


class TimeMaskBool:
    """Benchmark ``mask`` with a boolean frame marking the negative values."""

    param_names = ["shape"]
    params = [get_benchmark_shapes("TimeMaskBool")]

    def setup(self, shape):
        """Create the random frame and the ``df < 0`` mask."""
        values = np.random.randn(*shape)
        self.df = IMPL.DataFrame(values)
        self.mask = self.df < 0
        for prepared in (self.df, self.mask):
            execute(prepared)

    def time_frame_mask(self, shape):
        """Time ``mask`` with the prepared boolean frame."""
        masked = self.df.mask(self.mask)
        execute(masked)


class TimeIsnull:
    """Benchmark ``IMPL.isnull`` on a frame mixing ``NaN`` and ``1.0``."""

    param_names = ["shape"]
    params = [get_benchmark_shapes("TimeIsnull")]

    def setup(self, shape):
        """Fill the frame by random choice between ``NaN`` and ``1.0``."""
        candidates = np.array([np.nan, 1.0])
        n_rows, n_cols = shape[0], shape[1]
        values = np.random.choice(candidates, (n_rows, n_cols))
        self.df = IMPL.DataFrame(values)
        execute(self.df)

    def time_isnull(self, shape):
        """Time ``isnull`` over the whole frame."""
        null_flags = IMPL.isnull(self.df)
        execute(null_flags)


class TimeDropna:
    """Benchmark ``dropna`` on a frame with a rectangular block of ``NaN``."""

    param_names = ["how", "axis", "shape"]
    params = (["all", "any"], [0, 1], get_benchmark_shapes("TimeDropna"))

    def setup(self, how, axis, shape):
        """Create the frame, blank out a block of cells and add a string column."""
        row, col = shape
        self.df = IMPL.DataFrame(np.random.randn(row, col))
        nan_rows = slice(row // 20, row // 10)
        nan_cols = slice(col // 3, col // 2)
        self.df.iloc[nan_rows, nan_cols] = np.nan
        self.df["foo"] = "bar"
        execute(self.df)

    def time_dropna(self, how, axis, shape):
        """Time ``dropna`` with the given ``how`` and ``axis``."""
        cleaned = self.df.dropna(how=how, axis=axis)
        execute(cleaned)


class TimeEquals:
    """Benchmark ``equals`` of a frame against itself."""

    param_names = ["shape"]
    params = [get_benchmark_shapes("TimeEquals")]

    def setup(self, shape):
        """Create a random frame whose last cell is ``NaN``."""
        values = np.random.randn(*shape)
        self.df = IMPL.DataFrame(values)
        self.df.iloc[-1, -1] = np.nan
        execute(self.df)

    # returns a boolean thus not calling execute
    def time_frame_float_equal(self, shape):
        """Time comparing the frame with itself."""
        frame = self.df
        frame.equals(frame)


from .utils import setup  # noqa: E402, F401


================================================
FILE: asv_bench/benchmarks/io/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""IO Modin benchmarks."""


================================================
FILE: asv_bench/benchmarks/io/csv.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import numpy as np

from ..utils import (
    ASV_USE_IMPL,
    IMPL,
    RAND_HIGH,
    RAND_LOW,
    execute,
    generate_dataframe,
    get_benchmark_shapes,
    get_shape_id,
    prepare_io_data,
)


class BaseReadCsv:
    """Shared fixtures for the ``read_csv`` benchmarks.

    ``setup_cache`` reads ``self.data_type``, which this class does not define.
    """

    # test data file should be created only once
    def setup_cache(self, test_filename="io_test_file"):
        """Prepare the test files for this class' shapes and return their names."""
        data_type = self.data_type
        shapes = get_benchmark_shapes(self.__class__.__name__)
        return prepare_io_data(test_filename, data_type, shapes)

    def setup(self, test_filenames, shape, *args, **kwargs):
        """Store the identifier of ``shape`` in ``self.shape_id``."""
        running_modin = ASV_USE_IMPL == "modin"
        if running_modin:
            # ray init
            IMPL.DataFrame([])
        self.shape_id = get_shape_id(shape)


class TimeReadCsvSkiprows(BaseReadCsv):
    """Benchmark ``read_csv`` for several kinds of the ``skiprows`` argument."""

    data_type = "str_int"
    shapes = get_benchmark_shapes("TimeReadCsvSkiprows")

    # Named ``skiprows`` values; both ranges are bounded by the row count of the
    # first benchmark shape, whichever shape is currently being benchmarked.
    skiprows_mapping = dict(
        # the callable is truthy for odd row indices
        lambda_even_rows=lambda x: x % 2,
        range_uniform=np.arange(1, shapes[0][0] // 10),
        range_step2=np.arange(1, shapes[0][0], 2),
    )

    param_names = ["shape", "skiprows"]
    # `None` stands for "do not pass any skiprows", the rest are mapping keys
    params = [shapes, [None, *skiprows_mapping]]

    def setup(self, test_filenames, shape, skiprows):
        super().setup(test_filenames, shape, skiprows)
        # translate the parameter name into the actual `skiprows` value
        if skiprows:
            self.skiprows = self.skiprows_mapping[skiprows]
        else:
            self.skiprows = None

    def time_skiprows(self, test_filenames, shape, skiprows):
        csv_path = test_filenames[self.shape_id]
        execute(IMPL.read_csv(csv_path, skiprows=self.skiprows))


class TimeReadCsvTrueFalseValues(BaseReadCsv):
    """Benchmark ``read_csv`` with custom ``true_values``/``false_values``."""

    param_names = ["shape"]
    params = [get_benchmark_shapes("TimeReadCsvTrueFalseValues")]
    data_type = "true_false_int"

    def time_true_false_values(self, test_filenames, shape):
        csv_path = test_filenames[self.shape_id]
        frame = IMPL.read_csv(
            csv_path,
            true_values=["Yes", "true"],
            false_values=["No", "false"],
        )
        execute(frame)


class TimeReadCsvNamesDtype:
    """Benchmark ``read_csv`` with explicit ``names`` and ``dtype`` (and ``parse_dates``)."""

    shapes = get_benchmark_shapes("TimeReadCsvNamesDtype")
    _dtypes_params = ["Int64", "Int64_Timestamp"]
    # columns overwritten with timestamps in the "Int64_Timestamp" variant
    _timestamp_columns = ["col1", "col2"]

    param_names = ["shape", "names", "dtype"]
    params = [shapes, ["array-like"], _dtypes_params]

    def _get_file_id(self, shape, dtype):
        # cache key: the shape id immediately followed by the dtype parameter
        shape_id = get_shape_id(shape)
        return shape_id + dtype

    def _add_timestamp_columns(self, df):
        # work on a copy so that the frame passed in stays untouched
        stamped = df.copy()
        timestamps = IMPL.date_range("2000", periods=stamped.shape[0], freq="ms")
        for column in self._timestamp_columns:
            stamped[column] = timestamps
        return stamped

    def setup_cache(self, test_filename="io_test_file_csv_names_dtype"):
        # maps file id -> (file name, column names, column dtypes) of a saved frame
        cache = {}
        for shape in self.shapes:
            for dtype in self._dtypes_params:
                df = generate_dataframe(
                    "int", *shape, RAND_LOW, RAND_HIGH, impl="pandas"
                )
                if dtype == "Int64_Timestamp":
                    df = self._add_timestamp_columns(df)

                file_id = self._get_file_id(shape, dtype)
                csv_path = f"{test_filename}_{file_id}.csv"
                cache[file_id] = (
                    csv_path,
                    df.columns.to_list(),
                    df.dtypes.to_dict(),
                )
                df.to_csv(csv_path, index=False)
        return cache

    def setup(self, cache, shape, names, dtype):
        # ray init
        if ASV_USE_IMPL == "modin":
            IMPL.DataFrame([])

        cached_entry = cache[self._get_file_id(shape, dtype)]
        self.filename, self.names, cached_dtype = cached_entry

        if dtype == "Int64_Timestamp":
            # the cached dtype mapping should not change, so edit a copy of it;
            # the timestamp columns are handed to `parse_dates` instead of `dtype`
            column_dtypes = cached_dtype.copy()
            for column in self._timestamp_columns:
                column_dtypes.pop(column)
            self.dtype = column_dtypes
            self.parse_dates = self._timestamp_columns
        else:
            self.dtype = cached_dtype
            self.parse_dates = None

    def time_read_csv_names_dtype(self, cache, shape, names, dtype):
        frame = IMPL.read_csv(
            self.filename,
            names=self.names,
            header=0,
            dtype=self.dtype,
            parse_dates=self.parse_dates,
        )
        execute(frame)


from ..utils import setup  # noqa: E402, F401


================================================
FILE: asv_bench/benchmarks/io/parquet.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

from ..utils import (
    ASV_USE_IMPL,
    IMPL,
    execute,
    get_benchmark_shapes,
    get_shape_id,
    prepare_io_data_parquet,
)


class TimeReadParquet:
    """Benchmark ``read_parquet`` on files generated once per benchmark run."""

    data_type = "str_int"
    shapes = get_benchmark_shapes("TimeReadParquet")

    param_names = ["shape"]
    params = [shapes]

    def setup_cache(self, test_filename="io_test_file"):
        # test data files should be created only once, hence ``setup_cache``
        benchmark_shapes = get_benchmark_shapes(self.__class__.__name__)
        return prepare_io_data_parquet(
            test_filename, self.data_type, benchmark_shapes
        )

    def setup(self, test_filenames, shape):
        running_modin = ASV_USE_IMPL == "modin"
        if running_modin:
            # building a throwaway frame initializes the engine ("ray init")
            IMPL.DataFrame([])
        # key under which the file for this shape is stored in `test_filenames`
        self.shape_id = get_shape_id(shape)

    def time_read_parquet(self, test_filenames, shape):
        parquet_path = test_filenames[self.shape_id]
        execute(IMPL.read_parquet(parquet_path))


from ..utils import setup  # noqa: E402, F401


================================================
FILE: asv_bench/benchmarks/scalability/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Benchmarks measuring how Modin performance scales when MODIN_CPUS are changed."""


================================================
FILE: asv_bench/benchmarks/scalability/scalability_benchmarks.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""These benchmarks are supposed to be run only for modin, since they do not make sense for pandas."""

import modin.pandas as pd

try:
    from modin.pandas.io import from_pandas
except ImportError:
    from modin.pandas.utils import from_pandas

try:
    from modin.pandas.io import to_numpy, to_pandas
except ImportError:
    try:
        from modin.utils import to_numpy, to_pandas
    except ImportError:
        # This provides compatibility with older versions of Modin, allowing us to test old commits.
        from modin.pandas.utils import to_pandas

import pandas

from ..utils import (
    RAND_HIGH,
    RAND_LOW,
    execute,
    gen_data,
    generate_dataframe,
    get_benchmark_shapes,
)


class TimeFromPandas:
    """Time ``from_pandas`` for several values returned by ``NPartitions.get``."""

    param_names = ["shape", "cpus"]
    params = [get_benchmark_shapes("TimeFromPandas"), [4, 16, 32]]

    def setup(self, shape, cpus):
        generated = gen_data("int", *shape, RAND_LOW, RAND_HIGH)
        self.data = pandas.DataFrame(generated)
        from modin.config import NPartitions

        def _fixed_npartitions():
            return cpus

        # override the getter so that it reports the benchmarked `cpus` value
        NPartitions.get = _fixed_npartitions
        # trigger ray init
        pd.DataFrame([])

    def time_from_pandas(self, shape, cpus):
        modin_frame = from_pandas(self.data)
        execute(modin_frame)


class TimeToPandas:
    """Time ``to_pandas`` for several values returned by ``NPartitions.get``."""

    param_names = ["shape", "cpus"]
    params = [get_benchmark_shapes("TimeToPandas"), [4, 16, 32]]

    def setup(self, shape, cpus):
        from modin.config import NPartitions

        def _fixed_npartitions():
            return cpus

        # override the getter before the Modin frame is created
        NPartitions.get = _fixed_npartitions
        self.data = generate_dataframe(
            "int", *shape, RAND_LOW, RAND_HIGH, impl="modin"
        )

    def time_to_pandas(self, shape, cpus):
        # to_pandas is already synchronous, so `execute` is not needed
        to_pandas(self.data)


class TimeToNumPy:
    """Time ``to_numpy`` for several values returned by ``NPartitions.get``."""

    param_names = ["shape", "cpus"]
    params = [get_benchmark_shapes("TimeToNumPy"), [4, 16, 32]]

    def setup(self, shape, cpus):
        from modin.config import NPartitions

        def _fixed_npartitions():
            return cpus

        # override the getter before the Modin frame is created
        NPartitions.get = _fixed_npartitions
        self.data = generate_dataframe(
            "int", *shape, RAND_LOW, RAND_HIGH, impl="modin"
        )

    def time_to_numpy(self, shape, cpus):
        # to_numpy is already synchronous, so `execute` is not needed
        to_numpy(self.data)


from ..utils import setup  # noqa: E402, F401


================================================
FILE: asv_bench/benchmarks/utils/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Modin benchmarks utils."""

from .common import (
    IMPL,
    execute,
    gen_data,
    gen_nan_data,
    generate_dataframe,
    get_shape_id,
    prepare_io_data,
    prepare_io_data_parquet,
    random_booleans,
    random_columns,
    random_string,
    setup,
    translator_groupby_ngroups,
)
from .compatibility import ASV_USE_IMPL, ASV_USE_STORAGE_FORMAT
from .data_shapes import GROUPBY_NGROUPS, RAND_HIGH, RAND_LOW, get_benchmark_shapes

__all__ = [
    "ASV_USE_IMPL",
    "ASV_USE_STORAGE_FORMAT",
    "RAND_LOW",
    "RAND_HIGH",
    "GROUPBY_NGROUPS",
    "get_benchmark_shapes",
    "IMPL",
    "execute",
    "get_shape_id",
    "gen_data",
    "gen_nan_data",
    "generate_dataframe",
    "prepare_io_data",
    "prepare_io_data_parquet",
    "random_string",
    "random_columns",
    "random_booleans",
    "translator_groupby_ngroups",
    "setup",
]


================================================
FILE: asv_bench/benchmarks/utils/common.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
The module contains the functionality that is used when benchmarking Modin commits.

In the case of using utilities from the main Modin code, there is a chance that when
benchmarking old commits, the utilities changed, which in turn can unexpectedly affect
the performance results, hence some utility functions are duplicated here.
"""

import logging
import uuid
from typing import Optional, Union

import numpy as np
import pandas

import modin.pandas

from .compatibility import ASV_DATASET_SIZE, ASV_USE_ENGINE, ASV_USE_IMPL
from .data_shapes import RAND_HIGH, RAND_LOW

# Maps an implementation name (the possible values of ``ASV_USE_IMPL``) to the
# module that provides the corresponding DataFrame API.
POSSIBLE_IMPL = {
    "modin": modin.pandas,
    "pandas": pandas,
}
# Module the benchmarks run against, selected by ``ASV_USE_IMPL``.
IMPL = POSSIBLE_IMPL[ASV_USE_IMPL]


def translator_groupby_ngroups(groupby_ngroups: Union[str, int], shape: tuple) -> int:
    """
    Translate a symbolic number of groups into an actual number.

    Only the ``"huge_amount_groups"`` value is translated, and only when the
    "big" dataset size is in use; every other input is returned unchanged.

    Parameters
    ----------
    groupby_ngroups : str or int
        Number of groups that will be used in `groupby` operation.
    shape : tuple
        Same as pandas.Dataframe.shape.

    Returns
    -------
    int
        Half of the number of rows, capped at 5000, for the translated case;
        otherwise `groupby_ngroups` as it was passed in.
    """
    needs_translation = (
        ASV_DATASET_SIZE == "big" and groupby_ngroups == "huge_amount_groups"
    )
    if not needs_translation:
        return groupby_ngroups
    return min(shape[0] // 2, 5000)


class weakdict(dict):  # noqa: GL08
    # A ``dict`` subclass whose only addition is the ``__weakref__`` slot, which
    # makes its instances weakly referenceable (plain ``dict`` objects are not).
    # `gen_data` wraps generated data in it before caching.
    __slots__ = ("__weakref__",)


# Module-level caches keyed by the generation parameters: `data_cache` is filled
# by `gen_nan_data` and `gen_data`, `dataframes_cache` by `generate_dataframe`.
data_cache = dict()
dataframes_cache = dict()


def gen_nan_data(
    nrows: int, ncols: int
) -> "Union[modin.pandas.DataFrame, pandas.DataFrame, modin.pandas.Series, pandas.Series]":
    """
    Generate nan data with caching.

    The generated data are saved in the dictionary and on a subsequent call,
    if the keys match, saved data will be returned. Therefore, we need
    to carefully monitor the changing of saved data and make its copy if needed.

    Parameters
    ----------
    nrows : int
        Number of rows.
    ncols : int
        Number of columns.

    Returns
    -------
    modin.pandas.DataFrame or pandas.DataFrame or modin.pandas.Series or pandas.Series
        DataFrame or Series with shape (nrows, ncols) or (nrows,), respectively.

    Raises
    ------
    AssertionError
        If `ncols` is less than 1.
    """
    cache_key = (ASV_USE_IMPL, nrows, ncols)
    if cache_key in data_cache:
        return data_cache[cache_key]

    if ncols < 1:
        # Raised explicitly (instead of `assert False`) so that the check is not
        # stripped when Python runs with -O, which would leave `data` unbound.
        raise AssertionError("Number of columns (ncols) should be >= 1")

    logging.info("Generating nan data {} rows and {} columns".format(nrows, ncols))

    if ncols > 1:
        columns = [f"col{x}" for x in range(ncols)]
        data = IMPL.DataFrame(np.nan, index=IMPL.RangeIndex(nrows), columns=columns)
    else:
        # a single column is represented by a Series
        data = IMPL.Series(np.nan, index=IMPL.RangeIndex(nrows))

    data_cache[cache_key] = data
    return data


def gen_int_data(nrows: int, ncols: int, rand_low: int, rand_high: int) -> dict:
    """
    Generate columns of random integers.

    Parameters
    ----------
    nrows : int
        Number of rows.
    ncols : int
        Number of columns.
    rand_low : int
        Low bound for random generator.
    rand_high : int
        High bound for random generator.

    Returns
    -------
    dict
        ``"col0" ... "col{ncols - 1}"`` keys, each of them maps to an np.ndarray
        of `nrows` length drawn with ``np.random.randint(rand_low, rand_high)``.
    """
    columns = {}
    for position in range(ncols):
        columns[f"col{position}"] = np.random.randint(
            rand_low, rand_high, size=nrows
        )
    return columns


def gen_str_int_data(nrows: int, ncols: int, rand_low: int, rand_high: int) -> dict:
    """
    Generate integer data in which one column is converted to strings.

    Parameters
    ----------
    nrows : int
        Number of rows.
    ncols : int
        Number of columns.
    rand_low : int
        Low bound for random generator.
    rand_high : int
        High bound for random generator.

    Returns
    -------
    dict
        Number of keys - `ncols`, each of them store `nrows` values.
        The first column holds a list of ``"str_<int>"`` strings, the remaining
        columns are np.ndarray of integers.
    """
    columns = dict(gen_int_data(nrows, ncols, rand_low, rand_high))
    # the first generated column is the one converted to string type
    string_column = list(columns)[0]
    columns[string_column] = [f"str_{value}" for value in columns[string_column]]
    return columns


def gen_true_false_int_data(
    nrows: int, ncols: int, rand_low: int, rand_high: int
) -> dict:
    """
    Generate int data and string data "true" and "false" values.

    Parameters
    ----------
    nrows : int
        Number of rows.
    ncols : int
        Number of columns.
    rand_low : int
        Low bound for random generator.
    rand_high : int
        High bound for random generator.

    Returns
    -------
    dict
        Number of keys - `ncols`, each of them store np.ndarray of `nrows` length.
        One half of the columns with integer values, another half - with "true" and
        "false" string values.
    """
    # Only the *columns* are split in halves. Every column has to hold `nrows`
    # values: columns of different lengths (which splitting the rows as well
    # produced for odd `nrows`) cannot be combined into a single frame.
    int_ncols = ncols // 2
    true_false_ncols = ncols - int_ncols

    data = gen_int_data(nrows, int_ncols, rand_low, rand_high)
    data.update(
        {
            f"tf_col{i}": np.random.choice(
                ["Yes", "true", "No", "false"], size=nrows
            )
            for i in range(true_false_ncols)
        }
    )
    return data


def gen_data(
    data_type: str,
    nrows: int,
    ncols: int,
    rand_low: int,
    rand_high: int,
) -> dict:
    """
    Generate data of the requested kind, reusing previously generated data.

    A shallow copy of the generated data is stored in the module-level cache.
    The first call for a given set of arguments returns the freshly generated
    dictionary, subsequent calls return the cached object itself, so callers
    that want to modify the result must copy it first.

    Parameters
    ----------
    data_type : {"int", "str_int", "true_false_int"}
        Type of data generation.
    nrows : int
        Number of rows.
    ncols : int
        Number of columns.
    rand_low : int
        Low bound for random generator.
    rand_high : int
        High bound for random generator.

    Returns
    -------
    dict
        Number of keys - `ncols`, each of them store np.ndarray of `nrows` length.

    Notes
    -----
    Returned data type depends on the `data_type` parameter in the next way:
    - `data_type`=="int" - all columns will be contain only integer values;
    - `data_type`=="str_int" some of the columns will be of string type;
    - `data_type`=="true_false_int" half of the columns will be filled with
      string values representing "true" and "false" values and another half - with
      integers.
    """
    cache_key = (data_type, nrows, ncols, rand_low, rand_high)
    try:
        return data_cache[cache_key]
    except KeyError:
        pass

    # the cache key lists the arguments in the order the message expects them
    logging.info(
        "Generating {} data {} rows and {} columns [{}-{}]".format(*cache_key)
    )

    generators = {
        "int": gen_int_data,
        "str_int": gen_str_int_data,
        "true_false_int": gen_true_false_int_data,
    }
    assert data_type in generators
    generate = generators[data_type]

    generated = generate(nrows, ncols, rand_low, rand_high)
    data_cache[cache_key] = weakdict(generated)

    return generated


def generate_dataframe(
    data_type: str,
    nrows: int,
    ncols: int,
    rand_low: int,
    rand_high: int,
    groupby_ncols: Optional[int] = None,
    count_groups: Optional[int] = None,
    gen_unique_key: bool = False,
    cache_prefix: Optional[str] = None,
    impl: Optional[str] = None,
) -> Union[modin.pandas.DataFrame, pandas.DataFrame, tuple]:
    """
    Generate DataFrame with caching.

    The generated dataframes are saved in the dictionary and on a subsequent call,
    if the keys match, one of the saved dataframes will be returned. Therefore, we need
    to carefully monitor that operations that change the dataframe work with its copy.

    Parameters
    ----------
    data_type : str
        Type of data generation;
        any type supported by `gen_data`: {"int", "str_int", "true_false_int"}.
    nrows : int
        Number of rows.
    ncols : int
        Number of columns.
    rand_low : int
        Low bound for random generator.
    rand_high : int
        High bound for random generator.
    groupby_ncols : int, default: None
        Number of columns for which `groupby` will be called in the future;
        to get more stable performance results, we need to have the same number of values
        in each group every benchmarking time.
    count_groups : int, default: None
        Count of groups in groupby columns.
    gen_unique_key : bool, default: False
        Generate `col1` column where all elements are unique.
    cache_prefix : str, optional
        Prefix to add to the cache key of the requested frame.
    impl : str, optional
        Implementation used to create the dataframe;
        supported implementations: {"modin", "pandas"}.
        Defaults to the implementation selected by `ASV_USE_IMPL`.

    Returns
    -------
    modin.pandas.DataFrame or pandas.DataFrame
        The generated frame, when no groupby columns are requested.
    tuple of (DataFrame, list)
        The generated frame and the names of its groupby columns, when groupby
        columns are generated.
    """
    assert not (
        (groupby_ncols is None) ^ (count_groups is None)
    ), "You must either specify both parameters 'groupby_ncols' and 'count_groups' or none of them."

    with_groupby = bool(groupby_ncols and count_groups)
    if with_groupby:
        # groupby columns are counted in `ncols`, the rest is random data
        ncols -= groupby_ncols

    if impl is None:
        impl = ASV_USE_IMPL

    cache_key = (
        impl,
        data_type,
        nrows,
        ncols,
        rand_low,
        rand_high,
        groupby_ncols,
        count_groups,
        gen_unique_key,
    )

    if cache_prefix is not None:
        cache_key = (cache_prefix, *cache_key)

    if cache_key in dataframes_cache:
        return dataframes_cache[cache_key]

    logging.info(
        "Allocating {} DataFrame {}: {} rows and {} columns [{}-{}]".format(
            impl, data_type, nrows, ncols, rand_low, rand_high
        )
    )
    # `gen_data` may hand out the very object stored in its cache. Columns are
    # added below, so work on a shallow copy: otherwise the extra columns would
    # leak into every later request for the same raw data.
    data = dict(gen_data(data_type, nrows, ncols, rand_low, rand_high))

    if with_groupby:
        groupby_columns = [f"groupby_col{x}" for x in range(groupby_ncols)]
        for groupby_col in groupby_columns:
            # the tiled column has `count_groups * (nrows // count_groups)` values,
            # i.e. `nrows` is expected to be a multiple of `count_groups`
            data[groupby_col] = np.tile(np.arange(count_groups), nrows // count_groups)

    if gen_unique_key:
        data["col1"] = np.arange(nrows)

    df = POSSIBLE_IMPL[impl].DataFrame(data)

    result = (df, groupby_columns) if with_groupby else df
    dataframes_cache[cache_key] = result
    return result


def random_string() -> str:
    """
    Create a random string from a freshly generated UUID4.

    Returns
    -------
    str
        36-character textual form of the UUID.
    """
    generated = uuid.uuid4()
    return str(generated)


def random_columns(df_columns: list, columns_number: int) -> list:
    """
    Pick a list of random columns from a given sequence.

    The columns are drawn with ``np.random.choice`` using its default settings,
    so the same column may be picked more than once.

    Parameters
    ----------
    df_columns : list
        Columns to choose from.
    columns_number : int
        How many columns to pick.

    Returns
    -------
    list
    """
    picked = np.random.choice(df_columns, size=columns_number)
    return list(picked)


def random_booleans(number: int) -> list:
    """
    Create a list of `number` random booleans.

    Parameters
    ----------
    number : int
        Count of booleans in result list.

    Returns
    -------
    list
    """
    flags = np.random.choice([True, False], size=number)
    return list(flags)


def execute(df: Union[modin.pandas.DataFrame, pandas.DataFrame]):
    """
    Make sure the calculations are finished.

    Nothing is done unless the benchmarks run against Modin.

    Parameters
    ----------
    df : modin.pandas.DataFrame or pandas.DataFrame
        DataFrame to be executed.
    """
    if ASV_USE_IMPL != "modin":
        return

    modin_frame = df._query_compiler._modin_frame
    partitions = modin_frame._partitions.flatten()
    mgr_cls = modin_frame._partition_mgr_cls
    if len(partitions) and hasattr(mgr_cls, "wait_partitions"):
        mgr_cls.wait_partitions(partitions)
        return

    # compatibility with old Modin versions: drain every call queue first,
    # then wait for the data of each partition using the engine's own primitive
    for partition in partitions:
        partition.drain_call_queue()

    if ASV_USE_ENGINE == "ray":
        from ray import wait

        for partition in partitions:
            wait([partition._data])
    elif ASV_USE_ENGINE == "dask":
        from dask.distributed import wait

        for partition in partitions:
            wait(partition._data)
    # no additional waiting is performed for the other engines (e.g. "python")


def get_shape_id(shape: tuple) -> str:
    """
    Build a string id of a shape: its numbers joined with `_` delimiters.

    Parameters
    ----------
    shape : tuple
        Same as pandas.Dataframe.shape.

    Returns
    -------
    str
        For example, ``"250_10"`` for the ``(250, 10)`` shape.
    """
    return "_".join(map(str, shape))


def prepare_io_data(test_filename: str, data_type: str, shapes: list):
    """
    Write a CSV file with generated data for every requested shape.

    The frames are obtained from `generate_dataframe` (pandas implementation),
    which caches them.

    Parameters
    ----------
    test_filename : str
        Unique file identifier that is used to distinguish data
        for different tests.
    data_type : {"int", "str_int", "true_false_int"}
        Type of data generation.
    shapes : list
        Data shapes to prepare.

    Returns
    -------
    test_filenames : dict
        Dictionary that maps dataset shape id to the file on disk.
    """
    test_filenames = {}
    for shape in shapes:
        shape_id = get_shape_id(shape)
        csv_path = f"{test_filename}_{shape_id}_{data_type}.csv"
        test_filenames[shape_id] = csv_path
        frame = generate_dataframe(
            data_type, *shape, RAND_LOW, RAND_HIGH, impl="pandas"
        )
        frame.to_csv(csv_path, index=False)

    return test_filenames


def prepare_io_data_parquet(test_filename: str, data_type: str, shapes: list):
    """
    Write a parquet file with generated data for every requested shape.

    The frames are obtained from `generate_dataframe` (pandas implementation),
    which caches them.

    Parameters
    ----------
    test_filename : str
        Unique file identifier that is used to distinguish data
        for different tests.
    data_type : "str_int"
        Type of data generation.
    shapes : list
        Data shapes to prepare.

    Returns
    -------
    test_filenames : dict
        Dictionary that maps dataset shape id to the file on disk.
    """
    test_filenames = {}
    for shape in shapes:
        shape_id = get_shape_id(shape)
        parquet_path = f"{test_filename}_{shape_id}_{data_type}.parquet"
        test_filenames[shape_id] = parquet_path
        frame = generate_dataframe(
            data_type, *shape, RAND_LOW, RAND_HIGH, impl="pandas"
        )
        frame.to_parquet(parquet_path, index=False)

    return test_filenames


def setup(*args, **kwargs):  # noqa: GL08
    # Importing this function into a benchmark file is all that is needed:
    # ASV runs it automatically before each benchmark function, which resets
    # the random seed and keeps the generated data reproducible.
    # https://asv.readthedocs.io/en/latest/writing_benchmarks.html
    fixed_seed = 42
    np.random.seed(fixed_seed)


================================================
FILE: asv_bench/benchmarks/utils/compatibility.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Compatibility layer for parameters used by ASV."""

import os

import modin.pandas as pd

# The number of partitions is read from `modin.config` when it can be imported;
# otherwise it falls back to the `DEFAULT_NPARTITIONS` attribute of `modin.pandas`.
try:
    from modin.config import NPartitions

    NPARTITIONS = NPartitions.get()
except ImportError:
    NPARTITIONS = pd.DEFAULT_NPARTITIONS

# Benchmark settings are taken from Modin's config objects when they are importable.
try:
    from modin.config import AsvImplementation, Engine, StorageFormat, TestDatasetSize

    ASV_USE_IMPL = AsvImplementation.get()
    # a falsy dataset size falls back to "Small"
    ASV_DATASET_SIZE = TestDatasetSize.get() or "Small"
    ASV_USE_ENGINE = Engine.get()
    ASV_USE_STORAGE_FORMAT = StorageFormat.get()
except ImportError:
    # The same benchmarking code can be run for different versions of Modin, so if
    # these config objects cannot be imported, the settings are read directly from
    # the environment variables, with predefined values as defaults.
    ASV_USE_IMPL = os.environ.get("MODIN_ASV_USE_IMPL", "modin")
    ASV_DATASET_SIZE = os.environ.get("MODIN_TEST_DATASET_SIZE", "Small")
    ASV_USE_ENGINE = os.environ.get("MODIN_ENGINE", "Ray")
    ASV_USE_STORAGE_FORMAT = os.environ.get("MODIN_STORAGE_FORMAT", "Pandas")

# Normalize the settings to lower case so that they can be compared with the
# lower-case literals used throughout the benchmarks.
ASV_USE_IMPL = ASV_USE_IMPL.lower()
ASV_DATASET_SIZE = ASV_DATASET_SIZE.lower()
ASV_USE_ENGINE = ASV_USE_ENGINE.lower()
ASV_USE_STORAGE_FORMAT = ASV_USE_STORAGE_FORMAT.lower()

# Fail early, at import time, on a setting the benchmarks do not know about.
assert ASV_USE_IMPL in ("modin", "pandas")
assert ASV_DATASET_SIZE in ("big", "small")
assert ASV_USE_ENGINE in ("ray", "dask", "python", "unidist")
# The trailing comma matters: `("pandas")` is just the string "pandas", which
# turns the membership test into a substring check (accepting e.g. "" or "pan").
assert ASV_USE_STORAGE_FORMAT in ("pandas",)


================================================
FILE: asv_bench/benchmarks/utils/data_shapes.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Define data shapes."""

import json
import os

from .compatibility import ASV_DATASET_SIZE

# Bounds for the random values benchmarks generate.
RAND_LOW = 0
RAND_HIGH = 100

# Every table below maps a dataset-size name ("big" / "small") to a list of
# benchmark shapes (presumably [n_rows, n_cols] - confirm against the callers).

# Pairs of shapes for benchmarks that take two frames.
BINARY_OP_DATA_SIZE = {
    "big": [
        [[5_000, 5_000], [5_000, 5_000]],
        # [[20, 500_000], [10, 1_000_000]] is left out: the case is extremely
        # inefficient
        [[500_000, 20], [1_000_000, 10]],
    ],
    "small": [
        [[250, 250], [250, 250]],
        [[10_000, 20], [25_000, 10]],
    ],
}

# Single shapes for benchmarks that take one frame.
UNARY_OP_DATA_SIZE = {
    "big": [
        [5_000, 5_000],
        # [10, 1_000_000] is left out: the case is extremely inefficient
        [1_000_000, 10],
    ],
    "small": [
        [250, 250],
        [10_000, 10],
    ],
}

# Single-column shapes for Series benchmarks.
SERIES_DATA_SIZE = {
    "big": [
        [100_000, 1],
    ],
    "small": [
        [10_000, 1],
    ],
}

# Pairs of single-column shapes for benchmarks that take two Series.
BINARY_OP_SERIES_DATA_SIZE = {
    "big": [
        [[500_000, 1], [1_000_000, 1]],
        [[500_000, 1], [500_000, 1]],
    ],
    "small": [
        [[5_000, 1], [10_000, 1]],
    ],
}


# Group-count parameters per dataset size. Entries are either an integer or the
# string "huge_amount_groups" (its interpretation is up to the code consuming
# these values - not visible here).
DEFAULT_GROUPBY_NGROUPS = {
    "big": [100, "huge_amount_groups"],
    "small": [5],
}
# The list that applies to the dataset size selected for this run.
GROUPBY_NGROUPS = DEFAULT_GROUPBY_NGROUPS[ASV_DATASET_SIZE]

# Table of (shapes, benchmark names) pairs: every name listed in a pair is given
# that pair's shapes, which are picked from the size tables by ASV_DATASET_SIZE.
# The table is flattened into the DEFAULT_CONFIG mapping further down.
_DEFAULT_CONFIG_T = [
    (
        UNARY_OP_DATA_SIZE[ASV_DATASET_SIZE],
        [
            # Pandas storage format benchmarks
            "TimeGroupByMultiColumn",
            "TimeGroupByDefaultAggregations",
            "TimeGroupByDictionaryAggregation",
            "TimeSetItem",
            "TimeInsert",
            "TimeArithmetic",
            "TimeSortValues",
            "TimeDrop",
            "TimeHead",
            "TimeTail",
            "TimeExplode",
            "TimeFillna",
            "TimeFillnaDataFrame",
            "TimeValueCountsFrame",
            "TimeValueCountsSeries",
            "TimeIndexing",
            "TimeMultiIndexing",
            "TimeResetIndex",
            "TimeAstype",
            "TimeDescribe",
            "TimeProperties",
            "TimeReindex",
            "TimeReindexMethod",
            "TimeFillnaMethodDataframe",
            "TimeDropDuplicatesDataframe",
            "TimeStack",
            "TimeUnstack",
            "TimeRepr",
            "TimeMaskBool",
            "TimeIsnull",
            "TimeDropna",
            "TimeEquals",
            # IO benchmarks
            "TimeReadCsvSkiprows",
            "TimeReadCsvTrueFalseValues",
            "TimeReadCsvNamesDtype",
            "TimeReadParquet",
            # Scalability benchmarks
            "TimeFromPandas",
            "TimeToPandas",
            "TimeToNumPy",
        ],
    ),
    (
        BINARY_OP_DATA_SIZE[ASV_DATASET_SIZE],
        [
            # Pandas storage format benchmarks
            "TimeJoin",
            "TimeMerge",
            "TimeMergeDefault",
            "TimeConcat",
            "TimeAppend",
            "TimeBinaryOp",
            "TimeLevelAlign",
        ],
    ),
    (
        SERIES_DATA_SIZE[ASV_DATASET_SIZE],
        [
            # Pandas storage format benchmarks
            "TimeFillnaSeries",
            "TimeGroups",
            "TimeIndexingNumericSeries",
            "TimeFillnaMethodSeries",
            "TimeDatetimeAccessor",
            "TimeSetCategories",
            "TimeRemoveCategories",
            "TimeDropDuplicatesSeries",
        ],
    ),
    (
        BINARY_OP_SERIES_DATA_SIZE[ASV_DATASET_SIZE],
        [
            # Pandas storage format benchmarks
            "TimeBinaryOpSeries",
        ],
    ),
]

# Default shapes per benchmark name. Start with the benchmarks whose shapes do not
# come from any of the shared size tables.
DEFAULT_CONFIG = {
    "MergeCategoricals": (
        [[10_000, 2]] if ASV_DATASET_SIZE == "big" else [[1_000, 2]]
    ),
    "TimeJoinStringIndex": (
        [[100_000, 64]] if ASV_DATASET_SIZE == "big" else [[1_000, 4]]
    ),
    "TimeReplace": (
        [[10_000, 2]] if ASV_DATASET_SIZE == "big" else [[1_000, 2]]
    ),
}
# Then flatten the (shapes, names) table: each listed name gets the shapes object
# of its table entry.
for config in (_DEFAULT_CONFIG_T,):
    for _shape, _names in config:
        DEFAULT_CONFIG.update(dict.fromkeys(_names, _shape))

# Correct shapes in the case when the operation ended with a timeout error
if ASV_DATASET_SIZE == "big":
    DEFAULT_CONFIG.update(
        TimeMergeDefault=[
            [[1000, 1000], [1000, 1000]],
            [[500_000, 20], [1_000_000, 10]],
        ],
        TimeLevelAlign=[
            [[2500, 2500], [2500, 2500]],
            [[250_000, 20], [500_000, 10]],
        ],
        TimeStack=[
            [1500, 1500],
            [100_000, 10],
        ],
    )
    # Unstack deliberately reuses the very same list as Stack.
    DEFAULT_CONFIG["TimeUnstack"] = DEFAULT_CONFIG["TimeStack"]

# Parsed content of the user-supplied shapes file; filled in on first successful load.
CONFIG_FROM_FILE = None


def get_benchmark_shapes(bench_id: str):
    """
    Return the shapes to use for the benchmark identified by `bench_id`.

    Shapes listed in the JSON file referenced by MODIN_ASV_DATASIZE_CONFIG take
    precedence; a benchmark that is missing from that file (or any benchmark when
    no file is configured) gets its entry from ``DEFAULT_CONFIG``.

    Parameters
    ----------
    bench_id : str
        Unique benchmark identifier that is used to get shapes.

    Returns
    -------
    list
        Benchmark shapes.
    """
    global CONFIG_FROM_FILE
    if not CONFIG_FROM_FILE:
        # Nothing loaded so far: find out whether a custom file is configured.
        try:
            from modin.config import AsvDataSizeConfig

            config_path = AsvDataSizeConfig.get()
        except ImportError:
            # This Modin version has no such config option: read the raw variable.
            config_path = os.environ.get("MODIN_ASV_DATASIZE_CONFIG", None)
        if config_path:
            # The file is expected to contain JSON.
            with open(config_path) as config_file:
                CONFIG_FROM_FILE = json.load(config_file)

    custom_shapes = CONFIG_FROM_FILE or {}
    if bench_id in custom_shapes:
        return custom_shapes[bench_id]
    return DEFAULT_CONFIG[bench_id]


================================================
FILE: asv_bench/test/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.


================================================
FILE: asv_bench/test/test_utils.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

from unittest.mock import Mock, mock_open, patch

import numpy as np
import pytest
from benchmarks.utils import data_shapes, execute, get_benchmark_shapes

import modin.pandas as pd
from modin.config import AsvDataSizeConfig


@pytest.mark.parametrize(
    "asv_config_content, result",
    [
        (
            '{"TimeJoin": [[[10, 10], [15, 15]], [[11, 11], [13, 13]]], \
                "TimeGroupBy": [[11, 11], [13, 13]]}',
            [
                # binary shapes
                [[[10, 10], [15, 15]], [[11, 11], [13, 13]]],
                # unary shapes
                [[11, 11], [13, 13]],
            ],
        ),
    ],
)
@patch.object(data_shapes, "CONFIG_FROM_FILE", new=None)
def test_get_benchmark_shapes(asv_config_content, result):
    """Shapes listed in the configured JSON file are the ones returned."""
    expected_binary, expected_unary = result
    AsvDataSizeConfig.put("mock_filename")
    # No real file exists: `open` is replaced so that it serves the JSON text.
    fake_open = mock_open(read_data=asv_config_content)
    with patch("builtins.open", fake_open):
        assert get_benchmark_shapes("TimeJoin") == expected_binary
        assert get_benchmark_shapes("TimeGroupBy") == expected_unary


@pytest.mark.parametrize(
    "asv_config_content, result",
    [
        (
            '{"TimeJoin": [[[10, 10], [15, 15]]]',
            [[100, 100]],
        ),
    ],
)
@patch.object(data_shapes, "CONFIG_FROM_FILE", new=None)
def test_get_benchmark_shapes_default(asv_config_content, result):
    """Without a configured file the shapes come from ``DEFAULT_CONFIG``."""
    # `asv_config_content` is part of the parametrization but is not read here.
    AsvDataSizeConfig.put(None)
    fallback_config = {"TimeJoin": result}
    with patch.object(data_shapes, "DEFAULT_CONFIG", new=fallback_config):
        assert get_benchmark_shapes("TimeJoin") == result


def test_execute():
    """`execute` waits exactly once, on all partitions of the frame."""
    df = pd.DataFrame(np.random.rand(100, 64))
    modin_frame = df._query_compiler._modin_frame
    partitions = modin_frame._partitions.flatten()
    mgr_cls = modin_frame._partition_mgr_cls
    # Swap the real wait for a mock so the call can be inspected afterwards.
    with patch.object(mgr_cls, "wait_partitions", new=Mock()):
        execute(df)
        wait_mock = mgr_cls.wait_partitions
        wait_mock.assert_called_once()
        positional_args = wait_mock.call_args[0]
        assert (positional_args == partitions).all()


================================================
FILE: ci/teamcity/Dockerfile.teamcity-ci
================================================
# Create images from this container like this (in modin repo root):
#
# git rev-parse HEAD > ci/teamcity/git-rev
#
# tar cf ci/teamcity/modin.tar .
#
# docker build --build-arg ENVIRONMENT=environment-dev.yml -t modin-project/teamcity-ci:${BUILD_NUMBER} -f ci/teamcity/Dockerfile.teamcity-ci ci/teamcity

FROM rayproject/ray:latest

# Conda environment file used to create the environment; the path is resolved
# inside /modin (the WORKDIR set below).
ARG ENVIRONMENT=environment-dev.yml

# Sources (as a tar archive) and the revision they were taken from.
ADD modin.tar /modin
ADD git-rev /modin/git-rev

WORKDIR /modin
RUN sudo chown -R ray /modin

# Make RUN commands use `bash --login`:
SHELL ["/bin/bash", "--login", "-c"]

# Initialize conda in bash config files:
RUN conda init bash
# Put the "modin" environment first on PATH (key=value form; the space-separated
# `ENV key value` form is legacy syntax).
ENV PATH=/home/ray/anaconda3/envs/modin/bin:$PATH

RUN conda config --set channel_priority strict
RUN conda update python -y
RUN conda env create -f ${ENVIRONMENT}
# `-y`: an image build has nobody to answer conda's confirmation prompt.
RUN conda install -y curl PyGithub

# Activate the environment, and make sure it's activated:
# The following line also removes conda initialization from
# ~/.bashrc, so conda starts complaining that it should be
# initialized for bash. But it is necessary to do it because
# activation is not always executed when "docker exec" is used
# and then conda initialization overwrites PATH with its base
# environment where python doesn't have any packages installed.
RUN echo "conda activate modin" > ~/.bashrc
RUN echo "Make sure environment is activated"
RUN conda list -n modin


================================================
FILE: ci/teamcity/build-docker.py
================================================
import os
import sys


def execute_command(cmd):
    """
    Run `cmd` through the system shell and abort if it does not succeed.

    Parameters
    ----------
    cmd : str
        Shell command line to execute.

    Raises
    ------
    SystemExit
        If the command exits with a non-zero code or is terminated by a signal.
    """
    # Check the whole status returned by `os.system`, not only its exit-code
    # part: when the shell is killed by a signal the exit-code part is 0, which
    # would make the failure look like a success.
    if os.system(cmd) != 0:
        raise SystemExit('Command "{}" failed'.format(cmd))


# Only Linux hosts are supported: stop before running any command elsewhere.
if not sys.platform.startswith("linux"):
    raise SystemExit(
        "TeamCity CI in Docker containers is supported only on Linux at the moment."
    )

# Record the current revision and archive exactly that revision for the image.
execute_command("git rev-parse HEAD > git-rev")
execute_command(
    "(cd ../.. && git archive -o ci/teamcity/modin.tar $(cat ci/teamcity/git-rev))"
)

base_image = "ray-project/deploy"
requirements = "requirements-dev.txt"

# NOTE(review): no Dockerfile.modin-base is listed next to this script in the
# repository tree - confirm that this step can still run.
base_build_cmd = "docker build -f Dockerfile.modin-base --build-arg BASE_IMAGE={} -t modin-project/modin-base .".format(
    base_image
)
execute_command(base_build_cmd)

# NOTE(review): Dockerfile.teamcity-ci declares `ARG ENVIRONMENT`, not
# `REQUIREMENTS` - confirm that this build argument is actually consumed.
ci_build_cmd = "docker build -f Dockerfile.teamcity-ci --build-arg REQUIREMENTS={} -t modin-project/teamcity-ci .".format(
    requirements
)
execute_command(ci_build_cmd)

# Remove the temporary build inputs created above.
execute_command("rm ./modin.tar ./git-rev")


================================================
FILE: ci/teamcity/comment_on_pr.py
================================================
"""
Post the comment like the following to the PR:
```
:robot: TeamCity test results bot :robot:

<Logs from pytest>
```
"""

import os
import sys

from github import Github

# A run that is not tied to a pull request has nothing to comment on: whenever a
# PR number cannot be obtained from the environment, finish successfully.
try:
    pr_reference = os.environ["GITHUB_PR_NUMBER"]
    pr_id = int(pr_reference.split("/")[-1])
except Exception:
    sys.exit(0)

engine = os.environ["MODIN_ENGINE"]

header = """<h1 align="center"><img width=7% alt="" src="https://upload.wikimedia.org/wikipedia/commons/thumb/8/86/Teamcity_Logo.png/600px-Teamcity_Logo.png">
    TeamCity {} test results bot</h1>\n\n""".format(
    engine.title()
)

# Pytest log files to publish, per engine.
outputs_by_engine = {
    "ray": ["ray_tests.log"],
    "dask": ["dask_tests.log"],
    "python": ["python_tests.log"],
}
if engine not in outputs_by_engine:
    raise Exception("Unknown Engine, set `MODIN_ENGINE` environment variable")
pytest_outputs = outputs_by_engine[engine]

full_comment = ""
# Do not include coverage info in PR comment
split_by_first = (
    "----------- coverage: platform linux, python 3.7.5-final-0 -----------"
)
split_by_second = "--------------------------------------------------------------------------------------"

tests_failed = False
for out in pytest_outputs:
    # Read through a context manager so that the log file is always closed.
    with open(out, "r") as log_file:
        content = log_file.read()
    # Handle the output of every pytest invocation separately.
    for segment in content.split("+ python3 -m pytest "):
        parts = segment.split(split_by_first)
        # Everything before the coverage banner is always kept.
        full_comment += parts[0]
        # Only when a banner is present is there a tail to recover (the text after
        # the last separator line). Without this check a segment that has no
        # banner would be written to the comment twice.
        if len(parts) > 1:
            full_comment += parts[-1].split(split_by_second)[-1]
    tests_failed = tests_failed or ("FAILURES" in full_comment)
    if len(full_comment) > 65_000:
        # Keep the end of the log and add a notice that output was cut.
        full_comment = (
            full_comment[-65_000:] + "\n\n<b>Remaining output truncated</b>\n\n"
        )
    # Wrap the log into a collapsible code block.
    full_comment = "<details><summary>Tests Logs</summary>\n\n\n```\n" + full_comment
    full_comment += "\n```\n\n</details>\n"

if not tests_failed:
    header += '<h3 align="center">Tests PASSed</h3>\n\n'
else:
    header += '<h3 align="center">Tests FAILed</h3>\n\n'

full_comment = header + full_comment

token = os.environ["GITHUB_TOKEN"]
g = Github(token)
repo = g.get_repo("modin-project/modin")

pr = repo.get_pull(pr_id)
# Text that identifies a comment previously posted by this script for this engine.
bot_marker = "TeamCity {} test results bot".format(engine).lower()
# Fetch and filter the PR comments once, so that the decision below and the edit
# work on the same data and the comment list is not requested twice.
pr_comment_list = [
    i
    for i in pr.get_issue_comments()
    if i.user.login == "modin-bot" and bot_marker in i.body.lower()
]
if pr_comment_list:
    # Update the existing bot comment instead of adding another one.
    assert len(pr_comment_list) == 1, "Too many comments from modin-bot already"
    pr_comment_list[0].edit(full_comment)
else:
    pr.create_issue_comment(full_comment)


================================================
FILE: codecov.yml
================================================
# Codecov settings for the repository.
# NOTE(review): presumably turns off Codecov's pull-request comments - confirm
# against the Codecov configuration reference.
comment: false
coverage:
  status:
    # Status for the coverage of the project as a whole.
    project:
      default:
        branches:
          - main
        target: 85%
    # Status for the coverage of the lines changed by a patch.
    patch:
      default:
        target: 30%


================================================
FILE: contributing/contributing.md
================================================
# Modin dev onboarding

1. [Set up git](https://docs.github.com/en/get-started/quickstart/set-up-git)
1. [Install Anaconda](https://www.anaconda.com/products/individual#macos). Once it is installed,
reopen your terminal; you should see "(base)" next to your prompt: ![](conda_prompt.png)
1. [Generate an SSH key](https://docs.github.com/en/authentication/connecting-to-github-with-ssh/generating-a-new-ssh-key-and-adding-it-to-the-ssh-agent) for GitHub
1. Fork the [modin repo](https://github.com/modin-project/modin) on GitHub
1. Clone the forked repo in a local directory of your choice: 
    ```
    git clone ${PATH_TO_REPO}
    ```
    where the path can be found here: ![](clone_my_modin.png)
1. Inside the cloned "modin" directory, add a remote branch called "upstream":
   ```
   git remote add upstream git@github.com:modin-project/modin.git
   ```
   where the upstream link comes from here: ![](clone_upstream_modin.png)
1. Fetch the upstream branch:
    ```
    git fetch upstream
    ```
1. Set the default remote branch for your local main branch. 
    ```
     git branch --set-upstream-to=upstream/main main
    ```
1. Install modin from local source code, and install all its dependencies:
    ```
     pip install -e ".[all]"
    ```
1. Install ipython:
    ```
    pip install ipython
    ```
1. If you ever want to install modin at a release version (not the editable version from your machine): 
    ```
    pip install modin
    ```
1. If you want a specific version:
    ```
    pip install modin==0.11
    ```
1. To upgrade modin to the newest available version:
    ```
    pip install -U modin
    ```
1. Now go back to local modin.
    ```
    pip install -e .
    ```
1. Try out modin in ipython:
    ```
    ipython
    import modin
    modin.__version__
    ```
    You should see the Modin version, which consists of the version, the last commit number, and the last commit hash.

1. List Modin versions:
    ```
    git tag
    ```

1. Get a summary of a particular release:
    ```
    git tag -l --format='%(contents)' 0.11.0
    ```

1. Check out the developer requirements in `requirements-dev.txt`. Install them with:
    ```
    pip install -r requirements-dev.txt
    ```

1. Try a unit test:
    ```
    pytest modin/tests/pandas/test_concat.py
    ```

1. [Add a GPG key](https://docs.github.com/en/authentication/managing-commit-signature-verification/adding-a-new-gpg-key-to-your-github-account) to your GitHub account. Your commits need to be signed with a GPG key. On macOS, you can use [Mac GPG](https://gpgtools.org/).


1. (Optional) We recommend a few workflow settings:

    1. If you use Visual Studio Code, auto-format with [black](https://black.readthedocs.io/en/stable/) every time you save changes:
        1. Install [Microsoft's Python extension](https://marketplace.visualstudio.com/items?itemName=ms-python.python)
        1. Open your VSCode settings, in `Code -> Preferences -> Settings`.
        1. Search for "python formatting provider" and select "black" from the dropdown menu.
        1. Again in settings, search for "format on save" and enable the "Editor: Format on Save" option.
    2. Add a pre-commit hook:
        1. In your modin repository, copy [this pre-commit file](pre-commit) to `.git/hooks/pre-commit`
        1. Every time you try to commit, git will run black, isort, flake8, and mypy, and abort the commit if any of them fails. This lets you make sure your commits pass these checks before you push to GitHub.
        1. To bypass the pre-commit hook (e.g. if you don't want to create a pull request, or you already know your code will pass the tests), commit with the flag `--no-verify`.

================================================
FILE: contributing/pre-commit
================================================
#!/bin/sh
#
# Called by "git commit" with no arguments.  The hook should
# exit with non-zero status after issuing an appropriate message if
# it wants to stop the commit.
#

# Stop at the first failing command, which makes the hook exit with a non-zero
# status and therefore stops the commit.
set -e

# run_check NAME COMMAND [ARGS...]
# Announce the check, run it, and report success. A failing command ends the
# script right away because of `set -e`, so "passed" is printed only on success.
run_check() {
    check_name="$1"
    shift
    printf "running %s. This script will preempt the commit if %s fails.\n" "$check_name" "$check_name"
    "$@"
    printf '%s passed!\n' "$check_name"
}

run_check black black --check --diff modin/ asv_bench/benchmarks scripts/doc_checker.py

run_check isort isort . --check-only

run_check flake8 flake8 modin/ asv_bench/benchmarks scripts/doc_checker.py

run_check mypy mypy --config-file mypy.ini
printf "pre-commit hook finished!\n"


================================================
FILE: docker/Dockerfile
================================================
# Minimal image with Modin installed from PyPI on top of Miniconda.
FROM continuumio/miniconda3

# `-y`: an image build has nobody to answer conda's confirmation prompt.
RUN conda install -y -c conda-forge psutil setproctitle
RUN pip install modin

================================================
FILE: docs/_static/custom.js
================================================
// Once the DOM is ready, inject the RunLLM widget script into <head>.
document.addEventListener("DOMContentLoaded", () => {
  const widgetScript = document.createElement("script");
  widgetScript.type = "module";
  widgetScript.id = "runllm-widget-script";

  widgetScript.src = "https://widget.runllm.com";

  // Settings handed to the widget through attributes of its <script> tag.
  const widgetSettings = {
    "runllm-keyboard-shortcut": "Mod+j", // cmd-j or ctrl-j to open the widget.
    "runllm-name": "Modin",
    "runllm-position": "BOTTOM_RIGHT",
    "runllm-assistant-id": "164",
  };
  for (const [attribute, value] of Object.entries(widgetSettings)) {
    widgetScript.setAttribute(attribute, value);
  }

  widgetScript.async = true;
  document.head.appendChild(widgetScript);
});


================================================
FILE: docs/_templates/layout.html
================================================
{% extends "!layout.html" %}
  {% block footer %} {{ super() }}

  {# Keep the inherited footer and add a style rule limiting `.wy-nav-content` to 65em.
     NOTE(review): `.wy-nav-content` looks like a sphinx_rtd_theme class, while docs/conf.py
     selects pydata_sphinx_theme - confirm that this rule still matches anything. #}
  <style>
         .wy-nav-content { max-width: 65em; }
  </style>

{% endblock %}

================================================
FILE: docs/conf.py
================================================
# -*- coding: utf-8 -*-
#
# Configuration file for the Sphinx documentation builder.
#
# This file does only contain a selection of the most common options. For a
# full list see the documentation:
# http://www.sphinx-doc.org/en/stable/config

import os

# -- Project information -----------------------------------------------------
import sys
import types

import ray


# Stand-in for remote-function decorators: it hands decorated objects back
# unchanged so that they are not wrapped and their docstrings stay visible.
def noop_decorator(*args, **kwargs):
    """
    Act as a decorator that leaves the decorated object untouched.

    Both ``@noop_decorator`` and ``@noop_decorator(...)`` spellings are accepted.
    """
    used_bare = not kwargs and len(args) == 1 and callable(args[0])
    if not used_bare:
        # Called with options: return the actual (identity) decorator.
        return lambda cls_or_func: cls_or_func
    # Applied directly to a class or function: give it back as is.
    return args[0]


# Replace ray's decorator with the pass-through one.
ray.remote = noop_decorator

# Optional dependencies may be absent in the docs environment: register an empty
# placeholder module for each one that cannot be imported.
for mod_name in ("xgboost", "unidist", "unidist.config"):
    try:
        __import__(mod_name)
    except ImportError:
        placeholder_doc = f"fake {mod_name} for building docs"
        sys.modules[mod_name] = types.ModuleType(mod_name, placeholder_doc)

# Make sure the attributes looked up on these modules exist, real or placeholder.
_xgboost = sys.modules["xgboost"]
if not hasattr(_xgboost, "Booster"):
    _xgboost.Booster = type("Booster", (object,), {})

_unidist = sys.modules["unidist"]
if not hasattr(_unidist, "remote"):
    _unidist.remote = noop_decorator

# Walk the chain unidist.core.base.object_ref.ObjectRef and create an empty class
# for every link that is missing.
_owner = _unidist
for _attr in ("core", "base", "object_ref", "ObjectRef"):
    if not hasattr(_owner, _attr):
        setattr(_owner, _attr, type(_attr, (object,), {}))
    _owner = getattr(_owner, _attr)

# Put the directory above docs/ first on sys.path before importing modin below.
_docs_dir = os.path.dirname(__file__)
sys.path.insert(0, os.path.abspath(os.path.join(_docs_dir, "..")))
import modin
from modin.config.__main__ import export_config_help

# Export configs help to create configs table in the docs/flow/modin/config.rst
configs_file_path = os.path.abspath(
    os.path.join(_docs_dir, "flow/modin/configs_help.csv")
)
export_config_help(configs_file_path)

project = "Modin"
copyright = "2018-2024, Modin Developers."
author = "Modin contributors"

# Both `version` and `release` carry the complete `modin.__version__` string.
version = "{}".format(modin.__version__)
release = version


# -- General configuration ---------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
# Besides the built-in `sphinx.ext.*` extensions, `sphinxcontrib.plantuml` and
# `sphinx_issues` are enabled; `issues_github_path` at the end of this file is
# presumably the setting read by `sphinx_issues`.
extensions = [
    "sphinx.ext.autodoc",
    "sphinx.ext.napoleon",
    "sphinx.ext.intersphinx",
    "sphinx.ext.todo",
    "sphinx.ext.mathjax",
    "sphinx.ext.githubpages",
    "sphinx.ext.graphviz",
    "sphinxcontrib.plantuml",
    "sphinx_issues",
]


# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]

# The suffix(es) of source filenames.
# You can specify multiple suffixes as a list of strings:
#
# source_suffix = ['.rst', '.md']
source_suffix = ".rst"

# The master toctree document.
master_doc = "index"

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = "en"

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
# Directory with static assets, and the JavaScript file from it to load
# (docs/_static/custom.js).
html_static_path = ["_static"]
html_js_files = ["custom.js"]

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = "sphinx"


# -- Options for HTML output -------------------------------------------------

# NOTE(review): `default_html_theme` and `current_branch` are not referenced
# anywhere else in this file (nothing here maps branches to themes) - presumably
# leftovers; confirm before relying on them.
default_html_theme = "pydata_sphinx_theme"
current_branch = "nature"

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
#
html_theme = "pydata_sphinx_theme"

html_favicon = "img/MODIN_ver2.ico"

html_logo = "img/MODIN_ver2.png"

html_context = {"default_mode": "light"}

# Theme options are theme-specific and customize the look and feel of a theme
# further.  For a list of options available for each theme, see the
# documentation.
#
html_theme_options = {
    "navbar_end": ["navbar-icon-links"],
    "sidebarwidth": 270,
    "collapse_navigation": False,
    "navigation_depth": 4,
    "show_toc_level": 2,
    "github_url": "https://github.com/modin-project/modin",
    # Extra icon links: each entry has a name, a target URL and an icon class.
    "icon_links": [
        {
            "name": "PyPI",
            "url": "https://pypi.org/project/modin",
            "icon": "fab fa-python",
        },
        {
            "name": "conda-forge",
            "url": "https://anaconda.org/conda-forge/modin",
            "icon": "fas fa-circle-notch",
        },
        {
            "name": "Join the Slack",
            "url": "https://modin.org/slack.html",
            "icon": "fab fa-slack",
        },
        {
            "name": "Mailing List",
            "url": "https://groups.google.com/forum/#!forum/modin-dev",
            "icon": "fas fa-envelope-square",
        },
    ],
    "navigation_with_keys": True,
}

# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# The default sidebars (for documents that don't match any pattern) are
# defined by theme itself.  Builtin themes are using these templates by
# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
# 'searchbox.html']``.
#
# The default pydata_sphinx_theme sidebar templates are
# sidebar-nav-bs.html and search-field.html.
html_sidebars = {}

# GitHub "owner/repository" path of the project's issue tracker.
issues_github_path = "modin-project/modin"


================================================
FILE: docs/contact.rst
================================================
Contact
=======

Slack
-----

Join our `Slack`_ community to connect with Modin users and contributors,
discuss, and ask questions about all things Modin-related.

Mailing List
------------

General questions and ideas can be directed to the `developer mailing list`_, which is
also the place for potential contributors to get in touch. It is an open Google Group,
so feel free to join anytime! If you are unsure about where to ask or post something,
the mailing list is a good place to ask as well.

Issues
------

Bug reports and feature requests can be directed to the issues_ page of the Modin
GitHub repo.

.. _Slack: https://modin.org/slack.html
.. _developer mailing list: https://groups.google.com/forum/#!forum/modin-dev
.. _issues: https://github.com/modin-project/modin/issues


================================================
FILE: docs/development/architecture.rst
================================================
System Architecture
===================

In this section, we will lay out the overall system architecture for
Modin, as well as go into detail about the component design, implementation and
other important details. This document also contains important reference
information for those interested in contributing new functionality, bugfixes
and enhancements.

High-Level Architectural View
-----------------------------
The diagram below outlines the general layered view to the components of Modin
with a short description of each major section of the documentation following.


.. image:: /img/modin_architecture.png
   :align: center

Modin is logically separated into different layers that represent the hierarchy of a
typical Database Management System. Abstracting out each component allows us to
individually optimize and swap out components without affecting the rest of the system.
We can implement, for example, new compute kernels that are optimized for a certain type
of data and simply plug them in to the existing infrastructure by implementing a small
interface. The work can still be distributed by our choice of compute engine, with that
logic handled internally.

System View
-----------
A top-down view of Modin’s architecture is detailed below:

.. image:: /img/10000_meter.png
   :align: center

The user (a data scientist) interacts with the Modin system by sending interactive or
batch commands through the API, and Modin executes them using one of several execution
engines: Ray, Dask and MPI are currently supported.

Subsystem/Container View
------------------------
If we click down to the next level of details we will see that inside Modin the layered
architecture is implemented using several interacting components:

.. image:: /img/component_view.png
   :align: center

For simplicity, the other execution systems (Dask and MPI) are omitted and only Ray execution is shown.

* Dataframe subsystem is the backbone of the dataframe holding and query compilation. It is responsible for
  dispatching the ingress/egress to the appropriate module, getting the pandas API and calling the query
  compiler to convert calls to the internal intermediate Dataframe Algebra.
* Data Ingress/Egress Module is working in conjunction with Dataframe and Partitions subsystem to read data
  split into partitions and send data into the appropriate node for storing.
* Query Planner is the subsystem that translates the pandas API to an intermediate Dataframe Algebra
  representation DAG and performs an initial set of optimizations.
* Query Executor is responsible for getting the Dataframe Algebra DAG, performing further optimizations based
  on a selected storage format and mapping or compiling the Dataframe Algebra DAG to an actual
  execution sequence.
* Storage formats module is responsible for mapping the abstract operation to an actual executor call, e.g. pandas,
  custom format.
* Orchestration subsystem is responsible for spawning and controlling the actual execution environment for the
  selected execution. It spawns the actual nodes, fires up the execution environment, e.g. Ray, monitors the state
  of executors and provides telemetry.

Component View
--------------

User queries which perform data transformation, data ingress or data egress pass through the Modin components
detailed below. The path the query takes is mostly similar across execution systems.

Data Transformation
'''''''''''''''''''

.. image:: /img/generic_data_transform.svg
   :align: center

Query Compiler
""""""""""""""

The :ref:`Query Compiler <query_compiler_def>` receives queries from the pandas API layer. The API layer is
responsible for ensuring a clean input to the Query Compiler. The Query Compiler must
have knowledge of the compute kernels and in-memory format of the data in order to
efficiently compile the query.

The Query Compiler is responsible for sending the compiled query to the Core Modin Dataframe.
In this design, the Query Compiler does not have information about where or when the
query will be executed, and gives the control of the partition layout to the Modin
Dataframe.

In the interest of reducing the pandas API, the Query Compiler layer closely follows the
pandas API, but cuts out a large majority of the repetition.

.. _auto-switch architecture:

Automatic Engine Switching and Casting
""""""""""""""""""""""""""""""""""""""

QueryCompilers which are derived from QueryCompilerCaster can participate in automatic casting when
different query compilers, representing different underlying engines, are used together in a
function. A relative "cost" of casting is used to determine which query compiler everything should
be moved to. Each query compiler must implement the functions `move_to_cost`, `move_to_me_cost`,
`max_cost` and `stay_cost` to provide information and query costs associated with different decision
points in cost optimization. With the exception of `max_cost`, these methods need to return a
QCCoercionCost in the range of 0-1000.

These functions have precise meanings:

* `move_to_cost` is the transmission cost of moving the data, including known serialization costs
  from the perspective of that particular compiler. Colloquially, the question being asked of the
  query compiler is, "What is the normalized cost of moving my data to the other engine?"
* `move_to_me_cost` is the execution cost for the data and operation on the proposed *destination*
  query compiler. Since this method is called before the data has been migrated this is a class
  method and the destination query_compiler may have very limited information on the possible cost
  after migration. Factors that may be considered here include available memory, cpu, and the
  unique characteristics of the engine. The question being asked is, "If this data were moved to
  me, what would be the normalized execution cost to perform that operation?"
* `stay_cost` is the execution cost on the current query compiler (where the data is). The question
  asked of the query compiler is, "If I were to keep this data on my engine, what would be the normalized
  execution cost?"
* `max_cost` is the maximum cost allowed by this query compiler across all data movements. This method
  sets a normalized upper bound for situations where multiple data frames from different engines all
  need to move to the same engine. The value returned by this method can exceed
  QCCoercionCost.COST_IMPOSSIBLE.

There are generally two places where automatic casting is considered: when two or more DataFrames on
different engines are participating in an operation (such as pd.concat) or at registered functions
for particular engines through the `register_function_for_pre_op_switch` and
`register_function_for_post_op_switch` methods.

Core Modin Dataframe
""""""""""""""""""""

At this layer, operations can be performed lazily. Currently, Modin executes most
operations eagerly in an attempt to behave as pandas does. Some operations, e.g.
``transpose`` are expensive and create full copies of the data in-memory. In these
cases, we can wait until another operation triggers computation. In the future, we plan
to add additional query planning and laziness to Modin to ensure that queries are
performed efficiently.

The structure of the Core Modin Dataframe is extensible, such that any operation that could
be better optimized for a given execution can be overridden and optimized in that way.

This layer has a significantly reduced API from the QueryCompiler and the user-facing
API. Each of these APIs represents a single way of performing a given operation or
behavior.

Core Modin Dataframe API
""""""""""""""""""""""""

More documentation can be found internally in the code_. This API is not complete, but
represents an overwhelming majority of operations and behaviors.

This API can be implemented by other distributed/parallel DataFrame libraries and
plugged in to Modin as well. Create an issue_ or discuss
on our `Slack <https://modin.org/slack.html>`_ for more information!

The :doc:`Core Modin Dataframe </flow/modin/core/dataframe/base/index>` is responsible for the data layout and shuffling, partitioning,
and serializing the tasks that get sent to each partition. Other implementations of the
Modin Dataframe interface will have to handle these as well.

Partition Manager
"""""""""""""""""

The Partition Manager can change the size and shape of the partitions based on the type
of operation. For example, certain operations are complex and require access to an
entire column or row. The Partition Manager can convert the block partitions to row
partitions or column partitions. This gives Modin the flexibility to perform operations
that are difficult in row-only or column-only partitioning schemas.

Another important component of the Partition Manager is the serialization and shipment
of compiled queries to the Partitions. It maintains metadata for the length and width of
each partition, so when operations only need to operate on or extract a subset of the
data, it can ship those queries directly to the correct partition. This is particularly
important for some operations in pandas which can accept different arguments and
operations for different columns, e.g. ``fillna`` with a dictionary.

This abstraction separates the actual data movement and function application from the
Dataframe layer to keep the Core Dataframe API small and separately optimize the data
movement and metadata management.

Partitions
""""""""""

Partitions are responsible for managing a subset of the Dataframe. As mentioned
below, the Dataframe is partitioned both row and column-wise. This gives Modin
scalability in both directions and flexibility in data layout. There are a number of
optimizations in Modin that are implemented in the partitions. Partitions are specific
to the execution framework and in-memory format of the data, allowing Modin to
exploit potential optimizations across both. These optimizations are explained
further on the pages specific to the execution framework.

Execution Engine
''''''''''''''''

This layer performs computation on partitions of the data. The
Modin Dataframe is designed to work with `task parallel`_ frameworks, but integration with
data parallel frameworks should be possible with some effort.

Storage Format
''''''''''''''

The :doc:`storage format </flow/modin/core/storage_formats/index>` describes the in-memory partition type.
The base storage format in Modin is pandas. In the default case, the Modin Dataframe operates on partitions that contain ``pandas.DataFrame`` objects.

Data Ingress
''''''''''''

.. note::
   Data ingress operations (e.g. ``read_csv``) in Modin load data from the source into
   partitions and vice versa for data egress (e.g. ``to_csv``) operation.
   Improved performance is achieved by reading/writing in partitions in parallel.

Data ingress starts with a function in the pandas API layer (e.g. ``read_csv``). Then the user's
query is passed to the :doc:`Factory Dispatcher </flow/modin/core/execution/dispatching>`,
which defines a factory specific for the execution. The factory for execution contains an IO class
(e.g. ``PandasOnRayIO``) whose responsibility is to perform a parallel read/write from/to a file.
This IO class contains class methods with interfaces and names that are similar to pandas IO functions
(e.g. ``PandasOnRayIO.read_csv``). The IO class declares the Modin Dataframe and Query Compiler
classes specific for the execution engine and storage format to ensure the correct object is constructed.
It also declares IO methods that are mix-ins containing a combination of the engine-specific class for
deploying remote tasks, the class for parsing the given file format and the class handling the chunking
of the format-specific file on the head node (see dispatcher classes implementation
:doc:`details </flow/modin/core/io/index>`). The output from the IO class data ingress function is
a :doc:`Modin Dataframe </flow/modin/core/dataframe/pandas/dataframe>`.

.. image:: /img/generic_data_ingress.svg
   :align: center

Data Egress
'''''''''''

Data egress operations (e.g. ``to_csv``) are similar to data ingress operations up to
execution-specific IO class functions construction. Data egress functions of the IO class
are defined slightly differently from data ingress functions and created only
specifically for the engine since partitions already have information about their storage
format. Using the IO class, data is exported from partitions to the target file.

.. image:: /img/generic_data_egress.svg
   :align: center

Supported Execution Engines and Storage Formats
'''''''''''''''''''''''''''''''''''''''''''''''

This is a list of execution engines and in-memory formats supported in Modin. If you
would like to contribute a new execution engine or in-memory format, please see the
documentation page on :doc:`contributing </development/contributing>`.

- :doc:`pandas on Ray </development/using_pandas_on_ray>`
    - Uses the Ray_ execution framework.
    - The storage format is `pandas` and the in-memory partition type is a pandas DataFrame.
    - For more information on the execution path, see the :doc:`pandas on Ray </flow/modin/core/execution/ray/implementations/pandas_on_ray/index>` page.
- :doc:`pandas on Dask </development/using_pandas_on_dask>`
    - Uses the `Dask Futures`_ execution framework.
    - The storage format is `pandas` and the in-memory partition type is a pandas DataFrame.
    - For more information on the execution path, see the :doc:`pandas on Dask </flow/modin/core/execution/dask/implementations/pandas_on_dask/index>` page.
- :doc:`pandas on MPI </development/using_pandas_on_mpi>`
    - Uses MPI_ through the Unidist_ execution framework.
    - The storage format is `pandas` and the in-memory partition type is a pandas DataFrame.
    - For more information on the execution path, see the :doc:`pandas on Unidist </flow/modin/core/execution/unidist/implementations/pandas_on_unidist/index>` page.
- :doc:`pandas on Python </development/using_pandas_on_python>`
    - Uses native python execution - mainly used for debugging.
    - The storage format is `pandas` and the in-memory partition type is a pandas DataFrame.
    - For more information on the execution path, see the :doc:`pandas on Python </flow/modin/core/execution/python/implementations/pandas_on_python/index>` page.
- pandas on Snowflake
    - Uses the Snowpark Python library to transpile pandas API calls to SQL queries.
    - The storage format is the custom-defined `Snowflake` format; data remains within Snowflake warehouses until retrieved by pandas API calls.
    - For more information on pandas on Snowflake, refer to Snowflake's `documentation <https://docs.snowflake.com/en/developer-guide/snowpark/python/pandas-on-snowflake>`_ (external link).

.. _directory-tree:

DataFrame Partitioning
----------------------

The Modin DataFrame architecture follows in the footsteps of modern architectures for
database and high performance matrix systems. We chose a partitioning schema that
partitions along both columns and rows because it gives Modin flexibility and
scalability in both the number of columns and the number of rows. The
following figure illustrates this concept.

.. image:: /img/block_partitions_diagram.png
   :align: center

Currently, the main in-memory format of each partition is a
`pandas DataFrame`_ (:doc:`pandas storage format </flow/modin/core/storage_formats/pandas/index>`).

Index
-----

We currently use the ``pandas.Index`` object for indexing both columns and rows. In the
future, we will implement a distributed, pandas-compatible Index object in order to remove
this scaling limitation from the system. Most workloads will not be affected by this scalability limit
since it only appears when operating on more than tens of billions of columns or rows.
**Important note**: If you are using the
default index (``pandas.RangeIndex``) there is a fixed memory overhead (~200 bytes) and
there will be no scalability issues with the index.

API
---

The API is the outer-most layer that faces users. The following classes contain Modin's implementation of the pandas API:

.. toctree::
   /flow/modin/pandas/base
   /flow/modin/pandas/dataframe
   /flow/modin/pandas/series

Module/Class View
-----------------

Modin's modules layout is shown below. Click on the links to deep dive into Modin's internal implementation
details. The documentation covers most modules, with more docs being added every day!

.. parsed-literal::
   ├───.github
   ├───asv_bench
   ├───ci
   ├───docker
   ├───docs
   ├───examples
   ├───modin
   │   ├─── :doc:`config </flow/modin/config>`
   │   ├─── :doc:`utils </flow/modin/utils>`
   │   ├───core
   │   │   ├─── :doc:`dataframe </flow/modin/core/dataframe/index>`
   │   │   │   ├─── :doc:`algebra </flow/modin/core/dataframe/algebra>`
   │   │   │   ├─── :doc:`base </flow/modin/core/dataframe/base/index>`
   │   │   │   └─── :doc:`pandas </flow/modin/core/dataframe/pandas/index>`
   │   │   ├───execution
   │   │   │   ├───dask
   │   │   │   │   ├───common
   │   │   │   │   └───implementations
   │   │   │   │       └─── :doc:`pandas_on_dask </flow/modin/core/execution/dask/implementations/pandas_on_dask/index>`
   │   │   │   ├─── :doc:`dispatching </flow/modin/core/execution/dispatching>`
   │   │   │   ├───python
   │   │   │   │   └───implementations
   │   │   │   │       └─── :doc:`pandas_on_python </flow/modin/core/execution/python/implementations/pandas_on_python/index>`
   │   │   │   ├───ray
   │   │   │   │   ├───common
   │   │   │   │   ├─── :doc:`generic </flow/modin/core/execution/ray/generic>`
   │   │   │   │   └───implementations
   │   │   │   │       └─── :doc:`pandas_on_ray </flow/modin/core/execution/ray/implementations/pandas_on_ray/index>`
   │   │   │   └───unidist
   │   │   │       ├───common
   │   │   │       ├─── :doc:`generic </flow/modin/core/execution/unidist/generic>`
   │   │   │       └───implementations
   │   │   │           └─── :doc:`pandas_on_unidist </flow/modin/core/execution/unidist/implementations/pandas_on_unidist/index>`
   │   │   ├─── :doc:`io </flow/modin/core/io/index>`
   │   │   └─── :doc:`storage_formats </flow/modin/core/storage_formats/index>`
   │   │       ├─── :doc:`base </flow/modin/core/storage_formats/base/query_compiler>`
   │   │       └─── :doc:`pandas </flow/modin/core/storage_formats/pandas/index>`
   │   ├───distributed
   │   │   ├───dataframe
   │   │   │   └─── :doc:`pandas </flow/modin/distributed/dataframe/pandas>`
   │   ├─── :doc:`experimental </flow/modin/experimental/index>`
   │   │   ├───core
   │   │   │   └─── :doc:`io </flow/modin/experimental/core/io/index>`
   │   │   ├─── :doc:`pandas </flow/modin/experimental/pandas>`
   │   │   ├─── :doc:`sklearn </flow/modin/experimental/sklearn>`
   │   │   ├───spreadsheet
   │   │   ├─── :doc:`xgboost </flow/modin/experimental/xgboost>`
   │   │   └─── :doc:`batch </flow/modin/experimental/batch>`
   │   └───pandas
   │       ├─── :doc:`dataframe </flow/modin/pandas/dataframe>`
   │       └─── :doc:`series </flow/modin/pandas/series>`
   ├───requirements
   ├───scripts
   └───stress_tests

.. _pandas Dataframe: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html
.. _Ray: https://github.com/ray-project/ray
.. _Unidist: https://github.com/modin-project/unidist
.. _MPI: https://www.mpi-forum.org/
.. _code: https://github.com/modin-project/modin/blob/main/modin/core/dataframe
.. _Dask: https://github.com/dask/dask
.. _Dask Futures: https://docs.dask.org/en/latest/futures.html
.. _issue: https://github.com/modin-project/modin/issues
.. _task parallel: https://en.wikipedia.org/wiki/Task_parallelism
.. _experimental features: /usage_guide/advanced_usage/index.html


================================================
FILE: docs/development/contributing.rst
================================================
Contributing
============

Getting Started
---------------

If you're interested in getting involved in the development of Modin, but aren't sure
where to start, take a look at the issues tagged `Good first issue`_ or Documentation_.
These are issues that would be good for getting familiar with the codebase and better
understanding some of the more complex components of the architecture. There is
documentation here about the :doc:`architecture </development/architecture>` that you will
want to review in order to get started.

Also, feel free to join the discussions on the `developer mailing list`_.

If you want a quick guide to getting your development environment setup, please
use `the contributing instructions on GitHub`_.

Certificate of Origin
---------------------

To keep a clear track of who did what, we use a `sign-off` procedure (same requirements
for using the signed-off-by process as the Linux kernel has
https://www.kernel.org/doc/html/v4.17/process/submitting-patches.html) on patches or pull
requests that are being sent. The sign-off is a simple line at the end of the explanation
for the patch, which certifies that you wrote it or otherwise have the right to pass it
on as an open-source patch. The rules are pretty simple: if you can certify the below:

CERTIFICATE OF ORIGIN V 1.1
^^^^^^^^^^^^^^^^^^^^^^^^^^^
"By making a contribution to this project, I certify that:

1.) The contribution was created in whole or in part by me and I have the right to
submit it under the open source license indicated in the file; or
2.) The contribution is based upon previous work that, to the best of my knowledge, is
covered under an appropriate open source license and I have the right under that license
to submit that work with modifications, whether created in whole or in part by me, under
the same open source license (unless I am permitted to submit under a different
license), as indicated in the file; or
3.) The contribution was provided directly to me by some other person who certified (1),
(2) or (3) and I have not modified it.
4.) I understand and agree that this project and the contribution are public and that a
record of the contribution (including all personal information I submit with it,
including my sign-off) is maintained indefinitely and may be redistributed consistent
with this project or the open source license(s) involved."


.. code-block:: bash

   This is my commit message

   Signed-off-by: Awesome Developer <developer@example.org>


Code without a proper signoff cannot be merged into the
main branch. Note: You must use your real name (sorry, no pseudonyms or anonymous
contributions.)

The text can either be manually added to your commit body, or you can add either ``-s``
or ``--signoff`` to your usual ``git commit`` commands:


.. code-block:: bash

   git commit --signoff -m "This is my commit message"
   git commit -s -m "This is my commit message"

This will use your default git configuration which is found in .git/config. To change
this, you can use the following commands:

.. code-block:: bash

   git config --global user.name "Awesome Developer"
   git config --global user.email "awesome.developer@example.org"

If you have authored a commit that is missing the signed-off-by line, you can amend your
commits and push them to GitHub.

.. code-block:: bash

   git commit --amend --signoff

If you've pushed your changes to GitHub already you'll need to force push your branch
after this with ``git push -f``.

Commit Message formatting
-------------------------
We request that your first commit follow a particular format, and we
**require** that your PR title follow the format. The format is:

.. code-block:: bash

    FEAT-#9999: Add `DataFrame.rolling` functionality, to enable rolling window operations

The ``FEAT`` component represents the type of commit. This component of the commit
message can be one of the following:

* FEAT: A new feature that is added
* DOCS: Documentation improvements or updates
* FIX: A bugfix contribution
* REFACTOR: Moving or removing code without change in functionality
* TEST: Test updates or improvements
* PERF: Performance enhancements

The ``#9999`` component of the commit message should be the issue number in the Modin
GitHub issue tracker: https://github.com/modin-project/modin/issues. This is important
because it links commits to their issues.

The commit message should follow a colon (:) and be descriptive and succinct.

A Modin CI job on GitHub will enforce that your pull request title follows the
format we suggest. Note that if you update the PR title, you have to push
another commit (even if it's empty) or amend your last commit for the job to
pick up the new PR title. Re-running the job in GitHub Actions won't work.

General Rules for committers
----------------------------

- Try to write a PR name as descriptive as possible.
- Try to keep PRs as small as possible. One PR should be making one semantically atomic change.
- Don't merge your own PRs even if you are technically able to do it.

Development Dependencies
------------------------

We recommend doing development in a virtualenv or conda environment, though this decision
is ultimately yours. You will want to run the following in order to install all of the required
dependencies for running the tests and formatting the code:

.. code-block:: bash

  conda env create --file environment-dev.yml
  # or
  pip install -r requirements-dev.txt

Code Formatting and Lint
------------------------

We use black_ for code formatting. Before you submit a pull request, please make sure
that you run the following from the project root:

.. code-block:: bash

  black modin/ asv_bench/benchmarks scripts/doc_checker.py

We also use flake8_ to check linting errors. Running the following from the project root
will ensure that it passes the lint checks on Github Actions:

.. code-block:: bash

  flake8 modin/ asv_bench/benchmarks scripts/doc_checker.py

We test that this has been run on our `Github Actions`_ test suite. If you do this and find
that the tests are still failing, try updating your version of black and flake8.

Adding a test
-------------

If you find yourself fixing a bug or adding a new feature, don't forget to add a test to
the test suite to verify its correctness! More on testing and the layout of the tests
can be found in our testing documentation. We ask that you follow the existing
structure of the tests for ease of maintenance.

Running the tests
-----------------

To run the entire test suite, run the following from the project root:

.. code-block:: bash

  pytest modin/pandas/test

The test suite is very large, and may take a long time if you run every test. If you've
only modified a small amount of code, it may be sufficient to run a single test or some
subset of the test suite. In order to run a specific test run:

.. code-block:: bash

  pytest modin/pandas/test::test_new_functionality

The entire test suite is automatically run for each pull request.

Performance measurement
-----------------------

We use the Asv_ tool for performance tracking of various Modin functionality. The results
can be viewed here: `Asv dashboard`_.

More information can be found in the `Asv readme`_.


Building documentation
----------------------

To build the documentation, please follow the steps below from the project root:

.. code-block:: bash

    pip install -r docs/requirements-doc.txt
    sphinx-build -b html docs docs/build

To visualize the documentation locally, run the following from the `build` folder:

.. code-block:: bash

    python -m http.server <port>
    # python -m http.server 1234

then open the browser at `0.0.0.0:<port>` (e.g. `0.0.0.0:1234`).

Contributing a new execution framework or in-memory format
----------------------------------------------------------

If you are interested in contributing support for a new execution framework or in-memory
format, please make sure you understand the :doc:`architecture </development/architecture>` of Modin.

The best place to start the discussion for adding a new execution framework or in-memory
format is the `developer mailing list`_.

More docs on this coming soon...

.. _Good first issue: https://github.com/modin-project/modin/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue+%3Abeginner%3A%22
.. _Documentation: https://github.com/modin-project/modin/issues?q=is%3Aissue+is%3Aopen+label%3A%22documentation+%3Abookmark_tabs%3A%22
.. _black: https://github.com/ambv/black
.. _flake8: http://flake8.pycqa.org/en/latest/
.. _Github Actions: https://github.com/features/actions
.. _Asv: https://github.com/airspeed-velocity/asv#airspeed-velocity
.. _developer mailing list: https://groups.google.com/forum/#!forum/modin-dev
.. _Asv dashboard: https://modin.org/modin-bench/#/
.. _Asv readme: https://github.com/modin-project/modin/blob/main/asv_bench/README.md
.. _the contributing instructions on GitHub: https://github.com/modin-project/modin/blob/main/contributing/contributing.md

================================================
FILE: docs/development/index.rst
================================================
Development
===========

.. toctree::
    :maxdepth: 4

    contributing
    architecture
    partition_api
    using_pandas_on_ray
    using_pandas_on_dask
    using_pandas_on_python
    using_pandas_on_mpi

.. meta::
    :description lang=en:
        Development-specific documentation.


================================================
FILE: docs/development/partition_api.rst
================================================
Partition API in Modin
======================

When you are working with a :py:class:`~modin.pandas.dataframe.DataFrame`, you can unwrap its remote partitions
to get the raw futures objects compatible with the execution engine (e.g. ``ray.ObjectRef`` for Ray).
In addition to unwrapping of the remote partitions we also provide an API to construct a ``modin.pandas.DataFrame``
from raw futures objects.

Partition IPs
-------------
For finer-grained placement control, Modin also provides an API to get the IP addresses of the nodes that hold each partition.
You can pass the partitions that have the needed IPs to your function. This can help minimize data movement between nodes.

Partition API implementations
-----------------------------
By default, a :py:class:`~modin.pandas.dataframe.DataFrame` stores underlying partitions as ``pandas.DataFrame`` objects.
You can find the specific implementation of Modin's Partition Interface in :doc:`pandas Partition API </flow/modin/distributed/dataframe/pandas>`.

.. toctree::
  :hidden:

  /flow/modin/distributed/dataframe/pandas

Ray engine
----------
However, it is worth noting that for Modin on the ``Ray`` engine with the ``pandas`` in-memory format, the IPs of the remote partitions may not match
their actual locations if the partitions are smaller than 100 kB. Ray saves such objects (<= 100 kB, by default) in the in-process store
of the calling process (please refer to the `Ray documentation`_ for more information). We can't get IPs for such objects while maintaining good performance.
So, you should keep this in mind when unwrapping the remote partitions with their IPs. Several options for handling this case are provided in the
``How to handle Ray objects that are lower than 100 kB`` section.

Dask engine
-----------
The issue mentioned above does not occur for Modin on the ``Dask`` engine with the ``pandas`` in-memory format because ``Dask`` saves any objects
in the worker process that processes a function (please refer to the `Dask documentation`_ for more information).

Unidist engine
--------------
Currently, Modin only supports MPI through unidist. The issue mentioned above does not occur for
Modin on the ``Unidist`` engine using the ``MPI`` backend with the ``pandas`` in-memory format
because ``Unidist`` saves any objects in the MPI worker process that processes a function
(please refer to the `Unidist documentation`_ for more information).

How to handle Ray objects that are lower than 100 kB
----------------------------------------------------

* If you are sure that each of the remote partitions being unwrapped is higher than 100 kB, you can just import Modin or perform ``ray.init()`` manually.

* If you don't know the partition sizes, you can pass the option ``_system_config={"max_direct_call_object_size": <nbytes>,}`` to ``ray.init()``, where ``nbytes`` is the threshold for objects that will be stored in the in-process store.

* You can also start Ray as follows: ``ray start --head --system-config='{"max_direct_call_object_size":<nbytes>}'``.

Note that when specifying the threshold the performance of some Modin operations may change.

.. _`Ray documentation`: https://docs.ray.io/en/master/index.html#
.. _`Dask documentation`: https://distributed.dask.org/en/latest/index.html
.. _`Unidist documentation`: https://unidist.readthedocs.io/en/latest/index.html


================================================
FILE: docs/development/using_pandas_on_dask.rst
================================================
pandas on Dask
==============

This section describes usage related documents for the pandas on Dask component of Modin.

Modin uses pandas as the primary memory format of the underlying partitions and optimizes queries
ingested from the API layer specifically for this format. Thus, there is no need to choose it yourself,
but you can explicitly specify it anyway as shown below.

One of the execution engines that Modin uses is Dask. To enable the pandas on Dask execution you should set the following environment variables:

.. code-block:: bash

   export MODIN_ENGINE=dask
   export MODIN_STORAGE_FORMAT=pandas

or turn them on in source code:

.. code-block:: python

   import modin.config as cfg
   cfg.Engine.put('dask')
   cfg.StorageFormat.put('pandas')

Using Modin on Dask locally
---------------------------

If you want to run Modin on Dask locally using a single node, just set Modin engine to ``Dask`` and 
continue working with a Modin DataFrame as if it was a pandas DataFrame.
You can either initialize a Dask client on your own and Modin connects to the existing Dask cluster or
allow Modin itself to initialize a Dask client.

.. code-block:: python

  import modin.pandas as pd
  import modin.config as modin_cfg

  modin_cfg.Engine.put("dask")
  df = pd.DataFrame(...)

Using Modin on Dask in a Cluster
--------------------------------

If you want to run Modin on Dask in a cluster, you should set up a Dask cluster and initialize a Dask client.
Once the Dask client is initialized, Modin will be able to connect to it and use the Dask cluster.

.. code-block:: python

  from distributed import Client
  import modin.pandas as pd
  import modin.config as modin_cfg
  
  # Define your cluster here
  cluster = ...
  client = Client(cluster)

  modin_cfg.Engine.put("dask")
  df = pd.DataFrame(...)

To get more information on how to deploy and run a Dask cluster, visit the `Deploy Dask Clusters`_ page.

Conversion between Modin DataFrame and Dask DataFrame
-----------------------------------------------------

Modin DataFrame can be converted to/from Dask DataFrame with no-copy partition conversion.
This allows you to take advantage of both Modin and Dask libraries for maximum performance.

.. code-block:: python

  import modin.pandas as pd
  import modin.config as modin_cfg
  from modin.pandas.io import to_dask, from_dask

  modin_cfg.Engine.put("dask")
  df = pd.DataFrame(...)

  # Convert Modin to Dask DataFrame
  dask_df = to_dask(df)
  
  # Convert Dask to Modin DataFrame
  modin_df = from_dask(dask_df)

.. _Deploy Dask Clusters: https://docs.dask.org/en/stable/deploying.html


================================================
FILE: docs/development/using_pandas_on_mpi.rst
================================================
pandas on MPI through unidist
=============================

This section describes usage related documents for the pandas on MPI through unidist component of Modin.

Modin uses pandas as the primary memory format of the underlying partitions and optimizes queries
ingested from the API layer specifically for this format. Thus, there is no need to choose it yourself,
but you can explicitly specify it anyway as shown below.

One of the execution engines that Modin uses is MPI through unidist.
To enable the pandas on MPI through unidist execution you should set the following environment variables:

.. code-block:: bash

   export MODIN_ENGINE=unidist
   export MODIN_STORAGE_FORMAT=pandas
   export UNIDIST_BACKEND=mpi

or turn it on in source code:

.. code-block:: python

   import modin.config as modin_cfg
   import unidist.config as unidist_cfg

   modin_cfg.Engine.put('unidist')
   modin_cfg.StorageFormat.put('pandas')
   unidist_cfg.Backend.put('mpi')

To run a python application you should use ``mpiexec -n 1 python <script.py>`` command.

.. code-block:: bash

   mpiexec -n 1 python script.py

For more information on how to run a python application with unidist on MPI backend
please refer to `Unidist on MPI`_ section of the unidist documentation.

As of unidist 0.5.0, there is support for a shared object store for the MPI backend.
This feature improves performance in workloads where workers use the same data
multiple times, by reducing the number of data copies.
You can enable the feature by setting the following environment variable:

.. code-block:: bash

   export UNIDIST_MPI_SHARED_OBJECT_STORE=True

or turn it on in source code:

.. code-block:: python

   import unidist.config as unidist_cfg

   unidist_cfg.MpiSharedObjectStore.put(True)

.. _`Unidist on MPI`: https://unidist.readthedocs.io/en/latest/using_unidist/unidist_on_mpi.html

================================================
FILE: docs/development/using_pandas_on_python.rst
================================================
pandas on Python
================

This section describes usage related documents for the pandas on Python component of Modin.

Modin uses pandas as the primary memory format of the underlying partitions and optimizes queries
from the API layer in a specific way to this format. Since it is a default, you do not need to specify
the pandas memory format, but we show how to explicitly set it below.

One of the execution engines that Modin uses is Python. This engine is sequential and used for debugging.
To enable the pandas on Python execution you should set the following environment variables:

.. code-block:: bash

   export MODIN_ENGINE=python
   export MODIN_STORAGE_FORMAT=pandas

or turn a debug mode on:

.. code-block:: bash

   export MODIN_DEBUG=True
   export MODIN_STORAGE_FORMAT=pandas

or do the same in source code:

.. code-block:: python

   import modin.config as cfg
   cfg.Engine.put('python')
   cfg.StorageFormat.put('pandas')

.. code-block:: python

   import modin.config as cfg
   cfg.IsDebug.put(True)
   cfg.StorageFormat.put('pandas')

================================================
FILE: docs/development/using_pandas_on_ray.rst
================================================
pandas on Ray
=============

This section describes usage related documents for the pandas on Ray component of Modin.

Modin uses pandas as the primary memory format of the underlying partitions and optimizes queries
ingested from the API layer in a way specific to this format. Since it is the default, you do not need to choose it,
but you can explicitly specify it anyway as shown below.

One of the execution engines that Modin uses is Ray. If you have Ray installed in your system,
Modin also uses it by default to distribute computations.

If you want to be explicit, you could set the following environment variables:

.. code-block:: bash

   export MODIN_ENGINE=ray
   export MODIN_STORAGE_FORMAT=pandas

or turn it on in source code:

.. code-block:: python

   import modin.config as cfg
   cfg.Engine.put('ray')
   cfg.StorageFormat.put('pandas')


================================================
FILE: docs/ecosystem.rst
================================================
Ecosystem
=========

There is a constantly growing number of users and packages using pandas
to address their specific needs in data preparation, analysis and visualization.
pandas is being used ubiquitously and is a good choice to handle small-sized data.
However, pandas scales poorly and is non-interactive on moderate to large datasets.
Modin provides a drop-in replacement API for pandas and scales computation across nodes and
CPUs available. What you need to do to switch to Modin is just replace a single line of code.

.. code-block:: python

    # import pandas as pd
    import modin.pandas as pd

While most packages can consume a pandas DataFrame and operate on it efficiently,
this is not the case with a Modin DataFrame due to its distributed nature.
Thus, some packages may lack support for handling Modin DataFrame(s) correctly and,
moreover, efficiently. Modin implements methods such as ``__array__``, ``__dataframe__``, etc.
to make it easier for other libraries to consume a Modin DataFrame. If you feel that a certain library
can operate efficiently with a specific format of data, it is possible to convert a Modin DataFrame
to the format preferred.

to_pandas
---------

You can refer to `pandas ecosystem`_ page to get more details on
where pandas can be used and what libraries it powers.

.. code-block:: python

    from modin.pandas.io import to_pandas

    pandas_df = to_pandas(modin_df)

to_numpy
--------

You can refer to `NumPy ecosystem`_ section of NumPy documentation to get more details on
where NumPy can be used and what libraries it powers.

.. code-block:: python

    from modin.pandas.io import to_numpy

    numpy_arr = to_numpy(modin_df)

to_ray
------

You can refer to `Ray Data`_ page to get more details on
where Ray Dataset can be used and what libraries it powers.

.. code-block:: python

    from modin.pandas.io import to_ray

    ray_dataset = to_ray(modin_df)

to_dask
-------

You can refer to `Dask DataFrame`_ page to get more details on
where Dask DataFrame can be used and what libraries it powers.

.. code-block:: python

    from modin.pandas.io import to_dask

    dask_df = to_dask(modin_df)

.. _pandas ecosystem: https://pandas.pydata.org/community/ecosystem.html
.. _NumPy ecosystem: https://numpy.org
.. _Ray Data: https://docs.ray.io/en/latest/data/data.html
.. _Dask DataFrame: https://docs.dask.org/en/stable/dataframe.html


================================================
FILE: docs/flow/modin/config.rst
================================================
:orphan:

Modin Configuration Settings
""""""""""""""""""""""""""""

To adjust Modin's default behavior, you can set the value of Modin
configs by setting an environment variable or by using the
``modin.config`` API. To list all available configs in Modin, please
run ``python -m modin.config`` to print all
Modin configs with descriptions.

Public API
''''''''''

In principle, configs could come from any source, but for now only environment
variables are implemented. Any environment variable originates from
:class:`~modin.config.envvars.EnvironmentVariable`, which contains most of
the config API implementation.

.. autoclass:: modin.config.envvars.EnvironmentVariable
  :members: get, put, get_help, get_value_source, once, subscribe

Modin Configs List
''''''''''''''''''

.. csv-table::
   :file: configs_help.csv
   :header-rows: 1

Usage Guide
'''''''''''

The example below shows how to interact with Modin configs. As you can see, a config
value can be set either by setting the environment variable or by using the config
API.

.. code-block:: python

    import os

    # Setting `MODIN_ENGINE` environment variable.
    # Also can be set outside the script.
    os.environ["MODIN_ENGINE"] = "Dask"

    import modin.config
    import modin.pandas as pd

    # Checking initially set `Engine` config,
    # which corresponds to `MODIN_ENGINE` environment
    # variable
    print(modin.config.Engine.get()) # prints 'Dask'

    # Checking default value of `NPartitions`
    print(modin.config.NPartitions.get()) # prints '8'

    # Changing value of `NPartitions`
    modin.config.NPartitions.put(16)
    print(modin.config.NPartitions.get()) # prints '16'

One can also use config variables with a context manager in order to use
some config only for a certain part of the code:

.. code-block:: python

    import modin.config as cfg

    # Default value for this config is 'False'
    print(cfg.RangePartitioning.get()) # False

    # Set the config to 'True' inside of the context-manager
    with cfg.context(RangePartitioning=True):
        print(cfg.RangePartitioning.get()) # True
        df.merge(...) # will use range-partitioning impl

    # Once the context is over, the config gets back to its previous value
    print(cfg.RangePartitioning.get()) # False

    # You can also set multiple configs at once by passing several keyword arguments to 'cfg.context'
    print(cfg.AsyncReadMode.get()) # False

    with cfg.context(RangePartitioning=True, AsyncReadMode=True):
        print(cfg.RangePartitioning.get()) # True
        print(cfg.AsyncReadMode.get()) # True
    print(cfg.RangePartitioning.get()) # False
    print(cfg.AsyncReadMode.get()) # False


================================================
FILE: docs/flow/modin/core/dataframe/algebra.rst
================================================
:orphan:

Operators Module Description
""""""""""""""""""""""""""""

Brief description
'''''''''''''''''
Most of the functions that are evaluated by `QueryCompiler` can be categorized into
one of the patterns: Map, TreeReduce, Binary, Reduce, etc., called core operators. The ``modin.core.dataframe.algebra``
module provides templates to easily build such types of functions. These templates
are supposed to be used at the `QueryCompiler` level since each built function accepts
and returns `QueryCompiler`.

High-Level Module Overview
''''''''''''''''''''''''''
Each template class implements a
``register`` method, which takes functions to apply and
instantiates the related template. Functions that are passed to ``register`` will be executed
against a partition that has been converted to pandas and preprocessed in a template-specific way, so the function
would take one of the pandas objects: ``pandas.DataFrame``, ``pandas.Series`` or ``pandas.DataFrameGroupbyObject``.

.. note:: 
    Currently, functions that are built in that way are supported only in a pandas
    storage format (i.e. can be used only in `PandasQueryCompiler`).

Algebra module provides templates for this type of function:

Map operator
-------------
Uniformly apply a function argument to each partition in parallel. 
**Note**: map function should not change the shape of the partitions.

.. figure:: /img/map_evaluation.svg
    :align: center

This operator performs best when the number of partitions equals the number of CPUs
so that each single partition gets processed in parallel. When the number of partitions is 1.5x greater than
the number of CPUs, Modin applies a heuristic to join some partitions to get "ideal" partitioning so that
each new partition gets processed in parallel.

Reduce operator
---------------
Applies an argument function that reduces each column or row on the specified axis into a scalar, but requires knowledge about the whole axis.
Be aware that providing this knowledge may be expensive because the execution engine has to
concatenate partitions along the specified axis. Also, note that the execution engine expects
that the reduce function returns a one dimensional frame.

.. figure:: /img/reduce_evaluation.svg
    :align: center

This operator performs best when the number of partitions (row or column partitions, depending on the specified axis)
equals the number of CPUs so that each single axis partition gets processed in parallel.

TreeReduce operator
-------------------
Applies an argument function that reduces specified axis into a scalar. First applies map function to each partition
in parallel, then concatenates resulted partitions along the specified axis and applies reduce
function. In contrast with `Map function` template, here you're allowed to change partition shape
in the map phase. Note that the execution engine expects that the reduce function returns a one dimensional frame.

This operator performs best when the number of partitions (including the initial and intermediate stages)
equals the number of CPUs so that each single axis partition gets processed in parallel.

Binary operator
---------------
Applies an argument function, that takes exactly two operands (first is always `QueryCompiler`).
If both operands are query compilers then the execution engine broadcasts partitions of
the right operand to the left.

.. figure:: /img/binary_evaluation.svg
    :align: center

.. warning::
    To be able to do frame broadcasting, partitioning along the index axis of both frames
    has to be equal, otherwise they need to be aligned first. The execution engine will do
    it automatically but note that this requires repartitioning, which is a much 
    more expensive operation than the binary function itself.

This operator performs best when both operands have identical partitioning and the number of partitions of an operand
equals the number of CPUs so that each single partition gets processed in parallel.

Fold operator
-------------
Applies an argument function that requires knowledge of the whole axis. Be aware that providing this knowledge may be
expensive because the execution engine has to concatenate partitions along the specified axis.

This operator performs best when the number of partitions (row or column partitions, depending on the specified axis)
equals the number of CPUs so that each single axis partition gets processed in parallel.

GroupBy operator
----------------
Evaluates GroupBy aggregation for the types of functions that can be executed via the TreeReduce approach.
To be able to form groups, the engine broadcasts ``by`` partitions to each partition of the source frame.

This operator performs best when the cardinality of ``by`` columns is low (small number of output groups).
At the ``Map`` stage, the operator computes the aggregation for each row partition individually, meaning,
that the ``Reduce`` stage takes a dataframe with the following number of rows:
``num_groups * n_row_parts``. If the number of groups is too high, there's a risk of getting a dataframe
that is even bigger than the initial one at the ``Reduce`` stage.

Default-to-pandas operator
--------------------------
Performs a :doc:`fallback to pandas </supported_apis/defaulting_to_pandas>` for the passed function.

This operator has a performance penalty for going from a partitioned Modin DataFrame to pandas because of
the communication cost and single-threaded nature of pandas.


How to register your own function
'''''''''''''''''''''''''''''''''
Let's examine an example of how to use the algebra module to create your own
new functions.

Imagine you have a complex aggregation that can be implemented into a single query but
doesn't have any implementation in pandas API. If you know how to implement this
aggregation efficiently in a distributed frame, you may want to use one of the above described
patterns (e.g. ``TreeReduce``).

Let's implement a function that counts non-NA values for each column or row
(``pandas.DataFrame.count``). First, we need to determine the function type.
TreeReduce approach would be great: in a map phase, we'll count non-NA cells in each
partition in parallel and then just sum its results in the reduce phase.

To define the TreeReduce function that does `count` + `sum` we just need to register the
appropriate functions and then assign the result to the picked `QueryCompiler`
(`PandasQueryCompiler` in our case):

.. code-block:: python

    from modin.core.storage_formats import PandasQueryCompiler
    from modin.core.dataframe.algebra import TreeReduce

    PandasQueryCompiler.custom_count = TreeReduce.register(pandas.DataFrame.count, pandas.DataFrame.sum)

Then, we want to handle it from the :py:class:`~modin.pandas.dataframe.DataFrame`, so we need to create a way to do that:

.. code-block:: python

    import modin.pandas as pd

    def count_func(self, **kwargs):
        # The constructor allows you to pass in a query compiler as a keyword argument
        return self.__constructor__(query_compiler=self._query_compiler.custom_count(**kwargs))

    pd.DataFrame.count_custom = count_func

And then you can use it like you usually would:

.. code-block:: python

    df.count_custom(axis=1)

Many of the `pandas` API functions can be easily implemented this way, so if you find
out that one of your favorite functions still defaults to pandas and decide to
contribute to Modin to add its implementation, you may use this example as a reference.


================================================
FILE: docs/flow/modin/core/dataframe/base/dataframe.rst
================================================
ModinDataframe
""""""""""""""

The :py:class:`~modin.core.dataframe.base.dataframe.dataframe.ModinDataframe` is the parent class for all dataframes - regardless of what storage format they are backed by. Its purpose is to define the algebra operators that must be exposed by a dataframe.

This class exposes the dataframe algebra and is meant to be subclassed by all dataframe implementations.
Descendants of this class implement the algebra, and act as the intermediate level
between the query compiler and the underlying execution details (e.g. the conforming partition manager). The class provides
a significantly reduced set of operations that can be composed to form any pandas query.

The :py:class:`~modin.core.dataframe.pandas.dataframe.dataframe.PandasDataframe` is an example of a descendant of this class. It currently has implementations for some of the operators
exposed in this class, and is currently being refactored to include implementations for all of the algebra operators. Please
refer to the :doc:`PandasDataframe documentation </flow/modin/core/dataframe/pandas/dataframe>` for more information.

The :py:class:`~modin.core.dataframe.base.dataframe.dataframe.ModinDataframe` is independent of implementation specific details such as partitioning, storage format, or execution engine.

Public API
----------

.. autoclass:: modin.core.dataframe.base.dataframe.dataframe.ModinDataframe
  :members:


================================================
FILE: docs/flow/modin/core/dataframe/base/index.rst
================================================
Purpose
=======

The :py:class:`~modin.core.dataframe.base.dataframe.dataframe.ModinDataframe` serves the purpose of describing and defining the :doc:`Core Dataframe Algebra </flow/modin/core/dataframe/algebra>`.

It is the core construction element and serves as the client for the :doc:`Modin Query Compiler</flow/modin/core/storage_formats/base/query_compiler>`. Descendants that offer implementations execute the queries from the compiler by invoking functions over partitions via a partition manager.

The partitions and partition manager interfaces are currently implementation-specific, but may
be standardized in the future.

The :py:class:`~modin.core.dataframe.base.dataframe.dataframe.ModinDataframe` and axis partitions are the interfaces that must be implemented by any :doc:`execution backend</flow/modin/core/execution/dispatching>` in order for it to be plugged in to Modin.
These classes are mostly abstract, however very simple and generic enough methods like
:py:meth:`~modin.core.dataframe.base.partitioning.BaseDataframeAxisPartition.force_materialization` can be implemented at the base level because for now we do not expect them to differ in any implementation.

ModinDataframe Interface
========================

* :doc:`ModinDataframe <dataframe>` is an abstract class which represents the algebra operators a dataframe must expose.
* :doc:`BaseDataframeAxisPartition <partitioning/axis_partition>` is an abstract class, representing a joined group of partitions along some axis (either rows or labels).

.. toctree::
    :hidden:

    dataframe
    partitioning/axis_partition


================================================
FILE: docs/flow/modin/core/dataframe/base/partitioning/axis_partition.rst
================================================
BaseDataframeAxisPartition
""""""""""""""""""""""""""

The class is the base for any axis partition class and serves as the last level on which
operations that were conveyed from the partition manager are being performed on an entire column or row.

**Note**: ``modin.core.dataframe.base`` intentionally does not describe any particular partition interface,
as it is the partition manager's responsibility (if said partition manager is implemented), i.e. it is
too low-level to be present on the base, abstract level.

The class provides an API that has to be overridden by the child classes in order to manipulate
the list of block partitions (making up a column or row partition) they store.

The procedures that use this class and its methods assume that they have some global knowledge
about the entire axis. This may require the implementation to use concatenation or append on the
list of block partitions.

Public API
----------

.. autoclass:: modin.core.dataframe.base.partitioning.axis_partition.BaseDataframeAxisPartition
  :members:


================================================
FILE: docs/flow/modin/core/dataframe/index.rst
================================================
:orphan:

Core Modin Dataframe Objects
============================

Modin partitions data to scale efficiently.
To keep track of everything a few key classes are introduced: ``Dataframe``, ``Partition``, ``AxisPartition`` and ``PartitionManager``.

* ``Dataframe`` is the class conforming to Dataframe Algebra.
* ``Partition`` is an element of a NxM grid which, when combined, represents the ``Dataframe``
* ``AxisPartition`` is a joined group of ``Partition``-s along some axis (either rows or columns)
* ``PartitionManager`` is the manager that implements the primitives used for Dataframe Algebra operations over ``Partition``-s

Each :doc:`storage format </flow/modin/core/storage_formats/index>`, execution engine, and each execution system (storage format + execution engine)
may have its own implementations of these Core Dataframe's entities.
Current stable implementations are the following:

* :doc:`Base ModinDataframe <base/index>` defines a common interface and algebra operators for `Dataframe` implementations.

Storage format specific:

* :doc:`Modin PandasDataframe <pandas/index>` is an implementation for any frame class of :doc:`pandas storage format </flow/modin/core/storage_formats/pandas/index>`.

Engine specific:

* :doc:`Modin GenericRayDataframe </flow/modin/core/execution/ray/generic>` is an implementation for any frame class that works on Ray execution engine.
* :doc:`Modin GenericUnidistDataframe </flow/modin/core/execution/unidist/generic>` is an implementation for any frame class that works on Unidist execution engine.

Execution system specific:

* :doc:`Modin PandasOnRayDataframe </flow/modin/core/execution/ray/implementations/pandas_on_ray/index>` is a specialization of the Core Modin Dataframe for ``PandasOnRay`` execution.
* :doc:`Modin PandasOnDaskDataframe </flow/modin/core/execution/dask/implementations/pandas_on_dask/index>` is a specialization of the Core Modin Dataframe for ``PandasOnDask`` execution.
* :doc:`Modin PandasOnPythonDataframe </flow/modin/core/execution/python/implementations/pandas_on_python/index>` is a specialization of the Core Modin Dataframe for ``PandasOnPython`` execution.
* :doc:`Modin PandasOnUnidistDataframe </flow/modin/core/execution/unidist/implementations/pandas_on_unidist/index>` is a specialization of the Core Modin Dataframe for ``PandasOnUnidist`` execution.

.. note::
    At the current stage of Modin development, the base interfaces of the Dataframe objects are not defined yet.
    So for now the origin of all changes in the Dataframe interfaces is the :doc:`Dataframe for pandas storage format<pandas/index>`.

.. toctree::
    :hidden:

    base/index
    pandas/index


================================================
FILE: docs/flow/modin/core/dataframe/pandas/dataframe.rst
================================================
PandasDataframe
"""""""""""""""

:py:class:`~modin.core.dataframe.pandas.dataframe.dataframe.PandasDataframe` is a direct descendant of :py:class:`~modin.core.dataframe.base.dataframe.dataframe.ModinDataframe`. Its purpose is to implement the abstract interfaces for usage with all ``pandas``-based :doc:`storage formats</flow/modin/core/storage_formats/index>`.
:py:class:`~modin.core.dataframe.pandas.dataframe.dataframe.PandasDataframe` could be inherited and augmented further by any specific implementation which needs it to take special care of some behavior or to improve performance for a certain execution engine.

The class serves as the intermediate level
between the ``pandas`` query compiler and the conforming partition manager. All queries formed
at the query compiler layer are ingested by this class and then conveyed jointly with the stored partitions
into the partition manager for processing. Direct manipulation of partitions by this class is prohibited, except
in cases where an operation is strictly private or protected and called only inside the class. The class provides
a significantly reduced set of operations that fit plenty of pandas operations.

The main tasks of :py:class:`~modin.core.dataframe.pandas.dataframe.dataframe.PandasDataframe` are storing partitions, manipulating axis labels and
providing a set of methods to perform operations on the internal data.

As mentioned above, ``PandasDataframe`` shouldn't work with stored partitions directly and
the responsibility for modifying the partitions array lies with :doc:`partitioning/partition_manager`. For example, the method
:meth:`~modin.core.dataframe.pandas.dataframe.dataframe.PandasDataframe.broadcast_apply_full_axis` redirects applying
a function to the :meth:`~modin.core.dataframe.pandas.partitioning.partition_manager.PandasDataframePartitionManager.broadcast_axis_partitions` method.

``Modin PandasDataframe`` can be created from ``pandas.DataFrame``, ``pyarrow.Table``
(methods :meth:`~modin.core.dataframe.pandas.dataframe.dataframe.PandasDataframe.from_pandas`,
:meth:`~modin.core.dataframe.pandas.dataframe.dataframe.PandasDataframe.from_arrow` are used respectively). Also,
``PandasDataframe`` can be converted to ``np.array``, ``pandas.DataFrame``
(methods :meth:`~modin.core.dataframe.pandas.dataframe.dataframe.PandasDataframe.to_numpy`,
:meth:`~modin.core.dataframe.pandas.dataframe.dataframe.PandasDataframe.to_pandas` are used respectively).

Axis labels are manipulated using internal methods for replacing labels with new ones,
adding prefixes/suffixes, etc.

Public API
----------

.. autoclass:: modin.core.dataframe.pandas.dataframe.dataframe.PandasDataframe
  :members:


================================================
FILE: docs/flow/modin/core/dataframe/pandas/index.rst
================================================
Modin PandasDataframe Objects
=============================

``modin.core.dataframe.pandas`` is the package which houses common implementations
of different Modin internal classes used by most `pandas`-based :doc:`storage formats</flow/modin/core/storage_formats/index>`.

It also double-serves as the full example of how to implement Modin execution backend pieces (sans the :doc:`execution part</flow/modin/core/execution/dispatching>` which is absent here),
as it implements everything an execution backend needs to be fully conformant to Modin expectations.

* :doc:`PandasDataframe <dataframe>` is the class conforming to Dataframe Algebra.
* :doc:`PandasDataframePartition <partitioning/partition>` implements ``Partition`` interface holding ``pandas.DataFrame``.
* :doc:`PandasDataframeAxisPartition <partitioning/axis_partition>` is a joined group of ``PandasDataframePartition``-s along some axis (either rows or labels)
* :doc:`PandasDataframePartitionManager <partitioning/partition_manager>` is the manager that implements the primitives used for Dataframe Algebra operations over ``PandasDataframePartition``-s
* :doc:`ModinDtypes <metadata/dtypes>`
* :doc:`ModinIndex <metadata/index>`

.. toctree::
    :hidden:

    dataframe
    partitioning/partition
    partitioning/axis_partition
    partitioning/partition_manager
    metadata/dtypes
    metadata/index


================================================
FILE: docs/flow/modin/core/dataframe/pandas/metadata/dtypes.rst
================================================
ModinDtypes
"""""""""""

Public API
----------

.. autoclass:: modin.core.dataframe.pandas.metadata.dtypes.ModinDtypes
  :members:

================================================
FILE: docs/flow/modin/core/dataframe/pandas/metadata/index.rst
================================================
ModinIndex
""""""""""

Public API
----------

.. autoclass:: modin.core.dataframe.pandas.metadata.index.ModinIndex
  :members:

================================================
FILE: docs/flow/modin/core/dataframe/pandas/partitioning/axis_partition.rst
================================================
PandasDataframeAxisPartition
""""""""""""""""""""""""""""

The class implements abstract interface methods from :py:class:`~modin.core.dataframe.base.partitioning.axis_partition.BaseDataframeAxisPartition`
giving the means for a sibling :doc:`partition manager<partition_manager>` to actually work with the axis-wide partitions.

The class is the base for any axis partition class of the ``pandas`` storage format.

Subclasses must implement ``list_of_blocks`` which represents data wrapped by the :py:class:`~modin.core.dataframe.pandas.partitioning.partition.PandasDataframePartition`
objects and creates something interpretable as a ``pandas.DataFrame``.

See :py:class:`~modin.core.execution.ray.implementations.pandas_on_ray.partitioning.axis_partition.PandasOnRayDataframeAxisPartition`
for an example on how to override/use this class when the implementation needs to be augmented.

The :py:class:`~modin.core.dataframe.pandas.partitioning.axis_partition.PandasDataframeAxisPartition` object has an invariant that requires that this
object is never returned from a function. It assumes that there will always be
``PandasDataframeAxisPartition`` object stored and structures itself accordingly.

Public API
----------

.. autoclass:: modin.core.dataframe.pandas.partitioning.axis_partition.PandasDataframeAxisPartition
  :members:


================================================
FILE: docs/flow/modin/core/dataframe/pandas/partitioning/partition.rst
================================================
PandasDataframePartition
""""""""""""""""""""""""

The class is the base for any partition class of the ``pandas`` storage format and serves as the last level
on which operations that were conveyed from the partition manager are being performed on an
individual block partition.

The class provides an API that has to be overridden by child classes in order to manipulate
the data and metadata they store.

The public API exposed by the children of this class is used in :py:class:`~modin.core.dataframe.pandas.partitioning.partition_manager.PandasDataframePartitionManager`.

The objects wrapped by the child classes are treated as immutable by ``PandasDataframePartitionManager`` subclasses,
so there is no logic for updating them in place.

Public API
----------

.. autoclass:: modin.core.dataframe.pandas.partitioning.partition.PandasDataframePartition
  :members:


================================================
FILE: docs/flow/modin/core/dataframe/pandas/partitioning/partition_manager.rst
================================================
PandasDataframePartitionManager
"""""""""""""""""""""""""""""""

The class is the base for any partition manager class of the ``pandas`` storage format and serves as
an intermediate level between :doc:`Modin PandasDataframe <../dataframe>` and the conforming :doc:`partition <partition>` class.
The class is responsible for partitions manipulation and applying a function to individual partitions:
block partitions, row partitions or column partitions, i.e. the class can form axis partitions from
block partitions to apply a function if an operation requires access to an entire column or row.
The class translates frame API into partition API and also can have some preprocessing operations
depending on the partition type for improving performance (for example,
:meth:`~modin.core.dataframe.pandas.partitioning.partition_manager.PandasDataframePartitionManager.preprocess_func`).

The main task of the partition manager is to keep knowledge of how partitions are stored and managed
internal to itself, so that surrounding code can use it via a lean enough API without worrying about
implementation details.

Partition manager can apply user-passed (arbitrary) function in different modes:

* block-wise (apply a function to individual block partitions):

  * optionally accepting partition indices along each axis
  * optionally accepting an item to be split so parts of it would be sent to each partition

* along a full axis (apply a function to an entire column or row made up of block partitions when user function needs information about the whole axis)

It can also broadcast partitions from `right` to `left` when executing certain operations making
`right` partitions available for functions executed where `left` live.

..
  TODO: insert more text explaining "broadcast" term

Partition manager also is used to create "logical" partitions, or :doc:`axis partitions <axis_partition>`
by joining existing partitions along specified axis (either rows or labels),
and to concatenate different partition sets along given axis.

It also maintains mapping from "external" (end user-visible) indices along all axes to internal
indices which are actually pairs of indices of partitions and indices inside the partitions,
as well as manages conversion to numpy and pandas representations.


Public API
----------

.. autoclass:: modin.core.dataframe.pandas.partitioning.partition_manager.PandasDataframePartitionManager
  :members:


================================================
FILE: docs/flow/modin/core/execution/dask/implementations/pandas_on_dask/dataframe.rst
================================================
PandasOnDaskDataframe
"""""""""""""""""""""

The class is the specific implementation of the dataframe algebra for the `Dask` execution engine.
It serves as an intermediate level between ``pandas`` query compiler and
:py:class:`~modin.core.execution.dask.implementations.pandas_on_dask.partitioning.PandasOnDaskDataframePartitionManager`.

Public API
----------

.. autoclass:: modin.core.execution.dask.implementations.pandas_on_dask.dataframe.PandasOnDaskDataframe
  :members:


================================================
FILE: docs/flow/modin/core/execution/dask/implementations/pandas_on_dask/index.rst
================================================
:orphan:

PandasOnDask Execution
======================

Queries that perform data transformation, data ingress or data egress using the `pandas on Dask` execution
pass through the Modin components detailed below.

To enable `pandas on Dask` execution, please refer to the usage section in :doc:`pandas on Dask </development/using_pandas_on_dask>`.

Data Transformation
'''''''''''''''''''

.. image:: /img/pandas_on_dask_data_transform.svg
   :align: center

When a user calls any :py:class:`~modin.pandas.dataframe.DataFrame` API, a query starts forming at the `API` layer
to be executed at the `Execution` layer. The `API` layer is responsible for processing the query appropriately,
for example, determining whether the final result should be a ``DataFrame`` or ``Series`` object. This layer is also responsible for sanitizing the input to the
:py:class:`~modin.core.storage_formats.pandas.query_compiler.PandasQueryCompiler`, e.g. validating a parameter from the query
and defining specific intermediate values to provide more context to the query compiler.
The :py:class:`~modin.core.storage_formats.pandas.query_compiler.PandasQueryCompiler` is responsible for
processing the query, received from the :py:class:`~modin.pandas.dataframe.DataFrame` `API` layer,
to determine how to apply it to a subset of the data - either cell-wise or along an axis-wise partition backed by the `pandas`
storage format. The :py:class:`~modin.core.storage_formats.pandas.query_compiler.PandasQueryCompiler` maps the query to one of the :doc:`Core Algebra Operators </flow/modin/core/dataframe/algebra>` of
the :py:class:`~modin.core.execution.dask.implementations.pandas_on_dask.dataframe.PandasOnDaskDataframe` which inherits
generic functionality from the :py:class:`~modin.core.dataframe.pandas.dataframe.dataframe.PandasDataframe`.

PandasOnDask Dataframe implementation
-------------------------------------

Modin implements ``Dataframe``, ``PartitionManager``, ``AxisPartition`` and ``Partition`` classes
specifically for the `PandasOnDask` execution.

* :doc:`PandasOnDaskDataframe <dataframe>`
* :doc:`PandasOnDaskDataframePartition <partitioning/partition>`
* :doc:`PandasOnDaskDataframeVirtualPartition <partitioning/virtual_partition>`
* :doc:`PandasOnDaskDataframePartitionManager <partitioning/partition_manager>`

.. toctree::
    :hidden:

    dataframe
    partitioning/partition
    partitioning/virtual_partition
    partitioning/partition_manager


Data Ingress
''''''''''''

.. image:: /img/pandas_on_dask_data_ingress.svg
   :align: center

Data Egress
'''''''''''

.. image:: /img/pandas_on_dask_data_egress.svg
   :align: center


When a user calls any IO function from the ``modin.pandas.io`` module, the `API` layer queries the
:py:class:`~modin.core.execution.dispatching.factories.dispatcher.FactoryDispatcher` which defines a factory specific for
the execution, namely, the :py:class:`~modin.core.execution.dispatching.factories.factories.PandasOnDaskFactory`. The factory, in turn,
exposes the :py:class:`~modin.core.execution.dask.implementations.pandas_on_dask.io.PandasOnDaskIO` class
whose responsibility is to perform a parallel read/write from/to a file.

When reading data from a CSV file, for example, the :py:class:`~modin.core.execution.dask.implementations.pandas_on_dask.io.PandasOnDaskIO` class forwards
the user query to the :meth:`~modin.core.io.text.CSVDispatcher._read` method of :py:class:`~modin.core.io.text.CSVDispatcher`, where the query's parameters are preprocessed
to check if they are supported by the execution (defaulting to pandas if they are not) and some metadata
common for all partitions to be read is computed. Then, the file is split into row chunks, and this data is used to launch remote tasks on the Dask workers
via the :meth:`~modin.core.execution.dask.common.engine_wrapper.DaskWrapper.deploy` method of :py:class:`~modin.core.execution.dask.common.engine_wrapper.DaskWrapper`.
On each Dask worker, the :py:class:`~modin.core.storage_formats.pandas.parsers.PandasCSVParser` parses data.
After the remote tasks are finished, additional result postprocessing is performed,
and a new query compiler with the data read is returned.

When writing data to a CSV file, for example, the :py:class:`~modin.core.execution.dask.implementations.pandas_on_dask.io.PandasOnDaskIO` processes
the user query to execute it on Dask workers. Then, the :py:class:`~modin.core.execution.dask.implementations.pandas_on_dask.io.PandasOnDaskIO` asks the
:py:class:`~modin.core.execution.dask.implementations.pandas_on_dask.dataframe.PandasOnDaskDataframe` to decompose the data into row-wise partitions
that will be written into the file in parallel in Dask workers.

.. note::
   Currently, data egress uses default `pandas` implementation for `pandas on Dask` execution.


================================================
FILE: docs/flow/modin/core/execution/dask/implementations/pandas_on_dask/partitioning/partition.rst
================================================
PandasOnDaskDataframePartition
""""""""""""""""""""""""""""""

The class is the specific implementation of :py:class:`~modin.core.dataframe.pandas.partitioning.partition.PandasDataframePartition`,
providing the API to perform operations on a block partition, namely, ``pandas.DataFrame``, using Dask as the execution engine.

In addition to wrapping a ``pandas.DataFrame``, the class also holds the following metadata:

* ``length`` - length of ``pandas.DataFrame`` wrapped
* ``width`` - width of ``pandas.DataFrame`` wrapped
* ``ip`` - node IP address that holds ``pandas.DataFrame`` wrapped

An operation on a block partition can be performed in two modes:

* asynchronously_ - via :meth:`~modin.core.execution.dask.implementations.pandas_on_dask.partitioning.PandasOnDaskDataframePartition.apply`
* lazily_ - via :meth:`~modin.core.execution.dask.implementations.pandas_on_dask.partitioning.PandasOnDaskDataframePartition.add_to_apply_calls`

Public API
----------

.. autoclass:: modin.core.execution.dask.implementations.pandas_on_dask.partitioning.PandasOnDaskDataframePartition
  :members:

  .. _asynchronously: https://en.wikipedia.org/wiki/Asynchrony_(computer_programming)
  .. _lazily: https://en.wikipedia.org/wiki/Lazy_evaluation


================================================
FILE: docs/flow/modin/core/execution/dask/implementations/pandas_on_dask/partitioning/partition_manager.rst
================================================
PandasOnDaskDataframePartitionManager
"""""""""""""""""""""""""""""""""""""

This class is the specific implementation of :py:class:`~modin.core.dataframe.pandas.partitioning.partition_manager.PandasDataframePartitionManager`
using Dask as the execution engine. This class is responsible for partition manipulation and applying a function to
block/row/column partitions.

Public API
----------

.. autoclass:: modin.core.execution.dask.implementations.pandas_on_dask.partitioning.PandasOnDaskDataframePartitionManager
  :members:


================================================
FILE: docs/flow/modin/core/execution/dask/implementations/pandas_on_dask/partitioning/virtual_partition.rst
================================================
PandasOnDaskDataframeVirtualPartition
"""""""""""""""""""""""""""""""""""""

The class is the specific implementation of :py:class:`~modin.core.dataframe.pandas.partitioning.axis_partition.PandasDataframeAxisPartition`,
providing the API to perform operations on an axis (column or row) partition using Dask as the execution engine.
The axis partition is a wrapper over a list of block partitions that are stored in this class.

Public API
----------

.. autoclass:: modin.core.execution.dask.implementations.pandas_on_dask.partitioning.PandasOnDaskDataframeVirtualPartition
  :members:

PandasOnDaskDataframeColumnPartition
""""""""""""""""""""""""""""""""""""

Public API
----------

.. autoclass:: modin.core.execution.dask.implementations.pandas_on_dask.partitioning.PandasOnDaskDataframeColumnPartition
  :members:

PandasOnDaskDataframeRowPartition
"""""""""""""""""""""""""""""""""

Public API
----------

.. autoclass:: modin.core.execution.dask.implementations.pandas_on_dask.partitioning.PandasOnDaskDataframeRowPartition
  :members:


================================================
FILE: docs/flow/modin/core/execution/dispatching.rst
================================================
:orphan:

..
    TODO: add links to documentation for mentioned modules.

Factories Module Description
""""""""""""""""""""""""""""

Brief description
'''''''''''''''''
Modin has several execution engines and storage formats; combining them forms certain executions.
Calling any :py:class:`~modin.pandas.dataframe.DataFrame` API function will end up in some execution-specific method. The responsibility of dispatching high-level API calls to
execution-specific function belongs to the :ref:`QueryCompiler <query_compiler_def>`, which is determined at the time of the dataframe's creation by the factory of
the corresponding execution. The mission of this module is to route IO function calls from
the API level to their actual execution-specific implementations, which build the
`QueryCompiler` of the appropriate execution.

Execution representation via Factories
''''''''''''''''''''''''''''''''''''''
Execution is a combination of the :doc:`storage format </flow/modin/core/storage_formats/index>` and an actual execution engine.
For example, ``PandasOnRay`` execution means the combination of the `pandas storage format` and `Ray` engine.

Each storage format has its own :ref:`Query Compiler <query_compiler_def>` which compiles the most efficient queries
for the corresponding :doc:`Core Modin Dataframe </flow/modin/core/dataframe/index>` implementation. Speaking about ``PandasOnRay``
execution, its Query Compiler is :doc:`PandasQueryCompiler </flow/modin/core/storage_formats/pandas/query_compiler>` and the
Dataframe implementation is :doc:`PandasDataframe </flow/modin/core/dataframe/pandas/dataframe>`,
which is the general implementation for every execution of the pandas storage format. The actual implementation of the ``PandasOnRay`` dataframe
is defined by the :doc:`PandasOnRayDataframe </flow/modin/core/execution/ray/implementations/pandas_on_ray/dataframe>` class that
extends ``PandasDataframe``.

In the scope of this module, each execution is represented with a factory class located in
``modin/core/execution/dispatching/factories/factories.py``. Each factory contains a field that identifies the IO module of the corresponding execution. This IO module is
responsible for dispatching calls of IO functions to their actual implementations in the
underlying IO module. For more information about the IO module, visit the :doc:`IO </flow/modin/core/io/index>` page.

Factory Dispatcher
''''''''''''''''''
The :py:class:`~modin.core.execution.dispatching.factories.dispatcher.FactoryDispatcher` class provides
public methods whose interface corresponds to pandas IO functions; the only difference is that they return the `QueryCompiler` of the
selected storage format instead of high-level :py:class:`~modin.pandas.dataframe.DataFrame`. ``FactoryDispatcher`` is responsible for routing
these IO calls to the factory which represents the selected execution.

So when you call ``read_csv()`` function and your execution is ``PandasOnRay`` then the
trace would be the following:

.. figure:: /img/factory_dispatching.svg
    :align: center

``modin.pandas.read_csv`` calls ``FactoryDispatcher.read_csv``, which calls ``._read_csv``
function of the factory of the selected execution, in our case it's ``PandasOnRayFactory._read_csv``,
which in turn forwards this call to the actual implementation of ``read_csv`` — to the
``PandasOnRayIO.read_csv``. Finally, ``modin.pandas.read_csv`` returns a high-level Modin
DataFrame with the appropriate `QueryCompiler` bound to it, which is responsible for
dispatching all of the further function calls.

Public API
''''''''''

.. automodule:: modin.core.execution.dispatching.factories.factories
    :members:


================================================
FILE: docs/flow/modin/core/execution/python/implementations/pandas_on_python/dataframe.rst
================================================
PandasOnPythonDataframe
"""""""""""""""""""""""

The class is the specific implementation of :py:class:`~modin.core.dataframe.pandas.dataframe.dataframe.PandasDataframe`
for the `Python` execution engine. It serves as an intermediate level between
:py:class:`~modin.core.storage_formats.pandas.query_compiler.PandasQueryCompiler` and
:py:class:`~modin.core.execution.python.implementations.pandas_on_python.partitioning.partition_manager.PandasOnPythonDataframePartitionManager`.

Public API
----------

.. autoclass:: modin.core.execution.python.implementations.pandas_on_python.dataframe.dataframe.PandasOnPythonDataframe
  :members:

================================================
FILE: docs/flow/modin/core/execution/python/implementations/pandas_on_python/index.rst
================================================
:orphan:

PandasOnPython Execution
========================

Queries that perform data transformation, data ingress or data egress using the `pandas on Python` execution
pass through the Modin components detailed below.

`pandas on Python` execution is sequential and is used for debugging purposes. To enable `pandas on Python` execution,
please refer to the usage section in :doc:`pandas on Python </development/using_pandas_on_python>`.

Data Transformation
'''''''''''''''''''

.. image:: /img/pandas_on_python_data_transform.svg
   :align: center

When a user calls any :py:class:`~modin.pandas.dataframe.DataFrame` API, a query starts forming at the `API` layer
to be executed at the `Execution` layer. The `API` layer is responsible for processing the query appropriately,
for example, determining whether the final result should be a ``DataFrame`` or ``Series`` object. This layer is also responsible for sanitizing the input to the
:py:class:`~modin.core.storage_formats.pandas.query_compiler.PandasQueryCompiler`, e.g. validating a parameter from the query
and defining specific intermediate values to provide more context to the query compiler.
The :py:class:`~modin.core.storage_formats.pandas.query_compiler.PandasQueryCompiler` is responsible for
processing the query, received from the :py:class:`~modin.pandas.dataframe.DataFrame` `API` layer,
to determine how to apply it to a subset of the data - either cell-wise or along an axis-wise partition backed by the `pandas`
storage format. The :py:class:`~modin.core.storage_formats.pandas.query_compiler.PandasQueryCompiler` maps the query to one of the :doc:`Core Algebra Operators </flow/modin/core/dataframe/algebra>` of
the :py:class:`~modin.core.execution.python.implementations.pandas_on_python.dataframe.dataframe.PandasOnPythonDataframe` which inherits
generic functionality from the :py:class:`~modin.core.dataframe.pandas.dataframe.dataframe.PandasDataframe`.

PandasOnPython Dataframe implementation
---------------------------------------

This page describes implementation of :doc:`Modin PandasDataframe Objects </flow/modin/core/dataframe/pandas/index>`
specific for `PandasOnPython` execution. Since Python engine doesn't allow computation parallelization,
operations on partitions are performed sequentially. The absence of parallelization doesn't give any
performance speed-up, so ``PandasOnPython`` is used for testing purposes only.

* :doc:`PandasOnPythonDataframe <dataframe>`
* :doc:`PandasOnPythonDataframePartition <partitioning/partition>`
* :doc:`PandasOnPythonDataframeAxisPartition <partitioning/axis_partition>`
* :doc:`PandasOnPythonDataframePartitionManager <partitioning/partition_manager>`

.. toctree::
    :hidden:

    dataframe
    partitioning/partition
    partitioning/axis_partition
    partitioning/partition_manager


Data Ingress
''''''''''''

.. image:: /img/pandas_on_python_data_ingress.svg
   :align: center

Data Egress
'''''''''''

.. image:: /img/pandas_on_python_data_egress.svg
   :align: center


When a user calls any IO function from the ``modin.pandas.io`` module, the `API` layer queries the
:py:class:`~modin.core.execution.dispatching.factories.dispatcher.FactoryDispatcher` which defines a factory specific for
the execution, namely, the :py:class:`~modin.core.execution.dispatching.factories.factories.PandasOnPythonFactory`. The factory, in turn,
exposes the :py:class:`~modin.core.execution.python.implementations.pandas_on_python.io.PandasOnPythonIO` class
whose responsibility is to read/write from/to a file.

When reading data from a CSV file, for example, the :py:class:`~modin.core.execution.python.implementations.pandas_on_python.io.io.PandasOnPythonIO` class
reads the data using corresponding `pandas` function (``pandas.read_csv()`` in this case). After the reading is complete, a new query compiler is created from `pandas` object
using :py:meth:`~modin.core.execution.python.implementations.pandas_on_python.io.io.PandasOnPythonIO.from_pandas` and returned.

When writing data to a CSV file, for example, the :py:class:`~modin.core.execution.python.implementations.pandas_on_python.io.PandasOnPythonIO` converts a query compiler
to `pandas` object using :py:meth:`~modin.core.storage_formats.base.query_compiler.BaseQueryCompiler.to_pandas`. After that, `pandas` writes the data to the file using
corresponding function (``pandas.DataFrame.to_csv()`` in this case).

================================================
FILE: docs/flow/modin/core/execution/python/implementations/pandas_on_python/partitioning/axis_partition.rst
================================================
PandasOnPythonDataframeAxisPartition
""""""""""""""""""""""""""""""""""""

The class is the specific implementation of :py:class:`~modin.core.dataframe.pandas.partitioning.axis_partition.PandasDataframeAxisPartition`,
providing the API to perform operations on an axis partition, using Python
as the execution engine. The axis partition is made up of a list of block
partitions that are stored in this class.

Public API
----------

.. autoclass:: modin.core.execution.python.implementations.pandas_on_python.partitioning.virtual_partition.PandasOnPythonDataframeAxisPartition

PandasOnPythonDataframeColumnPartition
""""""""""""""""""""""""""""""""""""""

Public API
----------

.. autoclass:: modin.core.execution.python.implementations.pandas_on_python.partitioning.virtual_partition.PandasOnPythonDataframeColumnPartition
  :members:

PandasOnPythonDataframeRowPartition
"""""""""""""""""""""""""""""""""""

Public API
----------

.. autoclass:: modin.core.execution.python.implementations.pandas_on_python.partitioning.virtual_partition.PandasOnPythonDataframeRowPartition
  :members:

================================================
FILE: docs/flow/modin/core/execution/python/implementations/pandas_on_python/partitioning/partition.rst
================================================
PandasOnPythonDataframePartition
""""""""""""""""""""""""""""""""

The class is the specific implementation of :py:class:`~modin.core.dataframe.pandas.partitioning.partition.PandasDataframePartition`,
providing the API to perform operations on a block partition using Python as the execution engine.

In addition to wrapping a ``pandas.DataFrame``, the class also holds the following metadata:

* ``length`` - length of ``pandas.DataFrame`` wrapped
* ``width`` - width of ``pandas.DataFrame`` wrapped

An operation on a block partition can be performed in two modes:

* immediately via :meth:`~modin.core.execution.python.implementations.pandas_on_python.partitioning.partition.PandasOnPythonDataframePartition.apply` - 
  in this case accumulated call queue and new function will be executed
  immediately.
* lazily_ via :meth:`~modin.core.execution.python.implementations.pandas_on_python.partitioning.partition.PandasOnPythonDataframePartition.add_to_apply_calls` -
  in this case function will be added to the call queue and no computations
  will be done at the moment.

Public API
----------

.. autoclass:: modin.core.execution.python.implementations.pandas_on_python.partitioning.partition.PandasOnPythonDataframePartition
  :members:

  .. _lazily: https://en.wikipedia.org/wiki/Lazy_evaluation

================================================
FILE: docs/flow/modin/core/execution/python/implementations/pandas_on_python/partitioning/partition_manager.rst
================================================
PandasOnPythonDataframePartitionManager
"""""""""""""""""""""""""""""""""""""""

The class is the specific implementation of :py:class:`~modin.core.dataframe.pandas.partitioning.partition_manager.PandasDataframePartitionManager`
using Python as the execution engine. This class is responsible for partition manipulation and applying
a function to block/row/column partitions.

Public API
----------

.. autoclass:: modin.core.execution.python.implementations.pandas_on_python.partitioning.partition_manager.PandasOnPythonDataframePartitionManager
  :members:

================================================
FILE: docs/flow/modin/core/execution/ray/generic.rst
================================================
:orphan:

Generic Ray-based members
=========================

Objects which are storage format agnostic but require specific Ray implementation
are placed in ``modin.core.execution.ray.generic``.

Their purpose is to implement certain parallel I/O operations and to serve
as a foundation for building storage format specific objects:

.. autoclass:: modin.core.execution.ray.generic.io.RayIO
  :members:

.. autoclass:: modin.core.execution.ray.generic.partitioning.GenericRayDataframePartitionManager
  :members:


================================================
FILE: docs/flow/modin/core/execution/ray/implementations/pandas_on_ray/dataframe.rst
================================================
PandasOnRayDataframe
""""""""""""""""""""

The class is the specific implementation of the :py:class:`~modin.core.dataframe.pandas.dataframe.dataframe.PandasDataframe`
class using the Ray distributed engine. It serves as an intermediate level between
:py:class:`~modin.core.storage_formats.pandas.query_compiler.PandasQueryCompiler` and
:py:class:`~modin.core.execution.ray.implementations.pandas_on_ray.partitioning.PandasOnRayDataframePartitionManager`.

Public API
----------

.. autoclass:: modin.core.execution.ray.implementations.pandas_on_ray.dataframe.PandasOnRayDataframe
  :members:

================================================
FILE: docs/flow/modin/core/execution/ray/implementations/pandas_on_ray/index.rst
================================================
:orphan:

PandasOnRay Execution
=====================

Queries that perform data transformation, data ingress or data egress using the `pandas on Ray` execution
pass through the Modin components detailed below.

To enable `pandas on Ray` execution, please refer to the usage section in :doc:`pandas on Ray </development/using_pandas_on_ray>`.

Data Transformation
'''''''''''''''''''

.. image:: /img/pandas_on_ray_data_transform.svg
   :align: center

When a user calls any :py:class:`~modin.pandas.dataframe.DataFrame` API, a query starts forming at the `API` layer
to be executed at the `Execution` layer. The `API` layer is responsible for processing the query appropriately,
for example, determining whether the final result should be a ``DataFrame`` or ``Series`` object. This layer is also responsible for sanitizing the input to the
:py:class:`~modin.core.storage_formats.pandas.query_compiler.PandasQueryCompiler`, e.g. validating a parameter from the query
and defining specific intermediate values to provide more context to the query compiler.
The :py:class:`~modin.core.storage_formats.pandas.query_compiler.PandasQueryCompiler` is responsible for
processing the query, received from the :py:class:`~modin.pandas.dataframe.DataFrame` `API` layer,
to determine how to apply it to a subset of the data - either cell-wise or along an axis-wise partition backed by the `pandas`
storage format. The :py:class:`~modin.core.storage_formats.pandas.query_compiler.PandasQueryCompiler` maps the query to one of the :doc:`Core Algebra Operators </flow/modin/core/dataframe/algebra>` of
the :py:class:`~modin.core.execution.ray.implementations.pandas_on_ray.dataframe.PandasOnRayDataframe` which inherits
generic functionality from the ``GenericRayDataframe`` and the :py:class:`~modin.core.dataframe.pandas.dataframe.dataframe.PandasDataframe`.

..
  TODO: insert a link to ``GenericRayDataframe`` once we add an implementation of the class

PandasOnRay Dataframe implementation
------------------------------------

Modin implements ``Dataframe``, ``PartitionManager``, ``VirtualPartition`` (a specific kind of ``AxisPartition`` with the capability
to combine smaller partitions into a single "virtual" one) and ``Partition`` classes specifically for the ``PandasOnRay`` execution:

* :doc:`PandasOnRayDataframe <dataframe>`
* :doc:`PandasOnRayDataframePartition <partitioning/partition>`
* :doc:`PandasOnRayDataframeVirtualPartition <partitioning/axis_partition>`
* :doc:`PandasOnRayDataframePartitionManager <partitioning/partition_manager>`

.. toctree::
    :hidden:

    dataframe
    partitioning/partition
    partitioning/axis_partition
    partitioning/partition_manager

Data Ingress
''''''''''''

.. image:: /img/pandas_on_ray_data_ingress.svg
   :align: center

Data Egress
'''''''''''

.. image:: /img/pandas_on_ray_data_egress.svg
   :align: center


When a user calls any IO function from the ``modin.pandas.io`` module, the `API` layer queries the
:py:class:`~modin.core.execution.dispatching.factories.dispatcher.FactoryDispatcher` which defines a factory specific for
the execution, namely, the :py:class:`~modin.core.execution.dispatching.factories.factories.PandasOnRayFactory`. The factory, in turn,
exposes the :py:class:`~modin.core.execution.ray.implementations.pandas_on_ray.io.PandasOnRayIO` class
whose responsibility is to perform a parallel read/write from/to a file.

When reading data from a CSV file, for example, the :py:class:`~modin.core.execution.ray.implementations.pandas_on_ray.io.PandasOnRayIO` class forwards
the user query to the :meth:`~modin.core.io.text.CSVDispatcher._read` method of :py:class:`~modin.core.io.text.CSVDispatcher`, where the query's parameters are preprocessed
to check if they are supported by the execution (defaulting to pandas if they are not) and some metadata
common for all partitions to be read is computed. Then, the file is split into row chunks, and this data is used to launch remote tasks on the Ray workers
via the :meth:`~modin.core.execution.ray.common.RayWrapper.deploy` method of :py:class:`~modin.core.execution.ray.common.RayWrapper`.
On each Ray worker, the :py:class:`~modin.core.storage_formats.pandas.parsers.PandasCSVParser` parses data.
After the remote tasks are finished, additional result postprocessing is performed,
and a new query compiler with the data read is returned.

When writing data to a CSV file, for example, the :py:class:`~modin.core.execution.ray.implementations.pandas_on_ray.io.PandasOnRayIO` processes
the user query to execute it on Ray workers. Then, the :py:class:`~modin.core.execution.ray.implementations.pandas_on_ray.io.PandasOnRayIO` asks the
:py:class:`~modin.core.execution.ray.implementations.pandas_on_ray.dataframe.PandasOnRayDataframe` to decompose the data into row-wise partitions
that will be written into the file in parallel in Ray workers.

================================================
FILE: docs/flow/modin/core/execution/ray/implementations/pandas_on_ray/partitioning/axis_partition.rst
================================================
PandasOnRayDataframeVirtualPartition
""""""""""""""""""""""""""""""""""""

This class is the specific implementation of :py:class:`~modin.core.dataframe.pandas.partitioning.axis_partition.PandasDataframeAxisPartition`,
providing the API to perform operations on an axis partition, using Ray as an execution engine. The virtual partition is
a wrapper over a list of block partitions, which are stored in this class, with the capability to combine the smaller partitions into a single "virtual" one.

Public API
----------

.. autoclass:: modin.core.execution.ray.implementations.pandas_on_ray.partitioning.PandasOnRayDataframeVirtualPartition
  :members:

PandasOnRayDataframeColumnPartition
"""""""""""""""""""""""""""""""""""

Public API
----------

.. autoclass:: modin.core.execution.ray.implementations.pandas_on_ray.partitioning.PandasOnRayDataframeColumnPartition
  :members:

PandasOnRayDataframeRowPartition
""""""""""""""""""""""""""""""""

Public API
----------

.. autoclass:: modin.core.execution.ray.implementations.pandas_on_ray.partitioning.PandasOnRayDataframeRowPartition
  :members:


================================================
FILE: docs/flow/modin/core/execution/ray/implementations/pandas_on_ray/partitioning/partition.rst
================================================
PandasOnRayDataframePartition
"""""""""""""""""""""""""""""

The class is the specific implementation of :py:class:`~modin.core.dataframe.pandas.partitioning.partition.PandasDataframePartition`,
providing the API to perform operations on a block partition, namely, ``pandas.DataFrame``, using Ray as an execution engine.

In addition to wrapping a ``pandas.DataFrame``, the class also holds the following metadata:

* ``length`` - length of ``pandas.DataFrame`` wrapped
* ``width`` - width of ``pandas.DataFrame`` wrapped
* ``ip`` - node IP address that holds ``pandas.DataFrame`` wrapped

An operation on a block partition can be performed in two modes:

* asynchronously_ - via :meth:`~modin.core.execution.ray.implementations.pandas_on_ray.partitioning.PandasOnRayDataframePartition.apply`
* lazily_ - via :meth:`~modin.core.execution.ray.implementations.pandas_on_ray.partitioning.PandasOnRayDataframePartition.add_to_apply_calls`

Public API
----------

.. autoclass:: modin.core.execution.ray.implementations.pandas_on_ray.partitioning.PandasOnRayDataframePartition
  :members:

.. _asynchronously: https://en.wikipedia.org/wiki/Asynchrony_(computer_programming)
.. _lazily: https://en.wikipedia.org/wiki/Lazy_evaluation


================================================
FILE: docs/flow/modin/core/execution/ray/implementations/pandas_on_ray/partitioning/partition_manager.rst
================================================
PandasOnRayDataframePartitionManager
""""""""""""""""""""""""""""""""""""

This class is the specific implementation of :py:class:`~modin.core.execution.ray.generic.partitioning.GenericRayDataframePartitionManager`
using Ray distributed engine. This class is responsible for partition manipulation and applying a function to
block/row/column partitions.

Public API
----------

.. autoclass:: modin.core.execution.ray.implementations.pandas_on_ray.partitioning.PandasOnRayDataframePartitionManager
  :members:


================================================
FILE: docs/flow/modin/core/execution/unidist/generic.rst
================================================
:orphan:

Generic Unidist-based members
=============================

Objects which are storage format agnostic but require specific Unidist implementation
are placed in ``modin.core.execution.unidist.generic``.

Their purpose is to implement certain parallel I/O operations and to serve
as a foundation for building storage format specific objects:

.. autoclass:: modin.core.execution.unidist.generic.io.UnidistIO
  :members:

.. autoclass:: modin.core.execution.unidist.generic.partitioning.GenericUnidistDataframePartitionManager
  :members:


================================================
FILE: docs/flow/modin/core/execution/unidist/implementations/pandas_on_unidist/dataframe.rst
================================================
PandasOnUnidistDataframe
""""""""""""""""""""""""

The class is the specific implementation of :py:class:`~modin.core.dataframe.pandas.dataframe.dataframe.PandasDataframe`
class using the Unidist distributed engine. It serves as an intermediate level between
:py:class:`~modin.core.storage_formats.pandas.query_compiler.PandasQueryCompiler` and
:py:class:`~modin.core.execution.unidist.implementations.pandas_on_unidist.partitioning.PandasOnUnidistDataframePartitionManager`.

Public API
----------

.. autoclass:: modin.core.execution.unidist.implementations.pandas_on_unidist.dataframe.PandasOnUnidistDataframe
  :members:

================================================
FILE: docs/flow/modin/core/execution/unidist/implementations/pandas_on_unidist/index.rst
================================================
:orphan:

PandasOnUnidist Execution
=========================

Queries that perform data transformation, data ingress or data egress using the `pandas on Unidist` execution
pass through the Modin components detailed below.

To enable `pandas on MPI through unidist` execution,
please refer to the usage section in :doc:`pandas on MPI through unidist </development/using_pandas_on_mpi>`.

Data Transformation
'''''''''''''''''''

.. image:: /img/pandas_on_unidist_data_transform.svg
   :align: center

When a user calls any :py:class:`~modin.pandas.dataframe.DataFrame` API, a query starts forming at the `API` layer
to be executed at the `Execution` layer. The `API` layer is responsible for processing the query appropriately,
for example, determining whether the final result should be a ``DataFrame`` or ``Series`` object. This layer is also responsible for sanitizing the input to the
:py:class:`~modin.core.storage_formats.pandas.query_compiler.PandasQueryCompiler`, e.g. validating a parameter from the query
and defining specific intermediate values to provide more context to the query compiler.
The :py:class:`~modin.core.storage_formats.pandas.query_compiler.PandasQueryCompiler` is responsible for
processing the query, received from the :py:class:`~modin.pandas.dataframe.DataFrame` `API` layer,
to determine how to apply it to a subset of the data - either cell-wise or along an axis-wise partition backed by the `pandas`
storage format. The :py:class:`~modin.core.storage_formats.pandas.query_compiler.PandasQueryCompiler` maps the query to one of the :doc:`Core Algebra Operators </flow/modin/core/dataframe/algebra>` of
the :py:class:`~modin.core.execution.unidist.implementations.pandas_on_unidist.dataframe.PandasOnUnidistDataframe` which inherits
generic functionality from the ``GenericUnidistDataframe`` and the :py:class:`~modin.core.dataframe.pandas.dataframe.dataframe.PandasDataframe`.

..
  TODO: insert a link to ``GenericUnidistDataframe`` once we add an implementation of the class

PandasOnUnidist Dataframe implementation
----------------------------------------

Modin implements ``Dataframe``, ``PartitionManager``, ``VirtualPartition`` (a specific kind of ``AxisPartition`` with the capability
to combine smaller partitions into the one "virtual") and ``Partition`` classes specifically for the ``PandasOnUnidist`` execution:

* :doc:`PandasOnUnidistDataframe <dataframe>`
* :doc:`PandasOnUnidistDataframePartition <partitioning/partition>`
* :doc:`PandasOnUnidistDataframeVirtualPartition <partitioning/axis_partition>`
* :doc:`PandasOnUnidistDataframePartitionManager <partitioning/partition_manager>`

.. toctree::
    :hidden:

    dataframe
    partitioning/partition
    partitioning/axis_partition
    partitioning/partition_manager

Data Ingress
''''''''''''

.. image:: /img/pandas_on_unidist_data_ingress.svg
   :align: center

Data Egress
'''''''''''

.. image:: /img/pandas_on_unidist_data_egress.svg
   :align: center


When a user calls any IO function from the ``modin.pandas.io`` module, the `API` layer queries the
:py:class:`~modin.core.execution.dispatching.factories.dispatcher.FactoryDispatcher` which defines a factory specific for
the execution, namely, the :py:class:`~modin.core.execution.dispatching.factories.factories.PandasOnUnidistFactory`. The factory, in turn,
exposes the :py:class:`~modin.core.execution.unidist.implementations.pandas_on_unidist.io.PandasOnUnidistIO` class
whose responsibility is to perform a parallel read/write from/to a file.

When reading data from a CSV file, for example, the :py:class:`~modin.core.execution.unidist.implementations.pandas_on_unidist.io.PandasOnUnidistIO` class forwards
the user query to the :meth:`~modin.core.io.text.CSVDispatcher._read` method of :py:class:`~modin.core.io.text.CSVDispatcher`, where the query's parameters are preprocessed
to check if they are supported by the execution (defaulting to pandas if they are not), and some metadata
common for all partitions to be read is computed. Then, the file is split into row chunks, and this data is used to launch remote tasks on the Unidist workers
via the :meth:`~modin.core.execution.unidist.common.UnidistWrapper.deploy` method of :py:class:`~modin.core.execution.unidist.common.UnidistWrapper`.
On each Unidist worker, the :py:class:`~modin.core.storage_formats.pandas.parsers.PandasCSVParser` parses data.
After the remote tasks are finished, additional result postprocessing is performed,
and a new query compiler with the data read is returned.

When writing data to a CSV file, for example, the :py:class:`~modin.core.execution.unidist.implementations.pandas_on_unidist.io.PandasOnUnidistIO` processes
the user query to execute it on Unidist workers. Then, the :py:class:`~modin.core.execution.unidist.implementations.pandas_on_unidist.io.PandasOnUnidistIO` asks the
:py:class:`~modin.core.execution.unidist.implementations.pandas_on_unidist.dataframe.PandasOnUnidistDataframe` to decompose the data into row-wise partitions
that will be written into the file in parallel in Unidist workers.

================================================
FILE: docs/flow/modin/core/execution/unidist/implementations/pandas_on_unidist/partitioning/axis_partition.rst
================================================
PandasOnUnidistDataframeVirtualPartition
""""""""""""""""""""""""""""""""""""""""

This class is the specific implementation of :py:class:`~modin.core.dataframe.pandas.partitioning.axis_partition.PandasDataframeAxisPartition`,
providing the API to perform operations on an axis partition, using Unidist as an execution engine. The virtual partition is
a wrapper over a list of block partitions, which are stored in this class, with the capability to combine the smaller partitions into the one "virtual".

Public API
----------

.. autoclass:: modin.core.execution.unidist.implementations.pandas_on_unidist.partitioning.PandasOnUnidistDataframeVirtualPartition
  :members:

PandasOnUnidistDataframeColumnPartition
"""""""""""""""""""""""""""""""""""""""

Public API
----------

.. autoclass:: modin.core.execution.unidist.implementations.pandas_on_unidist.partitioning.PandasOnUnidistDataframeColumnPartition
  :members:

PandasOnUnidistDataframeRowPartition
""""""""""""""""""""""""""""""""""""

Public API
----------

.. autoclass:: modin.core.execution.unidist.implementations.pandas_on_unidist.partitioning.PandasOnUnidistDataframeRowPartition
  :members:


================================================
FILE: docs/flow/modin/core/execution/unidist/implementations/pandas_on_unidist/partitioning/partition.rst
================================================
PandasOnUnidistDataframePartition
"""""""""""""""""""""""""""""""""

The class is the specific implementation of :py:class:`~modin.core.dataframe.pandas.partitioning.partition.PandasDataframePartition`,
providing the API to perform operations on a block partition, namely, ``pandas.DataFrame``, using Unidist as an execution engine.

In addition to wrapping a ``pandas.DataFrame``, the class also holds the following metadata:

* ``length`` - length of ``pandas.DataFrame`` wrapped
* ``width`` - width of ``pandas.DataFrame`` wrapped
* ``ip`` - node IP address that holds ``pandas.DataFrame`` wrapped

An operation on a block partition can be performed in two modes:

* asynchronously_ - via :meth:`~modin.core.execution.unidist.implementations.pandas_on_unidist.partitioning.PandasOnUnidistDataframePartition.apply`
* lazily_ - via :meth:`~modin.core.execution.unidist.implementations.pandas_on_unidist.partitioning.PandasOnUnidistDataframePartition.add_to_apply_calls`

Public API
----------

.. autoclass:: modin.core.execution.unidist.implementations.pandas_on_unidist.partitioning.PandasOnUnidistDataframePartition
  :members:

.. _asynchronously: https://en.wikipedia.org/wiki/Asynchrony_(computer_programming)
.. _lazily: https://en.wikipedia.org/wiki/Lazy_evaluation


================================================
FILE: docs/flow/modin/core/execution/unidist/implementations/pandas_on_unidist/partitioning/partition_manager.rst
================================================
PandasOnUnidistDataframePartitionManager
""""""""""""""""""""""""""""""""""""""""

This class is the specific implementation of :py:class:`~modin.core.execution.unidist.generic.partitioning.GenericUnidistDataframePartitionManager`
using Unidist distributed engine. This class is responsible for partition manipulation and applying a function to
block/row/column partitions.

Public API
----------

.. autoclass:: modin.core.execution.unidist.implementations.pandas_on_unidist.partitioning.PandasOnUnidistDataframePartitionManager
  :members:


================================================
FILE: docs/flow/modin/core/io/index.rst
================================================
:orphan:

IO Module Description
"""""""""""""""""""""

Dispatcher Classes Workflow Overview
''''''''''''''''''''''''''''''''''''

Calls from ``read_*`` functions of execution-specific IO classes (for example, ``PandasOnRayIO`` for
Ray engine and pandas storage format) are forwarded to the ``_read`` function of the file
format-specific class (for example ``CSVDispatcher`` for CSV files), where function parameters are
preprocessed to check if they are supported (defaulting to pandas if not)
and common metadata is computed for all partitions. The file is then split
into chunks (splitting mechanism described below) and the data is used to launch tasks
on the remote workers. After the remote tasks finish, additional
postprocessing is performed on the results, and a new query compiler with the imported data will
be returned.

Data File Splitting Mechanism
'''''''''''''''''''''''''''''

Modin's file splitting mechanism differs depending on the data format type:

* text format type - the file is split into bytes according to user specified arguments.
  In the simplest case, when no row related parameters (such as ``nrows`` or
  ``skiprows``) are passed, data chunk limits (start and end bytes) are derived
  by dividing the file size by the number of partitions (chunks can
  slightly differ from each other because the computed end byte usually falls inside a
  line, and in that case the last byte of that line is used instead of the initial
  value). In other cases the same splitting mechanism is used, but chunk sizes are
  defined according to the number of lines that each partition should contain.

* columnar store type - the file is split so that each chunk contains approximately the same number of columns.

* SQL type - chunking is obtained by wrapping initial SQL query with a query that
  specifies initial row offset and number of rows in the chunk.

After file splitting is complete, chunks data is passed to the parser functions
(``PandasCSVParser.parse`` for ``read_csv`` function with pandas storage format) for
further processing on each worker.

Submodules Description
''''''''''''''''''''''

``modin.core.io`` module is used mostly for storing utils and dispatcher
classes for reading files of different formats.

* ``io.py`` - class containing basic utils and default implementation of IO functions.

* ``file_dispatcher.py`` - class reading data from different kinds of files and
  handling some util functions common for all formats. This class also contains the ``read``
  function, which is the entry point for the ``_read`` functions of all dispatchers.

* text - directory for storing all text file format dispatcher classes  
  
  * ``text_file_dispatcher.py`` - class for reading text format files. This class
    holds the ``partitioned_file`` function for splitting text format files into chunks,
    the ``offset`` function for moving the file offset by the specified number of bytes,
    the ``_read_rows`` function for moving the file offset by the specified number of rows,
    and many other functions.
  
  * format/feature specific dispatchers: ``csv_dispatcher.py``, ``excel_dispatcher.py``,
    ``fwf_dispatcher.py`` and ``json_dispatcher.py``.

* column_stores - directory for storing all columnar store file format dispatcher classes
  
  * ``column_store_dispatcher.py`` - class for reading columnar type files. This class
    holds ``build_query_compiler`` function that performs file splitting, deploying remote
    tasks and results postprocessing and many other functions.
  
  * format/feature specific dispatchers: ``feather_dispatcher.py``, ``hdf_dispatcher.py``
    and ``parquet_dispatcher.py``.

* sql - directory for storing SQL dispatcher class
  
  * ``sql_dispatcher.py`` -  class for reading SQL queries or database tables.

Public API
''''''''''

.. automodule:: modin.core.io
    :members:

Handling ``skiprows`` Parameter
'''''''''''''''''''''''''''''''

Handling ``skiprows`` parameter by pandas import functions can be very tricky, especially
for ``read_csv`` function because of interconnection with ``header`` parameter. In this section
the techniques of ``skiprows`` processing by both pandas and Modin are covered.

Processing ``skiprows`` by pandas
=================================

Let's consider a simple snippet with ``pandas.read_csv`` in order to understand interconnection
of ``header`` and ``skiprows`` parameters:

.. code-block:: python

  import pandas
  from io import StringIO

  data = """0
  1
  2
  3
  4
  5
  6
  7
  8
  """

  # `header` parameter absence is equivalent to `header="infer"` or `header=0`
  # rows 1, 5, 6, 7, 8 are read with header "0"
  df = pandas.read_csv(StringIO(data), skiprows=[2, 3, 4])
  # rows 5, 6, 7, 8 are read with header "1", row 0 is skipped additionally
  df = pandas.read_csv(StringIO(data), skiprows=[2, 3, 4], header=1)
  # rows 6, 7, 8 are read with header "5", rows 0, 1 are skipped additionally
  df = pandas.read_csv(StringIO(data), skiprows=[2, 3, 4], header=2)

In the examples above list-like ``skiprows`` values are fixed and ``header`` is varied. In the first
example with no ``header`` provided, rows 2, 3, 4 are skipped and row 0 is considered as the header.
In the second example ``header == 1``, so the zeroth row is skipped and the next available row is
considered the header. The third example illustrates how the ``header`` and ``skiprows`` parameter
values interact when both are present - in this case ``skiprows`` rows are dropped first and then the ``header`` is derived
from the remaining rows (rows before the header are skipped too).

In the examples above only list-like ``skiprows`` and integer ``header`` parameters are considered,
but the same logic is applicable for other types of the parameters.

Processing ``skiprows`` by Modin
================================

As can be seen, skipping rows in the pandas import functions is complicated and distributing
this logic across multiple workers can complicate it even more. Thus, in some rare corner cases
the default pandas implementation is used in Modin to avoid overcomplicating the Modin code.

Modin uses two techniques for skipping rows:

1) During file partitioning (setting file limits that should be read by each partition)
exact rows can be excluded from partitioning scope, thus they won't be read at all and can be
considered as skipped. This is the most effective way of skipping rows since it doesn't require
any actual data reading and postprocessing, but in this case ``skiprows`` parameter can be an
integer only. When it is possible Modin always uses this approach.

2) Rows for skipping can be dropped after full dataset import. This is a more expensive way since
it requires extra IO work and postprocessing afterwards, but the ``skiprows`` parameter can be of any
non-integer type supported by ``pandas.read_csv``.

In some cases, if ``skiprows`` is a uniformly distributed array (e.g. [1, 2, 3]), ``skiprows`` can be
"squashed" and represented as an integer to make a fastpath by skipping these rows during file partitioning
(using the first option). But if there is a gap between the first row for skipping
and the last line of the header (that will be skipped too since header is read by each partition
to ensure metadata is defined properly), then the rows in this gap must be read first:
the first partition is assigned to read them by setting the ``pre_reading`` parameter.

Let's consider an example of skipping rows during partitioning when ``header="infer"`` and
``skiprows=[3, 4, 5]``. In this specific case fastpath can be done since ``skiprows`` is a uniformly
distributed array, so we can "squash" it to an integer and set "partitioning" skiprows to 3. But
if no additional action is done, these three rows will be skipped right after the header line,
which corresponds to ``skiprows=[1, 2, 3]``. To avoid this discrepancy, we need to assign the first
partition to read data between the header line and the first row for skipping by setting the special
``pre_reading`` parameter to 2. Then, after the rows designated to be skipped during
partitioning are skipped, the remaining data is divided among the rest of the partitions; see the row assignment
below:

.. code-block::

  0 - header line (skip during partitioning)
  1 - pre reading (assign to read by the first partition)
  2 - pre reading (assign to read by the first partition)
  3 - "partitioning" skiprows (skip during partitioning)
  4 - "partitioning" skiprows (skip during partitioning)
  5 - "partitioning" skiprows (skip during partitioning)
  6 - data to partition (divide between the rest of partitions)
  7 - data to partition (divide between the rest of partitions)


================================================
FILE: docs/flow/modin/core/storage_formats/base/query_compiler.rst
================================================
BaseQueryCompiler
"""""""""""""""""

Brief description
'''''''''''''''''
:py:class:`~modin.core.storage_formats.base.query_compiler.BaseQueryCompiler` is an abstract class of query compiler, and sets a common interface
that every other query compiler implementation in Modin must follow. The Base class contains basic
implementations for most of the interface methods, all of which
:doc:`fall back to pandas </supported_apis/defaulting_to_pandas>`.

Subclassing :py:class:`~modin.core.storage_formats.base.query_compiler.BaseQueryCompiler`
'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
If you want to add a new type of query compiler to Modin, the new class needs to inherit
from :py:class:`~modin.core.storage_formats.base.query_compiler.BaseQueryCompiler` and implement the abstract methods:

- :py:meth:`~modin.core.storage_formats.base.query_compiler.BaseQueryCompiler.from_pandas` build query compiler from pandas DataFrame.
- :py:meth:`~modin.core.storage_formats.base.query_compiler.BaseQueryCompiler.from_arrow` build query compiler from Arrow Table.
- :py:meth:`~modin.core.storage_formats.base.query_compiler.BaseQueryCompiler.to_pandas` get query compiler representation as pandas DataFrame.
- :py:meth:`~modin.core.storage_formats.base.query_compiler.BaseQueryCompiler.default_to_pandas` do :doc:`fallback to pandas </supported_apis/defaulting_to_pandas>` for the passed function. 
- :py:meth:`~modin.core.storage_formats.base.query_compiler.BaseQueryCompiler.finalize` finalize object constructing.
- :py:meth:`~modin.core.storage_formats.base.query_compiler.BaseQueryCompiler.free` trigger memory cleaning.

(Please refer to the code documentation to see the full documentation for these functions).

This is a minimum set of operations to ensure a new query compiler will function in the Modin architecture,
and the rest of the API can safely default to the pandas implementation via the base class implementation. 
To add a storage format specific implementation for some of the query compiler operations, just override 
the corresponding method in your query compiler class.

Example
'''''''
As an exercise let's define a new query compiler in `Modin`, just to see how easy it is.
Usually, the query compiler routes formed queries to the underlying :doc:`frame </flow/modin/core/dataframe/index>` class,
which submits operators to an execution engine. For the sake
of simplicity and independence of this example, our execution engine will be pandas itself.

We need to inherit a new class from :py:class:`~modin.core.storage_formats.base.query_compiler.BaseQueryCompiler` and implement all of the abstract methods.
In this case, with `pandas` as an execution engine, it's trivial:

.. code-block:: python

    from modin.core.storage_formats import BaseQueryCompiler

    class DefaultToPandasQueryCompiler(BaseQueryCompiler):
        def __init__(self, pandas_df):
            self._pandas_df = pandas_df

        @classmethod
        def from_pandas(cls, df, *args, **kwargs):
            return cls(df)

        @classmethod
        def from_arrow(cls, at, *args, **kwargs):
            return cls(at.to_pandas())

        def to_pandas(self):
            return self._pandas_df.copy()

        def default_to_pandas(self, pandas_op, *args, **kwargs):
            return type(self)(pandas_op(self.to_pandas(), *args, **kwargs))
        
        def finalize(self):
            pass

        def free(self):
            pass

All done! Now you've got a fully functional query compiler, which is ready for extensions
and already can be used in Modin DataFrame:

.. code-block:: python

    import pandas
    pandas_df = pandas.DataFrame({"col1": [1, 2, 2, 1], "col2": [10, 2, 3, 40]})
    # Building our query compiler from pandas object
    qc = DefaultToPandasQueryCompiler.from_pandas(pandas_df)

    import modin.pandas as pd
    # Building Modin DataFrame from newly created query compiler
    modin_df = pd.DataFrame(query_compiler=qc)

    # Got fully functional Modin DataFrame
    >>> print(modin_df.groupby("col1").sum().reset_index())
       col1  col2
    0     1    50
    1     2     5

To be able to select this query compiler as default via ``modin.config`` you also need
to define the combination of your query compiler and pandas engine as an execution
by adding the corresponding factory. To find more information about factories,
visit :doc:`dispatching </flow/modin/core/execution/dispatching>` page.

Query Compiler API
''''''''''''''''''

.. autoclass:: modin.core.storage_formats.base.query_compiler.BaseQueryCompiler
    :members:


================================================
FILE: docs/flow/modin/core/storage_formats/index.rst
================================================
:orphan:

Storage Formats
===============
Storage format is one of the components that form Modin's execution, it describes the type(s)
of objects that are stored in the partitions of the selected Core Modin Dataframe implementation.

The base storage format in Modin is pandas. In that format, Modin Dataframe operates with
partitions that hold ``pandas.DataFrame`` objects. Pandas is the most natural storage format
since high-level DataFrame objects mirror its API.

The storage format + execution engine (Ray, Dask, etc.) form the execution backend. 
The Query Compiler (QC) converts high-level pandas API calls to queries that are understood 
by the execution backend.

.. _query_compiler_def:

Query Compiler
==============

.. toctree::
    :hidden:

    base/query_compiler
    pandas/index

Modin supports several execution backends (storage format + execution engine). Calling any
DataFrame API function will end up in some execution-specific method. The query compiler is
a bridge between pandas DataFrame API and the actual Core Modin Dataframe implementation for the
corresponding execution.

.. image:: /img/simplified_query_flow.svg
    :align: right
    :width: 300px

Each storage format has its own Query Compiler class that implements the most optimal
query routing for the selected format.

Query compilers of all storage formats implement a common API, which is used by the high-level Modin DataFrame
to support dataframe queries. The role of the query compiler is to translate its API into
a pairing of known user-defined functions and dataframe algebra operators. Each query compiler instance contains a
:doc:`Core Modin Dataframe </flow/modin/core/dataframe/base/index>` of the selected execution implementation and queries
it with the compiled queries to get the result. The query compiler object is immutable,
so the result of every method is a new query compiler.

The query compilers API is defined by the :py:class:`~modin.core.storage_formats.base.query_compiler.BaseQueryCompiler` class
and may resemble the pandas API, however, they're not equal. The query compilers API
is significantly reduced in comparison with pandas, since many corner cases or even the
whole methods can be handled at the API layer with the existing API.

The query compiler is the level where Modin stops distinguishing DataFrame and Series (or column) objects.
A Series is represented by a `1xN` query compiler, where the Series name is the column label.
If Series is unnamed, then the label is ``MODIN_UNNAMED_SERIES_LABEL``, which is equal to ``"__reduced__"``. The high-level DataFrame API layer
interprets a one-column query compiler as Series or DataFrame depending on the operation context.

.. note::
    Although we're declaring that there is no difference between DataFrame and Series at the query compiler,
    you still may find methods like ``method_ser`` and ``method_df`` which are implemented differently because they're
    emulating either Series or DataFrame logic, or you may find parameters that indicate whether this one-column
    query compiler represents a Series or not. All of these are hacks, and we're working on getting rid of them.

High-level module overview
''''''''''''''''''''''''''

This module houses submodules of all of the stable storage formats:

- :doc:`Base module <base/query_compiler>` contains an abstract query compiler class which defines common API.
- :doc:`Pandas module <pandas/index>` contains query compiler and text parsers for pandas storage format.


================================================
FILE: docs/flow/modin/core/storage_formats/pandas/index.rst
================================================
:orphan:

Pandas storage format
"""""""""""""""""""""

.. toctree::
    :hidden:

    query_compiler
    parsers

High-Level Module Overview
''''''''''''''''''''''''''
This module houses submodules which are responsible for communication between
the query compiler level and execution implementation level for pandas storage format:

- :doc:`Query compiler <query_compiler>` is responsible for compiling efficient queries for :doc:`PandasDataframe </flow/modin/core/dataframe/pandas/dataframe>`.
- :doc:`Parsers <parsers>` are responsible for parsing data on workers during IO operations.


================================================
FILE: docs/flow/modin/core/storage_formats/pandas/parsers.rst
================================================
Pandas Parsers Module Description
"""""""""""""""""""""""""""""""""
High-Level Module Overview
''''''''''''''''''''''''''

This module houses parser classes (classes that are used for data parsing on the workers)
and util functions for handling parsing results. ``PandasParser`` is the base class for parser
classes with pandas storage format; it contains methods common for all child classes. Other
module classes implement the ``parse`` function that performs parsing of specific format data
based on the chunk information computed in the ``modin.core.io`` module. After
the chunk is parsed, the resulting ``DataFrame``-s will be split into smaller
``DataFrame``-s according to the ``num_splits`` parameter, data type, or number of
rows/columns in the parsed chunk. These frames, along with some additional metadata, are then returned.

.. note:: 
    If you are interested in the data parsing mechanism implementation details, please refer
    to the source code documentation.

Public API
''''''''''

.. automodule:: modin.core.storage_formats.pandas.parsers
    :members:


================================================
FILE: docs/flow/modin/core/storage_formats/pandas/query_compiler.rst
================================================
PandasQueryCompiler
"""""""""""""""""""
:py:class:`~modin.core.storage_formats.pandas.query_compiler.PandasQueryCompiler` is responsible for compiling
a set of known predefined functions and pairing those with dataframe algebra operators in the
:doc:`PandasDataframe </flow/modin/core/dataframe/pandas/dataframe>`, specifically for dataframes backed by
``pandas.DataFrame`` objects.

Each :py:class:`~modin.core.storage_formats.pandas.query_compiler.PandasQueryCompiler` contains an instance of
:py:class:`~modin.core.dataframe.pandas.dataframe.dataframe.PandasDataframe` which it queries to get the result.

:py:class:`~modin.core.storage_formats.pandas.query_compiler.PandasQueryCompiler` supports methods built by 
the :doc:`algebra module </flow/modin/core/dataframe/algebra>`.
If you want to add an implementation for a query compiler method, visit the algebra module documentation
to see whether the new operation fits one of the existing function templates and can be easily implemented
with them.

Public API
''''''''''
:py:class:`~modin.core.storage_formats.pandas.query_compiler.PandasQueryCompiler` implements common query compilers API
defined by the :py:class:`~modin.core.storage_formats.base.query_compiler.BaseQueryCompiler`. Some functionality
is inherited from the base class; only the overridden methods are presented in the following section.

.. autoclass:: modin.core.storage_formats.pandas.query_compiler.PandasQueryCompiler
  :members:


================================================
FILE: docs/flow/modin/distributed/dataframe/pandas.rst
================================================
Pandas partitioning API
=======================

This page contains a description of the API to extract partitions from and build Modin Dataframes.

unwrap_partitions
-----------------

.. autofunction:: modin.distributed.dataframe.pandas.unwrap_partitions

from_partitions
---------------
.. autofunction:: modin.distributed.dataframe.pandas.from_partitions

Example
-------

.. code-block:: python

  import modin.pandas as pd
  from modin.distributed.dataframe.pandas import unwrap_partitions, from_partitions
  import numpy as np
  data = np.random.randint(0, 100, size=(2 ** 10, 2 ** 8))
  df = pd.DataFrame(data)
  partitions = unwrap_partitions(df, axis=0, get_ip=True)
  print(partitions)
  new_df = from_partitions(partitions, axis=0)
  print(new_df)


================================================
FILE: docs/flow/modin/experimental/batch.rst
================================================
Batch Pipeline API 
""""""""""""""""""

This API exposes the ability to pipeline row-parallel batch queries on a Modin DataFrame. Currently,
this feature is only supported for the ``PandasOnRay`` execution.

API
'''

.. automodule:: modin.experimental.batch.pipeline
    :members:


================================================
FILE: docs/flow/modin/experimental/core/io/index.rst
================================================
:orphan:

Experimental IO Module Description
""""""""""""""""""""""""""""""""""

The module is used mostly for storing experimental utils and
dispatcher classes for reading/writing files of different formats.

Submodules Description
''''''''''''''''''''''

* text - directory for storing all text file format dispatcher classes

  * format/feature specific dispatchers: ``csv_glob_dispatcher.py``,
    ``custom_text_dispatcher.py``.

* sql - directory for storing SQL dispatcher class

  * format/feature specific dispatchers: ``sql_dispatcher.py``

* pickle - directory for storing Pickle dispatcher class

  * format/feature specific dispatchers: ``pickle_dispatcher.py``

Public API
''''''''''

.. automodule:: modin.experimental.core.io
    :members:


================================================
FILE: docs/flow/modin/experimental/index.rst
================================================
:orphan:

Experimental Modules Overview
"""""""""""""""""""""""""""""

In some cases Modin can give the user the opportunity to extend (not modify) typical pandas
API or to try new functionality in order to get more flexibility. Depending on the exact
experimental feature, the user may need to install additional packages, change configurations or
replace the standard Modin import statement ``import modin.pandas as pd`` with modified version
``import modin.experimental.pandas as pd``.

``modin.experimental`` holds experimental functionality that is under development right now
and provides a limited set of functionality:

* :doc:`xgboost <xgboost>`
* :doc:`sklearn <sklearn>`
* :doc:`batch <batch>`


.. toctree::
    :hidden:

    sklearn
    xgboost
    batch


================================================
FILE: docs/flow/modin/experimental/pandas.rst
================================================
:orphan:

Experimental Pandas API
"""""""""""""""""""""""

.. automodule:: modin.experimental.pandas
  :noindex:

Experimental API Reference
''''''''''''''''''''''''''

.. autofunction:: read_sql
.. autofunction:: read_csv_glob
.. autofunction:: read_custom_text
.. autofunction:: read_pickle_glob
.. autofunction:: read_parquet_glob
.. autofunction:: read_json_glob
.. autofunction:: read_xml_glob
.. automethod:: modin.pandas.DataFrame.modin::to_pandas
.. automethod:: modin.pandas.DataFrame.modin::to_ray
.. automethod:: modin.pandas.DataFrame.modin::to_pickle_glob
.. automethod:: modin.pandas.DataFrame.modin::to_parquet_glob
.. automethod:: modin.pandas.DataFrame.modin::to_json_glob
.. automethod:: modin.pandas.DataFrame.modin::to_xml_glob


================================================
FILE: docs/flow/modin/experimental/range_partitioning_groupby.rst
================================================
:orphan:

.. redirect to the new page
.. raw:: html

    <script type="text/javascript">
        window.location.href = '../../../usage_guide/optimization_notes/index.html#range-partitioning-in-modin';
    </script>

================================================
FILE: docs/flow/modin/experimental/reshuffling_groupby.rst
================================================
:orphan:

.. redirect to the new page
.. raw:: html

    <script type="text/javascript">
        window.location.href = '../../../usage_guide/optimization_notes/index.html#range-partitioning-in-modin';
    </script>


================================================
FILE: docs/flow/modin/experimental/sklearn.rst
================================================
Scikit-learn module description
"""""""""""""""""""""""""""""""

This module holds experimental scikit-learn-specific functionality for Modin.

API
'''
.. automodule:: modin.experimental.sklearn.model_selection
    :members:


================================================
FILE: docs/flow/modin/experimental/xgboost.rst
================================================
Modin XGBoost module description
""""""""""""""""""""""""""""""""
High-level Module Overview
''''''''''''''''''''''''''

This module holds classes, public interface and internal functions for distributed XGBoost in Modin.

Public classes :py:class:`~modin.experimental.xgboost.Booster`, :py:class:`~modin.experimental.xgboost.DMatrix`
and function :py:func:`~modin.experimental.xgboost.train` provide the user with familiar XGBoost interfaces.
They are located in the ``modin.experimental.xgboost.xgboost`` module.

The internal module ``modin.experimental.xgboost.xgboost_ray`` contains the implementation of Modin XGBoost
for the Ray execution engine. This module mainly consists of the Ray actor-class :py:class:`~modin.experimental.xgboost.xgboost_ray.ModinXGBoostActor`,
a function to distribute Modin's partitions between actors :py:func:`~modin.experimental.xgboost.xgboost_ray._assign_row_partitions_to_actors`,
an internal :py:func:`~modin.experimental.xgboost.xgboost_ray._train`/:py:func:`~modin.experimental.xgboost.xgboost_ray._predict`
function used from the public interfaces and additional util functions for computing cluster resources, actor creations etc.

Public interfaces
'''''''''''''''''

:py:class:`~modin.experimental.xgboost.DMatrix` inherits original class ``xgboost.DMatrix`` and overrides
its constructor, which currently supports only `data` and `label` parameters. Both of the parameters must
be ``modin.pandas.DataFrame``, which will be internally unwrapped to lists of delayed objects of Modin's
row partitions using the function :py:func:`~modin.distributed.dataframe.pandas.unwrap_partitions`.

.. autoclass:: modin.experimental.xgboost.DMatrix
  :members:

:py:class:`~modin.experimental.xgboost.Booster` inherits original class ``xgboost.Booster`` and
overrides method ``predict``. The difference from original class interface for ``predict``
method is changing the type of the `data` parameter to :py:class:`~modin.experimental.xgboost.DMatrix`.

.. autoclass:: modin.experimental.xgboost.Booster
    :members:

:py:func:`~modin.experimental.xgboost.train` function has 2 differences from the original ``train`` function - (1) the
data type of `dtrain` parameter is :py:class:`~modin.experimental.xgboost.DMatrix` and (2) a new parameter `num_actors`.

.. autofunction:: modin.experimental.xgboost.train

Internal execution flow on Ray engine
'''''''''''''''''''''''''''''''''''''

Internal functions :py:func:`~modin.experimental.xgboost.xgboost_ray._train` and
:py:func:`~modin.experimental.xgboost.xgboost_ray._predict` work similarly to xgboost.


Training
********

1. The data is passed to the :py:func:`~modin.experimental.xgboost.xgboost_ray._train`
   function as a :py:class:`~modin.experimental.xgboost.DMatrix` object. Lists of ``ray.ObjectRef``
   corresponding to row partitions of Modin DataFrames are extracted by iterating over the 
   :py:class:`~modin.experimental.xgboost.DMatrix`. Example:

   .. code-block:: python

     # Extract lists of row partitions from dtrain (DMatrix object)
     X_row_parts, y_row_parts = dtrain
   ..

2. In this step, the parameter `num_actors` is processed. The internal function :py:func:`~modin.experimental.xgboost.xgboost_ray._get_num_actors`
   examines the value provided by the user. If the value isn't provided, `num_actors` is computed using the condition that one actor should use at most 2 CPUs.
   This condition was chosen to maximize the number of parallel workers with multithreaded XGBoost training (2 threads
   per worker will be used in this case).

.. note:: `num_actors` parameter is made available for public function :py:func:`~modin.experimental.xgboost.train` to allow
  fine-tuning for obtaining the best performance in specific use cases.

3. :py:class:`~modin.experimental.xgboost.xgboost_ray.ModinXGBoostActor` objects are created.

4. Data `dtrain` is split between actors evenly. The internal function
   :py:func:`~modin.experimental.xgboost.xgboost_ray._split_data_across_actors` runs assigning row partitions to actors
   using internal function :py:func:`~modin.experimental.xgboost.xgboost_ray._assign_row_partitions_to_actors`.
   This function creates a dictionary in the form: `{actor_rank: ([part_i0, part_i3, ..], [0, 3, ..]), ..}`.

.. note:: :py:func:`~modin.experimental.xgboost.xgboost_ray._assign_row_partitions_to_actors` takes into account IP
  addresses of row partitions of `dtrain` data to minimize excess data transfer.

5. For each :py:class:`~modin.experimental.xgboost.xgboost_ray.ModinXGBoostActor` object, the ``set_train_data`` method is
   called remotely. This method loads row partitions into the actor according to the partition distribution
   dictionary from the previous step. When data is passed to the actor, the row partitions are automatically materialized
   (``ray.ObjectRef`` -> ``pandas.DataFrame``).

6. ``train`` method of :py:class:`~modin.experimental.xgboost.xgboost_ray.ModinXGBoostActor` class object is called remotely. This method
   runs XGBoost training on local data of actor, connects to ``Rabit Tracker`` for sharing training state between
   actors and returns dictionary with `booster` and `evaluation results`.

7. At the final stage results from actors are returned. `booster` and `evals_result` are returned using ``ray.get``
   function from remote actor.


Prediction
**********

1. The data is passed to :py:func:`~modin.experimental.xgboost.xgboost_ray._predict`
   function as a :py:class:`~modin.experimental.xgboost.DMatrix` object.

2. :py:func:`~modin.experimental.xgboost.xgboost_ray._map_predict` function is applied remotely for each partition
   of the data to make a partial prediction.

3. Result ``modin.pandas.DataFrame`` is created from ``ray.ObjectRef`` objects, obtained in the previous step.


Internal API
''''''''''''
.. autoclass:: modin.experimental.xgboost.xgboost_ray.ModinXGBoostActor
  :members:
  :private-members:

.. autofunction:: modin.experimental.xgboost.xgboost_ray._assign_row_partitions_to_actors
.. autofunction:: modin.experimental.xgboost.xgboost_ray._train
.. autofunction:: modin.experimental.xgboost.xgboost_ray._predict
.. autofunction:: modin.experimental.xgboost.xgboost_ray._get_num_actors
.. autofunction:: modin.experimental.xgboost.xgboost_ray._split_data_across_actors
.. autofunction:: modin.experimental.xgboost.xgboost_ray._map_predict


================================================
FILE: docs/flow/modin/pandas/base.rst
================================================
Base pandas Dataset API
"""""""""""""""""""""""

The class implements functionality that is common to Modin's pandas API for both ``DataFrame`` and ``Series`` classes.

Public API
----------

.. autoclass:: modin.pandas.base.BasePandasDataset
  :noindex:
  :members:


================================================
FILE: docs/flow/modin/pandas/dataframe.rst
================================================
:orphan:

DataFrame Module Overview
"""""""""""""""""""""""""

Modin's ``pandas.DataFrame`` API
''''''''''''''''''''''''''''''''

Modin's ``pandas.DataFrame`` API is backed by a distributed object providing an identical
API to pandas. After the user calls some ``DataFrame`` function, this call is internally
rewritten into a representation that can be processed in parallel by the partitions. These
results can be e.g., reduced to single output, identical to the single threaded
pandas ``DataFrame`` method output.

..
    TODO: add link to the docs with detailed description of queries compilation
    and execution after DOCS-#2996 is merged.

Public API
----------

.. autoclass:: modin.pandas.dataframe.DataFrame

Usage Guide
'''''''''''

The most efficient way to create Modin ``DataFrame`` is to import data from external
storage using the highly efficient Modin IO methods (for example using ``pd.read_csv``,
see details for Modin IO methods in the :doc:`IO </flow/modin/core/io/index>` page),
but even if the data does not originate from a file, any pandas supported data type or
``pandas.DataFrame`` can be used. Internally, the ``DataFrame`` data is divided into
partitions, whose number along an axis usually corresponds to the number of the user's hardware CPUs. If needed,
the number of partitions can be changed by setting ``modin.config.NPartitions``.

Let's consider a simple example of creating and interacting with a Modin ``DataFrame``:

.. code-block:: python

    import modin.config

    # This explicitly sets the number of partitions
    modin.config.NPartitions.put(4)

    import modin.pandas as pd
    import pandas

    # Create Modin DataFrame from the external file
    pd_dataframe = pd.read_csv("test_data.csv")
    # Create Modin DataFrame from the python object
    # data = {f'col{x}': [f'col{x}_{y}' for y in range(100, 356)] for x in range(4)}
    # pd_dataframe = pd.DataFrame(data)
    # Create Modin DataFrame from the pandas object
    # pd_dataframe = pd.DataFrame(pandas.DataFrame(data))

    # Show created DataFrame
    print(pd_dataframe)

    # List DataFrame partitions. Note, that internal API is intended for
    # developers needs and was used here for presentation purposes
    # only.
    partitions = pd_dataframe._query_compiler._modin_frame._partitions
    print(partitions)

    # Show the first DataFrame partition
    print(partitions[0][0].get())

    Output:

    # created DataFrame

            col0      col1      col2      col3
    0    col0_100  col1_100  col2_100  col3_100
    1    col0_101  col1_101  col2_101  col3_101
    2    col0_102  col1_102  col2_102  col3_102
    3    col0_103  col1_103  col2_103  col3_103
    4    col0_104  col1_104  col2_104  col3_104
    ..        ...       ...       ...       ...
    251  col0_351  col1_351  col2_351  col3_351
    252  col0_352  col1_352  col2_352  col3_352
    253  col0_353  col1_353  col2_353  col3_353
    254  col0_354  col1_354  col2_354  col3_354
    255  col0_355  col1_355  col2_355  col3_355

    [256 rows x 4 columns]

    # List of DataFrame partitions

    [[<modin.core.execution.ray.implementations.pandas_on_ray.partitioning.partition.PandasOnRayDataframePartition object at 0x7fc554e607f0>]
    [<modin.core.execution.ray.implementations.pandas_on_ray.partitioning.partition.PandasOnRayDataframePartition object at 0x7fc554e9a4f0>]
    [<modin.core.execution.ray.implementations.pandas_on_ray.partitioning.partition.PandasOnRayDataframePartition object at 0x7fc554e60820>]
    [<modin.core.execution.ray.implementations.pandas_on_ray.partitioning.partition.PandasOnRayDataframePartition object at 0x7fc554e609d0>]]

    # The first DataFrame partition
    
            col0      col1      col2      col3
    0   col0_100  col1_100  col2_100  col3_100
    1   col0_101  col1_101  col2_101  col3_101
    2   col0_102  col1_102  col2_102  col3_102
    3   col0_103  col1_103  col2_103  col3_103
    4   col0_104  col1_104  col2_104  col3_104
    ..       ...       ...       ...       ...
    60  col0_160  col1_160  col2_160  col3_160
    61  col0_161  col1_161  col2_161  col3_161
    62  col0_162  col1_162  col2_162  col3_162
    63  col0_163  col1_163  col2_163  col3_163
    64  col0_164  col1_164  col2_164  col3_164

    [65 rows x 4 columns]

As we show in the example above, Modin ``DataFrame`` can be easily created, and supports any input that pandas ``DataFrame`` supports.
Also note that tuning of the ``DataFrame`` partitioning can be done by just setting a single config.


================================================
FILE: docs/flow/modin/pandas/series.rst
================================================
:orphan:

Series Module Overview
""""""""""""""""""""""

Modin's ``pandas.Series`` API
'''''''''''''''''''''''''''''

Modin's ``pandas.Series`` API is backed by a distributed object providing an identical
API to pandas. After the user calls some ``Series`` function, this call is internally rewritten
into a representation that can be processed in parallel by the partitions. These
results can be e.g., reduced to single output, identical to the single threaded
pandas ``Series`` method output.

..
    TODO: add link to the docs with detailed description of queries compilation
    and execution after DOCS-#2996 is merged.

Public API
----------

.. autoclass:: modin.pandas.series.Series

Usage Guide
'''''''''''

The most efficient way to create Modin ``Series`` is to import data from external
storage using the highly efficient Modin IO methods (for example using ``pd.read_csv``,
see details for Modin IO methods in the :doc:`IO </flow/modin/core/io/index>` page),
but even if the data does not originate from a file, any pandas supported data type or
``pandas.Series`` can be used. Internally, the ``Series`` data is divided into
partitions, whose number along an axis usually corresponds to the number of the user's hardware CPUs. If needed,
the number of partitions can be changed by setting ``modin.config.NPartitions``.

Let's consider a simple example of creating and interacting with a Modin ``Series``:

.. code-block:: python

    import modin.config

    # This explicitly sets the number of partitions
    modin.config.NPartitions.put(4)

    import modin.pandas as pd
    import pandas

    # Create Modin Series from the external file
    pd_series = pd.read_csv("test_data.csv", header=None).squeeze()
    # Create Modin Series from the python object
    # pd_series = pd.Series([x for x in range(256)])
    # Create Modin Series from the pandas object
    # pd_series = pd.Series(pandas.Series([x for x in range(256)]))

    # Show created `Series`
    print(pd_series)

    # List `Series` partitions. Note, that internal API is intended for
    # developers needs and was used here for presentation purposes
    # only.
    partitions = pd_series._query_compiler._modin_frame._partitions
    print(partitions)

    # Show the first `Series` partition
    print(partitions[0][0].get())

    Output:

    # created `Series`

    0      100
    1      101
    2      102
    3      103
    4      104
        ...
    251    351
    252    352
    253    353
    254    354
    255    355
    Name: 0, Length: 256, dtype: int64

    # List of `Series` partitions

    [[<modin.core.execution.ray.implementations.pandas_on_ray.partitioning.partition.PandasOnRayDataframePartition object at 0x7fc554e607f0>]
    [<modin.core.execution.ray.implementations.pandas_on_ray.partitioning.partition.PandasOnRayDataframePartition object at 0x7fc554e9a4f0>]
    [<modin.core.execution.ray.implementations.pandas_on_ray.partitioning.partition.PandasOnRayDataframePartition object at 0x7fc554e60820>]
    [<modin.core.execution.ray.implementations.pandas_on_ray.partitioning.partition.PandasOnRayDataframePartition object at 0x7fc554e609d0>]]

    # The first `Series` partition
    
        0
    0   100
    1   101
    2   102
    3   103
    4   104
    ..  ...
    60  160
    61  161
    62  162
    63  163
    64  164

    [65 rows x 1 columns]

As we show in the example above, Modin ``Series`` can be easily created, and supports any input that pandas ``Series`` supports.
Also note that tuning of the ``Series`` partitioning can be done by just setting a single config.


================================================
FILE: docs/flow/modin/utils.rst
================================================
:orphan:

Modin Utils
"""""""""""

Here are utilities that can be useful when working with Modin.

Public API
''''''''''

.. autofunction:: modin.utils.try_cast_to_pandas
.. autofunction:: modin.utils.execute


================================================
FILE: docs/getting_started/examples.rst
================================================
Examples and Resources
======================

Here you can find additional resources to learn about Modin. To learn more about 
advanced usage for Modin, please refer to the :doc:`Usage Guide </usage_guide/index>` section.

Usage Examples
''''''''''''''

The following notebooks demonstrate how Modin can be used for scalable data science:

- Quickstart Guide to Modin [`Source <https://github.com/modin-project/modin/tree/main/examples/quickstart.ipynb>`__]
- Using Modin with the NYC Taxi Dataset [`Source <https://github.com/modin-project/modin/blob/main/examples/jupyter/Modin_Taxi.ipynb>`__]
- Modin for Machine Learning with scikit-learn [`Source <https://github.com/modin-project/modin/blob/main/examples/modin-scikit-learn-example.ipynb>`__]

Tutorials
'''''''''

The following tutorials cover the basic usage of Modin. `Here <https://www.youtube.com/watch?v=NglkafEmbhE>`__ is a one hour video tutorial that walks through these basic exercises.

- Exercise 1: Introduction to Modin [`Source PandasOnRay <https://github.com/modin-project/modin/blob/main/examples/tutorial/jupyter/execution/pandas_on_ray/local/exercise_1.ipynb>`__, `Source PandasOnDask <https://github.com/modin-project/modin/blob/main/examples/tutorial/jupyter/execution/pandas_on_dask/local/exercise_1.ipynb>`__]
- Exercise 2: Speed Improvements with Modin [`Source PandasOnRay <https://github.com/modin-project/modin/blob/main/examples/tutorial/jupyter/execution/pandas_on_ray/local/exercise_2.ipynb>`__, `Source PandasOnDask <https://github.com/modin-project/modin/blob/main/examples/tutorial/jupyter/execution/pandas_on_dask/local/exercise_2.ipynb>`__]
- Exercise 3: Defaulting to pandas with Modin [`Source PandasOnRay <https://github.com/modin-project/modin/blob/main/examples/tutorial/jupyter/execution/pandas_on_ray/local/exercise_3.ipynb>`__, `Source PandasOnDask <https://github.com/modin-project/modin/blob/main/examples/tutorial/jupyter/execution/pandas_on_dask/local/exercise_3.ipynb>`__]

The following tutorials cover more advanced features in Modin:

- Exercise 4: Experimental Features in Modin (Spreadsheet, Progress Bar) [`Source PandasOnRay <https://github.com/modin-project/modin/blob/main/examples/tutorial/jupyter/execution/pandas_on_ray/local/exercise_4.ipynb>`__, `Source PandasOnDask <https://github.com/modin-project/modin/blob/main/examples/tutorial/jupyter/execution/pandas_on_dask/local/exercise_4.ipynb>`__]
- Exercise 5: Setting up Modin in a Cluster Environment [`Source PandasOnRay <https://github.com/modin-project/modin/blob/main/examples/tutorial/jupyter/execution/pandas_on_ray/cluster/exercise_5.ipynb>`__]
- Exercise 6: Running Modin in a Cluster Environment [`Source PandasOnRay <https://github.com/modin-project/modin/blob/main/examples/tutorial/jupyter/execution/pandas_on_ray/cluster/exercise_6.ipynb>`__]

To learn how to get the required dependencies for the tutorial notebooks and how to run them, please refer to the respective `README.md <https://github.com/modin-project/modin/tree/main/examples/tutorial/jupyter/README.md>`__ file.

Talks & Podcasts
''''''''''''''''

- `Scaling Interactive Data Science with Modin and Ray <https://www.youtube.com/watch?v=ycSf1IbBGWk>`_ (20 minute, Ray Summit 2021)
- `Unleash The Power Of Dataframes At Any Scale With Modin <https://www.pythonpodcast.com/modin-parallel-dataframe-episode-324/>`_  (40 minute, Python Podcast 2021)
- `[Russian] Distributed Data Processing and XGBoost Training and Prediction with Modin <https://www.youtube.com/watch?v=oo_lxUjsFTM&t=1s>`_ (30 minute, PyCon Russia 2021)
- `[Russian] Efficient Data Science with Modin <https://www.youtube.com/watch?v=cOM82kHRwkM&t=6568s>`_ (30 minute, ISP RAS Open 2021)
- `Modin: Scaling the Capabilities of the Data Scientist, not the Machine <https://www.youtube.com/watch?v=NglkafEmbhE>`_ (1 hour, RISE Camp 2020)
- `Modin: Pandas Scalability with Devin Petersohn <https://softwareengineeringdaily.com/2020/07/23/modin-pandas-scalability-with-devin-petersohn/>`_ (1 hour, Software Engineering Daily Podcast 2020)
- `Introduction to the DataFrame and Modin <https://www.youtube.com/watch?v=_0eVVLXrtfY>`_ (20 minute, RISECamp 2019)
- `Scaling Interactive Pandas Workflows with Modin <https://www.youtube.com/watch?v=-HjLd_3ahCw>`_ (40 minute, PyData NYC 2018)

Community contributions
'''''''''''''''''''''''

Here are some blogposts and articles about Modin:

- `Anaconda Blog: Scale your pandas workflow with Modin by Vasilij Litvinov <https://www.anaconda.com/blog/scale-your-pandas-workflow-with-modin>`_
- `The Modin view of Scaling Pandas by Devin Petersohn <https://towardsdatascience.com/the-modin-view-of-scaling-pandas-825215533122>`_
- `Data Science at Scale with Modin by Areg Melik-Adamyan <https://medium.com/intel-analytics-software/data-science-at-scale-with-modin-5319175e6b9a>`_
- `Speed up Pandas using Modin by Eric D. Brown, D.Sc. <https://pythondata.com/quick-tip-speed-up-pandas-using-modin/>`_
- `Explore Python Libraries: Make Your DataFrames Parallel With Modin by Zachary Bennett <https://www.pluralsight.com/guides/explore-python-libraries:-make-your-dataframes-parallel-with-modin>`_
- `Get faster pandas with Modin, even on your laptops by Parul Pandey <https://towardsdatascience.com/get-faster-pandas-with-modin-even-on-your-laptops-b527a2eeda74>`_
- `How to speedup pandas by changing one line of code by Shrivarsheni <https://www.machinelearningplus.com/python/modin-speedup-pandas/>`_
- `How To Accelerate Pandas With Just One Line Of Code by Analytics India <https://analyticsindiamag.com/how-to-accelerate-pandas-with-just-one-line-of-code-modin/>`_
- `An Easy Introduction to Modin: A Step-by-Step Guide to Accelerating Pandas by Intel <https://www.intel.com/content/www/us/en/developer/articles/technical/modin-step-by-step-guide-to-accelerating-pandas.html#gs.c69er5>`_


Here are some articles contributed by the international community:

- `[Chinese] 用 Modin 来提速 pandas 工作流程 by Python Chinese Community <https://blog.csdn.net/BF02jgtRS00XKtCx/article/details/90709222>`_
- `[German] Was ist Modin? by Dipl.-Ing. (FH) Stefan Luber <https://www.bigdata-insider.de/was-ist-modin-a-982826/>`_
- `[Russian] Ускоряем Pandas при помощи модуля modin by Разработка <https://vc.ru/dev/187095-uskoryaem-pandas-pri-pomoshchi-modulya-modin>`_
- `[Korean] modin 으로 pandas 더 빠르게 사용하기 by 분석뉴비 <https://data-newbie.tistory.com/279>`_

If you would like your articles to be featured here, please `submit a pull request <https://github.com/modin-project/modin/pulls>`_ to let us know!


================================================
FILE: docs/getting_started/faq.rst
================================================
Frequently Asked Questions (FAQs)
=================================

Below, you will find answers to the most commonly asked questions about
Modin. If you still cannot find the answer you are looking for, please post your
question on the #support channel on our Slack_ community or open a GitHub issue_.

FAQs: Why choose Modin?
-----------------------

What’s wrong with pandas and why should I use Modin?
""""""""""""""""""""""""""""""""""""""""""""""""""""

While pandas works extremely well on small datasets, as soon as you start working with
medium to large datasets that are more than a few GBs, pandas can become painfully
slow or run out of memory. This is because pandas is single-threaded. In other words,
you can only process your data with one core at a time. This approach does not scale to
larger data sets and adding more hardware does not lead to more performance gain.

The :py:class:`~modin.pandas.dataframe.DataFrame` is a highly
scalable, parallel DataFrame. Modin transparently distributes the data and computation so
that you can continue using the same pandas API while being able to work with more data faster.
Modin lets you use all the CPU cores on your machine, and because it is lightweight, it
often has less memory overhead than pandas. See the :doc:`Why Modin? </getting_started/why_modin/pandas>`
page to learn more about how Modin is different from pandas.

Why not just improve pandas?
""""""""""""""""""""""""""""

pandas is a massive community and well established codebase. Many of the issues
we have identified and resolved with pandas are fundamental to its current
implementation. While we would be happy to donate parts of Modin that
make sense in pandas, many of these components would require significant (or
total) redesign of the pandas architecture. Modin's architecture goes beyond
pandas, which is why the pandas API is just a thin layer at the user level. To learn
more about Modin's architecture, see the :doc:`architecture </development/architecture>` documentation.

How much faster can I go with Modin compared to pandas?
"""""""""""""""""""""""""""""""""""""""""""""""""""""""

Modin is designed to scale with the amount of hardware available.
Even in a traditionally serial task like ``read_csv``, we see large gains by efficiently
distributing the work across your entire machine. Because it is so light-weight,
Modin provides speed-ups of up to 4x on a laptop with 4 physical cores. This speedup scales
efficiently to larger machines with more cores. We have several published papers_ that
include performance results and comparisons against pandas.

How much more data would I be able to process with Modin?
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""

Often data scientists have to use different tools for operating on datasets of different sizes.
This is not only because processing large dataframes is slow, but also pandas does not support working
with dataframes that don't fit into the available memory. As a result, pandas workflows that work well
for prototyping on a few MBs of data do not scale to tens or hundreds of GBs (depending on the size
of your machine). Modin supports operating on data that does not fit in memory, so that you can comfortably
work with hundreds of GBs without worrying about substantial slowdown or memory errors. For more information,
see :doc:`out-of-memory support </getting_started/why_modin/out_of_core>` for Modin.

How does Modin compare to Dask DataFrame and Koalas?
""""""""""""""""""""""""""""""""""""""""""""""""""""

TLDR: Modin has better coverage of the pandas API, has a flexible backend, better ordering semantics,
and supports both row and column-parallel operations.
Check out the :doc:`Modin vs Dask vs Koalas </getting_started/why_modin/modin_vs_dask_vs_koalas>` page detailing
the differences!

How does Modin work under the hood?
"""""""""""""""""""""""""""""""""""

Modin is logically separated into different layers that represent the hierarchy of a
typical Database Management System. User queries which perform data transformation,
data ingress or data egress pass through the Modin Query Compiler which translates
queries from the top-level pandas API Layer that users interact with to the Modin Core
Dataframe layer.
The Modin Core DataFrame is our efficient DataFrame implementation that utilizes a partitioning schema
which allows for distributing tasks and queries. From here, the Modin DataFrame works with engines like
Ray, Dask or Unidist to execute computation, and then return the results to the user.

For more details, take a look at our system :doc:`architecture </development/architecture>`.

FAQs: How to use Modin?
-----------------------

If I’m only using my laptop, can I still get the benefits of Modin?
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

Absolutely! Unlike other parallel DataFrame systems, Modin is an extremely
light-weight, robust DataFrame. Because it is so light-weight, Modin provides
speed-ups of up to 4x on a laptop with 4 physical cores
and allows you to work on data that doesn't fit in your laptop's RAM.

How do I use Jupyter or Colab notebooks with Modin?
"""""""""""""""""""""""""""""""""""""""""""""""""""

You can take a look at this Google Colab installation guide_ and
this notebook tutorial_. Once Modin is installed, simply replace your pandas
import with Modin import:

.. code-block:: python

    # import pandas as pd
    import modin.pandas as pd

Which execution engine (Ray, Dask or Unidist) should I use for Modin?
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

Modin lets you effortlessly speed up your pandas workflows with either Ray_'s, Dask_'s or Unidist_'s execution engine.
You don't need to know anything about any of these engines in order to use them with Modin. If you only have one engine
installed, Modin will automatically detect which engine you have installed and use that for scheduling computation.
If you don't have a preference, we recommend starting with Modin's default Ray engine.
If you want to use a specific compute engine, you can set the environment variable ``MODIN_ENGINE``
and Modin will do computation with that engine:

.. code-block:: bash

    pip install "modin[ray]" # Install Modin dependencies and Ray to run on Ray
    export MODIN_ENGINE=ray  # Modin will use Ray

    pip install "modin[dask]" # Install Modin dependencies and Dask to run on Dask
    export MODIN_ENGINE=dask  # Modin will use Dask

    pip install "modin[mpi]" # Install Modin dependencies and MPI to run on MPI through unidist.
    export MODIN_ENGINE=unidist  # Modin will use Unidist
    export UNIDIST_BACKEND=mpi   # Unidist will use MPI backend.

This can also be done with:

.. code-block:: python

    import modin.config as modin_cfg
    import unidist.config as unidist_cfg

    modin_cfg.Engine.put("ray")  # Modin will use Ray
    modin_cfg.Engine.put("dask")  # Modin will use Dask

    modin_cfg.Engine.put('unidist') # Modin will use Unidist
    unidist_cfg.Backend.put('mpi') # Unidist will use MPI backend

We plan to support more execution engines in the future. If you have a specific request,
please post on the #feature-requests channel on our Slack_ community.

How do I connect Modin to a database via `read_sql`?
""""""""""""""""""""""""""""""""""""""""""""""""""""

To read from a SQL database, you have two options:

1) Pass a connection string, e.g. ``postgresql://reader:NWDMCE5xdipIjRrp@hh-pgsql-public.ebi.ac.uk:5432/pfmegrnargs``
2) Pass an open database connection, e.g. for psycopg2, ``psycopg2.connect("dbname=pfmegrnargs user=reader password=NWDMCE5xdipIjRrp host=hh-pgsql-public.ebi.ac.uk")``

The first option works with both Modin and pandas. If you try the second option
in Modin, Modin will default to pandas because open database connections cannot be pickled.
Pickling is required to send connection details to remote workers.
To handle the unique requirements of distributed database access, Modin has a distributed
database connection called ``ModinDatabaseConnection``:

.. code-block:: python

    import modin.pandas as pd
    from modin.db_conn import ModinDatabaseConnection
    con = ModinDatabaseConnection(
        'psycopg2',
        host='hh-pgsql-public.ebi.ac.uk',
        dbname='pfmegrnargs',
        user='reader',
        password='NWDMCE5xdipIjRrp')
    df = pd.read_sql("SELECT * FROM rnc_database",
            con,
            index_col=None,
            coerce_float=True,
            params=None,
            parse_dates=None,
            chunksize=None)


The ``ModinDatabaseConnection`` will save any arguments you supply it and forward
them to the workers to make their own connections.

How can I contribute to Modin?
""""""""""""""""""""""""""""""

**Modin is currently under active development. Requests and contributions are welcome!**

If you are interested in contributing please check out the :doc:`Contributing Guide</development/contributing>`
and then refer to the :doc:`Development Documentation</development/index>`,
where you can find system architecture, internal implementation details, and other useful information.
Also check out the `Github`_ to view open issues and make contributions.

.. _issue: https://github.com/modin-project/modin/issues
.. _Slack: https://join.slack.com/t/modin-project/shared_invite/zt-yvk5hr3b-f08p_ulbuRWsAfg9rMY3uA
.. _Github: https://github.com/modin-project/modin
.. _Ray: https://github.com/ray-project/ray/
.. _Dask: https://github.com/dask/dask
.. _Unidist: https://github.com/modin-project/unidist
.. _papers: https://people.eecs.berkeley.edu/~totemtang/paper/Modin.pdf
.. _guide: https://modin.readthedocs.io/en/latest/getting_started/installation.html#installing-on-google-colab
.. _tutorial: https://github.com/modin-project/modin/tree/main/examples/tutorial


================================================
FILE: docs/getting_started/installation.rst
================================================
=============
Installation
=============

.. note:: 
  | *Estimated Reading Time: 15 minutes*
  | If you already installed Modin on your machine, you can skip this section.

There are several ways to install Modin. Most users will want to install with
``pip`` or using ``conda`` tool, but some users may want to build from the main branch
on the `GitHub repo`_. The main branch has the most recent patches, but may be less
stable than a release installed from ``pip`` or ``conda``.

Installing with pip
-------------------

Stable version
""""""""""""""

Modin can be installed with ``pip`` on Linux, Windows and MacOS. 
To install the most recent stable release run the following:

.. code-block:: bash

  pip install -U modin # -U for upgrade in case you have an older version

Modin can be used with :doc:`Ray</development/using_pandas_on_ray>`, :doc:`Dask</development/using_pandas_on_dask>`,
:doc:`Unidist</development/using_pandas_on_mpi>` engines.
If you don't have Ray_, Dask_ or Unidist_ installed, you will need to install Modin with one of the targets:

.. code-block:: bash

  pip install "modin[ray]" # Install Modin dependencies and Ray to run on Ray
  pip install "modin[dask]" # Install Modin dependencies and Dask to run on Dask
  pip install "modin[mpi]" # Install Modin dependencies and MPI to run on MPI through unidist
  pip install "modin[all]" # Install Ray and Dask

To get Modin on MPI through unidist (as of unidist 0.5.0) fully working
it is required to have a working MPI implementation installed beforehand.
Otherwise, installation of ``modin[mpi]`` may fail. Refer to
`Installing with pip`_ section of the unidist documentation for more details about installation.

**Note:** Since Modin 0.30.0 we use a reduced set of Ray dependencies: ``ray`` instead of ``ray[default]``.
This means that the dashboard and cluster launcher are no longer installed by default.
If you need those, consider installing ``ray[default]`` along with ``modin[ray]``.

Modin will automatically detect which engine you have installed and use that for
scheduling computation!

Release candidates
""""""""""""""""""

Before most major releases, we will upload a release candidate to test and check if there are any problems. If you would like to install a pre-release of Modin, run the following:

.. code-block:: bash

  pip install --pre modin

These pre-releases are uploaded for dependencies and users to test their existing code
to ensure that it still works. If you find something wrong, please raise an issue_ or
email the bug reporter: bug_reports@modin.org.

Installing specific dependency sets
"""""""""""""""""""""""""""""""""""

Modin has a number of specific dependency sets for running Modin on different execution engines and
storage formats or for different functionalities of Modin. Here is a list of dependency sets for Modin:

.. code-block:: bash

  pip install "modin[ray]" # If you want to use the Ray execution engine

.. code-block:: bash

  pip install "modin[dask]" # If you want to use the Dask execution engine

.. code-block:: bash

  pip install "modin[mpi]" # If you want to use MPI through unidist execution engine


Consortium Standard-compatible implementation based on Modin
""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

.. code-block:: bash

  pip install "modin[consortium-standard]"


Installing on Google Colab
"""""""""""""""""""""""""""

Modin can be used with Google Colab_ via the ``pip`` command, by running the following code in a new cell:

.. code-block:: bash

  !pip install "modin[all]"

Since Colab preloads several of Modin's dependencies by default, we need to restart the Colab environment once Modin is installed by either clicking on the :code:`"RESTART RUNTIME"` button in the installation output or by running the following code:

.. code-block:: python

  # Post-install automatically kill and restart Colab environment
  import os
  os.kill(os.getpid(), 9)

Once you have restarted the Colab environment, you can use Modin in Colab in subsequent sessions.

Note that on the free version of Colab, there is a `limit on the compute resource <https://research.google.com/colaboratory/faq.html>`_. To leverage the full power of Modin, you may have to upgrade to Colab Pro to get access to more compute resources.

Installing with conda
---------------------

Using conda-forge channel
"""""""""""""""""""""""""

Modin releases can be installed using ``conda`` from conda-forge channel. Starting from 0.10.1
it is possible to install modin with chosen engine(s) alongside. Current options are:

+---------------------------------+---------------------------+-----------------------------+
| **Package name in conda-forge** | **Engine(s)**             | **Supported OSs**           |
+---------------------------------+---------------------------+-----------------------------+
| modin                           | Dask_                     |   Linux, Windows, MacOS     |
+---------------------------------+---------------------------+-----------------------------+
| modin-dask                      | Dask                      |   Linux, Windows, MacOS     |
+---------------------------------+---------------------------+-----------------------------+
| modin-ray                       | Ray_                      |       Linux, Windows        |
+---------------------------------+---------------------------+-----------------------------+
| modin-mpi                       | MPI_ through unidist_     |   Linux, Windows, MacOS     |
+---------------------------------+---------------------------+-----------------------------+
| modin-all                       | Dask, Ray, Unidist        |          Linux              |
+---------------------------------+---------------------------+-----------------------------+

**Note:** Since Modin 0.30.0 we use a reduced set of Ray dependencies: ``ray-core`` instead of ``ray-default``.
This means that the dashboard and cluster launcher are no longer installed by default.
If you need those, consider installing ``ray-default`` along with ``modin-ray``.

To install the Dask, Ray and MPI (through unidist) engines into a conda environment, use the following command:

.. code-block:: bash

  conda install -c conda-forge modin-ray modin-dask modin-mpi

All engines can be made available in a conda environment by specifying:

.. code-block:: bash

  conda install -c conda-forge modin-all

or explicitly:

.. code-block:: bash

  conda install -c conda-forge modin-ray modin-dask modin-mpi

Refer to `Installing with conda`_ section of the unidist documentation
for more details on how to install a specific MPI implementation to run on.

``conda`` may be slow installing ``modin-all`` or combinations of execution engines so we currently recommend using the libmamba solver for the installation process.
To do this install it in a base environment:

.. code-block:: bash

  conda install -n base conda-libmamba-solver

Then it can be used during installation either like

.. code-block:: bash

  conda install -c conda-forge modin-ray --experimental-solver=libmamba

or starting from conda 22.11 and libmamba solver 22.12 versions

.. code-block:: bash

  conda install -c conda-forge modin-ray --solver=libmamba


Installing from the GitHub main branch
--------------------------------------

If you'd like to try Modin using the most recent updates from the main branch, you can
also use ``pip``.

.. code-block:: bash

  pip install "modin[all] @ git+https://github.com/modin-project/modin"

This will install directly from the repo without you having to manually clone it! Please be aware
that these changes have not made it into a release and may not be completely stable.

If you would like to install Modin with a specific engine, you can use ``modin[ray]`` or ``modin[dask]`` or ``modin[mpi]`` instead of ``modin[all]`` in the command above.

Windows
-------

All Modin engines are available both on Windows and Linux as mentioned above.
Default engine on Windows is :doc:`Ray</development/using_pandas_on_ray>`.
It is also possible to use Windows Subsystem For Linux (WSL_), but this is generally
not recommended due to the limitations and poor performance of Ray on WSL, which is roughly
2-3x worse than on native Windows.

Building Modin from Source
--------------------------

If you're planning on :doc:`contributing </development/contributing>` to Modin, you will need to ensure that you are
building Modin from the local repository that you are working off of. Occasionally,
there are issues in overlapping Modin installs from pypi and from source. To avoid these
issues, we recommend uninstalling Modin before you install from source:

.. code-block:: bash

  pip uninstall modin

To build from source, you first must clone the repo. We recommend forking the repository first
through the GitHub interface, then cloning as follows:

.. code-block:: bash

  git clone https://github.com/<your-github-username>/modin.git

Once cloned, ``cd`` into the ``modin`` directory and use ``pip`` to install:

.. code-block:: bash

  cd modin
  pip install -e .
  pip install -e ".[all]"  # will install dependencies for all engines

.. _`GitHub repo`: https://github.com/modin-project/modin/tree/main
.. _issue: https://github.com/modin-project/modin/issues
.. _WSL: https://docs.microsoft.com/en-us/windows/wsl/install-win10
.. _Ray: http://ray.readthedocs.io
.. _Dask: https://github.com/dask/dask
.. _MPI: https://www.mpi-forum.org/
.. _Unidist: https://github.com/modin-project/unidist
.. _`Installing with pip`: https://unidist.readthedocs.io/en/latest/installation.html#installing-with-pip
.. _`Installing with conda`: https://unidist.readthedocs.io/en/latest/installation.html#installing-with-conda
.. _`Intel Distribution of Modin`: https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/distribution-of-modin.html#gs.86stqv
.. _`Intel Distribution of Modin Getting Started`: https://www.intel.com/content/www/us/en/developer/articles/technical/intel-distribution-of-modin-getting-started-guide.html
.. |reg|    unicode:: U+000AE .. REGISTERED SIGN
.. _Colab: https://colab.research.google.com/


================================================
FILE: docs/getting_started/quickstart.rst
================================================
Getting Started
===============

.. note:: 
  | *Estimated Reading Time: 10 minutes*
  | You can follow along this tutorial in a Jupyter notebook `here <https://github.com/modin-project/modin/tree/main/examples/quickstart.ipynb>`_. 

.. toctree::
    :hidden:
    :maxdepth: 4
    
    10-min Quickstart Guide <self>
    installation
    using_modin/using_modin
    why_modin/why_modin
    examples
    faq
    troubleshooting

.. meta::
    :description lang=en:
        Introduction to Modin.

Quick Start Guide
-----------------

To install the most recent stable release for Modin run the following:

.. code-block:: bash

  pip install "modin[all]" 

For further instructions on how to install Modin with conda or for specific platforms 
or engines, see our detailed `installation guide <../getting_started/installation.html>`_.

Modin acts as a drop-in replacement for pandas so you simply have to replace the import 
of pandas with the import of Modin as follows to speed up your pandas workflows:

.. code-block:: python

  # import pandas as pd
  import modin.pandas as pd

Example: Instant Scalability with No Extra Effort
-------------------------------------------------

When working on large datasets, pandas becomes painfully slow or :doc:`runs out of memory</getting_started/why_modin/out_of_core>`. Modin automatically scales up your 
pandas workflows by parallelizing the dataframe operations, so that you can more 
effectively leverage the compute resources available.

For the purpose of demonstration, we will load in modin as ``pd`` and pandas as 
``pandas``.

.. code-block:: python

  import modin.pandas as pd
  import pandas

  #############################################
  ### For the purpose of timing comparisons ###
  #############################################
  import time
  import ray
  # Look at the Ray documentation with respect to the Ray configuration suited to you most.
  ray.init()
  #############################################

In this toy example, we look at the NYC taxi dataset, which is around 200MB in size. You can download `this dataset <https://modin-datasets.intel.com/testing/yellow_tripdata_2015-01.csv>`_ to run the example locally.

.. code-block:: python

  # This may take a few minutes to download
  import urllib.request
  dataset_url = "https://modin-datasets.intel.com/testing/yellow_tripdata_2015-01.csv"
  urllib.request.urlretrieve(dataset_url, "taxi.csv")  

Faster Data Loading with ``read_csv``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. code-block:: python
   
  start = time.time()

  pandas_df = pandas.read_csv(dataset_url, parse_dates=["tpep_pickup_datetime", "tpep_dropoff_datetime"], quoting=3)

  end = time.time()
  pandas_duration = end - start
  print("Time to read with pandas: {} seconds".format(round(pandas_duration, 3)))

By running the same command ``read_csv`` with Modin, we generally get around 4X speedup 
for loading in the data in parallel. 

.. code-block:: python

  start = time.time()

  modin_df = pd.read_csv(dataset_url, parse_dates=["tpep_pickup_datetime", "tpep_dropoff_datetime"], quoting=3)

  end = time.time()
  modin_duration = end - start
  print("Time to read with Modin: {} seconds".format(round(modin_duration, 3)))

  print("Modin is {}x faster than pandas at `read_csv`!".format(round(pandas_duration / modin_duration, 2)))

Faster ``concat`` across multiple dataframes
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Our previous ``read_csv`` example operated on a relatively small dataframe. In the 
following example, we duplicate the same taxi dataset 100 times and then concatenate 
them together, resulting in a dataset around 19GB in size.

.. code-block:: python

  start = time.time()

  big_pandas_df = pandas.concat([pandas_df for _ in range(25)])

  end = time.time()
  pandas_duration = end - start
  print("Time to concat with pandas: {} seconds".format(round(pandas_duration, 3)))

.. code-block:: python

  start = time.time()

  big_modin_df = pd.concat([modin_df for _ in range(25)])

  end = time.time()
  modin_duration = end - start
  print("Time to concat with Modin: {} seconds".format(round(modin_duration, 3)))

  print("Modin is {}x faster than pandas at `concat`!".format(round(pandas_duration / modin_duration, 2)))

Modin speeds up the ``concat`` operation by more than 60X, taking less than a second to 
create the large dataframe, while pandas took close to a minute.


Faster ``apply`` over a single column
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The performance benefits of Modin become apparent when we operate on large 
gigabyte-scale datasets. Let's say we want to round up values 
across a single column via the ``apply`` operation. 

.. code-block:: python

  start = time.time()
  rounded_trip_distance_pandas = big_pandas_df["trip_distance"].apply(round)

  end = time.time()
  pandas_duration = end - start
  print("Time to apply with pandas: {} seconds".format(round(pandas_duration, 3)))

.. code-block:: python
  
  start = time.time()

  rounded_trip_distance_modin = big_modin_df["trip_distance"].apply(round)

  end = time.time()
  modin_duration = end - start
  print("Time to apply with Modin: {} seconds".format(round(modin_duration, 3)))

  print("Modin is {}x faster than pandas at `apply` on one column!".format(round(pandas_duration / modin_duration, 2)))

Modin is more than 30X faster at applying a function to a single column of data, operating on 130+
million rows in a second.

In short, Modin provides orders of magnitude speed up over pandas for a variety of operations out of the box. 

.. figure:: ../img/quickstart_speedup.svg
   :align: center

Summary
-------

Hopefully, this tutorial demonstrated how Modin delivers significant speedup on pandas 
operations without the need for any extra effort. Throughout the example, we moved from
working with 100MBs of data to 20GBs of data all without having to change anything or 
manually optimize our code to achieve the level of scalable performance that Modin 
provides.

Note that in this quickstart example, we've only shown ``read_csv``, ``concat``, 
``apply``, but these are not the only pandas operations that Modin optimizes for. In 
fact, Modin covers `more than 90\% of the pandas API <https://github.com/modin-project/modin/blob/main/README.md#pandas-api-coverage>`_, yielding considerable speedups for 
many common operations.


================================================
FILE: docs/getting_started/troubleshooting.rst
================================================
Troubleshooting
===============

We hope your experience with Modin is bug-free, but there are some quirks about Modin
that may require troubleshooting. If you are still having issues, please post on
the #support channel on our Slack_ community or open a Github issue_.

Frequently encountered issues
-----------------------------

This is a list of the most frequently encountered issues when using Modin. Some of these
are working as intended, while others are known bugs that are being actively worked on.

Warning during execution: ``defaulting to pandas``
""""""""""""""""""""""""""""""""""""""""""""""""""

Please note that while Modin covers a large portion of the pandas API, not all functionality is implemented. For methods that are not yet implemented, such as ``asfreq``, you may see the following:

.. code-block:: text

  UserWarning: `DataFrame.asfreq` defaulting to pandas implementation.

To understand which functions will lead to this warning, we have compiled a list of :doc:`currently supported methods </supported_apis/index>`. When you see this warning, Modin defaults to pandas by converting the Modin dataframe to pandas to perform the operation. Once the operation is complete in pandas, it is converted back to a Modin dataframe. These operations will have a high overhead due to the communication involved and will take longer than pandas. When this is happening, a warning will be given to the user to inform them that this operation will take longer than usual. You can learn more about this :doc:`here </supported_apis/defaulting_to_pandas>`.

If you would like to request a particular method be implemented, feel free to open an
`issue`_. Before you open an issue please make sure that someone else has not already
requested that functionality.

Hanging on ``import modin.pandas as pd``
""""""""""""""""""""""""""""""""""""""""

This can happen when Ray fails to start. It will keep retrying, but often it is faster
to just restart the notebook or interpreter. Generally, this should not happen. Most
commonly this is encountered when starting multiple notebooks or interpreters in quick
succession.

**Solution**

Restart your interpreter or notebook kernel.

**Avoiding this Error**

Avoid starting many Modin notebooks or interpreters in quick succession. Wait 2-3
seconds before starting the next one.

Importing heterogeneous data using ``read_csv``
"""""""""""""""""""""""""""""""""""""""""""""""

Since Modin's ``read_csv`` imports data in parallel, it is possible for data across
partitions to be heterogeneously typed (this can happen when columns contain
heterogeneous data, i.e. values in the same column are of different types). An example
of how this is handled is shown below.

.. code-block:: python

  import os
  import pandas
  import modin.pandas as pd
  from modin.config import NPartitions

  NPartitions.put(2)

  test_filename = "test.csv"
  # data with heterogeneous values in the first column
  data = """one,2
  3,4
  5,6
  7,8
  9.0,10
  """
  kwargs = {
      # names of the columns to set, if `names` parameter is set,
      # header inference from the first data row/rows will be disabled
      "names": ["col1", "col2"],

      # explicit setting of data type of column/columns with heterogeneous
      # data will force partitions to read data with correct dtype
      # "dtype": {"col1": str},
  }


  try :
      with open(test_filename, "w") as f:
          f.write(data)

      pandas_df = pandas.read_csv(test_filename, **kwargs)
      pd_df = pd.read_csv(test_filename, **kwargs)

      print(pandas_df)
      print(pd_df)
  finally:
      os.remove(test_filename)

  Output:

  pandas_df:
    col1  col2
  0  one     2
  1    3     4
  2    5     6
  3    7     8
  4  9.0    10

  pd_df:
    col1  col2
  0  one     2
  1    3     4
  2    5     6
  3  7.0     8
  4  9.0    10


In this case, ``col1`` of the `DataFrame` read by pandas contains only ``str`` data
because the first value ("one") is inferred to have type ``str``, which forces pandas to handle the rest of the values in the column
as strings. The first Modin partition (the first three rows) handles the data as pandas does,
but the second partition (the last two rows) reads the data as floats. This is because, within the
second partition, ``col1`` contains an int and a float, and thus the column type is inferred to be float. As a
result, `7` is interpreted as `7.0`, which differs from the pandas output.

The above example demonstrates heterogeneous data import with str, int, and float types,
but heterogeneous data consisting of other data/parameter combinations can also result in 
data type mismatches with pandas.

**Solution**

When heterogeneous data is detected, a warning will be raised.
Currently, these discrepancies aren't properly handled
by Modin, so to avoid this issue, you need to set the ``dtype`` parameter of ``read_csv``
manually to force the correct data type coercion during data import. Note that
to avoid excessive performance degradation, the ``dtype`` value should only be set for
columns that may contain heterogeneous data.

Specifying the ``dtype`` parameter will work well in most cases. If the file
contains a column that should be interpreted as the index
(the ``index_col`` parameter is specified) there may still be type discrepancies in the index, since the ``dtype`` parameter is only responsible for data
fields. If in the above example, ``kwargs`` was set like so:

.. code-block:: python

  kwargs = {
      "names": ["col1", "col2"],
      "dtype": {"col1": str},
      "index_col": "col1",
  }

The resulting Modin DataFrame will contain incorrect values - just as if ``dtype``
had not been specified:

.. code-block:: python

  col1
  one      2
  3        4
  5        6
  7.0      8
  9.0     10

One workaround is to import the data without setting the ``index_col`` parameter, and then 
set the index column using the ``DataFrame.set_index`` function as shown in
the example below:

.. code-block:: python

  pd_df = pd.read_csv(filename, dtype=data_dtype, index_col=None)
  pd_df = pd_df.set_index(index_col_name)
  pd_df.index.name = None


Using Modin with python multiprocessing
"""""""""""""""""""""""""""""""""""""""

We strongly recommend against using a distributed execution engine (e.g. Ray or Dask)
in conjunction with Python multiprocessing because that can lead to undefined behavior.
One such example is shown below:

.. code-block:: python

  import modin.pandas as pd

  # Ray engine is used by default
  df = pd.DataFrame([1, 2, 3])

  def f(arg):
    return df + arg

  if __name__ == '__main__':
    from multiprocessing import Pool

    with Pool(5) as p:
        print(p.map(f, [1]))

Although this example may work on your machine, we do not recommend it, because
the Python multiprocessing library will duplicate Ray clusters, causing both
excessive resource usage and conflict over the available resources.

Poor performance of the first operation with Modin on Ray engine
""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

There might be cases when the first operation with Modin on Ray engine is much slower than the subsequent calls of the operation.
That happens because Ray workers may not be fully set up yet to perform computation after initialization of the engine
with ``ray.init(runtime_env={'env_vars': {'__MODIN_AUTOIMPORT_PANDAS__': '1'}})``, which is the default behavior of Modin on Ray engine
if Ray has not been initialised yet. Modin intentionally initializes Ray this way to import ``pandas`` in workers
once the Python interpreter is started in them, in order to avoid a race condition in Ray between the import thread and the thread executing the code.

..
      See more details on why we started using ``ray.init(runtime_env={'env_vars': {'__MODIN_AUTOIMPORT_PANDAS__': '1'}})`` in
      https://github.com/modin-project/modin/pull/4603.

.. code-block:: python

  import time
  import pandas
  import numpy as np
  import ray
  import modin.pandas as pd
  import modin.config as cfg

  # Look at the Ray documentation with respect to the Ray configuration suited to you most.
  ray.init(runtime_env={'env_vars': {'__MODIN_AUTOIMPORT_PANDAS__': '1'}})

  pandas_df = pandas.DataFrame(
    np.random.randint(0, 100, size=(1000000, 13))
  )
  pandas_df.to_csv("foo.csv", index=False)

  def read_csv_with_pandas():
    start_time = time.time()
    pandas_df = pandas.read_csv("foo.csv", index_col=0)
    end_time = time.time()
    pandas_duration = end_time - start_time
    print("Time to read_csv with pandas: {} seconds".format(round(pandas_duration, 3)))
    return pandas_df

  def read_csv_with_modin():
    start_time = time.time()
    modin_df = pd.read_csv("foo.csv", index_col=0)
    end_time = time.time()
    modin_duration = end_time - start_time
    print("Time to read_csv with Modin: {} seconds".format(round(modin_duration, 3))) 
    return modin_df

  for i in range(5):
    pandas_df = read_csv_with_pandas()
    modin_df = read_csv_with_modin()

  Time to read_csv with pandas: 0.708 seconds
  Time to read_csv with Modin: 4.132 seconds
  Time to read_csv with pandas: 0.735 seconds
  Time to read_csv with Modin: 0.37 seconds
  Time to read_csv with pandas: 0.646 seconds
  Time to read_csv with Modin: 0.377 seconds
  Time to read_csv with pandas: 0.673 seconds
  Time to read_csv with Modin: 0.371 seconds
  Time to read_csv with pandas: 0.672 seconds
  Time to read_csv with Modin: 0.379 seconds

**Solution**

So far there is no solution to fix or work around the problem other than not passing a non-empty runtime_env to ``ray.init()``.
However, this may lead to another problem: a race condition in Ray between the import thread and the thread executing the code.
So for now we just highlight the problem in hope of a future fix in Ray itself.

Also, it is worth noting that every distributed engine by its nature has a little overhead for the first operation being called,
which may be important for microbenchmarks. What you likely want to do is warm up worker processes
either by excluding the time of the first iteration from your measurements or by executing a simple function in workers to fully set them up.

Common errors
-------------

Error when using Dask engine: ``RuntimeError: if __name__ == '__main__':``
""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

The following `script.py` uses Modin with Dask as an execution engine and produces errors:

.. code-block:: python

  # script.py
  import modin.pandas as pd
  import modin.config as cfg

  cfg.Engine.put("dask")

  df = pd.DataFrame([0,1,2,3])
  print(df)

Part of the error output produced by the script above is the following:

.. code-block::

  File "/path/python3.9/multiprocessing/spawn.py", line 134, in _check_not_importing_main
    raise RuntimeError('''
    RuntimeError: 
        An attempt has been made to start a new process before the
        current process has finished its bootstrapping phase.

        This probably means that you are not using fork to start your
        child processes and you have forgotten to use the proper idiom
        in the main module:

            if __name__ == '__main__':
                freeze_support()
                ...

        The "freeze_support()" line can be omitted if the program
        is not going to be frozen to produce an executable.

This happens because Dask Client uses `spawn <https://docs.python.org/3/library/multiprocessing.html#contexts-and-start-methods>`_
rather than fork to start processes, so the main module is re-imported in every child process.

**Solution**

To avoid the problem the Dask Client creation code needs to be moved into the ``__main__`` scope of the module.

The corrected `script.py` would look like:

.. code-block:: python

  # script.py
  import modin.pandas as pd
  import modin.config as cfg

  cfg.Engine.put("dask")

  if __name__ == "__main__":
    df = pd.DataFrame([0, 1, 2, 3]) # Dask Client creation is hidden in the first call of Modin functionality.
    print(df)

or

.. code-block:: python

  # script.py
  from distributed import Client
  import modin.pandas as pd
  import modin.config as cfg

  cfg.Engine.put("dask")

  if __name__ == "__main__":
    # Explicit Dask Client creation.
    # See the Dask Distributed documentation for the Client configuration that suits you best.
    client = Client()
    df = pd.DataFrame([0, 1, 2, 3])
    print(df)

Spurious error "cannot import partially initialised pandas module" on custom Ray cluster
""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

If you're using a pre-configured Ray cluster to run Modin, you may
see spurious errors like

.. code-block::

  ray.exceptions.RaySystemError: System error: partially initialized module 'pandas' has no attribute 'core' (most likely due to a circular import)
  traceback: Traceback (most recent call last):
    File "/usr/share/miniconda/envs/modin/lib/python3.8/site-packages/ray/serialization.py", line 340, in deserialize_objects
      obj = self._deserialize_object(data, metadata, object_ref)
    File "/usr/share/miniconda/envs/modin/lib/python3.8/site-packages/ray/serialization.py", line 237, in _deserialize_object
      return self._deserialize_msgpack_data(data, metadata_fields)
    File "/usr/share/miniconda/envs/modin/lib/python3.8/site-packages/ray/serialization.py", line 192, in _deserialize_msgpack_data
      python_objects = self._deserialize_pickle5_data(pickle5_data)
    File "/usr/share/miniconda/envs/modin/lib/python3.8/site-packages/ray/serialization.py", line 180, in _deserialize_pickle5_data
      obj = pickle.loads(in_band, buffers=buffers)
    File "/usr/share/miniconda/envs/modin/lib/python3.8/site-packages/pandas/__init__.py", line 135, in <module>
      from pandas import api, arrays, errors, io, plotting, testing, tseries
    File "/usr/share/miniconda/envs/modin/lib/python3.8/site-packages/pandas/testing.py", line 6, in <module>
      from pandas._testing import (
    File "/usr/share/miniconda/envs/modin/lib/python3.8/site-packages/pandas/_testing/__init__.py", line 979, in <module>
      cython_table = pd.core.common._cython_table.items()
  AttributeError: partially initialized module 'pandas' has no attribute 'core' (most likely due to a circular import)

**Solution**

Modin contains a workaround that should automatically do ``import pandas`` when a worker process starts.

It is triggered by the presence of a non-empty ``__MODIN_AUTOIMPORT_PANDAS__`` environment variable, which
Modin sets up automatically on the Ray clusters it spawns, but which might be missing on pre-configured clusters.

So if you're seeing an issue like the one shown above, please make sure you set this environment variable on all
worker nodes of your cluster before actually spawning the workers.

.. _issue: https://github.com/modin-project/modin/issues
.. _Slack: https://modin.org/slack.html


================================================
FILE: docs/getting_started/using_modin/using_modin.rst
================================================
Using Modin
===========

In this section, we show how Modin can be used to accelerate your pandas workflows, from a
single machine up to multiple machines in a cluster setting.

.. toctree::
    :maxdepth: 4
    
    using_modin_locally
    using_modin_cluster
    

================================================
FILE: docs/getting_started/using_modin/using_modin_cluster.rst
================================================
Using Modin in a Cluster
========================

.. note::
  | *Estimated Reading Time: 15 minutes*

Often in practice we have a need to exceed the capabilities of a single machine.
Modin works and performs well in both local mode and in a cluster environment.
The key advantage of Modin is that your python code does not change between
local development and cluster execution. Users are not required to think about
how many workers exist or how to distribute and partition their data;
Modin handles all of this seamlessly and transparently.

.. note::
   It is possible to use a Jupyter notebook, but you will have to deploy a Jupyter server 
   on the remote cluster head node and connect to it.

.. image:: ../../img/modin_cluster.png
   :alt: Modin cluster
   :align: center

Extra requirements for AWS authentication
-----------------------------------------

First of all, install the necessary dependencies in your environment:

.. code-block:: bash

   pip install boto3

The next step is to set up your AWS credentials. You can set ``AWS_ACCESS_KEY_ID``,
``AWS_SECRET_ACCESS_KEY`` and ``AWS_SESSION_TOKEN`` (optional)
(refer to `AWS CLI environment variables`_ to get more insight on this) or
just run the following command:

.. code-block:: bash

   aws configure

Starting and connecting to the cluster
--------------------------------------

This example starts 1 head node (m5.24xlarge) and 5 worker nodes (m5.24xlarge), 576 total CPUs.
You can check the `Amazon EC2 pricing`_ page.

It is possible to manually create AWS EC2 instances and configure them, or just use the `Ray CLI`_ to
create and initialize a Ray cluster on AWS using `Modin's Ray cluster setup config`_,
which we are going to utilize in this example.
Refer to `Ray's autoscaler options`_ page on how to modify the file.

More details on how to launch a Ray cluster can be found on `Ray's cluster docs`_.

To start up the Ray cluster, run the following command in your terminal:

.. code-block:: bash

   ray up modin-cluster.yaml

Once the head node has completed initialization, you can optionally connect to it by running the following command.

.. code-block:: bash

   ray attach modin-cluster.yaml

To exit the ssh session and return back into your local shell session, type:

.. code-block:: bash

   exit

Executing in a cluster environment
----------------------------------

.. note::
   Be careful when using the `Ray client`_ to connect to a remote cluster.
   We don't recommend this connection mode, because it may not work. Known bugs:

   - https://github.com/ray-project/ray/issues/38713,
   - https://github.com/modin-project/modin/issues/6641.

Modin lets you instantly speed up your workflows on large data by scaling pandas
on a cluster. In this tutorial, we will use a 12.5 GB ``big_yellow.csv`` file that was
created by concatenating a 200MB `NYC Taxi dataset`_ file 64 times. The preparation of this
file is included in our `Modin's Ray cluster setup config`_.

If you want to use another dataset, you should provide it to each of
the cluster nodes at the same path. We recommend doing this by customizing the
``setup_commands`` section of the `Modin's Ray cluster setup config`_.

To run any script in a remote cluster, you need to submit it to Ray. In this way,
the script file is sent to the remote cluster head node and executed there.

In this tutorial, we provide the `exercise_5.py`_ script, which reads the data from the
CSV file and executes such pandas operations as count, groupby and map.
As a result, you will see the size of the file being read and the execution time of the entire script.

You can submit this script to the existing remote cluster by running the following command.

.. code-block:: bash

   ray submit modin-cluster.yaml exercise_5.py

To download or upload files to the cluster head node, use ``ray rsync_down`` or ``ray rsync_up``.
It may help if you want to use some other Python modules that should be available to
execute your own script or download a result file after executing the script.

.. code-block:: bash

   # download a file from the cluster to the local machine:
   ray rsync_down modin-cluster.yaml '/path/on/cluster' '/local/path'
   # upload a file from the local machine to the cluster:
   ray rsync_up modin-cluster.yaml '/local/path' '/path/on/cluster'

Shutting down the cluster
--------------------------

Now that we have finished the computation, we need to shut down the cluster with the ``ray down`` command.

.. code-block:: bash

   ray down modin-cluster.yaml

.. _`Ray's autoscaler options`: https://docs.ray.io/en/latest/cluster/vms/references/ray-cluster-configuration.html#cluster-config
.. _`Ray's cluster docs`: https://docs.ray.io/en/latest/cluster/getting-started.html
.. _`NYC Taxi dataset`: https://modin-datasets.intel.com/testing/yellow_tripdata_2015-01.csv
.. _`Modin's Ray cluster setup config`: https://github.com/modin-project/modin/blob/main/examples/tutorial/jupyter/execution/pandas_on_ray/cluster/modin-cluster.yaml
.. _`Amazon EC2 pricing`: https://aws.amazon.com/ec2/pricing/on-demand/
.. _`exercise_5.py`: https://github.com/modin-project/modin/blob/main/examples/tutorial/jupyter/execution/pandas_on_ray/cluster/exercise_5.py
.. _`Ray client`: https://docs.ray.io/en/latest/cluster/running-applications/job-submission/ray-client.html
.. _`Ray CLI`: https://docs.ray.io/en/latest/cluster/vms/getting-started.html#running-applications-on-a-ray-cluster
.. _`AWS CLI environment variables`: https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-envvars.html

================================================
FILE: docs/getting_started/using_modin/using_modin_locally.rst
================================================
===================
Using Modin Locally
===================

.. note::
  | *Estimated Reading Time: 5 minutes*
  | You can follow along this tutorial in the `Jupyter notebook`_.

In our quickstart example, we have already seen how you can achieve considerable
speedup from Modin, even on a single machine. Users do not need to know how many
cores their system has, nor do they need to specify how to distribute the data. In fact,
users can **continue using their existing pandas code** while experiencing a
considerable speedup from Modin, even on a single machine.

To use Modin on a single machine, only a modification of the import statement is needed.
Once you've changed your import statement, you're ready to use Modin
just like you would pandas, since the API is identical to pandas.

.. code-block:: python

  # import pandas as pd
  import modin.pandas as pd

**That's it. You're ready to use Modin on your previous pandas workflows!**

Advanced: Configuring the resources Modin uses
----------------------------------------------

Modin automatically checks the number of CPUs available on your machine and sets the
number of partitions to be equal to the number of CPUs. You can verify this by running
the following code:

.. code-block:: python

   import modin
   print(modin.config.NPartitions.get()) #prints 16 on a laptop with 16 physical cores

Modin fully utilizes the resources on your machine. To read more about how this works,
see the :doc:`Why Modin? </getting_started/why_modin/pandas/>` page.

Since Modin uses all of the resources available on your machine by default, you may
at times want to limit the amount of resources Modin uses in order to free resources
for another task or user. Here is how you would limit the number of CPUs
Modin uses via a bash environment variable:

.. code-block:: bash

   export MODIN_CPUS=4


You can also specify this in your python script with ``os.environ``:

.. code-block:: python

   import os
   os.environ["MODIN_CPUS"] = "4"
   import modin.pandas as pd

If you're using a specific engine and want more control over the environment Modin
uses, you can start Ray or Dask in your environment and Modin will connect to it.

.. code-block:: python

   import ray
   ray.init(num_cpus=4)
   import modin.pandas as pd

Specifying ``num_cpus`` limits the number of processors that Modin uses. You may also
specify more processors than you have available on your machine; however this will not
improve the performance (and might end up hurting the performance of the system).

.. note::
   Make sure to update the ``MODIN_CPUS`` configuration and initialize your preferred
   engine before you start working with the first operation using Modin! Otherwise,
   Modin will opt for the default setting.


.. _`Jupyter notebook`: https://github.com/modin-project/modin/tree/main/examples/quickstart.ipynb


================================================
FILE: docs/getting_started/why_modin/modin_vs_dask_vs_koalas.rst
================================================
Modin vs. Dask DataFrame vs. Koalas
===================================

Libraries such as `Dask DataFrame <https://docs.dask.org/en/stable/dataframe.html>`_ (DaskDF for short) and `Koalas <https://koalas.readthedocs.io/en/latest/>`_ aim to support the pandas API on top of distributed computing frameworks, Dask and Spark respectively. Instead, Modin aims to preserve the pandas API and behavior as is, while abstracting away the details of the distributed computing framework underneath. Thus, the aims of these libraries are fundamentally different.

Specifically, Modin enables pandas-like

* row and column-parallel operations, unlike DaskDF and Koalas that only support row-parallel operations
* indexing & ordering semantics, unlike DaskDF and Koalas that deviate from these semantics
* eager execution, unlike DaskDF and Koalas that provide lazy execution

As a result, Modin's coverage is `more than 90% <https://github.com/modin-project/modin#pandas-api-coverage>`_ of the pandas API, while DaskDF and Koalas' coverage is about 55%. 

.. figure:: ../../img/api_coverage_comparison.svg
   :align: center
   :alt: Percentage coverage of the pandas API after deduplication

For more technical details please see our VLDB 2022 research paper, referenced `here <https://people.eecs.berkeley.edu/~totemtang/paper/Modin.pdf>`_. 

Brief Overview of DaskDF and Koalas
-----------------------------------

Dask's `DataFrame <https://docs.dask.org/en/stable/dataframe.html>`_ (DaskDF) is effectively a meta-DataFrame, partitioning and scheduling many smaller ``pandas.DataFrame`` objects. Users construct a task graph of dataframe computation step by step and then trigger computation using the ``compute`` function.

Spark's `Koalas <https://koalas.readthedocs.io/en/latest/>`_ provides the pandas API on Spark, leveraging the preexisting Spark SQL optimizer to execute select pandas commands. Like DaskDF, Koalas also employs lazy computation, only triggering computation when the user requests to see the results.

Partitioning and Parallelization
--------------------------------

Modin, DaskDF, Koalas are all examples of parallel dataframe systems. Parallelism is achieved by partitioning a large dataframe into smaller ones that can be operated on in parallel. As a result, the partitioning scheme chosen by the system dictates the pandas functions that can or can not be supported.

**DaskDF and Koalas only support row-oriented partitioning and parallelism.** This approach is analogous to relational databases. The dataframe is conceptually broken down into horizontal partitions along rows, where each partition is independently processed if possible. When DaskDF or Koalas are required to perform column-parallel operations that need to be done on columns independently (e.g., dropping columns with null values via ``dropna`` on the column ``axis``), they either perform very poorly with no parallelism or do not support that operation.

**Modin supports row, column, and cell-oriented partitioning and parallelism**. That is, the dataframe can be conceptually broken down as groups of rows, groups of columns, or both groups of rows and groups of columns (effectively a block or sub-matrix). Modin will transparently reshape the partitioning as necessary for the corresponding operation, based on whether the operation is row-parallel, column-parallel, or cell-parallel (independently applied to each unit cell). This allows Modin to support more of the pandas API and do so efficiently. Due to the finer-grained control over the partitioning, Modin can support a number of operations that are very challenging to parallelize in row-oriented systems (e.g., ``transpose``, ``median``, ``quantile``). This flexibility in partitioning also gives Modin tremendous power to implement efficient straggler mitigation and improve utilization over the entire cluster.

API Coverage
------------

One of the key benefits of pandas is its versatility, due to the wide array of operations, with more than 600 API operations for data cleaning, feature engineering, data transformation, data summarization, data exploration, and machine learning. However, it is not trivial to develop scalable implementations of each of these operations in a dataframe system.
**DaskDF and Koalas only implement about** `55%  <https://arxiv.org/abs/2001.00888>`_ **of the pandas API**; they do not implement certain APIs that would deviate from the row-wise partitioning approach, or would be inefficient with the row-wise parallelization. For example, Dask does not implement ``iloc``, ``MultiIndex``, ``apply(axis=0)``, ``quantile`` (only approximate quantile is available), ``median``, and more. Given DaskDF's row-oriented architecture, ``iloc``, for example, can technically be implemented, but it would be inefficient, and column-wise operations such as ``apply(axis=0)`` would be impossible to implement. Similarly, Koalas does not implement ``apply(axis=0)`` (it only applies the function per row partition, giving a different result), ``quantile``, ``median`` (only approximate quantile/median is available), ``MultiIndex``, ``combine``, ``compare`` and more.

**Modin supports all of the above pandas API functions, as well as others, with** `more than 90% <https://github.com/modin-project/modin#pandas-api-coverage>`_ **coverage of the pandas API.**  Modin additionally acts as a drop-in replacement for pandas, such that even if the API is not yet supported, it still works by falling back to running vanilla pandas. One of the key features of being a drop-in replacement is that not only will it work for existing code, if a user wishes to go back to running pandas directly, they are not locked in to using Modin and can switch between Modin and pandas at no cost. In other words, scripts and notebooks written in Modin can be converted to and from pandas as the user desires by simply replacing the import statement.

Execution Semantics
---------------------

**DaskDF and Koalas make use of lazy evaluation, which means that the computation is delayed until users explicitly evaluate the results.** This mode of evaluation places a lot of optimization responsibility on the user, forcing them to think about when it would be useful to inspect the intermediate results or delay doing so. Specifically, DaskDF's API differs from pandas in that it requires users to explicitly call ``.compute()`` to materialize the result of the computation. Often if that computation corresponds to a long chain of operators, this call can take a very long time to execute. Overall, the need to explicitly trigger computation makes the API less convenient to work with, but gives DaskDF and Koalas the opportunity to perform holistic optimizations over the entire dataflow graph. However, to the best of our knowledge, neither DaskDF nor Koalas actually leverage holistic optimizations.

**Modin employs eager evaluation, like pandas.** Eager evaluation is the default mode of operation for data scientists when working with pandas in an interactive environment, such as Jupyter Notebooks. Modin reproduces this familiar behavior by performing all computations eagerly as soon as they are issued, so that users can inspect intermediate results and quickly see the results of their computations without having to wait or explicitly trigger computation. This is especially useful during interactive data analysis, where users often iterate on their dataframe workflows or build up their dataframe queries in an incremental fashion. We also have developed techniques for `opportunistic evaluation <https://arxiv.org/pdf/2103.02145.pdf>`_ that bridge the gap between lazy and eager evaluation and will be incorporated in Modin in the future.

Ordering Semantics
------------------

By default, pandas preserves the order of the dataframe, so that users can expect a consistent, ordered view as they are operating on their dataframe. 

**Both DaskDF and Koalas make no guarantees about the order of rows in the DataFrame.**  This is because DaskDF sorts the ``index`` for optimization purposes to speed up computations that involve the row index; and as a result, it does not support user-specified order. Likewise, Koalas `does not support ordering <https://koalas.readthedocs.io/en/latest/whatsnew/v0.27.0.html#head-ordering>`_ by default because it will lead to a performance overhead when operating on distributed datasets. 

**DaskDF additionally does not support multi-indexing or sorting.** 
DaskDF sorts the data based on a single set of row labels for fast row lookups, and builds an indexing structure based on these row labels. Data is both logically and physically stored in the same order. As a result, DaskDF does not support a `sort` function.

**Modin reproduces the intuitive behavior in pandas where the order of the DataFrame is preserved, and supports multi-indexing.** Enforcing ordering on a parallel dataframe system like Modin requires non-trivial effort that involves decoupling of the logical and physical representation of the data, enabling the order to be lazily kept up-to-date, but eagerly computed based on user needs (See Section 4.2 in `our recent paper <https://people.eecs.berkeley.edu/~totemtang/paper/Modin.pdf>`_). Modin abstracts away the physical representation of the data and provides an ordered view that is consistent with user's expectations.

Compatibility with Computational Frameworks
-------------------------------------------

**DaskDF and Koalas are meant to be run on Dask and Spark respectively.** They are highly tuned to the corresponding frameworks, and cannot be ported to other computational frameworks.

**Modin's highly modular design is architected to run on a variety of systems, and support a variety of APIs.** The goal for the extensible design is that users can take the same notebook or script and seamlessly move between different clusters and environments, with Modin being able to support the pandas API on your preexisting infrastructure. Currently, Modin supports running on Dask's compute engine in addition to Ray. The modular design makes it easier for developers to add different execution engines or compile to different memory formats. Modin can run on a Dask cluster in the same way that DaskDF can, but they differ in the ways described above. In addition, Modin is continually expanding to support popular data processing APIs (SQL in addition to pandas, among other DSLs for data processing) while leveraging the same underlying execution framework. Modin's flexible architecture also means that as the `pandas API continues to evolve <https://data-apis.org/blog/announcing_the_consortium/>`_, Modin can quickly move towards supporting new versions of the pandas API.

.. figure:: ../../img/performance-all-supported.svg
   :align: center
   :alt: Scalability of operators supported by Modin and other systems
   :width: 95%

Performance Comparison
----------------------

**On operations supported by all systems, Modin provides substantial speedups.** Thanks to its optimized design, Modin is able to take advantage of multiple cores relative to both Koalas and DaskDF to efficiently execute pandas operations. It is notable that Koalas is often slower than pandas, due to the overhead of Spark. 

.. figure:: ../../img/performance-not-all-supported.svg
   :align: center
   :alt: Scalability of operators supported by Modin but not by other systems

**Modin provides substantial speedups even on operators not supported by other systems.** Thanks to its flexible partitioning schemes that enable it to support the vast majority of pandas operations — be it row, column, or cell-oriented — Modin provides benefits on operations such as ``join``, ``median``, and ``infer_types``. While Koalas performs ``join`` slower than pandas, Dask failed to support ``join`` on more than 20M rows, likely due to poor support for `shuffles <https://coiled.io/blog/better-shuffling-in-dask-a-proof-of-concept/>`_. Details of the benchmark and additional join experiments can be found in `our paper <https://people.eecs.berkeley.edu/~totemtang/paper/Modin.pdf>`_.

.. _documentation: http://docs.dask.org/en/latest/DataFrame.html#design.
.. _Modin's documentation: https://modin.readthedocs.io/en/latest/development/architecture.html


================================================
FILE: docs/getting_started/why_modin/out_of_core.rst
================================================
Out-of-memory data with Modin
=============================

.. note::
  | *Estimated Reading Time: 10 minutes*
  
When using pandas, you might run into a memory error if you are working with large datasets that cannot fit in memory or perform certain memory-intensive operations (e.g., joins). 

Modin solves this problem by spilling over to disk, in other words, it uses your disk as an overflow for memory so that you can work with datasets that are too large to fit in memory. By default, Modin leverages out-of-core methods to handle datasets that don't fit in memory for both Ray and Dask engines.

.. note::
  Object spilling is disabled in a multi-node Ray cluster by default. To enable object spilling,
  follow the `Ray instructions <https://docs.ray.io/en/latest/ray-core/objects/object-spilling.html#cluster-mode>`_.


Motivating Example: Memory error with pandas
--------------------------------------------

pandas makes use of in-memory data structures to store and operate on data, which means that if you have a dataset that is too large to fit in memory, it will cause an error in pandas. As an example, let's create an 80GB DataFrame by appending together 40 different 2GB DataFrames.

.. code-block:: python

  import pandas
  import numpy as np
  df = pandas.concat([pandas.DataFrame(np.random.randint(0, 100, size=(2**20, 2**8))) for _ in range(40)]) # Memory Error!

When we run this on a laptop with 32GB of RAM, pandas will run out of memory and throw an error (e.g., :code:`MemoryError` , :code:`Killed: 9`). 

The `pandas documentation <https://pandas.pydata.org/pandas-docs/stable/user_guide/scale.html>`_ has a great section on recommendations for scaling your analysis to these larger datasets. However, this generally involves loading in less data or rewriting your pandas code to process the data in smaller chunks. 

Operating on out-of-memory data with Modin
------------------------------------------

In order to work with data that exceeds memory constraints, you can use Modin to handle these large datasets.

.. code-block:: python

  import modin.pandas as pd
  import numpy as np
  df = pd.concat([pd.DataFrame(np.random.randint(0, 100, size=(2**20, 2**8))) for _ in range(40)]) # 40x2GB frames -- Working!
  df.info()

Not only does Modin let you work with datasets that are too large to fit in memory, it also lets you perform various operations on them without worrying about memory constraints.

Advanced: Configuring out-of-core settings
------------------------------------------

.. why would you want to disable out of core?

By default, out-of-core functionality is enabled by the compute engine selected. 
To disable it, start your preferred compute engine with the appropriate arguments. For example:

.. code-block:: python

  import modin.pandas as pd
  import ray

  ray.init(_plasma_directory="/tmp")  # setting to disable out of core in Ray
  df = pd.read_csv("some.csv")

If you are using Dask, you have to modify local configuration files. Visit the
Dask documentation_ on object spilling for more details.


.. _documentation: https://distributed.dask.org/en/latest/worker.html#memory-management


================================================
FILE: docs/getting_started/why_modin/pandas.rst
================================================
How does Modin differ from pandas?
==================================

.. note:: 
  | *Estimated Reading Time: 10 minutes*

In the earlier tutorials, we have seen how Modin can be used to speed up pandas workflows. Here, we discuss at a high level how Modin works, in particular, how Modin's dataframe implementation differs from pandas. 

Scalability of implementation
-----------------------------

Modin exposes the pandas API through ``modin.pandas``, but it does not inherit the same pitfalls and design decisions that make pandas difficult to scale.
The pandas implementation is inherently single-threaded. This means that only one of
your CPU cores can be utilized at any given time. In a laptop, it would look something
like this with pandas:

.. image:: /img/pandas_multicore.png
   :alt: pandas is single threaded!
   :align: center
   :scale: 80%

However, Modin's implementation enables you to use all of the cores on your machine, or
all of the cores in an entire cluster. On a laptop, it will look something like this:

.. image:: /img/modin_multicore.png
   :alt: modin uses all of the cores!
   :align: center
   :scale: 80%

The additional utilization leads to improved performance; however, if you want to scale
to an entire cluster, Modin suddenly looks something like this:

.. image:: /img/modin_cluster.png
   :alt: modin works on a cluster too!
   :align: center
   :scale: 30%

Modin is able to efficiently make use of all of the hardware available to it!

Memory usage and immutability
-----------------------------

The pandas API contains many cases of "inplace" updates, which are known to be
controversial. This is due in part to the way pandas manages memory:  the user may
think they are saving memory, but pandas is usually copying the data whether an
operation was inplace or not.

Modin allows for inplace semantics, but the underlying data structures within Modin's
implementation are immutable, unlike pandas. This immutability gives Modin the ability
to internally chain operators and better manage memory layouts, because they will not
be changed. This leads to improvements over pandas in memory usage in many common cases,
due to the ability to share common memory blocks among all dataframes.

Modin provides the inplace semantics by having a mutable pointer to the immutable
internal Modin dataframe. This pointer can change, but the underlying data cannot, so
when an inplace update is triggered, Modin will treat it as if it were not inplace and
just update the pointer to the resulting Modin dataframe.

API vs implementation
---------------------

It is well known that the pandas API contains many duplicate ways of performing the same
operation. Modin instead enforces that any one behavior have one and only one
implementation internally. This guarantee enables Modin to focus on and optimize a
smaller code footprint while still guaranteeing that it covers the entire pandas API.
Modin has an internal algebra, which is roughly 15 operators, narrowed down from the
original >200 that exist in pandas. The algebra is grounded in both practical and
theoretical work. Learn more in our `VLDB 2020 paper`_. More information about this
algebra can be found in the :doc:`architecture </development/architecture>` documentation.

.. _VLDB 2020 paper: https://arxiv.org/abs/2001.00888


================================================
FILE: docs/getting_started/why_modin/why_modin.rst
================================================
Why Modin?
==========

In this section, we explain the design and motivation behind Modin and why you should use Modin to scale up your pandas workflows. We first describe the architectural differences between pandas and Modin. Then we describe how Modin can also help resolve out-of-memory issues common to pandas. Finally, we look at the key differences between Modin and other distributed dataframe libraries. 

.. toctree::
    :maxdepth: 4
    
    pandas
    out_of_core
    modin_vs_dask_vs_koalas

Modin is built on many years of research and development at UC Berkeley. For more information on how this works under the hood, check out our publications in this space:

- `Flexible Rule-Based Decomposition and Metadata Independence in Modin <https://people.eecs.berkeley.edu/~totemtang/paper/Modin.pdf>`_ (VLDB 2021)
- `Enhancing the Interactivity of Dataframe Queries by Leveraging Think Time <https://arxiv.org/pdf/2103.02145.pdf>`_ (IEEE Data Eng 2021)
- `Dataframe Systems: Theory, Architecture, and Implementation <https://www2.eecs.berkeley.edu/Pubs/TechRpts/2021/EECS-2021-193.pdf>`_ (PhD Dissertation 2021)
- `Scaling Data Science does not mean Scaling Machines <http://cidrdb.org/cidr2021/papers/cidr2021_abstract11.pdf>`_ (CIDR 2021)
- `Towards Scalable Dataframe Systems <https://arxiv.org/pdf/2001.00888.pdf>`_ (VLDB 2020)


================================================
FILE: docs/index.rst
================================================
.. image:: img/MODIN_ver2_hrz.png
   :width: 400px
   :alt: modin logo
   :align: center

====

.. toctree::
   :hidden:

   getting_started/quickstart
   usage_guide/index
   supported_apis/index
   development/index
   ecosystem
   contact

.. raw:: html

    <p align="center"><b>To use Modin, replace the pandas import:</b></p>

.. figure:: img/Modin_Pandas_Import.gif
   :align: center

Scale your pandas workflow by changing a single line of code
""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

Modin uses Ray_, Dask_ or Unidist_ to provide an effortless way to speed up your pandas notebooks,
scripts, and libraries. Unlike other distributed DataFrame libraries, Modin provides
seamless integration and compatibility with existing pandas code. Even using the
DataFrame constructor is identical.

.. code-block:: python

  import modin.pandas as pd
  import numpy as np

  frame_data = np.random.randint(0, 100, size=(2**10, 2**8))
  df = pd.DataFrame(frame_data)

It is not necessary to know in advance the available hardware resources in order to use Modin.
Additionally, it is not necessary to specify how to distribute or place data.
Modin acts as a drop-in replacement for pandas, which means that you can continue using your previous
pandas notebooks, *unchanged*, while experiencing a considerable speedup thanks to Modin, even on a single
machine. Once you've changed your import statement, you’re ready to use Modin just like
you would pandas.

Installation and choosing your compute engine
"""""""""""""""""""""""""""""""""""""""""""""

Modin can be installed from PyPI:

.. code-block:: bash

   pip install modin


If you don't have Ray_, Dask_ or Unidist_ installed, you will need to install Modin with one
of the targets:

.. code-block:: bash

   pip install "modin[ray]" # Install Modin dependencies and Ray to run on Ray
   pip install "modin[dask]" # Install Modin dependencies and Dask to run on Dask
   pip install "modin[mpi]" # Install Modin dependencies and MPI to run on MPI through unidist
   pip install "modin[all]" # Install all of the above

Modin will automatically detect which engine you have installed and use that for
scheduling computation!

If you want to choose a specific compute engine to run on, you can set the environment
variable ``MODIN_ENGINE`` and Modin will do computation with that engine:

.. code-block:: bash

   export MODIN_ENGINE=ray  # Modin will use Ray
   export MODIN_ENGINE=dask  # Modin will use Dask
   export MODIN_ENGINE=unidist # Modin will use Unidist

If you want to choose the Unidist engine, you should set the additional environment 
variable ``UNIDIST_BACKEND``, because currently Modin only supports MPI through unidist:

.. code-block:: bash

   export UNIDIST_BACKEND=mpi # Unidist will use MPI backend

This can also be done within a notebook/interpreter before you import Modin:

.. code-block:: python

   import os

   os.environ["MODIN_ENGINE"] = "ray"  # Modin will use Ray
   os.environ["MODIN_ENGINE"] = "dask"  # Modin will use Dask

   os.environ["MODIN_ENGINE"] = "unidist" # Modin will use Unidist
   os.environ["UNIDIST_BACKEND"] = "mpi" # Unidist will use MPI backend

   import modin.pandas as pd

Faster pandas, even on your laptop
""""""""""""""""""""""""""""""""""

.. image:: img/read_csv_benchmark.png
   :height: 350px
   :width: 300px
   :alt: Plot of read_csv
   :align: right

The ``modin.pandas`` `DataFrame`_ is an extremely light-weight parallel DataFrame. Modin
transparently distributes the data and computation so that all you need to do is
continue using the pandas API as you were before installing Modin. Unlike other parallel
DataFrame systems, Modin is an extremely light-weight, robust DataFrame. Because it is so
light-weight, Modin provides speed-ups of up to 4x on a laptop with 4 physical cores.

In pandas, you are only able to use one core at a time when you are doing computation of
any kind. With Modin, you are able to use all of the CPU cores on your machine. Even in
``read_csv``, we see large gains by efficiently distributing the work across your entire
machine.

.. code-block:: python

  import modin.pandas as pd

  df = pd.read_csv("my_dataset.csv")

Modin is a DataFrame for datasets from 1MB to 1TB+
""""""""""""""""""""""""""""""""""""""""""""""""""

We have focused heavily on bridging the solutions between DataFrames for small data
(e.g. pandas) and large data. Often data scientists require different tools for doing
the same thing on different sizes of data. The DataFrame solutions that exist for 1MB do
not scale to 1TB+, and the overheads of the solutions for 1TB+ are too costly for
datasets in the 1KB range. With Modin, because of its light-weight, robust, and scalable
nature, you get a fast DataFrame at 1MB and 1TB+.

**Modin is currently under active development. Requests and contributions are welcome!**

If you are interested in learning more about Modin, please check out the :doc:`Getting Started</getting_started/quickstart>`
guide then refer to the :doc:`Developer Documentation</development/index>` section,
where you can find system architecture, internal implementation details, and other useful information.
Also check out the `Github`_ to view open issues and make contributions.

.. _Dataframe: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html
.. _Ray: https://github.com/ray-project/ray/
.. _Dask: https://dask.org/
.. _Unidist: https://github.com/modin-project/unidist/
.. _Github: https://github.com/modin-project/modin


================================================
FILE: docs/release-procedure.md
================================================
## Versioning

### Patch release

Modin uses semantic versioning. So when doing a patch release, please make a separate branch
off the previous release tag, and `git cherry-pick` **only** the commits we would like to have in our
patch release (assuming previous release was versioned `X.Y.Z`):

        git checkout -b release-X.Y.Z+1 X.Y.Z

### Major and Minor releases

A major (`xx.0.0`) or minor (`0.xx.0`) release could be done by branching from `main`:

        git checkout -b release-X.Y.0 main

## Preparing the release

Before continuing with the release process, make sure that automated CI which runs on each commit passed successfully with the commit you deem as a "release candidate".

Modin follows the "no push" logic, which is _only_ circumvented for cherry-picked commits,
as reviewing them again would not add a lot of value but would add lots of excess work.

Hence non-cherry-pick commits should happen in a separate branch in your own fork, and
be delivered to the release branch by using a PR.

Note that Modin uses fully signed commits, so you have to have GPG keys set up. See [onboarding instructions](https://github.com/modin-project/modin/blob/main/contributing/contributing.md) on where to get started.

To update Modin version, follow the instructions below.

### Preparing the repo for a Major or Minor Version

**Note**: this should be done in your fork of Modin.

First, update your fork of Modin's main with the main repo's main. From your main, create a new
branch called `release-X.Y.0` off of main. Create an empty commit in your new branch with the message
`Release version X.Y.0`. Make sure to sign this commit with both your GPG key
and with the conventional `git commit -s` (so `git commit -s -S`). Open a PR against modin-project/modin with just this commit.

### Preparing the repo for a Patch Version

**Note**: this should be done in the original Modin repository (in `upstream`).

First, you must create a new branch in the upstream (main modin-project/modin) repo for the new release.
This branch must be named `release-X.Y.Z`, and should be made off of the tag for the last release. To
do this, use `git checkout -b release-X.Y.Z+1 X.Y.Z` to create the branch for the new release. Once
this branch has been created, cherry-pick the commits that will go into this release, and push this
branch to `upstream`.

**Note**: now you must switch to your fork of Modin.

From your fork of Modin, fetch the upstream repo, and checkout the release branch you made above.
From this release branch, create a new branch.

From your new branch, edit the `README.md` so that the PyPi badge will
point to the badge for this specific version (instead of latest) and so that the docs link will point
to the docs for this specific version (rather than latest).

Once the badges have been edited, create a commit, the same as for a major or minor version,
with the message `Release version X.Y.Z`, and make sure to sign it with both your GPG key, and the
traditional git sign-off. Create a PR using your branch against the `release-X.Y.Z` branch in the
original Modin repo.

### Tag commit

After the PR has been merged, clone a clean copy of the Modin repo from the modin-project organization.
You now need to tag the commit that corresponds to the above PR with the appropriate tag for this release.

**Note**: from now on you work on the `main` branch (in `upstream`) for a major or minor release,
or the `release-X.Y.Z` branch (in `upstream`) for a patch release.

        git tag -as X.Y.Z

  * Use `scripts/release.py` to draft the release notes (might be as simple as `python scripts/release.py notes > draft.txt`)
    * If you're experiencing [rate limiting by GitHub](https://docs.github.com/en/rest/overview/resources-in-the-rest-api#rate-limiting) during username resolving, pass a token via `--token` option to the script
    * Fill in the placeholder for summary of the release
    * Please look into PR sections and split them if necessary into smaller but better fitting ones, as the script only categorizes by prefix (`FIX-`, `TEST-`, etc.)
    * Make sure to correctly resolve contributors whom the script failed to transform to GitHub usernames if there are any!
  * Include release documentation in the annotation and make sure it is signed.
  * Push the tag to `main` or `release-X.Y.Z` branch: `git push upstream X.Y.Z`
    * If you're re-pushing a tag (beware! you shouldn't be doing that, no, _really_!), you can remove remote tag and push a local one by `git push upstream :refs/tags/X.Y.Z`


### Build wheel:

**Note**: This should be done from your clean clone of the `upstream` Modin
repository from the modin-project organization, where you made the release tag.

```bash
# Install/update tools
pip install --upgrade build twine
# Build a pure Python wheel.
python3 setup.py sdist bdist_wheel
```

You may see the wheel in the `dist` folder: `ls -l dist`. Make sure the version is correct.
Also make sure there is a `tar` file that contains the source.

### Upload wheels:

Make sure you have an active PyPI account which has write access to Modin pypi repo, and make sure you have a pypi token set up.

Use `twine` to upload wheels:

```bash
twine upload dist/*
```

When asked for account, provide `__token__` (exactly as stated); when asked for password, present your token from pypi.

### Check with `pip install`:

Run `pip install -U "modin[all]"` on Linux, Mac, and Windows systems in a new environment
to test that the wheels were uploaded correctly.

## Make Github and conda-forge release

### Github

Once the tag has been published, we need to make the release on GitHub. Go to the
[Release page](https://github.com/modin-project/modin/releases), and click on `Draft a new release`.
Choose the tag you made above from the dropdown menu, and copy paste the name of the release 
in the `Release title` box. Next, copy paste the release notes from above into the box labelled
`Describe this release`. This will ensure that the release notes on GitHub are Markdown formatted.

Double check that everything looks good by clicking `Preview`, and then hit the green `Publish release`
button!

### Conda-forge

Conda-forge has a bot which watches for new releases of software packaged through it,
and in case of Modin it waits either for Github releases or for tags and then makes
a new automatic PR with version increment.

You should watch for that PR and, after fixing any issues that come up, merge it
to make the new Modin release appear in the `conda-forge` channel. For detailed instructions
on how to ensure the PR passes CI and is merge-able, check out [the how-to page in the modin-feedstock repo](https://github.com/conda-forge/modin-feedstock/blob/main/HOWTO.md)!

## Publicize Release
Once the release has been finalized, make sure to post an announcement
in the #general channel of the public Modin Slack!


================================================
FILE: docs/release_notes/release_notes-0.14.0.rst
================================================
:orphan:

Modin 0.14.0

Key Features and Updates
------------------------

* Stability and Bugfixes
  * FIX-#4058: Allow pickling empty dataframes and series (#4095)
  * FIX-#4136: Fix exercise_3.ipynb example notebook (#4137)
  * FIX-#4105: Fix names of pandas options to avoid `OptionError` (#4109)
  * FIX-#3417: Fix read_csv with skiprows and header parameters (#3419)
  * FIX-#4142: Fix OmniSci enabling (#4146)
  * FIX-#4162: Use `skipif` instead of `skip` for compatibility with pytest 7.0 (#4163)
  * FIX-#4158: Do not print OmniSci logs to stdout by default (#4159)
  * FIX-#4177: Support read_feather from pathlike objects (#4177)
  * FIX-#4234: Upgrade pandas to 1.4.1 (#4235)
  * FIX-#3368: support unsigned integers in OmniSci backend (#4256)
  * FIX-#4057: Allow reading an empty parquet file (#4075)
  * FIX-#3884: Fix read_excel() dropping empty rows (#4161)
  * FIX-#4257: Fix Categorical() for scalar categories (#4258)
  * FIX-#4300: Fix Modin Categorical column dtype categories (#4276)
  * FIX-#4208: Fix lazy metadata update for `PandasDataFrame.from_labels` (#4209)
  * FIX-#3981, FIX-#3801, FIX-#4149: Stop broadcasting scalars to set items (#4160)
  * FIX-#4185: Fix rolling across column partitions (#4262)
  * FIX-#4303: Fix the syntax error in reading from postgres (#4304)
  * FIX-#4308: Add proper error handling in df.set_index (#4309)
  * FIX-#4056: Allow an empty parse_date list in `read_csv_glob` (#4074)
  * FIX-#4312: Fix constructing categorical frame with duplicate column names (#4313).
  * FIX-#4314: Allow passing a series of dtypes to astype (#4318)
  * FIX-#4310: Handle lists of lists of ints in read_csv_glob (#4319)
* Performance enhancements
  * FIX-#4138, FIX-#4009: remove redundant sorting in the internal '.mask()' flow (#4140)
  * FIX-#4183: Stop shallow copies from creating global shared state. (#4184)
* Benchmarking enhancements
  * FIX-#4221: add `wait` method for `PandasOnRayDataframeColumnPartition` class (#4231)
* Refactor Codebase
  * REFACTOR-#3990: remove code duplication in `PandasDataframePartition` hierarchy (#3991)
  * REFACTOR-#4229: remove unused `dask_client` global variable in `modin/pandas/__init__.py` (#4230)
  * REFACTOR-#3997: remove code duplication for `broadcast_apply` method (#3996)
  * REFACTOR-#3994: remove code duplication for `get_indices` function (#3995)
  * REFACTOR-#4331: remove code duplication for `to_pandas`, `to_numpy` functions in `QueryCompiler` hierarchy (#4332)
  * REFACTOR-#4213: Refactor `modin/examples/tutorial/` directory (#4214)
  * REFACTOR-#4206: add assert check into `__init__` method of `PandasOnDaskDataframePartition` class (#4207)
  * REFACTOR-#3900: add flake8-no-implicit-concat plugin and refactor flake8 error codes (#3901)
  * REFACTOR-#4093: Refactor base to be smaller (#4220)
  * REFACTOR-#4047: Rename `cluster` directory to `cloud` in examples (#4212)
  * REFACTOR-#3853: interacting with Dask interface through `DaskWrapper` class (#3854)
  * REFACTOR-#4322: Move is_reduce_fn outside of groupby_agg (#4323)
* Pandas API implementations and improvements
  * FEAT-#3603: add experimental `read_custom_text` function that can read custom line-by-line text files (#3441)
  * FEAT-#979: Enable reading from SQL server (#4279)
* OmniSci enhancements
  *
* XGBoost enhancements
  *
* Developer API enhancements
  * FEAT-#4245: Define base interface for dataframe exchange protocol (#4246)
  * FEAT-#4244: Implement dataframe exchange protocol for HdkOnNative execution (#4269)
  * FEAT-#4144: Implement dataframe exchange protocol for pandas storage format (#4150)
  * FEAT-#4342: Support `from_dataframe` for pandas storage format (#4343)
* Update testing suite
  * TEST-#3628: Report coverage data for `test-internals` CI job (#4198)
  * TEST-#3938: Test tutorial notebooks in CI (#4145)
  * TEST-#4153: Fix condition of running lint-commit and set of CI triggers (#4156)
  * TEST-#4201: Add read_parquet, explode, tail, and various arithmetic functions to asv_bench (#4203)
* Documentation improvements
  * DOCS-#4077: Add release notes template to docs folder (#4078)
  * DOCS-#4082: Add pdf/epub/htmlzip formats for doc builds (#4083)
  * DOCS-#4168: Fix rendering the examples on troubleshooting page (#4169)
  * DOCS-#4151: Add info in troubleshooting page related to Dask engine usage (#4152)
  * DOCS-#4172: Refresh Intel Distribution of Modin paragraph (#4175)
  * DOCS-#4173: Mention strict channel priority in conda install section (#4178)
  * DOCS-#4176: Update OmniSci usage section (#4192)
  * DOCS-#4027: Add GIF images and chart to Modin README demonstrating speedups (#4232)
  * DOCS-#3954: Add Dask example notebooks (#4139)
  * DOCS-#4272: Add bar chart comparisons to quick start guide (#4277)
  * DOCS-#3953: Add docs and notebook examples on running Modin with OmniSci (#4001)
  * DOCS-#4280: Change links in jupyter notebooks (#4281)
  * DOCS-#4290: Add changes for OmniSci notebooks (#4291)
  * DOCS-#4241: Update warnings and docs regarding defaulting to pandas (#4242)
  * DOCS-#3099: Fix `BasePandasDataSet` docstrings warnings (#4333)
  * DOCS-#4339: Reformat I/O functions docstrings (#4341)
  * DOCS-#4336: Reformat general utilities docstrings (#4338)
* Dependencies
  * FIX-#4113, FIX-#4116, FIX-#4115: Apply new `black` formatting, fix pydocstyle check and readthedocs build (#4114)
  * TEST-#3227: Use codecov github action instead of bash form in GA workflows (#3226)
  * FIX-#4115: Unpin `pip` in readthedocs deps list (#4170)
  * TEST-#4217: Pin `Dask<2022.2.0` as a temporary fix of CI (#4218)

Contributors
------------

@prutskov
@amyskov
@paulovn
@anmyachev
@YarShev
@RehanSD
@devin-petersohn
@dchigarev
@Garra1980
@mvashishtha
@naren-ponder
@jeffreykennethli
@dorisjlee
@Rubtsowa


================================================
FILE: docs/release_notes/release_notes-0.15.0.rst
================================================
:orphan:

Modin 0.15.0

Key Features and Updates
------------------------

* Stability and Bugfixes
  * FIX-#4376: Upgrade pandas to 1.4.2 (#4377)
  * FIX-#3615: Relax some deps in development env (#4365)
  * FIX-#4370: Fix broken docstring links (#4375)
  * FIX-#4392: Align Modin XGBoost with xgb>=1.6 (#4393)
  * FIX-#4385: Get rid of `use-deprecated` option in `pip` (#4386)
  * FIX-#3527: Fix parquet partitioning issue causing negative row length partitions (#4368)
  * FIX-#4330: Override the memory limit to start ray 1.11.0 on Macs (#4335)
  * FIX-#4407: Align `insert` function with pandas in case of numpy array with several columns (#4408)
  * FIX-#4373: Fix invalid file path when trying `read_csv_glob` with `usecols` parameter (#4405)
  * FIX-#4394: Fix issue with multiindex metadata desync (#4395)
  * FIX-#4438: Fix `reindex` function that doesn't preserve initial index metadata (#4442)
  * FIX-#4425: Add parameters to groupby pct_change (#4429)
  * FIX-#4457: Fix `loc` in case when need reindex item (#4457)
  * FIX-#4414: Add missing f prefix on f-strings found at https://codereview.doctor (#4415)
  * FIX-#4461: Fix S3 CSV data path (#4462)
  * FIX-#4467: `drop_duplicates` no longer removes items based on index values (#4468)
  * FIX-#4449: Drain the call queue before waiting on result in benchmark mode (#4472)
  * FIX-#4518: Fix Modin Logging to report specific Modin warnings/errors (#4519)
  * FIX-#4481: Allow clipping with a Modin Series of bounds (#4486)  
  * FIX-#4504: Support na_action in applymap (#4505)
  * FIX-#4503: Stop the memory logging thread after session exit (#4515)
  * FIX-#4531: Fix a makedirs race condition in to_parquet (#4533)
  * FIX-#4464: Refactor Ray utils and quick fix groupby.count failing on virtual partitions (#4490)
  * FIX-#4436: Fix to_pydatetime dtype for timezone None (#4437)
  * FIX-#4541: Fix merge_asof with non-unique right index (#4542)
* Performance enhancements
  * FEAT-#4320: Add connectorx as an alternative engine for read_sql (#4346)
  * PERF-#4493: Use partition size caches more in Modin dataframe (#4495)
* Benchmarking enhancements
  * FEAT-#4371: Add logging to Modin (#4372)
  * FEAT-#4501: Add RSS Memory Profiling to Modin Logging (#4502)
  * FEAT-#4524: Split Modin API and Memory log files (#4526)
* Refactor Codebase
  * REFACTOR-#4284: use variable length unpacking when getting results from `deploy` function (#4285)
  * REFACTOR-#3642: Move PyArrow storage format usage from main feature to experimental ones (#4374)
  * REFACTOR-#4003: Delete the deprecated cloud mortgage example (#4406)
  * REFACTOR-#4513: Fix spelling mistakes in docs and docstrings (#4514)
  * REFACTOR-#4510: Align experimental and regular IO modules initializations (#4511)
* Pandas API implementations and improvements
  *
* OmniSci enhancements
  *
* XGBoost enhancements
  *
* Developer API enhancements
  * FEAT-#4359: Add __dataframe__ method to the protocol dataframe (#4360)
* Update testing suite
  * TEST-#4363: Use Ray from pypi in CI (#4364)
  * FIX-#4422: get rid of case sensitivity for `warns_that_defaulting_to_pandas` (#4423)
  * TEST-#4426: Stop passing is_default kwarg to Modin and pandas (#4428)
  * FIX-#4439: Fix flake8 CI fail (#4440)
  * FIX-#4409: Fix `eval_insert` utility that doesn't actually check results of `insert` function (#4410)
  * TEST-#4482: Fix getitem and loc with series of bools (#4483).
* Documentation improvements
  * DOCS-#4296: Fix docs warnings (#4297)
  * DOCS-#4388: Turn off fail_on_warning option for docs build (#4389)
  * DOCS-#4469: Say that commit messages can start with PERF (#4470).
  * DOCS-#4466: Recommend GitHub issues over bug_reports@modin.org (#4474).  
  * DOCS-#4487: Recommend GitHub issues over feature_requests@modin.org (#4489).
  * DOCS-#4545: Add socials to README (#4555).
* Dependencies
  * FIX-#4327: Update min pin for xgboost version (#4328)
  * FIX-#4383: Remove `pathlib` from deps (#4384)
  * FIX-#4390: Add `redis` to Modin dependencies (#4396)
  * FIX-#3689: Add black and flake8 into development environment files (#4480)
  * TEST-#4516: Add numpydoc to developer requirements (#4517)
* New Features
  * FEAT-#4412: Add Batch Pipeline API to Modin (#4452)

Contributors
------------
@YarShev
@Garra1980
@prutskov
@alexander3774
@amyskov
@wangxiaoying
@jeffreykennethli
@mvashishtha
@anmyachev
@dchigarev
@devin-petersohn
@jrsacher
@orcahmlee
@naren-ponder
@RehanSD


================================================
FILE: docs/release_notes/release_notes-0.16.0.rst
================================================
:orphan:

Modin 0.16.0

Key Features and Updates
------------------------

* Stability and Bugfixes
  * FIX-#4570: Replace ``np.bool`` -> ``np.bool_`` (#4571)
  * FIX-#4543: Fix `read_csv` in case skiprows=<0, []> (#4544)
  * FIX-#4059: Add cell-wise execution for binary ops, fix bin ops for empty dataframes (#4391)
  * FIX-#4589: Pin protobuf<4.0.0 to fix ray (#4590)
  * FIX-#4577: Set attribute of Modin dataframe to updated value (#4588)
  * FIX-#4411: Fix binary_op between datetime64 Series and pandas timedelta (#4592)
  * FIX-#4604: Fix `groupby` + `agg` in case when multicolumn can arise (#4642)
  * FIX-#4582: Inherit custom log layer (#4583)
  * FIX-#4639: Fix `storage_options` usage for `read_csv` and `read_csv_glob` (#4644)
  * FIX-#4593: Ensure Modin warns when setting columns via attributes (#4621)
  * FIX-#4584: Enable pdb debug when running cloud tests (#4585)
  * FIX-#4564: Workaround import issues in Ray: auto-import pandas on python start if env var is set (#4603)
  * FIX-#4641: Reindex pandas partitions in `df.describe()` (#4651)
  * FIX-#2064: Fix `iloc`/`loc` assignment when dataframe is empty (#4677)
  * FIX-#4634: Check for FrozenList as `by` in `df.groupby()` (#4667)
  * FIX-#4680: Fix `read_csv` that started defaulting to pandas again in case of reading from a buffer and when a buffer has a non-zero starting position (#4681)
  * FIX-#4491: Wait for all partitions in parallel in benchmark mode (#4656)
  * FIX-#4358: MultiIndex `loc` shouldn't drop levels for full-key lookups (#4608)
  * FIX-#4658: Expand exception handling for `read_*` functions from s3 storages (#4659)
  * FIX-#4672: Fix incorrect warning when setting `frame.index` or `frame.columns` (#4721)
  * FIX-#4686: Propagate metadata and drain call queue in unwrap_partitions (#4697)
  * FIX-#4652: Support categorical data in `from_dataframe` (#4737)
  * FIX-#4756: Correctly propagate `storage_options` in `read_parquet` (#4764)
  * FIX-#4657: Use `fsspec` for handling s3/http-like paths instead of `s3fs` (#4710)
  * FIX-#4676: drain sub-virtual-partition call queues (#4695)
  * FIX-#4782: Exclude certain non-parquet files in `read_parquet` (#4783)
  * FIX-#4808: Set dtypes correctly after column rename (#4809)
  * FIX-#4811: Apply dataframe -> not_dataframe functions to virtual partitions (#4812)
  * FIX-#4099: Use mangled column names but keep the original when building frames from arrow (#4767)
  * FIX-#4838: Bump up modin-spreadsheet to latest master (#4839)
  * FIX-#4840: Change modin-spreadsheet version for notebook requirements (#4841)
  * FIX-#4835: Handle Pathlike paths in `read_parquet` (#4837)
  * FIX-#4872: Stop checking the private ray mac memory limit (#4873)
  * FIX-#4914: `base_lengths` should be computed from `base_frame` instead of `self` in `copartition` (#4915)
  * FIX-#4848: Fix rebalancing partitions when NPartitions == 1 (#4874)
  * FIX-#4927: Fix `dtypes` computation in `dataframe.filter` (#4928)
  * FIX-#4907: Implement `radd` for Series and DataFrame (#4908)
  * FIX-#4945: Fix `_take_2d_positional` that loses indexes due to filtering empty dataframes (#4951)
  * FIX-#4818, PERF-#4825: Fix where by using the new n-ary operator (#4820)
  * FIX-#3983: FIX-#4107: Materialize 'rowid' columns when selecting rows by position (#4834)
  * FIX-#4845: Fix KeyError from `__getitem_bool` for single row dataframes (#4845)
  * FIX-#4734: Handle Series.apply when return type is a DataFrame (#4830)
  * FIX-#4983: Set `frac` to `None` in _sample when `n=0` (#4984)
  * FIX-#4993: Return `_default_to_pandas` in `df.attrs` (#4995)
  * FIX-#5043: Fix `execute` function in ASV utils failed if `len(partitions) == 0` (#5044)
  * FIX-#4597: Refactor Partition handling of func, args, kwargs (#4715)
  * FIX-#4996: Evaluate BenchmarkMode at each function call (#4997)
  * FIX-#4022: Fixed empty data frame with index (#4910)
  * FIX-#4090: Fixed check if the index is trivial (#4936)
  * FIX-#4966: Fix `to_timedelta` to return Series instead of TimedeltaIndex (#5028)
  * FIX-#5042: Fix series __getitem__ with invalid strings (#5048)
  * FIX-#4691: Fix binary operations between virtual partitions (#5049)  
  * FIX-#5045: Fix ray virtual_partition.wait with duplicate object refs (#5058)
* Performance enhancements
  * PERF-#4182: Add cell-wise execution for binary ops, fix bin ops for empty dataframes (#4391)
  * PERF-#4288: Improve perf of `groupby.mean` for narrow data (#4591)
  * PERF-#4772: Remove `df.copy` call from `from_pandas` since it is not needed for Ray and Dask (#4781)
  * PERF-#4325: Improve perf of multi-column assignment in `__setitem__` when no new column names are assigning (#4455)
  * PERF-#3844: Improve perf of `drop` operation (#4694)
  * PERF-#4727: Improve perf of `concat` operation (#4728)
  * PERF-#4705: Improve perf of arithmetic operations between `Series` objects with shared `.index` (#4689)
  * PERF-#4703: Improve performance in accessing `ser.cat.categories`, `ser.cat.ordered`, and `ser.__array_priority__` (#4704)
  * PERF-#4305: Parallelize `read_parquet` over row groups (#4700)
  * PERF-#4773: Compute `lengths` and `widths` in `put` method of Dask partition like Ray does (#4780)
  * PERF-#4732: Avoid overwriting already-evaluated `PandasOnRayDataframePartition._length_cache` and `PandasOnRayDataframePartition._width_cache` (#4754)
  * PERF-#4862: Don't call `compute_sliced_len.remote` when `row_labels/col_labels == slice(None)` (#4863)
  * PERF-#4713: Stop overriding the ray MacOS object store size limit (#4792)
  * PERF-#4851: Compute `dtypes` for binary operations that can only return bool type and the right operand is not a Modin object (#4852)
  * PERF-#4842: `copy` should not trigger any previous computations (#4843)
  * PERF-#4849: Compute `dtypes` in `concat` also for ROW_WISE case when possible (#4850)
  * PERF-#4929: Compute `dtype` when using `Series.dt` accessor (#4930)
  * PERF-#4892: Compute `lengths` in `rebalance_partitions` when possible (#4893)
  * PERF-#4794: Compute caches in `_propagate_index_objs` (#4888)
  * PERF-#4860: `PandasDataframeAxisPartition.deploy_axis_func` should be serialized only once (#4861)
  * PERF-#4890: `PandasDataframeAxisPartition.drain` should be serialized only once (#4891)
  * PERF-#4870: Avoid index materialization in `__getattribute__` and `__getitem__` (#4911)
  * PERF-#4886: Use lazy index and columns evaluation in `query` method (#4887)
  * PERF-#4866: `iloc` function that is used in `partition.mask` should be serialized only once (#4901)
  * PERF-#4920: Avoid index and cache computations in `take_2d_labels_or_positional` unless they are needed (#4921)
  * PERF-#4999: don't call `apply` in virtual partition's `drain_call_queue` if `call_queue` is empty (#4975)
  * PERF-#4268: Implement partition-parallel __getitem__ for bool Series masks (#4753)
  * PERF-#5017: `reset_index` shouldn't trigger index materialization if possible (#5018)
  * PERF-#4963: Use partition `width/length` methods instead of `_compute_axis_labels_and_lengths` if index is already known (#4964)
  * PERF-#4940: Optimize categorical dtype check in `concatenate` (#4953)
* Benchmarking enhancements
  * TEST-#5066: Add outer join case for `TimeConcat` benchmark (#5067)
  * TEST-#5083: Add `merge` op with categorical data (#5084)
  * FEAT-#4706: Add Modin ClassLogger to PandasDataframePartitionManager (#4707)
  * TEST-#5014: Simplify adding new ASV benchmarks (#5015)
  * TEST-#5064: Update `TimeConcat` benchmark with new parameter `ignore_index` (#5065)
  * PERF-#4944: Avoid default_to_pandas in ``Series.cat.codes``, ``Series.dt.tz``, and ``Series.dt.to_pytimedelta`` (#4833)
  * TEST-#5068: Add binary op benchmark for Series (#5069)
* Refactor Codebase
  * REFACTOR-#4530: Standardize access to physical data in partitions (#4563)
  * REFACTOR-#4534: Replace logging meta class with class decorator (#4535)
  * REFACTOR-#4708: Delete combine dtypes (#4709)
  * REFACTOR-#4629: Add type annotations to modin/config (#4685)
  * REFACTOR-#4717: Improve PartitionMgr.get_indices() usage (#4718)
  * REFACTOR-#4730: make Indexer immutable (#4731)
  * REFACTOR-#4774: remove `_build_treereduce_func` call from `_compute_dtypes` (#4775)
  * REFACTOR-#4750: Delete BaseDataframeAxisPartition.shuffle (#4751)
  * REFACTOR-#4722: Stop suppressing undefined name lint (#4723)
  * REFACTOR-#4832: unify `split_result_of_axis_func_pandas` (#4831)
  * REFACTOR-#4796: Introduce constant for __reduced__ column name (#4799)
  * REFACTOR-#4000: Remove code duplication for `PandasOnRayDataframePartitionManager` (#4895)
  * REFACTOR-#3780: Remove code duplication for `PandasOnDaskDataframe` (#3781)
  * REFACTOR-#4530: Unify access to physical data for any partition type (#4829)
  * REFACTOR-#4978: Align `modin/core/execution/dask/common/__init__.py` with `modin/core/execution/ray/common/__init__.py` (#4979)
  * REFACTOR-#4949: Remove code duplication in `default2pandas/dataframe.py` and `default2pandas/any.py` (#4950)
  * REFACTOR-#4976: Rename `RayTask` to `RayWrapper` in accordance with Dask (#4977)
  * REFACTOR-#4885: De-duplicated take_2d_labels_or_positional methods (#4883)
  * REFACTOR-#5005: Use `finalize` method instead of list comprehension + `drain_call_queue` (#5006)
  * REFACTOR-#5001: Remove `jenkins` stuff (#5002)
  * REFACTOR-#5026: Change exception names to simplify grepping (#5027)
  * REFACTOR-#4970: Rewrite base implementations of a partition's `width/length` (#4971)
  * REFACTOR-#4942: Remove `call` method in favor of `register` due to duplication (#4943)
  * REFACTOR-#4922: Helpers for take_2d_labels_or_positional (#4865)
  * REFACTOR-#5024: Make `_row_lengths` and `_column_widths` public (#5025)
  * REFACTOR-#5009: Use `RayWrapper.materialize` instead of `ray.get` (#5010)
  * REFACTOR-#4755: Rewrite Pandas version mismatch warning (#4965)
  * REFACTOR-#5012: Add mypy checks for singleton files in base modin directory (#5013)
  * REFACTOR-#5038: Remove unnecessary _method argument from resamplers (#5039)
  * REFACTOR-#5081: Remove `c323f7fe385011ed849300155de07645.db` file (#5082)
* Pandas API implementations and improvements
  * FEAT-#4670: Implement convert_dtypes by mapping across partitions (#4671)
* OmniSci enhancements
  * FEAT-#4913: Enabling pyhdk
* XGBoost enhancements
  *
* Developer API enhancements
  *
* Update testing suite
  * TEST-#4508: Reduce test_partition_api pytest threads to deflake it (#4551)
  * TEST-#4550: Use much less data in test_partition_api (#4554)
  * TEST-#4610: Remove explicit installation of `black`/`flake8` for omnisci ci-notebooks (#4609)
  * TEST-#2564: Add caching and use mamba for conda setups in GH (#4607)
  * TEST-#4557: Delete multiindex sorts instead of xfailing (#4559)
  * TEST-#4698: Stop passing invalid storage_options param (#4699)
  * TEST-#4745: Pin flake8 to <5 to workaround installation conflict (#4752)
  * TEST-#4875: XFail tests failing due to file gone missing (#4876)
  * TEST-#4879: Use pandas `ensure_clean()` in place of `io_tests_data` (#4881)
  * TEST-#4562: Use local Ray cluster in CI to resolve flaky `test-compat-win` (#5007)
  * TEST-#5040: Rework test_series using eval_general() (#5041)
  * TEST-#5050: Add black to pre-commit hook (#5051)
* Documentation improvements
  * DOCS-#4552: Change default sphinx language to en to fix sphinx >= 5.0.0 build (#4553)
  * DOCS-#4628: Add to_parquet partial support notes (#4648)
  * DOCS-#4668: Set light theme for readthedocs page, remove theme switcher (#4669)
  * DOCS-#4748: Apply the Triage label to new issues (#4749)
  * DOCS-#4790: Give all templates issue type and triage labels (#4791)
  * DOCS-#4521: Document how to benchmark modin (#5020)
* Dependencies
  * FEAT-#4598: Add support for pandas 1.4.3 (#4599)
  * FEAT-#4619: Integrate mypy static type checking (#4620)
  * FEAT-#4202: Allow dask past 2022.2.0 (#4769)
  * FEAT-#4925: Upgrade pandas to 1.4.4 (#4926)
  * TEST-#4998: Add flake8 plugins to dev requirements (#5000)
* New Features
  * FEAT-#4463: Add experimental fuzzydata integration for testing against a randomized dataframe workflow (#4556)
  * FEAT-#4419: Extend virtual partitioning API to pandas on Dask (#4420)
  * FEAT-#4147: Add partial compatibility with Python 3.6 and pandas 1.1 (#4301)
  * FEAT-#4569: Add error message when `read_` function defaults to pandas (#4647)
  * FEAT-#4725: Make index and columns lazy in Modin DataFrame (#4726)
  * FEAT-#4664: Finalize compatibility support for Python 3.6 (#4800)
  * FEAT-#4746: Sync interchange protocol with recent API changes (#4763)
  * FEAT-#4733: Support fastparquet as engine for `read_parquet` (#4807)
  * FEAT-#4766: Support fsspec URLs in `read_csv` and `read_csv_glob` (#4898)
  * FEAT-#4827: Implement `infer_types` dataframe algebra operator (#4871)
  * FEAT-#4989: Switch pandas version to 1.5 (#5037)

Contributors
------------
@mvashishtha
@NickCrews
@prutskov
@vnlitvinov
@pyrito
@suhailrehman
@RehanSD
@helmeleegy
@anmyachev
@d33bs
@noloerino
@devin-petersohn
@YarShev
@naren-ponder
@jbrockmendel
@ienkovich
@Garra1980
@Billy2551


================================================
FILE: docs/release_notes/release_notes-template.rst
================================================
:orphan:

Modin X.X.X

Key Features and Updates
------------------------

* Stability and Bugfixes
  *
* Performance enhancements
  *
* Benchmarking enhancements
  *
* Refactor Codebase
  *
* Pandas API implementations and improvements
  *
* HDK enhancements
  *
* XGBoost enhancements
  *
* Developer API enhancements
  *
* Update testing suite
  *
* Documentation improvements
  *
* Dependencies
  *
* New Features
  *

Contributors
------------


================================================
FILE: docs/requirements-doc.txt
================================================
# install current modin checkout to bring all required dependencies
.[all]
# now install some more optional dependencies
colorama
click
flatbuffers
funcsigs
mock
opencv-python
pydata_sphinx_theme
pyyaml
recommonmark
sphinx<6.0.0
sphinx-click
ray>=2.10.0,<3
# Override to latest version of modin-spreadsheet
git+https://github.com/modin-project/modin-spreadsheet.git@49ffd89f683f54c311867d602c55443fb11bf2a5
sphinxcontrib_plantuml
sphinx-issues


================================================
FILE: docs/supported_apis/dataframe_supported.rst
================================================
``pd.DataFrame`` supported APIs
===================================

The following table lists both implemented and not implemented methods. If you need
an operation that is listed as not implemented, feel free to open an issue on the
`GitHub repository`_, or give a thumbs up to already created issues. Contributions are
also welcome!

The following table is structured as follows: The first column contains the method name.
The second column contains a link to the description of the corresponding pandas method.
The third column is a flag for whether or not there is an implementation in Modin for
the method in the first column. ``Y`` stands for yes, ``N`` stands for no, ``P`` stands
for partial (meaning some parameters may not be supported yet), and ``D`` stands for
default to pandas. The fourth column contains notes about the current implementation.

+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| DataFrame method           | pandas Doc link           | Implemented? (Y/N/P/D) | Notes for Current implementation                   |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``T``                      | `T`_                      | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``abs``                    | `abs`_                    | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``add``                    | `add`_                    | Y                      | **Ray** and **Dask**: Shuffles data in operations  |
|                            |                           |                        | between DataFrames.                                |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``add_prefix``             | `add_prefix`_             | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``add_suffix``             | `add_suffix`_             | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``agg`` / ``aggregate``    | `agg`_ / `aggregate`_     | P                      | - Dictionary ``func`` parameter defaults to pandas |
|                            |                           |                        | - Numpy operations default to pandas               |
|                            |                           |                        |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``align``                  | `align`_                  | D                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``all``                    | `all`_                    | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``any``                    | `any`_                    | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``apply``                  | `apply`_                  | Y                      | See ``agg``                                        |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``applymap``               | `applymap`_               | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``asfreq``                 | `asfreq`_                 | D                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``asof``                   | `asof`_                   | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``assign``                 | `assign`_                 | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``astype``                 | `astype`_                 | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``at``                     | `at`_                     | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``at_time``                | `at_time`_                | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``axes``                   | `axes`_                   | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``between_time``           | `between_time`_           | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``bfill``                  | `bfill`_                  | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``bool``                   | `bool`_                   | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``boxplot``                | `boxplot`_                | D                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``clip``                   | `clip`_                   | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``combine``                | `combine`_                | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``combine_first``          | `combine_first`_          | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``compare``                | `compare`_                | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``copy``                   | `copy`_                   | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``corr``                   | `corr`_                   | P                      | Correlation floating point precision may slightly  |
|                            |                           |                        | differ from pandas. Only the ``pearson`` method is |
|                            |                           |                        | currently implemented. Other methods and           |
|                            |                           |                        | ``numeric_only`` default to pandas.                |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``corrwith``               | `corrwith`_               | D                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``count``                  | `count`_                  | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``cov``                    | `cov`_                    | P                      | Covariance floating point precision may slightly   |
|                            |                           |                        | differ from pandas. ``numeric_only`` defaults to   |
|                            |                           |                        | pandas.                                            |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``cummax``                 | `cummax`_                 | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``cummin``                 | `cummin`_                 | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``cumprod``                | `cumprod`_                | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``cumsum``                 | `cumsum`_                 | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``describe``               | `describe`_               | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``diff``                   | `diff`_                   | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``div``                    | `div`_                    | Y                      | See ``add``                                        |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``divide``                 | `divide`_                 | Y                      | See ``add``                                        |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``dot``                    | `dot`_                    | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``drop``                   | `drop`_                   | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``droplevel``              | `droplevel`_              | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``drop_duplicates``        | `drop_duplicates`_        | D                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``dropna``                 | `dropna`_                 | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``dtypes``                 | `dtypes`_                 | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``duplicated``             | `duplicated`_             | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``empty``                  | `empty`_                  | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``eq``                     | `eq`_                     | Y                      | See ``add``                                        |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``equals``                 | `equals`_                 | Y                      | Requires shuffle, can be further optimized         |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``eval``                   | `eval`_                   | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``ewm``                    | `ewm`_                    | D                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``expanding``              | `expanding`_              | D                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``explode``                | `explode`_                | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``ffill``                  | `ffill`_                  | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``fillna``                 | `fillna`_                 | P                      | ``value`` parameter of type DataFrame defaults to  |
|                            |                           |                        | pandas.                                            |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``filter``                 | `filter`_                 | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``first``                  | `first`_                  | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``first_valid_index``      | `first_valid_index`_      | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``floordiv``               | `floordiv`_               | Y                      | See ``add``                                        |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``from_dict``              | `from_dict`_              | D                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``from_records``           | `from_records`_           | D                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``ge``                     | `ge`_                     | Y                      | See ``add``                                        |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``get``                    | `get`_                    | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``groupby``                | `groupby`_                | Y                      | Not yet optimized for all operations.              |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``gt``                     | `gt`_                     | Y                      | See ``add``                                        |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``head``                   | `head`_                   | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``hist``                   | `hist`_                   | D                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``iat``                    | `iat`_                    | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``idxmax``                 | `idxmax`_                 | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``idxmin``                 | `idxmin`_                 | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``iloc``                   | `iloc`_                   | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``infer_objects``          | `infer_objects`_          | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``info``                   | `info`_                   | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``insert``                 | `insert`_                 | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``interpolate``            | `interpolate`_            | D                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``isetitem``               | `isetitem`_               | D                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``isin``                   | `isin`_                   | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``isna``                   | `isna`_                   | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``isnull``                 | `isnull`_                 | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``items``                  | `items`_                  | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``iterrows``               | `iterrows`_               | P                      | Modin does not parallelize iteration in Python     |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``itertuples``             | `itertuples`_             | P                      | Modin does not parallelize iteration in Python     |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``join``                   | `join`_                   | P                      | When ``how`` is set to ``right`` or ``outer`` or   |
|                            |                           |                        | when ``validate`` is given, defaults to pandas.    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``keys``                   | `keys`_                   | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``kurt``                   | `kurt`_                   | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``kurtosis``               | `kurtosis`_               | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``last``                   | `last`_                   | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``last_valid_index``       | `last_valid_index`_       | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``le``                     | `le`_                     | Y                      | See ``add``                                        |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``loc``                    | `loc`_                    | P                      | We do not support: boolean array, callable.        |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``lt``                     | `lt`_                     | Y                      | See ``add``                                        |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``mask``                   | `mask`_                   | D                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``max``                    | `max`_                    | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``mean``                   | `mean`_                   | P                      | Modin defaults to pandas if given the ``level``    |
|                            |                           |                        | param.                                             |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``median``                 | `median`_                 | P                      | Modin defaults to pandas if given the ``level``    |
|                            |                           |                        | param.                                             |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``melt``                   | `melt`_                   | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``memory_usage``           | `memory_usage`_           | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
|                            |                           |                        | Implemented the following cases:                   |
|                            |                           |                        | ``left_index=True`` and ``right_index=True``,      |
|                            |                           |                        | ``how=left`` and ``how=inner`` for all values      |
| ``merge``                  | `merge`_                  | P                      | of parameters except ``left_index=True`` and       |
|                            |                           |                        | ``right_index=False`` or ``left_index=False``      |
|                            |                           |                        | and ``right_index=True``.                          |
|                            |                           |                        | Defaults to pandas otherwise.                      |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``min``                    | `min`_                    | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``mod``                    | `mod`_                    | Y                      | See ``add``                                        |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``mode``                   | `mode`_                   | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``mul``                    | `mul`_                    | Y                      | See ``add``                                        |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``multiply``               | `multiply`_               | Y                      | See ``add``                                        |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``ndim``                   | `ndim`_                   | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``ne``                     | `ne`_                     | Y                      | See ``add``                                        |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``nlargest``               | `nlargest`_               | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``notna``                  | `notna`_                  | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``notnull``                | `notnull`_                | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``nsmallest``              | `nsmallest`_              | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``nunique``                | `nunique`_                | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``pct_change``             | `pct_change`_             | D                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``pipe``                   | `pipe`_                   | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``pivot``                  | `pivot`_                  | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``pivot_table``            | `pivot_table`_            | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``plot``                   | `plot`_                   | D                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``pop``                    | `pop`_                    | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``pow``                    | `pow`_                    | Y                      | See ``add``                                        |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``prod``                   | `prod`_                   | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``product``                | `product`_                | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``quantile``               | `quantile`_               | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``query``                  | `query`_                  | P                      | Local variables not yet supported                  |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``radd``                   | `radd`_                   | Y                      | See ``add``                                        |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``rank``                   | `rank`_                   | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``rdiv``                   | `rdiv`_                   | Y                      | See ``add``                                        |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``reindex``                | `reindex`_                | Y                      | Shuffles data                                      |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``reindex_like``           | `reindex_like`_           | D                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``rename``                 | `rename`_                 | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``rename_axis``            | `rename_axis`_            | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``reorder_levels``         | `reorder_levels`_         | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``replace``                | `replace`_                | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``resample``               | `resample`_               | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``reset_index``            | `reset_index`_            | P                      | **Ray** and **Dask**: ``D`` when ``names`` or      |
|                            |                           |                        | ``allow_duplicates`` is non-default                |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``rfloordiv``              | `rfloordiv`_              | Y                      | See ``add``                                        |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``rmod``                   | `rmod`_                   | Y                      | See ``add``                                        |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``rmul``                   | `rmul`_                   | Y                      | See ``add``                                        |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``rolling``                | `rolling`_                | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``round``                  | `round`_                  | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``rpow``                   | `rpow`_                   | Y                      | See ``add``                                        |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``rsub``                   | `rsub`_                   | Y                      | See ``add``                                        |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``rtruediv``               | `rtruediv`_               | Y                      | See ``add``                                        |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``sample``                 | `sample`_                 | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``select_dtypes``          | `select_dtypes`_          | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``sem``                    | `sem`_                    | P                      | Modin defaults to pandas if given the ``level``    |
|                            |                           |                        | param.                                             |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``set_axis``               | `set_axis`_               | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``set_index``              | `set_index`_              | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``shape``                  | `shape`_                  | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``shift``                  | `shift`_                  | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``size``                   | `size`_                   | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``skew``                   | `skew`_                   | P                      | Modin defaults to pandas if given the ``level``    |
|                            |                           |                        | param.                                             |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``sort_index``             | `sort_index`_             | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``sort_values``            | `sort_values`_            | Y                      | Shuffles data. Order of indexes that have the      |
|                            |                           |                        | same sort key is not guaranteed to be the same     |
|                            |                           |                        | across sorts                                       |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``sparse``                 | `sparse`_                 | N                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``squeeze``                | `squeeze`_                | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``stack``                  | `stack`_                  | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``std``                    | `std`_                    | P                      | Modin defaults to pandas if given the ``level``    |
|                            |                           |                        | param.                                             |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``style``                  | `style`_                  | D                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``sub``                    | `sub`_                    | Y                      | See ``add``                                        |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``subtract``               | `subtract`_               | Y                      | See ``add``                                        |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``sum``                    | `sum`_                    | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``swapaxes``               | `swapaxes`_               | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``swaplevel``              | `swaplevel`_              | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``tail``                   | `tail`_                   | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``take``                   | `take`_                   | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``to_clipboard``           | `to_clipboard`_           | D                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``to_csv``                 | `to_csv`_                 | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``to_dict``                | `to_dict`_                | D                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``to_excel``               | `to_excel`_               | D                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``to_feather``             | `to_feather`_             | D                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``to_gbq``                 | `to_gbq`_                 | D                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``to_hdf``                 | `to_hdf`_                 | D                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``to_html``                | `to_html`_                | D                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``to_json``                | `to_json`_                | D                      |                                                    |
|                            |                           |                        | Experimental implementation:                       |
|                            |                           |                        | DataFrame.modin.to_json_glob                       |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``to_xml``                 | `to_xml`_                 | D                      |                                                    |
|                            |                           |                        | Experimental implementation:                       |
|                            |                           |                        | DataFrame.modin.to_xml_glob                        |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``to_latex``               | `to_latex`_               | D                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``to_orc``                 | `to_orc`_                 | D                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``to_parquet``             | `to_parquet`_             | P                      | **Ray/Dask/Unidist**: Parallel implementation only |
|                            |                           |                        | if the ``path`` parameter is a string. In that     |
|                            |                           |                        | case, ``path`` specifies a directory where one     |
|                            |                           |                        | file is written per row partition of the Modin     |
|                            |                           |                        | dataframe.                                         |
|                            |                           |                        | Experimental implementation:                       |
|                            |                           |                        | DataFrame.modin.to_parquet_glob                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``to_period``              | `to_period`_              | D                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``to_pickle``              | `to_pickle`_              | D                      | Experimental implementation:                       |
|                            |                           |                        | DataFrame.modin.to_pickle_glob                     |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``to_records``             | `to_records`_             | D                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``to_sql``                 | `to_sql`_                 | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``to_stata``               | `to_stata`_               | D                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``to_string``              | `to_string`_              | D                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``to_timestamp``           | `to_timestamp`_           | D                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``to_xarray``              | `to_xarray`_              | D                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``transform``              | `transform`_              | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``transpose``              | `transpose`_              | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``truediv``                | `truediv`_                | Y                      | See ``add``                                        |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``truncate``               | `truncate`_               | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``tz_convert``             | `tz_convert`_             | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``tz_localize``            | `tz_localize`_            | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``unstack``                | `unstack`_                | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``update``                 | `update`_                 | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``values``                 | `values`_                 | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``value_counts``           | `value_counts`_           | D                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``var``                    | `var`_                    | P                      | Modin defaults to pandas if given the ``level``    |
|                            |                           |                        | param.                                             |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``where``                  | `where`_                  | Y                      |                                                    |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+

.. _`GitHub repository`: https://github.com/modin-project/modin/issues
.. _`T`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.T.html#pandas.DataFrame.T
.. _`abs`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.abs.html#pandas.DataFrame.abs
.. _`add`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.add.html#pandas.DataFrame.add
.. _`add_prefix`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.add_prefix.html#pandas.DataFrame.add_prefix
.. _`add_suffix`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.add_suffix.html#pandas.DataFrame.add_suffix
.. _`agg`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.agg.html#pandas.DataFrame.agg
.. _`aggregate`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.aggregate.html#pandas.DataFrame.aggregate
.. _`align`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.align.html#pandas.DataFrame.align
.. _`all`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.all.html#pandas.DataFrame.all
.. _`any`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.any.html#pandas.DataFrame.any
.. _`apply`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.apply.html#pandas.DataFrame.apply
.. _`applymap`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.applymap.html#pandas.DataFrame.applymap
.. _`asfreq`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.asfreq.html#pandas.DataFrame.asfreq
.. _`asof`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.asof.html#pandas.DataFrame.asof
.. _`assign`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.assign.html#pandas.DataFrame.assign
.. _`astype`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.astype.html#pandas.DataFrame.astype
.. _`at`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.at.html#pandas.DataFrame.at
.. _`at_time`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.at_time.html#pandas.DataFrame.at_time
.. _`axes`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.axes.html#pandas.DataFrame.axes
.. _`between_time`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.between_time.html#pandas.DataFrame.between_time
.. _`bfill`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.bfill.html#pandas.DataFrame.bfill
.. _`bool`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.bool.html#pandas.DataFrame.bool
.. _`boxplot`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.boxplot.html#pandas.DataFrame.boxplot
.. _`clip`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.clip.html#pandas.DataFrame.clip
.. _`combine`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.combine.html#pandas.DataFrame.combine
.. _`combine_first`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.combine_first.html#pandas.DataFrame.combine_first
.. _`compare`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.compare.html#pandas.DataFrame.compare
.. _`compound`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.compound.html#pandas.DataFrame.compound
.. _`copy`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.copy.html#pandas.DataFrame.copy
.. _`corr`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.corr.html#pandas.DataFrame.corr
.. _`corrwith`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.corrwith.html#pandas.DataFrame.corrwith
.. _`count`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.count.html#pandas.DataFrame.count
.. _`cov`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.cov.html#pandas.DataFrame.cov
.. _`cummax`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.cummax.html#pandas.DataFrame.cummax
.. _`cummin`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.cummin.html#pandas.DataFrame.cummin
.. _`cumprod`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.cumprod.html#pandas.DataFrame.cumprod
.. _`cumsum`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.cumsum.html#pandas.DataFrame.cumsum
.. _`describe`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.describe.html#pandas.DataFrame.describe
.. _`diff`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.diff.html#pandas.DataFrame.diff
.. _`div`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.div.html#pandas.DataFrame.div
.. _`divide`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.divide.html#pandas.DataFrame.divide
.. _`dot`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.dot.html#pandas.DataFrame.dot
.. _`drop`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.drop.html#pandas.DataFrame.drop
.. _`droplevel`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.droplevel.html#pandas.DataFrame.droplevel
.. _`drop_duplicates`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.drop_duplicates.html#pandas.DataFrame.drop_duplicates
.. _`dropna`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.dropna.html#pandas.DataFrame.dropna
.. _`dtypes`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.dtypes.html#pandas.DataFrame.dtypes
.. _`duplicated`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.duplicated.html#pandas.DataFrame.duplicated
.. _`empty`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.empty.html#pandas.DataFrame.empty
.. _`eq`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.eq.html#pandas.DataFrame.eq
.. _`equals`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.equals.html#pandas.DataFrame.equals
.. _`eval`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.eval.html#pandas.DataFrame.eval
.. _`ewm`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.ewm.html#pandas.DataFrame.ewm
.. _`expanding`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.expanding.html#pandas.DataFrame.expanding
.. _`explode`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.explode.html#pandas.DataFrame.explode
.. _`ffill`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.ffill.html#pandas.DataFrame.ffill
.. _`fillna`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.fillna.html#pandas.DataFrame.fillna
.. _`filter`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.filter.html#pandas.DataFrame.filter
.. _`first`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.first.html#pandas.DataFrame.first
.. _`first_valid_index`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.first_valid_index.html#pandas.DataFrame.first_valid_index
.. _`floordiv`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.floordiv.html#pandas.DataFrame.floordiv
.. _`from_dict`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.from_dict.html#pandas.DataFrame.from_dict
.. _`from_records`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.from_records.html#pandas.DataFrame.from_records
.. _`ge`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.ge.html#pandas.DataFrame.ge
.. _`get`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.get.html#pandas.DataFrame.get
.. _`get_dtype_counts`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.get_dtype_counts.html#pandas.DataFrame.get_dtype_counts
.. _`get_ftype_counts`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.get_ftype_counts.html#pandas.DataFrame.get_ftype_counts
.. _`get_value`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.get_value.html#pandas.DataFrame.get_value
.. _`get_values`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.get_values.html#pandas.DataFrame.get_values
.. _`groupby`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.groupby.html#pandas.DataFrame.groupby
.. _`gt`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.gt.html#pandas.DataFrame.gt
.. _`head`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.head.html#pandas.DataFrame.head
.. _`hist`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.hist.html#pandas.DataFrame.hist
.. _`iat`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.iat.html#pandas.DataFrame.iat
.. _`idxmax`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.idxmax.html#pandas.DataFrame.idxmax
.. _`idxmin`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.idxmin.html#pandas.DataFrame.idxmin
.. _`iloc`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.iloc.html#pandas.DataFrame.iloc
.. _`infer_objects`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.infer_objects.html#pandas.DataFrame.infer_objects
.. _`info`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.info.html#pandas.DataFrame.info
.. _`insert`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.insert.html#pandas.DataFrame.insert
.. _`interpolate`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.interpolate.html#pandas.DataFrame.interpolate
.. _`is_copy`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.is_copy.html#pandas.DataFrame.is_copy
.. _`isetitem`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.isetitem.html#pandas.DataFrame.isetitem
.. _`isin`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.isin.html#pandas.DataFrame.isin
.. _`isna`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.isna.html#pandas.DataFrame.isna
.. _`isnull`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.isnull.html#pandas.DataFrame.isnull
.. _`items`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.items.html#pandas.DataFrame.items
.. _`iterrows`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.iterrows.html#pandas.DataFrame.iterrows
.. _`itertuples`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.itertuples.html#pandas.DataFrame.itertuples
.. _`ix`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.ix.html#pandas.DataFrame.ix
.. _`join`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.join.html#pandas.DataFrame.join
.. _`keys`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.keys.html#pandas.DataFrame.keys
.. _`kurt`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.kurt.html#pandas.DataFrame.kurt
.. _`kurtosis`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.kurtosis.html#pandas.DataFrame.kurtosis
.. _`last`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.last.html#pandas.DataFrame.last
.. _`last_valid_index`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.last_valid_index.html#pandas.DataFrame.last_valid_index
.. _`le`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.le.html#pandas.DataFrame.le
.. _`loc`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.loc.html#pandas.DataFrame.loc
.. _`lt`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.lt.html#pandas.DataFrame.lt
.. _`mask`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.mask.html#pandas.DataFrame.mask
.. _`max`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.max.html#pandas.DataFrame.max
.. _`mean`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.mean.html#pandas.DataFrame.mean
.. _`median`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.median.html#pandas.DataFrame.median
.. _`melt`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.melt.html#pandas.DataFrame.melt
.. _`memory_usage`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.memory_usage.html#pandas.DataFrame.memory_usage
.. _`merge`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.merge.html#pandas.DataFrame.merge
.. _`min`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.min.html#pandas.DataFrame.min
.. _`mod`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.mod.html#pandas.DataFrame.mod
.. _`mode`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.mode.html#pandas.DataFrame.mode
.. _`mul`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.mul.html#pandas.DataFrame.mul
.. _`multiply`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.multiply.html#pandas.DataFrame.multiply
.. _`ndim`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.ndim.html#pandas.DataFrame.ndim
.. _`ne`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.ne.html#pandas.DataFrame.ne
.. _`nlargest`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.nlargest.html#pandas.DataFrame.nlargest
.. _`notna`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.notna.html#pandas.DataFrame.notna
.. _`notnull`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.notnull.html#pandas.DataFrame.notnull
.. _`nsmallest`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.nsmallest.html#pandas.DataFrame.nsmallest
.. _`nunique`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.nunique.html#pandas.DataFrame.nunique
.. _`pct_change`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.pct_change.html#pandas.DataFrame.pct_change
.. _`pipe`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.pipe.html#pandas.DataFrame.pipe
.. _`pivot`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.pivot.html#pandas.DataFrame.pivot
.. _`pivot_table`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.pivot_table.html#pandas.DataFrame.pivot_table
.. _`plot`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.plot.html#pandas.DataFrame.plot
.. _`pop`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.pop.html#pandas.DataFrame.pop
.. _`pow`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.pow.html#pandas.DataFrame.pow
.. _`prod`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.prod.html#pandas.DataFrame.prod
.. _`product`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.product.html#pandas.DataFrame.product
.. _`quantile`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.quantile.html#pandas.DataFrame.quantile
.. _`query`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.query.html#pandas.DataFrame.query
.. _`radd`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.radd.html#pandas.DataFrame.radd
.. _`rank`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.rank.html#pandas.DataFrame.rank
.. _`rdiv`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.rdiv.html#pandas.DataFrame.rdiv
.. _`reindex`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.reindex.html#pandas.DataFrame.reindex
.. _`reindex_like`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.reindex_like.html#pandas.DataFrame.reindex_like
.. _`rename`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.rename.html#pandas.DataFrame.rename
.. _`rename_axis`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.rename_axis.html#pandas.DataFrame.rename_axis
.. _`reorder_levels`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.reorder_levels.html#pandas.DataFrame.reorder_levels
.. _`replace`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.replace.html#pandas.DataFrame.replace
.. _`resample`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.resample.html#pandas.DataFrame.resample
.. _`reset_index`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.reset_index.html#pandas.DataFrame.reset_index
.. _`rfloordiv`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.rfloordiv.html#pandas.DataFrame.rfloordiv
.. _`rmod`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.rmod.html#pandas.DataFrame.rmod
.. _`rmul`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.rmul.html#pandas.DataFrame.rmul
.. _`rolling`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.rolling.html#pandas.DataFrame.rolling
.. _`round`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.round.html#pandas.DataFrame.round
.. _`rpow`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.rpow.html#pandas.DataFrame.rpow
.. _`rsub`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.rsub.html#pandas.DataFrame.rsub
.. _`rtruediv`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.rtruediv.html#pandas.DataFrame.rtruediv
.. _`sample`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.sample.html#pandas.DataFrame.sample
.. _`select_dtypes`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.select_dtypes.html#pandas.DataFrame.select_dtypes
.. _`sem`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.sem.html#pandas.DataFrame.sem
.. _`set_axis`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.set_axis.html#pandas.DataFrame.set_axis
.. _`set_index`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.set_index.html#pandas.DataFrame.set_index
.. _`set_value`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.set_value.html#pandas.DataFrame.set_value
.. _`shape`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.shape.html#pandas.DataFrame.shape
.. _`shift`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.shift.html#pandas.DataFrame.shift
.. _`size`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.size.html#pandas.DataFrame.size
.. _`skew`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.skew.html#pandas.DataFrame.skew
.. _`sort_index`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.sort_index.html#pandas.DataFrame.sort_index
.. _`sort_values`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.sort_values.html#pandas.DataFrame.sort_values
.. _`sparse`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.sparse.html#pandas-dataframe-sparse
.. _`squeeze`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.squeeze.html#pandas.DataFrame.squeeze
.. _`stack`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.stack.html#pandas.DataFrame.stack
.. _`std`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.std.html#pandas.DataFrame.std
.. _`style`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.style.html#pandas.DataFrame.style
.. _`sub`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.sub.html#pandas.DataFrame.sub
.. _`subtract`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.subtract.html#pandas.DataFrame.subtract
.. _`sum`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.sum.html#pandas.DataFrame.sum
.. _`swapaxes`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.swapaxes.html#pandas.DataFrame.swapaxes
.. _`swaplevel`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.swaplevel.html#pandas.DataFrame.swaplevel
.. _`tail`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.tail.html#pandas.DataFrame.tail
.. _`take`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.take.html#pandas.DataFrame.take
.. _`to_clipboard`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_clipboard.html#pandas.DataFrame.to_clipboard
.. _`to_csv`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_csv.html#pandas.DataFrame.to_csv
.. _`to_dict`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_dict.html#pandas.DataFrame.to_dict
.. _`to_excel`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_excel.html#pandas.DataFrame.to_excel
.. _`to_feather`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_feather.html#pandas.DataFrame.to_feather
.. _`to_gbq`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_gbq.html#pandas.DataFrame.to_gbq
.. _`to_hdf`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_hdf.html#pandas.DataFrame.to_hdf
.. _`to_html`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_html.html#pandas.DataFrame.to_html
.. _`to_json`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_json.html#pandas.DataFrame.to_json
.. _`to_xml`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_xml.html#pandas.DataFrame.to_xml
.. _`to_latex`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_latex.html#pandas.DataFrame.to_latex
.. _`to_orc`: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_orc.html#pandas.DataFrame.to_orc
.. _`to_parquet`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_parquet.html#pandas.DataFrame.to_parquet
.. _`to_period`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_period.html#pandas.DataFrame.to_period
.. _`to_pickle`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_pickle.html#pandas.DataFrame.to_pickle
.. _`to_records`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_records.html#pandas.DataFrame.to_records
.. _`to_sql`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_sql.html#pandas.DataFrame.to_sql
.. _`to_stata`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_stata.html#pandas.DataFrame.to_stata
.. _`to_string`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_string.html#pandas.DataFrame.to_string
.. _`to_timestamp`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_timestamp.html#pandas.DataFrame.to_timestamp
.. _`to_xarray`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_xarray.html#pandas.DataFrame.to_xarray
.. _`transform`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.transform.html#pandas.DataFrame.transform
.. _`transpose`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.transpose.html#pandas.DataFrame.transpose
.. _`truediv`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.truediv.html#pandas.DataFrame.truediv
.. _`truncate`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.truncate.html#pandas.DataFrame.truncate
.. _`tz_convert`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.tz_convert.html#pandas.DataFrame.tz_convert
.. _`tz_localize`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.tz_localize.html#pandas.DataFrame.tz_localize
.. _`unstack`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.unstack.html#pandas.DataFrame.unstack
.. _`update`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.update.html#pandas.DataFrame.update
.. _`value_counts`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.value_counts.html#pandas.DataFrame.value_counts
.. _`values`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.values.html#pandas.DataFrame.values
.. _`var`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.var.html#pandas.DataFrame.var
.. _`where`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.where.html#pandas.DataFrame.where
.. _`xs`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.xs.html#pandas.DataFrame.xs


================================================
FILE: docs/supported_apis/defaulting_to_pandas.rst
================================================
Defaulting to pandas
====================

Currently Modin does not support distributed execution for all methods from the pandas API.
The remaining unimplemented methods are executed in a mode called "default to pandas".
This allows users to continue using Modin even though their workloads contain functions not yet implemented in Modin.
Here is a diagram of how we convert to pandas and perform the operation:

.. image:: /img/convert_to_pandas.png
   :align: center

We first convert to a pandas DataFrame, then perform the operation. **There is a
performance penalty for going from a partitioned Modin DataFrame to pandas because of
the communication cost and single-threaded nature of pandas.** Once the pandas operation
has completed, we convert the DataFrame back into a partitioned Modin DataFrame. This
way, operations performed after something defaults to pandas will be optimized with
Modin.

The exact methods we have implemented are listed in the respective subsections:

* :doc:`DataFrame </supported_apis/dataframe_supported>`
* :doc:`Series </supported_apis/series_supported>`
* :doc:`utilities </supported_apis/utilities_supported>`
* :doc:`I/O </supported_apis/io_supported>`

We have taken a community-driven approach to implementing new methods. We did a `study
on pandas usage`_ to learn what the most-used APIs are. Modin currently supports **93%**
of the pandas API based on our study of pandas usage, and we are actively expanding the
API.
**To request implementation, file an issue at https://github.com/modin-project/modin/issues
or send an email to feature_requests@modin.org.**

.. _`study on pandas usage`: https://github.com/modin-project/study_kaggle_usage


================================================
FILE: docs/supported_apis/index.rst
================================================
Supported APIs
==============

For your convenience, we have compiled a list of currently implemented APIs and methods
available in Modin. This documentation is updated as new methods and APIs are merged
into the main branch, so it is not necessarily accurate for the most recent release.

To view the docs for the most recent release, check that you're viewing the 
`stable version`_ of the docs.

In order to install the latest version of Modin, follow the directions found on the
:doc:`installation page </getting_started/installation>`.

Questions on implementation details
-----------------------------------

If you have a question about the implementation details or would like more information
about an API or method in Modin, please contact the Modin `developer mailing list`_.

.. toctree::
   :titlesonly:
   :hidden:

   defaulting_to_pandas
   dataframe_supported
   series_supported
   utilities_supported
   io_supported
   older_pandas_compat

.. meta::
    :description lang=en:
        Compilation of implemented pandas APIs in Modin.

.. _developer mailing list: https://groups.google.com/forum/#!forum/modin-dev
.. _stable version: https://modin.readthedocs.io/en/stable/supported_apis/index.html


================================================
FILE: docs/supported_apis/io_supported.rst
================================================
``pd.read_<file>`` and I/O APIs
=================================

A number of IO methods default to pandas. We have parallelized ``read_csv``,
``read_parquet`` and some more (see table), though many of the remaining methods
can be relatively easily parallelized. Some of the operations default to the
pandas implementation, meaning the data is read serially into a single, non-distributed
pandas DataFrame, which is then distributed. This negatively affects performance.

The following table is structured as follows: The first column contains the method name.
The second column is a flag for whether or not there is an implementation in Modin for
the method in the left column. ``Y`` stands for yes, ``N`` stands for no, ``P`` stands
for partial (meaning some parameters may not be supported yet), and ``D`` stands for
default to pandas.

.. note::
    Support for fully asynchronous reading has been added for the following functions:
    ``read_csv``, ``read_fwf``, ``read_table``, ``read_custom_text``.
    This mode is disabled by default; enable it by setting the ``MODIN_ASYNC_READ_MODE=True``
    environment variable. Some parameter combinations are not supported, in which case the
    function is executed in synchronous mode.

+-------------------+---------------------------------+--------------------------------------------------------+
| IO method         | Modin Implementation? (Y/N/P/D) | Notes for Current implementation                       |
+-------------------+---------------------------------+--------------------------------------------------------+
| `read_csv`_       | Y                               |                                                        |
+-------------------+---------------------------------+--------------------------------------------------------+
| `read_fwf`_       | Y                               |                                                        |
+-------------------+---------------------------------+--------------------------------------------------------+
| `read_table`_     | Y                               |                                                        |
+-------------------+---------------------------------+--------------------------------------------------------+
| `read_parquet`_   | P                               | Parameters besides ``filters`` and ``storage_options`` |
|                   |                                 | passed via ``**kwargs`` are not supported.             |
|                   |                                 | ``use_nullable_dtypes`` == True is not supported.      |
|                   |                                 |                                                        |
|                   |                                 | Experimental implementation: read_parquet_glob         |
+-------------------+---------------------------------+--------------------------------------------------------+
| `read_json`_      | P                               | Implemented for ``lines=True``                         |
|                   |                                 | Experimental implementation: read_json_glob            |
+-------------------+---------------------------------+--------------------------------------------------------+
| `read_xml`_       | D                               | Experimental implementation: read_xml_glob             |
+-------------------+---------------------------------+--------------------------------------------------------+
| `read_html`_      | D                               |                                                        |
+-------------------+---------------------------------+--------------------------------------------------------+
| `read_clipboard`_ | D                               |                                                        |
+-------------------+---------------------------------+--------------------------------------------------------+
| `read_excel`_     | D                               |                                                        |
+-------------------+---------------------------------+--------------------------------------------------------+
| `read_hdf`_       | D                               |                                                        |
+-------------------+---------------------------------+--------------------------------------------------------+
| `read_feather`_   | Y                               |                                                        |
+-------------------+---------------------------------+--------------------------------------------------------+
| `read_stata`_     | D                               |                                                        |
+-------------------+---------------------------------+--------------------------------------------------------+
| `read_sas`_       | D                               |                                                        |
+-------------------+---------------------------------+--------------------------------------------------------+
| `read_pickle`_    | D                               | Experimental implementation:                           |
|                   |                                 | read_pickle_glob                                       |
+-------------------+---------------------------------+--------------------------------------------------------+
| `read_sql`_       | Y                               |                                                        |
+-------------------+---------------------------------+--------------------------------------------------------+

.. _`read_csv`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html#pandas.read_csv
.. _`read_fwf`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_fwf.html#pandas.read_fwf
.. _`read_table`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_table.html#pandas.read_table
.. _`read_parquet`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_parquet.html#pandas.read_parquet
.. _`read_json`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_json.html#pandas.read_json
.. _`read_xml`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_xml.html#pandas.read_xml
.. _`read_html`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_html.html#pandas.read_html
.. _`read_clipboard`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_clipboard.html#pandas.read_clipboard
.. _`read_excel`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_excel.html#pandas.read_excel
.. _`read_hdf`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_hdf.html#pandas.read_hdf
.. _`read_feather`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_feather.html#pandas.read_feather
.. _`read_stata`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_stata.html#pandas.read_stata
.. _`read_sas`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_sas.html#pandas.read_sas
.. _`read_pickle`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_pickle.html#pandas.read_pickle
.. _`read_sql`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_sql.html#pandas.read_sql


================================================
FILE: docs/supported_apis/older_pandas_compat.rst
================================================
===================================
Pandas backwards compatibility mode
===================================

Modin versions 0.16 and 0.17, but no later minor versions, had limited support
for running with legacy pandas versions. The latest version of Modin no longer
has such support.

Motivation for compatibility mode
---------------------------------

Modin aims to stay compatible with the latest pandas release, ideally catching up with
each new release within a few days.

However, certain restrictions, such as the need to use Python 3.6, force some users to
use an older pandas (1.1.x for Python 3.6, specifically), which normally would mean they're
bound to use an ancient Modin as well.

To overcome this, Modin has a special "compatibility mode" where some basic functionality
works, but please note that the support is on a "best effort" basis (e.g. not all older bugs
are worth fixing).

Known issues with pandas 1.1.x
------------------------------

* ``pd.append()`` does not preserve the order of columns in older pandas while Modin does
* ``.astype()`` produces different error type on incompatible dtypes
* ``read_csv()`` does not support reading from ZIP file *with compression* in parallel mode
* ``read_*`` functions do not support the ``storage_options`` named argument
* ``to_csv()`` does not support binary mode for output file
* ``read_excel()`` does not support ``.xlsx`` files
* ``read_fwf()`` has a bug with list of skiprows and non-None nrows: `pandas-dev#10261`_
* ``.agg(int-value)`` produces TypeError in older pandas but Modin raises AssertionError
* ``Series.reset_index(drop=True)`` does not ignore ``name`` in older pandas while Modin ignores it
* ``.sort_index(ascending=None)`` does not raise ValueError in older pandas while Modin raises it

Please keep in mind that there are probably more issues which are not yet uncovered!

.. _`pandas-dev#10261`: https://github.com/pandas-dev/pandas/issues/10261


================================================
FILE: docs/supported_apis/series_supported.rst
================================================
``pd.Series`` supported APIs
============================

The following table lists both implemented and not implemented methods. If you need
an operation that is listed as not implemented, feel free to open an issue on the
`GitHub repository`_, or give a thumbs up to an existing issue. Contributions are
also welcome!

The table is structured as follows: The first column contains the method name.
The second column is a flag for whether or not there is an implementation in Modin for
the method in the left column. ``Y`` stands for yes, ``N`` stands for no, ``P`` stands
for partial (meaning some parameters may not be supported yet), and ``D`` stands for
default to pandas. To learn more about the implementations that default to pandas, see
the related section on :doc:`Defaulting to pandas </supported_apis/index>`.

+-----------------------------+---------------------------------+----------------------------------------------------+
| Series method               | Modin Implementation? (Y/N/P/D) | Notes for Current implementation                   |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``abs``                     | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``add``                     | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``add_prefix``              | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``add_suffix``              | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``agg``                     | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``aggregate``               | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``align``                   | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``all``                     | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``any``                     | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``apply``                   | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``argmax``                  | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``argmin``                  | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``argsort``                 | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``array``                   | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``asfreq``                  | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``asobject``                | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``asof``                    | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``astype``                  | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``at``                      | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``at_time``                 | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``autocorr``                | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``axes``                    | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``base``                    | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``between``                 | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``between_time``            | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``bfill``                   | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``bool``                    | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``cat``                     | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``clip``                    | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``combine``                 | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``combine_first``           | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``compare``                 | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``compress``                | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``copy``                    | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``corr``                    | Y                               | Correlation floating point precision may slightly  |
|                             |                                 | differ from pandas. Only the pearson method is     |
|                             |                                 | currently implemented; other methods default to    |
|                             |                                 | pandas.                                            |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``count``                   | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``cov``                     | Y                               | Covariance floating point precision may slightly   |
|                             |                                 | differ from pandas.                                |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``cummax``                  | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``cummin``                  | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``cumprod``                 | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``cumsum``                  | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``data``                    | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``describe``                | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``diff``                    | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``div``                     | Y                               | See ``add``                                        |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``divide``                  | Y                               | See ``add``                                        |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``divmod``                  | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``dot``                     | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``drop``                    | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``drop_duplicates``         | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``droplevel``               | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``dropna``                  | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``dt``                      | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``dtype``                   | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``dtypes``                  | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``duplicated``              | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``empty``                   | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``eq``                      | Y                               | See ``add``                                        |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``equals``                  | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``ewm``                     | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``expanding``               | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``explode``                 | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``factorize``               | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``ffill``                   | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``fillna``                  | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``filter``                  | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``first``                   | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``first_valid_index``       | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``flags``                   | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``floordiv``                | Y                               | See ``add``                                        |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``from_array``              | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``ftype``                   | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``ge``                      | Y                               | See ``add``                                        |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``get``                     | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``get_dtype_counts``        | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``get_ftype_counts``        | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``get_value``               | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``get_values``              | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``groupby``                 | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``gt``                      | Y                               | See ``add``                                        |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``hasnans``                 | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``head``                    | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``hist``                    | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``iat``                     | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``idxmax``                  | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``idxmin``                  | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``iloc``                    | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``imag``                    | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``index``                   | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``infer_objects``           | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``interpolate``             | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``is_monotonic_decreasing`` | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``is_monotonic_increasing`` | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``is_unique``               | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``isin``                    | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``isna``                    | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``isnull``                  | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``item``                    | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``items``                   | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``itemsize``                | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``keys``                    | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``kurt``                    | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``kurtosis``                | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``last``                    | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``last_valid_index``        | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``le``                      | Y                               | See ``add``                                        |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``loc``                     | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``lt``                      | Y                               | See ``add``                                        |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``map``                     | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``mask``                    | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``max``                     | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``mean``                    | P                               | Modin defaults to pandas if given the ``level``    |
|                             |                                 | param.                                             |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``median``                  | P                               | Modin defaults to pandas if given the ``level``    |
|                             |                                 | param.                                             |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``memory_usage``            | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``min``                     | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``mod``                     | Y                               | See ``add``                                        |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``mode``                    | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``mul``                     | Y                               | See ``add``                                        |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``multiply``                | Y                               | See ``add``                                        |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``name``                    | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``nbytes``                  | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``ndim``                    | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``ne``                      | Y                               | See ``add``                                        |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``nlargest``                | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``nonzero``                 | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``notna``                   | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``notnull``                 | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``nsmallest``               | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``nunique``                 | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``pct_change``              | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``pipe``                    | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``plot``                    | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``pop``                     | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``pow``                     | Y                               | See ``add``                                        |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``prod``                    | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``product``                 | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``ptp``                     | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``put``                     | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``quantile``                | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``radd``                    | Y                               | See ``add``                                        |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``rank``                    | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``ravel``                   | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``rdiv``                    | Y                               | See ``add``                                        |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``rdivmod``                 | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``real``                    | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``reindex``                 | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``reindex_like``            | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``rename``                  | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``rename_axis``             | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``reorder_levels``          | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``repeat``                  | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``replace``                 | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``resample``                | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``reset_index``             | P                               | **Ray** and **Dask**: ``D`` when ``names`` or      |
|                             |                                 | ``allow_duplicates`` is non-default                |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``rfloordiv``               | Y                               | See ``add``                                        |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``rmod``                    | Y                               | See ``add``                                        |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``rmul``                    | Y                               | See ``add``                                        |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``rolling``                 | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``round``                   | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``rpow``                    | Y                               | See ``add``                                        |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``rsub``                    | Y                               | See ``add``                                        |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``rtruediv``                | Y                               | See ``add``                                        |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``sample``                  | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``searchsorted``            | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``sem``                     | P                               | Modin defaults to pandas if given the ``level``    |
|                             |                                 | param.                                             |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``set_axis``                | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``set_value``               | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``shape``                   | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``shift``                   | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``size``                    | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``skew``                    | P                               | Modin defaults to pandas if given the ``level``    |
|                             |                                 | param.                                             |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``sort_index``              | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``sort_values``             | Y                               | Order of indexes that have the same sort key       |
|                             |                                 | is not guaranteed to be the same across sorts.     |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``sparse``                  | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``squeeze``                 | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``std``                     | P                               | Modin defaults to pandas if given the ``level``    |
|                             |                                 | param.                                             |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``str``                     | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``strides``                 | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``sub``                     | Y                               | See ``add``                                        |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``subtract``                | Y                               | See ``add``                                        |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``sum``                     | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``swapaxes``                | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``swaplevel``               | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``tail``                    | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``take``                    | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``to_clipboard``            | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``to_csv``                  | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``to_dict``                 | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``to_excel``                | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``to_frame``                | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``to_hdf``                  | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``to_json``                 | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``to_latex``                | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``to_list``                 | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``to_numpy``                | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``to_period``               | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``to_pickle``               | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``to_sql``                  | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``to_string``               | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``to_timestamp``            | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``to_xarray``               | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``tolist``                  | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``transform``               | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``transpose``               | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``truediv``                 | Y                               | See ``add``                                        |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``truncate``                | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``tz_convert``              | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``tz_localize``             | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``unique``                  | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``unstack``                 | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``update``                  | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``valid``                   | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``value_counts``            | Y                               | The index order of the resulting object may differ |
|                             |                                 | from pandas.                                       |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``values``                  | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``var``                     | P                               | Modin defaults to pandas if given the ``level``    |
|                             |                                 | param.                                             |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``view``                    | D                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``where``                   | Y                               |                                                    |
+-----------------------------+---------------------------------+----------------------------------------------------+

.. _`GitHub repository`: https://github.com/modin-project/modin/issues


================================================
FILE: docs/supported_apis/utilities_supported.rst
================================================
pandas Utilities Supported
==========================

If you run ``import modin.pandas as pd``, the following operations are available from
``pd.<op>``, e.g. ``pd.concat``. If you do not see an operation that pandas enables and
would like to request it, feel free to `open an issue`_. Make sure you tell us your
primary use-case so we can make it happen faster!

The following table is structured as follows: The first column contains the method name.
The second column is a flag for whether or not there is an implementation in Modin for
the method in the left column. ``Y`` stands for yes, ``N`` stands for no, ``P`` stands
for partial (meaning some parameters may not be supported yet), and ``D`` stands for
default to pandas.

+---------------------------+---------------------------------+----------------------------------------------------+
| Utility method            | Modin Implementation? (Y/N/P/D) | Notes for Current implementation                   |
+---------------------------+---------------------------------+----------------------------------------------------+
| `pd.concat`_              | Y                               |                                                    |
+---------------------------+---------------------------------+----------------------------------------------------+
| `pd.eval`_                | Y                               |                                                    |
+---------------------------+---------------------------------+----------------------------------------------------+
| `pd.unique`_              | Y                               |                                                    |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``pd.value_counts``       | Y                               | The index order of the resulting object may differ |
|                           |                                 | from pandas.                                       |
+---------------------------+---------------------------------+----------------------------------------------------+
| `pd.cut`_                 | D                               |                                                    |
+---------------------------+---------------------------------+----------------------------------------------------+
| `pd.to_numeric`_          | D                               |                                                    |
+---------------------------+---------------------------------+----------------------------------------------------+
| `pd.factorize`_           | D                               |                                                    |
+---------------------------+---------------------------------+----------------------------------------------------+
| `pd.from_dummies`_        | D                               |                                                    |
+---------------------------+---------------------------------+----------------------------------------------------+
| `pd.qcut`_                | D                               |                                                    |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``pd.match``              | D                               |                                                    |
+---------------------------+---------------------------------+----------------------------------------------------+
| `pd.to_datetime`_         | D                               |                                                    |
+---------------------------+---------------------------------+----------------------------------------------------+
| `pd.get_dummies`_         | Y                               |                                                    |
+---------------------------+---------------------------------+----------------------------------------------------+
| `pd.date_range`_          | D                               |                                                    |
+---------------------------+---------------------------------+----------------------------------------------------+
| `pd.bdate_range`_         | D                               |                                                    |
+---------------------------+---------------------------------+----------------------------------------------------+
| `pd.to_timedelta`_        | D                               |                                                    |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``pd.options``            | Y                               |                                                    |
+---------------------------+---------------------------------+----------------------------------------------------+

Other objects & structures
--------------------------

The following is a list of objects not currently distributed by Modin. All of these objects
are compatible with the distributed components of Modin. If you are interested in
contributing a distributed version of any of these objects, feel free to open a
`pull request`_.

* Panel
* Index
* MultiIndex
* CategoricalIndex
* DatetimeIndex
* Timedelta
* Timestamp
* NaT
* PeriodIndex
* Categorical
* Interval
* UInt8Dtype
* UInt16Dtype
* UInt32Dtype
* UInt64Dtype
* SparseDtype
* Int8Dtype
* Int16Dtype
* Int32Dtype
* Int64Dtype
* CategoricalDtype
* DatetimeTZDtype
* IntervalDtype
* PeriodDtype
* RangeIndex
* TimedeltaIndex
* IntervalIndex
* IndexSlice
* TimeGrouper
* Grouper
* array
* Period
* DateOffset
* ExcelWriter
* SparseArray

.. _open an issue: https://github.com/modin-project/modin/issues
.. _pull request: https://github.com/modin-project/modin/pulls
.. _`pd.concat`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.concat.html#pandas.concat
.. _`pd.eval`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.eval.html#pandas.eval
.. _`pd.unique`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.unique.html#pandas.unique
.. _`pd.cut`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.cut.html#pandas.cut
.. _`pd.to_numeric`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.to_numeric.html#pandas.to_numeric
.. _`pd.factorize`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.factorize.html#pandas.factorize
.. _`pd.from_dummies`: https://pandas.pydata.org/docs/reference/api/pandas.from_dummies.html#pandas-from-dummies
.. _`pd.qcut`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.qcut.html#pandas.qcut
.. _`pd.to_datetime`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.to_datetime.html#pandas.to_datetime
.. _`pd.get_dummies`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.get_dummies.html#pandas.get_dummies
.. _`pd.date_range`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.date_range.html#pandas.date_range
.. _`pd.bdate_range`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.bdate_range.html#pandas.bdate_range
.. _`pd.to_timedelta`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.to_timedelta.html#pandas.to_timedelta


================================================
FILE: docs/usage_guide/advanced_usage/batch.rst
================================================
Batch Pipeline API Usage Guide
==============================

Modin provides an experimental batching feature that pipelines row-parallel queries. This feature
is currently only supported for the ``PandasOnRay`` engine. Please note that this feature is experimental
and its behavior or interfaces may change.

Usage examples
--------------

In the examples below, we build and run some pipelines. It is important to note that the queries passed to
the pipeline operate on Modin DataFrame partitions, which are backed by ``pandas``. When using ``pandas``
module-level functions, please make sure to import and use ``pandas`` rather than ``modin.pandas``.

Simple Batch Pipelining
^^^^^^^^^^^^^^^^^^^^^^^

This example walks through a simple batch pipeline in order to familiarize the user with the API.

.. code-block:: python

    from modin.experimental.batch import PandasQueryPipeline
    import modin.pandas as pd
    import numpy as np

    df = pd.DataFrame(
        np.random.randint(0, 100, (100, 100)),
        columns=[f"col {i}" for i in range(1, 101)],
    ) # Build the dataframe we will pipeline.
    pipeline = PandasQueryPipeline(df) # Build the pipeline.
    pipeline.add_query(lambda df: df + 1, is_output=True) # Add the first query and specify that
                                                          # it is an output query.
    pipeline.add_query(
        lambda df: df.rename(columns={f"col {i}":f"col {i-1}" for i in range(1, 101)})
    ) # Add a second query.
    pipeline.add_query(
        lambda df: df.drop(columns=['col 99']),
        is_output=True,
    ) # Add a third query and specify that it is an output query.
    new_df = pd.DataFrame(
        np.ones((100, 100)),
        columns=[f"col {i}" for i in range(1, 101)],
    ) # Build a second dataframe that we will pipeline now instead.
    pipeline.update_df(new_df) # Update the dataframe that we will pipeline to be `new_df`
                               # instead of `df`.
    result_dfs = pipeline.compute_batch() # Begin batch processing.

    # Print pipeline results
    print(f"Result of Query 1:\n{result_dfs[0]}")
    print(f"Result of Query 2:\n{result_dfs[1]}")
    # Output IDs can also be specified
    pipeline = PandasQueryPipeline(df) # Build the pipeline.
    pipeline.add_query(
        lambda df: df + 1,
        is_output=True,
        output_id=1,
    ) # Add the first query, specify that it is an output query, as well as specify an output id.
    pipeline.add_query(
        lambda df: df.rename(columns={f"col {i}":f"col {i-1}" for i in range(1, 101)})
    ) # Add a second query.
    pipeline.add_query(
        lambda df: df.drop(columns=['col 99']),
        is_output=True,
        output_id=2,
    ) # Add a third query, specify that it is an output query, and specify an output_id.
    result_dfs = pipeline.compute_batch() # Begin batch processing.

    # Print pipeline results - should be a dictionary mapping Output IDs to resulting dataframes:
    print(f"Mapping of Output ID to dataframe:\n{result_dfs}")
    # Print results
    for query_id, res_df in result_dfs.items():
        print(f"Query {query_id} resulted in\n{res_df}")

Batch Pipelining with Postprocessing
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

A postprocessing function can also be provided when calling ``pipeline.compute_batch``. The example
below runs a pipeline similar to the one above, but the postprocessing function writes each partition
of the output dataframes to a parquet file.

.. code-block:: python

    from modin.experimental.batch import PandasQueryPipeline
    import modin.pandas as pd
    import numpy as np
    import os
    import shutil

    df = pd.DataFrame(
        np.random.randint(0, 100, (100, 100)),
        columns=[f"col {i}" for i in range(1, 101)],
    ) # Build the dataframe we will pipeline.
    pipeline = PandasQueryPipeline(df) # Build the pipeline.
    pipeline.add_query(
        lambda df: df + 1,
        is_output=True,
        output_id=1,
    ) # Add the first query, specify that it is an output query, as well as specify an output id.
    pipeline.add_query(
        lambda df: df.rename(columns={f"col {i}":f"col {i-1}" for i in range(1, 101)})
    ) # Add a second query.
    pipeline.add_query(
        lambda df: df.drop(columns=['col 99']),
        is_output=True,
        output_id=2,
    ) # Add a third query, specify that it is an output query, and specify an output_id.
    def postprocessing_func(df, output_id, partition_id):
        filepath = f"query_{output_id}/"
        os.makedirs(filepath, exist_ok=True)
        filepath += f"part-{partition_id:04d}.parquet"
        df.to_parquet(filepath)
        return df
    result_dfs = pipeline.compute_batch(
        postprocessor=postprocessing_func,
        pass_partition_id=True,
        pass_output_id=True,
    ) # Begin computation, pass in a postprocessing function, and specify that partition ID and 
      # output ID should be passed to that postprocessing function.

    print(os.system("ls query_1/")) # Should show `NPartitions.get()` parquet files - which
                                    # correspond to partitions of the output of query 1.
    print(os.system("ls query_2/")) # Should show `NPartitions.get()` parquet files - which
                                    # correspond to partitions of the output of query 2.

    for query_id, res_df in result_dfs.items():
        written_df = pd.read_parquet(f"query_{query_id}/")
        shutil.rmtree(f"query_{query_id}/") # Clean up
        print(f"Written and Computed DF are " +
              f"{'equal' if res_df.equals(written_df) else 'not equal'} for query {query_id}")

Batch Pipelining with Fan Out
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

If the input dataframe to a query is small (consisting of only one partition), it is possible to 
induce additional parallelism using the ``fan_out`` argument. The ``fan_out`` argument replicates
the input partition, applies the query to each replica, and then coalesces all of the replicas back
to one partition using the ``reduce_fn`` that must also be specified when ``fan_out`` is ``True``.

It is possible to control the parallelism via the ``num_partitions`` parameter passed to the
constructor of the ``PandasQueryPipeline``. This parameter designates the desired number of partitions,
and defaults to ``NPartitions.get()`` when not specified. During fan out, the input partition is replicated
``num_partitions`` times. In the previous examples, ``num_partitions`` was not specified, and so defaulted
to ``NPartitions.get()``.

The example below demonstrates the usage of ``fan_out`` and ``num_partitions``. We first demonstrate
an example of a function that would benefit from this computation pattern:

.. code-block:: python

    import glob
    from PIL import Image
    import torchvision.transforms as T
    import torchvision

    transforms = T.Compose([T.ToTensor()])
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    model.eval()
    COCO_INSTANCE_CATEGORY_NAMES = [
        '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
        'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
        'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
        'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',
        'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
        'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
        'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
        'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
        'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',
        'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
        'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',
        'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
    ]

    def contains_cat(image, model):
        image = transforms(image)
        labels = [COCO_INSTANCE_CATEGORY_NAMES[i] for i in model([image])[0]['labels']]
        return 'cat' in labels

    def serial_query(df):
        """
        This function takes as input a dataframe with a single row corresponding to a folder
        containing images to parse. Each image in the folder is passed through a neural network
        that detects whether it contains a cat, in serial, and a new column is computed for the
        dataframe that counts the number of images containing cats.

        Parameters
        ----------
        df : a dataframe
            The dataframe to process
        
        Returns
        -------
        The same dataframe as before, with an additional column containing the count of images 
        containing cats.
        """
        model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
        model.eval()
        img_folder = df['images'][0]
        images = sorted(glob.glob(f"{img_folder}/*.jpg"))
        cats = 0
        for img in images:
            cats = cats + 1 if contains_cat(Image.open(img), model) else cats
        df['cat_count'] = cats
        return df
    
To download the image files to test out this code, run the following bash script, which downloads
the images from the fast-ai-coco S3 bucket to a folder called ``images`` in your current working
directory:

.. code-block:: shell

    aws s3 cp s3://fast-ai-coco/coco_tiny.tgz . --no-sign-request; tar -xf coco_tiny.tgz; mkdir \
        images; mv coco_tiny/train/* images/; rm -rf coco_tiny; rm -rf coco_tiny.tgz

We can pipeline that code like so:

.. code-block:: python

    import modin.pandas as pd
    from modin.experimental.batch import PandasQueryPipeline
    from time import time
    df = pd.DataFrame([['images']], columns=['images'])
    pipeline = PandasQueryPipeline(df)
    pipeline.add_query(serial_query, is_output=True)
    serial_start = time()
    df_with_cat_count = pipeline.compute_batch()[0]
    serial_end = time()
    print(f"Result of pipeline:\n{df_with_cat_count}")

We can induce `8x` parallelism into the pipeline above by combining the ``fan_out`` and ``num_partitions`` parameters like so:

.. code-block:: python

    import modin.pandas as pd
    from modin.experimental.batch import PandasQueryPipeline
    import shutil
    from time import time
    df = pd.DataFrame([['images']], columns=['images'])
    desired_num_partitions = 8
    def parallel_query(df, partition_id):
        """
        This function takes as input a dataframe with a single row corresponding to a folder
        containing images to parse. It parses `total_images/desired_num_partitions` images every
        time it is called. A new column is computed for the dataframe that counts the number of
        images containing cats.

        Parameters
        ----------
        df : a dataframe
            The dataframe to process
        partition_id : int
            The partition id of the dataframe that this function runs on.
        
        Returns
        -------
        The same dataframe as before, with an additional column containing the count of images
        containing cats.
        """
        model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
        model.eval()
        img_folder = df['images'][0]
        images = sorted(glob.glob(f"{img_folder}/*.jpg"))
        total_images = len(images)
        cats = 0
        start_index = partition_id * (total_images // desired_num_partitions)
        if partition_id == desired_num_partitions - 1: # Last partition must parse to end of list
            images = images[start_index:]
        else:
            end_index = (partition_id + 1) * (total_images // desired_num_partitions)
            images = images[start_index:end_index]
        for img in images:
            cats = cats + 1 if contains_cat(Image.open(img), model) else cats
        df['cat_count'] = cats
        return df

    def reduce_fn(dfs):
        """
        Coalesce the results of fanning out the `parallel_query` query.

        Parameters
        ----------
        dfs : a list of dataframes
            The resulting dataframes from fanning out `parallel_query`
        
        Returns
        -------
        A new dataframe whose `cat_count` column is the sum of the `cat_count` column of all
        dataframes in `dfs`
        """
        df = dfs[0]
        cat_count = df['cat_count'][0]
        for dataframe in dfs[1:]:
            cat_count += dataframe['cat_count'][0]
        df['cat_count'] = cat_count
        return df
    pipeline = PandasQueryPipeline(df, desired_num_partitions)
    pipeline.add_query(
        parallel_query,
        fan_out=True,
        reduce_fn=reduce_fn,
        is_output=True,
        pass_partition_id=True
    )
    parallel_start = time()
    df_with_cat_count = pipeline.compute_batch()[0]
    parallel_end = time()
    print(f"Result of pipeline:\n{df_with_cat_count}")
    print(f"Total Time in Serial: {serial_end - serial_start}")
    print(f"Total time with induced parallelism: {parallel_end - parallel_start}")
    shutil.rmtree("images/") # Clean up

Batch Pipelining with Dynamic Repartitioning
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Similarly, it is also possible to hint to the Pipeline API to repartition after a node completes
computation. This is currently only supported if the input dataframe consists of only one partition.
The number of partitions after repartitioning is controlled by the ``num_partitions`` parameter
passed to the constructor of the ``PandasQueryPipeline``.

The following example demonstrates how to use the ``repartition_after`` parameter.

.. code-block:: python

    import modin.pandas as pd
    from modin.experimental.batch import PandasQueryPipeline
    import numpy as np

    small_df = pd.DataFrame([[1, 2, 3]]) # Create a small dataframe
    
    def increase_dataframe_size(df):
        import pandas
        new_df = pandas.concat([df] * 1000)
        new_df = new_df.reset_index(drop=True) # Get a new range index that isn't duplicated
        return new_df
    
    desired_num_partitions = 24 # We will repartition to 24 partitions

    def add_partition_id_to_df(df, partition_id):
        import pandas
        new_col = pandas.Series([partition_id]*len(df), name="partition_id", index=df.index)
        return pandas.concat([df, new_col], axis=1)
    
    pipeline = PandasQueryPipeline(small_df, desired_num_partitions)
    pipeline.add_query(increase_dataframe_size, repartition_after=True)
    pipeline.add_query(add_partition_id_to_df, pass_partition_id=True, is_output=True)
    result_df = pipeline.compute_batch()[0]
    print(f"Number of partitions passed to second query: " + 
          f"{len(np.unique(result_df['partition_id'].values))}")
    print(f"Result of pipeline:\n{result_df}")


================================================
FILE: docs/usage_guide/advanced_usage/index.rst
================================================
Advanced Usage
==============

.. toctree::
   :titlesonly:
   :hidden:

   /flow/modin/distributed/dataframe/pandas
   spreadsheets_api
   progress_bar
   modin_xgboost
   modin_logging
   modin_metrics
   batch
   modin_engines

.. meta::
    :description lang=en:
        Description of Modin's advanced features.

Modin aims to not only optimize pandas, but also provide a comprehensive,
integrated toolkit for data scientists. We are actively developing data science tools
such as DataFrame spreadsheet integration, DataFrame algebra, progress bars, SQL queries
on DataFrames, and more. Join us on `Slack`_ for the latest updates!

Modin engines
-------------

Modin supports a series of execution engines such as Ray_, Dask_, and `MPI through unidist`_,
each of which might be a more beneficial choice for a specific scenario. When doing the first operation
with Modin it automatically initializes one of the engines to further perform distributed/parallel computation.
If you are familiar with a concrete execution engine, it is possible to initialize the engine on your own and
Modin will automatically attach to it. Refer to :doc:`Modin engines </usage_guide/advanced_usage/modin_engines>` page
for more details.

Additional APIs
---------------

Modin also supports these additional APIs on top of pandas to improve user experience.

- :py:meth:`~modin.pandas.DataFrame.modin.to_pandas` -- convert a Modin DataFrame/Series to a pandas DataFrame/Series.
- :py:meth:`~modin.pandas.DataFrame.get_backend` -- Get the ``Backend`` :doc:`configuration variable </flow/modin/config>` of this ``DataFrame``.
- :py:meth:`~modin.pandas.DataFrame.move_to` -- Move data and execution for this ``DataFrame`` to the given ``Backend`` :doc:`configuration variable </flow/modin/config>`. This method is an alias for ``DataFrame.set_backend``.
- :py:meth:`~modin.pandas.DataFrame.set_backend` -- Move data and execution for this ``DataFrame`` to the given ``Backend`` :doc:`configuration variable </flow/modin/config>`. This method is an alias for ``DataFrame.move_to``.
- :py:func:`~modin.pandas.io.from_pandas` -- convert a pandas DataFrame to a Modin DataFrame.
- :py:meth:`~modin.pandas.DataFrame.modin.to_ray` -- convert a Modin DataFrame/Series to a Ray Dataset.
- :py:func:`~modin.pandas.io.from_ray` -- convert a Ray Dataset to a Modin DataFrame.
- :py:meth:`~modin.pandas.DataFrame.modin.to_dask` -- convert a Modin DataFrame/Series to a Dask DataFrame/Series.
- :py:func:`~modin.pandas.io.from_dask` -- convert a Dask DataFrame to a Modin DataFrame.
- :py:func:`~modin.pandas.io.from_map` -- create a Modin DataFrame from map function applied to an iterable object.
- :py:func:`~modin.pandas.io.from_arrow` -- convert an Arrow Table to a Modin DataFrame.
- :py:func:`~modin.experimental.pandas.read_csv_glob` -- read multiple files in a directory.
- :py:func:`~modin.experimental.pandas.read_sql` -- add optional parameters for the database connection.
- :py:func:`~modin.experimental.pandas.read_custom_text` -- read custom text data from file.
- :py:func:`~modin.experimental.pandas.read_pickle_glob`  -- read multiple pickle files in a directory.
- :py:func:`~modin.experimental.pandas.read_parquet_glob`  -- read multiple parquet files in a directory.
- :py:func:`~modin.experimental.pandas.read_json_glob`  -- read multiple json files in a directory.
- :py:func:`~modin.experimental.pandas.read_xml_glob`  -- read multiple xml files in a directory.
- :py:meth:`~modin.pandas.DataFrame.modin.to_pickle_glob` -- write to multiple pickle files in a directory.
- :py:meth:`~modin.pandas.DataFrame.modin.to_parquet_glob` -- write to multiple parquet files in a directory.
- :py:meth:`~modin.pandas.DataFrame.modin.to_json_glob` -- write to multiple json files in a directory.
- :py:meth:`~modin.pandas.DataFrame.modin.to_xml_glob` -- write to multiple xml files in a directory.

DataFrame partitioning API
--------------------------

Modin DataFrame provides an API to directly access partitions: you can extract physical partitions from
a :py:class:`~modin.pandas.dataframe.DataFrame`, modify their structure by reshuffling or applying some
functions, and create a DataFrame from those modified partitions. Visit
:doc:`pandas partitioning API </flow/modin/distributed/dataframe/pandas>` documentation to learn more.

Modin Spreadsheet API
---------------------

The Spreadsheet API for Modin allows you to render the dataframe as a spreadsheet to easily explore
your data and perform operations on a graphical user interface. The API also includes features for recording
the changes made to the dataframe and exporting them as reproducible code. Built on top of Modin and SlickGrid,
the spreadsheet interface is able to provide interactive response times even at a scale of billions of rows.
See our `Modin Spreadsheet API documentation`_ for more details.

.. figure:: /img/modin_spreadsheet_mini_demo.gif
   :align: center
   :width: 650px
   :height: 350px

Progress Bar
------------

Visual progress bar for Dataframe operations such as groupby and fillna, as well as for file reading operations such as
read_csv. Built using the `tqdm`_ library and Ray execution engine. See `Progress Bar documentation`_ for more details.

.. figure:: /img/progress_bar_example.png
   :align: center

Dataframe Algebra
-----------------

A minimal set of operators that can be composed to express any dataframe query for use in query planning and optimization.
See our `paper`_ for more information, and full documentation is coming soon!

Distributed XGBoost on Modin
----------------------------

Modin provides an implementation of `distributed XGBoost`_ machine learning algorithm on Modin DataFrames. See our
:doc:`Distributed XGBoost on Modin documentation <modin_xgboost>` for details about installation and usage, as well as
:doc:`Modin XGBoost architecture documentation </flow/modin/experimental/xgboost>` for information about implementation and
internal execution flow.

Logging with Modin
------------------

Modin logging offers users greater insight into their queries by logging internal Modin API calls, partition metadata,
and system memory. Logging is disabled by default, but when it is enabled, log files are written to a local `.modin` directory
at the same directory level as the notebook/script used to run Modin. See our :doc:`Logging with Modin documentation <modin_logging>`
for usage information.

Batch Pipeline API
------------------
Modin provides an experimental batched API that pipelines row-parallel queries. See our :doc:`Batch Pipeline API Usage Guide <batch>`
for a walkthrough on how to use this feature, as well as :doc:`Batch Pipeline API documentation </flow/modin/experimental/batch>`
for more information about the API.

Fuzzydata Testing
-----------------

An experimental GitHub Action on pull request has been added to Modin, which automatically runs the Modin codebase against
`fuzzydata`, a random dataframe workflow generator. The resulting workflow that was used to test Modin codebase can be
downloaded as an artifact from the GitHub Actions tab for further inspection. See `fuzzydata`_ for more details.

.. _`Modin Spreadsheet API documentation`: spreadsheets_api.html
.. _`Progress Bar documentation`: progress_bar.html
.. _`Paper`: https://arxiv.org/pdf/2001.00888.pdf
.. _`Slack`: https://modin.org/slack.html
.. _`tqdm`: https://github.com/tqdm/tqdm
.. _`distributed XGBoost`: https://medium.com/intel-analytics-software/distributed-xgboost-with-modin-on-ray-fc17edef7720
.. _`fuzzydata`: https://github.com/suhailrehman/fuzzydata
.. _Ray: https://github.com/ray-project/ray
.. _Dask: https://github.com/dask/distributed
.. _`MPI through unidist`: https://github.com/modin-project/unidist


================================================
FILE: docs/usage_guide/advanced_usage/modin_engines.rst
================================================
Modin engines
=============

As a rule, you don't have to worry about initialization of an execution engine as
Modin itself automatically initializes one when performing the first operation.
Also, Modin has a broad range of :doc:`configuration settings </flow/modin/config>`, which
you can use to configure an execution engine. If there is a reason to initialize an execution engine
on your own and you are sure what to do, Modin will automatically attach to whichever engine is available.
Below, you can find some examples on how to initialize a specific execution engine on your own.

Ray
---

You can initialize Ray engine with a specific number of CPUs (worker processes) to perform computation.

.. code-block:: python

  import ray
  import modin.config as modin_cfg

  ray.init(num_cpus=<N>)
  modin_cfg.Engine.put("ray") # Modin will use Ray engine
  modin_cfg.CpuCount.put(<N>)

To get more details on all possible parameters for initialization refer to `Ray documentation`_.

Dask
----

You can initialize Dask engine with a specific number of worker processes and threads per worker to perform computation.

.. code-block:: python

  from distributed import Client
  import modin.config as modin_cfg

  client = Client(n_workers=<N1>, threads_per_worker=<N2>)
  modin_cfg.Engine.put("dask") # # Modin will use Dask engine
  modin_cfg.CpuCount.put(<N1>)

To get more details on all possible parameters for initialization refer to `Dask Distributed documentation`_.

MPI through unidist
-------------------

You can initialize MPI through unidist engine with a specific number of CPUs (worker processes) to perform computation.

.. code-block:: python

  import unidist
  import unidist.config as unidist_cfg
  import modin.config as modin_cfg

  unidist_cfg.Backend.put("mpi")
  unidist_cfg.CpuCount.put(<N>)
  unidist.init()

  modin_cfg.Engine.put("unidist") # # Modin will use MPI through unidist engine
  modin_cfg.CpuCount.put(<N>)

To get more details on all possible parameters for initialization refer to `unidist documentation`_.

.. _`Ray documentation`: https://docs.ray.io/en/latest
.. _Dask Distributed documentation: https://distributed.dask.org/en/latest
.. _`unidist documentation`: https://unidist.readthedocs.io/en/latest


================================================
FILE: docs/usage_guide/advanced_usage/modin_logging.rst
================================================
Modin Logging
=============

Modin logging offers users greater insight into their queries by logging internal Modin API calls, partition metadata,
and profiling system memory. When Modin logging is enabled (default disabled), log files are written to a local ``.modin`` directory at the same
directory level as the notebook/script used to run Modin.

The logs generated by Modin Logging will be written to a ``.modin/logs/job_<uuid>`` directory, uniquely named after the job uuid.
The logs that contain the Modin API stack traces are named ``trace.log``. The logs that contain the memory utilization metrics are
named ``memory.log``. By default, if any log file exceeds 10MB (configurable with ``LogFileSize``), that file will be saved and a 
separate log file will be created. For instance, if users have 20MB worth of Modin API logs, they can expect to find ``trace.log.1`` 
and ``trace.log.2`` in the ``.modin/logs/job_<uuid>`` directory. After ``10 * LogFileSize`` MB or by default 100MB of logs, the logs will 
roll over and the original log files beginning with ``trace.log.1`` will be overwritten with the new log lines.

**Developer Warning:** In some cases, running services like JupyterLab in the ``modin/modin`` directory may result in circular dependency issues.
This is due to a naming conflict between the ``modin/logging`` directory and the Python ``logging`` module, which may be used as a default in
such environments. To resolve this, please run JupyterLab or other similar services from directories other than ``modin/modin``.

Usage examples
--------------

In the example below, we enable logging for internal Modin API calls, partition metadata and memory profiling.
We can set the granularity (in seconds) at which the system memory utilization is logged using ``LogMemoryInterval``.
We can also set the maximum size of the logs (in MBs) using ``LogFileSize``.

.. code-block:: python

  import modin.pandas as pd
  from modin.config import LogMode, LogMemoryInterval, LogFileSize
  LogMode.enable()
  LogMemoryInterval.put(2) # Defaults to 5 seconds, new interval is 2 seconds
  LogFileSize.put(5) # Defaults to 10 MB per log file, new size is 5 MB

  # User code goes here

Disable Modin logging like so:

.. code-block:: python

  import modin.pandas as pd
  from modin.config import LogMode
  LogMode.disable()

  # User code goes here

In Modin the lower-level functionality is logged in debug level, and higher level functionality in info level.
By default when logging is enabled in Modin, both high level and low level functionality are logged.
The below example script could be used to switch between logging all functions vs only logging higher level functions.
Setting logger level to ``logging.INFO`` logs only higher level functions.

.. code-block:: python

  import modin.pandas as pd
  from modin.logging.config import get_logger
  from modin.config import LogMode
  import logging
  LogMode.enable()
  logger = get_logger()
  logger.setLevel(logging.INFO) # Replace with logger.setLevel(logging.DEBUG)  for lower level logs
  df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
  df = pd.concat([df, df])


Debugging from user defined functions:

.. warning:: 
    When attempting to use Modin logging in user defined functions that execute in workers for logging lower-level operators
    as in example below, multiple log directories ``.modin/logs/job_**`` would be created for each worker executing the UDF.

.. code-block:: python

  import modin.pandas as pd

  def udf(x):
      from modin.config import LogMode
      
      LogMode.enable()
      
      return x + 1
  
  modin_df = pd.DataFrame([0, 1, 2, 3])
  print(modin_df.map(udf))

So the **recommended** approach is to use a different logger, as in the snippet below,
to log from user-defined functions that execute on workers.
Below is an example of logging from a UDF. For this, the logger config has to be specified inside the UDF that would execute on a remote worker.

.. code-block:: python

  import logging
  import modin.pandas as pd
  
  def udf(x):
      logging.basicConfig(filename='modin_udf.log', level=logging.INFO)
      logging.info("This log message will be written to modin_udf.log ")

      # User code goes here
      return x + 1

  modin_df = pd.DataFrame([0, 1, 2, 3])
  print(modin_df.map(udf))


================================================
FILE: docs/usage_guide/advanced_usage/modin_metrics.rst
================================================
Modin Metrics
=============

Modin allows for third-party systems to register a metrics handler to collect specific API statistics.
Metrics have a name and a value, can be aggregated, discarded, or emitted without impact to the program.

CPU load, memory usage, and disk usage are all typical metrics, but Modin currently only emits metrics on API timings, which can be used to optimize end-user interactive performance. New metrics may
be added in the future.

It is the responsibility of the handler to process or forward these metrics. The name of the metric will 
be in "dot format" and all lowercase, similar to graphite or rrd. The value is an integer or float.

Example metric names include:

* 'modin.core-dataframe.pandasdataframe.copy_index_cache'
* 'modin.core-dataframe.pandasdataframe.transpose'
* 'modin.query-compiler.pandasquerycompiler.transpose'
* 'modin.query-compiler.basequerycompiler.columnarize'
* 'modin.pandas-api.series.__init__'
* 'modin.pandas-api.dataframe._reduce_dimension'
* 'modin.pandas-api.dataframe.sum'

Handlers are functions of the form: `fn(str, int|float)` and can be registered with:

.. code-block:: python

  import modin.pandas as pd
  from modin.logging.metrics import add_metric_handler

  def func(name: str, value: int | float):
    print(f"Got metric {name} value {value}")

  add_metric_handler(func)

.. warning:: 
  A metric handler should be non-blocking, returning within 100ms, although this is not enforced. It must not throw exceptions or it will
  be deregistered. These restrictions are to help guard against the implementation of a metrics collector which would impact
  interactive performance significantly. The data from metrics should generally be offloaded to another system for processing
  and not involve any blocking network calls.

Metrics are enabled by default. Modin metrics can be disabled like so:

.. code-block:: python

  import modin.pandas as pd
  from modin.config import MetricsMode
  MetricsMode.disable()


================================================
FILE: docs/usage_guide/advanced_usage/modin_xgboost.rst
================================================
Distributed XGBoost on Modin
============================

Modin provides an implementation of `distributed XGBoost`_ machine learning
algorithm on Modin DataFrames. Please note that this feature is experimental and behavior or
interfaces could be changed.

Install XGBoost on Modin
------------------------

Modin comes with all the dependencies except ``xgboost`` package by default.
Currently, distributed XGBoost on Modin is only supported on the Ray execution engine, therefore, see
the :doc:`installation page </getting_started/installation>` for more information on installing Modin with the Ray engine.
To install ``xgboost`` package you can use ``pip``:

.. code-block:: bash

  pip install xgboost


XGBoost Train and Predict
-------------------------

Distributed XGBoost functionality is placed in ``modin.experimental.xgboost`` module.
``modin.experimental.xgboost`` provides a drop-in replacement API for ``train`` and ``Booster.predict`` xgboost functions.

.. automodule:: modin.experimental.xgboost
  :noindex:
  :members: train

.. autoclass:: modin.experimental.xgboost.Booster
  :noindex:
  :members: predict


ModinDMatrix
------------

Data is passed to ``modin.experimental.xgboost`` functions via a Modin ``DMatrix`` object.

.. automodule:: modin.experimental.xgboost
  :noindex:
  :members: DMatrix

Currently, the Modin ``DMatrix`` supports ``modin.pandas.DataFrame`` only as an input.


A Single Node / Cluster setup
-----------------------------

The XGBoost part of Modin uses Ray resources in the same way as all other Modin functions.

To start the Ray runtime on a single node:

.. code-block:: python

  import ray
  # Look at the Ray documentation with respect to the Ray configuration suited to you most.
  ray.init()

If you already have a Ray cluster, you can connect to it as follows:

.. code-block:: python

  import ray
  ray.init(address='auto')

Detailed information about initializing the Ray runtime can be found on the `starting ray`_ page.


Usage example
-------------

In the example below we train an XGBoost model using `the Iris Dataset`_ and get predictions on the same data.
All processing will be in a `single node` mode.

.. code-block:: python

  from sklearn import datasets

  import ray
  # Look at the Ray documentation with respect to the Ray configuration suited to you most.
  ray.init() # Start the Ray runtime for single-node

  import modin.pandas as pd
  import modin.experimental.xgboost as xgb

  # Load iris dataset from sklearn
  iris = datasets.load_iris()

  # Create Modin DataFrames
  X = pd.DataFrame(iris.data)
  y = pd.DataFrame(iris.target)

  # Create DMatrix
  dtrain = xgb.DMatrix(X, y)
  dtest = xgb.DMatrix(X, y)

  # Set training parameters
  xgb_params = {
      "eta": 0.3,
      "max_depth": 3,
      "objective": "multi:softprob",
      "num_class": 3,
      "eval_metric": "mlogloss",
  }
  steps = 20

  # Create dict for evaluation results
  evals_result = dict()

  # Run training
  model = xgb.train(
      xgb_params,
      dtrain,
      steps,
      evals=[(dtrain, "train")],
      evals_result=evals_result
  )

  # Print evaluation results
  print(f'Evals results:\n{evals_result}')

  # Predict results
  prediction = model.predict(dtest)

  # Print prediction results
  print(f'Prediction results:\n{prediction}')


.. _Dataframe: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html
.. _`starting ray`: https://docs.ray.io/en/master/starting-ray.html
.. _`the Iris Dataset`: https://scikit-learn.org/stable/auto_examples/datasets/plot_iris_dataset.html
.. _`distributed XGBoost`: https://medium.com/intel-analytics-software/distributed-xgboost-with-modin-on-ray-fc17edef7720


================================================
FILE: docs/usage_guide/advanced_usage/progress_bar.rst
================================================
Progress Bar
============

The progress bar allows users to see the estimated progress and completion time of each line they run, 
in environments such as a shell or Jupyter notebook.

.. figure:: /img/progress_bar.gif
   :align: center

Quickstart
""""""""""

The progress bar uses the `tqdm` library to visualize displays:

.. code-block:: bash

   pip install tqdm


Import the progress bar into your notebook by running the following:


.. code-block:: python

    from modin.config import ProgressBar
    ProgressBar.enable()


================================================
FILE: docs/usage_guide/advanced_usage/spreadsheets_api.rst
================================================
Modin Spreadsheets API
======================

Getting started
---------------
Install Modin-spreadsheet using pip:

.. code-block:: bash

    pip install "modin[spreadsheet]"


The following code snippet creates a spreadsheet using the FiveThirtyEight dataset on labor force information by college majors (licensed under CC BY 4.0):

.. code-block:: python

    import modin.pandas as pd
    import modin.experimental.spreadsheet as mss
    df = pd.read_csv('https://raw.githubusercontent.com/fivethirtyeight/data/master/college-majors/all-ages.csv')
    spreadsheet = mss.from_dataframe(df)
    spreadsheet


.. figure:: /img/modin_spreadsheets_installation.png
    :align: center

Basic Manipulations through User Interface
------------------------------------------

The Spreadsheet API allows users to manipulate the DataFrame with simple graphical controls for sorting, filtering, and editing. 

Here are the instructions for each operation:
    * **Sort**: Click on the column header of the column to sort on.
    * **Filter**: Click on the filter button on the column header and apply the desired filter to the column. The filter dropdown changes depending on the type of the column. Multiple filters are automatically combined.
    * **Edit Cell**: Double click on a cell and enter the new value.
    * **Add Rows**: Click on the “Add Row” button in the toolbar to duplicate the last row in the DataFrame. The duplicated values provide a convenient default and can be edited as necessary.
    * **Remove Rows**: Select row(s) and click the “Remove Row” button. Select a single row by clicking on it. Multiple rows can be selected with Cmd+Click (Windows: Ctrl+Click) on the desired rows or with Shift+Click to specify a range of rows. 

Some of these operations can also be done through the spreadsheet’s programmatic interface. Sorts and filters can be reset using the toolbar buttons. Edits and adding/removing rows can only be undone manually.

Virtual Rendering
-----------------

The spreadsheet will only render data based on the user’s viewport. This allows for quick rendering 
even on very large DataFrames because only a handful of rows are loaded at any given time. As a result, scrolling and viewing your data is smooth and responsive.

Transformation History and Exporting Code
-----------------------------------------

All operations on the spreadsheet are recorded and are easily exported as code for sharing or reproducibility. 
This history is automatically displayed in the history cell, which is generated below the spreadsheet whenever the spreadsheet widget is displayed. 
The history cell is displayed by default, but this can be turned off. The Modin Spreadsheet API provides a few methods for interacting with the history:

    * `SpreadsheetWidget.get_history()`: Retrieves the transformation history in the form of reproducible code. 
    * `SpreadsheetWidget.filter_relevant_history(persist=True)`: Returns the transformation history that contains only code relevant to the final state of the spreadsheet. The `persist` parameter determines whether the internal state and the displayed history is also filtered.
    * `SpreadsheetWidget.reset_history()`: Clears the history of transformation.

Customizable Interface
----------------------

The spreadsheet widget provides a number of options that allow the user to change the appearance and the interactivity of the spreadsheet. Options include:

    * Row height/Column width
    * Preventing edits, sorts, or filters on the whole spreadsheet or on a per-column basis
    * Hiding the toolbar and history cell
    * Float precision
    * Highlighting of cells and rows
    * Viewport size

Converting Spreadsheets To and From Dataframes
----------------------------------------------

.. automodule:: modin.experimental.spreadsheet.general
    :noindex:
    :members: from_dataframe

    
.. automodule:: modin.experimental.spreadsheet.general
    :noindex:
    :members: to_dataframe


Further API Documentation
-------------------------

.. automodule:: modin_spreadsheet.grid
    :noindex:
    :members: SpreadsheetWidget

================================================
FILE: docs/usage_guide/benchmarking.rst
================================================
Benchmarking Modin
==================

Summary
-------
To benchmark a single Modin function, often turning on the
:doc:`configuration variable </flow/modin/config>`
:code:`BenchmarkMode` will suffice.

There is no simple way to benchmark more complex Modin workflows, though
benchmark mode or calling ``modin.utils.execute`` on Modin objects may be useful.
The :doc:`Modin logs </usage_guide/advanced_usage/modin_logging>` may help you
identify bottlenecks in your code, and they may also help profile the execution
of each Modin function.

Modin's execution and benchmark mode
------------------------------------

Most of Modin's execution happens asynchronously, i.e. in separate processes that run
independently of the main program flow. Some execution is also lazy, meaning that it
doesn't start immediately once the user calls a Modin function. While Modin provides
the same API as pandas, lazy and asynchronous execution can often make it hard to
tell how much time each Modin function call takes, as well as to compare Modin's
performance to pandas and other similar libraries.

.. note::
    All examples in this doc use the system specified at the bottom of this page.

Consider the following ipython script:

.. code-block:: python

    import modin.pandas as pd
    from modin.config import MinRowPartitionSize
    import time
    import ray

    # Look at the Ray documentation with respect to the Ray configuration suited to you most.
    ray.init()
    df = pd.DataFrame(list(range(MinRowPartitionSize.get() * 2)))
    %time result = df.map(lambda x: time.sleep(0.1) or x)
    %time print(result)


Modin takes just 2.68 milliseconds for the ``map``, and 3.78 seconds to print
the result. However, if we run this script in pandas by replacing
:code:`import modin.pandas as pd` with :code:`import pandas as pd`, the ``map``
takes 6.63 seconds, and printing the result takes just 5.53 milliseconds.

Both pandas and Modin start executing the ``map`` as soon as the interpreter
evaluates it. While pandas blocks until the ``map`` has finished, Modin just kicks
off asynchronous functions in remote ray processes. Printing the function result
is fairly fast in pandas and Modin, but before Modin can print the data, it has to
wait until all the remote functions complete.

To time how long Modin takes for a single operation, you should typically use
benchmark mode. Benchmark mode will wait for all asynchronous remote execution
to complete. You can turn benchmark mode on at any point as follows:

.. code-block:: python

    from modin.config import BenchmarkMode
    BenchmarkMode.put(True)

Rerunning the script above with benchmark mode on, the Modin ``map`` takes
3.59 seconds, and the ``print`` takes 183 milliseconds. These timings better
reflect where Modin is spending its execution time.

A caveat about benchmark mode
-----------------------------

While benchmark mode is often good for measuring the performance of a single
Modin function call, it can underestimate Modin's performance in cases where
Modin's asynchronous execution improves Modin's performance. Consider the
following script with benchmark mode on:

.. code-block:: python

    import numpy as np
    import time
    import ray
    from io import BytesIO

    import modin.pandas as pd
    from modin.config import BenchmarkMode, MinRowPartitionSize

    BenchmarkMode.put(True)

    start = time.time()
    df = pd.DataFrame(list(range(MinRowPartitionSize.get())), columns=['A'])
    result1 = df.map(lambda x: time.sleep(0.2) or x + 1)
    result2 = df.map(lambda x: time.sleep(0.2) or x + 2)
    result1.to_parquet(BytesIO())
    result2.to_parquet(BytesIO())
    end = time.time()
    print(f'map and write to parquet took {end - start} seconds.')

The script does two slow ``map`` on a dataframe and then writes each result
to a buffer. The whole script takes 13 seconds with benchmark mode on, but
just 7 seconds with benchmark mode off. Because Modin can run the ``map``
asynchronously, it can start writing the first result to its buffer while
it's still computing the second result. With benchmark mode on, Modin has to
execute every function synchronously instead.

How to benchmark complex workflows
----------------------------------

Typically, to benchmark Modin's overall performance on your workflow, you
should start by looking at end-to-end performance with benchmark mode off.
It's common for Modin workflows to end with writing results to one or more
files, or with printing some Modin objects to an interactive console. Such
end points are natural ways to make sure that all of the Modin execution that
you require is complete.

To measure more fine-grained performance, it can be helpful to turn
benchmark mode on, but beware that doing so may reduce your script's overall
performance and thus may not reflect where Modin is normally spending execution
time, as pointed out above.

Turning on :doc:`Modin logging </usage_guide/advanced_usage/modin_logging>` and
using the Modin logs can also help you profile your workflow. The Modin logs
can also give a detailed break down of the performance of each Modin function
at each Modin :doc:`layer </development/architecture>`. Log mode is more
useful when used in conjunction with benchmark mode.

Sometimes, if you don't have a natural end-point to your workflow, you can
just call ``modin.utils.execute`` on the workflow's final Modin objects.
That will typically block on any asynchronous computation:

.. code-block:: python

    import time
    import ray
    from io import BytesIO

    import modin.pandas as pd
    from modin.config import MinRowPartitionSize, NPartitions
    import modin.utils

    MinRowPartitionSize.put(32)
    NPartitions.put(16)

    def slow_add_one(x):
      if x == 5000:
        time.sleep(10)
      return x + 1

    # Look at the Ray documentation with respect to the Ray configuration suited to you most.
    ray.init()
    df1 = pd.DataFrame(list(range(10_000)), columns=['A'])
    result = df1.map(slow_add_one)
    # %time modin.utils.execute(result)
    %time result.to_parquet(BytesIO())

Writing the result to a buffer takes 9.84 seconds. However, if you uncomment
the :code:`%time modin.utils.execute(result)` before the :code:`to_parquet`
call, the :code:`to_parquet` takes just 23.8 milliseconds!

.. note::
    If you see any Modin documentation touting Modin's speed without using
    benchmark mode or otherwise guaranteeing that Modin is finishing all asynchronous
    and deferred computation, you should file an issue on the Modin GitHub. It's
    not fair to compare the speed of an async Modin function call to an equivalent
    synchronous call using another library.

Appendix: System Information
----------------------------
The example scripts here were run on the following system:

- **OS Platform and Distribution (e.g., Linux Ubuntu 16.04)**: macOS Monterey 12.4
- **Modin version**: d6d503ac7c3028d871c34d9e99e925ddb0746df6
- **Ray version**: 2.0.0
- **Python version**: 3.10.4
- **Machine**: MacBook Pro (16-inch, 2019)
- **Processor**: 2.3 GHz 8-core Intel Core i9 processor
- **Memory**: 16 GB 2667 MHz DDR4


================================================
FILE: docs/usage_guide/examples/index.rst
================================================
Modin Usage Examples
====================

This section shows Modin usage examples in different scenarios, such as Modin on a local/remote cluster
and the use of the Modin spreadsheet.

Tutorials
'''''''''

The following tutorials cover the basic usage of Modin. `Here <https://www.youtube.com/watch?v=NglkafEmbhE>`_ is a one hour video tutorial that walks through these basic exercises.

- Exercise 1: Introduction to Modin [`Source PandasOnRay <https://github.com/modin-project/modin/blob/main/examples/tutorial/jupyter/execution/pandas_on_ray/local/exercise_1.ipynb>`__, `Source PandasOnDask <https://github.com/modin-project/modin/blob/main/examples/tutorial/jupyter/execution/pandas_on_dask/local/exercise_1.ipynb>`__]
- Exercise 2: Speed Improvements with Modin [`Source PandasOnRay <https://github.com/modin-project/modin/blob/main/examples/tutorial/jupyter/execution/pandas_on_ray/local/exercise_2.ipynb>`__, `Source PandasOnDask <https://github.com/modin-project/modin/blob/main/examples/tutorial/jupyter/execution/pandas_on_dask/local/exercise_2.ipynb>`__]
- Exercise 3: Defaulting to pandas with Modin [`Source PandasOnRay <https://github.com/modin-project/modin/blob/main/examples/tutorial/jupyter/execution/pandas_on_ray/local/exercise_3.ipynb>`__, `Source PandasOnDask <https://github.com/modin-project/modin/blob/main/examples/tutorial/jupyter/execution/pandas_on_dask/local/exercise_3.ipynb>`__]

The following tutorials cover more advanced features in Modin:

- Exercise 4: Experimental Features in Modin (Spreadsheet, Progress Bar) [`Source PandasOnRay <https://github.com/modin-project/modin/blob/main/examples/tutorial/jupyter/execution/pandas_on_ray/local/exercise_4.ipynb>`__, `Source PandasOnDask <https://github.com/modin-project/modin/blob/main/examples/tutorial/jupyter/execution/pandas_on_dask/local/exercise_4.ipynb>`__]
- Exercise 5: Setting up Modin in a Cluster Environment [`Source PandasOnRay <https://github.com/modin-project/modin/blob/main/examples/tutorial/jupyter/execution/pandas_on_ray/cluster/exercise_5.py>`__]

For instructions on how to get the required dependencies for the tutorial notebooks and how to run them, please refer to the respective `README.md <https://github.com/modin-project/modin/tree/main/examples/tutorial/jupyter/README.md>`__ file.


Data Science Benchmarks
'''''''''''''''''''''''

- Using Modin with the NYC Taxi Dataset [`Source <https://github.com/modin-project/modin/blob/main/examples/jupyter/Modin_Taxi.ipynb>`__]
- Using Modin with the Census Dataset (coming soon...)
- Using Modin with the Plasticc Dataset (coming soon...)

Modin Spreadsheets
''''''''''''''''''

- Using Modin along with the Spreadsheets API [`Source <https://github.com/modin-project/modin/blob/main/examples/spreadsheet/tutorial.ipynb>`__]

Modin with scikit-learn
'''''''''''''''''''''''

- Modin for Machine Learning with scikit-learn [`Source <https://github.com/modin-project/modin/blob/main/examples/modin-scikit-learn-example.ipynb>`__]


================================================
FILE: docs/usage_guide/index.rst
================================================
Usage Guide
===========

This guide describes both basic and advanced Modin usage, including usage examples, 
details regarding Modin configuration settings, as well as tips and tricks on 
how you can further optimize the performance of your workload with Modin.

.. toctree::
    :maxdepth: 4

    /flow/modin/config
    examples/index
    advanced_usage/index
    optimization_notes/index
    benchmarking
    integrations

.. meta::
    :description lang=en:
        Usage-specific documentation.

================================================
FILE: docs/usage_guide/integrations.rst
================================================
Third Party Library Integrations
================================

Modin is a drop-in replacement for Pandas, so we want it to interoperate with third-party libraries just as Pandas does. 
To see where Modin performs well and where it needs to improve, we've selected a number of important machine learning + visualization + statistics libraries, 
and then looked at examples (from their documentation, if possible) about how they work with Pandas. Then we ran those same workflows with Modin, and 
tracked what worked, and what failed.

In the table below, you'll see, for each third-party library we tested, the number of successful test calls / total test calls, and a qualitative description of how both Pandas and Modin integrate with that library.

In the deeper dive, you can view the Jupyter notebook we have used to test API calls and the corresponding Github issues filed. If you come across other issues/ examples 
in your own workflows we encourage you to file an `issue <https://github.com/modin-project/modin/issues/new/choose>`_ or contribute a `PR <https://github.com/modin-project/modin/pulls>`_!


.. note::
    These interoperability metrics are preliminary and not all APIs for each library have been tested. Feel free to add more!


Modin Interoperability by Library
'''''''''''''''''''''''''''''''''
.. list-table::
   :widths: 5 5 20
   :header-rows: 1

   * - Library
     - API successes / calls
     - Interoperability
     
   * - seaborn
     - 73% (11/15)
     - **Pandas**: Accepts Pandas DataFrames as inputs for producing plots |br|
       **Modin**: Mostly accepts Modin DataFrames as inputs for producing plots, but fails completely in some cases (pairplot, lmplot), and in others (catplot, objects.Plot) only works for some parameter combinations

   * - plotly
     - 78% (7 / 9)
     - **Pandas**: Accepts Pandas DataFrames as inputs for producing plots, including specifying X and Y parameters as df columns |br|
       **Modin**: Mostly accepts Modin DataFrames as inputs for producing plots (the exception is choropleth), but fails when specifying X and Y parameters as df columns
   
   * - matplotlib
     - 100% (5 / 5)
     - **Pandas**: Accepts Pandas DataFrames as inputs for producing plots like scatter, barh, etc. |br|
       **Modin**: Accepts Modin DataFrames as inputs for producing plots like scatter, barh, etc.
  
   * - altair
     - 0% (0 / 1)
     - **Pandas**: Accepts Pandas DataFrames as inputs for producing charts through Chart |br|
       **Modin**: Does not accept Modin DataFrames as inputs for producing charts through Chart

   * - bokeh
     - 0% (0 / 1)
     - **Pandas**: Loads Pandas DataFrames through ColumnDataSource |br|
       **Modin**: Does not load Modin DataFrames through ColumnDataSource
     
   * - sklearn
     - 100% (6 / 6)
     - **Pandas**: Many functions take Pandas DataFrames as inputs |br|
       **Modin**: Many functions take Modin DataFrames as inputs
    
   * - Hugging Face (Transformers, Datasets)
     - 100% (2 / 2) 
     - **Pandas**: Loads Pandas DataFrames into Datasets, and processes Pandas DataFrame rows as inputs using Transformers.InputExample (deprecated) |br|
       **Modin**: Loads Modin DataFrames into Datasets (though slowly), and processes Modin DataFrame rows as inputs through Transformers.InputExample (deprecated)
     
   * - Tensorflow
     - 75% (3 / 4)
     - **Pandas**: Converts Pandas dataframes to tensors |br|
       **Modin**: Converts Modin DataFrames to tensors, but specialized APIs like Keras might not work yet
     
   * - NLTK
     - 100% (1 / 1)
     - **Pandas**: Performs transformations like tokenization on Pandas DataFrames |br|
       **Modin**: Performs transformations like tokenization on Modin DataFrames
    
   * - XGBoost
     - 100% (1 / 1)
     - **Pandas**: Loads Pandas DataFrames through the DMatrix function |br|
       **Modin**: Loads Modin DataFrames through the DMatrix function
    
   * - statsmodels
     - 50% (1 / 2)
     - **Pandas**: Can accept Pandas DataFrames when fitting models |br|
       **Modin**: Sometimes accepts Modin DataFrames when fitting models (e.g., formula.api.ols), but does not in others (e.g., api.OLS)
     
.. |br| raw:: html

     <br>

A Deeper Dive
''''''''''''''

**seaborn**
-----------

`Jupyter Notebook <https://github.com/modin-project/modin/blob/main/examples/jupyter/integrations/seaborn.ipynb>`__

Github Issues
    * https://github.com/modin-project/modin/issues/5435 
    * https://github.com/modin-project/modin/issues/5433

**plotly**
----------

`Jupyter Notebook <https://github.com/modin-project/modin/blob/main/examples/jupyter/integrations/plotly.ipynb>`__

Github Issues
    * https://github.com/modin-project/modin/issues/5447 
    * https://github.com/modin-project/modin/issues/5445

**matplotlib**
--------------

`Jupyter Notebook <https://github.com/modin-project/modin/blob/main/examples/jupyter/integrations/matplotlib.ipynb>`__


**altair**
----------

`Jupyter Notebook <https://github.com/modin-project/modin/blob/main/examples/jupyter/integrations/altair.ipynb>`__

Github Issues
    * https://github.com/modin-project/modin/issues/5438

**bokeh**
---------

`Jupyter Notebook <https://github.com/modin-project/modin/blob/main/examples/jupyter/integrations/bokeh.ipynb>`__

Github Issues
    * https://github.com/modin-project/modin/issues/5437

**sklearn**
-----------

`Jupyter Notebook <https://github.com/modin-project/modin/blob/main/examples/jupyter/integrations/sklearn.ipynb>`__

**Hugging Face**
----------------

`Jupyter Notebook <https://github.com/modin-project/modin/blob/main/examples/jupyter/integrations/huggingface.ipynb>`__

**Tensorflow**
--------------

`Jupyter Notebook <https://github.com/modin-project/modin/blob/main/examples/jupyter/integrations/tensorflow.ipynb>`__

Github Issues
    * https://github.com/modin-project/modin/issues/5439

**NLTK**
---------

`Jupyter Notebook <https://github.com/modin-project/modin/blob/main/examples/jupyter/integrations/NLTK.ipynb>`__

**XGBoost**
-----------

`Jupyter Notebook <https://github.com/modin-project/modin/blob/main/examples/jupyter/integrations/xgboost.ipynb>`__

**statsmodels**
---------------

`Jupyter Notebook <https://github.com/modin-project/modin/blob/main/examples/jupyter/integrations/statsmodels.ipynb>`__

Github Issues
    * https://github.com/modin-project/modin/issues/5440

Appendix: System Information
'''''''''''''''''''''''''''''
The example scripts here were run on the following system:

- **OS Platform and Distribution (e.g., Linux Ubuntu 16.04)**: macOS Big Sur 11.5.2
- **Modin version**: 0.18.0+3.g4114183f
- **Ray version**: 2.0.1
- **Python version**: 3.9.7.final.0
- **Machine**: MacBook Pro (16-inch, 2019)
- **Processor**: 2.3 GHz 8-core Intel Core i9 processor
- **Memory**: 16 GB 2667 MHz DDR4


================================================
FILE: docs/usage_guide/optimization_notes/index.rst
================================================
Optimization Notes
==================

Modin has chosen default values for a lot of the configurations here that provide excellent performance in most
cases. This page is for those who love to optimize their code and those who are curious about existing optimizations
within Modin. Here you can find more information about Modin's optimizations both for a pipeline of operations as
well as for specific operations. If you want to go ahead and tune the Modin behavior on your own, refer to
:doc:`Modin Configuration Settings </flow/modin/config>` page for the full set of configurations available in Modin.

Range-partitioning in Modin
"""""""""""""""""""""""""""

Modin utilizes a range-partitioning approach for specific operations, significantly enhancing
parallelism and reducing memory consumption in certain scenarios. Range-partitioning is typically
engaged for operations that have key columns (to group on, to merge on, etc).

You can enable `range-partitioning`_ by specifying the ``cfg.RangePartitioning`` :doc:`configuration variable </flow/modin/config>`:

.. code-block:: python

    import modin.pandas as pd
    import modin.config as cfg

    cfg.RangePartitioning.put(True) # past this point methods that support range-partitioning
                                    # will use it

    pd.DataFrame(...).groupby(...).mean() # use range-partitioning for groupby.mean()

    cfg.RangePartitioning.put(False)

    pd.DataFrame(...).groupby(...).mean() # use MapReduce implementation for groupby.mean()

Building range-partitioning assumes data reshuffling, which may break the original
order of rows; for some operations, this means that the result will differ from pandas.

Range-partitioning is not a silver bullet, meaning that enabling it is not always beneficial. Below you will find
a link to the list of operations that have support for range-partitioning and practical advice on when one should
enable it: :doc:`operations that support range-partitioning </usage_guide/optimization_notes/range_partitioning_ops>`.

Dynamic-partitioning in Modin
"""""""""""""""""""""""""""""

Ray engine experiences slowdowns when running a large number of small remote tasks at the same time. Ray Core recommends to `avoid tiny task`_.
When a Modin DataFrame has a large number of partitions, some functions produce a large number of remote tasks, which can cause slowdowns.
To solve this problem, Modin suggests using dynamic partitioning. This approach reduces the number of remote tasks
by combining multiple partitions into a single virtual partition and performing a common remote task on them.

Dynamic partitioning is typically used for operations that are fully or partially executed on all partitions separately.

.. code-block:: python

    import modin.pandas as pd
    from modin.config import context

    df = pd.DataFrame(...)

    with context(DynamicPartitioning=True):
        df.abs()

Dynamic partitioning is also not always useful, and this approach is usually used for medium-sized DataFrames with a large number of columns.
If the number of columns is small, the number of partitions will be close to the number of CPUs, and Ray will not have this problem.
If the DataFrame has too many rows, this is also not a good case for using Dynamic-partitioning, since each task is no longer tiny and performing 
the combined tasks carries more overhead than assigning them separately.

Unfortunately, the use of Dynamic-partitioning depends on various factors such as data size, number of CPUs, operations performed, 
and it is up to the user to determine whether Dynamic-partitioning will give a boost in their case or not.

..
  TODO: Define heuristics to automatically enable dynamic partitioning without performance penalty.
  `Issue #7370 <https://github.com/modin-project/modin/issues/7370>`_

Understanding Modin's partitioning mechanism
""""""""""""""""""""""""""""""""""""""""""""

Modin's partitioning is crucial for performance; so we recommend expert users to understand Modin's
partitioning mechanism and how to tune it in order to achieve better performance.

How Modin partitions a dataframe
--------------------------------

Modin uses a partitioning scheme that partitions a dataframe along both axes, resulting in a matrix
of partitions. The row and column chunk sizes are computed independently based
on the length of the appropriate axis and Modin's special :doc:`configuration variables </flow/modin/config>`
(``NPartitions``, ``MinRowPartitionSize`` and ``MinColumnPartitionSize``):

- ``NPartitions`` is the maximum number of splits along an axis; by default, it is equal to the number of cores
  on your local machine or cluster of nodes.
- ``MinRowPartitionSize`` is the minimum number of rows to do a split. For instance, if ``MinRowPartitionSize``
  is 32, the row axis will not be split unless the amount of rows is greater than 32. If it is greater, for example, 34,
  then the row axis is sliced into two partitions: containing 32 and 2 rows respectively.
- ``MinColumnPartitionSize`` is the minimum number of columns to do a split. For instance, if ``MinColumnPartitionSize``
  is 32, the column axis will not be split unless the amount of columns is greater than 32. If it is greater, for example, 34,
  then the column axis is sliced into two partitions: containing 32 and 2 columns respectively.

Beware that ``NPartitions`` specifies a limit for the number of partitions `along a single axis`, which means, that
the actual limit for the entire dataframe itself is the square of ``NPartitions``.

.. figure:: /img/partitioning_mechanism/partitioning_examples.svg
   :align: center

Full-axis functions
-------------------

Some of the aggregation functions require knowledge about the entire axis, for example at ``.apply(foo, axis=0)``
the passed function ``foo`` expects to receive data for the whole column at once.

When a full-axis function is applied, the partitions along this axis are collected at a single worker
that processes the function. After the function is done, the partitioning of the data is back to normal.

.. figure:: /img/partitioning_mechanism/full_axis_function.svg
   :align: center

Note that the number of remote calls is equal to the number of partitions, which means that, since the number
of partitions is decreased for full-axis functions, the potential for parallelism decreases as well.

Also note that reduce functions such as ``.sum()``, ``.mean()``, ``.max()``, etc., are not considered
to be full-axis, so they do not suffer from the decreased level of parallelism.

How to tune partitioning
------------------------

Configure Modin's default partitioning scheme
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

As you can see from the examples above, the closer the dataframe's shape is to a square, the closer the number of
partitions is to the square of ``NPartitions``. When ``NPartitions`` equals the number of workers,
this means that a single worker is going to process multiple partitions at once, which slows down overall performance.

If your workflow mainly operates with wide dataframes and non-full-axis functions, it makes sense to reduce the
``NPartitions`` value so a single worker would process a single partition.

.. figure:: /img/partitioning_mechanism/repartition_square_frames.svg
   :align: center

Copy-pastable example, showing how tuning ``NPartitions`` value for wide frames may improve performance on your machine:

.. code-block:: python

  from multiprocessing import cpu_count
  from modin.distributed.dataframe.pandas import unwrap_partitions
  import modin.config as cfg
  import modin.pandas as pd
  import numpy as np
  import timeit

  # Generating data for a square-like dataframe
  data = np.random.randint(0, 100, size=(5000, 5000))

  # Explicitly setting `NPartitions` to its default value
  cfg.NPartitions.put(cpu_count())

  # Each worker processes `cpu_count()` amount of partitions
  df = pd.DataFrame(data)
  print(f"NPartitions: {cfg.NPartitions.get()}")
  # Getting raw partitions to count them
  partitions_shape = np.array(unwrap_partitions(df)).shape
  print(
      f"The frame has {partitions_shape[0]}x{partitions_shape[1]}={np.prod(partitions_shape)} partitions "
      f"when the CPU has only {cpu_count()} cores."
  )
  print(f"10 times of .abs(): {timeit.timeit(lambda: df.abs(), number=10)}s.")
  # Possible output:
  #   NPartitions: 112
  #   The frame has 112x112=12544 partitions when the CPU has only 112 cores.
  #   10 times of .abs(): 23.64s.

  # Taking a square root of the current `cpu_count` to make more even partitioning
  cfg.NPartitions.put(int(cpu_count() ** 0.5))

  # Each worker processes a single partition
  df = pd.DataFrame(data)
  print(f"NPartitions: {cfg.NPartitions.get()}")
  # Getting raw partitions to count them
  partitions_shape = np.array(unwrap_partitions(df)).shape
  print(
      f"The frame has {partitions_shape[0]}x{partitions_shape[1]}={np.prod(partitions_shape)} "
      f"when the CPU has {cpu_count()} cores."
  )
  print(f"10 times of .abs(): {timeit.timeit(lambda: df.abs(), number=10)}s.")
  # Possible output:
  #   NPartitions: 10
  #   The frame has 10x10=100 partitions when the CPU has 112 cores.
  #   10 times of .abs(): 0.25s.

Manually trigger repartitioning
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

If you're getting unexpectedly poor performance, although you configured ``MODIN_NPARTITIONS``
correctly, then this might be caused by unbalanced partitioning that occurred during the
workflow's execution.

Modin's ideology is to handle partitioning internally and not let users worry about the possible
consequences of applying a lot of "bad" operations that may affect DataFrame's partitioning.
We're constantly making efforts to find and fix cases where partitioning may cause a headache
for users.

However, if you feel that you're dealing with unbalanced partitioning you may try to call an
internal :py:meth:`modin.pandas.dataframe.DataFrame._repartition` method on your :py:class:`~modin.pandas.dataframe.DataFrame` in order to manually
trigger partitions rebalancing and see whether it improves performance for your case.

.. automethod:: modin.pandas.dataframe.DataFrame._repartition

An actual use-case for this method may be the following:

.. code-block:: python

  import modin.pandas as pd
  import timeit

  df = pd.DataFrame({"col0": [1, 2, 3, 4]})

  # Appending a lot of columns may result in unbalanced partitioning
  for i in range(1, 128):
      df[f"col{i}"] = pd.Series([1, 2, 3, 4])

  print(
      "DataFrame with unbalanced partitioning:",
      timeit.timeit(lambda: df.sum(), number=10)
  ) # 1.44s

  df = df._repartition()
  print(
      "DataFrame after '._repartition()':",
      timeit.timeit(lambda: df.sum(), number=10)
  ) # 0.21s.

Avoid iterating over Modin DataFrame
""""""""""""""""""""""""""""""""""""

Use ``df.apply()`` or other aggregation methods when possible instead of iterating over a dataframe.
For-loops don't scale and force the distributed data to be collected back at the driver.

Copy-pastable example, showing how replacing a for-loop with the equivalent ``.apply()`` may improve performance:

.. code-block:: python

  import modin.pandas as pd
  import numpy as np
  from timeit import default_timer as timer

  data = np.random.randint(1, 100, (2 ** 10, 2 ** 2))

  md_df = pd.DataFrame(data)

  result = []
  t1 = timer()
  # Iterating over a dataframe forces to collect distributed data to the driver and doesn't scale
  for idx, row in md_df.iterrows():
      result.append((row[1] + row[2]) / row[3])
  print(f"Filling a list by iterating a Modin frame: {timer() - t1:.2f}s.")
  # Possible output: 36.15s.

  t1 = timer()
  # Using `.apply()` perfectly scales to all axis-partitions
  result = md_df.apply(lambda row: (row[1] + row[2]) / row[3], axis=1).to_numpy().tolist()
  print(f"Filling a list by using '.apply()' and converting the result to a list: {timer() - t1:.2f}s.")
  # Possible output: 0.22s.

Use Modin's Dataframe Algebra API to implement custom parallel functions
""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

Modin provides a set of low-level parallel-implemented operators which can be used to build most of the
aggregation functions. These operators are present in the :doc:`algebra module </flow/modin/core/dataframe/algebra>`.
Modin DataFrame allows users to use their own aggregations built with this module. Visit the
:doc:`DataFrame's algebra </flow/modin/core/dataframe/algebra>` page of the documentation for the steps to do it.

Avoid mixing pandas and Modin DataFrames
""""""""""""""""""""""""""""""""""""""""

Although Modin is considered to be a drop-in replacement for pandas, Modin and pandas are not intended to be used together
in a single flow. Passing a pandas DataFrame as an argument to a Modin DataFrame method may either slow down
the function (because it has to process a non-distributed object) or raise an error. You would also get undefined
behavior if you pass a Modin DataFrame as an input to pandas methods, since pandas identifies Modin's objects as simple iterables,
and so can't leverage their benefits as distributed dataframes.

Copy-pastable example, showing how mixing pandas and Modin DataFrames in a single flow may bottleneck performance:

.. code-block:: python

  import modin.pandas as pd
  import numpy as np
  import timeit
  import pandas

  data = np.random.randint(0, 100, (2 ** 20, 2 ** 2))

  md_df, md_df_copy = pd.DataFrame(data), pd.DataFrame(data)
  pd_df, pd_df_copy = pandas.DataFrame(data), pandas.DataFrame(data)

  print("concat modin frame + pandas frame:")
  # Concatenating modin frame + pandas frame using modin '.concat()'
  # This case is bad because Modin has to process a non-distributed pandas object
  time = timeit.timeit(lambda: pd.concat([md_df, pd_df]), number=10)
  print(f"\t{time}s.\n")
  # Possible output: 0.44s.

  print("concat modin frame + modin frame:")
  # Concatenating modin frame + modin frame using modin '.concat()'
  # This is an ideal case, Modin is being used as intended
  time = timeit.timeit(lambda: pd.concat([md_df, md_df_copy]), number=10)
  print(f"\t{time}s.\n")
  # Possible output: 0.05s.

  print("concat pandas frame + pandas frame:")
  # Concatenating pandas frame + pandas frame using pandas '.concat()'
  time = timeit.timeit(lambda: pandas.concat([pd_df, pd_df_copy]), number=10)
  print(f"\t{time}s.\n")
  # Possible output: 0.31s.

  print("concat pandas frame + modin frame:")
  # Concatenating pandas frame + modin frame using pandas '.concat()'
  time = timeit.timeit(lambda: pandas.concat([pd_df, md_df]), number=10)
  print(f"\t{time}s.\n")
  # Possible output: TypeError


Using pandas to execute queries with Modin's ``"Pandas"`` backend
""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

By default, Modin distributes the data in a dataframe (or series) and attempts
to process data for different partitions in parallel.

However, for certain scenarios, such as handling small datasets, Modin's
parallel execution may introduce unnecessary overhead. In such cases, it's more
efficient to use serial execution with a single, unpartitioned pandas dataframe.
You can enable this kind of local pandas execution by setting Modin's
``Backend``
:doc:`configuration variable </flow/modin/config>` to ``"Pandas"``.

DataFrames created while Modin's global backend is set to ``"Pandas"``
will continue to use native execution even if you switch the global backend
later. Modin supports interoperability between distributed Modin DataFrames
and those using the pandas backend.

Here is an example of using the pandas backend.

.. code-block:: python

  import modin.pandas as pd
  from modin.config import Backend

  # This dataframe will use Modin's default, distributed execution.
  original_backend = Backend.get()
  assert original_backend != "Pandas"
  distributed_df_1 = pd.DataFrame([0])

  # Set backend to "Pandas" for local pandas execution.
  Backend.put("Pandas")
  modin_on_pandas_df = pd.DataFrame([1])
  assert modin_on_pandas_df.get_backend() == "Pandas"

  # Revert to default settings for distributed execution
  Backend.put(original_backend)
  distributed_df_2 = pd.DataFrame([2])
  assert distributed_df_2.get_backend() == original_backend

You can also use the pandas backend for some dataframes while using different
backends for other dataframes. You can switch the backend of an individual
dataframe or series with ``set_backend()`` or its synonym ``move_to()``.
Here's an example of switching the backend for an individual dataframe.

.. code-block:: python

  import modin.pandas as pd
  from modin.config import Backend

  # This dataframe will use Modin's default, distributed execution.
  original_backend = Backend.get()
  assert original_backend != "Pandas"
  distributed_df_1 = pd.DataFrame([0])

  pandas_df_1 = distributed_df_1.move_to("Pandas")
  assert pandas_df_1.get_backend() == "Pandas"
  pandas_df_1 = pandas_df_1.sort_values(0)
  assert pandas_df_1.get_backend() == "Pandas"

  new_df = pandas_df_1.move_to(original_backend)
  assert new_df.get_backend() == original_backend

  new_df.set_backend("Pandas", inplace=True)
  assert new_df.get_backend() == "Pandas"


Automatic backend switching
"""""""""""""""""""""""""""

*This feature is under active development, and the API is subject to change.*

Modin's backends may define heuristics for whether to automatically move data to another backend
for more efficient computation of certain operations. Modin does not currently define these heuristics
for any of its default backends, but any backends that wish to do so should implement the query
compiler methods discussed in
:ref:`the architecture document<auto-switch architecture>`.

After implementing the relevant query compiler methods, the following APIs can be used to control
when automatic switching occurs:

.. code-block:: python

  import modin.pandas as pd
  from modin.core.storage_formats.pandas.query_compiler_caster import (
    register_function_for_post_op_switch,
    register_function_for_pre_op_switch,
  )
  from modin.config import AutoSwitchBackend

  # Enable automatic switching BEFORE computation for DataFrame.apply
  # when the DataFrame's backend is Pandas
  register_function_for_pre_op_switch(
    class_name="DataFrame",
    method="apply",
    backend="Pandas",
  )

  # Enable automatic switching AFTER computation for Series.max
  # when the Series's backend is Pandas
  register_function_for_post_op_switch(
    class_name="Series",
    method="max",
    backend="Pandas",
  )

  # Enable automatic switching globally (use .disable() to turn off)
  AutoSwitchBackend.enable()

  df = pd.DataFrame([[1, 2, 3]])
  # "pin" a single DataFrame/Series, preventing it from
  # automatically switching backends
  df.pin_backend(inplace=True)
  # "unpin" it to re-enable automatic switching
  df.unpin_backend(inplace=True)


Operation-specific optimizations
""""""""""""""""""""""""""""""""

merge
-----

``merge`` operation in Modin uses the broadcast join algorithm: combining a right Modin DataFrame into a pandas DataFrame and
broadcasting it to the row partitions of the left Modin DataFrame. In order to minimize interprocess communication cost when doing
an inner join you may want to swap left and right DataFrames.

.. code-block:: python

  import modin.pandas as pd
  import numpy as np

  left_data = np.random.randint(0, 100, size=(2**8, 2**8))
  right_data = np.random.randint(0, 100, size=(2**12, 2**12))

  left_df = pd.DataFrame(left_data)
  right_df = pd.DataFrame(right_data)
  %timeit left_df.merge(right_df, how="inner", on=10)
  3.59 s ± 107 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

  %timeit right_df.merge(left_df, how="inner", on=10)
  1.22 s ± 40.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

Note that result columns order may differ for first and second ``merge``.

.. _range-partitioning: https://www.techopedia.com/definition/31994/range-partitioning
.. _`avoid tiny task`: https://docs.ray.io/en/latest/ray-core/tips-for-first-time.html#tip-2-avoid-tiny-tasks


================================================
FILE: docs/usage_guide/optimization_notes/range_partitioning_ops.rst
================================================
:orphan:

Operations that support range-partitioning in Modin
###################################################

The following operations change their behavior once ``cfg.RangePartitioning`` variable is set to ``True``.
Go through the list to find out when it could be beneficial to engage range-partitioning for a certain method.

GroupBy
=======

.. note::
    When grouping on multiple columns using range-partitioning implementation, the result
    may not be sorted even if ``groupby(sort=True, ...)`` was passed: https://github.com/modin-project/modin/issues/6875.

Range-partitioning groupby implementation is automatically engaged for ``groupby.apply()``, ``groupby.transform()``,
``groupby.rolling()``. For groupby aggregations from `this list`_, MapReduce implementation is used by default.
MapReduce tends to show better performance for groupby with low-cardinality. If the cardinality of your columns
to group is expected to be high, it's recommended to engage range-partitioning implementation.


Merge
=====

.. note::
    Range-partitioning approach is implemented only for "left" and "inner" merge and only
    when merging on a single column using `on` argument.

Range-partitioning merge replaces broadcast merge. It is recommended to use range-partitioning implementation
if the right dataframe in merge is as big as the left dataframe. In this case, range-partitioning
implementation works faster and consumes less RAM.

Under the spoiler you can find performance comparison of range-partitioning and broadcast merge in different
scenarios:

.. raw:: html

   <details>
   <summary><a>Performance measurements for merge</a></summary>

The performance was measured on `h2o join queries`_ using Intel(R) Xeon(R) Gold 6238R CPU @ 2.20GHz (56 cores),
with the number of cores allocated for Modin limited to 44 (``MODIN_CPUS=44``).

Measurements for small 500mb data:

.. image:: /img/range_partitioning_measurements/merge_h2o_500mb.jpg
   :align: center

Measurements for medium 5gb data:

.. image:: /img/range_partitioning_measurements/merge_h2o_5gb.png
   :align: center

.. raw:: html

   </details>


``.unique()`` and ``.drop_duplicates()``
========================================

.. note::
    When range-partitioning is enabled, both ``.unique()`` and ``.drop_duplicates()`` will
    yield results that are sorted along rows. If range-partitioning is disabled,
    the original order will be maintained.

Range-partitioning implementation of ``.unique()`` / ``.drop_duplicates()`` works best when the input data size is big (more than
5_000_000 rows) and when the output size is also expected to be big (no more than 80% of values are duplicates).

Under the spoiler you can find performance comparisons in different scenarios:

.. raw:: html

   <details>
   <summary><a>Performance measurements for ``.unique()``</a></summary>

The performance was measured on randomly generated data using Intel(R) Xeon(R) Gold 6238R CPU @ 2.20GHz (56 cores).
The `duplicate rate` shows the percentage of duplicated rows in the dataset. You can learn more about this micro-benchmark
by reading its source code:

.. raw:: html

   <details>
   <summary><a>Micro-benchmark's source code</a></summary>

.. code-block:: python

    import modin.pandas as pd
    import numpy as np
    import modin.config as cfg

    from modin.utils import execute
    from timeit import default_timer as timer
    import pandas

    cfg.CpuCount.put(16)

    def get_data(nrows, dtype):
        if dtype == int:
            return np.arange(nrows)
        elif dtype == float:
            return np.arange(nrows).astype(float)
        elif dtype == str:
            return np.array([f"value{i}" for i in range(nrows)])
        else:
            raise NotImplementedError(dtype)

    pd.DataFrame(np.arange(cfg.NPartitions.get() * cfg.MinRowPartitionSize.get())).to_numpy()

    nrows = [1_000_000, 5_000_000, 10_000_000, 25_000_000, 50_000_000, 100_000_000]
    duplicate_rate = [0, 0.1, 0.5, 0.95]
    dtypes = [int, str]
    use_range_part = [True, False]

    columns = pandas.MultiIndex.from_product([dtypes, duplicate_rate, use_range_part], names=["dtype", "duplicate rate", "use range-part"])
    result = pandas.DataFrame(index=nrows, columns=columns)

    i = 0
    total_its = len(nrows) * len(duplicate_rate) * len(dtypes) * len(use_range_part)

    for dt in dtypes:
        for nrow in nrows:
            data = get_data(nrow, dt)
            np.random.shuffle(data)
            for dpr in duplicate_rate:
                data_c = data.copy()
                dupl_val = data_c[0]

                num_duplicates = int(dpr * nrow)
                dupl_indices = np.random.choice(np.arange(nrow), num_duplicates, replace=False)
                data_c[dupl_indices] = dupl_val

                for impl in use_range_part:
                    print(f"{round((i / total_its) * 100, 2)}%")
                    i += 1
                    cfg.RangePartitioning.put(impl)

                    sr = pd.Series(data_c)
                    execute(sr)

                    t1 = timer()
                    # returns a list, so no need for materialization
                    sr.unique()
                    tm = timer() - t1
                    print(nrow, dpr, dt, impl, tm)
                    result.loc[nrow, (dt, dpr, impl)] = tm
                    result.to_excel("unique.xlsx")

.. raw:: html

   </details>

Measurements with 16 cores being allocated for Modin (``MODIN_CPUS=16``):

.. image:: /img/range_partitioning_measurements/unique_16cpus.jpg
   :align: center

Measurements with 44 cores being allocated for Modin (``MODIN_CPUS=44``):

.. image:: /img/range_partitioning_measurements/unique_44cpus.jpg
   :align: center

.. raw:: html

   </details>


.. raw:: html

   <details>
   <summary><a>Performance measurements for ``.drop_duplicates()``</a></summary>

The performance was measured on randomly generated data using Intel(R) Xeon(R) Gold 6238R CPU @ 2.20GHz (56 cores).
The `duplicate rate` shows the percentage of duplicated rows in the dataset. The `subset size` shows the number of
columns being specified as a ``subset`` parameter for ``df.drop_duplicates()``. You can learn more about this
micro-benchmark by reading its source code:

.. raw:: html

   <details>
   <summary><a>Micro-benchmark's source code</a></summary>

.. code-block:: python

    import modin.pandas as pd
    import numpy as np
    import modin.config as cfg

    from modin.utils import execute
    from timeit import default_timer as timer
    import pandas

    cfg.CpuCount.put(16)

    pd.DataFrame(np.arange(cfg.NPartitions.get() * cfg.MinRowPartitionSize.get())).to_numpy()

    nrows = [1_000_000, 5_000_000, 10_000_000, 25_000_000]
    duplicate_rate = [0, 0.1, 0.5, 0.95]
    subset = [["col0"], ["col1", "col2", "col3", "col4"], None]
    ncols = 15
    use_range_part = [True, False]

    columns = pandas.MultiIndex.from_product(
        [
            [len(sbs) if sbs is not None else ncols for sbs in subset],
            duplicate_rate,
            use_range_part
        ],
        names=["subset size", "duplicate rate", "use range-part"]
    )
    result = pandas.DataFrame(index=nrows, columns=columns)

    i = 0
    total_its = len(nrows) * len(duplicate_rate) * len(subset) * len(use_range_part)

    for sbs in subset:
        for nrow in nrows:
            data = {f"col{i}": np.arange(nrow) for i in range(ncols)}
            pandas_df = pandas.DataFrame(data)

            for dpr in duplicate_rate:
                pandas_df_c = pandas_df.copy()
                dupl_val = pandas_df_c.iloc[0]

                num_duplicates = int(dpr * nrow)
                dupl_indices = np.random.choice(np.arange(nrow), num_duplicates, replace=False)
                pandas_df_c.iloc[dupl_indices] = dupl_val

                for impl in use_range_part:
                    print(f"{round((i / total_its) * 100, 2)}%")
                    i += 1
                    cfg.RangePartitioning.put(impl)

                    md_df = pd.DataFrame(pandas_df_c)
                    execute(md_df)

                    t1 = timer()
                    res = md_df.drop_duplicates(subset=sbs)
                    execute(res)
                    tm = timer() - t1

                    sbs_s = len(sbs) if sbs is not None else ncols
                    print("len()", res.shape, nrow, dpr, sbs_s, impl, tm)
                    result.loc[nrow, (sbs_s, dpr, impl)] = tm
                    result.to_excel("drop_dupl.xlsx")

.. raw:: html

   </details>

Measurements with 16 cores being allocated for Modin (``MODIN_CPUS=16``):

.. image:: /img/range_partitioning_measurements/drop_duplicates_16cpus.jpg
   :align: center

Measurements with 44 cores being allocated for Modin (``MODIN_CPUS=44``):

.. image:: /img/range_partitioning_measurements/drop_duplicates_44cpus.jpg
   :align: center

.. raw:: html

   </details>


``.nunique()``
==============

.. note::

    Range-partitioning approach is implemented only for ``pd.Series.nunique()`` and 1-column dataframes.
    For multi-column dataframes ``.nunique()`` can only use full-axis reduce implementation.

Range-partitioning implementation of ``.nunique()`` works best when the input data size is big (more than
5_000_000 rows) and when the output size is also expected to be big (no more than 80% of values are duplicates).

Under the spoiler you can find performance comparisons in different scenarios:

.. raw:: html

   <details>
   <summary><a>Performance measurements for ``.nunique()``</a></summary>

The performance was measured on randomly generated data using Intel(R) Xeon(R) Gold 6238R CPU @ 2.20GHz (56 cores).
The `duplicate rate` shows the percentage of duplicated rows in the dataset. You can learn more about this micro-benchmark
by reading its source code:

.. raw:: html

   <details>
   <summary><a>Micro-benchmark's source code</a></summary>

.. code-block:: python

    import modin.pandas as pd
    import numpy as np
    import modin.config as cfg

    from modin.utils import execute
    from timeit import default_timer as timer
    import pandas

    cfg.CpuCount.put(16)

    def get_data(nrows, dtype):
        if dtype == int:
            return np.arange(nrows)
        elif dtype == float:
            return np.arange(nrows).astype(float)
        elif dtype == str:
            return np.array([f"value{i}" for i in range(nrows)])
        else:
            raise NotImplementedError(dtype)

    pd.DataFrame(np.arange(cfg.NPartitions.get() * cfg.MinRowPartitionSize.get())).to_numpy()

    nrows = [1_000_000, 5_000_000, 10_000_000, 25_000_000, 50_000_000, 100_000_000]
    duplicate_rate = [0, 0.1, 0.5, 0.95]
    dtypes = [int, str]
    use_range_part = [True, False]

    columns = pandas.MultiIndex.from_product([dtypes, duplicate_rate, use_range_part], names=["dtype", "duplicate rate", "use range-part"])
    result = pandas.DataFrame(index=nrows, columns=columns)

    i = 0
    total_its = len(nrows) * len(duplicate_rate) * len(dtypes) * len(use_range_part)

    for dt in dtypes:
        for nrow in nrows:
            data = get_data(nrow, dt)
            np.random.shuffle(data)
            for dpr in duplicate_rate:
                data_c = data.copy()
                dupl_val = data_c[0]

                num_duplicates = int(dpr * nrow)
                dupl_indices = np.random.choice(np.arange(nrow), num_duplicates, replace=False)
                data_c[dupl_indices] = dupl_val

                for impl in use_range_part:
                    print(f"{round((i / total_its) * 100, 2)}%")
                    i += 1
                    cfg.RangePartitioning.put(impl)

                    sr = pd.Series(data_c)
                    execute(sr)

                    t1 = timer()
                    # returns a scalar, so no need for materialization
                    res = sr.nunique()
                    tm = timer() - t1
                    print(nrow, dpr, dt, impl, tm)
                    result.loc[nrow, (dt, dpr, impl)] = tm
                    result.to_excel("nunique.xlsx")

.. raw:: html

   </details>

Measurements with 16 cores being allocated for Modin (``MODIN_CPUS=16``):

.. image:: /img/range_partitioning_measurements/nunique_16cpus.jpg
   :align: center


.. raw:: html

   </details>

Resample
========

.. note::

    Range-partitioning approach doesn't support transform-like functions (like `.interpolate()`, `.ffill()`, `.bfill()`, ...)

It is recommended to use range-partitioning for resampling if you're dealing with a dataframe that has more than
5_000_000 rows and the output is also expected to be big (more than 500_000 rows).

Under the spoiler you can find performance comparisons in different scenarios:

.. raw:: html

   <details>
   <summary><a>Performance measurements for ``.resample()``</a></summary>

The script below measures performance of ``df.resample(rule).sum()`` using Intel(R) Xeon(R) Gold 6238R CPU @ 2.20GHz (56 cores).
You can learn more about this micro-benchmark by reading its source code:

.. raw:: html

   <details>
   <summary><a>Micro-benchmark's source code</a></summary>

.. code-block:: python

    import pandas
    import numpy as np
    import modin.pandas as pd
    import modin.config as cfg

    from timeit import default_timer as timer

    from modin.utils import execute

    cfg.CpuCount.put(16)

    nrows = [1_000_000, 5_000_000, 10_000_000]
    ncols = [5, 33]
    rules = [
        "500ms", # doubles nrows
        "30s", # decreases nrows by a factor of 30
        "5min", # decreases nrows by a factor of 300
    ]
    use_rparts = [True, False]

    cols = pandas.MultiIndex.from_product([rules, ncols, use_rparts], names=["rule", "ncols", "USE RANGE PART"])
    rres = pandas.DataFrame(index=nrows, columns=cols)

    total_nits = len(nrows) * len(ncols) * len(rules) * len(use_rparts)
    i = 0

    for nrow in nrows:
        for ncol in ncols:
            index = pandas.date_range("31/12/2000", periods=nrow, freq="s")
            data = {f"col{i}": np.arange(nrow) for i in range(ncol)}
            pd_df = pandas.DataFrame(data, index=index)
            for rule in rules:
                for rparts in use_rparts:
                    print(f"{round((i / total_nits) * 100, 2)}%")
                    i += 1
                    cfg.RangePartitioning.put(rparts)

                    df = pd.DataFrame(data, index=index)
                    execute(df)

                    t1 = timer()
                    res = df.resample(rule).sum()
                    execute(res)
                    ts = timer() - t1
                    print(nrow, ncol, rule, rparts, ts)

                    rres.loc[nrow, (rule, ncol, rparts)] = ts
                    rres.to_excel("resample.xlsx")

.. raw:: html

   </details>

Measurements with 16 cores being allocated for Modin (``MODIN_CPUS=16``):

.. image:: /img/range_partitioning_measurements/resample_16cpus.jpg
   :align: center


.. raw:: html

   </details>

pivot_table
===========

Range-partitioning implementation is automatically applied for ``df.pivot_table``
whenever possible, users can't control this.

sort_values
===========

Range-partitioning implementation is automatically applied for ``df.sort_values``
whenever possible, users can't control this.


.. _h2o join queries: https://h2oai.github.io/db-benchmark/
.. _this list: https://github.com/modin-project/modin/blob/7b233e4a920d5f03dce7a82847847b92ae7ad617/modin/core/storage_formats/pandas/groupby.py#L236-L247


================================================
FILE: environment-dev.yml
================================================
name: modin
channels:
  - conda-forge
dependencies:
  - pip

  # required dependencies
  - pandas>=2.2,<2.4
  - numpy>=1.22.4
  - fsspec>=2022.11.0
  - packaging>=21.0
  - psutil>=5.8.0

  # optional dependencies
  # NOTE Keep the ray and dask dependencies in sync with the Linux and Windows
  # Unidist environment dependencies.
  - ray-core>=2.10.0,<3
  - pyarrow>=10.0.1
  # workaround for https://github.com/conda/conda/issues/11744
  - grpcio!=1.45.*
  - grpcio!=1.46.*
  - dask>=2.22.0
  - distributed>=2.22.0
  - xarray>=2022.12.0
  - jinja2>=3.1.2
  - scipy>=1.10.0
  - s3fs>=2022.11.0
  - lxml>=4.9.2
  - openpyxl>=3.1.0
  - xlrd>=2.0.1
  - matplotlib>=3.6.3
  - sqlalchemy>=2.0.0
  - pandas-gbq>=0.19.0
  - pytables>=3.8.0
  # pymssql==2.2.8 broken: https://github.com/modin-project/modin/issues/6429
  - pymssql>=2.1.5,!=2.2.8
  - psycopg2>=2.9.6
  - fastparquet>=2022.12.0
  - tqdm>=4.60.0
  - numexpr>=2.8.4

  # dependencies for making release
  - pygithub>=v1.58.0
  - pygit2>=1.9.2

  # test dependencies
  - coverage>=7.1.0
  - moto>=4.1.0
  - pytest>=7.3.2
  - pytest-benchmark>=4.0.0
  - pytest-cov>=4.0.0
  - pytest-xdist>=3.2.0
  - typing_extensions

  # code linters
  - black>=24.1.0
  - flake8>=6.0.0
  - flake8-no-implicit-concat>=0.3.4
  - flake8-print>=5.0.0
  - mypy>=1.0.0
  - pandas-stubs>=2.0.0
  - isort>=5.12

  - pip:
      - dataframe-api-compat>=0.2.7
      - asv==0.5.1
      # no conda package for windows so we install it with pip
      - connectorx>=0.2.6a4
      - fuzzydata>=0.0.11
      # Fixes breaking ipywidgets changes, but didn't release yet.
      - git+https://github.com/modin-project/modin-spreadsheet.git@49ffd89f683f54c311867d602c55443fb11bf2a5
      # The `numpydoc` version should match the version installed in the `lint-pydocstyle` job of the CI.
      - numpydoc==1.6.0
      - polars


================================================
FILE: examples/data/boston_housing.csv
================================================
,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,PRICE
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2
5,0.02985,0.0,2.18,0.0,0.458,6.43,58.7,6.0622,3.0,222.0,18.7,394.12,5.21,28.7
6,0.08829,12.5,7.87,0.0,0.524,6.012,66.6,5.5605,5.0,311.0,15.2,395.6,12.43,22.9
7,0.14455,12.5,7.87,0.0,0.524,6.172,96.1,5.9505,5.0,311.0,15.2,396.9,19.15,27.1
8,0.21124,12.5,7.87,0.0,0.524,5.631,100.0,6.0821,5.0,311.0,15.2,386.63,29.93,16.5
9,0.17004,12.5,7.87,0.0,0.524,6.004,85.9,6.5921,5.0,311.0,15.2,386.71,17.1,18.9
10,0.22489,12.5,7.87,0.0,0.524,6.377,94.3,6.3467,5.0,311.0,15.2,392.52,20.45,15.0
11,0.11747,12.5,7.87,0.0,0.524,6.009,82.9,6.2267,5.0,311.0,15.2,396.9,13.27,18.9
12,0.09378,12.5,7.87,0.0,0.524,5.889,39.0,5.4509,5.0,311.0,15.2,390.5,15.71,21.7
13,0.62976,0.0,8.14,0.0,0.538,5.949,61.8,4.7075,4.0,307.0,21.0,396.9,8.26,20.4
14,0.63796,0.0,8.14,0.0,0.538,6.096,84.5,4.4619,4.0,307.0,21.0,380.02,10.26,18.2
15,0.62739,0.0,8.14,0.0,0.538,5.834,56.5,4.4986,4.0,307.0,21.0,395.62,8.47,19.9
16,1.05393,0.0,8.14,0.0,0.538,5.935,29.3,4.4986,4.0,307.0,21.0,386.85,6.58,23.1
17,0.7842,0.0,8.14,0.0,0.538,5.99,81.7,4.2579,4.0,307.0,21.0,386.75,14.67,17.5
18,0.80271,0.0,8.14,0.0,0.538,5.456,36.6,3.7965,4.0,307.0,21.0,288.99,11.69,20.2
19,0.7258,0.0,8.14,0.0,0.538,5.727,69.5,3.7965,4.0,307.0,21.0,390.95,11.28,18.2
20,1.25179,0.0,8.14,0.0,0.538,5.57,98.1,3.7979,4.0,307.0,21.0,376.57,21.02,13.6
21,0.85204,0.0,8.14,0.0,0.538,5.965,89.2,4.0123,4.0,307.0,21.0,392.53,13.83,19.6
22,1.23247,0.0,8.14,0.0,0.538,6.142,91.7,3.9769,4.0,307.0,21.0,396.9,18.72,15.2
23,0.98843,0.0,8.14,0.0,0.538,5.813,100.0,4.0952,4.0,307.0,21.0,394.54,19.88,14.5
24,0.75026,0.0,8.14,0.0,0.538,5.924,94.1,4.3996,4.0,307.0,21.0,394.33,16.3,15.6
25,0.84054,0.0,8.14,0.0,0.538,5.599,85.7,4.4546,4.0,307.0,21.0,303.42,16.51,13.9
26,0.67191,0.0,8.14,0.0,0.538,5.813,90.3,4.682,4.0,307.0,21.0,376.88,14.81,16.6
27,0.95577,0.0,8.14,0.0,0.538,6.047,88.8,4.4534,4.0,307.0,21.0,306.38,17.28,14.8
28,0.77299,0.0,8.14,0.0,0.538,6.495,94.4,4.4547,4.0,307.0,21.0,387.94,12.8,18.4
29,1.00245,0.0,8.14,0.0,0.538,6.674,87.3,4.239,4.0,307.0,21.0,380.23,11.98,21.0
30,1.13081,0.0,8.14,0.0,0.538,5.713,94.1,4.233,4.0,307.0,21.0,360.17,22.6,12.7
31,1.35472,0.0,8.14,0.0,0.538,6.072,100.0,4.175,4.0,307.0,21.0,376.73,13.04,14.5
32,1.38799,0.0,8.14,0.0,0.538,5.95,82.0,3.99,4.0,307.0,21.0,232.6,27.71,13.2
33,1.15172,0.0,8.14,0.0,0.538,5.701,95.0,3.7872,4.0,307.0,21.0,358.77,18.35,13.1
34,1.61282,0.0,8.14,0.0,0.538,6.096,96.9,3.7598,4.0,307.0,21.0,248.31,20.34,13.5
35,0.06417,0.0,5.96,0.0,0.499,5.933,68.2,3.3603,5.0,279.0,19.2,396.9,9.68,18.9
36,0.09744,0.0,5.96,0.0,0.499,5.841,61.4,3.3779,5.0,279.0,19.2,377.56,11.41,20.0
37,0.08014,0.0,5.96,0.0,0.499,5.85,41.5,3.9342,5.0,279.0,19.2,396.9,8.77,21.0
38,0.17505,0.0,5.96,0.0,0.499,5.966,30.2,3.8473,5.0,279.0,19.2,393.43,10.13,24.7
39,0.02763,75.0,2.95,0.0,0.428,6.595,21.8,5.4011,3.0,252.0,18.3,395.63,4.32,30.8
40,0.03359,75.0,2.95,0.0,0.428,7.024,15.8,5.4011,3.0,252.0,18.3,395.62,1.98,34.9
41,0.12744,0.0,6.91,0.0,0.448,6.77,2.9,5.7209,3.0,233.0,17.9,385.41,4.84,26.6
42,0.1415,0.0,6.91,0.0,0.448,6.169,6.6,5.7209,3.0,233.0,17.9,383.37,5.81,25.3
43,0.15936,0.0,6.91,0.0,0.448,6.211,6.5,5.7209,3.0,233.0,17.9,394.46,7.44,24.7
44,0.12269,0.0,6.91,0.0,0.448,6.069,40.0,5.7209,3.0,233.0,17.9,389.39,9.55,21.2
45,0.17142,0.0,6.91,0.0,0.448,5.682,33.8,5.1004,3.0,233.0,17.9,396.9,10.21,19.3
46,0.18836,0.0,6.91,0.0,0.448,5.786,33.3,5.1004,3.0,233.0,17.9,396.9,14.15,20.0
47,0.22927,0.0,6.91,0.0,0.448,6.03,85.5,5.6894,3.0,233.0,17.9,392.74,18.8,16.6
48,0.25387,0.0,6.91,0.0,0.448,5.399,95.3,5.87,3.0,233.0,17.9,396.9,30.81,14.4
49,0.21977,0.0,6.91,0.0,0.448,5.602,62.0,6.0877,3.0,233.0,17.9,396.9,16.2,19.4
50,0.08873,21.0,5.64,0.0,0.439,5.963,45.7,6.8147,4.0,243.0,16.8,395.56,13.45,19.7
51,0.04337,21.0,5.64,0.0,0.439,6.115,63.0,6.8147,4.0,243.0,16.8,393.97,9.43,20.5
52,0.0536,21.0,5.64,0.0,0.439,6.511,21.1,6.8147,4.0,243.0,16.8,396.9,5.28,25.0
53,0.04981,21.0,5.64,0.0,0.439,5.998,21.4,6.8147,4.0,243.0,16.8,396.9,8.43,23.4
54,0.0136,75.0,4.0,0.0,0.41,5.888,47.6,7.3197,3.0,469.0,21.1,396.9,14.8,18.9
55,0.01311,90.0,1.22,0.0,0.403,7.249,21.9,8.6966,5.0,226.0,17.9,395.93,4.81,35.4
56,0.02055,85.0,0.74,0.0,0.41,6.383,35.7,9.1876,2.0,313.0,17.3,396.9,5.77,24.7
57,0.01432,100.0,1.32,0.0,0.411,6.816,40.5,8.3248,5.0,256.0,15.1,392.9,3.95,31.6
58,0.15445,25.0,5.13,0.0,0.453,6.145,29.2,7.8148,8.0,284.0,19.7,390.68,6.86,23.3
59,0.10328,25.0,5.13,0.0,0.453,5.927,47.2,6.932,8.0,284.0,19.7,396.9,9.22,19.6
60,0.14932,25.0,5.13,0.0,0.453,5.741,66.2,7.2254,8.0,284.0,19.7,395.11,13.15,18.7
61,0.17171,25.0,5.13,0.0,0.453,5.966,93.4,6.8185,8.0,284.0,19.7,378.08,14.44,16.0
62,0.11027,25.0,5.13,0.0,0.453,6.456,67.8,7.2255,8.0,284.0,19.7,396.9,6.73,22.2
63,0.1265,25.0,5.13,0.0,0.453,6.762,43.4,7.9809,8.0,284.0,19.7,395.58,9.5,25.0
64,0.01951,17.5,1.38,0.0,0.4161,7.104,59.5,9.2229,3.0,216.0,18.6,393.24,8.05,33.0
65,0.03584,80.0,3.37,0.0,0.398,6.29,17.8,6.6115,4.0,337.0,16.1,396.9,4.67,23.5
66,0.04379,80.0,3.37,0.0,0.398,5.787,31.1,6.6115,4.0,337.0,16.1,396.9,10.24,19.4
67,0.05789,12.5,6.07,0.0,0.409,5.878,21.4,6.498,4.0,345.0,18.9,396.21,8.1,22.0
68,0.13554,12.5,6.07,0.0,0.409,5.594,36.8,6.498,4.0,345.0,18.9,396.9,13.09,17.4
69,0.12816,12.5,6.07,0.0,0.409,5.885,33.0,6.498,4.0,345.0,18.9,396.9,8.79,20.9
70,0.08826,0.0,10.81,0.0,0.413,6.417,6.6,5.2873,4.0,305.0,19.2,383.73,6.72,24.2
71,0.15876,0.0,10.81,0.0,0.413,5.961,17.5,5.2873,4.0,305.0,19.2,376.94,9.88,21.7
72,0.09164,0.0,10.81,0.0,0.413,6.065,7.8,5.2873,4.0,305.0,19.2,390.91,5.52,22.8
73,0.19539,0.0,10.81,0.0,0.413,6.245,6.2,5.2873,4.0,305.0,19.2,377.17,7.54,23.4
74,0.07896,0.0,12.83,0.0,0.437,6.273,6.0,4.2515,5.0,398.0,18.7,394.92,6.78,24.1
75,0.09512,0.0,12.83,0.0,0.437,6.286,45.0,4.5026,5.0,398.0,18.7,383.23,8.94,21.4
76,0.10153,0.0,12.83,0.0,0.437,6.279,74.5,4.0522,5.0,398.0,18.7,373.66,11.97,20.0
77,0.08707,0.0,12.83,0.0,0.437,6.14,45.8,4.0905,5.0,398.0,18.7,386.96,10.27,20.8
78,0.05646,0.0,12.83,0.0,0.437,6.232,53.7,5.0141,5.0,398.0,18.7,386.4,12.34,21.2
79,0.08387,0.0,12.83,0.0,0.437,5.874,36.6,4.5026,5.0,398.0,18.7,396.06,9.1,20.3
80,0.04113,25.0,4.86,0.0,0.426,6.727,33.5,5.4007,4.0,281.0,19.0,396.9,5.29,28.0
81,0.04462,25.0,4.86,0.0,0.426,6.619,70.4,5.4007,4.0,281.0,19.0,395.63,7.22,23.9
82,0.03659,25.0,4.86,0.0,0.426,6.302,32.2,5.4007,4.0,281.0,19.0,396.9,6.72,24.8
83,0.03551,25.0,4.86,0.0,0.426,6.167,46.7,5.4007,4.0,281.0,19.0,390.64,7.51,22.9
84,0.05059,0.0,4.49,0.0,0.449,6.389,48.0,4.7794,3.0,247.0,18.5,396.9,9.62,23.9
85,0.05735,0.0,4.49,0.0,0.449,6.63,56.1,4.4377,3.0,247.0,18.5,392.3,6.53,26.6
86,0.05188,0.0,4.49,0.0,0.449,6.015,45.1,4.4272,3.0,247.0,18.5,395.99,12.86,22.5
87,0.07151,0.0,4.49,0.0,0.449,6.121,56.8,3.7476,3.0,247.0,18.5,395.15,8.44,22.2
88,0.0566,0.0,3.41,0.0,0.489,7.007,86.3,3.4217,2.0,270.0,17.8,396.9,5.5,23.6
89,0.05302,0.0,3.41,0.0,0.489,7.079,63.1,3.4145,2.0,270.0,17.8,396.06,5.7,28.7
90,0.04684,0.0,3.41,0.0,0.489,6.417,66.1,3.0923,2.0,270.0,17.8,392.18,8.81,22.6
91,0.03932,0.0,3.41,0.0,0.489,6.405,73.9,3.0921,2.0,270.0,17.8,393.55,8.2,22.0
92,0.04203,28.0,15.04,0.0,0.464,6.442,53.6,3.6659,4.0,270.0,18.2,395.01,8.16,22.9
93,0.02875,28.0,15.04,0.0,0.464,6.211,28.9,3.6659,4.0,270.0,18.2,396.33,6.21,25.0
94,0.04294,28.0,15.04,0.0,0.464,6.249,77.3,3.615,4.0,270.0,18.2,396.9,10.59,20.6
95,0.12204,0.0,2.89,0.0,0.445,6.625,57.8,3.4952,2.0,276.0,18.0,357.98,6.65,28.4
96,0.11504,0.0,2.89,0.0,0.445,6.163,69.6,3.4952,2.0,276.0,18.0,391.83,11.34,21.4
97,0.12083,0.0,2.89,0.0,0.445,8.069,76.0,3.4952,2.0,276.0,18.0,396.9,4.21,38.7
98,0.08187,0.0,2.89,0.0,0.445,7.82,36.9,3.4952,2.0,276.0,18.0,393.53,3.57,43.8
99,0.0686,0.0,2.89,0.0,0.445,7.416,62.5,3.4952,2.0,276.0,18.0,396.9,6.19,33.2
100,0.14866,0.0,8.56,0.0,0.52,6.727,79.9,2.7778,5.0,384.0,20.9,394.76,9.42,27.5
101,0.11432,0.0,8.56,0.0,0.52,6.781,71.3,2.8561,5.0,384.0,20.9,395.58,7.67,26.5
102,0.22876,0.0,8.56,0.0,0.52,6.405,85.4,2.7147,5.0,384.0,20.9,70.8,10.63,18.6
103,0.21161,0.0,8.56,0.0,0.52,6.137,87.4,2.7147,5.0,384.0,20.9,394.47,13.44,19.3
104,0.1396,0.0,8.56,0.0,0.52,6.167,90.0,2.421,5.0,384.0,20.9,392.69,12.33,20.1
105,0.13262,0.0,8.56,0.0,0.52,5.851,96.7,2.1069,5.0,384.0,20.9,394.05,16.47,19.5
106,0.1712,0.0,8.56,0.0,0.52,5.836,91.9,2.211,5.0,384.0,20.9,395.67,18.66,19.5
107,0.13117,0.0,8.56,0.0,0.52,6.127,85.2,2.1224,5.0,384.0,20.9,387.69,14.09,20.4
108,0.12802,0.0,8.56,0.0,0.52,6.474,97.1,2.4329,5.0,384.0,20.9,395.24,12.27,19.8
109,0.26363,0.0,8.56,0.0,0.52,6.229,91.2,2.5451,5.0,384.0,20.9,391.23,15.55,19.4
110,0.10793,0.0,8.56,0.0,0.52,6.195,54.4,2.7778,5.0,384.0,20.9,393.49,13.0,21.7
111,0.10084,0.0,10.01,0.0,0.547,6.715,81.6,2.6775,6.0,432.0,17.8,395.59,10.16,22.8
112,0.12329,0.0,10.01,0.0,0.547,5.913,92.9,2.3534,6.0,432.0,17.8,394.95,16.21,18.8
113,0.22212,0.0,10.01,0.0,0.547,6.092,95.4,2.548,6.0,432.0,17.8,396.9,17.09,18.7
114,0.14231,0.0,10.01,0.0,0.547,6.254,84.2,2.2565,6.0,432.0,17.8,388.74,10.45,18.5
115,0.17134,0.0,10.01,0.0,0.547,5.928,88.2,2.4631,6.0,432.0,17.8,344.91,15.76,18.3
116,0.13158,0.0,10.01,0.0,0.547,6.176,72.5,2.7301,6.0,432.0,17.8,393.3,12.04,21.2
117,0.15098,0.0,10.01,0.0,0.547,6.021,82.6,2.7474,6.0,432.0,17.8,394.51,10.3,19.2
118,0.13058,0.0,10.01,0.0,0.547,5.872,73.1,2.4775,6.0,432.0,17.8,338.63,15.37,20.4
119,0.14476,0.0,10.01,0.0,0.547,5.731,65.2,2.7592,6.0,432.0,17.8,391.5,13.61,19.3
120,0.06899,0.0,25.65,0.0,0.581,5.87,69.7,2.2577,2.0,188.0,19.1,389.15,14.37,22.0
121,0.07165,0.0,25.65,0.0,0.581,6.004,84.1,2.1974,2.0,188.0,19.1,377.67,14.27,20.3
122,0.09299,0.0,25.65,0.0,0.581,5.961,92.9,2.0869,2.0,188.0,19.1,378.09,17.93,20.5
123,0.15038,0.0,25.65,0.0,0.581,5.856,97.0,1.9444,2.0,188.0,19.1,370.31,25.41,17.3
124,0.09849,0.0,25.65,0.0,0.581,5.879,95.8,2.0063,2.0,188.0,19.1,379.38,17.58,18.8
125,0.16902,0.0,25.65,0.0,0.581,5.986,88.4,1.9929,2.0,188.0,19.1,385.02,14.81,21.4
126,0.38735,0.0,25.65,0.0,0.581,5.613,95.6,1.7572,2.0,188.0,19.1,359.29,27.26,15.7
127,0.25915,0.0,21.89,0.0,0.624,5.693,96.0,1.7883,4.0,437.0,21.2,392.11,17.19,16.2
128,0.32543,0.0,21.89,0.0,0.624,6.431,98.8,1.8125,4.0,437.0,21.2,396.9,15.39,18.0
129,0.88125,0.0,21.89,0.0,0.624,5.637,94.7,1.9799,4.0,437.0,21.2,396.9,18.34,14.3
130,0.34006,0.0,21.89,0.0,0.624,6.458,98.9,2.1185,4.0,437.0,21.2,395.04,12.6,19.2
131,1.19294,0.0,21.89,0.0,0.624,6.326,97.7,2.271,4.0,437.0,21.2,396.9,12.26,19.6
132,0.59005,0.0,21.89,0.0,0.624,6.372,97.9,2.3274,4.0,437.0,21.2,385.76,11.12,23.0
133,0.32982,0.0,21.89,0.0,0.624,5.822,95.4,2.4699,4.0,437.0,21.2,388.69,15.03,18.4
134,0.97617,0.0,21.89,0.0,0.624,5.757,98.4,2.346,4.0,437.0,21.2,262.76,17.31,15.6
135,0.55778,0.0,21.89,0.0,0.624,6.335,98.2,2.1107,4.0,437.0,21.2,394.67,16.96,18.1
136,0.32264,0.0,21.89,0.0,0.624,5.942,93.5,1.9669,4.0,437.0,21.2,378.25,16.9,17.4
137,0.35233,0.0,21.89,0.0,0.624,6.454,98.4,1.8498,4.0,437.0,21.2,394.08,14.59,17.1
138,0.2498,0.0,21.89,0.0,0.624,5.857,98.2,1.6686,4.0,437.0,21.2,392.04,21.32,13.3
139,0.54452,0.0,21.89,0.0,0.624,6.151,97.9,1.6687,4.0,437.0,21.2,396.9,18.46,17.8
140,0.2909,0.0,21.89,0.0,0.624,6.174,93.6,1.6119,4.0,437.0,21.2,388.08,24.16,14.0
141,1.62864,0.0,21.89,0.0,0.624,5.019,100.0,1.4394,4.0,437.0,21.2,396.9,34.41,14.4
142,3.32105,0.0,19.58,1.0,0.871,5.403,100.0,1.3216,5.0,403.0,14.7,396.9,26.82,13.4
143,4.0974,0.0,19.58,0.0,0.871,5.468,100.0,1.4118,5.0,403.0,14.7,396.9,26.42,15.6
144,2.77974,0.0,19.58,0.0,0.871,4.903,97.8,1.3459,5.0,403.0,14.7,396.9,29.29,11.8
145,2.37934,0.0,19.58,0.0,0.871,6.13,100.0,1.4191,5.0,403.0,14.7,172.91,27.8,13.8
146,2.15505,0.0,19.58,0.0,0.871,5.628,100.0,1.5166,5.0,403.0,14.7,169.27,16.65,15.6
147,2.36862,0.0,19.58,0.0,0.871,4.926,95.7,1.4608,5.0,403.0,14.7,391.71,29.53,14.6
148,2.33099,0.0,19.58,0.0,0.871,5.186,93.8,1.5296,5.0,403.0,14.7,356.99,28.32,17.8
149,2.73397,0.0,19.58,0.0,0.871,5.597,94.9,1.5257,5.0,403.0,14.7,351.85,21.45,15.4
150,1.6566,0.0,19.58,0.0,0.871,6.122,97.3,1.618,5.0,403.0,14.7,372.8,14.1,21.5
151,1.49632,0.0,19.58,0.0,0.871,5.404,100.0,1.5916,5.0,403.0,14.7,341.6,13.28,19.6
152,1.12658,0.0,19.58,1.0,0.871,5.012,88.0,1.6102,5.0,403.0,14.7,343.28,12.12,15.3
153,2.14918,0.0,19.58,0.0,0.871,5.709,98.5,1.6232,5.0,403.0,14.7,261.95,15.79,19.4
154,1.41385,0.0,19.58,1.0,0.871,6.129,96.0,1.7494,5.0,403.0,14.7,321.02,15.12,17.0
155,3.53501,0.0,19.58,1.0,0.871,6.152,82.6,1.7455,5.0,403.0,14.7,88.01,15.02,15.6
156,2.44668,0.0,19.58,0.0,0.871,5.272,94.0,1.7364,5.0,403.0,14.7,88.63,16.14,13.1
157,1.22358,0.0,19.58,0.0,0.605,6.943,97.4,1.8773,5.0,403.0,14.7,363.43,4.59,41.3
158,1.34284,0.0,19.58,0.0,0.605,6.066,100.0,1.7573,5.0,403.0,14.7,353.89,6.43,24.3
159,1.42502,0.0,19.58,0.0,0.871,6.51,100.0,1.7659,5.0,403.0,14.7,364.31,7.39,23.3
160,1.27346,0.0,19.58,1.0,0.605,6.25,92.6,1.7984,5.0,403.0,14.7,338.92,5.5,27.0
161,1.46336,0.0,19.58,0.0,0.605,7.489,90.8,1.9709,5.0,403.0,14.7,374.43,1.73,50.0
162,1.83377,0.0,19.58,1.0,0.605,7.802,98.2,2.0407,5.0,403.0,14.7,389.61,1.92,50.0
163,1.51902,0.0,19.58,1.0,0.605,8.375,93.9,2.162,5.0,403.0,14.7,388.45,3.32,50.0
164,2.24236,0.0,19.58,0.0,0.605,5.854,91.8,2.422,5.0,403.0,14.7,395.11,11.64,22.7
165,2.924,0.0,19.58,0.0,0.605,6.101,93.0,2.2834,5.0,403.0,14.7,240.16,9.81,25.0
166,2.01019,0.0,19.58,0.0,0.605,7.929,96.2,2.0459,5.0,403.0,14.7,369.3,3.7,50.0
167,1.80028,0.0,19.58,0.0,0.605,5.877,79.2,2.4259,5.0,403.0,14.7,227.61,12.14,23.8
168,2.3004,0.0,19.58,0.0,0.605,6.319,96.1,2.1,5.0,403.0,14.7,297.09,11.1,23.8
169,2.44953,0.0,19.58,0.0,0.605,6.402,95.2,2.2625,5.0,403.0,14.7,330.04,11.32,22.3
170,1.20742,0.0,19.58,0.0,0.605,5.875,94.6,2.4259,5.0,403.0,14.7,292.29,14.43,17.4
171,2.3139,0.0,19.58,0.0,0.605,5.88,97.3,2.3887,5.0,403.0,14.7,348.13,12.03,19.1
172,0.13914,0.0,4.05,0.0,0.51,5.572,88.5,2.5961,5.0,296.0,16.6,396.9,14.69,23.1
173,0.09178,0.0,4.05,0.0,0.51,6.416,84.1,2.6463,5.0,296.0,16.6,395.5,9.04,23.6
174,0.08447,0.0,4.05,0.0,0.51,5.859,68.7,2.7019,5.0,296.0,16.6,393.23,9.64,22.6
175,0.06664,0.0,4.05,0.0,0.51,6.546,33.1,3.1323,5.0,296.0,16.6,390.96,5.33,29.4
176,0.07022,0.0,4.05,0.0,0.51,6.02,47.2,3.5549,5.0,296.0,16.6,393.23,10.11,23.2
177,0.05425,0.0,4.05,0.0,0.51,6.315,73.4,3.3175,5.0,296.0,16.6,395.6,6.29,24.6
178,0.06642,0.0,4.05,0.0,0.51,6.86,74.4,2.9153,5.0,296.0,16.6,391.27,6.92,29.9
179,0.0578,0.0,2.46,0.0,0.488,6.98,58.4,2.829,3.0,193.0,17.8,396.9,5.04,37.2
180,0.06588,0.0,2.46,0.0,0.488,7.765,83.3,2.741,3.0,193.0,17.8,395.56,7.56,39.8
181,0.06888,0.0,2.46,0.0,0.488,6.144,62.2,2.5979,3.0,193.0,17.8,396.9,9.45,36.2
182,0.09103,0.0,2.46,0.0,0.488,7.155,92.2,2.7006,3.0,193.0,17.8,394.12,4.82,37.9
183,0.10008,0.0,2.46,0.0,0.488,6.563,95.6,2.847,3.0,193.0,17.8,396.9,5.68,32.5
184,0.08308,0.0,2.46,0.0,0.488,5.604,89.8,2.9879,3.0,193.0,17.8,391.0,13.98,26.4
185,0.06047,0.0,2.46,0.0,0.488,6.153,68.8,3.2797,3.0,193.0,17.8,387.11,13.15,29.6
186,0.05602,0.0,2.46,0.0,0.488,7.831,53.6,3.1992,3.0,193.0,17.8,392.63,4.45,50.0
187,0.07875,45.0,3.44,0.0,0.437,6.782,41.1,3.7886,5.0,398.0,15.2,393.87,6.68,32.0
188,0.12579,45.0,3.44,0.0,0.437,6.556,29.1,4.5667,5.0,398.0,15.2,382.84,4.56,29.8
189,0.0837,45.0,3.44,0.0,0.437,7.185,38.9,4.5667,5.0,398.0,15.2,396.9,5.39,34.9
190,0.09068,45.0,3.44,0.0,0.437,6.951,21.5,6.4798,5.0,398.0,15.2,377.68,5.1,37.0
191,0.06911,45.0,3.44,0.0,0.437,6.739,30.8,6.4798,5.0,398.0,15.2,389.71,4.69,30.5
192,0.08664,45.0,3.44,0.0,0.437,7.178,26.3,6.4798,5.0,398.0,15.2,390.49,2.87,36.4
193,0.02187,60.0,2.93,0.0,0.401,6.8,9.9,6.2196,1.0,265.0,15.6,393.37,5.03,31.1
194,0.01439,60.0,2.93,0.0,0.401,6.604,18.8,6.2196,1.0,265.0,15.6,376.7,4.38,29.1
195,0.01381,80.0,0.46,0.0,0.422,7.875,32.0,5.6484,4.0,255.0,14.4,394.23,2.97,50.0
196,0.04011,80.0,1.52,0.0,0.404,7.287,34.1,7.309,2.0,329.0,12.6,396.9,4.08,33.3
197,0.04666,80.0,1.52,0.0,0.404,7.107,36.6,7.309,2.0,329.0,12.6,354.31,8.61,30.3
198,0.03768,80.0,1.52,0.0,0.404,7.274,38.3,7.309,2.0,329.0,12.6,392.2,6.62,34.6
199,0.0315,95.0,1.47,0.0,0.403,6.975,15.3,7.6534,3.0,402.0,17.0,396.9,4.56,34.9
200,0.01778,95.0,1.47,0.0,0.403,7.135,13.9,7.6534,3.0,402.0,17.0,384.3,4.45,32.9
201,0.03445,82.5,2.03,0.0,0.415,6.162,38.4,6.27,2.0,348.0,14.7,393.77,7.43,24.1
202,0.02177,82.5,2.03,0.0,0.415,7.61,15.7,6.27,2.0,348.0,14.7,395.38,3.11,42.3
203,0.0351,95.0,2.68,0.0,0.4161,7.853,33.2,5.118,4.0,224.0,14.7,392.78,3.81,48.5
204,0.02009,95.0,2.68,0.0,0.4161,8.034,31.9,5.118,4.0,224.0,14.7,390.55,2.88,50.0
205,0.13642,0.0,10.59,0.0,0.489,5.891,22.3,3.9454,4.0,277.0,18.6,396.9,10.87,22.6
206,0.22969,0.0,10.59,0.0,0.489,6.326,52.5,4.3549,4.0,277.0,18.6,394.87,10.97,24.4
207,0.25199,0.0,10.59,0.0,0.489,5.783,72.7,4.3549,4.0,277.0,18.6,389.43,18.06,22.5
208,0.13587,0.0,10.59,1.0,0.489,6.064,59.1,4.2392,4.0,277.0,18.6,381.32,14.66,24.4
209,0.43571,0.0,10.59,1.0,0.489,5.344,100.0,3.875,4.0,277.0,18.6,396.9,23.09,20.0
210,0.17446,0.0,10.59,1.0,0.489,5.96,92.1,3.8771,4.0,277.0,18.6,393.25,17.27,21.7
211,0.37578,0.0,10.59,1.0,0.489,5.404,88.6,3.665,4.0,277.0,18.6,395.24,23.98,19.3
212,0.21719,0.0,10.59,1.0,0.489,5.807,53.8,3.6526,4.0,277.0,18.6,390.94,16.03,22.4
213,0.14052,0.0,10.59,0.0,0.489,6.375,32.3,3.9454,4.0,277.0,18.6,385.81,9.38,28.1
214,0.28955,0.0,10.59,0.0,0.489,5.412,9.8,3.5875,4.0,277.0,18.6,348.93,29.55,23.7
215,0.19802,0.0,10.59,0.0,0.489,6.182,42.4,3.9454,4.0,277.0,18.6,393.63,9.47,25.0
216,0.0456,0.0,13.89,1.0,0.55,5.888,56.0,3.1121,5.0,276.0,16.4,392.8,13.51,23.3
217,0.07013,0.0,13.89,0.0,0.55,6.642,85.1,3.4211,5.0,276.0,16.4,392.78,9.69,28.7
218,0.11069,0.0,13.89,1.0,0.55,5.951,93.8,2.8893,5.0,276.0,16.4,396.9,17.92,21.5
219,0.11425,0.0,13.89,1.0,0.55,6.373,92.4,3.3633,5.0,276.0,16.4,393.74,10.5,23.0
220,0.35809,0.0,6.2,1.0,0.507,6.951,88.5,2.8617,8.0,307.0,17.4,391.7,9.71,26.7
221,0.40771,0.0,6.2,1.0,0.507,6.164,91.3,3.048,8.0,307.0,17.4,395.24,21.46,21.7
222,0.62356,0.0,6.2,1.0,0.507,6.879,77.7,3.2721,8.0,307.0,17.4,390.39,9.93,27.5
223,0.6147,0.0,6.2,0.0,0.507,6.618,80.8,3.2721,8.0,307.0,17.4,396.9,7.6,30.1
224,0.31533,0.0,6.2,0.0,0.504,8.266,78.3,2.8944,8.0,307.0,17.4,385.05,4.14,44.8
225,0.52693,0.0,6.2,0.0,0.504,8.725,83.0,2.8944,8.0,307.0,17.4,382.0,4.63,50.0
226,0.38214,0.0,6.2,0.0,0.504,8.04,86.5,3.2157,8.0,307.0,17.4,387.38,3.13,37.6
227,0.41238,0.0,6.2,0.0,0.504,7.163,79.9,3.2157,8.0,307.0,17.4,372.08,6.36,31.6
228,0.29819,0.0,6.2,0.0,0.504,7.686,17.0,3.3751,8.0,307.0,17.4,377.51,3.92,46.7
229,0.44178,0.0,6.2,0.0,0.504,6.552,21.4,3.3751,8.0,307.0,17.4,380.34,3.76,31.5
230,0.537,0.0,6.2,0.0,0.504,5.981,68.1,3.6715,8.0,307.0,17.4,378.35,11.65,24.3
231,0.46296,0.0,6.2,0.0,0.504,7.412,76.9,3.6715,8.0,307.0,17.4,376.14,5.25,31.7
232,0.57529,0.0,6.2,0.0,0.507,8.337,73.3,3.8384,8.0,307.0,17.4,385.91,2.47,41.7
233,0.33147,0.0,6.2,0.0,0.507,8.247,70.4,3.6519,8.0,307.0,17.4,378.95,3.95,48.3
234,0.44791,0.0,6.2,1.0,0.507,6.726,66.5,3.6519,8.0,307.0,17.4,360.2,8.05,29.0
235,0.33045,0.0,6.2,0.0,0.507,6.086,61.5,3.6519,8.0,307.0,17.4,376.75,10.88,24.0
236,0.52058,0.0,6.2,1.0,0.507,6.631,76.5,4.148,8.0,307.0,17.4,388.45,9.54,25.1
237,0.51183,0.0,6.2,0.0,0.507,7.358,71.6,4.148,8.0,307.0,17.4,390.07,4.73,31.5
238,0.08244,30.0,4.93,0.0,0.428,6.481,18.5,6.1899,6.0,300.0,16.6,379.41,6.36,23.7
239,0.09252,30.0,4.93,0.0,0.428,6.606,42.2,6.1899,6.0,300.0,16.6,383.78,7.37,23.3
240,0.11329,30.0,4.93,0.0,0.428,6.897,54.3,6.3361,6.0,300.0,16.6,391.25,11.38,22.0
241,0.10612,30.0,4.93,0.0,0.428,6.095,65.1,6.3361,6.0,300.0,16.6,394.62,12.4,20.1
242,0.1029,30.0,4.93,0.0,0.428,6.358,52.9,7.0355,6.0,300.0,16.6,372.75,11.22,22.2
243,0.12757,30.0,4.93,0.0,0.428,6.393,7.8,7.0355,6.0,300.0,16.6,374.71,5.19,23.7
244,0.20608,22.0,5.86,0.0,0.431,5.593,76.5,7.9549,7.0,330.0,19.1,372.49,12.5,17.6
245,0.19133,22.0,5.86,0.0,0.431,5.605,70.2,7.9549,7.0,330.0,19.1,389.13,18.46,18.5
246,0.33983,22.0,5.86,0.0,0.431,6.108,34.9,8.0555,7.0,330.0,19.1,390.18,9.16,24.3
247,0.19657,22.0,5.86,0.0,0.431,6.226,79.2,8.0555,7.0,330.0,19.1,376.14,10.15,20.5
248,0.16439,22.0,5.86,0.0,0.431,6.433,49.1,7.8265,7.0,330.0,19.1,374.71,9.52,24.5
249,0.19073,22.0,5.86,0.0,0.431,6.718,17.5,7.8265,7.0,330.0,19.1,393.74,6.56,26.2
250,0.1403,22.0,5.86,0.0,0.431,6.487,13.0,7.3967,7.0,330.0,19.1,396.28,5.9,24.4
251,0.21409,22.0,5.86,0.0,0.431,6.438,8.9,7.3967,7.0,330.0,19.1,377.07,3.59,24.8
252,0.08221,22.0,5.86,0.0,0.431,6.957,6.8,8.9067,7.0,330.0,19.1,386.09,3.53,29.6
253,0.36894,22.0,5.86,0.0,0.431,8.259,8.4,8.9067,7.0,330.0,19.1,396.9,3.54,42.8
254,0.04819,80.0,3.64,0.0,0.392,6.108,32.0,9.2203,1.0,315.0,16.4,392.89,6.57,21.9
255,0.03548,80.0,3.64,0.0,0.392,5.876,19.1,9.2203,1.0,315.0,16.4,395.18,9.25,20.9
256,0.01538,90.0,3.75,0.0,0.394,7.454,34.2,6.3361,3.0,244.0,15.9,386.34,3.11,44.0
257,0.61154,20.0,3.97,0.0,0.647,8.704,86.9,1.801,5.0,264.0,13.0,389.7,5.12,50.0
258,0.66351,20.0,3.97,0.0,0.647,7.333,100.0,1.8946,5.0,264.0,13.0,383.29,7.79,36.0
259,0.65665,20.0,3.97,0.0,0.647,6.842,100.0,2.0107,5.0,264.0,13.0,391.93,6.9,30.1
260,0.54011,20.0,3.97,0.0,0.647,7.203,81.8,2.1121,5.0,264.0,13.0,392.8,9.59,33.8
261,0.53412,20.0,3.97,0.0,0.647,7.52,89.4,2.1398,5.0,264.0,13.0,388.37,7.26,43.1
262,0.52014,20.0,3.97,0.0,0.647,8.398,91.5,2.2885,5.0,264.0,13.0,386.86,5.91,48.8
263,0.82526,20.0,3.97,0.0,0.647,7.327,94.5,2.0788,5.0,264.0,13.0,393.42,11.25,31.0
264,0.55007,20.0,3.97,0.0,0.647,7.206,91.6,1.9301,5.0,264.0,13.0,387.89,8.1,36.5
265,0.76162,20.0,3.97,0.0,0.647,5.56,62.8,1.9865,5.0,264.0,13.0,392.4,10.45,22.8
266,0.7857,20.0,3.97,0.0,0.647,7.014,84.6,2.1329,5.0,264.0,13.0,384.07,14.79,30.7
267,0.57834,20.0,3.97,0.0,0.575,8.297,67.0,2.4216,5.0,264.0,13.0,384.54,7.44,50.0
268,0.5405,20.0,3.97,0.0,0.575,7.47,52.6,2.872,5.0,264.0,13.0,390.3,3.16,43.5
269,0.09065,20.0,6.96,1.0,0.464,5.92,61.5,3.9175,3.0,223.0,18.6,391.34,13.65,20.7
270,0.29916,20.0,6.96,0.0,0.464,5.856,42.1,4.429,3.0,223.0,18.6,388.65,13.0,21.1
271,0.16211,20.0,6.96,0.0,0.464,6.24,16.3,4.429,3.0,223.0,18.6,396.9,6.59,25.2
272,0.1146,20.0,6.96,0.0,0.464,6.538,58.7,3.9175,3.0,223.0,18.6,394.96,7.73,24.4
273,0.22188,20.0,6.96,1.0,0.464,7.691,51.8,4.3665,3.0,223.0,18.6,390.77,6.58,35.2
274,0.05644,40.0,6.41,1.0,0.447,6.758,32.9,4.0776,4.0,254.0,17.6,396.9,3.53,32.4
275,0.09604,40.0,6.41,0.0,0.447,6.854,42.8,4.2673,4.0,254.0,17.6,396.9,2.98,32.0
276,0.10469,40.0,6.41,1.0,0.447,7.267,49.0,4.7872,4.0,254.0,17.6,389.25,6.05,33.2
277,0.06127,40.0,6.41,1.0,0.447,6.826,27.6,4.8628,4.0,254.0,17.6,393.45,4.16,33.1
278,0.07978,40.0,6.41,0.0,0.447,6.482,32.1,4.1403,4.0,254.0,17.6,396.9,7.19,29.1
279,0.21038,20.0,3.33,0.0,0.4429,6.812,32.2,4.1007,5.0,216.0,14.9,396.9,4.85,35.1
280,0.03578,20.0,3.33,0.0,0.4429,7.82,64.5,4.6947,5.0,216.0,14.9,387.31,3.76,45.4
281,0.03705,20.0,3.33,0.0,0.4429,6.968,37.2,5.2447,5.0,216.0,14.9,392.23,4.59,35.4
282,0.06129,20.0,3.33,1.0,0.4429,7.645,49.7,5.2119,5.0,216.0,14.9,377.07,3.01,46.0
283,0.01501,90.0,1.21,1.0,0.401,7.923,24.8,5.885,1.0,198.0,13.6,395.52,3.16,50.0
284,0.00906,90.0,2.97,0.0,0.4,7.088,20.8,7.3073,1.0,285.0,15.3,394.72,7.85,32.2
285,0.01096,55.0,2.25,0.0,0.389,6.453,31.9,7.3073,1.0,300.0,15.3,394.72,8.23,22.0
286,0.01965,80.0,1.76,0.0,0.385,6.23,31.5,9.0892,1.0,241.0,18.2,341.6,12.93,20.1
287,0.03871,52.5,5.32,0.0,0.405,6.209,31.3,7.3172,6.0,293.0,16.6,396.9,7.14,23.2
288,0.0459,52.5,5.32,0.0,0.405,6.315,45.6,7.3172,6.0,293.0,16.6,396.9,7.6,22.3
289,0.04297,52.5,5.32,0.0,0.405,6.565,22.9,7.3172,6.0,293.0,16.6,371.72,9.51,24.8
290,0.03502,80.0,4.95,0.0,0.411,6.861,27.9,5.1167,4.0,245.0,19.2,396.9,3.33,28.5
291,0.07886,80.0,4.95,0.0,0.411,7.148,27.7,5.1167,4.0,245.0,19.2,396.9,3.56,37.3
292,0.03615,80.0,4.95,0.0,0.411,6.63,23.4,5.1167,4.0,245.0,19.2,396.9,4.7,27.9
293,0.08265,0.0,13.92,0.0,0.437,6.127,18.4,5.5027,4.0,289.0,16.0,396.9,8.58,23.9
294,0.08199,0.0,13.92,0.0,0.437,6.009,42.3,5.5027,4.0,289.0,16.0,396.9,10.4,21.7
295,0.12932,0.0,13.92,0.0,0.437,6.678,31.1,5.9604,4.0,289.0,16.0,396.9,6.27,28.6
296,0.05372,0.0,13.92,0.0,0.437,6.549,51.0,5.9604,4.0,289.0,16.0,392.85,7.39,27.1
297,0.14103,0.0,13.92,0.0,0.437,5.79,58.0,6.32,4.0,289.0,16.0,396.9,15.84,20.3
298,0.06466,70.0,2.24,0.0,0.4,6.345,20.1,7.8278,5.0,358.0,14.8,368.24,4.97,22.5
299,0.05561,70.0,2.24,0.0,0.4,7.041,10.0,7.8278,5.0,358.0,14.8,371.58,4.74,29.0
300,0.04417,70.0,2.24,0.0,0.4,6.871,47.4,7.8278,5.0,358.0,14.8,390.86,6.07,24.8
301,0.03537,34.0,6.09,0.0,0.433,6.59,40.4,5.4917,7.0,329.0,16.1,395.75,9.5,22.0
302,0.09266,34.0,6.09,0.0,0.433,6.495,18.4,5.4917,7.0,329.0,16.1,383.61,8.67,26.4
303,0.1,34.0,6.09,0.0,0.433,6.982,17.7,5.4917,7.0,329.0,16.1,390.43,4.86,33.1
304,0.05515,33.0,2.18,0.0,0.472,7.236,41.1,4.022,7.0,222.0,18.4,393.68,6.93,36.1
305,0.05479,33.0,2.18,0.0,0.472,6.616,58.1,3.37,7.0,222.0,18.4,393.36,8.93,28.4
306,0.07503,33.0,2.18,0.0,0.472,7.42,71.9,3.0992,7.0,222.0,18.4,396.9,6.47,33.4
307,0.04932,33.0,2.18,0.0,0.472,6.849,70.3,3.1827,7.0,222.0,18.4,396.9,7.53,28.2
308,0.49298,0.0,9.9,0.0,0.544,6.635,82.5,3.3175,4.0,304.0,18.4,396.9,4.54,22.8
309,0.3494,0.0,9.9,0.0,0.544,5.972,76.7,3.1025,4.0,304.0,18.4,396.24,9.97,20.3
310,2.63548,0.0,9.9,0.0,0.544,4.973,37.8,2.5194,4.0,304.0,18.4,350.45,12.64,16.1
311,0.79041,0.0,9.9,0.0,0.544,6.122,52.8,2.6403,4.0,304.0,18.4,396.9,5.98,22.1
312,0.26169,0.0,9.9,0.0,0.544,6.023,90.4,2.834,4.0,304.0,18.4,396.3,11.72,19.4
313,0.26938,0.0,9.9,0.0,0.544,6.266,82.8,3.2628,4.0,304.0,18.4,393.39,7.9,21.6
314,0.3692,0.0,9.9,0.0,0.544,6.567,87.3,3.6023,4.0,304.0,18.4,395.69,9.28,23.8
315,0.25356,0.0,9.9,0.0,0.544,5.705,77.7,3.945,4.0,304.0,18.4,396.42,11.5,16.2
316,0.31827,0.0,9.9,0.0,0.544,5.914,83.2,3.9986,4.0,304.0,18.4,390.7,18.33,17.8
317,0.24522,0.0,9.9,0.0,0.544,5.782,71.7,4.0317,4.0,304.0,18.4,396.9,15.94,19.8
318,0.40202,0.0,9.9,0.0,0.544,6.382,67.2,3.5325,4.0,304.0,18.4,395.21,10.36,23.1
319,0.47547,0.0,9.9,0.0,0.544,6.113,58.8,4.0019,4.0,304.0,18.4,396.23,12.73,21.0
320,0.1676,0.0,7.38,0.0,0.493,6.426,52.3,4.5404,5.0,287.0,19.6,396.9,7.2,23.8
321,0.18159,0.0,7.38,0.0,0.493,6.376,54.3,4.5404,5.0,287.0,19.6,396.9,6.87,23.1
322,0.35114,0.0,7.38,0.0,0.493,6.041,49.9,4.7211,5.0,287.0,19.6,396.9,7.7,20.4
323,0.28392,0.0,7.38,0.0,0.493,5.708,74.3,4.7211,5.0,287.0,19.6,391.13,11.74,18.5
324,0.34109,0.0,7.38,0.0,0.493,6.415,40.1,4.7211,5.0,287.0,19.6,396.9,6.12,25.0
325,0.19186,0.0,7.38,0.0,0.493,6.431,14.7,5.4159,5.0,287.0,19.6,393.68,5.08,24.6
326,0.30347,0.0,7.38,0.0,0.493,6.312,28.9,5.4159,5.0,287.0,19.6,396.9,6.15,23.0
327,0.24103,0.0,7.38,0.0,0.493,6.083,43.7,5.4159,5.0,287.0,19.6,396.9,12.79,22.2
328,0.06617,0.0,3.24,0.0,0.46,5.868,25.8,5.2146,4.0,430.0,16.9,382.44,9.97,19.3
329,0.06724,0.0,3.24,0.0,0.46,6.333,17.2,5.2146,4.0,430.0,16.9,375.21,7.34,22.6
330,0.04544,0.0,3.24,0.0,0.46,6.144,32.2,5.8736,4.0,430.0,16.9,368.57,9.09,19.8
331,0.05023,35.0,6.06,0.0,0.4379,5.706,28.4,6.6407,1.0,304.0,16.9,394.02,12.43,17.1
332,0.03466,35.0,6.06,0.0,0.4379,6.031,23.3,6.6407,1.0,304.0,16.9,362.25,7.83,19.4
333,0.05083,0.0,5.19,0.0,0.515,6.316,38.1,6.4584,5.0,224.0,20.2,389.71,5.68,22.2
334,0.03738,0.0,5.19,0.0,0.515,6.31,38.5,6.4584,5.0,224.0,20.2,389.4,6.75,20.7
335,0.03961,0.0,5.19,0.0,0.515,6.037,34.5,5.9853,5.0,224.0,20.2,396.9,8.01,21.1
336,0.03427,0.0,5.19,0.0,0.515,5.869,46.3,5.2311,5.0,224.0,20.2,396.9,9.8,19.5
337,0.03041,0.0,5.19,0.0,0.515,5.895,59.6,5.615,5.0,224.0,20.2,394.81,10.56,18.5
338,0.03306,0.0,5.19,0.0,0.515,6.059,37.3,4.8122,5.0,224.0,20.2,396.14,8.51,20.6
339,0.05497,0.0,5.19,0.0,0.515,5.985,45.4,4.8122,5.0,224.0,20.2,396.9,9.74,19.0
340,0.06151,0.0,5.19,0.0,0.515,5.968,58.5,4.8122,5.0,224.0,20.2,396.9,9.29,18.7
341,0.01301,35.0,1.52,0.0,0.442,7.241,49.3,7.0379,1.0,284.0,15.5,394.74,5.49,32.7
342,0.02498,0.0,1.89,0.0,0.518,6.54,59.7,6.2669,1.0,422.0,15.9,389.96,8.65,16.5
343,0.02543,55.0,3.78,0.0,0.484,6.696,56.4,5.7321,5.0,370.0,17.6,396.9,7.18,23.9
344,0.03049,55.0,3.78,0.0,0.484,6.874,28.1,6.4654,5.0,370.0,17.6,387.97,4.61,31.2
345,0.03113,0.0,4.39,0.0,0.442,6.014,48.5,8.0136,3.0,352.0,18.8,385.64,10.53,17.5
346,0.06162,0.0,4.39,0.0,0.442,5.898,52.3,8.0136,3.0,352.0,18.8,364.61,12.67,17.2
347,0.0187,85.0,4.15,0.0,0.429,6.516,27.7,8.5353,4.0,351.0,17.9,392.43,6.36,23.1
348,0.01501,80.0,2.01,0.0,0.435,6.635,29.7,8.344,4.0,280.0,17.0,390.94,5.99,24.5
349,0.02899,40.0,1.25,0.0,0.429,6.939,34.5,8.7921,1.0,335.0,19.7,389.85,5.89,26.6
350,0.06211,40.0,1.25,0.0,0.429,6.49,44.4,8.7921,1.0,335.0,19.7,396.9,5.98,22.9
351,0.0795,60.0,1.69,0.0,0.411,6.579,35.9,10.7103,4.0,411.0,18.3,370.78,5.49,24.1
352,0.07244,60.0,1.69,0.0,0.411,5.884,18.5,10.7103,4.0,411.0,18.3,392.33,7.79,18.6
353,0.01709,90.0,2.02,0.0,0.41,6.728,36.1,12.1265,5.0,187.0,17.0,384.46,4.5,30.1
354,0.04301,80.0,1.91,0.0,0.413,5.663,21.9,10.5857,4.0,334.0,22.0,382.8,8.05,18.2
355,0.10659,80.0,1.91,0.0,0.413,5.936,19.5,10.5857,4.0,334.0,22.0,376.04,5.57,20.6
356,8.98296,0.0,18.1,1.0,0.77,6.212,97.4,2.1222,24.0,666.0,20.2,377.73,17.6,17.8
357,3.8497,0.0,18.1,1.0,0.77,6.395,91.0,2.5052,24.0,666.0,20.2,391.34,13.27,21.7
358,5.20177,0.0,18.1,1.0,0.77,6.127,83.4,2.7227,24.0,666.0,20.2,395.43,11.48,22.7
359,4.26131,0.0,18.1,0.0,0.77,6.112,81.3,2.5091,24.0,666.0,20.2,390.74,12.67,22.6
360,4.54192,0.0,18.1,0.0,0.77,6.398,88.0,2.5182,24.0,666.0,20.2,374.56,7.79,25.0
361,3.83684,0.0,18.1,0.0,0.77,6.251,91.1,2.2955,24.0,666.0,20.2,350.65,14.19,19.9
362,3.67822,0.0,18.1,0.0,0.77,5.362,96.2,2.1036,24.0,666.0,20.2,380.79,10.19,20.8
363,4.22239,0.0,18.1,1.0,0.77,5.803,89.0,1.9047,24.0,666.0,20.2,353.04,14.64,16.8
364,3.47428,0.0,18.1,1.0,0.718,8.78,82.9,1.9047,24.0,666.0,20.2,354.55,5.29,21.9
365,4.55587,0.0,18.1,0.0,0.718,3.561,87.9,1.6132,24.0,666.0,20.2,354.7,7.12,27.5
366,3.69695,0.0,18.1,0.0,0.718,4.963,91.4,1.7523,24.0,666.0,20.2,316.03,14.0,21.9
367,13.5222,0.0,18.1,0.0,0.631,3.863,100.0,1.5106,24.0,666.0,20.2,131.42,13.33,23.1
368,4.89822,0.0,18.1,0.0,0.631,4.97,100.0,1.3325,24.0,666.0,20.2,375.52,3.26,50.0
369,5.66998,0.0,18.1,1.0,0.631,6.683,96.8,1.3567,24.0,666.0,20.2,375.33,3.73,50.0
370,6.53876,0.0,18.1,1.0,0.631,7.016,97.5,1.2024,24.0,666.0,20.2,392.05,2.96,50.0
371,9.2323,0.0,18.1,0.0,0.631,6.216,100.0,1.1691,24.0,666.0,20.2,366.15,9.53,50.0
372,8.26725,0.0,18.1,1.0,0.668,5.875,89.6,1.1296,24.0,666.0,20.2,347.88,8.88,50.0
373,11.1081,0.0,18.1,0.0,0.668,4.906,100.0,1.1742,24.0,666.0,20.2,396.9,34.77,13.8
374,18.4982,0.0,18.1,0.0,0.668,4.138,100.0,1.137,24.0,666.0,20.2,396.9,37.97,13.8
375,19.6091,0.0,18.1,0.0,0.671,7.313,97.9,1.3163,24.0,666.0,20.2,396.9,13.44,15.0
376,15.288,0.0,18.1,0.0,0.671,6.649,93.3,1.3449,24.0,666.0,20.2,363.02,23.24,13.9
377,9.82349,0.0,18.1,0.0,0.671,6.794,98.8,1.358,24.0,666.0,20.2,396.9,21.24,13.3
378,23.6482,0.0,18.1,0.0,0.671,6.38,96.2,1.3861,24.0,666.0,20.2,396.9,23.69,13.1
379,17.8667,0.0,18.1,0.0,0.671,6.223,100.0,1.3861,24.0,666.0,20.2,393.74,21.78,10.2
380,88.9762,0.0,18.1,0.0,0.671,6.968,91.9,1.4165,24.0,666.0,20.2,396.9,17.21,10.4
381,15.8744,0.0,18.1,0.0,0.671,6.545,99.1,1.5192,24.0,666.0,20.2,396.9,21.08,10.9
382,9.18702,0.0,18.1,0.0,0.7,5.536,100.0,1.5804,24.0,666.0,20.2,396.9,23.6,11.3
383,7.99248,0.0,18.1,0.0,0.7,5.52,100.0,1.5331,24.0,666.0,20.2,396.9,24.56,12.3
384,20.0849,0.0,18.1,0.0,0.7,4.368,91.2,1.4395,24.0,666.0,20.2,285.83,30.63,8.8
385,16.8118,0.0,18.1,0.0,0.7,5.277,98.1,1.4261,24.0,666.0,20.2,396.9,30.81,7.2
386,24.3938,0.0,18.1,0.0,0.7,4.652,100.0,1.4672,24.0,666.0,20.2,396.9,28.28,10.5
387,22.5971,0.0,18.1,0.0,0.7,5.0,89.5,1.5184,24.0,666.0,20.2,396.9,31.99,7.4
388,14.3337,0.0,18.1,0.0,0.7,4.88,100.0,1.5895,24.0,666.0,20.2,372.92,30.62,10.2
389,8.15174,0.0,18.1,0.0,0.7,5.39,98.9,1.7281,24.0,666.0,20.2,396.9,20.85,11.5
390,6.96215,0.0,18.1,0.0,0.7,5.713,97.0,1.9265,24.0,666.0,20.2,394.43,17.11,15.1
391,5.29305,0.0,18.1,0.0,0.7,6.051,82.5,2.1678,24.0,666.0,20.2,378.38,18.76,23.2
392,11.5779,0.0,18.1,0.0,0.7,5.036,97.0,1.77,24.0,666.0,20.2,396.9,25.68,9.7
393,8.64476,0.0,18.1,0.0,0.693,6.193,92.6,1.7912,24.0,666.0,20.2,396.9,15.17,13.8
394,13.3598,0.0,18.1,0.0,0.693,5.887,94.7,1.7821,24.0,666.0,20.2,396.9,16.35,12.7
395,8.71675,0.0,18.1,0.0,0.693,6.471,98.8,1.7257,24.0,666.0,20.2,391.98,17.12,13.1
396,5.87205,0.0,18.1,0.0,0.693,6.405,96.0,1.6768,24.0,666.0,20.2,396.9,19.37,12.5
397,7.67202,0.0,18.1,0.0,0.693,5.747,98.9,1.6334,24.0,666.0,20.2,393.1,19.92,8.5
398,38.3518,0.0,18.1,0.0,0.693,5.453,100.0,1.4896,24.0,666.0,20.2,396.9,30.59,5.0
399,9.91655,0.0,18.1,0.0,0.693,5.852,77.8,1.5004,24.0,666.0,20.2,338.16,29.97,6.3
400,25.0461,0.0,18.1,0.0,0.693,5.987,100.0,1.5888,24.0,666.0,20.2,396.9,26.77,5.6
401,14.2362,0.0,18.1,0.0,0.693,6.343,100.0,1.5741,24.0,666.0,20.2,396.9,20.32,7.2
402,9.59571,0.0,18.1,0.0,0.693,6.404,100.0,1.639,24.0,666.0,20.2,376.11,20.31,12.1
403,24.8017,0.0,18.1,0.0,0.693,5.349,96.0,1.7028,24.0,666.0,20.2,396.9,19.77,8.3
404,41.5292,0.0,18.1,0.0,0.693,5.531,85.4,1.6074,24.0,666.0,20.2,329.46,27.38,8.5
405,67.9208,0.0,18.1,0.0,0.693,5.683,100.0,1.4254,24.0,666.0,20.2,384.97,22.98,5.0
406,20.7162,0.0,18.1,0.0,0.659,4.138,100.0,1.1781,24.0,666.0,20.2,370.22,23.34,11.9
407,11.9511,0.0,18.1,0.0,0.659,5.608,100.0,1.2852,24.0,666.0,20.2,332.09,12.13,27.9
408,7.40389,0.0,18.1,0.0,0.597,5.617,97.9,1.4547,24.0,666.0,20.2,314.64,26.4,17.2
409,14.4383,0.0,18.1,0.0,0.597,6.852,100.0,1.4655,24.0,666.0,20.2,179.36,19.78,27.5
410,51.1358,0.0,18.1,0.0,0.597,5.757,100.0,1.413,24.0,666.0,20.2,2.6,10.11,15.0
411,14.0507,0.0,18.1,0.0,0.597,6.657,100.0,1.5275,24.0,666.0,20.2,35.05,21.22,17.2
412,18.811,0.0,18.1,0.0,0.597,4.628,100.0,1.5539,24.0,666.0,20.2,28.79,34.37,17.9
413,28.6558,0.0,18.1,0.0,0.597,5.155,100.0,1.5894,24.0,666.0,20.2,210.97,20.08,16.3
414,45.7461,0.0,18.1,0.0,0.693,4.519,100.0,1.6582,24.0,666.0,20.2,88.27,36.98,7.0
415,18.0846,0.0,18.1,0.0,0.679,6.434,100.0,1.8347,24.0,666.0,20.2,27.25,29.05,7.2
416,10.8342,0.0,18.1,0.0,0.679,6.782,90.8,1.8195,24.0,666.0,20.2,21.57,25.79,7.5
417,25.9406,0.0,18.1,0.0,0.679,5.304,89.1,1.6475,24.0,666.0,20.2,127.36,26.64,10.4
418,73.5341,0.0,18.1,0.0,0.679,5.957,100.0,1.8026,24.0,666.0,20.2,16.45,20.62,8.8
419,11.8123,0.0,18.1,0.0,0.718,6.824,76.5,1.794,24.0,666.0,20.2,48.45,22.74,8.4
420,11.0874,0.0,18.1,0.0,0.718,6.411,100.0,1.8589,24.0,666.0,20.2,318.75,15.02,16.7
421,7.02259,0.0,18.1,0.0,0.718,6.006,95.3,1.8746,24.0,666.0,20.2,319.98,15.7,14.2
422,12.0482,0.0,18.1,0.0,0.614,5.648,87.6,1.9512,24.0,666.0,20.2,291.55,14.1,20.8
423,7.05042,0.0,18.1,0.0,0.614,6.103,85.1,2.0218,24.0,666.0,20.2,2.52,23.29,13.4
424,8.79212,0.0,18.1,0.0,0.584,5.565,70.6,2.0635,24.0,666.0,20.2,3.65,17.16,11.7
425,15.8603,0.0,18.1,0.0,0.679,5.896,95.4,1.9096,24.0,666.0,20.2,7.68,24.39,8.3
426,12.2472,0.0,18.1,0.0,0.584,5.837,59.7,1.9976,24.0,666.0,20.2,24.65,15.69,10.2
427,37.6619,0.0,18.1,0.0,0.679,6.202,78.7,1.8629,24.0,666.0,20.2,18.82,14.52,10.9
428,7.36711,0.0,18.1,0.0,0.679,6.193,78.1,1.9356,24.0,666.0,20.2,96.73,21.52,11.0
429,9.33889,0.0,18.1,0.0,0.679,6.38,95.6,1.9682,24.0,666.0,20.2,60.72,24.08,9.5
430,8.49213,0.0,18.1,0.0,0.584,6.348,86.1,2.0527,24.0,666.0,20.2,83.45,17.64,14.5
431,10.0623,0.0,18.1,0.0,0.584,6.833,94.3,2.0882,24.0,666.0,20.2,81.33,19.69,14.1
432,6.44405,0.0,18.1,0.0,0.584,6.425,74.8,2.2004,24.0,666.0,20.2,97.95,12.03,16.1
433,5.58107,0.0,18.1,0.0,0.713,6.436,87.9,2.3158,24.0,666.0,20.2,100.19,16.22,14.3
434,13.9134,0.0,18.1,0.0,0.713,6.208,95.0,2.2222,24.0,666.0,20.2,100.63,15.17,11.7
435,11.1604,0.0,18.1,0.0,0.74,6.629,94.6,2.1247,24.0,666.0,20.2,109.85,23.27,13.4
436,14.4208,0.0,18.1,0.0,0.74,6.461,93.3,2.0026,24.0,666.0,20.2,27.49,18.05,9.6
437,15.1772,0.0,18.1,0.0,0.74,6.152,100.0,1.9142,24.0,666.0,20.2,9.32,26.45,8.7
438,13.6781,0.0,18.1,0.0,0.74,5.935,87.9,1.8206,24.0,666.0,20.2,68.95,34.02,8.4
439,9.39063,0.0,18.1,0.0,0.74,5.627,93.9,1.8172,24.0,666.0,20.2,396.9,22.88,12.8
440,22.0511,0.0,18.1,0.0,0.74,5.818,92.4,1.8662,24.0,666.0,20.2,391.45,22.11,10.5
441,9.72418,0.0,18.1,0.0,0.74,6.406,97.2,2.0651,24.0,666.0,20.2,385.96,19.52,17.1
442,5.66637,0.0,18.1,0.0,0.74,6.219,100.0,2.0048,24.0,666.0,20.2,395.69,16.59,18.4
443,9.96654,0.0,18.1,0.0,0.74,6.485,100.0,1.9784,24.0,666.0,20.2,386.73,18.85,15.4
444,12.8023,0.0,18.1,0.0,0.74,5.854,96.6,1.8956,24.0,666.0,20.2,240.52,23.79,10.8
445,0.6718,0.0,18.1,0.0,0.74,6.459,94.8,1.9879,24.0,666.0,20.2,43.06,23.98,11.8
446,6.28807,0.0,18.1,0.0,0.74,6.341,96.4,2.072,24.0,666.0,20.2,318.01,17.79,14.9
447,9.92485,0.0,18.1,0.0,0.74,6.251,96.6,2.198,24.0,666.0,20.2,388.52,16.44,12.6
448,9.32909,0.0,18.1,0.0,0.713,6.185,98.7,2.2616,24.0,666.0,20.2,396.9,18.13,14.1
449,7.52601,0.0,18.1,0.0,0.713,6.417,98.3,2.185,24.0,666.0,20.2,304.21,19.31,13.0
450,6.71772,0.0,18.1,0.0,0.713,6.749,92.6,2.3236,24.0,666.0,20.2,0.32,17.44,13.4
451,5.44114,0.0,18.1,0.0,0.713,6.655,98.2,2.3552,24.0,666.0,20.2,355.29,17.73,15.2
452,5.09017,0.0,18.1,0.0,0.713,6.297,91.8,2.3682,24.0,666.0,20.2,385.09,17.27,16.1
453,8.24809,0.0,18.1,0.0,0.713,7.393,99.3,2.4527,24.0,666.0,20.2,375.87,16.74,17.8
454,9.51363,0.0,18.1,0.0,0.713,6.728,94.1,2.4961,24.0,666.0,20.2,6.68,18.71,14.9
455,4.75237,0.0,18.1,0.0,0.713,6.525,86.5,2.4358,24.0,666.0,20.2,50.92,18.13,14.1
456,4.66883,0.0,18.1,0.0,0.713,5.976,87.9,2.5806,24.0,666.0,20.2,10.48,19.01,12.7
457,8.20058,0.0,18.1,0.0,0.713,5.936,80.3,2.7792,24.0,666.0,20.2,3.5,16.94,13.5
458,7.75223,0.0,18.1,0.0,0.713,6.301,83.7,2.7831,24.0,666.0,20.2,272.21,16.23,14.9
459,6.80117,0.0,18.1,0.0,0.713,6.081,84.4,2.7175,24.0,666.0,20.2,396.9,14.7,20.0
460,4.81213,0.0,18.1,0.0,0.713,6.701,90.0,2.5975,24.0,666.0,20.2,255.23,16.42,16.4
461,3.69311,0.0,18.1,0.0,0.713,6.376,88.4,2.5671,24.0,666.0,20.2,391.43,14.65,17.7
462,6.65492,0.0,18.1,0.0,0.713,6.317,83.0,2.7344,24.0,666.0,20.2,396.9,13.99,19.5
463,5.82115,0.0,18.1,0.0,0.713,6.513,89.9,2.8016,24.0,666.0,20.2,393.82,10.29,20.2
464,7.83932,0.0,18.1,0.0,0.655,6.209,65.4,2.9634,24.0,666.0,20.2,396.9,13.22,21.4
465,3.1636,0.0,18.1,0.0,0.655,5.759,48.2,3.0665,24.0,666.0,20.2,334.4,14.13,19.9
466,3.77498,0.0,18.1,0.0,0.655,5.952,84.7,2.8715,24.0,666.0,20.2,22.01,17.15,19.0
467,4.42228,0.0,18.1,0.0,0.584,6.003,94.5,2.5403,24.0,666.0,20.2,331.29,21.32,19.1
468,15.5757,0.0,18.1,0.0,0.58,5.926,71.0,2.9084,24.0,666.0,20.2,368.74,18.13,19.1
469,13.0751,0.0,18.1,0.0,0.58,5.713,56.7,2.8237,24.0,666.0,20.2,396.9,14.76,20.1
470,4.34879,0.0,18.1,0.0,0.58,6.167,84.0,3.0334,24.0,666.0,20.2,396.9,16.29,19.9
471,4.03841,0.0,18.1,0.0,0.532,6.229,90.7,3.0993,24.0,666.0,20.2,395.33,12.87,19.6
472,3.56868,0.0,18.1,0.0,0.58,6.437,75.0,2.8965,24.0,666.0,20.2,393.37,14.36,23.2
473,4.64689,0.0,18.1,0.0,0.614,6.98,67.6,2.5329,24.0,666.0,20.2,374.68,11.66,29.8
474,8.05579,0.0,18.1,0.0,0.584,5.427,95.4,2.4298,24.0,666.0,20.2,352.58,18.14,13.8
475,6.39312,0.0,18.1,0.0,0.584,6.162,97.4,2.206,24.0,666.0,20.2,302.76,24.1,13.3
476,4.87141,0.0,18.1,0.0,0.614,6.484,93.6,2.3053,24.0,666.0,20.2,396.21,18.68,16.7
477,15.0234,0.0,18.1,0.0,0.614,5.304,97.3,2.1007,24.0,666.0,20.2,349.48,24.91,12.0
478,10.233,0.0,18.1,0.0,0.614,6.185,96.7,2.1705,24.0,666.0,20.2,379.7,18.03,14.6
479,14.3337,0.0,18.1,0.0,0.614,6.229,88.0,1.9512,24.0,666.0,20.2,383.32,13.11,21.4
480,5.82401,0.0,18.1,0.0,0.532,6.242,64.7,3.4242,24.0,666.0,20.2,396.9,10.74,23.0
481,5.70818,0.0,18.1,0.0,0.532,6.75,74.9,3.3317,24.0,666.0,20.2,393.07,7.74,23.7
482,5.73116,0.0,18.1,0.0,0.532,7.061,77.0,3.4106,24.0,666.0,20.2,395.28,7.01,25.0
483,2.81838,0.0,18.1,0.0,0.532,5.762,40.3,4.0983,24.0,666.0,20.2,392.92,10.42,21.8
484,2.37857,0.0,18.1,0.0,0.583,5.871,41.9,3.724,24.0,666.0,20.2,370.73,13.34,20.6
485,3.67367,0.0,18.1,0.0,0.583,6.312,51.9,3.9917,24.0,666.0,20.2,388.62,10.58,21.2
486,5.69175,0.0,18.1,0.0,0.583,6.114,79.8,3.5459,24.0,666.0,20.2,392.68,14.98,19.1
487,4.83567,0.0,18.1,0.0,0.583,5.905,53.2,3.1523,24.0,666.0,20.2,388.22,11.45,20.6
488,0.15086,0.0,27.74,0.0,0.609,5.454,92.7,1.8209,4.0,711.0,20.1,395.09,18.06,15.2
489,0.18337,0.0,27.74,0.0,0.609,5.414,98.3,1.7554,4.0,711.0,20.1,344.05,23.97,7.0
490,0.20746,0.0,27.74,0.0,0.609,5.093,98.0,1.8226,4.0,711.0,20.1,318.43,29.68,8.1
491,0.10574,0.0,27.74,0.0,0.609,5.983,98.8,1.8681,4.0,711.0,20.1,390.11,18.07,13.6
492,0.11132,0.0,27.74,0.0,0.609,5.983,83.5,2.1099,4.0,711.0,20.1,396.9,13.35,20.1
493,0.17331,0.0,9.69,0.0,0.585,5.707,54.0,2.3817,6.0,391.0,19.2,396.9,12.01,21.8
494,0.27957,0.0,9.69,0.0,0.585,5.926,42.6,2.3817,6.0,391.0,19.2,396.9,13.59,24.5
495,0.17899,0.0,9.69,0.0,0.585,5.67,28.8,2.7986,6.0,391.0,19.2,393.29,17.6,23.1
496,0.2896,0.0,9.69,0.0,0.585,5.39,72.9,2.7986,6.0,391.0,19.2,396.9,21.14,19.7
497,0.26838,0.0,9.69,0.0,0.585,5.794,70.6,2.8927,6.0,391.0,19.2,396.9,14.1,18.3
498,0.23912,0.0,9.69,0.0,0.585,6.019,65.3,2.4091,6.0,391.0,19.2,396.9,12.92,21.2
499,0.17783,0.0,9.69,0.0,0.585,5.569,73.5,2.3999,6.0,391.0,19.2,395.77,15.1,17.5
500,0.22438,0.0,9.69,0.0,0.585,6.027,79.7,2.4982,6.0,391.0,19.2,396.9,14.33,16.8
501,0.06263,0.0,11.93,0.0,0.573,6.593,69.1,2.4786,1.0,273.0,21.0,391.99,9.67,22.4
502,0.04527,0.0,11.93,0.0,0.573,6.12,76.7,2.2875,1.0,273.0,21.0,396.9,9.08,20.6
503,0.06076,0.0,11.93,0.0,0.573,6.976,91.0,2.1675,1.0,273.0,21.0,396.9,5.64,23.9
504,0.10959,0.0,11.93,0.0,0.573,6.794,89.3,2.3889,1.0,273.0,21.0,393.45,6.48,22.0
505,0.04741,0.0,11.93,0.0,0.573,6.03,80.8,2.505,1.0,273.0,21.0,396.9,7.88,11.9


================================================
FILE: examples/data/census_1k.csv
================================================
"YEAR","DATANUM","SERIAL","CBSERIAL","HHWT","CPI99","GQ","QGQ","PERNUM","PERWT","SEX","AGE","EDUC","EDUCD","INCTOT","SEX_HEAD","SEX_MOM","SEX_POP","SEX_SP","SEX_MOM2","SEX_POP2","AGE_HEAD","AGE_MOM","AGE_POP","AGE_SP","AGE_MOM2","AGE_POP2","EDUC_HEAD","EDUC_MOM","EDUC_POP","EDUC_SP","EDUC_MOM2","EDUC_POP2","EDUCD_HEAD","EDUCD_MOM","EDUCD_POP","EDUCD_SP","EDUCD_MOM2","EDUCD_POP2","INCTOT_HEAD","INCTOT_MOM","INCTOT_POP","INCTOT_SP","INCTOT_MOM2","INCTOT_POP2"
1970,2,1,,100,4.54,1,0,1,100,1,39,6,60,12450,1,,,2,,,39,,,36,,,6,,,3,,,60,,,30,,,12450,,,3450,,
1970,2,1,,100,4.54,1,0,2,100,2,36,3,30,3450,1,,,1,,,39,,,39,,,6,,,6,,,60,,,60,,,12450,,,12450,,
1970,2,2,,100,4.54,1,0,1,100,1,56,7,70,9050,1,,,2,,,56,,,54,,,7,,,6,,,70,,,60,,,9050,,,0,,
1970,2,2,,100,4.54,1,0,2,100,2,54,6,60,0,1,,,1,,,56,,,56,,,7,,,7,,,70,,,70,,,9050,,,9050,,
1970,2,4,,100,4.54,1,0,1,100,1,82,1,17,7450,1,,,2,,,82,,,74,,,1,,,2,,,17,,,23,,,7450,,,650,,
1970,2,4,,100,4.54,1,0,2,100,2,74,2,23,650,1,,,1,,,82,,,82,,,1,,,1,,,17,,,17,,,7450,,,7450,,
1970,2,5,,100,4.54,1,0,1,100,1,66,10,100,6950,1,,,2,,,66,,,62,,,10,,,6,,,100,,,60,,,6950,,,250,,
1970,2,5,,100,4.54,1,0,2,100,2,62,6,60,250,1,,,1,,,66,,,66,,,10,,,10,,,100,,,100,,,6950,,,6950,,
1970,2,6,,100,4.54,1,0,1,100,2,70,4,40,1250,2,,,,,,70,,,,,,4,,,,,,40,,,,,,1250,,,,,
1970,2,7,,100,4.54,1,0,1,100,1,25,6,60,11150,1,,,2,,,25,,,22,,,6,,,6,,,60,,,60,,,11150,,,4050,,
1970,2,7,,100,4.54,1,0,2,100,2,22,6,60,4050,1,,,1,,,25,,,25,,,6,,,6,,,60,,,60,,,11150,,,11150,,
1970,2,7,,100,4.54,1,0,3,100,1,1,0,1,9999999,1,2,1,,,,25,22,25,,,,6,6,6,,,,60,60,60,,,,11150,4050,11150,,,
1970,2,8,,100,4.54,3,0,1,100,2,98,2,26,550,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1970,2,9,,100,4.54,1,0,1,100,1,25,10,100,6150,1,,,2,,,25,,,27,,,10,,,9,,,100,,,90,,,6150,,,1050,,
1970,2,9,,100,4.54,1,0,2,100,2,27,9,90,1050,1,,,1,,,25,,,25,,,10,,,10,,,100,,,100,,,6150,,,6150,,
1970,2,10,,100,4.54,1,0,1,100,1,41,11,111,8050,1,2,,,,,41,78,,,,,11,6,,,,,111,60,,,,,8050,0,,,,
1970,2,10,,100,4.54,1,0,2,100,2,78,6,60,0,1,,,,,,41,,,,,,11,,,,,,111,,,,,,8050,,,,,
1970,2,10,,100,4.54,1,0,3,100,2,38,6,60,7150,1,2,,,,,41,78,,,,,11,6,,,,,111,60,,,,,8050,0,,,,
1970,2,11,,100,4.54,1,0,1,100,1,20,6,60,2050,1,,,,,,20,,,,,,6,,,,,,60,,,,,,2050,,,,,
1970,2,13,,100,4.54,1,0,1,100,1,37,6,65,16850,1,,,2,,,37,,,30,,,6,,,8,,,65,,,80,,,16850,,,350,,
1970,2,13,,100,4.54,1,0,2,100,2,30,8,80,350,1,,,1,,,37,,,37,,,6,,,6,,,65,,,65,,,16850,,,16850,,
1970,2,13,,100,4.54,1,0,3,100,1,5,0,2,9999999,1,2,1,,,,37,30,37,,,,6,8,6,,,,65,80,65,,,,16850,350,16850,,,
1970,2,13,,100,4.54,1,0,4,100,2,1,0,1,9999999,1,2,1,,,,37,30,37,,,,6,8,6,,,,65,80,65,,,,16850,350,16850,,,
1970,2,14,,100,4.54,1,0,1,100,1,49,2,23,8850,1,,,2,,,49,,,35,,,2,,,3,,,23,,,30,,,8850,,,4850,,
1970,2,14,,100,4.54,1,0,2,100,2,35,3,30,4850,1,,,1,,,49,,,49,,,2,,,2,,,23,,,23,,,8850,,,8850,,
1970,2,14,,100,4.54,1,0,3,100,2,17,3,30,250,1,2,1,,,,49,35,49,,,,2,3,2,,,,23,30,23,,,,8850,4850,8850,,,
1970,2,14,,100,4.54,1,0,4,100,2,14,2,25,0,1,2,1,,,,49,35,49,,,,2,3,2,,,,23,30,23,,,,8850,4850,8850,,,
1970,2,14,,100,4.54,1,0,5,100,1,10,1,15,9999999,1,2,1,,,,49,35,49,,,,2,3,2,,,,23,30,23,,,,8850,4850,8850,,,
1970,2,14,,100,4.54,1,0,6,100,2,8,1,14,9999999,1,2,1,,,,49,35,49,,,,2,3,2,,,,23,30,23,,,,8850,4850,8850,,,
1970,2,14,,100,4.54,1,0,7,100,1,0,0,1,9999999,1,2,1,,,,49,35,49,,,,2,3,2,,,,23,30,23,,,,8850,4850,8850,,,
1970,2,15,,100,4.54,1,0,1,100,2,62,7,70,7750,2,,,,,,62,,,,,,7,,,,,,70,,,,,,7750,,,,,
1970,2,15,,100,4.54,1,0,2,100,1,35,11,111,5350,2,2,,,,,62,62,,,,,7,7,,,,,70,70,,,,,7750,7750,,,,
1970,2,16,,100,4.54,1,0,1,100,1,57,4,40,11250,1,,,2,,,57,,,54,,,4,,,2,,,40,,,26,,,11250,,,150,,
1970,2,16,,100,4.54,1,0,2,100,2,54,2,26,150,1,2,,1,,,57,86,,57,,,4,2,,4,,,40,26,,40,,,11250,1250,,11250,,
1970,2,16,,100,4.54,1,0,3,100,2,86,2,26,1250,1,,,,,,57,,,,,,4,,,,,,40,,,,,,11250,,,,,
1970,2,17,,100,4.54,1,0,1,100,1,54,6,60,6050,1,,,,,,54,,,,,,6,,,,,,60,,,,,,6050,,,,,
1970,2,17,,100,4.54,1,0,2,100,2,64,2,26,0,1,,,,,,54,,,,,,6,,,,,,60,,,,,,6050,,,,,
1970,2,18,,100,4.54,1,0,1,100,1,52,7,70,12050,1,,,2,,,52,,,44,,,7,,,6,,,70,,,60,,,12050,,,650,,
1970,2,18,,100,4.54,1,0,2,100,2,44,6,60,650,1,,,1,,,52,,,52,,,7,,,7,,,70,,,70,,,12050,,,12050,,
1970,2,18,,100,4.54,1,0,3,100,2,16,4,40,950,1,2,1,,,,52,44,52,,,,7,6,7,,,,70,60,70,,,,12050,650,12050,,,
1970,2,18,,100,4.54,1,0,4,100,2,15,3,30,350,1,2,1,,,,52,44,52,,,,7,6,7,,,,70,60,70,,,,12050,650,12050,,,
1970,2,18,,100,4.54,1,0,5,100,1,14,2,25,350,1,2,1,,,,52,44,52,,,,7,6,7,,,,70,60,70,,,,12050,650,12050,,,
1970,2,18,,100,4.54,1,0,6,100,1,12,2,22,9999999,1,2,1,,,,52,44,52,,,,7,6,7,,,,70,60,70,,,,12050,650,12050,,,
1970,2,18,,100,4.54,1,0,7,100,1,6,1,12,9999999,1,2,1,,,,52,44,52,,,,7,6,7,,,,70,60,70,,,,12050,650,12050,,,
1970,2,19,,100,4.54,1,0,1,100,1,77,2,26,250,1,,,2,,,77,,,79,,,2,,,2,,,26,,,26,,,250,,,0,,
1970,2,19,,100,4.54,1,0,2,100,2,79,2,26,0,1,,,1,,,77,,,77,,,2,,,2,,,26,,,26,,,250,,,250,,
1970,2,20,,100,4.54,1,0,1,100,1,36,6,60,11450,1,,,2,,,36,,,32,,,6,,,6,,,60,,,60,,,11450,,,5550,,
1970,2,20,,100,4.54,1,0,2,100,2,32,6,60,5550,1,,,1,,,36,,,36,,,6,,,6,,,60,,,60,,,11450,,,11450,,
1970,2,20,,100,4.54,1,0,3,100,2,9,1,16,9999999,1,2,1,,,,36,32,36,,,,6,6,6,,,,60,60,60,,,,11450,5550,11450,,,
1970,2,21,,100,4.54,1,0,1,100,1,21,4,40,2450,1,,,2,,,21,,,20,,,4,,,4,,,40,,,40,,,2450,,,4550,,
1970,2,21,,100,4.54,1,0,2,100,2,20,4,40,4550,1,,,1,,,21,,,21,,,4,,,4,,,40,,,40,,,2450,,,2450,,
1970,2,21,,100,4.54,1,0,3,100,1,5,1,12,9999999,1,2,1,,,,21,20,21,,,,4,4,4,,,,40,40,40,,,,2450,4550,2450,,,
1970,2,21,,100,4.54,1,0,4,100,1,4,1,11,9999999,1,2,1,,,,21,20,21,,,,4,4,4,,,,40,40,40,,,,2450,4550,2450,,,
1970,2,22,,100,4.54,1,0,1,100,1,23,2,26,5050,1,,,,,,23,,,,,,2,,,,,,26,,,,,,5050,,,,,
1970,2,22,,100,4.54,1,0,2,100,2,23,3,30,1850,1,,,,,,23,,,,,,2,,,,,,26,,,,,,5050,,,,,
1970,2,23,,100,4.54,1,0,1,100,1,63,6,60,5050,1,,,,,,63,,,,,,6,,,,,,60,,,,,,5050,,,,,
1970,2,24,,100,4.54,1,0,1,100,2,68,3,30,2150,2,,,,,,68,,,,,,3,,,,,,30,,,,,,2150,,,,,
1970,2,25,,100,4.54,1,0,1,100,1,65,2,22,4850,1,,,2,,,65,,,61,,,2,,,4,,,22,,,40,,,4850,,,4350,,
1970,2,25,,100,4.54,1,0,2,100,2,61,4,40,4350,1,,,1,,,65,,,65,,,2,,,2,,,22,,,22,,,4850,,,4850,,
1970,2,26,,100,4.54,1,0,1,100,1,61,8,80,2150,1,,,2,,,61,,,66,,,8,,,6,,,80,,,60,,,2150,,,5650,,
1970,2,26,,100,4.54,1,0,2,100,2,66,6,60,5650,1,,,1,,,61,,,61,,,8,,,8,,,80,,,80,,,2150,,,2150,,
1970,2,27,,100,4.54,1,0,1,100,2,77,1,14,4050,2,,,,,,77,,,,,,1,,,,,,14,,,,,,4050,,,,,
1970,2,27,,100,4.54,1,0,2,100,1,75,1,14,2050,2,,,,,,77,,,,,,1,,,,,,14,,,,,,4050,,,,,
1970,2,28,,100,4.54,1,0,1,100,1,32,8,80,5050,1,,,,,,32,,,,,,8,,,,,,80,,,,,,5050,,,,,
1970,2,29,,100,4.54,1,0,1,100,1,59,5,50,15050,1,,,2,,,59,,,55,,,5,,,6,,,50,,,60,,,15050,,,0,,
1970,2,29,,100,4.54,1,0,2,100,2,55,6,60,0,1,,,1,,,59,,,59,,,5,,,5,,,50,,,50,,,15050,,,15050,,
1970,2,30,,100,4.54,1,0,1,100,2,47,6,60,0,2,,,,,,47,,,,,,6,,,,,,60,,,,,,0,,,,,
1970,2,31,,100,4.54,1,0,1,100,1,43,8,80,7050,1,,,2,,,43,,,41,,,8,,,6,,,80,,,60,,,7050,,,2050,,
1970,2,31,,100,4.54,1,0,2,100,2,41,6,60,2050,1,,,1,,,43,,,43,,,8,,,8,,,80,,,80,,,7050,,,7050,,
1970,2,31,,100,4.54,1,0,3,100,2,18,6,65,4050,1,2,1,,,,43,41,43,,,,8,6,8,,,,80,60,80,,,,7050,2050,7050,,,
1970,2,31,,100,4.54,1,0,4,100,2,15,2,26,0,1,2,1,,,,43,41,43,,,,8,6,8,,,,80,60,80,,,,7050,2050,7050,,,
1970,2,31,,100,4.54,1,0,5,100,2,10,1,17,9999999,1,2,1,,,,43,41,43,,,,8,6,8,,,,80,60,80,,,,7050,2050,7050,,,
1970,2,32,,100,4.54,1,0,1,100,1,40,10,100,13350,1,,,2,,,40,,,36,,,10,,,10,,,100,,,100,,,13350,,,0,,
1970,2,32,,100,4.54,1,0,2,100,2,36,10,100,0,1,,,1,,,40,,,40,,,10,,,10,,,100,,,100,,,13350,,,13350,,
1970,2,32,,100,4.54,1,0,3,100,1,14,2,25,0,1,2,1,,,,40,36,40,,,,10,10,10,,,,100,100,100,,,,13350,0,13350,,,
1970,2,32,,100,4.54,1,0,4,100,1,10,1,16,9999999,1,2,1,,,,40,36,40,,,,10,10,10,,,,100,100,100,,,,13350,0,13350,,,
1970,2,32,,100,4.54,1,0,5,100,2,4,1,11,9999999,1,2,1,,,,40,36,40,,,,10,10,10,,,,100,100,100,,,,13350,0,13350,,,
1970,2,33,,100,4.54,1,0,1,100,1,40,11,111,25050,1,,,2,,,40,,,32,,,11,,,10,,,111,,,100,,,25050,,,0,,
1970,2,33,,100,4.54,1,0,2,100,2,32,10,100,0,1,,,1,,,40,,,40,,,11,,,11,,,111,,,111,,,25050,,,25050,,
1970,2,33,,100,4.54,1,0,3,100,1,5,0,2,9999999,1,2,1,,,,40,32,40,,,,11,10,11,,,,111,100,111,,,,25050,0,25050,,,
1970,2,33,,100,4.54,1,0,4,100,1,3,0,2,9999999,1,2,1,,,,40,32,40,,,,11,10,11,,,,111,100,111,,,,25050,0,25050,,,
1970,2,34,,100,4.54,1,0,1,100,1,31,11,111,19350,1,,,2,,,31,,,31,,,11,,,10,,,111,,,100,,,19350,,,0,,
1970,2,34,,100,4.54,1,0,2,100,2,31,10,100,0,1,,,1,,,31,,,31,,,11,,,11,,,111,,,111,,,19350,,,19350,,
1970,2,34,,100,4.54,1,0,3,100,2,3,0,2,9999999,1,2,1,,,,31,31,31,,,,11,10,11,,,,111,100,111,,,,19350,0,19350,,,
1970,2,35,,100,4.54,1,0,1,100,1,64,11,111,17150,1,,,,,,64,,,,,,11,,,,,,111,,,,,,17150,,,,,
1970,2,36,,100,4.54,1,0,1,100,1,55,3,30,9050,1,,,2,,,55,,,51,,,3,,,6,,,30,,,60,,,9050,,,2950,,
1970,2,36,,100,4.54,1,0,2,100,2,51,6,60,2950,1,,,1,,,55,,,55,,,3,,,3,,,30,,,30,,,9050,,,9050,,
1970,2,37,,100,4.54,1,0,1,100,1,43,11,111,50000,1,,,2,,,43,,,40,,,11,,,10,,,111,,,100,,,50000,,,1150,,
1970,2,37,,100,4.54,1,0,2,100,2,40,10,100,1150,1,,,1,,,43,,,43,,,11,,,11,,,111,,,111,,,50000,,,50000,,
1970,2,37,,100,4.54,1,0,3,100,1,16,4,40,250,1,2,1,,,,43,40,43,,,,11,10,11,,,,111,100,111,,,,50000,1150,50000,,,
1970,2,37,,100,4.54,1,0,4,100,2,15,2,26,50,1,2,1,,,,43,40,43,,,,11,10,11,,,,111,100,111,,,,50000,1150,50000,,,
1970,2,37,,100,4.54,1,0,5,100,1,12,2,22,9999999,1,2,1,,,,43,40,43,,,,11,10,11,,,,111,100,111,,,,50000,1150,50000,,,
1970,2,38,,100,4.54,1,0,1,100,1,32,10,100,22150,1,,,2,,,32,,,31,,,10,,,7,,,100,,,70,,,22150,,,0,,
1970,2,38,,100,4.54,1,0,2,100,2,31,7,70,0,1,,,1,,,32,,,32,,,10,,,10,,,100,,,100,,,22150,,,22150,,
1970,2,38,,100,4.54,1,0,3,100,2,5,0,2,9999999,1,2,1,,,,32,31,32,,,,10,7,10,,,,100,70,100,,,,22150,0,22150,,,
1970,2,38,,100,4.54,1,0,4,100,1,4,0,2,9999999,1,2,1,,,,32,31,32,,,,10,7,10,,,,100,70,100,,,,22150,0,22150,,,
1970,2,38,,100,4.54,1,0,5,100,2,2,0,1,9999999,1,2,1,,,,32,31,32,,,,10,7,10,,,,100,70,100,,,,22150,0,22150,,,
1970,2,39,,100,4.54,1,0,1,100,1,54,8,80,17850,1,,,2,,,54,,,47,,,8,,,6,,,80,,,60,,,17850,,,0,,
1970,2,39,,100,4.54,1,0,2,100,2,47,6,60,0,1,,,1,,,54,,,54,,,8,,,8,,,80,,,80,,,17850,,,17850,,
1970,2,39,,100,4.54,1,0,3,100,1,19,6,60,2750,1,2,1,,,,54,47,54,,,,8,6,8,,,,80,60,80,,,,17850,0,17850,,,
1970,2,39,,100,4.54,1,0,4,100,1,12,2,23,9999999,1,2,1,,,,54,47,54,,,,8,6,8,,,,80,60,80,,,,17850,0,17850,,,
1970,2,40,,100,4.54,1,0,1,100,1,46,6,60,17150,1,,,2,,,46,,,46,,,6,,,6,,,60,,,60,,,17150,,,5050,,
1970,2,40,,100,4.54,1,0,2,100,2,46,6,60,5050,1,,,1,,,46,,,46,,,6,,,6,,,60,,,60,,,17150,,,17150,,
1970,2,41,,100,4.54,1,0,1,100,1,70,6,60,11450,1,,,,,,70,,,,,,6,,,,,,60,,,,,,11450,,,,,
1970,2,41,,100,4.54,1,0,2,100,2,28,5,50,2150,1,,,,,,70,,,,,,6,,,,,,60,,,,,,11450,,,,,
1970,2,41,,100,4.54,1,0,3,100,1,33,8,80,9850,1,,1,,,,70,,70,,,,6,,6,,,,60,,60,,,,11450,,11450,,,
1970,2,41,,100,4.54,1,0,4,100,1,0,0,1,9999999,1,,1,,,,70,,33,,,,6,,8,,,,60,,80,,,,11450,,9850,,,
1970,2,41,,100,4.54,1,0,5,100,2,5,0,2,9999999,1,,1,,,,70,,33,,,,6,,8,,,,60,,80,,,,11450,,9850,,,
1970,2,42,,100,4.54,1,0,1,100,1,53,3,30,10050,1,,,2,,,53,,,51,,,3,,,6,,,30,,,60,,,10050,,,0,,
1970,2,42,,100,4.54,1,0,2,100,2,51,6,60,0,1,,,1,,,53,,,53,,,3,,,3,,,30,,,30,,,10050,,,10050,,
1970,2,42,,100,4.54,1,0,3,100,2,14,2,26,0,1,2,1,,,,53,51,53,,,,3,6,3,,,,30,60,30,,,,10050,0,10050,,,
1970,2,43,,100,4.54,1,0,1,100,1,41,11,111,32050,1,,,2,,,41,,,40,,,11,,,11,,,111,,,111,,,32050,,,250,,
1970,2,43,,100,4.54,1,0,2,100,2,40,11,111,250,1,,,1,,,41,,,41,,,11,,,11,,,111,,,111,,,32050,,,32050,,
1970,2,43,,100,4.54,1,0,3,100,2,10,1,17,9999999,1,2,1,,,,41,40,41,,,,11,11,11,,,,111,111,111,,,,32050,250,32050,,,
1970,2,43,,100,4.54,1,0,4,100,2,6,1,12,9999999,1,2,1,,,,41,40,41,,,,11,11,11,,,,111,111,111,,,,32050,250,32050,,,
1970,2,44,,100,4.54,1,0,1,100,1,47,2,26,14050,1,,,2,,,47,,,44,,,2,,,2,,,26,,,26,,,14050,,,0,,
1970,2,44,,100,4.54,1,0,2,100,2,44,2,26,0,1,,,1,,,47,,,47,,,2,,,2,,,26,,,26,,,14050,,,14050,,
1970,2,44,,100,4.54,1,0,3,100,2,21,8,80,2050,1,2,1,,,,47,44,47,,,,2,2,2,,,,26,26,26,,,,14050,0,14050,,,
1970,2,44,,100,4.54,1,0,4,100,1,18,5,50,1750,1,2,1,,,,47,44,47,,,,2,2,2,,,,26,26,26,,,,14050,0,14050,,,
1970,2,44,,100,4.54,1,0,5,100,1,4,0,2,9999999,1,2,1,,,,47,44,47,,,,2,2,2,,,,26,26,26,,,,14050,0,14050,,,
1970,2,45,,100,4.54,1,0,1,100,1,60,8,80,16550,1,,,2,,,60,,,50,,,8,,,6,,,80,,,60,,,16550,,,2950,,
1970,2,45,,100,4.54,1,0,2,100,2,50,6,60,2950,1,,,1,,,60,,,60,,,8,,,8,,,80,,,80,,,16550,,,16550,,
1970,2,46,,100,4.54,1,0,1,100,1,47,10,100,16250,1,2,,,,,47,83,,,,,10,2,,,,,100,22,,,,,16250,1250,,,,
1970,2,46,,100,4.54,1,0,2,100,2,83,2,22,1250,1,,,,,,47,,,,,,10,,,,,,100,,,,,,16250,,,,,
1970,2,46,,100,4.54,1,0,3,100,2,83,2,22,1450,1,,,,,,47,,,,,,10,,,,,,100,,,,,,16250,,,,,
1970,2,48,,100,4.54,1,0,1,100,1,49,6,60,8850,1,2,,2,,,49,89,,48,,,6,2,,6,,,60,26,,60,,,8850,450,,6050,,
1970,2,48,,100,4.54,1,0,2,100,2,48,6,60,6050,1,,,1,,,49,,,49,,,6,,,6,,,60,,,60,,,8850,,,8850,,
1970,2,48,,100,4.54,1,0,3,100,2,89,2,26,450,1,,,,,,49,,,,,,6,,,,,,60,,,,,,8850,,,,,
1970,2,49,,100,4.54,1,0,1,100,1,30,10,100,12050,1,,,2,,,30,,,30,,,10,,,10,,,100,,,100,,,12050,,,5650,,
1970,2,49,,100,4.54,1,0,2,100,2,30,10,100,5650,1,,,1,,,30,,,30,,,10,,,10,,,100,,,100,,,12050,,,12050,,
1970,2,49,,100,4.54,1,0,3,100,1,3,0,2,9999999,1,2,1,,,,30,30,30,,,,10,10,10,,,,100,100,100,,,,12050,5650,12050,,,
1970,2,49,,100,4.54,1,0,4,100,1,0,0,1,9999999,1,2,1,,,,30,30,30,,,,10,10,10,,,,100,100,100,,,,12050,5650,12050,,,
1970,2,50,,100,4.54,1,0,1,100,1,54,8,80,10150,1,,,,,,54,,,,,,8,,,,,,80,,,,,,10150,,,,,
1970,2,51,,100,4.54,1,0,1,100,2,64,10,100,8650,2,,,,,,64,,,,,,10,,,,,,100,,,,,,8650,,,,,
1970,2,52,,100,4.54,1,0,1,100,2,37,7,70,11350,2,,,,,,37,,,,,,7,,,,,,70,,,,,,11350,,,,,
1970,2,52,,100,4.54,1,0,2,100,2,6,1,12,9999999,2,2,,,,,37,37,,,,,7,7,,,,,70,70,,,,,11350,11350,,,,
1970,2,53,,100,4.54,1,0,1,100,2,34,6,60,8050,2,,,,,,34,,,,,,6,,,,,,60,,,,,,8050,,,,,
1970,2,53,,100,4.54,1,0,2,100,1,11,2,22,9999999,2,2,,,,,34,34,,,,,6,6,,,,,60,60,,,,,8050,8050,,,,
1970,2,53,,100,4.54,1,0,3,100,2,9,1,15,9999999,2,2,,,,,34,34,,,,,6,6,,,,,60,60,,,,,8050,8050,,,,
1970,2,54,,100,4.54,1,0,1,100,1,48,0,2,0,1,,,2,,,48,,,51,,,0,,,2,,,2,,,26,,,0,,,0,,
1970,2,54,,100,4.54,1,0,2,100,2,51,2,26,0,1,,,1,,,48,,,48,,,0,,,0,,,2,,,2,,,0,,,0,,
1970,2,55,,100,4.54,1,0,1,100,2,27,2,25,150,2,,,,,,27,,,,,,2,,,,,,25,,,,,,150,,,,,
1970,2,55,,100,4.54,1,0,2,100,2,12,2,23,9999999,2,2,,,,,27,27,,,,,2,2,,,,,25,25,,,,,150,150,,,,
1970,2,55,,100,4.54,1,0,3,100,2,7,1,14,9999999,2,2,,,,,27,27,,,,,2,2,,,,,25,25,,,,,150,150,,,,
1970,2,55,,100,4.54,1,0,4,100,2,6,1,12,9999999,2,2,,,,,27,27,,,,,2,2,,,,,25,25,,,,,150,150,,,,
1970,2,55,,100,4.54,1,0,5,100,2,5,1,11,9999999,2,2,,,,,27,27,,,,,2,2,,,,,25,25,,,,,150,150,,,,
1970,2,56,,100,4.54,1,0,1,100,1,58,2,22,8050,1,,,2,,,58,,,52,,,2,,,6,,,22,,,60,,,8050,,,0,,
1970,2,56,,100,4.54,1,0,2,100,2,52,6,60,0,1,,,1,,,58,,,58,,,2,,,2,,,22,,,22,,,8050,,,8050,,
1970,2,56,,100,4.54,1,0,3,100,2,23,9,90,0,1,2,1,,,,58,52,58,,,,2,6,2,,,,22,60,22,,,,8050,0,8050,,,
1970,2,57,,100,4.54,1,0,1,100,1,32,2,26,7050,1,,,2,,,32,,,32,,,2,,,5,,,26,,,50,,,7050,,,5050,,
1970,2,57,,100,4.54,1,0,2,100,2,32,5,50,5050,1,,,1,,,32,,,32,,,2,,,2,,,26,,,26,,,7050,,,7050,,
1970,2,57,,100,4.54,1,0,3,100,2,5,0,2,9999999,1,2,1,,,,32,32,32,,,,2,5,2,,,,26,50,26,,,,7050,5050,7050,,,
1970,2,57,,100,4.54,1,0,4,100,2,3,0,2,9999999,1,2,1,,,,32,32,32,,,,2,5,2,,,,26,50,26,,,,7050,5050,7050,,,
1970,2,57,,100,4.54,1,0,5,100,2,13,2,23,9999999,1,2,1,,,,32,32,32,,,,2,5,2,,,,26,50,26,,,,7050,5050,7050,,,
1970,2,57,,100,4.54,1,0,6,100,1,10,1,17,9999999,1,2,1,,,,32,32,32,,,,2,5,2,,,,26,50,26,,,,7050,5050,7050,,,
1970,2,57,,100,4.54,1,0,7,100,2,9,1,16,9999999,1,2,1,,,,32,32,32,,,,2,5,2,,,,26,50,26,,,,7050,5050,7050,,,
1970,2,57,,100,4.54,1,0,8,100,2,8,1,15,9999999,1,2,1,,,,32,32,32,,,,2,5,2,,,,26,50,26,,,,7050,5050,7050,,,
1970,2,57,,100,4.54,1,0,9,100,1,6,1,12,9999999,1,2,1,,,,32,32,32,,,,2,5,2,,,,26,50,26,,,,7050,5050,7050,,,
1970,2,58,,100,4.54,1,0,1,100,1,24,11,110,1350,1,,,2,,,24,,,25,,,11,,,11,,,110,,,110,,,1350,,,8150,,
1970,2,58,,100,4.54,1,0,2,100,2,25,11,110,8150,1,,,1,,,24,,,24,,,11,,,11,,,110,,,110,,,1350,,,1350,,
1970,2,58,,100,4.54,1,0,3,100,1,1,0,1,9999999,1,2,1,,,,24,25,24,,,,11,11,11,,,,110,110,110,,,,1350,8150,1350,,,
1970,2,59,,100,4.54,1,0,1,100,1,34,5,50,14150,1,,,2,,,34,,,31,,,5,,,6,,,50,,,60,,,14150,,,5050,,
1970,2,59,,100,4.54,1,0,2,100,2,31,6,60,5050,1,,,1,,,34,,,34,,,5,,,5,,,50,,,50,,,14150,,,14150,,
1970,2,59,,100,4.54,1,0,3,100,2,12,2,22,9999999,1,2,1,,,,34,31,34,,,,5,6,5,,,,50,60,50,,,,14150,5050,14150,,,
1970,2,59,,100,4.54,1,0,4,100,1,11,2,22,9999999,1,2,1,,,,34,31,34,,,,5,6,5,,,,50,60,50,,,,14150,5050,14150,,,
1970,2,59,,100,4.54,1,0,5,100,2,10,1,17,9999999,1,2,1,,,,34,31,34,,,,5,6,5,,,,50,60,50,,,,14150,5050,14150,,,
1970,2,59,,100,4.54,1,0,6,100,1,6,1,12,9999999,1,2,1,,,,34,31,34,,,,5,6,5,,,,50,60,50,,,,14150,5050,14150,,,
1970,2,60,,100,4.54,1,0,1,100,1,35,10,100,14050,1,,,,,,35,,,,,,10,,,,,,100,,,,,,14050,,,,,
1970,2,62,,100,4.54,1,0,1,100,1,53,6,60,10650,1,,,2,,,53,,,49,,,6,,,6,,,60,,,60,,,10650,,,6650,,
1970,2,62,,100,4.54,1,0,2,100,2,49,6,60,6650,1,,,1,,,53,,,53,,,6,,,6,,,60,,,60,,,10650,,,10650,,
1970,2,63,,100,4.54,1,0,1,100,1,78,2,26,1250,1,,,,,,78,,,,,,2,,,,,,26,,,,,,1250,,,,,
1970,2,63,,100,4.54,1,0,2,100,2,38,7,70,7050,1,,1,,,,78,,78,,,,2,,2,,,,26,,26,,,,1250,,1250,,,
1970,2,64,,100,4.54,1,0,1,100,1,37,6,60,7050,1,,,2,,,37,,,36,,,6,,,6,,,60,,,60,,,7050,,,0,,
1970,2,64,,100,4.54,1,0,2,100,2,36,6,60,0,1,,,1,,,37,,,37,,,6,,,6,,,60,,,60,,,7050,,,7050,,
1970,2,64,,100,4.54,1,0,3,100,1,13,2,25,9999999,1,2,1,,,,37,36,37,,,,6,6,6,,,,60,60,60,,,,7050,0,7050,,,
1970,2,64,,100,4.54,1,0,4,100,1,12,2,22,9999999,1,2,1,,,,37,36,37,,,,6,6,6,,,,60,60,60,,,,7050,0,7050,,,
1970,2,64,,100,4.54,1,0,5,100,1,5,0,2,9999999,1,2,1,,,,37,36,37,,,,6,6,6,,,,60,60,60,,,,7050,0,7050,,,
1970,2,65,,100,4.54,1,0,1,100,2,33,8,80,8050,2,,,,,,33,,,,,,8,,,,,,80,,,,,,8050,,,,,
1970,2,66,,100,4.54,1,0,1,100,1,25,4,40,8050,1,,,2,,,25,,,23,,,4,,,6,,,40,,,60,,,8050,,,6050,,
1970,2,66,,100,4.54,1,0,2,100,2,23,6,60,6050,1,,,1,,,25,,,25,,,4,,,4,,,40,,,40,,,8050,,,8050,,
1970,2,66,,100,4.54,1,0,3,100,2,4,1,11,9999999,1,2,1,,,,25,23,25,,,,4,6,4,,,,40,60,40,,,,8050,6050,8050,,,
1970,2,66,,100,4.54,1,0,4,100,1,2,0,1,9999999,1,2,1,,,,25,23,25,,,,4,6,4,,,,40,60,40,,,,8050,6050,8050,,,
1970,2,68,,100,4.54,1,0,1,100,1,40,5,50,3850,1,,,2,,,40,,,34,,,5,,,10,,,50,,,100,,,3850,,,850,,
1970,2,68,,100,4.54,1,0,2,100,2,34,10,100,850,1,,,1,,,40,,,40,,,5,,,5,,,50,,,50,,,3850,,,3850,,
1970,2,68,,100,4.54,1,0,3,100,1,1,0,1,9999999,1,2,1,,,,40,34,40,,,,5,10,5,,,,50,100,50,,,,3850,850,3850,,,
1970,2,68,,100,4.54,1,0,4,100,2,0,0,1,9999999,1,2,1,,,,40,34,40,,,,5,10,5,,,,50,100,50,,,,3850,850,3850,,,
1970,2,68,,100,4.54,1,0,5,100,2,5,1,11,9999999,1,2,1,,,,40,34,40,,,,5,10,5,,,,50,100,50,,,,3850,850,3850,,,
1970,2,69,,100,4.54,1,0,1,100,2,35,4,40,3850,2,,,,,,35,,,,,,4,,,,,,40,,,,,,3850,,,,,
1970,2,69,,100,4.54,1,0,2,100,2,8,1,15,9999999,2,2,,,,,35,35,,,,,4,4,,,,,40,40,,,,,3850,3850,,,,
1970,2,69,,100,4.54,1,0,3,100,2,7,1,15,9999999,2,2,,,,,35,35,,,,,4,4,,,,,40,40,,,,,3850,3850,,,,
1970,2,69,,100,4.54,1,0,4,100,2,6,1,14,9999999,2,2,,,,,35,35,,,,,4,4,,,,,40,40,,,,,3850,3850,,,,
1970,2,69,,100,4.54,1,0,5,100,2,2,0,1,9999999,2,2,,,,,35,35,,,,,4,4,,,,,40,40,,,,,3850,3850,,,,
1970,2,69,,100,4.54,1,0,6,100,1,13,2,23,9999999,2,2,,,,,35,35,,,,,4,4,,,,,40,40,,,,,3850,3850,,,,
1970,2,70,,100,4.54,1,0,1,100,1,29,2,26,1750,1,,,,,,29,,,,,,2,,,,,,26,,,,,,1750,,,,,
1970,2,71,,100,4.54,1,0,1,100,1,35,7,70,12050,1,,,,,,35,,,,,,7,,,,,,70,,,,,,12050,,,,,
1970,2,71,,100,4.54,1,0,2,100,2,31,10,100,5450,1,,,,,,35,,,,,,7,,,,,,70,,,,,,12050,,,,,
1970,2,72,,100,4.54,1,0,1,100,2,52,4,40,1550,2,,,,,,52,,,,,,4,,,,,,40,,,,,,1550,,,,,
1970,2,72,,100,4.54,1,0,2,100,2,26,5,50,2450,2,2,,,,,52,52,,,,,4,4,,,,,40,40,,,,,1550,1550,,,,
1970,2,72,,100,4.54,1,0,3,100,2,6,1,14,9999999,2,2,,,,,52,52,,,,,4,4,,,,,40,40,,,,,1550,1550,,,,
1970,2,72,,100,4.54,1,0,4,100,2,2,0,1,9999999,2,2,,,,,52,52,,,,,4,4,,,,,40,40,,,,,1550,1550,,,,
1970,2,72,,100,4.54,1,0,5,100,1,1,0,1,9999999,2,2,,,,,52,52,,,,,4,4,,,,,40,40,,,,,1550,1550,,,,
1970,2,74,,100,4.54,1,0,1,100,2,33,6,60,10050,2,,,,,,33,,,,,,6,,,,,,60,,,,,,10050,,,,,
1970,2,74,,100,4.54,1,0,2,100,1,13,2,22,9999999,2,2,,,,,33,33,,,,,6,6,,,,,60,60,,,,,10050,10050,,,,
1970,2,74,,100,4.54,1,0,3,100,1,11,1,17,9999999,2,2,,,,,33,33,,,,,6,6,,,,,60,60,,,,,10050,10050,,,,
1970,2,75,,100,4.54,1,0,1,100,2,26,9,90,10550,2,,,,,,26,,,,,,9,,,,,,90,,,,,,10550,,,,,
1970,2,76,,100,4.54,1,0,1,100,1,35,11,111,17250,1,,,,,,35,,,,,,11,,,,,,111,,,,,,17250,,,,,
1970,2,77,,100,4.54,1,0,1,100,1,49,6,60,11050,1,,,2,,,49,,,49,,,6,,,6,,,60,,,60,,,11050,,,0,,
1970,2,77,,100,4.54,1,0,2,100,2,49,6,60,0,1,,,1,,,49,,,49,,,6,,,6,,,60,,,60,,,11050,,,11050,,
1970,2,77,,100,4.54,1,0,3,100,2,16,4,40,3550,1,2,1,,,,49,49,49,,,,6,6,6,,,,60,60,60,,,,11050,0,11050,,,
1970,2,77,,100,4.54,1,0,4,100,1,12,2,23,9999999,1,2,1,,,,49,49,49,,,,6,6,6,,,,60,60,60,,,,11050,0,11050,,,
1970,2,77,,100,4.54,1,0,5,100,2,20,6,60,6050,1,2,1,,,,49,49,49,,,,6,6,6,,,,60,60,60,,,,11050,0,11050,,,
1970,2,78,,100,4.54,1,0,1,100,1,72,2,26,2050,1,,,2,,,72,,,65,,,2,,,2,,,26,,,26,,,2050,,,750,,
1970,2,78,,100,4.54,1,0,2,100,2,65,2,26,750,1,,,1,,,72,,,72,,,2,,,2,,,26,,,26,,,2050,,,2050,,
1970,2,79,,100,4.54,1,0,1,100,1,37,6,60,7550,1,,,2,,,37,,,35,,,6,,,9,,,60,,,90,,,7550,,,5550,,
1970,2,79,,100,4.54,1,0,2,100,2,35,9,90,5550,1,,,1,,,37,,,37,,,6,,,6,,,60,,,60,,,7550,,,7550,,
1970,2,80,,100,4.54,1,0,1,100,1,44,6,60,6050,1,,,2,,,44,,,45,,,6,,,6,,,60,,,60,,,6050,,,0,,
1970,2,80,,100,4.54,1,0,2,100,2,45,6,60,0,1,,,1,,,44,,,44,,,6,,,6,,,60,,,60,,,6050,,,6050,,
1970,2,80,,100,4.54,1,0,3,100,1,25,6,60,5050,1,2,1,,,,44,45,44,,,,6,6,6,,,,60,60,60,,,,6050,0,6050,,,
1970,2,81,,100,4.54,1,0,1,100,1,35,2,22,4050,1,,,2,,,35,,,31,,,2,,,2,,,22,,,23,,,4050,,,0,,
1970,2,81,,100,4.54,1,0,2,100,2,31,2,23,0,1,,,1,,,35,,,35,,,2,,,2,,,22,,,22,,,4050,,,4050,,
1970,2,81,,100,4.54,1,0,3,100,2,13,2,23,9999999,1,2,1,,,,35,31,35,,,,2,2,2,,,,22,23,22,,,,4050,0,4050,,,
1970,2,81,,100,4.54,1,0,4,100,2,4,0,2,9999999,1,2,1,,,,35,31,35,,,,2,2,2,,,,22,23,22,,,,4050,0,4050,,,
1970,2,81,,100,4.54,1,0,5,100,2,1,0,1,9999999,1,2,1,,,,35,31,35,,,,2,2,2,,,,22,23,22,,,,4050,0,4050,,,
1970,2,82,,100,4.54,1,0,1,100,1,45,6,60,10050,1,,,2,,,45,,,46,,,6,,,7,,,60,,,70,,,10050,,,4050,,
1970,2,82,,100,4.54,1,0,2,100,2,46,7,70,4050,1,,,1,,,45,,,45,,,6,,,6,,,60,,,60,,,10050,,,10050,,
1970,2,82,,100,4.54,1,0,3,100,1,19,6,60,2050,1,2,1,,,,45,46,45,,,,6,7,6,,,,60,70,60,,,,10050,4050,10050,,,
1970,2,82,,100,4.54,1,0,4,100,2,13,2,23,9999999,1,2,1,,,,45,46,45,,,,6,7,6,,,,60,70,60,,,,10050,4050,10050,,,
1970,2,83,,100,4.54,1,0,1,100,1,58,2,26,9650,1,,,2,,,58,,,59,,,2,,,4,,,26,,,40,,,9650,,,0,,
1970,2,83,,100,4.54,1,0,2,100,2,59,4,40,0,1,,,1,,,58,,,58,,,2,,,2,,,26,,,26,,,9650,,,9650,,
1970,2,83,,100,4.54,1,0,3,100,1,23,10,100,2050,1,2,1,,,,58,59,58,,,,2,4,2,,,,26,40,26,,,,9650,0,9650,,,
1970,2,84,,100,4.54,1,0,1,100,1,68,2,26,5050,1,,,,,,68,,,,,,2,,,,,,26,,,,,,5050,,,,,
1970,2,85,,100,4.54,1,0,1,100,1,40,2,22,8050,1,,,2,,,40,,,35,,,2,,,1,,,22,,,17,,,8050,,,0,,
1970,2,85,,100,4.54,1,0,2,100,2,35,1,17,0,1,,,1,,,40,,,40,,,2,,,2,,,22,,,22,,,8050,,,8050,,
1970,2,85,,100,4.54,1,0,3,100,2,1,0,1,9999999,1,2,1,,,,40,35,40,,,,2,1,2,,,,22,17,22,,,,8050,0,8050,,,
1970,2,85,,100,4.54,1,0,4,100,1,0,0,1,9999999,1,2,1,,,,40,35,40,,,,2,1,2,,,,22,17,22,,,,8050,0,8050,,,
1970,2,86,,100,4.54,1,0,1,100,1,56,2,23,7950,1,,,,,,56,,,,,,2,,,,,,23,,,,,,7950,,,,,
1970,2,86,,100,4.54,1,0,2,100,2,82,2,26,1150,1,,1,,,,56,,56,,,,2,,2,,,,23,,23,,,,7950,,7950,,,
1970,2,87,,100,4.54,1,0,1,100,1,28,6,60,12150,1,,,2,,,28,,,28,,,6,,,6,,,60,,,60,,,12150,,,0,,
1970,2,87,,100,4.54,1,0,2,100,2,28,6,60,0,1,,,1,,,28,,,28,,,6,,,6,,,60,,,60,,,12150,,,12150,,
1970,2,87,,100,4.54,1,0,3,100,1,1,0,1,9999999,1,2,1,,,,28,28,28,,,,6,6,6,,,,60,60,60,,,,12150,0,12150,,,
1970,2,88,,100,4.54,1,0,1,100,2,70,2,26,3550,2,,,,,,70,,,,,,2,,,,,,26,,,,,,3550,,,,,
1970,2,89,,100,4.54,1,0,1,100,1,54,2,26,7150,1,,,2,,,54,,,55,,,2,,,4,,,26,,,40,,,7150,,,0,,
1970,2,89,,100,4.54,1,0,2,100,2,55,4,40,0,1,,,1,,,54,,,54,,,2,,,2,,,26,,,26,,,7150,,,7150,,
1970,2,90,,100,4.54,1,0,1,100,2,42,6,60,0,2,,,,,,42,,,,,,6,,,,,,60,,,,,,0,,,,,
1970,2,90,,100,4.54,1,0,2,100,1,14,2,26,0,2,2,,,,,42,42,,,,,6,6,,,,,60,60,,,,,0,0,,,,
1970,2,90,,100,4.54,1,0,3,100,1,13,2,23,9999999,2,2,,,,,42,42,,,,,6,6,,,,,60,60,,,,,0,0,,,,
1970,2,90,,100,4.54,1,0,4,100,1,11,2,22,9999999,2,2,,,,,42,42,,,,,6,6,,,,,60,60,,,,,0,0,,,,
1970,2,90,,100,4.54,1,0,5,100,2,8,1,14,9999999,2,2,,,,,42,42,,,,,6,6,,,,,60,60,,,,,0,0,,,,
1970,2,91,,100,4.54,1,0,1,100,2,36,6,60,3250,2,,,,,,36,,,,,,6,,,,,,60,,,,,,3250,,,,,
1970,2,92,,100,4.54,1,0,1,100,1,42,4,40,6250,1,,,2,,,42,,,33,,,4,,,4,,,40,,,40,,,6250,,,750,,
1970,2,92,,100,4.54,1,0,2,100,2,33,4,40,750,1,,,1,,,42,,,42,,,4,,,4,,,40,,,40,,,6250,,,6250,,
1970,2,92,,100,4.54,1,0,3,100,2,13,2,25,9999999,1,2,1,,,,42,33,42,,,,4,4,4,,,,40,40,40,,,,6250,750,6250,,,
1970,2,92,,100,4.54,1,0,4,100,1,12,1,17,9999999,1,2,1,,,,42,33,42,,,,4,4,4,,,,40,40,40,,,,6250,750,6250,,,
1970,2,92,,100,4.54,1,0,5,100,1,10,1,15,9999999,1,2,1,,,,42,33,42,,,,4,4,4,,,,40,40,40,,,,6250,750,6250,,,
1970,2,92,,100,4.54,1,0,6,100,2,6,1,12,9999999,1,2,1,,,,42,33,42,,,,4,4,4,,,,40,40,40,,,,6250,750,6250,,,
1970,2,92,,100,4.54,1,0,7,100,2,95,2,23,1750,1,2,1,,,,42,33,42,,,,4,4,4,,,,40,40,40,,,,6250,750,6250,,,
1970,2,93,,100,4.54,1,0,1,100,1,35,10,100,12050,1,,,2,,,35,,,31,,,10,,,6,,,100,,,60,,,12050,,,3250,,
1970,2,93,,100,4.54,1,0,2,100,2,31,6,60,3250,1,,,1,,,35,,,35,,,10,,,10,,,100,,,100,,,12050,,,12050,,
1970,2,93,,100,4.54,1,0,3,100,1,14,2,26,0,1,2,1,,,,35,31,35,,,,10,6,10,,,,100,60,100,,,,12050,3250,12050,,,
1970,2,93,,100,4.54,1,0,4,100,2,12,2,23,9999999,1,2,1,,,,35,31,35,,,,10,6,10,,,,100,60,100,,,,12050,3250,12050,,,
1970,2,93,,100,4.54,1,0,5,100,2,10,1,16,9999999,1,2,1,,,,35,31,35,,,,10,6,10,,,,100,60,100,,,,12050,3250,12050,,,
1970,2,94,,100,4.54,1,0,1,100,2,72,9,90,6350,2,,,,,,72,,,,,,9,,,,,,90,,,,,,6350,,,,,
1970,2,94,,100,4.54,1,0,2,100,2,32,11,111,8650,2,2,,,,,72,72,,,,,9,9,,,,,90,90,,,,,6350,6350,,,,
1970,2,94,,100,4.54,1,0,3,100,2,42,6,60,7050,2,2,,,,,72,72,,,,,9,9,,,,,90,90,,,,,6350,6350,,,,
1970,2,95,,100,4.54,1,0,1,100,1,50,5,50,16150,1,,,2,,,50,,,48,,,5,,,6,,,50,,,60,,,16150,,,50,,
1970,2,95,,100,4.54,1,0,2,100,2,48,6,60,50,1,,,1,,,50,,,50,,,5,,,5,,,50,,,50,,,16150,,,16150,,
1970,2,95,,100,4.54,1,0,3,100,2,15,3,30,0,1,2,1,,,,50,48,50,,,,5,6,5,,,,50,60,50,,,,16150,50,16150,,,
1970,2,95,,100,4.54,1,0,4,100,1,13,2,23,9999999,1,2,1,,,,50,48,50,,,,5,6,5,,,,50,60,50,,,,16150,50,16150,,,
1970,2,96,,100,4.54,1,0,1,100,1,21,4,40,12050,1,,,2,,,21,,,19,,,4,,,6,,,40,,,60,,,12050,,,12050,,
1970,2,96,,100,4.54,1,0,2,100,2,19,6,60,12050,1,,,1,,,21,,,21,,,4,,,4,,,40,,,40,,,12050,,,12050,,
1970,2,97,,100,4.54,1,0,1,100,1,66,4,40,7150,1,,,2,,,66,,,64,,,4,,,2,,,40,,,23,,,7150,,,550,,
1970,2,97,,100,4.54,1,0,2,100,2,64,2,23,550,1,,,1,,,66,,,66,,,4,,,4,,,40,,,40,,,7150,,,7150,,
1970,2,98,,100,4.54,1,0,1,100,1,56,6,60,11050,1,,,2,,,56,,,53,,,6,,,6,,,60,,,60,,,11050,,,0,,
1970,2,98,,100,4.54,1,0,2,100,2,53,6,60,0,1,,,1,,,56,,,56,,,6,,,6,,,60,,,60,,,11050,,,11050,,
1970,2,98,,100,4.54,1,0,3,100,1,29,7,70,5050,1,2,1,,,,56,53,56,,,,6,6,6,,,,60,60,60,,,,11050,0,11050,,,
1970,2,98,,100,4.54,1,0,4,100,1,18,5,50,0,1,2,1,,,,56,53,56,,,,6,6,6,,,,60,60,60,,,,11050,0,11050,,,
1970,2,99,,100,4.54,1,0,1,100,1,51,8,80,12050,1,,,2,,,51,,,55,,,8,,,4,,,80,,,40,,,12050,,,0,,
1970,2,99,,100,4.54,1,0,2,100,2,55,4,40,0,1,,,1,,,51,,,51,,,8,,,8,,,80,,,80,,,12050,,,12050,,
1970,2,99,,100,4.54,1,0,3,100,2,11,2,22,9999999,1,2,1,,,,51,55,51,,,,8,4,8,,,,80,40,80,,,,12050,0,12050,,,
1970,2,100,,100,4.54,1,0,1,100,2,56,4,40,6250,2,,,,,,56,,,,,,4,,,,,,40,,,,,,6250,,,,,
1970,2,101,,100,4.54,1,0,1,100,1,42,9,90,21850,1,,,2,,,42,,,39,,,9,,,6,,,90,,,60,,,21850,,,650,,
1970,2,101,,100,4.54,1,0,2,100,2,39,6,60,650,1,,,1,,,42,,,42,,,9,,,9,,,90,,,90,,,21850,,,21850,,
1970,2,101,,100,4.54,1,0,3,100,1,18,5,50,650,1,2,1,,,,42,39,42,,,,9,6,9,,,,90,60,90,,,,21850,650,21850,,,
1970,2,101,,100,4.54,1,0,4,100,2,11,2,22,9999999,1,2,1,,,,42,39,42,,,,9,6,9,,,,90,60,90,,,,21850,650,21850,,,
1970,2,102,,100,4.54,1,0,1,100,1,49,6,60,19150,1,,,2,,,49,,,46,,,6,,,6,,,60,,,60,,,19150,,,0,,
1970,2,102,,100,4.54,1,0,2,100,2,46,6,60,0,1,,,1,,,49,,,49,,,6,,,6,,,60,,,60,,,19150,,,19150,,
1970,2,102,,100,4.54,1,0,3,100,2,9,1,16,9999999,1,2,1,,,,49,46,49,,,,6,6,6,,,,60,60,60,,,,19150,0,19150,,,
1970,2,102,,100,4.54,1,0,4,100,1,6,1,12,9999999,1,2,1,,,,49,46,49,,,,6,6,6,,,,60,60,60,,,,19150,0,19150,,,
1970,2,102,,100,4.54,1,0,5,100,1,17,5,50,2050,1,2,1,,,,49,46,49,,,,6,6,6,,,,60,60,60,,,,19150,0,19150,,,
1970,2,102,,100,4.54,1,0,6,100,1,16,3,30,1450,1,2,1,,,,49,46,49,,,,6,6,6,,,,60,60,60,,,,19150,0,19150,,,
1970,2,102,,100,4.54,1,0,7,100,1,16,3,30,1450,1,2,1,,,,49,46,49,,,,6,6,6,,,,60,60,60,,,,19150,0,19150,,,
1970,2,102,,100,4.54,1,0,8,100,2,11,2,22,9999999,1,2,1,,,,49,46,49,,,,6,6,6,,,,60,60,60,,,,19150,0,19150,,,
1970,2,103,,100,4.54,1,0,1,100,1,59,7,70,8850,1,,,,,,59,,,,,,7,,,,,,70,,,,,,8850,,,,,
1970,2,103,,100,4.54,1,0,2,100,2,69,6,60,450,1,,,,,,59,,,,,,7,,,,,,70,,,,,,8850,,,,,
1970,2,104,,100,4.54,1,0,1,100,1,59,5,50,6750,1,,,2,,,59,,,59,,,5,,,2,,,50,,,26,,,6750,,,4850,,
1970,2,104,,100,4.54,1,0,2,100,2,59,2,26,4850,1,,,1,,,59,,,59,,,5,,,5,,,50,,,50,,,6750,,,6750,,
1970,2,104,,100,4.54,1,0,3,100,1,20,6,60,1650,1,2,1,,,,59,59,59,,,,5,2,5,,,,50,26,50,,,,6750,4850,6750,,,
1970,2,105,,100,4.54,1,0,1,100,2,55,7,70,9450,2,,,,,,55,,,,,,7,,,,,,70,,,,,,9450,,,,,
1970,2,106,,100,4.54,1,0,1,100,1,63,2,23,7950,1,,,2,,,63,,,57,,,2,,,4,,,23,,,40,,,7950,,,4650,,
1970,2,106,,100,4.54,1,0,2,100,2,57,4,40,4650,1,,,1,,,63,,,63,,,2,,,2,,,23,,,23,,,7950,,,7950,,
1970,2,106,,100,4.54,1,0,3,100,1,21,8,80,1450,1,2,1,,,,63,57,63,,,,2,4,2,,,,23,40,23,,,,7950,4650,7950,,,
1970,2,107,,100,4.54,1,0,1,100,1,40,3,30,18650,1,,,2,,,40,,,37,,,3,,,6,,,30,,,60,,,18650,,,0,,
1970,2,107,,100,4.54,1,0,2,100,2,37,6,60,0,1,,,1,,,40,,,40,,,3,,,3,,,30,,,30,,,18650,,,18650,,
1970,2,107,,100,4.54,1,0,3,100,1,14,2,26,0,1,2,1,,,,40,37,40,,,,3,6,3,,,,30,60,30,,,,18650,0,18650,,,
1970,2,107,,100,4.54,1,0,4,100,1,8,1,15,9999999,1,2,1,,,,40,37,40,,,,3,6,3,,,,30,60,30,,,,18650,0,18650,,,
1970,2,108,,100,4.54,1,0,1,100,1,36,6,65,4050,1,,,2,,,36,,,36,,,6,,,2,,,65,,,26,,,4050,,,1550,,
1970,2,108,,100,4.54,1,0,2,100,2,36,2,26,1550,1,,,1,,,36,,,36,,,6,,,6,,,65,,,65,,,4050,,,4050,,
1970,2,108,,100,4.54,1,0,3,100,2,17,5,50,950,1,2,1,,,,36,36,36,,,,6,2,6,,,,65,26,65,,,,4050,1550,4050,,,
1970,2,108,,100,4.54,1,0,4,100,2,14,2,26,0,1,2,1,,,,36,36,36,,,,6,2,6,,,,65,26,65,,,,4050,1550,4050,,,
1970,2,108,,100,4.54,1,0,5,100,1,10,1,17,9999999,1,2,1,,,,36,36,36,,,,6,2,6,,,,65,26,65,,,,4050,1550,4050,,,
1970,2,108,,100,4.54,1,0,6,100,2,8,1,15,9999999,1,2,1,,,,36,36,36,,,,6,2,6,,,,65,26,65,,,,4050,1550,4050,,,
1970,2,109,,100,4.54,1,0,1,100,1,37,10,100,1550,1,,,,,,37,,,,,,10,,,,,,100,,,,,,1550,,,,,
1970,2,109,,100,4.54,1,0,2,100,1,48,10,100,11050,1,,,,,,37,,,,,,10,,,,,,100,,,,,,1550,,,,,
1970,2,110,,100,4.54,1,0,1,100,2,78,6,60,3950,2,,,,,,78,,,,,,6,,,,,,60,,,,,,3950,,,,,
1970,2,111,,100,4.54,1,0,1,100,1,32,4,40,6050,1,,,,,,32,,,,,,4,,,,,,40,,,,,,6050,,,,,
1970,2,112,,100,4.54,1,0,1,100,2,63,2,25,250,2,,,,,,63,,,,,,2,,,,,,25,,,,,,250,,,,,
1970,2,113,,100,4.54,1,0,1,100,1,42,5,50,10050,1,,,2,,,42,,,34,,,5,,,6,,,50,,,60,,,10050,,,550,,
1970,2,113,,100,4.54,1,0,2,100,2,34,6,60,550,1,,,1,,,42,,,42,,,5,,,5,,,50,,,50,,,10050,,,10050,,
1970,2,113,,100,4.54,1,0,3,100,1,13,2,23,9999999,1,2,1,,,,42,34,42,,,,5,6,5,,,,50,60,50,,,,10050,550,10050,,,
1970,2,113,,100,4.54,1,0,4,100,1,12,2,22,9999999,1,2,1,,,,42,34,42,,,,5,6,5,,,,50,60,50,,,,10050,550,10050,,,
1970,2,113,,100,4.54,1,0,5,100,2,11,1,17,9999999,1,2,1,,,,42,34,42,,,,5,6,5,,,,50,60,50,,,,10050,550,10050,,,
1970,2,114,,100,4.54,1,0,1,100,1,82,2,26,0,1,,,2,,,82,,,75,,,2,,,2,,,26,,,26,,,0,,,1450,,
1970,2,114,,100,4.54,1,0,2,100,2,75,2,26,1450,1,,,1,,,82,,,82,,,2,,,2,,,26,,,26,,,0,,,0,,
1970,2,114,,100,4.54,1,0,3,100,1,47,0,2,0,1,2,1,,,,82,75,82,,,,2,2,2,,,,26,26,26,,,,0,1450,0,,,
1970,2,115,,100,4.54,1,0,1,100,2,70,2,23,1250,2,,,,,,70,,,,,,2,,,,,,23,,,,,,1250,,,,,
1970,2,116,,100,4.54,1,0,1,100,1,65,3,30,3450,1,,,2,,,65,,,65,,,3,,,6,,,30,,,60,,,3450,,,350,,
1970,2,116,,100,4.54,1,0,2,100,2,65,6,60,350,1,,,1,,,65,,,65,,,3,,,3,,,30,,,30,,,3450,,,3450,,
1970,2,116,,100,4.54,1,0,3,100,1,42,7,70,6750,1,2,1,,,,65,65,65,,,,3,6,3,,,,30,60,30,,,,3450,350,3450,,,
1970,2,117,,100,4.54,1,0,1,100,1,34,6,60,14050,1,,,2,,,34,,,31,,,6,,,7,,,60,,,70,,,14050,,,0,,
1970,2,117,,100,4.54,1,0,2,100,2,31,7,70,0,1,,,1,,,34,,,34,,,6,,,6,,,60,,,60,,,14050,,,14050,,
1970,2,117,,100,4.54,1,0,3,100,1,8,1,15,9999999,1,2,1,,,,34,31,34,,,,6,7,6,,,,60,70,60,,,,14050,0,14050,,,
1970,2,117,,100,4.54,1,0,4,100,2,7,1,14,9999999,1,2,1,,,,34,31,34,,,,6,7,6,,,,60,70,60,,,,14050,0,14050,,,
1970,2,117,,100,4.54,1,0,5,100,2,5,1,11,9999999,1,2,1,,,,34,31,34,,,,6,7,6,,,,60,70,60,,,,14050,0,14050,,,
1970,2,117,,100,4.54,1,0,6,100,2,1,0,1,9999999,1,2,1,,,,34,31,34,,,,6,7,6,,,,60,70,60,,,,14050,0,14050,,,
1970,2,118,,100,4.54,1,0,1,100,1,23,6,60,6050,1,,,2,,,23,,,20,,,6,,,7,,,60,,,70,,,6050,,,6050,,
1970,2,118,,100,4.54,1,0,2,100,2,20,7,70,6050,1,,,1,,,23,,,23,,,6,,,6,,,60,,,60,,,6050,,,6050,,
1970,2,119,,100,4.54,1,0,1,100,1,24,11,110,8650,1,,,2,,,24,,,25,,,11,,,6,,,110,,,60,,,8650,,,4050,,
1970,2,119,,100,4.54,1,0,2,100,2,25,6,60,4050,1,,,1,,,24,,,24,,,11,,,11,,,110,,,110,,,8650,,,8650,,
1970,2,119,,100,4.54,1,0,3,100,2,0,0,1,9999999,1,2,1,,,,24,25,24,,,,11,6,11,,,,110,60,110,,,,8650,4050,8650,,,
1970,2,120,,100,4.54,1,0,1,100,1,35,2,26,10150,1,,,2,,,35,,,29,,,2,,,6,,,26,,,60,,,10150,,,2550,,
1970,2,120,,100,4.54,1,0,2,100,2,29,6,60,2550,1,,,1,,,35,,,35,,,2,,,2,,,26,,,26,,,10150,,,10150,,
1970,2,120,,100,4.54,1,0,3,100,2,2,0,1,9999999,1,2,1,,,,35,29,35,,,,2,6,2,,,,26,60,26,,,,10150,2550,10150,,,
1970,2,120,,100,4.54,1,0,4,100,1,4,0,2,9999999,1,2,1,,,,35,29,35,,,,2,6,2,,,,26,60,26,,,,10150,2550,10150,,,
1970,2,121,,100,4.54,1,0,1,100,1,45,4,40,12550,1,,,,,,45,,,,,,4,,,,,,40,,,,,,12550,,,,,
1970,2,122,,100,4.54,1,0,1,100,1,64,6,60,5550,1,,,2,,,64,,,62,,,6,,,2,,,60,,,26,,,5550,,,0,,
1970,2,122,,100,4.54,1,0,2,100,2,62,2,26,0,1,,,1,,,64,,,64,,,6,,,6,,,60,,,60,,,5550,,,5550,,
1970,2,123,,100,4.54,1,0,1,100,1,26,1,12,1950,1,,,2,,,26,,,27,,,1,,,8,,,12,,,80,,,1950,,,550,,
1970,2,123,,100,4.54,1,0,2,100,2,27,8,80,550,1,,,1,,,26,,,26,,,1,,,1,,,12,,,12,,,1950,,,1950,,
1970,2,123,,100,4.54,1,0,3,100,1,4,0,2,9999999,1,2,1,,,,26,27,26,,,,1,8,1,,,,12,80,12,,,,1950,550,1950,,,
1970,2,123,,100,4.54,1,0,4,100,1,4,0,2,9999999,1,2,1,,,,26,27,26,,,,1,8,1,,,,12,80,12,,,,1950,550,1950,,,
1970,2,123,,100,4.54,1,0,5,100,2,2,0,1,9999999,1,2,1,,,,26,27,26,,,,1,8,1,,,,12,80,12,,,,1950,550,1950,,,
1970,2,124,,100,4.54,1,0,1,100,1,44,2,23,10750,1,,,2,,,44,,,48,,,2,,,2,,,23,,,26,,,10750,,,0,,
1970,2,124,,100,4.54,1,0,2,100,2,48,2,26,0,1,,,1,,,44,,,44,,,2,,,2,,,23,,,23,,,10750,,,10750,,
1970,2,124,,100,4.54,1,0,3,100,2,10,1,17,9999999,1,2,1,,,,44,48,44,,,,2,2,2,,,,23,26,23,,,,10750,0,10750,,,
1970,2,125,,100,4.54,1,0,1,100,1,55,2,26,11450,1,,,2,,,55,,,50,,,2,,,7,,,26,,,70,,,11450,,,7050,,
1970,2,125,,100,4.54,1,0,2,100,2,50,7,70,7050,1,,,1,,,55,,,55,,,2,,,2,,,26,,,26,,,11450,,,11450,,
1970,2,126,,100,4.54,1,0,1,100,1,53,2,26,11150,1,,,2,,,53,,,49,,,2,,,2,,,26,,,26,,,11150,,,0,,
1970,2,126,,100,4.54,1,0,2,100,2,49,2,26,0,1,,,1,,,53,,,53,,,2,,,2,,,26,,,26,,,11150,,,11150,,
1970,2,126,,100,4.54,1,0,3,100,1,21,5,50,750,1,2,1,,,,53,49,53,,,,2,2,2,,,,26,26,26,,,,11150,0,11150,,,
1970,2,126,,100,4.54,1,0,4,100,2,15,3,30,0,1,2,1,,,,53,49,53,,,,2,2,2,,,,26,26,26,,,,11150,0,11150,,,
1970,2,127,,100,4.54,1,0,1,100,1,40,8,80,11550,1,,,2,,,40,,,39,,,8,,,6,,,80,,,65,,,11550,,,2950,,
1970,2,127,,100,4.54,1,0,2,100,2,39,6,65,2950,1,,,1,,,40,,,40,,,8,,,8,,,80,,,80,,,11550,,,11550,,
1970,2,127,,100,4.54,1,0,3,100,2,17,5,50,250,1,2,1,,,,40,39,40,,,,8,6,8,,,,80,65,80,,,,11550,2950,11550,,,
1970,2,127,,100,4.54,1,0,4,100,1,14,2,26,350,1,2,1,,,,40,39,40,,,,8,6,8,,,,80,65,80,,,,11550,2950,11550,,,
1970,2,127,,100,4.54,1,0,5,100,2,8,1,15,9999999,1,2,1,,,,40,39,40,,,,8,6,8,,,,80,65,80,,,,11550,2950,11550,,,
1970,2,128,,100,4.54,1,0,1,100,1,48,3,30,9050,1,,,2,,,48,,,44,,,3,,,3,,,30,,,30,,,9050,,,0,,
1970,2,128,,100,4.54,1,0,2,100,2,44,3,30,0,1,,,1,,,48,,,48,,,3,,,3,,,30,,,30,,,9050,,,9050,,
1970,2,128,,100,4.54,1,0,3,100,1,21,6,60,6850,1,2,1,,,,48,44,48,,,,3,3,3,,,,30,30,30,,,,9050,0,9050,,,
1970,2,128,,100,4.54,1,0,4,100,2,16,3,30,0,1,2,1,,,,48,44,48,,,,3,3,3,,,,30,30,30,,,,9050,0,9050,,,
1970,2,128,,100,4.54,1,0,5,100,2,15,3,30,0,1,2,1,,,,48,44,48,,,,3,3,3,,,,30,30,30,,,,9050,0,9050,,,
1970,2,128,,100,4.54,1,0,6,100,2,11,2,22,9999999,1,2,1,,,,48,44,48,,,,3,3,3,,,,30,30,30,,,,9050,0,9050,,,
1970,2,130,,100,4.54,1,0,1,100,1,61,2,22,10250,1,,,2,,,61,,,58,,,2,,,2,,,22,,,25,,,10250,,,3050,,
1970,2,130,,100,4.54,1,0,2,100,2,58,2,25,3050,1,,,1,,,61,,,61,,,2,,,2,,,22,,,22,,,10250,,,10250,,
1970,2,131,,100,4.54,1,0,1,100,1,59,4,40,6550,1,,,,,,59,,,,,,4,,,,,,40,,,,,,6550,,,,,
1970,2,132,,100,4.54,1,0,1,100,1,44,2,26,8850,1,,,2,,,44,,,39,,,2,,,1,,,26,,,17,,,8850,,,0,,
1970,2,132,,100,4.54,1,0,2,100,2,39,1,17,0,1,,,1,,,44,,,44,,,2,,,2,,,26,,,26,,,8850,,,8850,,
1970,2,132,,100,4.54,1,0,3,100,1,15,4,40,0,1,2,1,,,,44,39,44,,,,2,1,2,,,,26,17,26,,,,8850,0,8850,,,
1970,2,132,,100,4.54,1,0,4,100,2,10,1,17,9999999,1,2,1,,,,44,39,44,,,,2,1,2,,,,26,17,26,,,,8850,0,8850,,,
1970,2,134,,100,4.54,1,0,1,100,1,22,1,16,4850,1,,,,,,22,,,,,,1,,,,,,16,,,,,,4850,,,,,
1970,2,134,,100,4.54,1,0,2,100,1,20,2,23,4850,1,,,,,,22,,,,,,1,,,,,,16,,,,,,4850,,,,,
1970,2,135,,100,4.54,1,0,1,100,1,72,2,25,3350,1,,,2,,,72,,,75,,,2,,,2,,,25,,,26,,,3350,,,0,,
1970,2,135,,100,4.54,1,0,2,100,2,75,2,26,0,1,,,1,,,72,,,72,,,2,,,2,,,25,,,25,,,3350,,,3350,,
1970,2,136,,100,4.54,3,0,1,100,1,14,2,23,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1970,2,137,,100,4.54,1,0,1,100,1,26,6,60,650,1,,,2,,,26,,,23,,,6,,,6,,,60,,,60,,,650,,,0,,
1970,2,137,,100,4.54,1,0,2,100,2,23,6,60,0,1,,,1,,,26,,,26,,,6,,,6,,,60,,,60,,,650,,,650,,
1970,2,137,,100,4.54,1,0,3,100,1,3,0,2,9999999,1,2,1,,,,26,23,26,,,,6,6,6,,,,60,60,60,,,,650,0,650,,,
1970,2,137,,100,4.54,1,0,4,100,2,2,0,1,9999999,1,2,1,,,,26,23,26,,,,6,6,6,,,,60,60,60,,,,650,0,650,,,
1970,2,137,,100,4.54,1,0,5,100,1,0,0,1,9999999,1,2,1,,,,26,23,26,,,,6,6,6,,,,60,60,60,,,,650,0,650,,,
1970,2,138,,100,4.54,1,0,1,100,1,32,6,60,-850,1,,,,,,32,,,,,,6,,,,,,60,,,,,,-850,,,,,
1970,2,138,,100,4.54,1,0,2,100,2,38,6,60,2650,1,,,,,,32,,,,,,6,,,,,,60,,,,,,-850,,,,,
1970,2,140,,100,4.54,1,0,1,100,1,57,1,15,3050,1,,,2,,,57,,,52,,,1,,,2,,,15,,,25,,,3050,,,0,,
1970,2,140,,100,4.54,1,0,2,100,2,52,2,25,0,1,,,1,,,57,,,57,,,1,,,1,,,15,,,15,,,3050,,,3050,,
1970,2,140,,100,4.54,1,0,3,100,1,30,6,60,4050,1,2,1,,,,57,52,57,,,,1,2,1,,,,15,25,15,,,,3050,0,3050,,,
1970,2,141,,100,4.54,1,0,1,100,2,75,2,23,5550,2,,,,,,75,,,,,,2,,,,,,23,,,,,,5550,,,,,
1970,2,142,,100,4.54,1,0,1,100,1,76,2,22,750,1,,,,,,76,,,,,,2,,,,,,22,,,,,,750,,,,,
1970,2,142,,100,4.54,1,0,2,100,2,72,2,26,250,1,,,,,,76,,,,,,2,,,,,,22,,,,,,750,,,,,
1970,2,143,,100,4.54,1,0,1,100,1,58,6,60,7050,1,,,2,,,58,,,57,,,6,,,6,,,60,,,60,,,7050,,,6550,,
1970,2,143,,100,4.54,1,0,2,100,2,57,6,60,6550,1,,,1,,,58,,,58,,,6,,,6,,,60,,,60,,,7050,,,7050,,
1970,2,144,,100,4.54,1,0,1,100,1,30,11,110,15050,1,,,2,,,30,,,29,,,11,,,9,,,110,,,90,,,15050,,,0,,
1970,2,144,,100,4.54,1,0,2,100,2,29,9,90,0,1,,,1,,,30,,,30,,,11,,,11,,,110,,,110,,,15050,,,15050,,
1970,2,144,,100,4.54,1,0,3,100,2,3,0,2,9999999,1,2,1,,,,30,29,30,,,,11,9,11,,,,110,90,110,,,,15050,0,15050,,,
1970,2,145,,100,4.54,1,0,1,100,1,30,2,26,5150,1,,,2,,,30,,,29,,,2,,,4,,,26,,,40,,,5150,,,3850,,
1970,2,145,,100,4.54,1,0,2,100,2,29,4,40,3850,1,,,1,,,30,,,30,,,2,,,2,,,26,,,26,,,5150,,,5150,,
1970,2,145,,100,4.54,1,0,3,100,2,12,2,22,9999999,1,2,1,,,,30,29,30,,,,2,4,2,,,,26,40,26,,,,5150,3850,5150,,,
1970,2,145,,100,4.54,1,0,4,100,1,10,1,17,9999999,1,2,1,,,,30,29,30,,,,2,4,2,,,,26,40,26,,,,5150,3850,5150,,,
1970,2,145,,100,4.54,1,0,5,100,2,8,1,14,9999999,1,2,1,,,,30,29,30,,,,2,4,2,,,,26,40,26,,,,5150,3850,5150,,,
1970,2,145,,100,4.54,1,0,6,100,2,3,0,2,9999999,1,2,1,,,,30,29,30,,,,2,4,2,,,,26,40,26,,,,5150,3850,5150,,,
1970,2,146,,100,4.54,1,0,1,100,2,70,6,60,1950,2,,,,,,70,,,,,,6,,,,,,60,,,,,,1950,,,,,
1970,2,146,,100,4.54,1,0,2,100,1,37,2,26,7550,2,2,,,,,70,70,,,,,6,6,,,,,60,60,,,,,1950,1950,,,,
1970,2,147,,100,4.54,1,0,1,100,1,34,9,90,13050,1,,,2,,,34,,,34,,,9,,,11,,,90,,,110,,,13050,,,0,,
1970,2,147,,100,4.54,1,0,2,100,2,34,11,110,0,1,,,1,,,34,,,34,,,9,,,9,,,90,,,90,,,13050,,,13050,,
1970,2,147,,100,4.54,1,0,3,100,2,9,1,16,9999999,1,2,1,,,,34,34,34,,,,9,11,9,,,,90,110,90,,,,13050,0,13050,,,
1970,2,147,,100,4.54,1,0,4,100,1,7,1,14,9999999,1,2,1,,,,34,34,34,,,,9,11,9,,,,90,110,90,,,,13050,0,13050,,,
1970,2,148,,100,4.54,1,0,1,100,2,79,6,60,1150,2,,,,,,79,,,,,,6,,,,,,60,,,,,,1150,,,,,
1970,2,149,,100,4.54,1,0,1,100,1,70,2,26,11050,1,,,2,,,70,,,68,,,2,,,2,,,26,,,26,,,11050,,,0,,
1970,2,149,,100,4.54,1,0,2,100,2,68,2,26,0,1,,,1,,,70,,,70,,,2,,,2,,,26,,,26,,,11050,,,11050,,
1970,2,150,,100,4.54,1,0,1,100,1,35,6,60,15050,1,,,2,,,35,,,31,,,6,,,6,,,60,,,60,,,15050,,,1550,,
1970,2,150,,100,4.54,1,0,2,100,2,31,6,60,1550,1,,,1,,,35,,,35,,,6,,,6,,,60,,,60,,,15050,,,15050,,
1970,2,150,,100,4.54,1,0,3,100,1,8,1,14,9999999,1,2,1,,,,35,31,35,,,,6,6,6,,,,60,60,60,,,,15050,1550,15050,,,
1970,2,150,,100,4.54,1,0,4,100,2,5,1,11,9999999,1,2,1,,,,35,31,35,,,,6,6,6,,,,60,60,60,,,,15050,1550,15050,,,
1970,2,152,,100,4.54,1,0,1,100,1,58,6,60,12650,1,,,2,,,58,,,55,,,6,,,2,,,60,,,26,,,12650,,,0,,
1970,2,152,,100,4.54,1,0,2,100,2,55,2,26,0,1,,,1,,,58,,,58,,,6,,,6,,,60,,,60,,,12650,,,12650,,
1970,2,153,,100,4.54,1,0,1,100,2,49,4,40,4550,2,,,,,,49,,,,,,4,,,,,,40,,,,,,4550,,,,,
1970,2,153,,100,4.54,1,0,2,100,1,39,5,50,6050,2,2,,,,,49,49,,,,,4,4,,,,,40,40,,,,,4550,4550,,,,
1970,2,154,,100,4.54,3,0,1,100,1,73,2,26,4250,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1970,2,155,,100,4.54,1,0,1,100,1,88,4,40,1550,1,,,2,,,88,,,84,,,4,,,2,,,40,,,26,,,1550,,,850,,
1970,2,155,,100,4.54,1,0,2,100,2,84,2,26,850,1,,,1,,,88,,,88,,,4,,,4,,,40,,,40,,,1550,,,1550,,
1970,2,156,,100,4.54,1,0,1,100,1,25,5,50,10050,1,,,2,,,25,,,24,,,5,,,6,,,50,,,60,,,10050,,,750,,
1970,2,156,,100,4.54,1,0,2,100,2,24,6,60,750,1,,,1,,,25,,,25,,,5,,,5,,,50,,,50,,,10050,,,10050,,
1970,2,156,,100,4.54,1,0,3,100,2,6,1,12,9999999,1,2,1,,,,25,24,25,,,,5,6,5,,,,50,60,50,,,,10050,750,10050,,,
1970,2,156,,100,4.54,1,0,4,100,1,3,0,2,9999999,1,2,1,,,,25,24,25,,,,5,6,5,,,,50,60,50,,,,10050,750,10050,,,
1970,2,157,,100,4.54,1,0,1,100,2,56,0,2,3450,2,,,,,,56,,,,,,0,,,,,,2,,,,,,3450,,,,,
1970,2,158,,100,4.54,3,0,1,100,2,79,6,60,2250,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1970,2,159,,100,4.54,1,0,1,100,1,25,8,80,1850,1,,,2,,,25,,,36,,,8,,,6,,,80,,,60,,,1850,,,3250,,
1970,2,159,,100,4.54,1,0,2,100,2,36,6,60,3250,1,,,1,,,25,,,25,,,8,,,8,,,80,,,80,,,1850,,,1850,,
1970,2,160,,100,4.54,1,0,1,100,2,28,6,60,3550,2,,,,,,28,,,,,,6,,,,,,60,,,,,,3550,,,,,
1970,2,160,,100,4.54,1,0,2,100,1,7,1,12,9999999,2,2,,,,,28,28,,,,,6,6,,,,,60,60,,,,,3550,3550,,,,
1970,2,161,,100,4.54,1,0,1,100,2,21,8,80,1350,2,,,,,,21,,,,,,8,,,,,,80,,,,,,1350,,,,,
1970,2,161,,100,4.54,1,0,2,100,2,23,10,100,2750,2,,,,,,21,,,,,,8,,,,,,80,,,,,,1350,,,,,
1970,2,162,,100,4.54,1,0,1,100,1,22,10,100,2050,1,,,2,,,22,,,22,,,10,,,10,,,100,,,100,,,2050,,,1250,,
1970,2,162,,100,4.54,1,0,2,100,2,22,10,100,1250,1,,,1,,,22,,,22,,,10,,,10,,,100,,,100,,,2050,,,2050,,
1970,2,163,,100,4.54,1,0,1,100,2,60,6,60,4350,2,,,,,,60,,,,,,6,,,,,,60,,,,,,4350,,,,,
1970,2,164,,100,4.54,1,0,1,100,1,20,8,80,2250,1,,,,,,20,,,,,,8,,,,,,80,,,,,,2250,,,,,
1970,2,165,,100,4.54,1,0,1,100,1,78,2,23,3250,1,,,,,,78,,,,,,2,,,,,,23,,,,,,3250,,,,,
1970,2,166,,100,4.54,1,0,1,100,1,24,10,100,9050,1,,,2,,,24,,,24,,,10,,,8,,,100,,,80,,,9050,,,3250,,
1970,2,166,,100,4.54,1,0,2,100,2,24,8,80,3250,1,,,1,,,24,,,24,,,10,,,10,,,100,,,100,,,9050,,,9050,,
1970,2,166,,100,4.54,1,0,3,100,1,0,0,1,9999999,1,2,1,,,,24,24,24,,,,10,8,10,,,,100,80,100,,,,9050,3250,9050,,,
1970,2,167,,100,4.54,1,0,1,100,1,34,10,100,17050,1,,,2,,,34,,,33,,,10,,,10,,,100,,,100,,,17050,,,0,,
1970,2,167,,100,4.54,1,0,2,100,2,33,10,100,0,1,,,1,,,34,,,34,,,10,,,10,,,100,,,100,,,17050,,,17050,,
1970,2,167,,100,4.54,1,0,3,100,1,9,1,16,9999999,1,2,1,,,,34,33,34,,,,10,10,10,,,,100,100,100,,,,17050,0,17050,,,
1970,2,167,,100,4.54,1,0,4,100,2,7,1,12,9999999,1,2,1,,,,34,33,34,,,,10,10,10,,,,100,100,100,,,,17050,0,17050,,,
1970,2,168,,100,4.54,1,0,1,100,1,27,6,60,16650,1,,,2,,,27,,,26,,,6,,,6,,,60,,,60,,,16650,,,0,,
1970,2,168,,100,4.54,1,0,2,100,2,26,6,60,0,1,,,1,,,27,,,27,,,6,,,6,,,60,,,60,,,16650,,,16650,,
1970,2,168,,100,4.54,1,0,3,100,2,5,1,11,9999999,1,2,1,,,,27,26,27,,,,6,6,6,,,,60,60,60,,,,16650,0,16650,,,
1970,2,168,,100,4.54,1,0,4,100,2,2,0,1,9999999,1,2,1,,,,27,26,27,,,,6,6,6,,,,60,60,60,,,,16650,0,16650,,,
1970,2,168,,100,4.54,1,0,5,100,2,1,0,1,9999999,1,2,1,,,,27,26,27,,,,6,6,6,,,,60,60,60,,,,16650,0,16650,,,
1970,2,169,,100,4.54,1,0,1,100,1,28,6,65,11250,1,,,2,,,28,,,23,,,6,,,7,,,65,,,70,,,11250,,,6550,,
1970,2,169,,100,4.54,1,0,2,100,2,23,7,70,6550,1,,,1,,,28,,,28,,,6,,,6,,,65,,,65,,,11250,,,11250,,
1970,2,170,,100,4.54,1,0,1,100,1,45,6,60,12950,1,,,2,,,45,,,46,,,6,,,4,,,60,,,40,,,12950,,,0,,
1970,2,170,,100,4.54,1,0,2,100,2,46,4,40,0,1,,,1,,,45,,,45,,,6,,,6,,,60,,,60,,,12950,,,12950,,
1970,2,170,,100,4.54,1,0,3,100,1,18,6,65,1650,1,2,1,,,,45,46,45,,,,6,4,6,,,,60,40,60,,,,12950,0,12950,,,
1970,2,170,,100,4.54,1,0,4,100,2,23,6,60,5550,1,2,1,,,,45,46,45,,,,6,4,6,,,,60,40,60,,,,12950,0,12950,,,
1970,2,170,,100,4.54,1,0,5,100,2,0,0,1,9999999,1,2,,,,,45,23,,,,,6,6,,,,,60,60,,,,,12950,5550,,,,
1970,2,171,,100,4.54,1,0,1,100,1,26,6,60,11450,1,,,2,,,26,,,27,,,6,,,6,,,60,,,60,,,11450,,,0,,
1970,2,171,,100,4.54,1,0,2,100,2,27,6,60,0,1,,,1,,,26,,,26,,,6,,,6,,,60,,,60,,,11450,,,11450,,
1970,2,171,,100,4.54,1,0,3,100,1,4,0,2,9999999,1,2,1,,,,26,27,26,,,,6,6,6,,,,60,60,60,,,,11450,0,11450,,,
1970,2,171,,100,4.54,1,0,4,100,2,2,0,1,9999999,1,2,1,,,,26,27,26,,,,6,6,6,,,,60,60,60,,,,11450,0,11450,,,
1970,2,171,,100,4.54,1,0,5,100,2,0,0,1,9999999,1,2,1,,,,26,27,26,,,,6,6,6,,,,60,60,60,,,,11450,0,11450,,,
1970,2,172,,100,4.54,1,0,1,100,1,33,6,60,1450,1,,,2,,,33,,,37,,,6,,,2,,,60,,,26,,,1450,,,3450,,
1970,2,172,,100,4.54,1,0,2,100,2,37,2,26,3450,1,,,1,,,33,,,33,,,6,,,6,,,60,,,60,,,1450,,,1450,,
1970,2,172,,100,4.54,1,0,3,100,2,6,1,12,9999999,1,2,1,,,,33,37,33,,,,6,2,6,,,,60,26,60,,,,1450,3450,1450,,,
1970,2,172,,100,4.54,1,0,4,100,1,5,1,11,9999999,1,2,1,,,,33,37,33,,,,6,2,6,,,,60,26,60,,,,1450,3450,1450,,,
1970,2,174,,100,4.54,1,0,1,100,1,60,4,40,13850,1,,,2,,,60,,,48,,,4,,,6,,,40,,,60,,,13850,,,0,,
1970,2,174,,100,4.54,1,0,2,100,2,48,6,60,0,1,,,1,,,60,,,60,,,4,,,4,,,40,,,40,,,13850,,,13850,,
1970,2,175,,100,4.54,1,0,1,100,1,37,1,15,3550,1,,,,,,37,,,,,,1,,,,,,15,,,,,,3550,,,,,
1970,2,175,,100,4.54,1,0,2,100,1,29,1,17,1550,1,,,,,,37,,,,,,1,,,,,,15,,,,,,3550,,,,,
1970,2,175,,100,4.54,1,0,3,100,1,28,1,17,2050,1,,,,,,37,,,,,,1,,,,,,15,,,,,,3550,,,,,
1970,2,175,,100,4.54,1,0,4,100,1,29,2,23,1350,1,,,,,,37,,,,,,1,,,,,,15,,,,,,3550,,,,,
1970,2,176,,100,4.54,4,0,1,100,1,34,11,111,11550,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1970,2,177,,100,4.54,1,0,1,100,2,75,8,80,50000,2,,,,,,75,,,,,,8,,,,,,80,,,,,,50000,,,,,
1970,2,177,,100,4.54,1,0,2,100,2,44,6,60,0,2,2,,,,,75,75,,,,,8,8,,,,,80,80,,,,,50000,50000,,,,
1970,2,178,,100,4.54,1,0,1,100,2,28,8,80,6050,2,,,,,,28,,,,,,8,,,,,,80,,,,,,6050,,,,,
1970,2,178,,100,4.54,1,0,2,100,1,5,1,11,9999999,2,2,,,,,28,28,,,,,8,8,,,,,80,80,,,,,6050,6050,,,,
1970,2,179,,100,4.54,1,0,1,100,1,35,11,111,19150,1,,,2,,,35,,,30,,,11,,,11,,,111,,,110,,,19150,,,4050,,
1970,2,179,,100,4.54,1,0,2,100,2,30,11,110,4050,1,,,1,,,35,,,35,,,11,,,11,,,111,,,111,,,19150,,,19150,,
1970,2,179,,100,4.54,1,0,3,100,1,4,1,11,9999999,1,2,1,,,,35,30,35,,,,11,11,11,,,,111,110,111,,,,19150,4050,19150,,,
1970,2,180,,100,4.54,1,0,1,100,1,28,10,100,2350,1,,,2,,,28,,,26,,,10,,,11,,,100,,,110,,,2350,,,7250,,
1970,2,180,,100,4.54,1,0,2,100,2,26,11,110,7250,1,,,1,,,28,,,28,,,10,,,10,,,100,,,100,,,2350,,,2350,,
1970,2,182,,100,4.54,1,0,1,100,1,73,2,25,1050,1,,,,,,73,,,,,,2,,,,,,25,,,,,,1050,,,,,
1970,2,183,,100,4.54,1,0,1,100,1,27,6,60,15050,1,,,2,,,27,,,27,,,6,,,6,,,60,,,60,,,15050,,,0,,
1970,2,183,,100,4.54,1,0,2,100,2,27,6,60,0,1,,,1,,,27,,,27,,,6,,,6,,,60,,,60,,,15050,,,15050,,
1970,2,183,,100,4.54,1,0,3,100,2,4,0,2,9999999,1,2,1,,,,27,27,27,,,,6,6,6,,,,60,60,60,,,,15050,0,15050,,,
1970,2,183,,100,4.54,1,0,4,100,1,0,0,1,9999999,1,2,1,,,,27,27,27,,,,6,6,6,,,,60,60,60,,,,15050,0,15050,,,
1970,2,184,,100,4.54,1,0,1,100,2,49,2,25,2550,2,,,,,,49,,,,,,2,,,,,,25,,,,,,2550,,,,,
1970,2,184,,100,4.54,1,0,2,100,2,48,2,25,0,2,,,,,,49,,,,,,2,,,,,,25,,,,,,2550,,,,,
1970,2,185,,100,4.54,1,0,1,100,1,29,6,60,8250,1,,,2,,,29,,,22,,,6,,,6,,,60,,,60,,,8250,,,5050,,
1970,2,185,,100,4.54,1,0,2,100,2,22,6,60,5050,1,,,1,,,29,,,29,,,6,,,6,,,60,,,60,,,8250,,,8250,,
1970,2,187,,100,4.54,1,0,1,100,1,51,8,80,10050,1,,,2,,,51,,,44,,,8,,,6,,,80,,,60,,,10050,,,1450,,
1970,2,187,,100,4.54,1,0,2,100,2,44,6,60,1450,1,,,1,,,51,,,51,,,8,,,8,,,80,,,80,,,10050,,,10050,,
1970,2,187,,100,4.54,1,0,3,100,2,16,4,40,0,1,2,1,,,,51,44,51,,,,8,6,8,,,,80,60,80,,,,10050,1450,10050,,,
1970,2,187,,100,4.54,1,0,4,100,2,10,1,17,9999999,1,2,1,,,,51,44,51,,,,8,6,8,,,,80,60,80,,,,10050,1450,10050,,,
1970,2,187,,100,4.54,1,0,5,100,2,9,1,15,9999999,1,2,1,,,,51,44,51,,,,8,6,8,,,,80,60,80,,,,10050,1450,10050,,,
1970,2,187,,100,4.54,1,0,6,100,2,19,6,60,0,1,2,1,,,,51,44,51,,,,8,6,8,,,,80,60,80,,,,10050,1450,10050,,,
1970,2,187,,100,4.54,1,0,7,100,2,0,0,1,9999999,1,2,,,,,51,19,,,,,8,6,,,,,80,60,,,,,10050,0,,,,
1970,2,188,,100,4.54,1,0,1,100,1,55,6,60,11250,1,,,2,,,55,,,50,,,6,,,2,,,60,,,26,,,11250,,,650,,
1970,2,188,,100,4.54,1,0,2,100,2,50,2,26,650,1,,,1,,,55,,,55,,,6,,,6,,,60,,,60,,,11250,,,11250,,
1970,2,188,,100,4.54,1,0,3,100,2,10,2,22,9999999,1,2,1,,,,55,50,55,,,,6,2,6,,,,60,26,60,,,,11250,650,11250,,,
1970,2,189,,100,4.54,1,0,1,100,1,51,6,60,8050,1,,,2,,,51,,,51,,,6,,,4,,,60,,,40,,,8050,,,0,,
1970,2,189,,100,4.54,1,0,2,100,2,51,4,40,0,1,,,1,,,51,,,51,,,6,,,6,,,60,,,60,,,8050,,,8050,,
1970,2,189,,100,4.54,1,0,3,100,2,22,8,80,0,1,2,1,,,,51,51,51,,,,6,4,6,,,,60,40,60,,,,8050,0,8050,,,
1970,2,189,,100,4.54,1,0,4,100,1,17,5,50,1250,1,2,1,,,,51,51,51,,,,6,4,6,,,,60,40,60,,,,8050,0,8050,,,
1970,2,190,,100,4.54,1,0,1,100,1,43,11,111,21150,1,,,2,,,43,,,39,,,11,,,6,,,111,,,60,,,21150,,,0,,
1970,2,190,,100,4.54,1,0,2,100,2,39,6,60,0,1,,,1,,,43,,,43,,,11,,,11,,,111,,,111,,,21150,,,21150,,
1970,2,190,,100,4.54,1,0,3,100,1,15,2,26,0,1,2,1,,,,43,39,43,,,,11,6,11,,,,111,60,111,,,,21150,0,21150,,,
1970,2,190,,100,4.54,1,0,4,100,1,12,2,22,9999999,1,2,1,,,,43,39,43,,,,11,6,11,,,,111,60,111,,,,21150,0,21150,,,
1970,2,190,,100,4.54,1,0,5,100,1,9,1,16,9999999,1,2,1,,,,43,39,43,,,,11,6,11,,,,111,60,111,,,,21150,0,21150,,,
1970,2,191,,100,4.54,1,0,1,100,1,54,3,30,7550,1,,,2,,,54,,,50,,,3,,,2,,,30,,,26,,,7550,,,5050,,
1970,2,191,,100,4.54,1,0,2,100,2,50,2,26,5050,1,,,1,,,54,,,54,,,3,,,3,,,30,,,30,,,7550,,,7550,,
1970,2,192,,100,4.54,1,0,1,100,2,62,2,26,3550,2,,,,,,62,,,,,,2,,,,,,26,,,,,,3550,,,,,
1970,2,193,,100,4.54,1,0,1,100,1,26,4,40,11050,1,,,2,,,26,,,21,,,4,,,6,,,40,,,60,,,11050,,,0,,
1970,2,193,,100,4.54,1,0,2,100,2,21,6,60,0,1,,,1,,,26,,,26,,,4,,,4,,,40,,,40,,,11050,,,11050,,
1970,2,193,,100,4.54,1,0,3,100,1,0,0,1,9999999,1,2,1,,,,26,21,26,,,,4,6,4,,,,40,60,40,,,,11050,0,11050,,,
1970,2,194,,100,4.54,1,0,1,100,1,28,1,16,2550,1,,,2,,,28,,,23,,,1,,,0,,,16,,,2,,,2550,,,1550,,
1970,2,194,,100,4.54,1,0,2,100,2,23,0,2,1550,1,,,1,,,28,,,28,,,1,,,1,,,16,,,16,,,2550,,,2550,,
1970,2,194,,100,4.54,1,0,3,100,1,4,0,2,9999999,1,2,1,,,,28,23,28,,,,1,0,1,,,,16,2,16,,,,2550,1550,2550,,,
1970,2,194,,100,4.54,1,0,4,100,2,2,0,1,9999999,1,2,1,,,,28,23,28,,,,1,0,1,,,,16,2,16,,,,2550,1550,2550,,,
1970,2,195,,100,4.54,1,0,1,100,1,48,6,60,9450,1,,,2,,,48,,,45,,,6,,,4,,,60,,,40,,,9450,,,3650,,
1970,2,195,,100,4.54,1,0,2,100,2,45,4,40,3650,1,,,1,,,48,,,48,,,6,,,6,,,60,,,60,,,9450,,,9450,,
1970,2,196,,100,4.54,1,0,1,100,1,43,3,30,18050,1,,,2,,,43,,,37,,,3,,,6,,,30,,,60,,,18050,,,2550,,
1970,2,196,,100,4.54,1,0,2,100,2,37,6,60,2550,1,,,1,,,43,,,43,,,3,,,3,,,30,,,30,,,18050,,,18050,,
1970,2,196,,100,4.54,1,0,3,100,2,12,2,22,9999999,1,2,1,,,,43,37,43,,,,3,6,3,,,,30,60,30,,,,18050,2550,18050,,,
1970,2,196,,100,4.54,1,0,4,100,2,8,1,14,9999999,1,2,1,,,,43,37,43,,,,3,6,3,,,,30,60,30,,,,18050,2550,18050,,,
1970,2,196,,100,4.54,1,0,5,100,2,14,2,26,0,1,2,1,,,,43,37,43,,,,3,6,3,,,,30,60,30,,,,18050,2550,18050,,,
1970,2,197,,100,4.54,1,0,1,100,1,56,2,23,7650,1,,,,,,56,,,,,,2,,,,,,23,,,,,,7650,,,,,
1970,2,198,,100,4.54,1,0,1,100,1,62,2,22,12050,1,,,2,,,62,,,59,,,2,,,7,,,22,,,70,,,12050,,,2650,,
1970,2,198,,100,4.54,1,0,2,100,2,59,7,70,2650,1,,,1,,,62,,,62,,,2,,,2,,,22,,,22,,,12050,,,12050,,
1970,2,198,,100,4.54,1,0,3,100,2,22,7,70,4350,1,2,1,,,,62,59,62,,,,2,7,2,,,,22,70,22,,,,12050,2650,12050,,,
1970,2,198,,100,4.54,1,0,4,100,2,20,1,14,1050,1,2,1,,,,62,59,62,,,,2,7,2,,,,22,70,22,,,,12050,2650,12050,,,
1970,2,199,,100,4.54,1,0,1,100,1,50,2,26,12050,1,,,2,,,50,,,51,,,2,,,2,,,26,,,25,,,12050,,,0,,
1970,2,199,,100,4.54,1,0,2,100,2,51,2,25,0,1,,,1,,,50,,,50,,,2,,,2,,,26,,,26,,,12050,,,12050,,
1970,2,199,,100,4.54,1,0,3,100,2,18,5,50,750,1,2,1,,,,50,51,50,,,,2,2,2,,,,26,25,26,,,,12050,0,12050,,,
1970,2,200,,100,4.54,1,0,1,100,1,63,4,40,12350,1,,,2,,,63,,,59,,,4,,,6,,,40,,,60,,,12350,,,0,,
1970,2,200,,100,4.54,1,0,2,100,2,59,6,60,0,1,,,1,,,63,,,63,,,4,,,4,,,40,,,40,,,12350,,,12350,,
1970,2,201,,100,4.54,1,0,1,100,1,67,4,40,6350,1,,,2,,,67,,,61,,,4,,,2,,,40,,,26,,,6350,,,0,,
1970,2,201,,100,4.54,1,0,2,100,2,61,2,26,0,1,,,1,,,67,,,67,,,4,,,4,,,40,,,40,,,6350,,,6350,,
1970,2,202,,100,4.54,1,0,1,100,1,51,6,60,10050,1,,,2,,,51,,,45,,,6,,,6,,,60,,,60,,,10050,,,0,,
1970,2,202,,100,4.54,1,0,2,100,2,45,6,60,0,1,,,1,,,51,,,51,,,6,,,6,,,60,,,60,,,10050,,,10050,,
1970,2,202,,100,4.54,1,0,3,100,2,17,4,40,150,1,2,1,,,,51,45,51,,,,6,6,6,,,,60,60,60,,,,10050,0,10050,,,
1970,2,203,,100,4.54,1,0,1,100,1,42,6,60,13750,1,,,2,,,42,,,43,,,6,,,3,,,60,,,30,,,13750,,,2750,,
1970,2,203,,100,4.54,1,0,2,100,2,43,3,30,2750,1,,,1,,,42,,,42,,,6,,,6,,,60,,,60,,,13750,,,13750,,
1970,2,203,,100,4.54,1,0,3,100,1,19,5,50,1550,1,2,1,,,,42,43,42,,,,6,3,6,,,,60,30,60,,,,13750,2750,13750,,,
1970,2,203,,100,4.54,1,0,4,100,1,15,3,30,50,1,2,1,,,,42,43,42,,,,6,3,6,,,,60,30,60,,,,13750,2750,13750,,,
1970,2,203,,100,4.54,1,0,5,100,2,11,1,17,9999999,1,2,1,,,,42,43,42,,,,6,3,6,,,,60,30,60,,,,13750,2750,13750,,,
1970,2,204,,100,4.54,1,0,1,100,1,46,2,23,5250,1,,,2,,,46,,,42,,,2,,,1,,,23,,,17,,,5250,,,0,,
1970,2,204,,100,4.54,1,0,2,100,2,42,1,17,0,1,,,1,,,46,,,46,,,2,,,2,,,23,,,23,,,5250,,,5250,,
1970,2,205,,100,4.54,1,0,1,100,2,47,2,26,3550,2,,,,,,47,,,,,,2,,,,,,26,,,,,,3550,,,,,
1970,2,205,,100,4.54,1,0,2,100,1,8,1,14,9999999,2,2,,,,,47,28,,,,,2,5,,,,,26,50,,,,,3550,6050,,,,
1970,2,205,,100,4.54,1,0,3,100,2,28,5,50,6050,2,2,,,,,47,47,,,,,2,2,,,,,26,26,,,,,3550,3550,,,,
1970,2,205,,100,4.54,1,0,4,100,1,1,0,1,9999999,2,2,,,,,47,28,,,,,2,5,,,,,26,50,,,,,3550,6050,,,,
1970,2,206,,100,4.54,1,0,1,100,1,29,2,23,7250,1,,,2,,,29,,,24,,,2,,,2,,,23,,,26,,,7250,,,0,,
1970,2,206,,100,4.54,1,0,2,100,2,24,2,26,0,1,,,1,,,29,,,29,,,2,,,2,,,23,,,23,,,7250,,,7250,,
1970,2,206,,100,4.54,1,0,3,100,1,2,0,1,9999999,1,2,1,,,,29,24,29,,,,2,2,2,,,,23,26,23,,,,7250,0,7250,,,
1970,2,206,,100,4.54,1,0,4,100,1,0,0,1,9999999,1,2,1,,,,29,24,29,,,,2,2,2,,,,23,26,23,,,,7250,0,7250,,,
1970,2,207,,100,4.54,1,0,1,100,2,49,6,60,7450,2,,,,,,49,,,,,,6,,,,,,60,,,,,,7450,,,,,
1970,2,208,,100,4.54,1,0,1,100,1,35,4,40,7250,1,,,2,,,35,,,29,,,4,,,2,,,40,,,22,,,7250,,,0,,
1970,2,208,,100,4.54,1,0,2,100,2,29,2,22,0,1,,,1,,,35,,,35,,,4,,,4,,,40,,,40,,,7250,,,7250,,
1970,2,208,,100,4.54,1,0,3,100,2,7,1,16,9999999,1,2,1,,,,35,29,35,,,,4,2,4,,,,40,22,40,,,,7250,0,7250,,,
1970,2,208,,100,4.54,1,0,4,100,1,8,1,14,9999999,1,2,1,,,,35,29,35,,,,4,2,4,,,,40,22,40,,,,7250,0,7250,,,
1970,2,208,,100,4.54,1,0,5,100,1,5,0,2,9999999,1,2,1,,,,35,29,35,,,,4,2,4,,,,40,22,40,,,,7250,0,7250,,,
1970,2,208,,100,4.54,1,0,6,100,1,3,0,2,9999999,1,2,1,,,,35,29,35,,,,4,2,4,,,,40,22,40,,,,7250,0,7250,,,
1970,2,208,,100,4.54,1,0,7,100,1,0,0,1,9999999,1,2,1,,,,35,29,35,,,,4,2,4,,,,40,22,40,,,,7250,0,7250,,,
1970,2,209,,100,4.54,1,0,1,100,1,29,4,40,7050,1,,,2,,,29,,,40,,,4,,,0,,,40,,,2,,,7050,,,0,,
1970,2,209,,100,4.54,1,0,2,100,2,40,0,2,0,1,,,1,,,29,,,29,,,4,,,4,,,40,,,40,,,7050,,,7050,,
1970,2,209,,100,4.54,1,0,3,100,1,7,1,14,9999999,1,2,1,,,,29,40,29,,,,4,0,4,,,,40,2,40,,,,7050,0,7050,,,
1970,2,210,,100,4.54,1,0,1,100,1,77,1,15,1750,1,,,2,,,77,,,64,,,1,,,2,,,15,,,26,,,1750,,,1050,,
1970,2,210,,100,4.54,1,0,2,100,2,64,2,26,1050,1,,,1,,,77,,,77,,,1,,,1,,,15,,,15,,,1750,,,1750,,
1970,2,210,,100,4.54,1,0,3,100,1,26,6,65,5350,1,,,,,,77,,,,,,1,,,,,,15,,,,,,1750,,,,,
1970,2,211,,100,4.54,1,0,1,100,1,92,0,2,2250,1,,,2,,,92,,,76,,,0,,,1,,,2,,,16,,,2250,,,1150,,
1970,2,211,,100,4.54,1,0,2,100,2,76,1,16,1150,1,,,1,,,92,,,92,,,0,,,0,,,2,,,2,,,2250,,,2250,,
1970,2,212,,100,4.54,1,0,1,100,1,41,6,60,7350,1,,,2,,,41,,,38,,,6,,,4,,,60,,,40,,,7350,,,0,,
1970,2,212,,100,4.54,1,0,2,100,2,38,4,40,0,1,,,1,,,41,,,41,,,6,,,6,,,60,,,60,,,7350,,,7350,,
1970,2,212,,100,4.54,1,0,3,100,1,14,2,26,650,1,2,1,,,,41,38,41,,,,6,4,6,,,,60,40,60,,,,7350,0,7350,,,
1970,2,212,,100,4.54,1,0,4,100,2,18,6,60,0,1,2,1,,,,41,38,41,,,,6,4,6,,,,60,40,60,,,,7350,0,7350,,,
1970,2,212,,100,4.54,1,0,5,100,1,17,4,40,0,1,2,1,,,,41,38,41,,,,6,4,6,,,,60,40,60,,,,7350,0,7350,,,
1970,2,212,,100,4.54,1,0,6,100,2,12,2,23,9999999,1,2,1,,,,41,38,41,,,,6,4,6,,,,60,40,60,,,,7350,0,7350,,,
1970,2,212,,100,4.54,1,0,7,100,1,10,1,16,9999999,1,2,1,,,,41,38,41,,,,6,4,6,,,,60,40,60,,,,7350,0,7350,,,
1970,2,212,,100,4.54,1,0,8,100,2,15,2,25,2150,1,2,1,,,,41,38,41,,,,6,4,6,,,,60,40,60,,,,7350,0,7350,,,
1970,2,213,,100,4.54,1,0,1,100,2,55,2,22,1650,2,,,,,,55,,,,,,2,,,,,,22,,,,,,1650,,,,,
1970,2,213,,100,4.54,1,0,2,100,1,23,6,60,0,2,2,,,,,55,55,,,,,2,2,,,,,22,22,,,,,1650,1650,,,,
1970,2,213,,100,4.54,1,0,3,100,1,21,6,65,0,2,2,,,,,55,55,,,,,2,2,,,,,22,22,,,,,1650,1650,,,,
1970,2,213,,100,4.54,1,0,4,100,1,16,4,40,0,2,2,,,,,55,55,,,,,2,2,,,,,22,22,,,,,1650,1650,,,,
1970,2,213,,100,4.54,1,0,5,100,1,13,2,26,9999999,2,2,,,,,55,55,,,,,2,2,,,,,22,22,,,,,1650,1650,,,,
1970,2,213,,100,4.54,1,0,6,100,1,12,2,23,9999999,2,2,,,,,55,55,,,,,2,2,,,,,22,22,,,,,1650,1650,,,,
1970,2,214,,100,4.54,1,0,1,100,1,37,2,25,9550,1,,,2,,,37,,,47,,,2,,,6,,,25,,,60,,,9550,,,10650,,
1970,2,214,,100,4.54,1,0,2,100,2,47,6,60,10650,1,,,1,,,37,,,37,,,2,,,2,,,25,,,25,,,9550,,,9550,,
1970,2,214,,100,4.54,1,0,3,100,1,19,6,65,1950,1,2,1,,,,37,47,37,,,,2,6,2,,,,25,60,25,,,,9550,10650,9550,,,
1970,2,215,,100,4.54,1,0,1,100,1,52,6,60,15350,1,,,2,,,52,,,51,,,6,,,6,,,60,,,60,,,15350,,,0,,
1970,2,215,,100,4.54,1,0,2,100,2,51,6,60,0,1,,,1,,,52,,,52,,,6,,,6,,,60,,,60,,,15350,,,15350,,
1970,2,215,,100,4.54,1,0,3,100,1,26,6,60,5750,1,2,1,,,,52,51,52,,,,6,6,6,,,,60,60,60,,,,15350,0,15350,,,
1970,2,216,,100,4.54,1,0,1,100,1,50,10,100,6450,1,,,2,,,50,,,38,,,10,,,6,,,100,,,60,,,6450,,,2450,,
1970,2,216,,100,4.54,1,0,2,100,2,38,6,60,2450,1,,,1,,,50,,,50,,,10,,,10,,,100,,,100,,,6450,,,6450,,
1970,2,216,,100,4.54,1,0,3,100,1,17,4,40,650,1,2,1,,,,50,38,50,,,,10,6,10,,,,100,60,100,,,,6450,2450,6450,,,
1970,2,216,,100,4.54,1,0,4,100,1,16,4,40,950,1,2,1,,,,50,38,50,,,,10,6,10,,,,100,60,100,,,,6450,2450,6450,,,
1970,2,216,,100,4.54,1,0,5,100,1,14,2,26,150,1,2,1,,,,50,38,50,,,,10,6,10,,,,100,60,100,,,,6450,2450,6450,,,
1970,2,217,,100,4.54,1,0,1,100,1,39,6,60,11050,1,,,2,,,39,,,41,,,6,,,6,,,60,,,60,,,11050,,,1250,,
1970,2,217,,100,4.54,1,0,2,100,2,41,6,60,1250,1,,,1,,,39,,,39,,,6,,,6,,,60,,,60,,,11050,,,11050,,
1970,2,217,,100,4.54,1,0,3,100,1,17,4,40,0,1,2,1,,,,39,41,39,,,,6,6,6,,,,60,60,60,,,,11050,1250,11050,,,
1970,2,218,,100,4.54,1,0,1,100,1,29,6,60,17750,1,,,2,,,29,,,29,,,6,,,6,,,60,,,60,,,17750,,,0,,
1970,2,218,,100,4.54,1,0,2,100,2,29,6,60,0,1,,,1,,,29,,,29,,,6,,,6,,,60,,,60,,,17750,,,17750,,
1970,2,218,,100,4.54,1,0,3,100,1,6,1,12,9999999,1,2,1,,,,29,29,29,,,,6,6,6,,,,60,60,60,,,,17750,0,17750,,,
1970,2,218,,100,4.54,1,0,4,100,2,3,0,2,9999999,1,2,1,,,,29,29,29,,,,6,6,6,,,,60,60,60,,,,17750,0,17750,,,
1970,2,219,,100,4.54,1,0,1,100,1,30,9,90,10050,1,,,2,,,30,,,22,,,9,,,6,,,90,,,60,,,10050,,,6050,,
1970,2,219,,100,4.54,1,0,2,100,2,22,6,60,6050,1,,,1,,,30,,,30,,,9,,,9,,,90,,,90,,,10050,,,10050,,
1970,2,220,,100,4.54,1,0,1,100,1,34,10,100,12050,1,,,2,,,34,,,30,,,10,,,10,,,100,,,100,,,12050,,,0,,
1970,2,220,,100,4.54,1,0,2,100,2,30,10,100,0,1,,,1,,,34,,,34,,,10,,,10,,,100,,,100,,,12050,,,12050,,
1970,2,221,,100,4.54,1,0,1,100,1,28,10,100,35050,1,,,,,,28,,,,,,10,,,,,,100,,,,,,35050,,,,,
1970,2,222,,100,4.54,1,0,1,100,1,31,6,60,9850,1,,,2,,,31,,,26,,,6,,,7,,,60,,,70,,,9850,,,0,,
1970,2,222,,100,4.54,1,0,2,100,2,26,7,70,0,1,,,1,,,31,,,31,,,6,,,6,,,60,,,60,,,9850,,,9850,,
1970,2,222,,100,4.54,1,0,3,100,1,2,0,1,9999999,1,2,1,,,,31,26,31,,,,6,7,6,,,,60,70,60,,,,9850,0,9850,,,
1970,2,222,,100,4.54,1,0,4,100,2,0,0,1,9999999,1,2,1,,,,31,26,31,,,,6,7,6,,,,60,70,60,,,,9850,0,9850,,,
1970,2,223,,100,4.54,1,0,1,100,1,34,6,60,13250,1,,,2,,,34,,,30,,,6,,,6,,,60,,,60,,,13250,,,0,,
1970,2,223,,100,4.54,1,0,2,100,2,30,6,60,0,1,,,1,,,34,,,34,,,6,,,6,,,60,,,60,,,13250,,,13250,,
1970,2,223,,100,4.54,1,0,3,100,1,7,1,14,9999999,1,2,1,,,,34,30,34,,,,6,6,6,,,,60,60,60,,,,13250,0,13250,,,
1970,2,223,,100,4.54,1,0,4,100,1,5,0,2,9999999,1,2,1,,,,34,30,34,,,,6,6,6,,,,60,60,60,,,,13250,0,13250,,,
1970,2,223,,100,4.54,1,0,5,100,1,2,0,1,9999999,1,2,1,,,,34,30,34,,,,6,6,6,,,,60,60,60,,,,13250,0,13250,,,
1970,2,224,,100,4.54,1,0,1,100,1,52,8,80,15250,1,,,2,,,52,,,56,,,8,,,7,,,80,,,70,,,15250,,,0,,
1970,2,224,,100,4.54,1,0,2,100,2,56,7,70,0,1,,,1,,,52,,,52,,,8,,,8,,,80,,,80,,,15250,,,15250,,
1970,2,224,,100,4.54,1,0,3,100,2,19,6,60,3950,1,2,1,,,,52,56,52,,,,8,7,8,,,,80,70,80,,,,15250,0,15250,,,
1970,2,225,,100,4.54,1,0,1,100,1,34,7,70,14050,1,,,2,,,34,,,32,,,7,,,8,,,70,,,80,,,14050,,,0,,
1970,2,225,,100,4.54,1,0,2,100,2,32,8,80,0,1,,,1,,,34,,,34,,,7,,,7,,,70,,,70,,,14050,,,14050,,
1970,2,225,,100,4.54,1,0,3,100,2,8,1,15,9999999,1,2,1,,,,34,32,34,,,,7,8,7,,,,70,80,70,,,,14050,0,14050,,,
1970,2,225,,100,4.54,1,0,4,100,1,2,0,1,9999999,1,2,1,,,,34,32,34,,,,7,8,7,,,,70,80,70,,,,14050,0,14050,,,
1970,2,226,,100,4.54,1,0,1,100,1,39,7,70,15650,1,,,2,,,39,,,38,,,7,,,3,,,70,,,30,,,15650,,,0,,
1970,2,226,,100,4.54,1,0,2,100,2,38,3,30,0,1,,,1,,,39,,,39,,,7,,,7,,,70,,,70,,,15650,,,15650,,
1970,2,226,,100,4.54,1,0,3,100,2,11,1,17,9999999,1,2,1,,,,39,38,39,,,,7,3,7,,,,70,30,70,,,,15650,0,15650,,,
1970,2,226,,100,4.54,1,0,4,100,2,9,1,15,9999999,1,2,1,,,,39,38,39,,,,7,3,7,,,,70,30,70,,,,15650,0,15650,,,
1970,2,226,,100,4.54,1,0,5,100,2,5,0,2,9999999,1,2,1,,,,39,38,39,,,,7,3,7,,,,70,30,70,,,,15650,0,15650,,,
1970,2,226,,100,4.54,1,0,6,100,1,2,0,1,9999999,1,2,1,,,,39,38,39,,,,7,3,7,,,,70,30,70,,,,15650,0,15650,,,
1970,2,226,,100,4.54,1,0,7,100,1,1,0,1,9999999,1,2,1,,,,39,38,39,,,,7,3,7,,,,70,30,70,,,,15650,0,15650,,,
1970,2,227,,100,4.54,1,0,1,100,2,45,6,60,8050,2,,,,,,45,,,,,,6,,,,,,60,,,,,,8050,,,,,
1970,2,227,,100,4.54,1,0,2,100,2,17,5,50,1350,2,2,,,,,45,45,,,,,6,6,,,,,60,60,,,,,8050,8050,,,,
1970,2,227,,100,4.54,1,0,3,100,2,20,8,80,2050,2,2,,,,,45,45,,,,,6,6,,,,,60,60,,,,,8050,8050,,,,
1970,2,228,,100,4.54,1,0,1,100,1,60,8,80,18150,1,,,2,,,60,,,56,,,8,,,6,,,80,,,60,,,18150,,,250,,
1970,2,228,,100,4.54,1,0,2,100,2,56,6,60,250,1,,,1,,,60,,,60,,,8,,,8,,,80,,,80,,,18150,,,18150,,
1970,2,228,,100,4.54,1,0,3,100,2,62,2,26,0,1,,,,,,60,,,,,,8,,,,,,80,,,,,,18150,,,,,
1970,2,230,,100,4.54,1,0,1,100,2,63,7,70,4250,2,,,,,,63,,,,,,7,,,,,,70,,,,,,4250,,,,,
1970,2,230,,100,4.54,1,0,2,100,2,57,6,60,4350,2,,,,,,63,,,,,,7,,,,,,70,,,,,,4250,,,,,
1970,2,231,,100,4.54,1,0,1,100,2,65,5,50,2250,2,,,,,,65,,,,,,5,,,,,,50,,,,,,2250,,,,,
1970,2,232,,100,4.54,1,0,1,100,1,75,2,26,23850,1,,,2,,,75,,,58,,,2,,,6,,,26,,,60,,,23850,,,1850,,
1970,2,232,,100,4.54,1,0,2,100,2,58,6,60,1850,1,,,1,,,75,,,75,,,2,,,2,,,26,,,26,,,23850,,,23850,,
1970,2,233,,100,4.54,4,4,1,100,1,42,2,22,150,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1970,2,234,,100,4.54,1,0,1,100,1,50,1,17,3850,1,,,,,,50,,,,,,1,,,,,,17,,,,,,3850,,,,,
1970,2,235,,100,4.54,1,0,1,100,2,22,5,50,1050,2,,,,,,22,,,,,,5,,,,,,50,,,,,,1050,,,,,
1970,2,235,,100,4.54,1,0,2,100,1,3,0,2,9999999,2,2,,,,,22,22,,,,,5,5,,,,,50,50,,,,,1050,1050,,,,
1970,2,235,,100,4.54,1,0,3,100,2,20,6,60,3450,2,,,,,,22,,,,,,5,,,,,,50,,,,,,1050,,,,,
1970,2,235,,100,4.54,1,0,4,100,1,1,0,1,9999999,2,2,,,,,22,20,,,,,5,6,,,,,50,60,,,,,1050,3450,,,,
1970,2,236,,100,4.54,1,0,1,100,2,79,2,26,2150,2,,,,,,79,,,,,,2,,,,,,26,,,,,,2150,,,,,
1970,2,237,,100,4.54,1,0,1,100,2,45,3,30,2150,2,,,,,,45,,,,,,3,,,,,,30,,,,,,2150,,,,,
1970,2,237,,100,4.54,1,0,2,100,2,12,2,22,9999999,2,,,,,,45,,,,,,3,,,,,,30,,,,,,2150,,,,,
1970,2,238,,100,4.54,1,0,1,100,2,40,6,60,2750,2,,,,,,40,,,,,,6,,,,,,60,,,,,,2750,,,,,
1970,2,238,,100,4.54,1,0,2,100,1,18,6,60,650,2,2,,,,,40,40,,,,,6,6,,,,,60,60,,,,,2750,2750,,,,
1970,2,238,,100,4.54,1,0,3,100,1,17,3,30,350,2,2,,,,,40,40,,,,,6,6,,,,,60,60,,,,,2750,2750,,,,
1970,2,238,,100,4.54,1,0,4,100,1,13,2,25,9999999,2,2,,,,,40,40,,,,,6,6,,,,,60,60,,,,,2750,2750,,,,
1970,2,238,,100,4.54,1,0,5,100,1,11,1,17,9999999,2,2,,,,,40,40,,,,,6,6,,,,,60,60,,,,,2750,2750,,,,
1970,2,238,,100,4.54,1,0,6,100,2,10,1,17,9999999,2,2,,,,,40,40,,,,,6,6,,,,,60,60,,,,,2750,2750,,,,
1970,2,238,,100,4.54,1,0,7,100,2,9,1,15,9999999,2,2,,,,,40,40,,,,,6,6,,,,,60,60,,,,,2750,2750,,,,
1970,2,240,,100,4.54,1,0,1,100,1,49,4,40,350,1,,,2,,,49,,,54,,,4,,,5,,,40,,,50,,,350,,,750,,
1970,2,240,,100,4.54,1,0,2,100,2,54,5,50,750,1,,,1,,,49,,,49,,,4,,,4,,,40,,,40,,,350,,,350,,
1970,2,240,,100,4.54,1,0,3,100,2,2,0,1,9999999,1,2,1,,,,49,54,49,,,,4,5,4,,,,40,50,40,,,,350,750,350,,,
1970,2,241,,100,4.54,1,0,1,100,1,65,2,26,5550,1,,,2,,,65,,,49,,,2,,,4,,,26,,,40,,,5550,,,5350,,
1970,2,241,,100,4.54,1,0,2,100,2,49,4,40,5350,1,,,1,,,65,,,65,,,2,,,2,,,26,,,26,,,5550,,,5550,,
1970,2,241,,100,4.54,1,0,3,100,1,21,6,65,7550,1,2,1,,,,65,49,65,,,,2,4,2,,,,26,40,26,,,,5550,5350,5550,,,
1970,2,241,,100,4.54,1,0,4,100,2,19,5,50,550,1,2,1,,,,65,49,65,,,,2,4,2,,,,26,40,26,,,,5550,5350,5550,,,
1970,2,241,,100,4.54,1,0,5,100,2,74,6,60,650,1,,,,,,65,,,,,,2,,,,,,26,,,,,,5550,,,,,
1970,2,242,,100,4.54,1,0,1,100,1,72,7,70,1250,1,,,2,,,72,,,62,,,7,,,2,,,70,,,23,,,1250,,,350,,
1970,2,242,,100,4.54,1,0,2,100,2,62,2,23,350,1,,,1,,,72,,,72,,,7,,,7,,,70,,,70,,,1250,,,1250,,
1970,2,243,,100,4.54,1,0,1,100,1,28,6,60,12050,1,,,2,,,28,,,28,,,6,,,6,,,60,,,60,,,12050,,,1550,,
1970,2,243,,100,4.54,1,0,2,100,2,28,6,60,1550,1,,,1,,,28,,,28,,,6,,,6,,,60,,,60,,,12050,,,12050,,
1970,2,243,,100,4.54,1,0,3,100,1,6,1,11,9999999,1,2,1,,,,28,28,28,,,,6,6,6,,,,60,60,60,,,,12050,1550,12050,,,
1970,2,243,,100,4.54,1,0,4,100,2,4,1,11,9999999,1,2,1,,,,28,28,28,,,,6,6,6,,,,60,60,60,,,,12050,1550,12050,,,
1970,2,243,,100,4.54,1,0,5,100,2,0,0,1,9999999,1,2,1,,,,28,28,28,,,,6,6,6,,,,60,60,60,,,,12050,1550,12050,,,
1970,2,244,,100,4.54,1,0,1,100,1,28,6,60,8050,1,,,,,,28,,,,,,6,,,,,,60,,,,,,8050,,,,,
1970,2,244,,100,4.54,1,0,2,100,2,25,6,60,6550,1,,,,,,28,,,,,,6,,,,,,60,,,,,,8050,,,,,
1970,2,245,,100,4.54,1,0,1,100,1,44,4,40,10050,1,,,2,,,44,,,43,,,4,,,4,,,40,,,40,,,10050,,,1450,,
1970,2,245,,100,4.54,1,0,2,100,2,43,4,40,1450,1,,,1,,,44,,,44,,,4,,,4,,,40,,,40,,,10050,,,10050,,
1970,2,245,,100,4.54,1,0,3,100,2,18,6,65,2750,1,2,1,,,,44,43,44,,,,4,4,4,,,,40,40,40,,,,10050,1450,10050,,,
1970,2,245,,100,4.54,1,0,4,100,1,11,1,17,9999999,1,2,1,,,,44,43,44,,,,4,4,4,,,,40,40,40,,,,10050,1450,10050,,,
1970,2,246,,100,4.54,1,0,1,100,1,27,10,100,5050,1,,,,,,27,,,,,,10,,,,,,100,,,,,,5050,,,,,
1970,2,246,,100,4.54,1,0,2,100,1,23,9,90,7350,1,,,,,,27,,,,,,10,,,,,,100,,,,,,5050,,,,,
1970,2,247,,100,4.54,1,0,1,100,1,59,4,40,350,1,,,,,,59,,,,,,4,,,,,,40,,,,,,350,,,,,
1970,2,247,,100,4.54,1,0,2,100,2,52,4,40,7350,1,,,,,,59,,,,,,4,,,,,,40,,,,,,350,,,,,
1970,2,248,,100,4.54,1,0,1,100,1,57,4,40,10750,1,,,2,,,57,,,38,,,4,,,6,,,40,,,60,,,10750,,,0,,
1970,2,248,,100,4.54,1,0,2,100,2,38,6,60,0,1,,,1,,,57,,,57,,,4,,,4,,,40,,,40,,,10750,,,10750,,
1970,2,248,,100,4.54,1,0,3,100,2,16,2,23,0,1,2,1,,,,57,38,57,,,,4,6,4,,,,40,60,40,,,,10750,0,10750,,,
1970,2,248,,100,4.54,1,0,4,100,2,4,0,2,9999999,1,2,1,,,,57,38,57,,,,4,6,4,,,,40,60,40,,,,10750,0,10750,,,
1970,2,248,,100,4.54,1,0,5,100,2,8,1,14,9999999,1,2,1,,,,57,38,57,,,,4,6,4,,,,40,60,40,,,,10750,0,10750,,,
1970,2,250,,100,4.54,1,0,1,100,1,59,6,60,7050,1,,,,,,59,,,,,,6,,,,,,60,,,,,,7050,,,,,
1970,2,251,,100,4.54,1,0,1,100,1,41,6,60,10050,1,,,2,,,41,,,33,,,6,,,6,,,60,,,60,,,10050,,,0,,
1970,2,251,,100,4.54,1,0,2,100,2,33,6,60,0,1,,,1,,,41,,,41,,,6,,,6,,,60,,,60,,,10050,,,10050,,
1970,2,251,,100,4.54,1,0,3,100,1,10,2,22,9999999,1,2,1,,,,41,33,41,,,,6,6,6,,,,60,60,60,,,,10050,0,10050,,,
1970,2,251,,100,4.54,1,0,4,100,1,6,1,11,9999999,1,2,1,,,,41,33,41,,,,6,6,6,,,,60,60,60,,,,10050,0,10050,,,
1970,2,252,,100,4.54,1,0,1,100,1,51,2,26,10050,1,,,2,,,51,,,46,,,2,,,2,,,26,,,26,,,10050,,,0,,
1970,2,252,,100,4.54,1,0,2,100,2,46,2,26,0,1,,,1,,,51,,,51,,,2,,,2,,,26,,,26,,,10050,,,10050,,
1970,2,252,,100,4.54,1,0,3,100,1,15,2,26,0,1,2,1,,,,51,46,51,,,,2,2,2,,,,26,26,26,,,,10050,0,10050,,,
1970,2,253,,100,4.54,1,0,1,100,2,47,6,60,8050,2,,,,,,47,,,,,,6,,,,,,60,,,,,,8050,,,,,
1970,2,254,,100,4.54,1,0,1,100,1,33,8,80,6050,1,,,2,,,33,,,41,,,8,,,6,,,80,,,60,,,6050,,,5050,,
1970,2,254,,100,4.54,1,0,2,100,2,41,6,60,5050,1,,,1,,,33,,,33,,,8,,,8,,,80,,,80,,,6050,,,6050,,
1970,2,254,,100,4.54,1,0,3,100,1,15,3,30,150,1,2,1,,,,33,41,33,,,,8,6,8,,,,80,60,80,,,,6050,5050,6050,,,
1970,2,254,,100,4.54,1,0,4,100,1,14,2,26,0,1,2,1,,,,33,41,33,,,,8,6,8,,,,80,60,80,,,,6050,5050,6050,,,
1970,2,254,,100,4.54,1,0,5,100,2,12,2,23,9999999,1,2,1,,,,33,41,33,,,,8,6,8,,,,80,60,80,,,,6050,5050,6050,,,
1970,2,254,,100,4.54,1,0,6,100,2,47,8,80,6050,1,,,,,,33,,,,,,8,,,,,,80,,,,,,6050,,,,,
1970,2,255,,100,4.54,1,0,1,100,1,42,6,60,20050,1,,,2,,,42,,,36,,,6,,,10,,,60,,,100,,,20050,,,13150,,
1970,2,255,,100,4.54,1,0,2,100,2,36,10,100,13150,1,,,1,,,42,,,42,,,6,,,6,,,60,,,60,,,20050,,,20050,,
1970,2,256,,100,4.54,1,0,1,100,1,62,11,111,16950,1,,,2,,,62,,,57,,,11,,,10,,,111,,,100,,,16950,,,50,,
1970,2,256,,100,4.54,1,0,2,100,2,57,10,100,50,1,,,1,,,62,,,62,,,11,,,11,,,111,,,111,,,16950,,,16950,,
1970,2,256,,100,4.54,1,0,3,100,2,19,7,70,850,1,2,1,,,,62,57,62,,,,11,10,11,,,,111,100,111,,,,16950,50,16950,,,
1970,2,257,,100,4.54,1,0,1,100,1,38,11,111,23050,1,,,2,,,38,,,37,,,11,,,9,,,111,,,90,,,23050,,,0,,
1970,2,257,,100,4.54,1,0,2,100,2,37,9,90,0,1,,,1,,,38,,,38,,,11,,,11,,,111,,,111,,,23050,,,23050,,
1970,2,257,,100,4.54,1,0,3,100,2,14,2,26,0,1,2,1,,,,38,37,38,,,,11,9,11,,,,111,90,111,,,,23050,0,23050,,,
1970,2,257,,100,4.54,1,0,4,100,2,11,2,22,9999999,1,2,1,,,,38,37,38,,,,11,9,11,,,,111,90,111,,,,23050,0,23050,,,
1970,2,257,,100,4.54,1,0,5,100,1,9,1,16,9999999,1,2,1,,,,38,37,38,,,,11,9,11,,,,111,90,111,,,,23050,0,23050,,,
1970,2,258,,100,4.54,1,0,1,100,1,38,6,65,3050,1,,,,,,38,,,,,,6,,,,,,65,,,,,,3050,,,,,
1970,2,259,,100,4.54,1,0,1,100,2,69,3,30,4150,2,,,,,,69,,,,,,3,,,,,,30,,,,,,4150,,,,,
1970,2,259,,100,4.54,1,0,2,100,1,26,2,23,1650,2,,,,,,69,,,,,,3,,,,,,30,,,,,,4150,,,,,
1970,2,260,,100,4.54,3,0,1,100,2,89,6,60,850,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1970,2,261,,100,4.54,1,0,1,100,1,36,6,60,11450,1,,,2,,,36,,,35,,,6,,,6,,,60,,,60,,,11450,,,250,,
1970,2,261,,100,4.54,1,0,2,100,2,35,6,60,250,1,,,1,,,36,,,36,,,6,,,6,,,60,,,60,,,11450,,,11450,,
1970,2,261,,100,4.54,1,0,3,100,2,8,1,14,9999999,1,2,1,,,,36,35,36,,,,6,6,6,,,,60,60,60,,,,11450,250,11450,,,
1970,2,261,,100,4.54,1,0,4,100,2,5,0,2,9999999,1,2,1,,,,36,35,36,,,,6,6,6,,,,60,60,60,,,,11450,250,11450,,,
1970,2,262,,100,4.54,1,0,1,100,1,59,6,60,18250,1,,,2,,,59,,,51,,,6,,,6,,,60,,,60,,,18250,,,0,,
1970,2,262,,100,4.54,1,0,2,100,2,51,6,60,0,1,,,1,,,59,,,59,,,6,,,6,,,60,,,60,,,18250,,,18250,,
1970,2,263,,100,4.54,1,0,1,100,1,48,6,60,10050,1,,,2,,,48,,,47,,,6,,,3,,,60,,,30,,,10050,,,0,,
1970,2,263,,100,4.54,1,0,2,100,2,47,3,30,0,1,,,1,,,48,,,48,,,6,,,6,,,60,,,60,,,10050,,,10050,,
1970,2,263,,100,4.54,1,0,3,100,1,20,6,60,0,1,2,1,,,,48,47,48,,,,6,3,6,,,,60,30,60,,,,10050,0,10050,,,
1970,2,263,,100,4.54,1,0,4,100,1,14,2,23,0,1,2,1,,,,48,47,48,,,,6,3,6,,,,60,30,60,,,,10050,0,10050,,,
1970,2,263,,100,4.54,1,0,5,100,2,11,1,16,9999999,1,2,1,,,,48,47,48,,,,6,3,6,,,,60,30,60,,,,10050,0,10050,,,
1970,2,264,,100,4.54,1,0,1,100,1,34,11,110,10850,1,,,2,,,34,,,34,,,11,,,11,,,110,,,110,,,10850,,,150,,
1970,2,264,,100,4.54,1,0,2,100,2,34,11,110,150,1,,,1,,,34,,,34,,,11,,,11,,,110,,,110,,,10850,,,10850,,
1970,2,264,,100,4.54,1,0,3,100,2,5,1,11,9999999,1,2,1,,,,34,34,34,,,,11,11,11,,,,110,110,110,,,,10850,150,10850,,,
1970,2,264,,100,4.54,1,0,4,100,2,2,0,1,9999999,1,2,1,,,,34,34,34,,,,11,11,11,,,,110,110,110,,,,10850,150,10850,,,
1970,2,265,,100,4.54,1,0,1,100,1,67,8,80,5450,1,,,2,,,67,,,62,,,8,,,7,,,80,,,70,,,5450,,,11550,,
1970,2,265,,100,4.54,1,0,2,100,2,62,7,70,11550,1,,,1,,,67,,,67,,,8,,,8,,,80,,,80,,,5450,,,5450,,
1970,2,265,,100,4.54,1,0,3,100,1,21,8,80,0,1,2,1,,,,67,62,67,,,,8,7,8,,,,80,70,80,,,,5450,11550,5450,,,
1970,2,266,,100,4.54,1,0,1,100,1,77,4,40,15750,1,,,2,,,77,,,74,,,4,,,6,,,40,,,60,,,15750,,,2250,,
1970,2,266,,100,4.54,1,0,2,100,2,74,6,60,2250,1,,,1,,,77,,,77,,,4,,,4,,,40,,,40,,,15750,,,15750,,
1970,2,267,,100,4.54,1,0,1,100,1,53,6,60,12050,1,,,2,,,53,,,52,,,6,,,6,,,60,,,60,,,12050,,,3550,,
1970,2,267,,100,4.54,1,0,2,100,2,52,6,60,3550,1,,,1,,,53,,,53,,,6,,,6,,,60,,,60,,,12050,,,12050,,
1970,2,267,,100,4.54,1,0,3,100,2,20,8,80,2550,1,2,1,,,,53,52,53,,,,6,6,6,,,,60,60,60,,,,12050,3550,12050,,,
1970,2,268,,100,4.54,1,0,1,100,1,39,11,111,1650,1,,,2,,,39,,,37,,,11,,,10,,,111,,,100,,,1650,,,3350,,
1970,2,268,,100,4.54,1,0,2,100,2,37,10,100,3350,1,,,1,,,39,,,39,,,11,,,11,,,111,,,111,,,1650,,,1650,,
1970,2,268,,100,4.54,1,0,3,100,1,6,1,12,9999999,1,2,1,,,,39,37,39,,,,11,10,11,,,,111,100,111,,,,1650,3350,1650,,,
1970,2,269,,100,4.54,1,0,1,100,2,50,2,26,2850,2,,,,,,50,,,,,,2,,,,,,26,,,,,,2850,,,,,
1970,2,269,,100,4.54,1,0,2,100,2,17,3,30,2750,2,2,,,,,50,50,,,,,2,2,,,,,26,26,,,,,2850,2850,,,,
1970,2,269,,100,4.54,1,0,3,100,1,26,2,25,0,2,,,,,,50,,,,,,2,,,,,,26,,,,,,2850,,,,,
1970,2,269,,100,4.54,1,0,4,100,1,26,1,17,13250,2,,,,,,50,,,,,,2,,,,,,26,,,,,,2850,,,,,
1970,2,269,,100,4.54,1,0,5,100,1,26,1,15,13250,2,,,,,,50,,,,,,2,,,,,,26,,,,,,2850,,,,,
1970,2,270,,100,4.54,1,0,1,100,1,44,7,70,7150,1,,,2,,,44,,,48,,,7,,,8,,,70,,,80,,,7150,,,2650,,
1970,2,270,,100,4.54,1,0,2,100,2,48,8,80,2650,1,,,1,,,44,,,44,,,7,,,7,,,70,,,70,,,7150,,,7150,,
1970,2,270,,100,4.54,1,0,3,100,2,12,2,22,9999999,1,2,1,,,,44,48,44,,,,7,8,7,,,,70,80,70,,,,7150,2650,7150,,,
1970,2,270,,100,4.54,1,0,4,100,2,10,1,17,9999999,1,2,1,,,,44,48,44,,,,7,8,7,,,,70,80,70,,,,7150,2650,7150,,,
1970,2,271,,100,4.54,1,0,1,100,1,34,11,111,5550,1,,,2,,,34,,,29,,,11,,,11,,,111,,,111,,,5550,,,8250,,
1970,2,271,,100,4.54,1,0,2,100,2,29,11,111,8250,1,,,1,,,34,,,34,,,11,,,11,,,111,,,111,,,5550,,,5550,,
1970,2,271,,100,4.54,1,0,3,100,2,2,0,1,9999999,1,2,1,,,,34,29,34,,,,11,11,11,,,,111,111,111,,,,5550,8250,5550,,,
1970,2,272,,100,4.54,1,0,1,100,1,43,11,110,10850,1,,,2,,,43,,,34,,,11,,,11,,,110,,,111,,,10850,,,11550,,
1970,2,272,,100,4.54,1,0,2,100,2,34,11,111,11550,1,,,1,,,43,,,43,,,11,,,11,,,110,,,110,,,10850,,,10850,,
1970,2,272,,100,4.54,1,0,3,100,2,9,1,17,9999999,1,2,1,,,,43,34,43,,,,11,11,11,,,,110,111,110,,,,10850,11550,10850,,,
1970,2,273,,100,4.54,1,0,1,100,2,28,2,26,4150,2,2,,,,,28,59,,,,,2,2,,,,,26,23,,,,,4150,0,,,,
1970,2,273,,100,4.54,1,0,2,100,1,7,1,14,9999999,2,2,,,,,28,28,,,,,2,2,,,,,26,26,,,,,4150,4150,,,,
1970,2,273,,100,4.54,1,0,3,100,2,59,2,23,0,2,,,,,,28,,,,,,2,,,,,,26,,,,,,4150,,,,,
1970,2,274,,100,4.54,1,0,1,100,1,38,2,26,5050,1,,,2,,,38,,,35,,,2,,,6,,,26,,,60,,,5050,,,4050,,
1970,2,274,,100,4.54,1,0,2,100,2,35,6,60,4050,1,,,1,,,38,,,38,,,2,,,2,,,26,,,26,,,5050,,,5050,,
1970,2,274,,100,4.54,1,0,3,100,2,15,3,30,350,1,2,1,,,,38,35,38,,,,2,6,2,,,,26,60,26,,,,5050,4050,5050,,,
1970,2,274,,100,4.54,1,0,4,100,2,14,2,25,0,1,2,1,,,,38,35,38,,,,2,6,2,,,,26,60,26,,,,5050,4050,5050,,,
1970,2,274,,100,4.54,1,0,5,100,1,9,1,15,9999999,1,2,1,,,,38,35,38,,,,2,6,2,,,,26,60,26,,,,5050,4050,5050,,,
1970,2,274,,100,4.54,1,0,6,100,1,8,1,14,9999999,1,2,1,,,,38,35,38,,,,2,6,2,,,,26,60,26,,,,5050,4050,5050,,,
1970,2,275,,100,4.54,1,0,1,100,1,72,1,14,0,1,,,,,,72,,,,,,1,,,,,,14,,,,,,0,,,,,
1970,2,276,,100,4.54,1,0,1,100,2,27,3,30,3250,2,,,,,,27,,,,,,3,,,,,,30,,,,,,3250,,,,,
1970,2,276,,100,4.54,1,0,2,100,2,9,1,16,9999999,2,2,,,,,27,27,,,,,3,3,,,,,30,30,,,,,3250,3250,,,,
1970,2,276,,100,4.54,1,0,3,100,1,8,1,15,9999999,2,2,,,,,27,27,,,,,3,3,,,,,30,30,,,,,3250,3250,,,,
1970,2,276,,100,4.54,1,0,4,100,1,7,1,14,9999999,2,2,,,,,27,27,,,,,3,3,,,,,30,30,,,,,3250,3250,,,,
1970,2,276,,100,4.54,1,0,5,100,1,6,1,11,9999999,2,2,,,,,27,27,,,,,3,3,,,,,30,30,,,,,3250,3250,,,,
1970,2,276,,100,4.54,1,0,6,100,1,4,0,2,9999999,2,2,,,,,27,27,,,,,3,3,,,,,30,30,,,,,3250,3250,,,,
1970,2,276,,100,4.54,1,0,7,100,2,3,0,2,9999999,2,2,,,,,27,27,,,,,3,3,,,,,30,30,,,,,3250,3250,,,,
1970,2,277,,100,4.54,1,0,1,100,1,74,2,26,4450,1,,,2,,,74,,,58,,,2,,,2,,,26,,,26,,,4450,,,4450,,
1970,2,277,,100,4.54,1,0,2,100,2,58,2,26,4450,1,,,1,,,74,,,74,,,2,,,2,,,26,,,26,,,4450,,,4450,,
1970,2,278,,100,4.54,1,0,1,100,1,34,3,30,9350,1,,,2,,,34,,,24,,,3,,,6,,,30,,,60,,,9350,,,0,,
1970,2,278,,100,4.54,1,0,2,100,2,24,6,60,0,1,,,1,,,34,,,34,,,3,,,3,,,30,,,30,,,9350,,,9350,,
1970,2,279,,100,4.54,1,0,1,100,2,64,6,60,7550,2,,,,,,64,,,,,,6,,,,,,60,,,,,,7550,,,,,
1970,2,280,,100,4.54,1,0,1,100,1,48,6,60,7550,1,,,,,,48,,,,,,6,,,,,,60,,,,,,7550,,,,,
1970,2,281,,100,4.54,1,0,1,100,1,47,11,110,9250,1,,,2,,,47,,,40,,,11,,,6,,,110,,,60,,,9250,,,4450,,
1970,2,281,,100,4.54,1,0,2,100,2,40,6,60,4450,1,,,1,,,47,,,47,,,11,,,11,,,110,,,110,,,9250,,,9250,,
1970,2,281,,100,4.54,1,0,3,100,1,16,4,40,0,1,2,1,,,,47,40,47,,,,11,6,11,,,,110,60,110,,,,9250,4450,9250,,,
1970,2,281,,100,4.54,1,0,4,100,1,12,2,22,9999999,1,2,1,,,,47,40,47,,,,11,6,11,,,,110,60,110,,,,9250,4450,9250,,,
1970,2,282,,100,4.54,1,0,1,100,1,36,2,26,5250,1,,,,,,36,,,,,,2,,,,,,26,,,,,,5250,,,,,
1970,2,282,,100,4.54,1,0,2,100,1,39,2,26,5050,1,,,,,,36,,,,,,2,,,,,,26,,,,,,5250,,,,,
1970,2,282,,100,4.54,1,0,3,100,2,77,2,23,650,1,,,,,,36,,,,,,2,,,,,,26,,,,,,5250,,,,,
1970,2,282,,100,4.54,1,0,4,100,1,47,4,40,0,1,,,,,,36,,,,,,2,,,,,,26,,,,,,5250,,,,,
1970,2,282,,100,4.54,1,0,5,100,2,14,2,26,2450,1,,1,,,,36,,47,,,,2,,4,,,,26,,40,,,,5250,,0,,,
1970,2,282,,100,4.54,1,0,6,100,2,10,1,16,9999999,1,,1,,,,36,,47,,,,2,,4,,,,26,,40,,,,5250,,0,,,
1970,2,282,,100,4.54,1,0,7,100,1,8,1,14,9999999,1,,1,,,,36,,47,,,,2,,4,,,,26,,40,,,,5250,,0,,,
1970,2,283,,100,4.54,1,0,1,100,2,23,8,80,7850,2,,,,,,23,,,,,,8,,,,,,80,,,,,,7850,,,,,
1970,2,283,,100,4.54,1,0,2,100,2,18,4,40,0,2,,,,,,23,,,,,,8,,,,,,80,,,,,,7850,,,,,
1970,2,284,,100,4.54,1,0,1,100,2,31,1,16,6250,2,,,,,,31,,,,,,1,,,,,,16,,,,,,6250,,,,,
1970,2,284,,100,4.54,1,0,2,100,1,5,0,2,9999999,2,2,,,,,31,31,,,,,1,1,,,,,16,16,,,,,6250,6250,,,,
1970,2,284,,100,4.54,1,0,3,100,2,4,0,2,9999999,2,2,,,,,31,31,,,,,1,1,,,,,16,16,,,,,6250,6250,,,,
1970,2,285,,100,4.54,1,0,1,100,1,49,2,26,9050,1,,,2,,,49,,,50,,,2,,,2,,,26,,,26,,,9050,,,1550,,
1970,2,285,,100,4.54,1,0,2,100,2,50,2,26,1550,1,,,1,,,49,,,49,,,2,,,2,,,26,,,26,,,9050,,,9050,,
1970,2,286,,100,4.54,1,0,1,100,2,61,2,26,8450,2,,,,,,61,,,,,,2,,,,,,26,,,,,,8450,,,,,
1970,2,287,,100,4.54,1,0,1,100,2,14,2,26,0,2,,,,,,14,,,,,,2,,,,,,26,,,,,,0,,,,,
1970,2,287,,100,4.54,1,0,2,100,1,10,1,17,9999999,2,2,,,,,14,14,,,,,2,2,,,,,26,26,,,,,0,0,,,,
1970,2,287,,100,4.54,1,0,3,100,1,12,1,17,9999999,2,,,,,,14,,,,,,2,,,,,,26,,,,,,0,,,,,
1970,2,287,,100,4.54,1,0,4,100,1,9,1,16,9999999,2,,,,,,14,,,,,,2,,,,,,26,,,,,,0,,,,,
1970,2,287,,100,4.54,1,0,5,100,1,5,1,12,9999999,2,,,,,,14,,,,,,2,,,,,,26,,,,,,0,,,,,
1970,2,287,,100,4.54,1,0,6,100,1,48,2,25,6450,2,,,,,,14,,,,,,2,,,,,,26,,,,,,0,,,,,
1970,2,287,,100,4.54,1,0,7,100,2,2,0,1,9999999,2,,,,,,14,,,,,,2,,,,,,26,,,,,,0,,,,,
1970,2,288,,100,4.54,1,0,1,100,1,58,4,40,8050,1,,,2,,,58,,,53,,,4,,,6,,,40,,,60,,,8050,,,7050,,
1970,2,288,,100,4.54,1,0,2,100,2,53,6,60,7050,1,,,1,,,58,,,58,,,4,,,4,,,40,,,40,,,8050,,,8050,,
1970,2,289,,100,4.54,1,0,1,100,1,67,3,30,14750,1,,,,,,67,,,,,,3,,,,,,30,,,,,,14750,,,,,
1970,2,290,,100,4.54,1,0,1,100,1,73,2,25,1950,1,,,2,,,73,,,68,,,2,,,2,,,25,,,25,,,1950,,,650,,
1970,2,290,,100,4.54,1,0,2,100,2,68,2,25,650,1,2,,1,,,73,89,,73,,,2,2,,2,,,25,25,,25,,,1950,850,,1950,,
1970,2,290,,100,4.54,1,0,3,100,2,89,2,25,850,1,,,,,,73,,,,,,2,,,,,,25,,,,,,1950,,,,,
1970,2,291,,100,4.54,1,0,1,100,1,68,2,26,3450,1,,,2,,,68,,,65,,,2,,,4,,,26,,,40,,,3450,,,1750,,
1970,2,291,,100,4.54,1,0,2,100,2,65,4,40,1750,1,,,1,,,68,,,68,,,2,,,2,,,26,,,26,,,3450,,,3450,,
1970,2,292,,100,4.54,1,0,1,100,1,29,6,60,12050,1,,,2,,,29,,,28,,,6,,,6,,,60,,,60,,,12050,,,4050,,
1970,2,292,,100,4.54,1,0,2,100,2,28,6,60,4050,1,,,1,,,29,,,29,,,6,,,6,,,60,,,60,,,12050,,,12050,,
1970,2,292,,100,4.54,1,0,3,100,2,7,1,12,9999999,1,2,1,,,,29,28,29,,,,6,6,6,,,,60,60,60,,,,12050,4050,12050,,,
1970,2,292,,100,4.54,1,0,4,100,1,6,1,11,9999999,1,2,1,,,,29,28,29,,,,6,6,6,,,,60,60,60,,,,12050,4050,12050,,,
1970,2,292,,100,4.54,1,0,5,100,1,3,0,2,9999999,1,2,1,,,,29,28,29,,,,6,6,6,,,,60,60,60,,,,12050,4050,12050,,,
1970,2,293,,100,4.54,1,0,1,100,2,63,6,60,9950,2,,,,,,63,,,,,,6,,,,,,60,,,,,,9950,,,,,
1970,2,294,,100,4.54,1,0,1,100,1,42,6,60,30150,1,,,2,,,42,,,39,,,6,,,7,,,60,,,70,,,30150,,,1050,,
1970,2,294,,100,4.54,1,0,2,100,2,39,7,70,1050,1,,,1,,,42,,,42,,,6,,,6,,,60,,,60,,,30150,,,30150,,
1970,2,294,,100,4.54,1,0,3,100,2,16,4,40,450,1,2,1,,,,42,39,42,,,,6,7,6,,,,60,70,60,,,,30150,1050,30150,,,
1970,2,294,,100,4.54,1,0,4,100,1,15,3,30,450,1,2,1,,,,42,39,42,,,,6,7,6,,,,60,70,60,,,,30150,1050,30150,,,
1970,2,294,,100,4.54,1,0,5,100,1,14,2,26,0,1,2,1,,,,42,39,42,,,,6,7,6,,,,60,70,60,,,,30150,1050,30150,,,
1970,2,294,,100,4.54,1,0,6,100,1,11,2,22,9999999,1,2,1,,,,42,39,42,,,,6,7,6,,,,60,70,60,,,,30150,1050,30150,,,
1970,2,294,,100,4.54,1,0,7,100,2,9,1,16,9999999,1,2,1,,,,42,39,42,,,,6,7,6,,,,60,70,60,,,,30150,1050,30150,,,
1970,2,295,,100,4.54,1,0,1,100,1,70,5,50,450,1,,,2,,,70,,,70,,,5,,,5,,,50,,,50,,,450,,,150,,
1970,2,295,,100,4.54,1,0,2,100,2,70,5,50,150,1,,,1,,,70,,,70,,,5,,,5,,,50,,,50,,,450,,,450,,
1970,2,296,,100,4.54,1,0,1,100,1,47,5,50,7050,1,,,2,,,47,,,44,,,5,,,5,,,50,,,50,,,7050,,,4550,,
1970,2,296,,100,4.54,1,0,2,100,2,44,5,50,4550,1,2,,1,,,47,71,,47,,,5,4,,5,,,50,40,,50,,,7050,650,,7050,,
1970,2,296,,100,4.54,1,0,3,100,2,71,4,40,650,1,,,,,,47,,,,,,5,,,,,,50,,,,,,7050,,,,,
1970,2,296,,100,4.54,1,0,4,100,1,17,5,50,550,1,2,1,,,,47,44,47,,,,5,5,5,,,,50,50,50,,,,7050,4550,7050,,,
1970,2,297,,100,4.54,1,0,1,100,1,33,10,100,17650,1,,,2,,,33,,,33,,,10,,,10,,,100,,,100,,,17650,,,0,,
1970,2,297,,100,4.54,1,0,2,100,2,33,10,100,0,1,,,1,,,33,,,33,,,10,,,10,,,100,,,100,,,17650,,,17650,,
1970,2,297,,100,4.54,1,0,3,100,1,8,1,15,9999999,1,2,1,,,,33,33,33,,,,10,10,10,,,,100,100,100,,,,17650,0,17650,,,
1970,2,297,,100,4.54,1,0,4,100,2,1,0,1,9999999,1,2,1,,,,33,33,33,,,,10,10,10,,,,100,100,100,,,,17650,0,17650,,,
1970,2,297,,100,4.54,1,0,5,100,2,4,1,11,9999999,1,2,1,,,,33,33,33,,,,10,10,10,,,,100,100,100,,,,17650,0,17650,,,
1970,2,298,,100,4.54,1,0,1,100,1,49,2,26,14050,1,,,2,,,49,,,46,,,2,,,6,,,26,,,60,,,14050,,,7450,,
1970,2,298,,100,4.54,1,0,2,100,2,46,6,60,7450,1,,,1,,,49,,,49,,,2,,,2,,,26,,,26,,,14050,,,14050,,
1970,2,298,,100,4.54,1,0,3,100,2,13,2,23,9999999,1,2,1,,,,49,46,49,,,,2,6,2,,,,26,60,26,,,,14050,7450,14050,,,
1970,2,299,,100,4.54,1,0,1,100,1,47,6,60,15750,1,,,2,,,47,,,46,,,6,,,6,,,60,,,60,,,15750,,,4850,,
1970,2,299,,100,4.54,1,0,2,100,2,46,6,60,4850,1,,,1,,,47,,,47,,,6,,,6,,,60,,,60,,,15750,,,15750,,
1970,2,299,,100,4.54,1,0,3,100,2,18,6,65,2550,1,2,1,,,,47,46,47,,,,6,6,6,,,,60,60,60,,,,15750,4850,15750,,,
1970,2,300,,100,4.54,1,0,1,100,1,64,5,50,2450,1,,,2,,,64,,,63,,,5,,,2,,,50,,,22,,,2450,,,0,,
1970,2,300,,100,4.54,1,0,2,100,2,63,2,22,0,1,,,1,,,64,,,64,,,5,,,5,,,50,,,50,,,2450,,,2450,,
1970,2,301,,100,4.54,1,0,1,100,1,20,7,70,8050,1,,,2,,,20,,,21,,,7,,,7,,,70,,,70,,,8050,,,3050,,
1970,2,301,,100,4.54,1,0,2,100,2,21,7,70,3050,1,,,1,,,20,,,20,,,7,,,7,,,70,,,70,,,8050,,,8050,,
1970,2,301,,100,4.54,1,0,3,100,1,0,0,1,9999999,1,2,1,,,,20,21,20,,,,7,7,7,,,,70,70,70,,,,8050,3050,8050,,,
1970,2,302,,100,4.54,1,0,1,100,2,23,2,26,4450,2,,,,,,23,,,,,,2,,,,,,26,,,,,,4450,,,,,
1970,2,303,,100,4.54,1,0,1,100,1,45,6,60,11750,1,,,2,,,45,,,41,,,6,,,6,,,60,,,60,,,11750,,,0,,
1970,2,303,,100,4.54,1,0,2,100,2,41,6,60,0,1,2,1,1,,,45,67,71,45,,,6,2,2,6,,,60,26,23,60,,,11750,1450,3350,11750,,
1970,2,303,,100,4.54,1,0,3,100,2,21,9,90,550,1,2,1,,,,45,41,45,,,,6,6,6,,,,60,60,60,,,,11750,0,11750,,,
1970,2,303,,100,4.54,1,0,4,100,2,17,5,50,550,1,2,1,,,,45,41,45,,,,6,6,6,,,,60,60,60,,,,11750,0,11750,,,
1970,2,303,,100,4.54,1,0,5,100,1,15,3,30,0,1,2,1,,,,45,41,45,,,,6,6,6,,,,60,60,60,,,,11750,0,11750,,,
1970,2,303,,100,4.54,1,0,6,100,1,71,2,23,3350,1,,,2,,,45,,,67,,,6,,,2,,,60,,,26,,,11750,,,1450,,
1970,2,303,,100,4.54,1,0,7,100,2,67,2,26,1450,1,,,1,,,45,,,71,,,6,,,2,,,60,,,23,,,11750,,,3350,,
1970,2,304,,100,4.54,1,0,1,100,1,75,2,26,3650,1,,,2,,,75,,,75,,,2,,,2,,,26,,,26,,,3650,,,850,,
1970,2,304,,100,4.54,1,0,2,100,2,75,2,26,850,1,,,1,,,75,,,75,,,2,,,2,,,26,,,26,,,3650,,,3650,,
1970,2,305,,100,4.54,1,0,1,100,1,38,6,60,10050,1,,,2,,,38,,,38,,,6,,,6,,,60,,,60,,,10050,,,0,,
1970,2,305,,100,4.54,1,0,2,100,2,38,6,60,0,1,,,1,,,38,,,38,,,6,,,6,,,60,,,60,,,10050,,,10050,,
1970,2,305,,100,4.54,1,0,3,100,1,11,2,22,9999999,1,2,1,,,,38,38,38,,,,6,6,6,,,,60,60,60,,,,10050,0,10050,,,
1970,2,305,,100,4.54,1,0,4,100,1,9,1,16,9999999,1,2,1,,,,38,38,38,,,,6,6,6,,,,60,60,60,,,,10050,0,10050,,,
1970,2,306,,100,4.54,1,0,1,100,1,47,3,30,11050,1,,,2,,,47,,,40,,,3,,,6,,,30,,,60,,,11050,,,0,,
1970,2,306,,100,4.54,1,0,2,100,2,40,6,60,0,1,,,1,,,47,,,47,,,3,,,3,,,30,,,30,,,11050,,,11050,,
1970,2,306,,100,4.54,1,0,3,100,1,16,3,30,0,1,2,1,,,,47,40,47,,,,3,6,3,,,,30,60,30,,,,11050,0,11050,,,
1970,2,306,,100,4.54,1,0,4,100,2,9,1,16,9999999,1,2,1,,,,47,40,47,,,,3,6,3,,,,30,60,30,,,,11050,0,11050,,,
1970,2,306,,100,4.54,1,0,5,100,2,7,1,14,9999999,1,2,1,,,,47,40,47,,,,3,6,3,,,,30,60,30,,,,11050,0,11050,,,
1970,2,307,,100,4.54,1,0,1,100,1,35,6,60,150,1,2,,,,,35,73,,,,,6,2,,,,,60,26,,,,,150,1250,,,,
1970,2,307,,100,4.54,1,0,2,100,2,73,2,26,1250,1,,,,,,35,,,,,,6,,,,,,60,,,,,,150,,,,,
1970,2,308,,100,4.54,1,0,1,100,1,77,2,22,4050,1,,,,,,77,,,,,,2,,,,,,22,,,,,,4050,,,,,
1970,2,308,,100,4.54,1,0,2,100,1,50,4,40,650,1,,1,,,,77,,77,,,,2,,2,,,,22,,22,,,,4050,,4050,,,
1970,2,308,,100,4.54,1,0,3,100,1,41,2,26,6550,1,,1,,,,77,,77,,,,2,,2,,,,22,,22,,,,4050,,4050,,,
1970,2,309,,100,4.54,1,0,1,100,1,61,5,50,3550,1,,,,,,61,,,,,,5,,,,,,50,,,,,,3550,,,,,
1970,2,309,,100,4.54,1,0,2,100,1,17,5,50,550,1,,1,,,,61,,61,,,,5,,5,,,,50,,50,,,,3550,,3550,,,
1970,2,310,,100,4.54,1,0,1,100,1,56,6,60,7050,1,,,2,,,56,,,61,,,6,,,6,,,60,,,60,,,7050,,,0,,
1970,2,310,,100,4.54,1,0,2,100,2,61,6,60,0,1,,,1,,,56,,,56,,,6,,,6,,,60,,,60,,,7050,,,7050,,
1970,2,311,,100,4.54,1,0,1,100,1,43,11,110,11850,1,,,2,,,43,,,42,,,11,,,10,,,110,,,100,,,11850,,,250,,
1970,2,311,,100,4.54,1,0,2,100,2,42,10,100,250,1,,,1,,,43,,,43,,,11,,,11,,,110,,,110,,,11850,,,11850,,
1970,2,312,,100,4.54,1,0,1,100,2,26,6,60,4050,2,,,,,,26,,,,,,6,,,,,,60,,,,,,4050,,,,,
1970,2,312,,100,4.54,1,0,2,100,1,9,1,15,9999999,2,2,,,,,26,26,,,,,6,6,,,,,60,60,,,,,4050,4050,,,,
1970,2,312,,100,4.54,1,0,3,100,1,7,1,12,9999999,2,2,,,,,26,26,,,,,6,6,,,,,60,60,,,,,4050,4050,,,,
1970,2,313,,100,4.54,1,0,1,100,1,22,10,100,0,1,,,,,,22,,,,,,10,,,,,,100,,,,,,0,,,,,
1970,2,314,,100,4.54,1,0,1,100,1,27,6,60,7050,1,,,,,,27,,,,,,6,,,,,,60,,,,,,7050,,,,,
1970,2,315,,100,4.54,3,0,1,100,2,61,8,80,150,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1970,2,316,,100,4.54,1,0,1,100,1,62,4,40,12050,1,,,2,,,62,,,61,,,4,,,4,,,40,,,40,,,12050,,,1350,,
1970,2,316,,100,4.54,1,0,2,100,2,61,4,40,1350,1,,,1,,,62,,,62,,,4,,,4,,,40,,,40,,,12050,,,12050,,
1970,2,317,,100,4.54,1,0,1,100,2,45,8,80,5050,2,,,,,,45,,,,,,8,,,,,,80,,,,,,5050,,,,,
1970,2,318,,100,4.54,1,0,1,100,2,23,8,80,3050,2,,,,,,23,,,,,,8,,,,,,80,,,,,,3050,,,,,
1970,2,318,,100,4.54,1,0,2,100,1,2,0,1,9999999,2,2,,,,,23,23,,,,,8,8,,,,,80,80,,,,,3050,3050,,,,
1970,2,319,,100,4.54,1,0,1,100,1,43,10,100,22050,1,,,2,,,43,,,37,,,10,,,10,,,100,,,100,,,22050,,,150,,
1970,2,319,,100,4.54,1,0,2,100,2,37,10,100,150,1,,,1,,,43,,,43,,,10,,,10,,,100,,,100,,,22050,,,22050,,
1970,2,319,,100,4.54,1,0,3,100,2,14,2,26,0,1,2,1,,,,43,37,43,,,,10,10,10,,,,100,100,100,,,,22050,150,22050,,,
1970,2,319,,100,4.54,1,0,4,100,1,11,1,17,9999999,1,2,1,,,,43,37,43,,,,10,10,10,,,,100,100,100,,,,22050,150,22050,,,
1970,2,320,,100,4.54,1,0,1,100,2,79,2,26,750,2,,,,,,79,,,,,,2,,,,,,26,,,,,,750,,,,,
1970,2,321,,100,4.54,1,0,1,100,1,24,6,60,5050,1,,,2,,,24,,,21,,,6,,,6,,,60,,,60,,,5050,,,4050,,
1970,2,321,,100,4.54,1,0,2,100,2,21,6,60,4050,1,,,1,,,24,,,24,,,6,,,6,,,60,,,60,,,5050,,,5050,,
1970,2,321,,100,4.54,1,0,3,100,1,0,0,1,9999999,1,2,1,,,,24,21,24,,,,6,6,6,,,,60,60,60,,,,5050,4050,5050,,,
1970,2,322,,100,4.54,1,0,1,100,2,60,6,60,6550,2,,,,,,60,,,,,,6,,,,,,60,,,,,,6550,,,,,
1970,2,322,,100,4.54,1,0,2,100,1,24,6,60,1350,2,2,,,,,60,60,,,,,6,6,,,,,60,60,,,,,6550,6550,,,,
1970,2,323,,100,4.54,1,0,1,100,1,77,7,70,10050,1,,,2,,,77,,,71,,,7,,,3,,,70,,,30,,,10050,,,0,,
1970,2,323,,100,4.54,1,0,2,100,2,71,3,30,0,1,,,1,,,77,,,77,,,7,,,7,,,70,,,70,,,10050,,,10050,,
1970,2,324,,100,4.54,1,0,1,100,1,66,3,30,8050,1,,,2,,,66,,,60,,,3,,,5,,,30,,,50,,,8050,,,5050,,
1970,2,324,,100,4.54,1,0,2,100,2,60,5,50,5050,1,,,1,,,66,,,66,,,3,,,3,,,30,,,30,,,8050,,,8050,,
1970,2,325,,100,4.54,1,0,1,100,1,40,2,25,7050,1,,,2,,,40,,,34,,,2,,,4,,,25,,,40,,,7050,,,6050,,
1970,2,325,,100,4.54,1,0,2,100,2,34,4,40,6050,1,,,1,,,40,,,40,,,2,,,2,,,25,,,25,,,7050,,,7050,,
1970,2,325,,100,4.54,1,0,3,100,2,16,3,30,3850,1,2,1,,,,40,34,40,,,,2,4,2,,,,25,40,25,,,,7050,6050,7050,,,
1970,2,325,,100,4.54,1,0,4,100,1,15,2,25,0,1,2,1,,,,40,34,40,,,,2,4,2,,,,25,40,25,,,,7050,6050,7050,,,
1970,2,325,,100,4.54,1,0,5,100,2,13,2,25,9999999,1,2,1,,,,40,34,40,,,,2,4,2,,,,25,40,25,,,,7050,6050,7050,,,
1970,2,325,,100,4.54,1,0,6,100,2,13,2,22,9999999,1,2,1,,,,40,34,40,,,,2,4,2,,,,25,40,25,,,,7050,6050,7050,,,
1970,2,325,,100,4.54,1,0,7,100,2,0,0,1,9999999,1,2,1,,,,40,34,40,,,,2,4,2,,,,25,40,25,,,,7050,6050,7050,,,
1970,2,326,,100,4.54,1,0,1,100,1,37,6,60,4850,1,,,2,,,37,,,40,,,6,,,5,,,60,,,50,,,4850,,,0,,
1970,2,326,,100,4.54,1,0,2,100,2,40,5,50,0,1,,,1,,,37,,,37,,,6,,,6,,,60,,,60,,,4850,,,4850,,
1970,2,326,,100,4.54,1,0,3,100,2,19,6,60,2650,1,2,1,,,,37,40,37,,,,6,5,6,,,,60,50,60,,,,4850,0,4850,,,
1970,2,326,,100,4.54,1,0,4,100,2,17,5,50,0,1,2,1,,,,37,40,37,,,,6,5,6,,,,60,50,60,,,,4850,0,4850,,,
1970,2,326,,100,4.54,1,0,5,100,2,15,2,26,0,1,2,1,,,,37,40,37,,,,6,5,6,,,,60,50,60,,,,4850,0,4850,,,
1970,2,326,,100,4.54,1,0,6,100,1,14,2,25,0,1,2,1,,,,37,40,37,,,,6,5,6,,,,60,50,60,,,,4850,0,4850,,,
1970,2,326,,100,4.54,1,0,7,100,2,13,2,23,9999999,1,2,1,,,,37,40,37,,,,6,5,6,,,,60,50,60,,,,4850,0,4850,,,
1970,2,326,,100,4.54,1,0,8,100,2,12,2,22,9999999,1,2,1,,,,37,40,37,,,,6,5,6,,,,60,50,60,,,,4850,0,4850,,,
1970,2,326,,100,4.54,1,0,9,100,1,1,0,1,9999999,1,2,,,,,37,19,,,,,6,6,,,,,60,60,,,,,4850,2650,,,,
1970,2,327,,100,4.54,1,0,1,100,2,53,2,23,550,2,,,,,,53,,,,,,2,,,,,,23,,,,,,550,,,,,
1970,2,329,,100,4.54,1,0,1,100,1,33,1,17,7050,1,,,2,,,33,,,27,,,1,,,2,,,17,,,23,,,7050,,,2050,,
1970,2,329,,100,4.54,1,0,2,100,2,27,2,23,2050,1,,,1,,,33,,,33,,,1,,,1,,,17,,,17,,,7050,,,7050,,
1970,2,329,,100,4.54,1,0,3,100,2,0,0,1,9999999,1,2,1,,,,33,27,33,,,,1,2,1,,,,17,23,17,,,,7050,2050,7050,,,
1970,2,330,,100,4.54,1,0,1,100,1,29,3,30,9550,1,,,2,,,29,,,24,,,3,,,3,,,30,,,30,,,9550,,,0,,
1970,2,330,,100,4.54,1,0,2,100,2,24,3,30,0,1,,,1,,,29,,,29,,,3,,,3,,,30,,,30,,,9550,,,9550,,
1970,2,330,,100,4.54,1,0,3,100,2,3,0,2,9999999,1,2,1,,,,29,24,29,,,,3,3,3,,,,30,30,30,,,,9550,0,9550,,,
1970,2,330,,100,4.54,1,0,4,100,2,2,0,1,9999999,1,2,1,,,,29,24,29,,,,3,3,3,,,,30,30,30,,,,9550,0,9550,,,
1970,2,330,,100,4.54,1,0,5,100,1,0,0,1,9999999,1,2,1,,,,29,24,29,,,,3,3,3,,,,30,30,30,,,,9550,0,9550,,,
1970,2,330,,100,4.54,1,0,6,100,1,0,0,1,9999999,1,2,1,,,,29,24,29,,,,3,3,3,,,,30,30,30,,,,9550,0,9550,,,
1970,2,331,,100,4.54,1,0,1,100,2,45,1,17,0,2,,,,,,45,,,,,,1,,,,,,17,,,,,,0,,,,,
1970,2,331,,100,4.54,1,0,2,100,1,45,1,16,6150,2,,,,,,45,,,,,,1,,,,,,17,,,,,,0,,,,,
1970,2,331,,100,4.54,1,0,3,100,2,20,2,22,4150,2,2,,,,,45,45,,,,,1,1,,,,,17,17,,,,,0,0,,,,
1970,2,332,,100,4.54,1,0,1,100,1,39,6,60,6550,1,,,,,,39,,,,,,6,,,,,,60,,,,,,6550,,,,,
1970,2,333,,100,4.54,1,0,1,100,1,19,6,65,1750,1,,,,,,19,,,,,,6,,,,,,65,,,,,,1750,,,,,
1970,2,333,,100,4.54,1,0,2,100,1,18,7,70,2050,1,,,,,,19,,,,,,6,,,,,,65,,,,,,1750,,,,,
1970,2,333,,100,4.54,1,0,3,100,1,21,9,90,1950,1,,,,,,19,,,,,,6,,,,,,65,,,,,,1750,,,,,
1970,2,334,,100,4.54,1,0,1,100,1,40,3,30,9450,1,,,2,,,40,,,42,,,3,,,2,,,30,,,26,,,9450,,,0,,
1970,2,334,,100,4.54,1,0,2,100,2,42,2,26,0,1,,,1,,,40,,,40,,,3,,,3,,,30,,,30,,,9450,,,9450,,
1970,2,336,,100,4.54,1,0,1,100,2,29,5,50,3750,2,,,,,,29,,,,,,5,,,,,,50,,,,,,3750,,,,,
1970,2,336,,100,4.54,1,0,2,100,2,5,0,2,9999999,2,2,,,,,29,29,,,,,5,5,,,,,50,50,,,,,3750,3750,,,,
1970,2,336,,100,4.54,1,0,3,100,2,4,0,2,9999999,2,2,,,,,29,29,,,,,5,5,,,,,50,50,,,,,3750,3750,,,,
1970,2,337,,100,4.54,1,0,1,100,1,48,11,110,15050,1,,,2,,,48,,,49,,,11,,,2,,,110,,,23,,,15050,,,0,,
1970,2,337,,100,4.54,1,0,2,100,2,49,2,23,0,1,,,1,,,48,,,48,,,11,,,11,,,110,,,110,,,15050,,,15050,,
1970,2,337,,100,4.54,1,0,3,100,1,18,6,60,1750,1,2,1,,,,48,49,48,,,,11,2,11,,,,110,23,110,,,,15050,0,15050,,,
1970,2,337,,100,4.54,1,0,4,100,1,17,5,50,1050,1,2,1,,,,48,49,48,,,,11,2,11,,,,110,23,110,,,,15050,0,15050,,,
1970,2,337,,100,4.54,1,0,5,100,1,14,2,25,50,1,2,1,,,,48,49,48,,,,11,2,11,,,,110,23,110,,,,15050,0,15050,,,
1970,2,337,,100,4.54,1,0,6,100,2,11,2,22,9999999,1,2,1,,,,48,49,48,,,,11,2,11,,,,110,23,110,,,,15050,0,15050,,,
1970,2,337,,100,4.54,1,0,7,100,1,10,1,17,9999999,1,2,1,,,,48,49,48,,,,11,2,11,,,,110,23,110,,,,15050,0,15050,,,
1970,2,338,,100,4.54,1,0,1,100,1,57,9,90,10050,1,,,2,,,57,,,49,,,9,,,6,,,90,,,60,,,10050,,,250,,
1970,2,338,,100,4.54,1,0,2,100,2,49,6,60,250,1,,,1,,,57,,,57,,,9,,,9,,,90,,,90,,,10050,,,10050,,
1970,2,338,,100,4.54,1,0,3,100,1,16,4,40,1450,1,2,1,,,,57,49,57,,,,9,6,9,,,,90,60,90,,,,10050,250,10050,,,
1970,2,338,,100,4.54,1,0,4,100,2,14,2,26,0,1,2,1,,,,57,49,57,,,,9,6,9,,,,90,60,90,,,,10050,250,10050,,,
1970,2,338,,100,4.54,1,0,5,100,1,11,2,22,9999999,1,2,1,,,,57,49,57,,,,9,6,9,,,,90,60,90,,,,10050,250,10050,,,
1970,2,338,,100,4.54,1,0,6,100,1,6,1,12,9999999,1,2,1,,,,57,49,57,,,,9,6,9,,,,90,60,90,,,,10050,250,10050,,,
1970,2,339,,100,4.54,1,0,1,100,1,30,6,60,12050,1,,,2,,,30,,,28,,,6,,,6,,,60,,,60,,,12050,,,0,,
1970,2,339,,100,4.54,1,0,2,100,2,28,6,60,0,1,,,1,,,30,,,30,,,6,,,6,,,60,,,60,,,12050,,,12050,,
1970,2,339,,100,4.54,1,0,3,100,1,7,1,14,9999999,1,2,1,,,,30,28,30,,,,6,6,6,,,,60,60,60,,,,12050,0,12050,,,
1970,2,339,,100,4.54,1,0,4,100,1,5,1,11,9999999,1,2,1,,,,30,28,30,,,,6,6,6,,,,60,60,60,,,,12050,0,12050,,,
1970,2,339,,100,4.54,1,0,5,100,2,0,0,1,9999999,1,2,1,,,,30,28,30,,,,6,6,6,,,,60,60,60,,,,12050,0,12050,,,
1970,2,340,,100,4.54,1,0,1,100,1,57,3,30,5250,1,,,2,,,57,,,62,,,3,,,2,,,30,,,26,,,5250,,,0,,
1970,2,340,,100,4.54,1,0,2,100,2,62,2,26,0,1,,,1,,,57,,,57,,,3,,,3,,,30,,,30,,,5250,,,5250,,
1970,2,341,,100,4.54,1,0,1,100,1,73,1,16,1750,1,,,2,,,73,,,67,,,1,,,1,,,16,,,16,,,1750,,,750,,
1970,2,341,,100,4.54,1,0,2,100,2,67,1,16,750,1,,,1,,,73,,,73,,,1,,,1,,,16,,,16,,,1750,,,1750,,
1970,2,342,,100,4.54,1,0,1,100,1,29,6,60,7050,1,,,2,,,29,,,21,,,6,,,5,,,60,,,50,,,7050,,,3350,,
1970,2,342,,100,4.54,1,0,2,100,2,21,5,50,3350,1,,,1,,,29,,,29,,,6,,,6,,,60,,,60,,,7050,,,7050,,
1970,2,342,,100,4.54,1,0,3,100,1,0,0,1,9999999,1,2,1,,,,29,21,29,,,,6,5,6,,,,60,50,60,,,,7050,3350,7050,,,
1970,2,343,,100,4.54,1,0,1,100,1,36,11,111,24250,1,,,2,,,36,,,30,,,11,,,10,,,111,,,100,,,24250,,,7050,,
1970,2,343,,100,4.54,1,0,2,100,2,30,10,100,7050,1,,,1,,,36,,,36,,,11,,,11,,,111,,,111,,,24250,,,24250,,
1970,2,343,,100,4.54,1,0,3,100,2,7,1,14,9999999,1,2,1,,,,36,30,36,,,,11,10,11,,,,111,100,111,,,,24250,7050,24250,,,
1970,2,343,,100,4.54,1,0,4,100,2,3,0,2,9999999,1,2,1,,,,36,30,36,,,,11,10,11,,,,111,100,111,,,,24250,7050,24250,,,
1970,2,344,,100,4.54,1,0,1,100,1,24,11,111,250,1,,,,,,24,,,,,,11,,,,,,111,,,,,,250,,,,,
1970,2,344,,100,4.54,1,0,2,100,1,6,1,12,9999999,1,,1,,,,24,,24,,,,11,,11,,,,111,,111,,,,250,,250,,,
1970,2,344,,100,4.54,1,0,3,100,1,8,1,15,9999999,1,,1,,,,24,,24,,,,11,,11,,,,111,,111,,,,250,,250,,,
1970,2,345,,100,4.54,1,0,1,100,1,58,8,80,11850,1,,,2,,,58,,,57,,,8,,,6,,,80,,,60,,,11850,,,6450,,
1970,2,345,,100,4.54,1,0,2,100,2,57,6,60,6450,1,,,1,,,58,,,58,,,8,,,8,,,80,,,80,,,11850,,,11850,,
1970,2,345,,100,4.54,1,0,3,100,2,28,7,70,6150,1,2,1,,,,58,57,58,,,,8,6,8,,,,80,60,80,,,,11850,6450,11850,,,
1970,2,346,,100,4.54,1,0,1,100,1,39,6,60,12050,1,,,2,,,39,,,33,,,6,,,6,,,60,,,60,,,12050,,,3050,,
1970,2,346,,100,4.54,1,0,2,100,2,33,6,60,3050,1,,,1,,,39,,,39,,,6,,,6,,,60,,,60,,,12050,,,12050,,
1970,2,346,,100,4.54,1,0,3,100,1,16,3,30,0,1,2,1,,,,39,33,39,,,,6,6,6,,,,60,60,60,,,,12050,3050,12050,,,
1970,2,346,,100,4.54,1,0,4,100,1,14,2,25,0,1,2,1,,,,39,33,39,,,,6,6,6,,,,60,60,60,,,,12050,3050,12050,,,
1970,2,346,,100,4.54,1,0,5,100,1,4,1,11,9999999,1,2,1,,,,39,33,39,,,,6,6,6,,,,60,60,60,,,,12050,3050,12050,,,
1970,2,347,,100,4.54,1,0,1,100,1,38,11,111,46550,1,,,2,,,38,,,33,,,11,,,6,,,111,,,60,,,46550,,,0,,
1970,2,347,,100,4.54,1,0,2,100,2,33,6,60,0,1,,,1,,,38,,,38,,,11,,,11,,,111,,,111,,,46550,,,46550,,
1970,2,347,,100,4.54,1,0,3,100,1,10,1,16,9999999,1,2,1,,,,38,33,38,,,,11,6,11,,,,111,60,111,,,,46550,0,46550,,,
1970,2,347,,100,4.54,1,0,4,100,1,9,1,15,9999999,1,2,1,,,,38,33,38,,,,11,6,11,,,,111,60,111,,,,46550,0,46550,,,
1970,2,347,,100,4.54,1,0,5,100,2,7,1,14,9999999,1,2,1,,,,38,33,38,,,,11,6,11,,,,111,60,111,,,,46550,0,46550,,,
1970,2,347,,100,4.54,1,0,6,100,1,5,1,11,9999999,1,2,1,,,,38,33,38,,,,11,6,11,,,,111,60,111,,,,46550,0,46550,,,
1970,2,347,,100,4.54,1,0,7,100,2,4,0,2,9999999,1,2,1,,,,38,33,38,,,,11,6,11,,,,111,60,111,,,,46550,0,46550,,,
1970,2,347,,100,4.54,1,0,8,100,1,1,0,1,9999999,1,2,1,,,,38,33,38,,,,11,6,11,,,,111,60,111,,,,46550,0,46550,,,
1970,2,348,,100,4.54,1,0,1,100,2,57,8,80,9050,2,,,,,,57,,,,,,8,,,,,,80,,,,,,9050,,,,,


================================================
FILE: examples/data/nyc-taxi_1k.csv
================================================
1460000001,2,2017-12-15 00:00:28,2017-12-15 00:15:43,N,1,,,,,2,1.50,11,0.5,0.5,1.25,0,,0.3,13.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000002,2,2017-12-15 00:33:12,2017-12-15 00:51:04,N,1,,,,,3,2.53,13,0.5,0.5,2.86,0,,0.3,17.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000003,2,2017-12-15 00:56:59,2017-12-15 00:59:51,N,1,,,,,3,0.06,3.5,0.5,0.5,0,0,,0.3,4.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000004,2,2017-12-15 00:09:19,2017-12-15 00:18:54,N,1,,,,,5,1.47,8.5,0.5,0.5,1.96,0,,0.3,11.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000005,2,2017-12-15 00:21:02,2017-12-15 00:25:30,N,1,,,,,5,0.23,4.5,0.5,0.5,1.16,0,,0.3,6.96,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000006,2,2017-12-15 00:35:38,2017-12-15 01:26:19,N,1,,,,,5,18.48,58,0.5,0.5,9.76,5.76,,0.3,76.77,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000007,1,2017-12-15 00:48:29,2017-12-15 01:01:42,N,1,,,,,1,2.60,11.5,0.5,0.5,2.55,0,,0.3,15.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000008,2,2017-12-15 00:09:01,2017-12-15 00:11:57,N,1,,,,,1,0.85,4.5,0.5,0.5,0,0,,0.3,5.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000009,2,2017-12-15 00:32:00,2017-12-15 00:35:42,N,1,,,,,1,0.48,4.5,0.5,0.5,0,0,,0.3,5.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000010,2,2017-12-15 00:42:13,2017-12-15 00:54:48,N,1,,,,,1,2.09,10.5,0.5,0.5,2.36,0,,0.3,14.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000011,2,2017-12-15 00:05:46,2017-12-15 00:09:49,N,1,,,,,3,0.60,4.5,0.5,0.5,0.87,0,,0.3,6.67,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000012,2,2017-12-15 00:10:48,2017-12-15 00:20:24,N,1,,,,,3,0.97,7.5,0.5,0.5,0,0,,0.3,8.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000013,2,2017-12-15 00:26:39,2017-12-15 00:39:18,N,1,,,,,3,1.82,9.5,0.5,0.5,3,0,,0.3,13.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000014,2,2017-12-15 00:40:49,2017-12-15 01:16:48,N,1,,,,,3,8.99,31,0.5,0.5,6.46,0,,0.3,38.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000015,2,2017-12-15 00:24:04,2017-12-15 00:34:56,N,1,,,,,1,5.04,16,0.5,0.5,5.19,0,,0.3,22.49,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000016,1,2017-12-15 00:06:12,2017-12-15 00:35:33,N,1,,,,,1,5.30,21.5,0.5,0.5,1.5,0,,0.3,24.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000017,1,2017-12-15 00:38:38,2017-12-15 00:54:18,N,1,,,,,1,0.90,10.5,0.5,0.5,1,0,,0.3,12.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000018,1,2017-12-15 00:33:57,2017-12-15 00:47:58,N,1,,,,,1,2.10,11.5,0.5,0.5,2.55,0,,0.3,15.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000019,1,2017-12-15 00:00:23,2017-12-15 00:13:38,N,1,,,,,1,1.40,9.5,0.5,0.5,1.5,0,,0.3,12.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000020,1,2017-12-15 00:21:30,2017-12-15 00:31:23,N,1,,,,,1,1.50,8,0.5,0.5,2.3,0,,0.3,11.6,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000021,1,2017-12-15 00:47:17,2017-12-15 01:02:26,N,1,,,,,1,2.80,12,0.5,0.5,1,0,,0.3,14.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000022,1,2017-12-15 00:22:56,2017-12-15 00:29:31,N,1,,,,,1,1.50,7,0.5,0.5,3.5,0,,0.3,11.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000023,1,2017-12-15 00:44:16,2017-12-15 00:50:13,N,1,,,,,1,0.90,6,0.5,0.5,1.45,0,,0.3,8.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000024,2,2017-12-15 00:21:58,2017-12-15 00:44:00,N,1,,,,,1,13.41,37,0.5,0.5,0,0,,0.3,38.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000025,2,2017-12-15 00:18:22,2017-12-15 00:33:48,N,1,,,,,1,1.71,11,0.5,0.5,0,0,,0.3,12.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000026,2,2017-12-15 00:39:06,2017-12-15 00:52:25,N,1,,,,,1,1.01,9,0.5,0.5,1.54,0,,0.3,11.84,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000027,2,2017-12-15 01:00:32,2017-12-15 01:20:29,N,1,,,,,1,3.69,16,0.5,0.5,2.6,0,,0.3,19.9,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000028,1,2017-12-15 00:09:13,2017-12-15 00:25:51,N,1,,,,,1,1.90,12,0.5,0.5,1,0,,0.3,14.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000029,1,2017-12-15 00:27:17,2017-12-15 00:35:39,N,1,,,,,1,1.00,7,0.5,0.5,1.65,0,,0.3,9.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000030,1,2017-12-15 00:37:01,2017-12-15 00:41:47,N,1,,,,,1,0.60,5,0.5,0.5,1.25,0,,0.3,7.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000031,1,2017-12-15 00:44:51,2017-12-15 00:52:31,N,1,,,,,1,1.80,8,0.5,0.5,1,0,,0.3,10.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000032,1,2017-12-15 00:58:04,2017-12-15 01:42:27,N,1,,,,,1,12.10,41,0.5,0.5,2,0,,0.3,44.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000033,1,2017-12-15 00:04:01,2017-12-15 00:20:26,N,1,,,,,1,1.70,12,0.5,0.5,0,0,,0.3,13.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000034,1,2017-12-15 00:21:36,2017-12-15 00:23:09,N,1,,,,,1,0.20,3,0.5,0.5,0,0,,0.3,4.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000035,1,2017-12-15 00:24:05,2017-12-15 00:38:33,N,1,,,,,1,3.10,12.5,0.5,0.5,0,0,,0.3,13.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000036,1,2017-12-15 00:49:14,2017-12-15 01:05:23,N,1,,,,,1,3.90,15,0.5,0.5,3.25,0,,0.3,19.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000037,1,2017-12-15 00:19:03,2017-12-15 00:44:02,N,1,,,,,1,3.50,17,0.5,0.5,2.75,0,,0.3,21.05,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000038,1,2017-12-15 00:47:45,2017-12-15 01:01:16,N,1,,,,,1,4.90,16.5,0.5,0.5,0,0,,0.3,17.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000039,2,2017-12-15 00:23:06,2017-12-15 00:24:06,N,1,,,,,1,0.36,3.5,0.5,0.5,0,0,,0.3,4.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000040,2,2017-12-15 00:27:28,2017-12-15 00:30:57,N,1,,,,,1,0.99,5,0.5,0.5,1.26,0,,0.3,7.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000041,2,2017-12-15 00:18:20,2017-12-15 00:41:29,N,1,,,,,1,3.74,18,0.5,0.5,3.86,0,,0.3,23.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000042,1,2017-12-15 00:08:31,2017-12-15 00:11:07,N,1,,,,,1,0.50,4,0.5,0.5,1.05,0,,0.3,6.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000043,1,2017-12-15 00:13:03,2017-12-15 00:33:47,N,1,,,,,1,7.20,22.5,0.5,0.5,0,5.76,,0.3,29.56,3,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000044,2,2017-12-15 00:30:10,2017-12-15 00:45:02,N,1,,,,,1,3.02,12.5,0.5,0.5,0,0,,0.3,13.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000045,2,2017-12-15 00:49:42,2017-12-15 01:13:51,N,1,,,,,1,5.27,20,0.5,0.5,2,0,,0.3,23.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000046,2,2017-12-15 00:05:58,2017-12-15 00:25:43,N,1,,,,,1,4.15,17,0.5,0.5,4.58,0,,0.3,22.88,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000047,2,2017-12-15 00:37:28,2017-12-15 00:42:26,N,1,,,,,1,1.00,5.5,0.5,0.5,1.36,0,,0.3,8.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000048,1,2017-12-15 00:15:17,2017-12-15 00:37:01,N,1,,,,,1,4.00,16.5,0.5,0.5,3.55,0,,0.3,21.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000049,1,2017-12-15 00:42:53,2017-12-15 00:59:39,N,1,,,,,1,3.30,14.5,0.5,0.5,3.15,0,,0.3,18.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000050,2,2017-12-15 00:33:52,2017-12-15 00:40:49,N,1,,,,,1,0.40,6,0.5,0.5,0,0,,0.3,7.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000051,2,2017-12-15 00:46:35,2017-12-15 00:56:49,N,1,,,,,1,1.26,8.5,0.5,0.5,2,0,,0.3,11.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000052,1,2017-12-15 00:06:04,2017-12-15 00:31:04,N,1,,,,,1,4.50,20,0.5,0.5,4.26,0,,0.3,25.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000053,2,2017-12-15 00:30:11,2017-12-15 00:34:09,N,1,,,,,3,1.14,5.5,0.5,0.5,2.04,0,,0.3,8.84,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000054,2,2017-12-15 00:45:10,2017-12-15 01:11:33,N,1,,,,,5,10.50,31.5,0.5,0.5,0,5.76,,0.3,38.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000055,2,2017-12-15 00:31:27,2017-12-15 00:53:23,N,1,,,,,2,4.40,18,0.5,0.5,4.82,0,,0.3,24.12,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000056,2,2017-12-15 00:23:15,2017-12-15 00:33:59,N,1,,,,,1,1.26,8.5,0.5,0.5,1.96,0,,0.3,11.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000057,1,2017-12-15 00:18:49,2017-12-15 00:25:53,N,1,,,,,2,1.10,6.5,0.5,0.5,1.55,0,,0.3,9.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000058,1,2017-12-15 00:28:53,2017-12-15 00:35:26,N,1,,,,,1,0.80,6,0.5,0.5,1.45,0,,0.3,8.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000059,1,2017-12-15 00:36:04,2017-12-15 00:58:54,N,1,,,,,1,4.00,17,0.5,0.5,3.65,0,,0.3,21.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000060,1,2017-12-15 00:52:09,2017-12-15 00:59:47,N,1,,,,,1,1.00,7,0.5,0.5,0,0,,0.3,8.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000061,2,2017-12-15 00:07:57,2017-12-15 00:25:09,N,1,,,,,1,2.79,13,0.5,0.5,0,0,,0.3,14.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000062,2,2017-12-15 00:26:09,2017-12-15 00:29:52,N,1,,,,,1,0.68,4.5,0.5,0.5,0,0,,0.3,5.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000063,2,2017-12-15 00:31:59,2017-12-15 00:40:18,N,1,,,,,1,1.31,7.5,0.5,0.5,1.25,0,,0.3,10.05,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000064,2,2017-12-15 00:49:06,2017-12-15 01:07:13,N,1,,,,,1,4.21,15.5,0.5,0.5,3.36,0,,0.3,20.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000065,2,2017-12-15 00:56:03,2017-12-15 01:37:49,N,1,,,,,1,22.40,62.5,0.5,0.5,17.39,5.76,,0.3,86.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000066,1,2017-12-15 00:01:19,2017-12-15 00:12:58,N,1,,,,,1,1.80,10,0.5,0.5,1,0,,0.3,12.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000067,1,2017-12-15 00:18:24,2017-12-15 01:04:16,N,5,,,,,1,5.00,0,0,0,0,10.5,,0.3,10.8,3,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000068,2,2017-12-15 00:08:27,2017-12-15 00:38:37,N,1,,,,,1,4.88,21.5,0.5,0.5,2.28,0,,0.3,25.08,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000069,2,2017-12-15 00:42:27,2017-12-15 00:53:50,N,1,,,,,1,5.28,17,0.5,0.5,4.81,5.76,,0.3,28.87,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000070,2,2017-12-15 00:05:56,2017-12-15 00:22:42,N,1,,,,,2,2.29,11.5,0.5,0.5,2.56,0,,0.3,15.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000071,2,2017-12-15 00:27:09,2017-12-15 00:31:47,N,1,,,,,2,0.77,5,0.5,0.5,0,0,,0.3,6.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000072,2,2017-12-15 00:42:03,2017-12-15 01:00:47,N,1,,,,,2,3.28,14.5,0.5,0.5,3.95,0,,0.3,19.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000073,1,2017-12-15 00:45:21,2017-12-15 00:50:56,N,1,,,,,1,1.50,7,0.5,0.5,1.65,0,,0.3,9.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000074,2,2017-12-15 00:08:09,2017-12-15 00:21:11,N,1,,,,,5,1.97,10.5,0.5,0.5,2.36,0,,0.3,14.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000075,2,2017-12-15 00:21:48,2017-12-15 00:57:13,N,1,,,,,5,5.40,24,0.5,0.5,0,0,,0.3,25.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000076,1,2017-12-15 00:09:11,2017-12-15 00:35:18,N,1,,,,,4,5.20,21.5,0.5,0.5,0,0,,0.3,22.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000077,1,2017-12-15 00:42:08,2017-12-15 00:42:09,N,1,,,,,1,5.30,2.5,0.5,0.5,0,0,,0.3,3.8,3,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000078,1,2017-12-15 00:45:28,2017-12-15 01:05:27,N,1,,,,,1,6.90,22.5,0.5,0.5,3,5.76,,0.3,32.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000079,2,2017-12-15 00:03:48,2017-12-15 00:38:38,N,1,,,,,6,7.86,29,0.5,0.5,9.09,0,,0.3,39.39,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000080,1,2017-12-15 00:25:04,2017-12-15 00:28:18,N,1,,,,,2,0.60,4.5,0.5,0.5,0,0,,0.3,5.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000081,1,2017-12-15 00:34:23,2017-12-15 00:59:40,N,1,,,,,1,3.60,17.5,0.5,0.5,3,0,,0.3,21.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000082,2,2017-12-15 00:53:37,2017-12-15 01:11:06,N,1,,,,,1,2.77,13,0.5,0.5,2.86,0,,0.3,17.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000083,2,2017-12-15 00:09:00,2017-12-15 00:42:14,N,1,,,,,1,4.94,24.5,0.5,0.5,5.16,0,,0.3,30.96,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000084,1,2017-12-15 00:00:49,2017-12-15 00:07:51,N,1,,,,,1,0.50,6,0.5,0.5,1,0,,0.3,8.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000085,1,2017-12-15 00:22:47,2017-12-15 00:30:21,N,1,,,,,4,0.60,6.5,0.5,0.5,0,0,,0.3,7.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000086,1,2017-12-15 00:32:04,2017-12-15 00:39:38,N,1,,,,,1,0.90,6.5,0.5,0.5,1.55,0,,0.3,9.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000087,1,2017-12-15 00:50:23,2017-12-15 01:05:29,N,1,,,,,1,3.00,13,0.5,0.5,2.85,0,,0.3,17.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000088,2,2017-12-15 00:08:01,2017-12-15 00:25:35,N,1,,,,,1,2.80,14,0.5,0.5,0,0,,0.3,15.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000089,2,2017-12-15 00:27:27,2017-12-15 00:55:33,N,1,,,,,1,4.05,19.5,0.5,0.5,5.2,0,,0.3,26,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000090,2,2017-12-15 00:11:22,2017-12-15 00:37:10,N,1,,,,,2,5.04,19.5,0.5,0.5,0,0,,0.3,20.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000091,2,2017-12-15 01:00:50,2017-12-15 01:29:18,N,1,,,,,2,7.11,24.5,0.5,0.5,0,0,,0.3,25.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000092,1,2017-12-15 00:26:01,2017-12-15 00:48:49,N,1,,,,,1,14.60,40,0.5,0.5,8.25,0,,0.3,49.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000093,1,2017-12-15 00:10:53,2017-12-15 00:48:57,N,1,,,,,1,14.30,43.5,0.5,0.5,8.95,0,,0.3,53.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000094,2,2017-12-15 00:31:37,2017-12-15 00:37:18,N,1,,,,,1,0.48,5,0.5,0.5,1.26,0,,0.3,7.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000095,2,2017-12-15 00:41:42,2017-12-15 00:51:54,N,1,,,,,1,1.66,9,0.5,0.5,1,0,,0.3,11.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000096,2,2017-12-15 00:53:10,2017-12-15 01:12:40,N,1,,,,,1,3.08,15.5,0.5,0.5,1,0,,0.3,17.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000097,2,2017-12-15 00:17:32,2017-12-15 00:30:33,N,1,,,,,5,1.14,9,0.5,0.5,1,0,,0.3,11.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000098,2,2017-12-15 00:57:51,2017-12-15 01:11:23,N,1,,,,,5,2.01,10,0.5,0.5,2.26,0,,0.3,13.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000099,1,2017-12-15 00:43:52,2017-12-15 00:52:43,N,1,,,,,4,1.60,8,0.5,0.5,0,0,,0.3,9.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000100,1,2017-12-15 00:02:26,2017-12-15 00:13:59,N,1,,,,,1,1.10,8.5,0.5,0.5,0,0,,0.3,9.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000101,1,2017-12-15 00:14:19,2017-12-15 00:56:54,N,5,,,,,1,18.50,100,0,0,15,0,,0.3,115.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000102,1,2017-12-15 00:40:45,2017-12-15 00:54:30,N,1,,,,,1,2.10,12,0.5,0.5,2.2,0,,0.3,15.5,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000103,2,2017-12-15 00:37:36,2017-12-15 00:53:05,N,1,,,,,1,3.60,14,0.5,0.5,2,0,,0.3,17.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000104,2,2017-12-15 00:53:59,2017-12-15 00:55:30,N,1,,,,,1,0.22,3,0.5,0.5,0,0,,0.3,4.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000105,1,2017-12-15 00:36:23,2017-12-15 00:51:06,N,1,,,,,1,2.30,11.5,0.5,0.5,2.55,0,,0.3,15.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000106,2,2017-12-15 00:19:27,2017-12-15 00:31:53,N,1,,,,,6,1.49,9.5,0.5,0.5,2.16,0,,0.3,12.96,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000107,2,2017-12-15 00:39:17,2017-12-15 00:44:01,N,1,,,,,6,0.79,5.5,0.5,0.5,0,0,,0.3,6.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000108,1,2017-12-15 00:09:38,2017-12-15 00:47:23,N,1,,,,,2,17.90,51,0.5,0.5,1,17.28,,0.3,70.58,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000109,1,2017-12-15 00:49:21,2017-12-15 01:11:02,N,1,,,,,1,14.00,38,0.5,0.5,1,0,,0.3,40.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000110,2,2017-12-15 00:32:24,2017-12-15 00:38:56,N,1,,,,,1,0.80,6,0.5,0.5,0,0,,0.3,7.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000111,2,2017-12-15 00:40:45,2017-12-15 00:55:44,N,1,,,,,1,1.54,10.5,0.5,0.5,2.36,0,,0.3,14.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000112,1,2017-12-15 00:34:39,2017-12-15 00:41:03,N,1,,,,,1,1.00,6.5,0.5,0.5,1.95,0,,0.3,9.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000113,1,2017-12-15 00:43:04,2017-12-15 00:58:19,N,1,,,,,1,2.80,12.5,0.5,0.5,2.75,0,,0.3,16.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000114,2,2017-12-15 00:32:50,2017-12-15 00:45:30,N,1,,,,,1,1.56,9.5,0.5,0.5,2.16,0,,0.3,12.96,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000115,2,2017-12-15 00:50:04,2017-12-15 00:59:07,N,1,,,,,1,1.70,8.5,0.5,0.5,2.45,0,,0.3,12.25,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000116,1,2017-12-15 00:31:07,2017-12-15 00:35:35,N,1,,,,,1,0.60,5,0.5,0.5,1,0,,0.3,7.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000117,1,2017-12-15 00:44:08,2017-12-15 01:12:08,N,1,,,,,1,7.30,26.5,0.5,0.5,0,0,,0.3,27.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000118,2,2017-12-15 00:40:24,2017-12-15 00:45:14,N,1,,,,,1,1.47,6.5,0.5,0.5,0,0,,0.3,7.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000119,2,2017-12-15 00:46:19,2017-12-15 01:02:19,N,1,,,,,2,1.87,11,0.5,0.5,0,0,,0.3,12.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000120,2,2017-12-15 00:02:07,2017-12-15 00:04:51,N,1,,,,,1,0.36,4,0.5,0.5,0,0,,0.3,5.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000121,2,2017-12-15 00:22:58,2017-12-15 00:34:06,N,1,,,,,2,0.73,8,0.5,0.5,1.86,0,,0.3,11.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000122,2,2017-12-15 00:41:31,2017-12-15 00:48:10,N,1,,,,,1,0.82,6,0.5,0.5,1.46,0,,0.3,8.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000123,2,2017-12-15 00:52:19,2017-12-15 01:01:01,N,1,,,,,1,1.54,7.5,0.5,0.5,0,0,,0.3,8.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000124,1,2017-12-15 00:31:59,2017-12-15 00:44:42,N,1,,,,,0,2.20,10.5,0.5,0.5,2,0,,0.3,13.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000125,2,2017-12-15 00:38:43,2017-12-15 00:44:54,N,1,,,,,1,1.44,7,0.5,0.5,1.66,0,,0.3,9.96,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000126,2,2017-12-15 00:51:10,2017-12-15 01:05:47,N,1,,,,,5,2.75,12,0.5,0.5,0,0,,0.3,13.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000127,2,2017-12-15 00:09:57,2017-12-15 00:25:53,N,1,,,,,1,2.61,12.5,0.5,0.5,2.76,0,,0.3,16.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000128,2,2017-12-15 00:27:34,2017-12-15 00:44:22,N,1,,,,,1,7.11,22.5,0.5,0.5,3.8,0,,0.3,27.6,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000129,2,2017-12-15 00:29:13,2017-12-15 00:53:56,N,1,,,,,2,9.73,30,0.5,0.5,0,0,,0.3,31.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000130,2,2017-12-15 00:28:32,2017-12-15 00:33:57,N,1,,,,,6,0.59,5.5,0.5,0.5,1.36,0,,0.3,8.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000131,2,2017-12-15 00:37:51,2017-12-15 01:55:38,N,1,,,,,6,15.34,58,0.5,0.5,0,0,,0.3,59.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000132,2,2017-12-15 00:04:24,2017-12-15 00:13:56,N,1,,,,,3,1.46,8,0.5,0.5,2,0,,0.3,11.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000133,2,2017-12-15 00:38:48,2017-12-15 01:01:48,N,1,,,,,2,2.72,15.5,0.5,0.5,4.2,0,,0.3,21,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000134,1,2017-12-15 00:33:15,2017-12-15 00:47:55,N,1,,,,,1,9.80,27,0.5,0.5,8,0,,0.3,36.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000135,2,2017-12-15 00:41:39,2017-12-15 01:14:47,N,1,,,,,1,8.90,30.5,0.5,0.5,0,0,,0.3,31.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000136,1,2017-12-15 00:50:41,2017-12-15 00:59:29,N,1,,,,,1,1.30,8,0.5,0.5,1.5,0,,0.3,10.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000137,2,2017-12-15 00:49:15,2017-12-15 00:58:07,N,1,,,,,4,1.87,9,0.5,0.5,2.06,0,,0.3,12.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000138,2,2017-12-14 23:51:20,2017-12-15 00:50:09,N,1,,,,,1,10.08,43.5,0.5,0.5,8.96,0,,0.3,53.76,1,,,,yellow,0.09,1,1.2,32,22,5.82,,,,,,,,,,,,,,,,,,,,
1460000139,2,2017-12-15 00:05:31,2017-12-15 00:19:55,N,1,,,,,5,2.97,12,0.5,0.5,2.66,0,,0.3,15.96,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000140,2,2017-12-15 00:29:06,2017-12-15 01:09:26,N,1,,,,,3,13.26,41.5,0.5,0.5,0,0,,0.3,42.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000141,2,2017-12-15 00:26:34,2017-12-15 00:51:30,N,1,,,,,1,5.36,21,0.5,0.5,2,0,,0.3,24.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000142,2,2017-12-15 00:04:47,2017-12-15 00:23:09,N,1,,,,,1,2.68,13.5,0.5,0.5,4.44,0,,0.3,19.24,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000143,2,2017-12-15 00:31:16,2017-12-15 00:44:45,N,1,,,,,1,2.40,11,0.5,0.5,1,0,,0.3,13.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000144,2,2017-12-15 00:49:37,2017-12-15 01:21:04,N,1,,,,,1,6.83,25.5,0.5,0.5,5.36,0,,0.3,32.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000145,2,2017-12-15 00:10:01,2017-12-15 00:16:49,N,1,,,,,1,0.91,6.5,0.5,0.5,0,0,,0.3,7.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000146,2,2017-12-15 00:19:03,2017-12-15 00:30:11,N,1,,,,,1,1.81,9.5,0.5,0.5,2,0,,0.3,12.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000147,2,2017-12-15 00:31:07,2017-12-15 00:34:53,N,1,,,,,1,0.92,5,0.5,0.5,0,0,,0.3,6.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000148,2,2017-12-15 00:43:48,2017-12-15 01:04:53,N,1,,,,,1,4.63,18,0.5,0.5,3.86,0,,0.3,23.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000149,1,2017-12-15 00:22:11,2017-12-15 00:32:11,N,1,,,,,1,2.20,10,0.5,0.5,2.25,0,,0.3,13.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000150,1,2017-12-15 00:48:02,2017-12-15 01:01:45,N,1,,,,,1,1.30,10.5,0.5,0.5,2.35,0,,0.3,14.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000151,2,2017-12-15 00:05:43,2017-12-15 00:12:57,N,1,,,,,1,2.05,9,0.5,0.5,2.06,0,,0.3,14.31,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000152,2,2017-12-15 00:32:48,2017-12-15 00:55:01,N,1,,,,,1,9.23,28,0.5,0.5,6,5.76,,0.3,41.06,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000153,2,2017-12-15 01:00:10,2017-12-15 01:16:04,N,1,,,,,1,4.37,15.5,0.5,0.5,3.36,0,,0.3,20.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000154,2,2017-12-15 00:08:13,2017-12-15 00:28:37,N,1,,,,,1,3.22,15,0.5,0.5,0,0,,0.3,16.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000155,2,2017-12-15 00:31:00,2017-12-15 00:41:23,N,1,,,,,1,1.60,8.5,0.5,0.5,0,0,,0.3,9.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000156,2,2017-12-15 00:42:50,2017-12-15 00:53:34,N,1,,,,,1,1.37,8.5,0.5,0.5,0,0,,0.3,9.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000157,2,2017-12-15 00:55:45,2017-12-15 00:59:31,N,1,,,,,1,1.08,5.5,0.5,0.5,1.7,0,,0.3,8.5,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000158,2,2017-12-15 00:09:20,2017-12-15 00:23:24,N,1,,,,,1,2.62,11.5,0.5,0.5,3.2,0,,0.3,16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000159,2,2017-12-15 00:24:46,2017-12-15 00:36:33,N,1,,,,,1,1.35,9.5,0.5,0.5,0,0,,0.3,10.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000160,2,2017-12-15 00:40:16,2017-12-15 00:57:48,N,1,,,,,1,3.03,13.5,0.5,0.5,1,0,,0.3,15.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000161,1,2017-12-15 00:00:35,2017-12-15 00:12:21,N,1,,,,,1,2.00,9.5,0.5,0.5,3.2,0,,0.3,14,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000162,1,2017-12-15 00:13:48,2017-12-15 00:20:03,N,1,,,,,1,1.00,6,0.5,0.5,0,0,,0.3,7.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000163,1,2017-12-15 00:20:47,2017-12-15 00:31:56,N,1,,,,,1,1.70,9,0.5,0.5,2.05,0,,0.3,12.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000164,1,2017-12-15 00:33:29,2017-12-15 01:02:24,N,1,,,,,1,9.00,29,0.5,0.5,7.55,0,,0.3,37.85,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000165,1,2017-12-15 00:09:42,2017-12-15 00:18:12,N,1,,,,,2,2.40,9,0.5,0.5,0,0,,0.3,10.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000166,1,2017-12-15 00:57:43,2017-12-15 01:16:05,N,1,,,,,1,3.30,13.5,0.5,0.5,2.95,0,,0.3,17.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000167,2,2017-12-15 00:09:55,2017-12-15 00:19:30,N,1,,,,,1,1.45,8,0.5,0.5,0,0,,0.3,9.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000168,2,2017-12-15 00:20:11,2017-12-15 00:33:01,N,1,,,,,1,2.18,11,0.5,0.5,2.46,0,,0.3,14.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000169,2,2017-12-15 00:35:09,2017-12-15 01:09:59,N,1,,,,,1,17.48,51,0.5,0.5,0,5.76,,0.3,58.06,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000170,2,2017-12-15 00:26:34,2017-12-15 00:41:45,N,1,,,,,1,1.81,11,0.5,0.5,3.69,0,,0.3,15.99,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000171,2,2017-12-15 00:46:38,2017-12-15 01:08:50,N,1,,,,,1,3.31,16,0.5,0.5,3.46,0,,0.3,20.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000172,2,2017-12-15 00:19:37,2017-12-15 00:35:33,N,1,,,,,2,1.61,10.5,0.5,0.5,5,0,,0.3,16.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000173,2,2017-12-15 00:38:17,2017-12-15 00:45:48,N,1,,,,,2,1.01,6.5,0.5,0.5,1.56,0,,0.3,9.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000174,2,2017-12-15 00:58:49,2017-12-15 01:35:08,N,1,,,,,2,5.57,26.5,0.5,0.5,5.56,0,,0.3,33.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000175,1,2017-12-15 00:17:10,2017-12-15 00:52:01,N,1,,,,,1,4.00,23,0.5,0.5,6,5.76,,0.3,36.06,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000176,2,2017-12-15 00:51:45,2017-12-15 01:16:36,N,2,,,,,2,16.55,52,0,0.5,10.56,0,,0.3,63.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000177,2,2017-12-15 00:53:53,2017-12-15 00:59:08,N,1,,,,,3,1.09,5.5,0.5,0.5,1.36,0,,0.3,8.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000178,2,2017-12-15 00:17:56,2017-12-15 00:41:33,N,1,,,,,1,5.12,19.5,0.5,0.5,4.16,0,,0.3,24.96,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000179,2,2017-12-15 00:45:41,2017-12-15 00:58:18,N,1,,,,,1,2.83,11.5,0.5,0.5,3.2,0,,0.3,16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000180,2,2017-12-15 00:01:12,2017-12-15 00:11:38,N,1,,,,,1,1.65,8.5,0.5,0.5,2.45,0,,0.3,12.25,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000181,2,2017-12-15 00:12:58,2017-12-15 00:19:54,N,1,,,,,1,0.89,6.5,0.5,0.5,1.95,0,,0.3,9.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000182,2,2017-12-15 00:24:50,2017-12-15 00:26:01,N,5,,,,,1,0.00,100,0,0.5,8.2,0,,0.3,109,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000183,1,2017-12-15 00:32:04,2017-12-15 01:01:36,N,2,,,,,3,20.80,52,0,0.5,0,5.76,,0.3,58.56,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000184,2,2017-12-15 00:18:06,2017-12-15 01:07:36,N,1,,,,,1,25.24,71,0.5,0.5,15.61,5.76,,0.3,93.67,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000185,2,2017-12-15 00:10:06,2017-12-15 00:20:55,N,1,,,,,1,1.84,9.5,0.5,0.5,2.5,0,,0.3,13.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000186,2,2017-12-15 00:31:27,2017-12-15 00:45:44,N,1,,,,,1,1.66,10.5,0.5,0.5,1.2,0,,0.3,13,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000187,2,2017-12-15 00:53:13,2017-12-15 01:13:27,N,1,,,,,1,2.40,14,0.5,0.5,0,0,,0.3,15.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000188,2,2017-12-15 00:03:43,2017-12-15 00:11:39,N,1,,,,,2,1.58,7.5,0.5,0.5,0,0,,0.3,8.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000189,2,2017-12-15 00:18:02,2017-12-15 00:43:48,N,1,,,,,2,4.61,19,0.5,0.5,0,0,,0.3,20.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000190,2,2017-12-15 00:48:20,2017-12-15 01:09:19,N,1,,,,,2,2.67,15,0.5,0.5,1,0,,0.3,17.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000191,1,2017-12-15 00:05:45,2017-12-15 00:26:31,N,1,,,,,2,3.50,17.5,0.5,0.5,2,0,,0.3,20.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000192,1,2017-12-15 00:35:30,2017-12-15 00:47:19,N,1,,,,,1,2.50,10.5,0.5,0.5,2,0,,0.3,13.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000193,1,2017-12-15 00:55:01,2017-12-15 01:06:13,N,1,,,,,1,0.90,8.5,0.5,0.5,2.45,0,,0.3,12.25,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000194,2,2017-12-15 00:23:31,2017-12-15 00:30:12,N,1,,,,,1,0.98,6.5,0.5,0.5,1,0,,0.3,8.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000195,2,2017-12-15 00:33:17,2017-12-15 00:55:09,N,1,,,,,1,3.20,15.5,0.5,0.5,3.36,0,,0.3,20.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000196,2,2017-12-15 00:56:39,2017-12-15 01:14:39,N,1,,,,,1,4.65,18,0.5,0.5,3.86,0,,0.3,23.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000197,2,2017-12-15 00:12:57,2017-12-15 00:18:57,N,1,,,,,1,1.03,6,0.5,0.5,0,0,,0.3,7.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000198,2,2017-12-15 00:20:41,2017-12-15 00:46:44,N,1,,,,,1,5.33,21,0.5,0.5,1,0,,0.3,23.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000199,2,2017-12-14 23:56:15,2017-12-15 00:20:06,N,1,,,,,1,10.45,30.5,0.5,0.5,7.51,5.76,,0.3,45.07,1,,,,yellow,0.09,1,1.2,32,22,5.82,,,,,,,,,,,,,,,,,,,,
1460000200,2,2017-12-15 00:30:30,2017-12-15 01:22:30,N,1,,,,,1,13.51,48,0.5,0.5,0,0,,0.3,49.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000201,2,2017-12-15 00:03:15,2017-12-15 00:21:04,N,1,,,,,1,2.38,13,0.5,0.5,2.86,0,,0.3,17.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000202,2,2017-12-15 00:23:15,2017-12-15 00:27:40,N,1,,,,,1,0.47,5,0.5,0.5,1.26,0,,0.3,7.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000203,2,2017-12-15 00:31:32,2017-12-15 00:49:33,N,1,,,,,1,2.91,14,0.5,0.5,1,0,,0.3,16.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000204,2,2017-12-15 00:08:42,2017-12-15 00:12:21,N,1,,,,,1,0.39,4.5,0.5,0.5,0,0,,0.3,5.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000205,2,2017-12-15 00:13:31,2017-12-15 00:24:20,N,1,,,,,1,2.49,10.5,0.5,0.5,1.2,0,,0.3,13,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000206,2,2017-12-15 00:29:19,2017-12-15 00:58:28,N,1,,,,,1,3.72,20,0.5,0.5,4.26,0,,0.3,25.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000207,2,2017-12-15 00:28:14,2017-12-15 00:34:25,N,1,,,,,1,1.55,7,0.5,0.5,1,0,,0.3,9.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000208,2,2017-12-15 00:36:22,2017-12-15 00:37:55,N,1,,,,,1,0.45,3.5,0.5,0.5,0.96,0,,0.3,5.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000209,2,2017-12-15 00:39:37,2017-12-15 01:35:03,N,1,,,,,1,13.96,47,0.5,0.5,5.08,5.76,,0.3,59.14,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000210,2,2017-12-15 00:25:04,2017-12-15 00:33:25,N,1,,,,,1,1.64,8,0.5,0.5,1.86,0,,0.3,11.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000211,2,2017-12-15 00:34:48,2017-12-15 00:43:08,N,1,,,,,1,1.60,8,0.5,0.5,1.86,0,,0.3,11.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000212,2,2017-12-15 01:01:52,2017-12-15 01:36:51,N,1,,,,,1,9.76,35,0.5,0.5,0,0,,0.3,36.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000213,1,2017-12-15 00:46:44,2017-12-15 00:49:28,N,1,,,,,1,0.50,4,0.5,0.5,0,0,,0.3,5.3,3,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000214,2,2017-12-15 00:06:12,2017-12-15 00:31:47,N,1,,,,,1,4.02,18,0.5,0.5,1.5,0,,0.3,20.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000215,2,2017-12-15 00:44:58,2017-12-15 01:05:54,N,1,,,,,1,4.67,18,0.5,0.5,0,0,,0.3,19.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000216,2,2017-12-15 00:17:30,2017-12-15 00:24:38,N,1,,,,,1,1.71,8,0.5,0.5,0,0,,0.3,9.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000217,2,2017-12-15 00:34:51,2017-12-15 00:47:29,N,1,,,,,1,1.75,10,0.5,0.5,2.26,0,,0.3,13.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000218,2,2017-12-15 00:50:18,2017-12-15 01:10:23,N,1,,,,,1,3.51,16,0.5,0.5,2.5,0,,0.3,19.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000219,1,2017-12-15 00:21:43,2017-12-15 00:30:20,N,1,,,,,1,1.20,7.5,0.5,0.5,1.75,0,,0.3,10.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000220,1,2017-12-15 00:45:42,2017-12-15 01:20:41,N,1,,,,,1,6.70,27,0.5,0.5,6.8,5.76,,0.3,40.86,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000221,2,2017-12-15 00:16:52,2017-12-15 00:27:51,N,1,,,,,2,1.33,8.5,0.5,0.5,1,0,,0.3,10.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000222,2,2017-12-15 00:31:34,2017-12-15 00:45:50,N,1,,,,,2,6.73,20.5,0.5,0.5,4.36,0,,0.3,26.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000223,2,2017-12-15 00:02:41,2017-12-15 00:30:26,N,1,,,,,1,6.01,23,0.5,0.5,0,0,,0.3,24.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000224,2,2017-12-15 00:47:01,2017-12-15 00:55:49,N,1,,,,,1,1.98,8.5,0.5,0.5,2,0,,0.3,11.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000225,2,2017-12-15 00:58:15,2017-12-15 01:14:41,N,1,,,,,1,3.40,13.5,0.5,0.5,4.44,0,,0.3,19.24,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000226,2,2017-12-15 00:13:35,2017-12-15 00:27:29,N,1,,,,,1,2.59,11.5,0.5,0.5,3.84,0,,0.3,16.64,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000227,2,2017-12-15 00:38:30,2017-12-15 00:43:51,N,1,,,,,1,1.15,6,0.5,0.5,0,0,,0.3,7.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000228,2,2017-12-15 00:45:24,2017-12-15 00:51:18,N,1,,,,,1,1.00,6,0.5,0.5,0,0,,0.3,7.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000229,2,2017-12-15 00:55:04,2017-12-15 01:08:57,N,1,,,,,1,3.47,13,0.5,0.5,0,0,,0.3,14.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000230,2,2017-12-15 00:04:17,2017-12-15 00:10:33,N,1,,,,,1,1.23,6.5,0.5,0.5,1.95,0,,0.3,9.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000231,2,2017-12-15 00:13:03,2017-12-15 00:26:55,N,1,,,,,1,2.78,12,0.5,0.5,3.32,0,,0.3,16.62,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000232,2,2017-12-15 00:30:01,2017-12-15 00:37:15,N,1,,,,,1,0.89,6.5,0.5,0.5,0,0,,0.3,7.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000233,2,2017-12-15 00:38:43,2017-12-15 00:47:26,N,1,,,,,1,1.40,8,0.5,0.5,1.86,0,,0.3,11.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000234,2,2017-12-15 00:49:45,2017-12-15 00:53:07,N,1,,,,,1,0.59,4.5,0.5,0.5,1.16,0,,0.3,6.96,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000235,2,2017-12-15 00:56:01,2017-12-15 01:07:10,N,1,,,,,1,2.46,10.5,0.5,0.5,0,0,,0.3,11.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000236,2,2017-12-15 00:33:40,2017-12-15 00:39:42,N,1,,,,,2,1.31,6.5,0.5,0.5,1,0,,0.3,8.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000237,2,2017-12-15 00:55:08,2017-12-15 01:29:46,N,1,,,,,2,11.64,37,0.5,0.5,8.81,5.76,,0.3,52.87,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000238,2,2017-12-15 00:38:07,2017-12-15 00:42:29,N,1,,,,,6,0.95,5.5,0.5,0.5,1.7,0,,0.3,8.5,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000239,1,2017-12-15 00:02:32,2017-12-15 00:14:54,N,1,,,,,2,2.40,11,0.5,0.5,0,0,,0.3,12.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000240,1,2017-12-15 00:47:13,2017-12-15 01:23:55,N,1,,,,,2,9.40,32.5,0.5,0.5,0,0,,0.3,33.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000241,2,2017-12-15 00:07:00,2017-12-15 00:22:39,N,1,,,,,2,2.66,12,0.5,0.5,3.99,0,,0.3,17.29,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000242,2,2017-12-15 00:24:20,2017-12-15 00:39:27,N,1,,,,,2,3.08,12.5,0.5,0.5,1,0,,0.3,14.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000243,2,2017-12-15 00:42:39,2017-12-15 00:57:57,N,1,,,,,2,2.33,11.5,0.5,0.5,2.56,0,,0.3,15.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000244,1,2017-12-15 00:00:39,2017-12-15 00:03:05,N,1,,,,,1,0.40,4,0.5,0.5,0,0,,0.3,5.3,3,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000245,2,2017-12-15 00:10:32,2017-12-15 00:43:16,N,1,,,,,5,8.04,28,0.5,0.5,3.7,0,,0.3,33,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000246,2,2017-12-15 00:35:42,2017-12-15 00:51:25,N,1,,,,,1,8.76,25,0.5,0.5,5,5.76,,0.3,37.06,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000247,2,2017-12-15 00:52:46,2017-12-15 01:04:05,N,1,,,,,3,1.78,9.5,0.5,0.5,1,0,,0.3,11.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000248,2,2017-12-15 00:05:05,2017-12-15 00:11:34,N,1,,,,,1,1.08,6.5,0.5,0.5,0,0,,0.3,7.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000249,2,2017-12-15 00:12:28,2017-12-15 00:37:12,N,1,,,,,1,3.21,16.5,0.5,0.5,3.56,0,,0.3,21.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000250,2,2017-12-15 00:44:06,2017-12-15 01:04:05,N,1,,,,,1,5.06,17,0.5,0.5,0,0,,0.3,18.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000251,1,2017-12-15 00:05:32,2017-12-15 00:09:10,N,1,,,,,1,0.60,4.5,0.5,0.5,1.15,0,,0.3,6.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000252,1,2017-12-15 00:27:29,2017-12-15 00:39:05,N,1,,,,,1,2.20,10,0.5,0.5,2.25,0,,0.3,13.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000253,1,2017-12-15 00:07:54,2017-12-15 00:10:50,N,1,,,,,2,0.60,4,0.5,0.5,0,0,,0.3,5.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000254,1,2017-12-15 00:12:02,2017-12-15 00:17:01,N,1,,,,,2,1.00,6,0.5,0.5,0,0,,0.3,7.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000255,1,2017-12-15 00:24:58,2017-12-15 00:37:38,N,1,,,,,1,1.60,9.5,0.5,0.5,1.7,0,,0.3,12.5,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000256,1,2017-12-15 00:41:49,2017-12-15 00:53:48,N,1,,,,,1,3.90,13,0.5,0.5,0,0,,0.3,14.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000257,2,2017-12-14 23:58:09,2017-12-15 00:02:07,N,1,,,,,1,0.49,4.5,0.5,0.5,1.16,0,,0.3,6.96,1,,,,yellow,0.09,1,1.2,32,22,5.82,,,,,,,,,,,,,,,,,,,,
1460000258,2,2017-12-15 00:05:04,2017-12-15 00:18:50,N,1,,,,,1,2.02,11,0.5,0.5,0,0,,0.3,12.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000259,2,2017-12-15 00:21:45,2017-12-15 00:55:32,N,1,,,,,1,5.18,23.5,0.5,0.5,4.96,0,,0.3,29.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000260,2,2017-12-15 00:04:25,2017-12-15 00:11:39,N,1,,,,,1,1.13,7,0.5,0.5,2.08,0,,0.3,10.38,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000261,2,2017-12-15 00:20:57,2017-12-15 00:31:42,N,1,,,,,1,1.08,8,0.5,0.5,1,0,,0.3,10.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000262,2,2017-12-15 00:39:26,2017-12-15 00:50:31,N,1,,,,,1,2.09,9.5,0.5,0.5,0,0,,0.3,10.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000263,2,2017-12-15 00:53:53,2017-12-15 01:04:58,N,1,,,,,1,1.76,9.5,0.5,0.5,1,0,,0.3,11.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000264,2,2017-12-15 00:08:30,2017-12-15 00:20:52,N,1,,,,,1,1.75,9.5,0.5,0.5,0,0,,0.3,10.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000265,2,2017-12-15 00:23:40,2017-12-15 00:41:49,N,1,,,,,1,2.18,12.5,0.5,0.5,2.76,0,,0.3,16.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000266,2,2017-12-15 00:44:35,2017-12-15 00:49:32,N,1,,,,,1,0.79,5.5,0.5,0.5,2.04,0,,0.3,8.84,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000267,2,2017-12-15 00:52:36,2017-12-15 01:02:00,N,1,,,,,1,0.73,7.5,0.5,0.5,1.76,0,,0.3,10.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000268,2,2017-12-15 00:31:52,2017-12-15 00:47:40,N,1,,,,,1,1.80,11.5,0.5,0.5,1.08,0,,0.3,13.88,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000269,2,2017-12-15 00:53:43,2017-12-15 01:05:05,N,1,,,,,1,2.54,10.5,0.5,0.5,0,0,,0.3,11.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000270,2,2017-12-15 00:06:34,2017-12-15 00:13:18,N,1,,,,,2,1.21,7,0.5,0.5,1.66,0,,0.3,9.96,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000271,2,2017-12-15 00:18:09,2017-12-15 00:25:11,N,1,,,,,2,1.29,7,0.5,0.5,2.08,0,,0.3,10.38,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000272,2,2017-12-15 00:30:15,2017-12-15 00:38:42,N,1,,,,,2,1.38,7.5,0.5,0.5,0,0,,0.3,8.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000273,2,2017-12-15 00:40:42,2017-12-15 00:44:57,N,1,,,,,2,1.32,6,0.5,0.5,0,0,,0.3,7.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000274,1,2017-12-15 00:47:31,2017-12-15 00:49:29,N,1,,,,,3,0.20,3.5,0.5,0.5,0,0,,0.3,4.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000275,1,2017-12-15 00:52:16,2017-12-15 01:28:36,N,1,,,,,1,7.30,29.5,0.5,0.5,0,0,,0.3,30.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000276,1,2017-12-15 00:01:57,2017-12-15 00:18:51,N,1,,,,,1,11.40,31.5,0.5,0.5,8.2,0,,0.3,41,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000277,1,2017-12-15 00:39:16,2017-12-15 01:04:21,N,1,,,,,1,10.10,30.5,0.5,0.5,7.5,5.76,,0.3,45.06,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000278,1,2017-12-15 00:18:06,2017-12-15 00:26:33,N,1,,,,,1,1.70,8,0.5,0.5,1.85,0,,0.3,11.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000279,1,2017-12-15 00:28:42,2017-12-15 01:10:48,N,1,,,,,1,8.70,34.5,0.5,0.5,7.15,0,,0.3,42.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000280,2,2017-12-15 00:20:44,2017-12-15 00:47:16,N,1,,,,,1,3.10,17.5,0.5,0.5,3.76,0,,0.3,22.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000281,2,2017-12-15 00:48:38,2017-12-15 00:58:46,N,1,,,,,1,1.87,9,0.5,0.5,2.06,0,,0.3,12.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000282,2,2017-12-15 00:04:31,2017-12-15 00:25:09,N,1,,,,,1,2.28,14.5,0.5,0.5,3.16,0,,0.3,18.96,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000283,2,2017-12-15 00:26:46,2017-12-15 00:33:08,N,1,,,,,1,0.56,5.5,0.5,0.5,0,0,,0.3,6.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000284,2,2017-12-15 00:37:12,2017-12-15 01:04:43,N,1,,,,,1,6.54,24,0.5,0.5,0,0,,0.3,25.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000285,2,2017-12-15 00:32:16,2017-12-15 01:27:04,N,1,,,,,1,14.64,50,0.5,0.5,10.26,0,,0.3,61.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000286,2,2017-12-15 00:59:27,2017-12-15 01:20:51,N,1,,,,,1,5.95,20,0.5,0.5,0,0,,0.3,21.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000287,2,2017-12-15 00:48:18,2017-12-15 01:14:34,N,1,,,,,1,10.73,32.5,0.5,0.5,0,5.76,,0.3,39.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000288,1,2017-12-15 00:07:31,2017-12-15 00:19:09,N,1,,,,,2,2.20,10,0.5,0.5,0,0,,0.3,11.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000289,1,2017-12-15 00:24:03,2017-12-15 00:42:25,N,1,,,,,1,2.30,13,0.5,0.5,3.55,0,,0.3,17.85,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000290,1,2017-12-15 00:44:12,2017-12-15 00:55:18,N,1,,,,,1,1.80,9.5,0.5,0.5,2.15,0,,0.3,12.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000291,1,2017-12-15 00:56:48,2017-12-15 01:13:09,N,1,,,,,2,2.60,13,0.5,0.5,0,0,,0.3,14.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000292,1,2017-12-15 00:19:32,2017-12-15 00:25:08,N,1,,,,,1,1.20,6,0.5,0.5,1,0,,0.3,8.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000293,1,2017-12-15 00:27:00,2017-12-15 00:51:04,N,1,,,,,1,4.60,19,0.5,0.5,4.05,0,,0.3,24.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000294,1,2017-12-15 00:20:38,2017-12-15 00:25:15,N,1,,,,,1,1.20,6,0.5,0.5,1.45,0,,0.3,8.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000295,1,2017-12-15 00:41:06,2017-12-15 00:52:18,N,1,,,,,1,1.80,9.5,0.5,0.5,2.7,0,,0.3,13.5,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000296,1,2017-12-15 00:55:55,2017-12-15 01:12:31,N,1,,,,,1,3.30,14,0.5,0.5,1,0,,0.3,16.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000297,2,2017-12-15 00:05:55,2017-12-15 00:14:30,N,1,,,,,1,1.63,8.5,0.5,0.5,1.96,0,,0.3,11.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000298,2,2017-12-15 00:27:26,2017-12-15 01:03:16,N,1,,,,,1,6.38,26,0.5,0.5,5.46,0,,0.3,32.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000299,1,2017-12-15 00:08:21,2017-12-15 00:28:24,N,1,,,,,1,3.50,15,0.5,0.5,0,0,,0.3,16.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000300,1,2017-12-15 00:32:08,2017-12-15 00:43:23,N,1,,,,,2,2.80,11,0.5,0.5,0,0,,0.3,12.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000301,1,2017-12-15 00:49:47,2017-12-15 00:53:20,N,1,,,,,2,0.60,4.5,0.5,0.5,0,0,,0.3,5.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000302,1,2017-12-15 00:30:10,2017-12-15 01:07:08,N,1,,,,,1,9.40,33.5,0.5,0.5,5,5.76,,0.3,45.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000303,1,2017-12-15 00:04:37,2017-12-15 00:11:07,N,1,,,,,1,1.00,6.5,0.5,0.5,0,0,,0.3,7.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000304,1,2017-12-15 00:11:47,2017-12-15 00:44:18,N,1,,,,,1,5.70,24.5,0.5,0.5,0,0,,0.3,25.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000305,1,2017-12-15 00:58:49,2017-12-15 01:17:57,N,1,,,,,1,5.20,18.5,0.5,0.5,3.95,0,,0.3,23.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000306,1,2017-12-15 00:02:04,2017-12-15 00:06:33,N,1,,,,,1,0.80,5.5,0.5,0.5,0,0,,0.3,6.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000307,1,2017-12-15 00:19:59,2017-12-15 00:38:04,Y,1,,,,,1,2.80,13,0.5,0.5,3,0,,0.3,17.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000308,2,2017-12-15 00:04:13,2017-12-15 00:44:18,N,1,,,,,6,7.07,30,0.5,0.5,6.26,0,,0.3,37.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000309,2,2017-12-15 00:54:44,2017-12-15 01:03:50,N,1,,,,,5,1.55,8,0.5,0.5,2.32,0,,0.3,11.62,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000310,1,2017-12-15 00:43:05,2017-12-15 00:52:01,N,1,,,,,1,2.10,9,0.5,0.5,2.05,0,,0.3,12.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000311,1,2017-12-15 00:05:03,2017-12-15 00:18:37,N,1,,,,,1,2.60,11,0.5,0.5,2.45,0,,0.3,14.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000312,1,2017-12-15 00:20:35,2017-12-15 00:38:49,N,1,,,,,1,2.50,13.5,0.5,0.5,4.4,0,,0.3,19.2,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000313,1,2017-12-15 00:39:52,2017-12-15 00:44:38,N,1,,,,,1,1.10,5.5,0.5,0.5,1.35,0,,0.3,8.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000314,1,2017-12-15 00:49:40,2017-12-15 01:00:41,N,1,,,,,1,1.70,9,0.5,0.5,2.05,0,,0.3,12.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000315,1,2017-12-15 00:47:28,2017-12-15 00:52:07,N,1,,,,,1,1.20,6.5,0.5,0.5,1.55,0,,0.3,9.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000316,2,2017-12-15 00:06:56,2017-12-15 00:44:48,N,1,,,,,5,3.86,23.5,0.5,0.5,4.96,0,,0.3,29.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000317,2,2017-12-15 00:55:16,2017-12-15 01:06:36,N,1,,,,,5,1.94,10,0.5,0.5,0,0,,0.3,11.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000318,1,2017-12-15 00:30:37,2017-12-15 00:30:47,N,2,,,,,1,0.00,52,0,0.5,8,5.76,,0.3,66.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000319,1,2017-12-15 00:34:47,2017-12-15 01:55:20,N,1,,,,,1,20.40,75.5,0.5,0.5,5,5.76,,0.3,87.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000320,2,2017-12-14 23:51:49,2017-12-15 00:09:43,N,1,,,,,1,1.19,11.5,0.5,0.5,0,0,,0.3,12.8,2,,,,yellow,0.09,1,1.2,32,22,5.82,,,,,,,,,,,,,,,,,,,,
1460000321,2,2017-12-15 00:19:42,2017-12-15 00:27:12,N,1,,,,,1,0.78,6.5,0.5,0.5,0,0,,0.3,7.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000322,2,2017-12-15 00:28:34,2017-12-15 00:40:23,N,1,,,,,1,2.75,11.5,0.5,0.5,1.5,0,,0.3,14.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000323,2,2017-12-15 00:35:15,2017-12-15 00:55:59,N,1,,,,,1,7.24,23.5,0.5,0.5,0,5.76,,0.3,30.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000324,2,2017-12-15 00:26:02,2017-12-15 00:29:23,N,1,,,,,1,0.35,4,0.5,0.5,0,0,,0.3,5.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000325,2,2017-12-15 00:31:42,2017-12-15 00:36:19,N,1,,,,,1,1.07,5.5,0.5,0.5,1.36,0,,0.3,8.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000326,2,2017-12-15 00:36:55,2017-12-15 00:43:24,N,1,,,,,1,1.38,6.5,0.5,0.5,0,0,,0.3,7.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000327,2,2017-12-15 00:45:40,2017-12-15 01:24:42,N,1,,,,,1,6.01,27,0.5,0.5,4.24,0,,0.3,32.54,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000328,2,2017-12-15 00:11:05,2017-12-15 00:19:49,N,1,,,,,2,1.16,7.5,0.5,0.5,1,0,,0.3,9.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000329,2,2017-12-15 00:20:45,2017-12-15 00:54:08,N,1,,,,,2,5.22,23.5,0.5,0.5,2.4,0,,0.3,27.2,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000330,2,2017-12-14 23:59:30,2017-12-15 00:27:27,N,1,,,,,6,4.86,21,0.5,0.5,5.58,0,,0.3,27.88,1,,,,yellow,0.09,1,1.2,32,22,5.82,,,,,,,,,,,,,,,,,,,,
1460000331,2,2017-12-15 00:32:34,2017-12-15 00:43:36,N,1,,,,,6,2.73,10.5,0.5,0.5,0.59,0,,0.3,12.39,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000332,2,2017-12-15 00:55:30,2017-12-15 01:14:02,N,1,,,,,6,3.22,14.5,0.5,0.5,1.5,0,,0.3,17.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000333,2,2017-12-15 00:48:56,2017-12-15 00:57:15,N,1,,,,,6,1.23,7.5,0.5,0.5,1.76,0,,0.3,10.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000334,1,2017-12-15 00:17:24,2017-12-15 00:28:44,N,1,,,,,1,2.10,10,0.5,0.5,0,0,,0.3,11.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000335,1,2017-12-15 00:29:20,2017-12-15 00:40:38,N,1,,,,,1,1.70,9,0.5,0.5,2.5,0,,0.3,12.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000336,1,2017-12-15 00:43:13,2017-12-15 01:04:32,N,1,,,,,1,1.90,14.5,0.5,0.5,3.15,0,,0.3,18.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000337,2,2017-12-15 00:18:55,2017-12-15 00:35:52,N,1,,,,,5,2.25,12.5,0.5,0.5,2,0,,0.3,15.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000338,2,2017-12-15 00:37:39,2017-12-15 00:52:21,N,1,,,,,5,1.76,11,0.5,0.5,2.46,0,,0.3,14.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000339,2,2017-12-15 00:02:22,2017-12-15 00:18:02,N,1,,,,,2,1.79,11.5,0.5,0.5,1.5,0,,0.3,14.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000340,2,2017-12-15 00:19:03,2017-12-15 00:55:49,N,1,,,,,2,8.96,33.5,0.5,0.5,6.96,0,,0.3,41.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000341,2,2017-12-15 00:38:47,2017-12-15 01:02:25,N,1,,,,,1,5.67,19.5,0.5,0.5,4.16,0,,0.3,24.96,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000342,1,2017-12-15 00:54:07,2017-12-15 00:56:25,N,1,,,,,1,0.30,4,0.5,0.5,0,0,,0.3,5.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000343,1,2017-12-15 00:59:33,2017-12-15 01:15:36,N,1,,,,,1,4.10,16,0.5,0.5,3.45,0,,0.3,20.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000344,1,2017-12-15 00:16:35,2017-12-15 00:38:22,N,1,,,,,1,3.70,16.5,0.5,0.5,3.55,0,,0.3,21.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000345,1,2017-12-15 00:41:35,2017-12-15 00:46:07,N,1,,,,,1,1.10,5.5,0.5,0.5,0,0,,0.3,6.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000346,1,2017-12-15 00:28:16,2017-12-15 00:49:43,N,1,,,,,1,2.10,14.5,0.5,0.5,1,0,,0.3,16.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000347,1,2017-12-15 00:50:32,2017-12-15 01:02:25,N,1,,,,,1,2.00,9.5,0.5,0.5,2.15,0,,0.3,12.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000348,2,2017-12-15 00:06:30,2017-12-15 00:32:46,N,2,,,,,2,18.00,52,0,0.5,8,0,,0.3,60.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000349,2,2017-12-15 00:35:57,2017-12-15 00:49:57,N,1,,,,,2,3.00,13,0.5,0.5,2.86,0,,0.3,17.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000350,2,2017-12-15 00:51:07,2017-12-15 01:03:37,N,1,,,,,2,1.78,10.5,0.5,0.5,0,0,,0.3,11.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000351,2,2017-12-14 23:58:18,2017-12-15 00:17:19,N,1,,,,,1,2.89,13.5,0.5,0.5,0,0,,0.3,14.8,1,,,,yellow,0.09,1,1.2,32,22,5.82,,,,,,,,,,,,,,,,,,,,
1460000352,2,2017-12-15 00:25:33,2017-12-15 00:37:17,N,1,,,,,1,1.84,9.5,0.5,0.5,2.7,0,,0.3,13.5,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000353,2,2017-12-15 00:38:50,2017-12-15 01:12:55,N,5,,,,,1,20.52,95,0,0.5,21.26,10.5,,0.3,127.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000354,1,2017-12-15 00:10:39,2017-12-15 00:23:43,N,1,,,,,0,1.80,10.5,0.5,0.5,2.35,0,,0.3,14.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000355,1,2017-12-15 00:26:21,2017-12-15 00:46:41,N,1,,,,,0,3.90,15.5,0.5,0.5,0,0,,0.3,16.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000356,1,2017-12-15 00:51:23,2017-12-15 01:23:51,N,1,,,,,0,4.60,21,0.5,0.5,3.2,0,,0.3,25.5,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000357,1,2017-12-15 00:01:01,2017-12-15 00:09:09,N,1,,,,,1,1.50,7.5,0.5,0.5,2.2,0,,0.3,11,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000358,1,2017-12-15 00:11:16,2017-12-15 00:28:12,N,1,,,,,1,4.20,16.5,0.5,0.5,3.55,0,,0.3,21.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000359,1,2017-12-15 00:41:38,2017-12-15 00:49:41,N,1,,,,,1,1.00,7,0.5,0.5,1,0,,0.3,9.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000360,1,2017-12-15 00:53:12,2017-12-15 01:03:55,N,1,,,,,1,2.30,10,0.5,0.5,0,0,,0.3,11.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000361,2,2017-12-15 00:17:03,2017-12-15 00:23:50,N,1,,,,,6,0.78,6.5,0.5,0.5,1.56,0,,0.3,9.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000362,2,2017-12-15 00:26:35,2017-12-15 00:30:26,N,1,,,,,6,0.70,4.5,0.5,0.5,0,0,,0.3,5.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000363,2,2017-12-15 00:33:08,2017-12-15 00:49:10,N,1,,,,,6,3.51,14.5,0.5,0.5,0,0,,0.3,15.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000364,2,2017-12-15 00:54:31,2017-12-15 01:18:59,N,1,,,,,6,6.01,21.5,0.5,0.5,0,0,,0.3,22.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000365,1,2017-12-15 00:06:33,2017-12-15 00:44:01,N,1,,,,,1,9.40,34,0.5,0.5,7.05,0,,0.3,42.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000366,1,2017-12-15 00:24:39,2017-12-15 00:49:07,N,1,,,,,1,2.60,16.5,0.5,0.5,0,0,,0.3,17.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000367,1,2017-12-15 00:53:08,2017-12-15 01:07:56,N,1,,,,,1,3.30,13,0.5,0.5,3.55,0,,0.3,17.85,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000368,1,2017-12-15 00:02:28,2017-12-15 00:13:55,N,1,,,,,1,1.80,9.5,0.5,0.5,2,0,,0.3,12.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000369,1,2017-12-15 00:33:04,2017-12-15 00:40:02,N,1,,,,,1,2.10,8,0.5,0.5,1.85,0,,0.3,11.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000370,1,2017-12-15 00:03:29,2017-12-15 00:08:56,N,1,,,,,1,1.00,6,0.5,0.5,1.45,0,,0.3,8.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000371,1,2017-12-15 00:10:46,2017-12-15 00:20:15,N,1,,,,,1,1.20,8,0.5,0.5,1.85,0,,0.3,11.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000372,1,2017-12-15 00:22:56,2017-12-15 00:37:19,N,1,,,,,1,4.00,15,0.5,0.5,0,0,,0.3,16.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000373,1,2017-12-15 00:42:52,2017-12-15 00:51:19,N,1,,,,,1,5.00,15.5,0.5,0.5,3.35,0,,0.3,20.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000374,1,2017-12-15 00:53:47,2017-12-15 01:23:10,N,1,,,,,1,4.70,21,0.5,0.5,0,0,,0.3,22.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000375,2,2017-12-15 00:36:27,2017-12-15 01:12:34,N,1,,,,,1,10.62,35.5,0.5,0.5,4.2,0,,0.3,41,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000376,1,2017-12-15 00:28:33,2017-12-15 00:45:42,N,1,,,,,1,3.10,14,0.5,0.5,3.8,0,,0.3,19.1,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000377,2,2017-12-15 00:15:28,2017-12-15 00:50:41,N,1,,,,,2,6.56,28,0.5,0.5,0,0,,0.3,29.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000378,2,2017-12-15 00:56:38,2017-12-15 01:09:17,N,1,,,,,2,6.86,20.5,0.5,0.5,1.1,0,,0.3,22.9,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000379,1,2017-12-15 00:13:36,2017-12-15 01:14:17,N,1,,,,,1,13.00,47.5,0.5,0.5,9.75,0,,0.3,58.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000380,1,2017-12-15 00:03:32,2017-12-15 00:28:38,N,1,,,,,1,4.60,19.5,0.5,0.5,1,0,,0.3,21.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000381,1,2017-12-15 00:46:55,2017-12-15 01:10:49,N,1,,,,,1,5.00,20.5,0.5,0.5,0,5.76,,0.3,27.56,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000382,1,2017-12-15 00:09:42,2017-12-15 00:20:45,N,1,,,,,1,2.50,10.5,0.5,0.5,0,0,,0.3,11.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000383,1,2017-12-15 00:23:14,2017-12-15 00:27:02,N,1,,,,,1,0.60,5,0.5,0.5,0,0,,0.3,6.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000384,1,2017-12-15 00:31:40,2017-12-15 00:36:49,N,1,,,,,1,1.00,6,0.5,0.5,2,0,,0.3,9.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000385,1,2017-12-15 00:52:15,2017-12-15 01:13:40,N,1,,,,,1,4.60,17.5,0.5,0.5,3.75,0,,0.3,22.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000386,2,2017-12-15 00:13:40,2017-12-15 00:33:20,N,1,,,,,1,1.93,13.5,0.5,0.5,1.48,0,,0.3,16.28,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000387,2,2017-12-15 00:52:47,2017-12-15 01:19:23,N,1,,,,,1,5.91,23.5,0.5,0.5,7.44,0,,0.3,32.24,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000388,1,2017-12-15 00:38:36,2017-12-15 00:56:37,N,1,,,,,1,5.90,20,0.5,0.5,4.25,0,,0.3,25.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000389,2,2017-12-15 00:15:53,2017-12-15 00:48:54,N,1,,,,,1,3.61,22,0.5,0.5,1,0,,0.3,24.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000390,2,2017-12-15 00:20:20,2017-12-15 00:40:11,N,1,,,,,1,2.02,13.5,0.5,0.5,2.96,0,,0.3,17.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000391,2,2017-12-15 00:41:22,2017-12-15 00:57:21,N,1,,,,,1,3.29,14,0.5,0.5,3.06,0,,0.3,18.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000392,2,2017-12-15 00:59:16,2017-12-15 01:09:53,N,1,,,,,1,1.08,8,0.5,0.5,1.86,0,,0.3,11.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000393,1,2017-12-15 00:44:17,2017-12-15 01:13:28,N,2,,,,,1,17.20,52,0,0.5,10.56,0,,0.3,63.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000394,2,2017-12-15 00:06:55,2017-12-15 00:17:28,N,1,,,,,2,1.58,9,0.5,0.5,2.06,0,,0.3,12.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000395,2,2017-12-15 00:22:35,2017-12-15 00:34:52,N,1,,,,,2,1.84,10,0.5,0.5,2.26,0,,0.3,13.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000396,2,2017-12-15 00:44:25,2017-12-15 00:54:41,N,1,,,,,2,0.92,8,0.5,0.5,0,0,,0.3,9.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000397,2,2017-12-15 00:10:17,2017-12-15 00:44:44,N,1,,,,,2,4.78,23,0.5,0.5,4.86,0,,0.3,29.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000398,1,2017-12-15 00:07:26,2017-12-15 00:11:49,N,1,,,,,1,0.80,5,0.5,0.5,0,0,,0.3,6.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000399,1,2017-12-15 00:16:45,2017-12-15 00:46:00,N,1,,,,,1,5.10,21.5,0.5,0.5,4.55,0,,0.3,27.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000400,1,2017-12-15 00:56:23,2017-12-15 00:59:44,N,1,,,,,1,0.80,4.5,0.5,0.5,1.45,0,,0.3,7.25,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000401,1,2017-12-15 00:02:45,2017-12-15 00:17:46,N,1,,,,,1,2.60,13,0.5,0.5,2.85,0,,0.3,17.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000402,1,2017-12-15 00:33:58,2017-12-15 01:10:08,N,1,,,,,1,13.80,42.5,0.5,0.5,12,0,,0.3,55.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000403,1,2017-12-15 00:08:20,2017-12-15 00:19:54,N,1,,,,,1,2.20,10.5,0.5,0.5,2.35,0,,0.3,14.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000404,1,2017-12-15 00:22:02,2017-12-15 00:24:32,N,1,,,,,1,0.40,4,0.5,0.5,0,0,,0.3,5.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000405,1,2017-12-15 00:49:45,2017-12-15 01:07:55,N,1,,,,,1,4.30,16.5,0.5,0.5,3.55,0,,0.3,21.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000406,1,2017-12-15 00:19:42,2017-12-15 00:35:10,N,1,,,,,1,7.20,21.5,0.5,0.5,4,0,,0.3,26.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000407,1,2017-12-15 00:36:17,2017-12-15 00:39:28,N,1,,,,,1,0.60,4.5,0.5,0.5,1.2,0,,0.3,7,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000408,1,2017-12-15 00:44:47,2017-12-15 00:47:53,N,1,,,,,1,0.50,4,0.5,0.5,1.05,0,,0.3,6.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000409,1,2017-12-15 00:06:26,2017-12-15 01:03:38,N,1,,,,,1,12.30,48,0.5,0.5,13.75,5.76,,0.3,68.81,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000410,1,2017-12-15 00:03:14,2017-12-15 00:10:39,N,1,,,,,1,1.30,7,0.5,0.5,1.5,0,,0.3,9.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000411,1,2017-12-15 00:12:39,2017-12-15 00:37:19,N,1,,,,,1,4.20,18,0.5,0.5,2.5,0,,0.3,21.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000412,1,2017-12-15 00:42:49,2017-12-15 00:46:16,N,1,,,,,1,0.80,5,0.5,0.5,1.55,0,,0.3,7.85,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000413,2,2017-12-14 19:10:23,2017-12-14 19:18:28,N,1,,,,,1,1.02,7,0.5,0.5,0,0,,0.3,8.3,2,,,,yellow,0.09,1,1.2,32,22,5.82,,,,,,,,,,,,,,,,,,,,
1460000414,2,2017-12-14 19:23:38,2017-12-15 19:06:32,N,5,,,,,3,4.24,60,0,0.5,0,10.5,,0.3,71.3,1,,,,yellow,0.09,1,1.2,32,22,5.82,,,,,,,,,,,,,,,,,,,,
1460000415,2,2017-12-15 00:55:49,2017-12-15 01:11:02,N,1,,,,,2,7.83,23.5,0.5,0.5,0,0,,0.3,24.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000416,2,2017-12-15 00:05:09,2017-12-15 00:20:48,N,1,,,,,2,1.36,10.5,0.5,0.5,2.36,0,,0.3,14.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000417,2,2017-12-15 00:29:27,2017-12-15 00:52:13,N,1,,,,,2,3.98,17.5,0.5,0.5,3.76,0,,0.3,22.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000418,2,2017-12-15 00:02:35,2017-12-15 00:21:33,N,1,,,,,1,2.34,13.5,0.5,0.5,2.96,0,,0.3,17.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000419,2,2017-12-15 00:26:40,2017-12-15 01:10:02,N,1,,,,,1,9.00,34.5,0.5,0.5,2.2,0,,0.3,38,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000420,2,2017-12-15 00:41:13,2017-12-15 00:47:16,N,1,,,,,5,0.95,6,0.5,0.5,1.46,0,,0.3,8.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000421,2,2017-12-15 00:48:07,2017-12-15 00:51:55,N,1,,,,,5,0.82,5,0.5,0.5,0,0,,0.3,6.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000422,2,2017-12-15 00:54:41,2017-12-15 01:36:33,N,1,,,,,5,8.13,31.5,0.5,0.5,6.56,0,,0.3,39.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000423,1,2017-12-15 00:10:16,2017-12-15 00:30:12,N,1,,,,,3,4.20,17,0.5,0.5,0,0,,0.3,18.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000424,1,2017-12-15 00:09:09,2017-12-15 00:36:56,N,1,,,,,1,4.20,20,0.5,0.5,0.01,0,,0.3,21.31,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000425,1,2017-12-15 00:54:57,2017-12-15 01:17:59,N,1,,,,,1,5.40,19.5,0.5,0.5,4.15,0,,0.3,24.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000426,2,2017-12-15 00:13:43,2017-12-15 00:42:45,N,1,,,,,1,6.63,25.5,0.5,0.5,9.77,5.76,,0.3,42.33,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000427,2,2017-12-15 00:09:34,2017-12-15 00:21:47,N,1,,,,,1,1.91,10,0.5,0.5,0,0,,0.3,11.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000428,2,2017-12-15 00:26:37,2017-12-15 00:54:33,N,1,,,,,1,6.54,25,0.5,0.5,0,5.76,,0.3,32.06,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000429,1,2017-12-15 00:09:48,2017-12-15 00:19:19,N,1,,,,,1,2.60,10.5,0.5,0.5,2.35,0,,0.3,14.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000430,1,2017-12-15 00:32:26,2017-12-15 00:36:48,N,1,,,,,1,0.50,4.5,0.5,0.5,0,0,,0.3,5.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000431,1,2017-12-15 00:42:32,2017-12-15 01:03:48,N,1,,,,,1,3.90,16.5,0.5,0.5,0,0,,0.3,17.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000432,2,2017-12-14 23:59:37,2017-12-15 00:04:06,N,1,,,,,1,0.61,5,0.5,0.5,1,0,,0.3,7.3,1,,,,yellow,0.09,1,1.2,32,22,5.82,,,,,,,,,,,,,,,,,,,,
1460000433,2,2017-12-15 00:06:30,2017-12-15 00:32:11,N,1,,,,,1,4.48,18.5,0.5,0.5,2,0,,0.3,21.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000434,2,2017-12-15 00:40:44,2017-12-15 01:00:28,N,1,,,,,1,4.72,16.5,0.5,0.5,4.71,5.76,,0.3,28.27,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000435,2,2017-12-15 00:10:53,2017-12-15 00:17:52,N,1,,,,,1,1.12,6.5,0.5,0.5,0,0,,0.3,7.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000436,2,2017-12-15 00:35:36,2017-12-15 00:59:12,N,1,,,,,1,10.99,32.5,0.5,0.5,6.76,0,,0.3,40.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000437,2,2017-12-15 00:43:04,2017-12-15 01:16:19,N,2,,,,,1,17.90,52,0,0.5,11.71,5.76,,0.3,70.27,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000438,1,2017-12-15 00:02:42,2017-12-15 00:10:12,N,1,,,,,1,1.30,7,0.5,0.5,1.65,0,,0.3,9.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000439,1,2017-12-15 00:11:22,2017-12-15 00:40:14,N,1,,,,,1,7.30,25.5,0.5,0.5,6.7,0,,0.3,33.5,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000440,1,2017-12-15 00:20:08,2017-12-15 00:40:28,N,1,,,,,1,2.40,14,0.5,0.5,3.05,0,,0.3,18.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000441,1,2017-12-15 00:43:20,2017-12-15 01:08:32,N,1,,,,,2,1.30,15.5,0.5,0.5,5,0,,0.3,21.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000442,1,2017-12-15 00:11:32,2017-12-15 00:47:05,N,1,,,,,1,5.10,26.5,0.5,0.5,8.3,0,,0.3,36.1,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000443,2,2017-12-15 00:09:29,2017-12-15 00:29:03,N,1,,,,,1,5.70,20.5,0.5,0.5,4.36,0,,0.3,26.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000444,2,2017-12-15 00:30:13,2017-12-15 00:37:34,N,1,,,,,1,1.67,8,0.5,0.5,0,0,,0.3,9.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000445,2,2017-12-14 23:58:56,2017-12-15 00:30:50,N,2,,,,,1,20.04,52,0,0.5,11.71,5.76,,0.3,70.27,1,,,,yellow,0.09,1,1.2,32,22,5.82,,,,,,,,,,,,,,,,,,,,
1460000446,2,2017-12-15 00:36:57,2017-12-15 00:45:41,N,1,,,,,1,1.46,8,0.5,0.5,0,0,,0.3,9.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000447,1,2017-12-15 00:04:54,2017-12-15 00:37:27,N,1,,,,,1,4.60,23,0.5,0.5,0,0,,0.3,24.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000448,2,2017-12-15 00:48:43,2017-12-15 00:53:39,N,1,,,,,1,1.30,6,0.5,0.5,1.46,0,,0.3,8.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000449,2,2017-12-15 00:57:01,2017-12-15 01:01:12,N,1,,,,,1,0.68,5,0.5,0.5,0.63,0,,0.3,6.93,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000450,1,2017-12-15 00:05:18,2017-12-15 00:49:34,N,1,,,,,1,15.40,51,0.5,0.5,5,0,,0.3,57.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000451,2,2017-12-15 00:45:45,2017-12-15 01:07:45,N,1,,,,,1,9.52,28,0.5,0.5,0,5.76,,0.3,35.06,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000452,2,2017-12-15 00:08:35,2017-12-15 00:56:01,N,1,,,,,5,7.01,31,0.5,0.5,9.69,0,,0.3,41.99,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000453,1,2017-12-15 00:10:24,2017-12-15 00:31:26,N,1,,,,,1,3.20,16,0.5,0.5,0,0,,0.3,17.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000454,2,2017-12-15 00:04:37,2017-12-15 00:53:48,N,1,,,,,1,11.07,40.5,0.5,0.5,5,5.76,,0.3,52.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000455,1,2017-12-15 00:05:10,2017-12-15 00:20:23,N,1,,,,,1,3.10,13,0.5,0.5,1.43,0,,0.3,15.73,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000456,1,2017-12-15 00:21:02,2017-12-15 00:36:30,N,1,,,,,2,2.30,11.5,0.5,0.5,1.92,0,,0.3,14.72,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000457,1,2017-12-15 00:38:38,2017-12-15 01:10:20,N,1,,,,,1,4.90,22.5,0.5,0.5,5.95,0,,0.3,29.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000458,1,2017-12-15 00:03:57,2017-12-15 00:07:19,N,1,,,,,2,1.10,5.5,0.5,0.5,1,0,,0.3,7.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000459,1,2017-12-15 00:26:00,2017-12-15 00:34:10,N,1,,,,,1,1.30,7.5,0.5,0.5,0,0,,0.3,8.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000460,1,2017-12-15 00:36:16,2017-12-15 00:49:08,N,1,,,,,1,2.60,11,0.5,0.5,2.45,0,,0.3,14.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000461,1,2017-12-15 00:53:25,2017-12-15 01:00:12,N,1,,,,,1,1.90,8,0.5,0.5,2.3,0,,0.3,11.6,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000462,2,2017-12-15 00:15:12,2017-12-15 00:26:54,N,1,,,,,1,1.17,8.5,0.5,0.5,2.94,0,,0.3,12.74,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000463,2,2017-12-15 00:47:47,2017-12-15 00:59:33,N,1,,,,,1,1.40,9,0.5,0.5,2.06,0,,0.3,12.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000464,2,2017-12-15 00:08:58,2017-12-15 00:43:22,N,1,,,,,5,6.10,25.5,0.5,0.5,8.04,0,,0.3,34.84,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000465,2,2017-12-15 00:48:26,2017-12-15 01:01:21,N,1,,,,,5,2.41,11.5,0.5,0.5,0,0,,0.3,12.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000466,1,2017-12-15 00:42:54,2017-12-15 00:46:52,N,1,,,,,1,0.70,4.5,0.5,0.5,1.15,0,,0.3,6.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000467,1,2017-12-15 00:01:07,2017-12-15 00:05:50,N,1,,,,,1,0.90,5,0.5,0.5,0,0,,0.3,6.3,3,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000468,1,2017-12-15 00:09:05,2017-12-15 00:26:40,N,1,,,,,1,3.60,16,0.5,0.5,1,0,,0.3,18.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000469,1,2017-12-15 00:59:32,2017-12-15 01:16:09,N,1,,,,,1,7.70,23,0.5,0.5,7.5,5.76,,0.3,37.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000470,1,2017-12-15 00:15:12,2017-12-15 00:36:25,N,1,,,,,1,1.40,13.5,0.5,0.5,2.95,0,,0.3,17.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000471,1,2017-12-15 00:39:37,2017-12-15 01:19:53,N,1,,,,,1,8.00,31,0.5,0.5,8.05,0,,0.3,40.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000472,1,2017-12-15 00:04:23,2017-12-15 00:37:18,N,1,,,,,1,6.20,25,0.5,0.5,5.25,0,,0.3,31.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000473,1,2017-12-15 00:53:13,2017-12-15 00:59:08,N,1,,,,,1,1.20,6.5,0.5,0.5,1.55,0,,0.3,9.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000474,1,2017-12-15 00:14:58,2017-12-15 00:43:09,N,1,,,,,1,4.20,19.5,0.5,0.5,4.16,0,,0.3,24.96,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000475,1,2017-12-15 00:54:26,2017-12-15 01:01:22,N,1,,,,,1,1.10,6.5,0.5,0.5,1,0,,0.3,8.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000476,2,2017-12-15 00:03:41,2017-12-15 00:33:29,N,1,,,,,1,6.35,25,0.5,0.5,5.26,0,,0.3,31.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000477,2,2017-12-15 00:55:24,2017-12-15 01:29:13,N,1,,,,,1,8.48,30,0.5,0.5,5,0,,0.3,36.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000478,2,2017-12-15 00:13:25,2017-12-15 00:43:14,N,2,,,,,2,17.12,52,0,0.5,11.71,5.76,,0.3,70.27,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000479,1,2017-12-15 00:10:35,2017-12-15 00:21:11,N,1,,,,,2,1.10,8.5,0.5,0.5,1.95,0,,0.3,11.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000480,1,2017-12-15 00:22:05,2017-12-15 00:25:54,N,1,,,,,1,0.60,4.5,0.5,0.5,1.15,0,,0.3,6.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000481,1,2017-12-15 00:27:36,2017-12-15 00:40:36,N,1,,,,,1,1.00,9.5,0.5,0.5,2.15,0,,0.3,12.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000482,1,2017-12-15 00:41:44,2017-12-15 00:47:23,N,1,,,,,1,1.20,6.5,0.5,0.5,0,0,,0.3,7.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000483,1,2017-12-15 00:49:04,2017-12-15 01:03:47,N,1,,,,,1,2.10,11.5,0.5,0.5,2.55,0,,0.3,15.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000484,1,2017-12-15 00:05:27,2017-12-15 00:29:12,N,1,,,,,0,10.50,31.5,0.5,0.5,9,5.76,,0.3,47.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000485,1,2017-12-15 00:35:54,2017-12-15 00:40:31,N,1,,,,,2,0.70,5.5,0.5,0.5,1.35,0,,0.3,8.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000486,1,2017-12-15 00:43:11,2017-12-15 00:53:22,N,1,,,,,2,1.00,8,0.5,0.5,1.85,0,,0.3,11.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000487,2,2017-12-15 00:31:19,2017-12-15 00:49:31,N,1,,,,,1,6.31,20.5,0.5,0.5,4.36,0,,0.3,26.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000488,2,2017-12-15 00:19:14,2017-12-15 00:29:43,N,1,,,,,1,1.05,8,0.5,0.5,1.5,0,,0.3,10.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000489,2,2017-12-15 00:32:35,2017-12-15 00:44:57,N,1,,,,,1,2.42,10.5,0.5,0.5,1,0,,0.3,12.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000490,2,2017-12-15 00:48:40,2017-12-15 00:57:57,N,1,,,,,1,1.22,8,0.5,0.5,2.32,0,,0.3,11.62,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000491,2,2017-12-15 00:38:41,2017-12-15 01:21:39,N,1,,,,,2,17.74,52,0.5,0.5,11.81,5.76,,0.3,70.87,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000492,2,2017-12-15 00:08:26,2017-12-15 00:19:44,N,1,,,,,1,1.66,8.5,0.5,0.5,2.5,0,,0.3,12.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000493,2,2017-12-15 00:22:53,2017-12-15 00:36:57,N,1,,,,,1,1.74,10,0.5,0.5,1,0,,0.3,12.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000494,2,2017-12-15 01:00:01,2017-12-15 01:05:22,N,1,,,,,1,0.96,5.5,0.5,0.5,1.7,0,,0.3,8.5,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000495,2,2017-12-15 00:02:53,2017-12-15 00:09:38,N,1,,,,,1,0.65,6,0.5,0.5,0,0,,0.3,7.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000496,2,2017-12-15 00:11:31,2017-12-15 00:12:25,N,5,,,,,1,0.00,15,0,0.5,40,0,,0.3,55.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000497,2,2017-12-15 00:43:55,2017-12-15 00:56:49,N,1,,,,,1,1.94,10.5,0.5,0.5,2.36,0,,0.3,14.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000498,1,2017-12-15 00:25:49,2017-12-15 00:35:05,N,1,,,,,1,1.10,7.5,0.5,0.5,1.75,0,,0.3,10.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000499,1,2017-12-15 00:52:06,2017-12-15 00:57:28,N,1,,,,,2,0.60,5.5,0.5,0.5,0,0,,0.3,6.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000500,1,2017-12-15 00:34:18,2017-12-15 01:21:09,N,1,,,,,1,6.60,30,0.5,0.5,9.35,0,,0.3,40.65,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000501,2,2017-12-15 00:07:37,2017-12-15 00:59:00,N,1,,,,,1,10.17,42,0.5,0.5,0,0,,0.3,43.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000502,1,2017-12-15 00:07:42,2017-12-15 00:30:07,N,1,,,,,2,2.90,16,0.5,0.5,0,0,,0.3,17.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000503,1,2017-12-15 00:30:45,2017-12-15 00:38:40,N,1,,,,,1,1.30,7.5,0.5,0.5,0,0,,0.3,8.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000504,1,2017-12-15 00:42:23,2017-12-15 00:55:29,N,1,,,,,2,1.80,10.5,0.5,0.5,2.35,0,,0.3,14.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000505,1,2017-12-15 00:57:04,2017-12-15 00:57:04,N,1,,,,,1,0.00,5,0.5,0.5,0,0,,0.3,6.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000506,1,2017-12-15 00:59:48,2017-12-15 01:21:26,N,1,,,,,1,4.50,17.5,0.5,0.5,3.75,0,,0.3,22.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000507,2,2017-12-15 00:20:44,2017-12-15 00:30:11,N,1,,,,,2,0.78,7.5,0.5,0.5,0,0,,0.3,8.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000508,2,2017-12-15 00:37:52,2017-12-15 00:52:16,N,1,,,,,2,1.16,10.5,0.5,0.5,1.5,0,,0.3,13.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000509,2,2017-12-15 00:58:26,2017-12-15 01:04:02,N,1,,,,,2,1.00,5.5,0.5,0.5,1.36,0,,0.3,8.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000510,1,2017-12-15 00:05:09,2017-12-15 00:25:20,N,1,,,,,1,2.60,14,0.5,0.5,0,0,,0.3,15.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000511,1,2017-12-15 00:29:09,2017-12-15 00:36:48,N,1,,,,,1,1.40,7.5,0.5,0.5,2.2,0,,0.3,11,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000512,1,2017-12-15 00:38:25,2017-12-15 00:51:42,N,1,,,,,1,1.50,10,0.5,0.5,3.35,0,,0.3,14.65,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000513,1,2017-12-15 00:23:44,2017-12-15 00:49:22,N,1,,,,,1,4.40,20.5,0.5,0.5,4.35,0,,0.3,26.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000514,1,2017-12-15 00:09:29,2017-12-15 00:14:30,N,1,,,,,1,0.80,5.5,0.5,0.5,1.7,0,,0.3,8.5,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000515,1,2017-12-15 00:16:07,2017-12-15 00:35:34,N,1,,,,,1,4.60,16.5,0.5,0.5,0,0,,0.3,17.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000516,1,2017-12-15 00:58:07,2017-12-15 01:05:15,N,1,,,,,2,1.40,7,0.5,0.5,1.65,0,,0.3,9.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000517,2,2017-12-15 00:25:15,2017-12-15 00:34:38,N,1,,,,,1,1.21,8,0.5,0.5,2.79,0,,0.3,12.09,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000518,2,2017-12-15 00:40:54,2017-12-15 00:52:56,N,1,,,,,1,0.86,8.5,0.5,0.5,1.96,0,,0.3,11.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000519,2,2017-12-15 00:54:22,2017-12-15 01:00:26,N,1,,,,,1,0.89,5.5,0.5,0.5,1,0,,0.3,7.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000520,1,2017-12-15 00:07:03,2017-12-15 00:17:51,N,1,,,,,1,1.00,8,0.5,0.5,1,0,,0.3,10.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000521,1,2017-12-15 00:27:43,2017-12-15 01:02:24,N,1,,,,,1,6.80,27,0.5,0.5,0,0,,0.3,28.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000522,2,2017-12-15 00:14:12,2017-12-15 00:57:30,N,4,,,,,1,33.89,158,0.5,0.5,10,0,,0.3,169.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000523,2,2017-12-15 00:01:06,2017-12-15 00:16:46,N,1,,,,,1,3.00,13,0.5,0.5,0,0,,0.3,14.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000524,2,2017-12-15 00:30:25,2017-12-15 00:41:04,N,1,,,,,1,3.33,11,0.5,0.5,2.46,0,,0.3,14.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000525,2,2017-12-15 00:42:14,2017-12-15 00:52:16,N,1,,,,,1,4.32,14,0.5,0.5,3.06,0,,0.3,18.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000526,1,2017-12-15 00:44:17,2017-12-15 00:53:15,N,1,,,,,1,2.10,9,0.5,0.5,2.05,0,,0.3,12.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000527,1,2017-12-15 00:58:36,2017-12-15 01:17:56,N,1,,,,,2,5.00,17.5,0.5,0.5,3.75,0,,0.3,22.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000528,2,2017-12-15 00:14:35,2017-12-15 00:18:59,N,1,,,,,1,0.79,5,0.5,0.5,1.26,0,,0.3,7.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000529,2,2017-12-15 00:26:09,2017-12-15 01:04:29,N,1,,,,,1,11.54,37.5,0.5,0.5,7.76,0,,0.3,48.51,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000530,2,2017-12-15 00:14:54,2017-12-15 00:31:45,N,1,,,,,1,2.37,12.5,0.5,0.5,2.07,0,,0.3,15.87,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000531,2,2017-12-15 00:33:52,2017-12-15 00:40:05,N,1,,,,,2,1.43,7,0.5,0.5,0,0,,0.3,8.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000532,2,2017-12-15 00:49:02,2017-12-15 00:52:27,N,1,,,,,2,1.01,5,0.5,0.5,0,0,,0.3,6.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000533,2,2017-12-15 00:21:05,2017-12-15 00:32:58,N,1,,,,,2,1.94,10,0.5,0.5,2.82,0,,0.3,14.12,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000534,2,2017-12-15 00:36:08,2017-12-15 00:44:27,N,1,,,,,2,1.33,7.5,0.5,0.5,1.2,0,,0.3,10,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000535,2,2017-12-15 00:49:05,2017-12-15 01:22:21,N,1,,,,,2,6.61,25.5,0.5,0.5,5.36,0,,0.3,32.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000536,2,2017-12-15 00:43:23,2017-12-15 01:16:38,N,1,,,,,1,7.28,27,0.5,0.5,5,0,,0.3,33.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000537,1,2017-12-15 00:08:26,2017-12-15 00:11:22,N,1,,,,,2,0.40,4,0.5,0.5,1.05,0,,0.3,6.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000538,1,2017-12-15 00:13:29,2017-12-15 00:46:41,N,1,,,,,1,6.30,25,0.5,0.5,5.25,0,,0.3,31.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000539,2,2017-12-15 00:11:39,2017-12-15 00:11:42,N,5,,,,,1,0.00,125,0,0,40,0,,0.3,165.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000540,2,2017-12-15 00:56:03,2017-12-15 01:00:44,N,1,,,,,1,0.97,5.5,0.5,0.5,0,0,,0.3,6.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000541,1,2017-12-15 00:42:52,2017-12-15 01:02:34,N,1,,,,,1,13.00,36,0.5,0.5,6,0,,0.3,43.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000542,1,2017-12-15 00:03:04,2017-12-15 00:52:32,N,1,,,,,1,6.60,32,0.5,0.5,6.65,0,,0.3,39.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000543,1,2017-12-15 00:13:41,2017-12-15 00:28:38,N,1,,,,,1,4.20,14.5,0.5,0.5,3.5,0,,0.3,19.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000544,1,2017-12-15 00:33:27,2017-12-15 00:40:04,N,1,,,,,1,1.70,7.5,0.5,0.5,0,0,,0.3,8.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000545,1,2017-12-15 00:47:29,2017-12-15 00:58:09,N,1,,,,,1,2.20,10,0.5,0.5,2.8,0,,0.3,14.1,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000546,1,2017-12-15 00:05:19,2017-12-15 00:38:17,N,1,,,,,1,8.20,30,0.5,0.5,6.25,0,,0.3,37.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000547,2,2017-12-15 00:53:06,2017-12-15 01:14:50,N,1,,,,,4,12.29,35,0.5,0.5,0,0,,0.3,36.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000548,1,2017-12-15 00:02:50,2017-12-15 00:07:18,N,1,,,,,1,0.60,5,0.5,0.5,0,0,,0.3,6.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000549,1,2017-12-15 00:12:02,2017-12-15 00:14:27,N,1,,,,,1,0.40,4,0.5,0.5,1.05,0,,0.3,6.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000550,1,2017-12-15 00:15:16,2017-12-15 00:21:59,N,1,,,,,1,0.90,6,0.5,0.5,1.45,0,,0.3,8.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000551,1,2017-12-15 00:24:38,2017-12-15 00:46:00,N,1,,,,,1,4.00,17,0.5,0.5,1,0,,0.3,19.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000552,1,2017-12-15 00:53:46,2017-12-15 00:59:26,N,1,,,,,1,1.20,6.5,0.5,0.5,2,0,,0.3,9.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000553,1,2017-12-15 00:00:57,2017-12-15 00:15:13,N,1,,,,,2,2.60,11.5,0.5,0.5,0,0,,0.3,12.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000554,1,2017-12-15 00:19:11,2017-12-15 01:02:12,N,1,,,,,2,6.70,30,0.5,0.5,7.8,0,,0.3,39.1,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000555,2,2017-12-15 00:35:27,2017-12-15 00:54:46,N,1,,,,,1,2.59,13.5,0.5,0.5,2.96,0,,0.3,17.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000556,2,2017-12-15 00:06:54,2017-12-15 00:37:31,N,1,,,,,1,9.21,31,0.5,0.5,8.08,0,,0.3,40.38,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000557,2,2017-12-15 00:38:32,2017-12-15 00:53:06,N,1,,,,,1,4.34,14.5,0.5,0.5,3.95,0,,0.3,19.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000558,2,2017-12-15 00:56:49,2017-12-15 01:03:38,N,1,,,,,1,1.81,7.5,0.5,0.5,0,0,,0.3,8.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000559,2,2017-12-15 00:03:50,2017-12-15 00:13:29,N,1,,,,,1,2.34,9.5,0.5,0.5,2.16,0,,0.3,12.96,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000560,2,2017-12-15 00:28:29,2017-12-15 00:35:00,N,1,,,,,1,1.32,6.5,0.5,0.5,1.56,0,,0.3,9.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000561,2,2017-12-15 00:50:11,2017-12-15 01:05:29,N,1,,,,,1,3.22,13.5,0.5,0.5,4.44,0,,0.3,19.24,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000562,1,2017-12-15 00:02:47,2017-12-15 00:41:54,N,1,,,,,1,7.80,31.5,0.5,0.5,6.55,0,,0.3,39.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000563,2,2017-12-15 00:05:55,2017-12-15 00:55:30,N,1,,,,,1,14.85,47,0.5,0.5,0,0,,0.3,48.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000564,2,2017-12-15 00:57:55,2017-12-15 00:58:01,N,5,,,,,1,0.00,10,0,0.5,0,0,,0.3,10.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000565,2,2017-12-15 00:13:16,2017-12-15 00:22:13,N,1,,,,,2,1.57,8,0.5,0.5,1.86,0,,0.3,11.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000566,2,2017-12-15 00:22:47,2017-12-15 00:24:31,N,1,,,,,1,0.43,3.5,0.5,0.5,0.72,0,,0.3,5.52,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000567,2,2017-12-15 00:32:29,2017-12-15 00:35:46,N,1,,,,,2,0.72,4.5,0.5,0.5,1.45,0,,0.3,7.25,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000568,2,2017-12-15 00:36:36,2017-12-15 00:52:02,N,1,,,,,2,2.10,11.5,0.5,0.5,2.56,0,,0.3,15.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000569,2,2017-12-15 00:54:25,2017-12-15 00:58:58,N,1,,,,,2,0.00,4.5,0.5,0.5,0,0,,0.3,5.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000570,1,2017-12-15 00:10:19,2017-12-15 00:42:43,N,1,,,,,1,5.40,22.5,0.5,0.5,0,0,,0.3,23.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000571,2,2017-12-15 00:22:17,2017-12-15 00:54:37,N,1,,,,,1,5.00,24,0.5,0.5,4,0,,0.3,29.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000572,2,2017-12-14 23:58:52,2017-12-15 00:49:35,N,1,,,,,1,11.20,41,0.5,0.5,7.21,5.76,,0.3,55.27,1,,,,yellow,0.09,1,1.2,32,22,5.82,,,,,,,,,,,,,,,,,,,,
1460000573,1,2017-12-15 00:36:07,2017-12-15 01:03:18,Y,1,,,,,1,5.00,20,0.5,0.5,4.25,0,,0.3,25.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000574,1,2017-12-15 00:15:45,2017-12-15 00:50:45,N,1,,,,,1,9.20,32,0.5,0.5,0,5.76,,0.3,39.06,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000575,1,2017-12-15 00:22:12,2017-12-15 00:38:30,N,1,,,,,1,2.20,12.5,0.5,0.5,0,0,,0.3,13.8,3,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000576,1,2017-12-15 00:41:46,2017-12-15 01:01:28,Y,1,,,,,1,3.70,15.5,0.5,0.5,3.35,0,,0.3,20.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000577,1,2017-12-15 00:25:24,2017-12-15 01:02:48,N,1,,,,,1,6.00,27.5,0.5,0.5,5.75,0,,0.3,34.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000578,2,2017-12-15 00:07:55,2017-12-15 00:12:34,N,1,,,,,1,0.96,5.5,0.5,0.5,0,0,,0.3,6.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000579,2,2017-12-15 00:13:36,2017-12-15 00:45:06,N,1,,,,,1,4.72,22,0.5,0.5,0,0,,0.3,23.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000580,2,2017-12-15 00:56:28,2017-12-15 01:00:43,N,1,,,,,1,0.79,5,0.5,0.5,1.26,0,,0.3,7.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000581,2,2017-12-15 00:08:58,2017-12-15 00:16:04,N,1,,,,,1,0.73,6,0.5,0.5,1.46,0,,0.3,8.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000582,2,2017-12-15 00:17:18,2017-12-15 00:50:54,N,1,,,,,1,3.66,22.5,0.5,0.5,4.76,0,,0.3,28.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000583,2,2017-12-15 00:49:08,2017-12-15 01:09:20,N,1,,,,,1,4.16,17,0.5,0.5,3.66,0,,0.3,21.96,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000584,2,2017-12-15 00:50:52,2017-12-15 01:03:54,N,1,,,,,1,2.09,10,0.5,0.5,2.82,0,,0.3,14.12,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000585,2,2017-12-15 00:31:15,2017-12-15 00:35:58,N,1,,,,,1,1.11,6,0.5,0.5,1.46,0,,0.3,8.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000586,2,2017-12-15 00:51:58,2017-12-15 01:15:57,N,1,,,,,1,2.79,16,0.5,0.5,0,5.76,,0.3,23.06,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000587,2,2017-12-15 00:03:58,2017-12-15 00:14:13,N,1,,,,,1,1.68,8.5,0.5,0.5,0,0,,0.3,9.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000588,2,2017-12-15 00:16:35,2017-12-15 01:25:44,N,1,,,,,1,15.30,59,0.5,0.5,12.06,0,,0.3,72.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000589,2,2017-12-15 00:06:16,2017-12-15 00:20:06,N,1,,,,,1,2.16,11,0.5,0.5,0,0,,0.3,12.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000590,2,2017-12-15 00:23:57,2017-12-15 00:34:07,N,1,,,,,1,1.73,8.5,0.5,0.5,2.45,0,,0.3,12.25,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000591,2,2017-12-15 00:35:00,2017-12-15 00:41:32,N,1,,,,,1,1.44,7,0.5,0.5,1.66,0,,0.3,9.96,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000592,2,2017-12-15 00:42:57,2017-12-15 01:17:17,N,1,,,,,1,8.22,28.5,0.5,0.5,7.45,0,,0.3,37.25,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000593,2,2017-12-15 00:11:18,2017-12-15 00:41:56,N,1,,,,,1,4.03,20.5,0.5,0.5,3.5,0,,0.3,25.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000594,2,2017-12-15 00:50:15,2017-12-15 01:05:16,N,1,,,,,1,2.87,12.5,0.5,0.5,0,0,,0.3,13.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000595,2,2017-12-15 00:44:35,2017-12-15 01:36:39,N,1,,,,,1,13.58,47,0.5,0.5,9,0,,0.3,57.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000596,2,2017-12-15 00:10:26,2017-12-15 01:14:11,N,2,,,,,5,22.01,52,0,0.5,11.71,5.76,,0.3,70.27,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000597,1,2017-12-15 00:20:22,2017-12-15 00:59:40,N,2,,,,,2,18.00,52,0,0.5,0,0,,0.3,52.8,3,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000598,1,2017-12-15 00:11:10,2017-12-15 00:22:38,N,1,,,,,2,2.00,10,0.5,0.5,0,0,,0.3,11.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000599,1,2017-12-15 00:25:37,2017-12-15 00:41:15,N,1,,,,,3,2.60,12.5,0.5,0.5,0,0,,0.3,13.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000600,2,2017-12-15 00:09:00,2017-12-15 00:36:33,N,1,,,,,2,2.91,18.5,0.5,0.5,0,0,,0.3,19.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000601,2,2017-12-15 00:53:19,2017-12-15 01:19:39,N,1,,,,,2,7.12,24,0.5,0.5,6.32,0,,0.3,31.62,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000602,2,2017-12-15 00:29:14,2017-12-15 00:38:05,N,1,,,,,1,1.50,8,0.5,0.5,2.32,0,,0.3,11.62,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000603,2,2017-12-15 00:39:24,2017-12-15 00:57:43,N,1,,,,,1,3.18,14,0.5,0.5,1,0,,0.3,16.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000604,2,2017-12-15 00:22:31,2017-12-15 00:29:31,N,1,,,,,3,1.44,7,0.5,0.5,0,0,,0.3,8.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000605,2,2017-12-15 00:48:10,2017-12-15 01:20:50,N,1,,,,,3,6.85,25.5,0.5,0.5,0,0,,0.3,26.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000606,1,2017-12-15 00:17:11,2017-12-15 00:24:41,N,1,,,,,1,1.30,7,0.5,0.5,0,0,,0.3,8.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000607,1,2017-12-15 00:26:31,2017-12-15 00:36:40,N,1,,,,,1,1.40,8.5,0.5,0.5,0,0,,0.3,9.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000608,1,2017-12-15 00:43:07,2017-12-15 01:24:10,N,1,,,,,1,10.60,37.5,0.5,0.5,2,0,,0.3,40.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000609,2,2017-12-15 00:22:39,2017-12-15 00:43:55,N,2,,,,,1,8.03,52,0,0.5,2,5.76,,0.3,60.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000610,1,2017-12-15 00:02:34,2017-12-15 00:11:48,N,1,,,,,1,4.90,15.5,0.5,0.5,3,0,,0.3,19.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000611,1,2017-12-15 00:29:52,2017-12-15 00:32:26,N,1,,,,,1,0.30,3.5,0.5,0.5,1.4,0,,0.3,6.2,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000612,1,2017-12-15 00:37:00,2017-12-15 00:40:47,N,1,,,,,1,0.70,5,0.5,0.5,1,0,,0.3,7.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000613,1,2017-12-15 00:23:17,2017-12-15 00:56:40,N,1,,,,,1,3.90,21.5,0.5,0.5,4.55,0,,0.3,27.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000614,2,2017-12-15 00:45:58,2017-12-15 01:16:25,N,1,,,,,1,15.59,44.5,0.5,0.5,0,5.76,,0.3,51.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000615,2,2017-12-15 00:10:34,2017-12-15 00:11:56,N,1,,,,,1,0.08,3,0.5,0.5,0,0,,0.3,4.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000616,2,2017-12-15 00:17:20,2017-12-15 00:36:29,N,1,,,,,1,2.99,14,0.5,0.5,0,0,,0.3,15.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000617,2,2017-12-15 00:10:12,2017-12-15 00:30:28,N,1,,,,,1,2.17,14,0.5,0.5,3.06,0,,0.3,18.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000618,2,2017-12-15 00:35:21,2017-12-15 00:55:02,N,1,,,,,1,6.77,22.5,0.5,0.5,4.76,0,,0.3,28.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000619,1,2017-12-15 00:46:25,2017-12-15 01:09:39,N,1,,,,,1,5.70,20.5,0.5,0.5,0,0,,0.3,21.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000620,1,2017-12-15 00:04:11,2017-12-15 00:18:22,N,1,,,,,1,3.10,12,0.5,0.5,2,0,,0.3,15.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000621,1,2017-12-15 00:31:24,2017-12-15 00:42:28,N,1,,,,,1,0.60,8,0.5,0.5,1.85,0,,0.3,11.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000622,1,2017-12-15 00:44:08,2017-12-15 00:54:43,N,1,,,,,1,1.60,9,0.5,0.5,3.05,0,,0.3,13.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000623,2,2017-12-15 00:19:42,2017-12-15 00:47:45,N,1,,,,,1,13.76,39,0.5,0.5,0,5.76,,0.3,46.06,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000624,1,2017-12-15 00:52:27,2017-12-15 01:08:45,N,1,,,,,1,7.70,23,0.5,0.5,0,0,,0.3,24.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000625,1,2017-12-15 00:19:22,2017-12-15 00:34:50,N,1,,,,,1,3.10,14,0.5,0.5,2,0,,0.3,17.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000626,1,2017-12-15 00:44:21,2017-12-15 01:23:25,N,1,,,,,1,10.70,35,0.5,0.5,7,0,,0.3,43.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000627,1,2017-12-15 00:50:13,2017-12-15 01:15:53,N,1,,,,,1,16.10,44,0.5,0.5,11.3,0,,0.3,56.6,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000628,2,2017-12-14 23:58:56,2017-12-15 00:03:51,N,1,,,,,3,1.18,6,0.5,0.5,1.02,0,,0.3,8.32,1,,,,yellow,0.09,1,1.2,32,22,5.82,,,,,,,,,,,,,,,,,,,,
1460000629,2,2017-12-15 00:05:10,2017-12-15 00:41:52,N,1,,,,,4,9.78,33.5,0.5,0.5,2,5.76,,0.3,42.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000630,1,2017-12-15 00:02:37,2017-12-15 00:09:45,N,1,,,,,1,1.30,7,0.5,0.5,1.65,0,,0.3,9.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000631,1,2017-12-15 00:16:03,2017-12-15 00:22:28,N,1,,,,,1,0.50,5.5,0.5,0.5,0,0,,0.3,6.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000632,1,2017-12-15 00:24:13,2017-12-15 00:37:26,N,1,,,,,2,1.90,10.5,0.5,0.5,0,0,,0.3,11.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000633,1,2017-12-15 00:48:50,2017-12-15 00:52:32,N,1,,,,,1,0.50,4.5,0.5,0.5,0,0,,0.3,5.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000634,2,2017-12-15 00:10:46,2017-12-15 00:39:54,N,1,,,,,2,4.28,21,0.5,0.5,4.46,0,,0.3,26.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000635,1,2017-12-15 00:03:20,2017-12-15 00:09:54,N,1,,,,,5,1.00,6.5,0.5,0.5,1.55,0,,0.3,9.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000636,1,2017-12-15 00:11:44,2017-12-15 00:13:57,N,1,,,,,2,0.50,4,0.5,0.5,1.05,0,,0.3,6.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000637,1,2017-12-15 00:14:52,2017-12-15 00:31:37,N,1,,,,,1,3.60,14.5,0.5,0.5,3.15,0,,0.3,18.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000638,1,2017-12-15 00:28:49,2017-12-15 00:35:36,N,1,,,,,1,1.30,7,0.5,0.5,0,0,,0.3,8.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000639,1,2017-12-15 00:46:13,2017-12-15 01:11:12,N,1,,,,,1,4.90,19,0.5,0.5,0,0,,0.3,20.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000640,1,2017-12-15 00:52:19,2017-12-15 01:28:24,N,1,,,,,1,6.60,29,0.5,0.5,6.05,0,,0.3,36.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000641,1,2017-12-15 00:09:02,2017-12-15 00:15:32,N,1,,,,,2,1.30,7,0.5,0.5,0,0,,0.3,8.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000642,1,2017-12-15 00:32:12,2017-12-15 00:38:02,N,1,,,,,1,1.30,6.5,0.5,0.5,1.55,0,,0.3,9.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000643,1,2017-12-15 00:57:40,2017-12-15 01:05:08,N,1,,,,,1,1.40,7.5,0.5,0.5,0,0,,0.3,8.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000644,2,2017-12-15 00:07:59,2017-12-15 00:37:06,N,1,,,,,1,3.73,19,0.5,0.5,1,0,,0.3,21.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000645,2,2017-12-15 00:49:50,2017-12-15 00:56:39,N,1,,,,,1,1.95,8.5,0.5,0.5,0,0,,0.3,9.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000646,2,2017-12-15 00:01:20,2017-12-15 00:25:50,N,1,,,,,5,5.33,20.5,0.5,0.5,4.36,0,,0.3,26.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000647,2,2017-12-15 00:28:14,2017-12-15 00:29:15,N,1,,,,,5,0.03,3,0.5,0.5,0,0,,0.3,4.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000648,2,2017-12-15 00:39:06,2017-12-15 00:53:21,N,1,,,,,5,1.21,10.5,0.5,0.5,2.36,0,,0.3,14.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000649,2,2017-12-15 00:54:22,2017-12-15 01:20:16,N,1,,,,,5,4.22,18.5,0.5,0.5,4.95,0,,0.3,24.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000650,2,2017-12-15 00:21:40,2017-12-15 00:42:49,N,1,,,,,2,3.51,15.5,0.5,0.5,0,0,,0.3,16.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000651,2,2017-12-15 01:00:21,2017-12-15 01:20:15,N,1,,,,,2,8.83,26.5,0.5,0.5,0,5.76,,0.3,33.56,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000652,1,2017-12-15 00:51:00,2017-12-15 01:03:07,N,1,,,,,1,2.10,10,0.5,0.5,2.3,0,,0.3,13.6,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000653,1,2017-12-15 00:00:05,2017-12-15 00:14:23,N,1,,,,,1,2.60,12,0.5,0.5,3.95,0,,0.3,17.25,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000654,1,2017-12-15 00:26:10,2017-12-15 00:30:41,N,1,,,,,1,0.70,5,0.5,0.5,0.95,0,,0.3,7.25,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000655,1,2017-12-15 00:32:06,2017-12-15 00:42:31,N,1,,,,,1,1.70,9,0.5,0.5,0,0,,0.3,10.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000656,1,2017-12-15 00:43:47,2017-12-15 01:04:09,N,1,,,,,1,3.90,15.5,0.5,0.5,0,0,,0.3,16.8,3,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000657,2,2017-12-15 00:00:11,2017-12-15 00:29:57,N,1,,,,,3,5.36,24,0.5,0.5,7.59,0,,0.3,32.89,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000658,2,2017-12-15 00:55:15,2017-12-15 01:12:55,N,1,,,,,4,2.32,13,0.5,0.5,2.86,0,,0.3,17.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000659,2,2017-12-15 00:05:39,2017-12-15 00:32:38,N,1,,,,,1,3.69,19,0.5,0.5,6.09,0,,0.3,26.39,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000660,2,2017-12-15 00:36:55,2017-12-15 01:15:17,N,1,,,,,1,10.12,36,0.5,0.5,7.46,0,,0.3,44.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000661,2,2017-12-15 00:11:13,2017-12-15 00:26:46,N,1,,,,,3,4.16,15.5,0.5,0.5,3.36,0,,0.3,20.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000662,2,2017-12-15 00:29:43,2017-12-15 01:09:14,N,1,,,,,3,4.33,25.5,0.5,0.5,3,0,,0.3,29.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000663,2,2017-12-15 00:04:03,2017-12-15 00:18:22,N,1,,,,,2,1.95,11,0.5,0.5,0,0,,0.3,12.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000664,2,2017-12-15 00:23:19,2017-12-15 00:25:03,N,1,,,,,2,0.55,3.5,0.5,0.5,1.2,0,,0.3,6,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000665,2,2017-12-15 00:28:30,2017-12-15 00:48:26,N,1,,,,,2,3.86,16.5,0.5,0.5,2,0,,0.3,19.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000666,2,2017-12-15 00:54:59,2017-12-16 00:48:26,N,1,,,,,2,6.51,27,0.5,0.5,0,0,,0.3,28.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000667,1,2017-12-15 00:00:19,2017-12-15 00:07:29,N,1,,,,,1,1.30,6.5,0.5,0.5,0.5,0,,0.3,8.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000668,1,2017-12-15 00:14:16,2017-12-15 00:24:38,N,1,,,,,1,1.70,8.5,0.5,0.5,1.95,0,,0.3,11.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000669,1,2017-12-15 00:28:03,2017-12-15 00:48:26,N,1,,,,,1,3.70,15.5,0.5,0.5,1.5,0,,0.3,18.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000670,2,2017-12-15 00:04:25,2017-12-15 00:10:06,N,1,,,,,1,0.92,6,0.5,0.5,0,0,,0.3,7.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000671,2,2017-12-15 00:11:22,2017-12-15 00:36:07,N,1,,,,,1,4.65,20,0.5,0.5,5.32,0,,0.3,26.62,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000672,2,2017-12-15 00:49:56,2017-12-15 00:56:19,N,1,,,,,1,1.85,7.5,0.5,0.5,1.76,0,,0.3,10.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000673,1,2017-12-15 00:54:32,2017-12-15 01:06:37,N,1,,,,,2,4.40,15,0.5,0.5,0,0,,0.3,16.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000674,2,2017-12-15 00:10:56,2017-12-15 00:49:42,N,2,,,,,1,19.22,52,0,0.5,0,5.76,,0.3,58.56,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000675,2,2017-12-15 00:53:14,2017-12-15 01:10:10,N,1,,,,,1,2.44,13,0.5,0.5,2.86,0,,0.3,17.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000676,1,2017-12-15 00:24:30,2017-12-15 00:27:49,N,1,,,,,1,0.80,5,0.5,0.5,1,0,,0.3,7.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000677,1,2017-12-15 00:35:28,2017-12-15 00:43:21,N,1,,,,,1,1.20,7,0.5,0.5,0,0,,0.3,8.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000678,1,2017-12-15 00:45:35,2017-12-15 00:55:48,N,1,,,,,1,1.60,8.5,0.5,0.5,2.9,0,,0.3,12.7,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000679,2,2017-12-15 00:07:29,2017-12-15 00:38:34,N,1,,,,,1,13.41,39.5,0.5,0.5,9.31,5.76,,0.3,55.87,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000680,2,2017-12-15 00:48:16,2017-12-15 01:03:17,N,1,,,,,1,3.08,13.5,0.5,0.5,0,0,,0.3,14.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000681,1,2017-12-15 00:22:27,2017-12-15 00:38:33,N,1,,,,,1,1.20,11,0.5,0.5,0,0,,0.3,12.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000682,1,2017-12-15 00:41:29,2017-12-15 00:46:11,N,1,,,,,1,0.80,5.5,0.5,0.5,12.3,0,,0.3,19.1,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000683,1,2017-12-15 00:57:43,2017-12-15 01:22:11,N,1,,,,,1,6.00,21.5,0.5,0.5,0,0,,0.3,22.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000684,2,2017-12-15 00:36:07,2017-12-15 01:08:34,N,1,,,,,1,5.41,22.5,0.5,0.5,0,0,,0.3,23.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000685,2,2017-12-15 00:32:43,2017-12-15 00:38:26,N,1,,,,,1,1.00,6.5,0.5,0.5,1,0,,0.3,8.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000686,2,2017-12-15 00:57:05,2017-12-15 01:05:25,N,1,,,,,1,1.67,8,0.5,0.5,1.86,0,,0.3,11.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000687,2,2017-12-15 00:19:35,2017-12-15 00:31:13,N,1,,,,,1,2.63,11,0.5,0.5,0,0,,0.3,12.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000688,1,2017-12-15 00:04:18,2017-12-15 00:12:26,N,1,,,,,1,0.90,7,0.5,0.5,1.65,0,,0.3,9.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000689,1,2017-12-15 00:14:22,2017-12-15 00:59:18,N,1,,,,,1,6.60,29.5,0.5,0.5,6.15,0,,0.3,36.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000690,1,2017-12-15 00:00:52,2017-12-15 00:49:27,N,1,,,,,1,8.70,40,0.5,0.5,0,5.76,,0.3,47.06,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000691,1,2017-12-15 00:17:13,2017-12-15 00:37:07,N,1,,,,,1,3.10,14.5,0.5,0.5,3.15,0,,0.3,18.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000692,1,2017-12-15 00:38:56,2017-12-15 00:49:41,N,1,,,,,1,1.10,8,0.5,0.5,1.85,0,,0.3,11.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000693,1,2017-12-15 00:52:01,2017-12-15 01:17:31,N,1,,,,,1,4.70,19.5,0.5,0.5,4,0,,0.3,24.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000694,2,2017-12-15 00:03:29,2017-12-15 00:03:31,N,5,,,,,1,0.06,75,0,0,15.06,0,,0.3,90.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000873,1,2017-12-15 00:48:42,2017-12-15 00:51:50,Y,1,,,,,1,0.40,4,0.5,0.5,0.79,0,,0.3,6.09,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000695,2,2017-12-15 00:22:23,2017-12-15 00:29:43,N,1,,,,,2,1.23,6.5,0.5,0.5,2.34,0,,0.3,10.14,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000696,2,2017-12-15 00:44:53,2017-12-15 00:48:01,N,1,,,,,2,0.38,4,0.5,0.5,1,0,,0.3,6.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000697,1,2017-12-15 00:19:26,2017-12-15 00:28:01,N,1,,,,,1,1.40,8,0.5,0.5,1.85,0,,0.3,11.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000698,1,2017-12-15 00:39:46,2017-12-15 01:14:14,N,1,,,,,1,7.80,28,0.5,0.5,3,0,,0.3,32.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000699,1,2017-12-15 00:16:44,2017-12-15 00:45:52,N,2,,,,,3,17.30,52,0,0.5,0,5.76,,0.3,58.56,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000700,2,2017-12-15 00:02:11,2017-12-15 00:29:09,N,1,,,,,1,6.58,24,0.5,0.5,5.06,0,,0.3,30.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000701,2,2017-12-15 00:30:14,2017-12-15 00:46:04,N,1,,,,,1,2.80,12.5,0.5,0.5,0,0,,0.3,13.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000702,1,2017-12-15 00:10:46,2017-12-15 00:40:01,N,1,,,,,1,9.90,31,0.5,0.5,7.6,5.76,,0.3,45.66,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000703,1,2017-12-15 00:48:41,2017-12-15 01:06:29,N,1,,,,,1,3.50,15,0.5,0.5,4.05,0,,0.3,20.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000704,2,2017-12-15 00:22:48,2017-12-15 00:42:03,N,1,,,,,1,3.05,15,0.5,0.5,2.44,0,,0.3,18.74,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000705,2,2017-12-15 00:46:30,2017-12-15 01:13:28,N,1,,,,,1,3.78,19,0.5,0.5,5.08,0,,0.3,25.38,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000706,2,2017-12-15 00:01:57,2017-12-15 00:25:18,N,1,,,,,1,4.73,19.5,0.5,0.5,6.24,0,,0.3,27.04,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000707,2,2017-12-15 00:01:06,2017-12-15 00:05:42,N,1,,,,,2,1.25,6,0.5,0.5,0,0,,0.3,7.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000708,2,2017-12-15 00:28:01,2017-12-15 00:35:18,N,1,,,,,2,1.25,7,0.5,0.5,1.66,0,,0.3,9.96,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000709,2,2017-12-15 00:36:39,2017-12-15 00:50:41,N,1,,,,,2,3.16,13,0.5,0.5,0,0,,0.3,14.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000710,2,2017-12-15 00:02:19,2017-12-15 00:22:36,N,1,,,,,1,3.68,15.5,0.5,0.5,3.36,0,,0.3,22.11,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000711,2,2017-12-15 00:23:44,2017-12-15 00:33:57,N,1,,,,,1,1.77,9,0.5,0.5,2.58,0,,0.3,12.88,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000712,2,2017-12-15 00:37:09,2017-12-15 00:56:53,N,1,,,,,1,2.92,15,0.5,0.5,2,0,,0.3,18.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000713,2,2017-12-15 00:12:13,2017-12-15 00:29:34,N,1,,,,,1,12.02,33,0.5,0.5,8.58,0,,0.3,42.88,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000714,2,2017-12-15 00:11:36,2017-12-15 00:40:50,N,1,,,,,3,5.63,23,0.5,0.5,4.86,0,,0.3,29.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000715,2,2017-12-15 00:43:02,2017-12-15 00:49:08,N,1,,,,,3,1.68,7,0.5,0.5,0,0,,0.3,8.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000716,1,2017-12-15 00:05:41,2017-12-15 00:32:29,N,1,,,,,2,4.20,20,0.5,0.5,5.3,0,,0.3,26.6,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000717,1,2017-12-15 00:44:27,2017-12-15 01:04:20,N,1,,,,,1,4.90,17,0.5,0.5,5.45,0,,0.3,23.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000718,1,2017-12-15 00:30:44,2017-12-15 00:40:04,N,1,,,,,1,1.50,8.5,0.5,0.5,1.5,0,,0.3,11.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000719,1,2017-12-15 00:41:48,2017-12-15 00:50:54,N,1,,,,,1,1.50,8,0.5,0.5,1.85,0,,0.3,11.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000720,1,2017-12-15 00:59:42,2017-12-15 01:12:12,N,1,,,,,1,4.20,13.5,0.5,0.5,2,0,,0.3,16.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000721,2,2017-12-15 00:08:17,2017-12-15 00:23:49,N,1,,,,,2,6.81,21.5,0.5,0.5,3.5,0,,0.3,26.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000722,2,2017-12-15 00:54:07,2017-12-15 01:12:23,N,1,,,,,2,8.71,25,0.5,0.5,0,5.76,,0.3,32.06,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000723,1,2017-12-15 00:10:46,2017-12-15 00:23:46,N,1,,,,,1,1.80,10.5,0.5,0.5,1,0,,0.3,12.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000724,1,2017-12-15 00:25:04,2017-12-15 00:40:45,N,1,,,,,1,3.10,13.5,0.5,0.5,3.7,0,,0.3,18.5,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000725,1,2017-12-15 00:46:29,2017-12-15 01:22:33,N,1,,,,,1,5.80,26.5,0.5,0.5,5.55,0,,0.3,33.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000726,2,2017-12-15 00:07:51,2017-12-15 00:14:32,N,1,,,,,1,0.81,6,0.5,0.5,1.46,0,,0.3,8.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000727,2,2017-12-15 00:17:00,2017-12-15 00:37:28,N,1,,,,,1,1.28,13,0.5,0.5,2.86,0,,0.3,17.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000728,2,2017-12-15 00:19:24,2017-12-15 00:34:41,N,1,,,,,1,4.52,15.5,0.5,0.5,0,0,,0.3,16.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000729,2,2017-12-15 00:01:16,2017-12-15 00:17:43,N,1,,,,,1,1.05,11,0.5,0.5,2.46,0,,0.3,14.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000730,2,2017-12-15 00:18:40,2017-12-15 00:31:35,N,1,,,,,1,1.22,9.5,0.5,0.5,0,0,,0.3,10.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000731,1,2017-12-15 00:31:52,2017-12-15 00:38:45,N,1,,,,,1,1.30,6.5,0.5,0.5,1.55,0,,0.3,9.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000732,1,2017-12-15 00:18:16,2017-12-15 00:43:11,N,1,,,,,1,7.00,25,0.5,0.5,7,0,,0.3,33.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000733,2,2017-12-15 00:05:53,2017-12-15 00:35:02,N,1,,,,,1,6.85,26,0.5,0.5,0,0,,0.3,27.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000734,2,2017-12-15 00:53:47,2017-12-15 01:24:10,N,1,,,,,1,8.55,29.5,0.5,0.5,3,5.76,,0.3,39.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000735,1,2017-12-15 00:18:57,2017-12-15 00:36:41,N,1,,,,,1,2.40,13,0.5,0.5,2.85,0,,0.3,17.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000736,1,2017-12-15 00:39:32,2017-12-15 00:51:27,N,1,,,,,1,2.20,10,0.5,0.5,0,0,,0.3,11.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000737,1,2017-12-15 00:09:35,2017-12-15 00:10:30,N,4,,,,,0,0.00,3,0.5,0.5,0,0,,0.3,4.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000738,1,2017-12-15 00:10:38,2017-12-15 00:50:50,N,2,,,,,2,18.30,52,0,0.5,10,0,,0.3,62.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000739,1,2017-12-15 00:59:40,2017-12-15 01:12:42,N,1,,,,,1,1.90,10,0.5,0.5,2.25,0,,0.3,13.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000740,2,2017-12-15 00:10:27,2017-12-15 00:32:38,N,1,,,,,1,15.36,41.5,0.5,0.5,8.56,0,,0.3,51.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000741,2,2017-12-15 00:43:57,2017-12-15 01:14:33,N,1,,,,,1,7.44,28,0.5,0.5,8.76,5.76,,0.3,43.82,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000742,1,2017-12-15 00:23:14,2017-12-15 00:40:55,N,1,,,,,1,3.10,14,0.5,0.5,1.5,0,,0.3,16.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000743,1,2017-12-15 00:45:14,2017-12-15 01:06:11,N,1,,,,,1,4.60,19.5,0.5,0.5,4.15,0,,0.3,24.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000744,1,2017-12-15 00:05:13,2017-12-15 00:31:01,N,1,,,,,1,4.90,20.5,0.5,0.5,4.35,0,,0.3,26.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000745,1,2017-12-15 00:55:03,2017-12-15 01:00:44,N,1,,,,,1,0.90,5.5,0.5,0.5,0,0,,0.3,6.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000746,1,2017-12-15 00:08:52,2017-12-15 00:44:05,N,1,,,,,1,3.80,23,0.5,0.5,6.05,0,,0.3,30.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000747,2,2017-12-15 00:33:05,2017-12-15 00:48:27,N,1,,,,,1,2.99,13.5,0.5,0.5,2.96,0,,0.3,17.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000748,2,2017-12-15 00:15:29,2017-12-15 00:54:51,N,1,,,,,1,4.36,26,0.5,0.5,5.46,0,,0.3,32.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000749,1,2017-12-15 00:01:36,2017-12-15 00:04:58,N,1,,,,,1,0.80,4.5,0.5,0.5,0,0,,0.3,5.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000750,1,2017-12-15 00:15:24,2017-12-15 00:21:09,N,1,,,,,1,1.80,7.5,0.5,0.5,0,0,,0.3,8.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000751,1,2017-12-15 00:40:23,2017-12-15 00:46:29,N,1,,,,,1,1.00,6,0.5,0.5,0,0,,0.3,7.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000752,1,2017-12-15 00:49:17,2017-12-15 01:00:42,N,1,,,,,1,1.30,8.5,0.5,0.5,0,0,,0.3,9.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000753,2,2017-12-15 00:11:14,2017-12-15 00:23:43,N,1,,,,,3,2.00,10,0.5,0.5,0,0,,0.3,11.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000754,2,2017-12-15 00:31:31,2017-12-15 00:45:43,N,1,,,,,2,2.13,11.5,0.5,0.5,1,0,,0.3,13.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000755,2,2017-12-15 00:40:55,2017-12-15 01:00:47,N,1,,,,,1,2.88,14,0.5,0.5,0,0,,0.3,15.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000756,2,2017-12-15 00:08:22,2017-12-15 00:10:21,N,1,,,,,1,0.68,4,0.5,0.5,0,0,,0.3,5.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000757,2,2017-12-15 00:11:45,2017-12-15 00:32:57,N,1,,,,,1,4.32,18,0.5,0.5,3.86,0,,0.3,23.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000758,2,2017-12-15 00:45:14,2017-12-15 01:04:07,N,1,,,,,1,4.99,18,0.5,0.5,5.79,0,,0.3,25.09,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000759,1,2017-12-15 00:31:29,2017-12-15 00:31:58,N,5,,,,,1,0.00,18,0,0,3.65,0,,0.3,21.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000760,1,2017-12-15 00:34:33,2017-12-15 00:39:54,N,1,,,,,1,1.60,7,0.5,0.5,2.05,0,,0.3,10.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000761,1,2017-12-15 00:42:43,2017-12-15 00:51:05,N,1,,,,,1,1.50,8.5,0.5,0.5,0,0,,0.3,9.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000762,1,2017-12-15 00:17:16,2017-12-15 00:26:19,N,1,,,,,1,2.30,9,0.5,0.5,2.05,0,,0.3,12.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000763,1,2017-12-15 00:35:40,2017-12-15 00:49:25,N,1,,,,,1,3.20,13,0.5,0.5,2.85,0,,0.3,17.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000764,2,2017-12-15 00:23:04,2017-12-15 00:42:31,N,1,,,,,1,8.98,26.5,0.5,0.5,2,0,,0.3,29.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000765,1,2017-12-15 00:46:24,2017-12-15 01:01:21,N,1,,,,,1,2.80,12,0.5,0.5,0,0,,0.3,13.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000766,2,2017-12-14 23:57:15,2017-12-15 00:15:42,N,1,,,,,1,5.55,20,0.5,0.5,4.26,0,,0.3,25.56,1,,,,yellow,0.09,1,1.2,32,22,5.82,,,,,,,,,,,,,,,,,,,,
1460000767,2,2017-12-15 00:23:19,2017-12-15 00:34:54,N,1,,,,,1,1.26,8.5,0.5,0.5,1.96,0,,0.3,11.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000768,2,2017-12-15 00:41:52,2017-12-15 00:48:13,N,1,,,,,1,0.82,6,0.5,0.5,1.82,0,,0.3,9.12,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000769,2,2017-12-15 00:49:53,2017-12-15 01:02:43,N,1,,,,,1,2.31,11,0.5,0.5,3.69,0,,0.3,15.99,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000770,2,2017-12-15 00:29:46,2017-12-15 01:15:02,N,1,,,,,5,9.27,37.5,0.5,0.5,8.91,5.76,,0.3,53.47,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000771,2,2017-12-15 00:03:21,2017-12-15 00:16:01,N,1,,,,,5,2.26,10.5,0.5,0.5,2.36,0,,0.3,14.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000772,2,2017-12-15 00:16:49,2017-12-15 00:24:40,N,1,,,,,5,1.39,7.5,0.5,0.5,1,0,,0.3,9.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000773,2,2017-12-15 00:29:56,2017-12-15 00:42:19,N,1,,,,,5,1.61,9.5,0.5,0.5,2.16,0,,0.3,12.96,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000774,2,2017-12-15 00:43:51,2017-12-15 00:54:08,N,1,,,,,5,1.04,7,0.5,0.5,0.7,0,,0.3,9,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000775,2,2017-12-15 00:55:15,2017-12-15 00:59:45,N,1,,,,,5,0.38,4.5,0.5,0.5,0,0,,0.3,5.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000776,2,2017-12-15 00:15:43,2017-12-15 00:43:10,N,1,,,,,3,3.34,19,0.5,0.5,4.06,0,,0.3,24.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000777,2,2017-12-15 00:54:12,2017-12-15 01:05:13,N,1,,,,,3,2.16,10,0.5,0.5,0,0,,0.3,11.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000778,2,2017-12-15 00:22:21,2017-12-15 00:48:52,N,1,,,,,1,9.82,30.5,0.5,0.5,0,0,,0.3,31.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000779,2,2017-12-15 00:53:35,2017-12-15 00:55:32,N,1,,,,,1,0.36,3.5,0.5,0.5,0,0,,0.3,4.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000780,2,2017-12-15 00:33:11,2017-12-15 00:39:57,N,1,,,,,1,1.35,6.5,0.5,0.5,1.56,0,,0.3,9.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000781,2,2017-12-15 00:40:32,2017-12-15 00:59:34,N,1,,,,,1,4.29,16,0.5,0.5,0,0,,0.3,17.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000782,2,2017-12-15 00:21:31,2017-12-15 00:25:39,N,1,,,,,1,0.27,4.5,0.5,0.5,1.16,0,,0.3,6.96,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000783,2,2017-12-15 00:22:28,2017-12-15 00:57:26,N,1,,,,,1,5.58,26,0.5,0.5,1,0,,0.3,28.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000784,1,2017-12-15 00:44:31,2017-12-15 00:48:39,N,1,,,,,3,0.60,4.5,0.5,0.5,1.7,0,,0.3,7.5,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000785,1,2017-12-15 00:54:03,2017-12-15 01:08:58,N,1,,,,,1,2.60,12,0.5,0.5,2.65,0,,0.3,15.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000786,1,2017-12-15 00:17:08,2017-12-15 00:38:33,N,1,,,,,1,9.00,27.5,0.5,0.5,6,5.76,,0.3,40.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000787,1,2017-12-15 00:40:42,2017-12-15 00:58:49,N,1,,,,,1,3.70,15,0.5,0.5,0.02,0,,0.3,16.32,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000788,1,2017-12-15 00:07:49,2017-12-15 00:32:05,N,1,,,,,1,3.90,17.5,0.5,0.5,1.88,0,,0.3,20.68,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000789,1,2017-12-15 00:36:09,2017-12-15 00:37:14,N,1,,,,,1,0.30,3,0.5,0.5,0.85,0,,0.3,5.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000790,1,2017-12-15 00:41:57,2017-12-15 01:13:05,N,1,,,,,1,8.30,29.5,0.5,0.5,7.7,0,,0.3,38.5,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000791,2,2017-12-15 00:33:18,2017-12-15 00:39:28,N,1,,,,,2,1.87,8,0.5,0.5,1,0,,0.3,10.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000792,2,2017-12-15 00:41:31,2017-12-15 01:14:47,N,1,,,,,2,9.43,32.5,0.5,0.5,6.76,0,,0.3,40.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000793,2,2017-12-15 00:51:28,2017-12-15 01:18:40,N,1,,,,,3,15.96,45,0.5,0.5,0,0,,0.3,46.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000794,2,2017-12-15 00:24:56,2017-12-15 00:57:40,N,1,,,,,1,5.40,23.5,0.5,0.5,0,5.76,,0.3,30.56,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000795,2,2017-12-15 00:50:36,2017-12-15 01:11:29,N,1,,,,,1,14.03,38,0.5,0.5,7.86,0,,0.3,47.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000796,2,2017-12-15 00:16:48,2017-12-15 00:25:42,N,1,,,,,1,3.31,11,0.5,0.5,2.46,0,,0.3,14.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000797,2,2017-12-15 00:28:58,2017-12-15 00:45:19,N,1,,,,,1,2.56,13,0.5,0.5,0,0,,0.3,14.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000798,2,2017-12-15 00:47:35,2017-12-15 01:25:59,N,1,,,,,1,5.91,26,0.5,0.5,2,5.76,,0.3,35.06,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000799,1,2017-12-15 00:04:08,2017-12-15 00:15:31,N,1,,,,,1,1.90,9,0.5,0.5,2.55,0,,0.3,12.85,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000800,1,2017-12-15 00:17:18,2017-12-15 00:31:04,N,1,,,,,1,2.50,11.5,0.5,0.5,1.5,0,,0.3,14.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000801,2,2017-12-15 00:17:03,2017-12-15 00:54:29,N,1,,,,,2,6.26,25.5,0.5,0.5,0,0,,0.3,26.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000802,1,2017-12-15 00:18:30,2017-12-15 00:32:04,N,1,,,,,2,1.40,9.5,0.5,0.5,1,0,,0.3,11.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000803,1,2017-12-15 00:40:55,2017-12-15 00:52:32,N,1,,,,,2,1.70,9.5,0.5,0.5,2.15,0,,0.3,12.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000804,1,2017-12-15 00:58:53,2017-12-15 01:14:54,N,1,,,,,1,3.00,13,0.5,0.5,3.55,0,,0.3,17.85,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000805,1,2017-12-15 00:21:00,2017-12-15 00:39:48,N,1,,,,,1,4.40,16.5,0.5,0.5,0,0,,0.3,17.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000806,1,2017-12-15 00:41:50,2017-12-15 00:54:39,N,1,,,,,1,5.00,16.5,0.5,0.5,0,0,,0.3,17.8,4,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000807,1,2017-12-15 00:16:11,2017-12-15 00:23:24,N,1,,,,,1,1.10,6.5,0.5,0.5,1.4,0,,0.3,9.2,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000808,1,2017-12-15 00:36:58,2017-12-15 01:23:38,N,1,,,,,3,4.50,29.5,0.5,0.5,6.15,0,,0.3,36.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000809,1,2017-12-15 00:02:18,2017-12-15 00:27:13,N,1,,,,,1,5.20,20,0.5,0.5,4.25,0,,0.3,25.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000810,1,2017-12-15 00:45:21,2017-12-15 00:53:02,N,1,,,,,1,1.40,7,0.5,0.5,1,0,,0.3,9.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000811,1,2017-12-15 00:54:15,2017-12-15 01:08:34,N,1,,,,,1,2.90,12,0.5,0.5,0,0,,0.3,13.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000812,1,2017-12-15 00:05:06,2017-12-15 00:10:53,N,1,,,,,1,1.60,7,0.5,0.5,3,0,,0.3,11.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000813,1,2017-12-15 00:13:19,2017-12-15 00:26:11,N,1,,,,,1,2.40,11,0.5,0.5,2.45,0,,0.3,14.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000814,1,2017-12-15 00:56:51,2017-12-15 01:03:21,N,1,,,,,1,0.50,6,0.5,0.5,0,0,,0.3,7.3,3,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000815,1,2017-12-15 00:05:46,2017-12-15 00:43:24,N,1,,,,,1,3.20,23,0.5,0.5,0,0,,0.3,24.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000816,1,2017-12-15 00:49:45,2017-12-15 01:01:15,N,1,,,,,1,1.80,9.5,0.5,0.5,2.15,0,,0.3,12.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000817,1,2017-12-15 00:06:47,2017-12-15 00:19:20,N,1,,,,,1,2.60,11,0.5,0.5,2.45,0,,0.3,14.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000818,1,2017-12-15 00:21:08,2017-12-15 00:39:13,N,1,,,,,1,4.20,16,0.5,0.5,2,0,,0.3,19.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000819,1,2017-12-15 00:41:09,2017-12-15 00:46:56,N,1,,,,,1,0.70,5.5,0.5,0.5,1.35,0,,0.3,8.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000820,1,2017-12-15 00:49:48,2017-12-15 00:56:43,N,1,,,,,4,0.80,6,0.5,0.5,1.8,0,,0.3,9.1,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000821,1,2017-12-15 00:57:42,2017-12-15 01:16:36,N,1,,,,,1,3.00,14,0.5,0.5,2.5,0,,0.3,17.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000822,1,2017-12-15 00:04:26,2017-12-15 00:19:40,N,1,,,,,2,1.40,10,0.5,0.5,0,0,,0.3,11.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000823,1,2017-12-15 00:26:21,2017-12-15 00:34:11,N,1,,,,,1,1.10,7,0.5,0.5,2,0,,0.3,10.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000824,1,2017-12-15 00:39:20,2017-12-15 00:56:50,N,1,,,,,1,5.00,17,0.5,0.5,3.65,0,,0.3,21.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000825,2,2017-12-15 00:12:02,2017-12-15 00:26:03,N,1,,,,,1,2.27,11,0.5,0.5,2.46,0,,0.3,14.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000826,2,2017-12-15 00:32:43,2017-12-15 00:43:57,N,1,,,,,1,3.13,11.5,0.5,0.5,0,0,,0.3,12.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000827,2,2017-12-15 00:45:55,2017-12-15 00:54:31,N,1,,,,,1,1.89,8.5,0.5,0.5,0,0,,0.3,9.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000828,2,2017-12-15 00:04:28,2017-12-15 00:48:05,N,1,,,,,1,7.08,31,0.5,0.5,6.46,0,,0.3,38.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000829,1,2017-12-15 00:01:24,2017-12-15 00:38:04,N,1,,,,,2,6.60,27.5,0.5,0.5,5.75,0,,0.3,34.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000830,1,2017-12-15 00:17:14,2017-12-15 00:26:11,N,1,,,,,1,2.00,8.5,0.5,0.5,0,0,,0.3,9.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000831,1,2017-12-15 00:30:59,2017-12-15 00:47:45,N,1,,,,,1,3.00,13,0.5,0.5,2.85,0,,0.3,17.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000832,1,2017-12-15 00:51:48,2017-12-15 01:05:41,N,1,,,,,1,6.70,20.5,0.5,0.5,4.35,0,,0.3,26.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000833,2,2017-12-15 00:04:55,2017-12-15 00:17:37,N,1,,,,,1,2.77,12,0.5,0.5,0,0,,0.3,13.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000834,2,2017-12-15 00:27:10,2017-12-15 00:40:45,N,1,,,,,1,4.32,15,0.5,0.5,3.26,0,,0.3,19.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000835,2,2017-12-15 00:47:01,2017-12-15 01:00:18,N,1,,,,,1,1.12,10,0.5,0.5,2.26,0,,0.3,13.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000836,2,2017-12-15 00:34:54,2017-12-15 00:51:54,N,1,,,,,1,1.76,11.5,0.5,0.5,2.56,0,,0.3,15.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000837,1,2017-12-15 00:41:18,2017-12-15 00:54:16,N,1,,,,,1,2.70,11.5,0.5,0.5,2.55,0,,0.3,15.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000838,1,2017-12-15 00:16:37,2017-12-15 00:25:14,N,1,,,,,1,1.00,7.5,0.5,0.5,2.2,0,,0.3,11,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000839,1,2017-12-15 00:29:29,2017-12-15 00:35:48,N,1,,,,,1,0.80,6,0.5,0.5,1.45,0,,0.3,8.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000840,1,2017-12-15 00:44:11,2017-12-15 00:54:00,N,1,,,,,1,1.30,8,0.5,0.5,1.85,0,,0.3,11.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000841,2,2017-12-15 00:03:15,2017-12-15 00:12:39,N,1,,,,,1,1.44,8,0.5,0.5,1.86,0,,0.3,11.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000842,2,2017-12-15 00:13:34,2017-12-15 00:25:05,N,1,,,,,1,0.70,8,0.5,0.5,0,0,,0.3,9.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000843,2,2017-12-15 00:29:16,2017-12-15 00:32:52,N,1,,,,,1,0.63,4.5,0.5,0.5,1.74,0,,0.3,7.54,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000844,2,2017-12-15 00:44:44,2017-12-15 00:55:18,N,1,,,,,1,2.22,10,0.5,0.5,2.26,0,,0.3,13.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000845,2,2017-12-15 00:56:56,2017-12-15 01:08:12,N,1,,,,,1,2.25,10.5,0.5,0.5,0,0,,0.3,11.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000846,1,2017-12-15 00:05:27,2017-12-15 00:40:39,N,1,,,,,1,4.10,23,0.5,0.5,3,0,,0.3,27.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000847,1,2017-12-15 00:50:51,2017-12-15 00:55:54,N,1,,,,,1,1.00,5.5,0.5,0.5,1.35,0,,0.3,8.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000848,1,2017-12-15 00:59:52,2017-12-15 01:11:28,N,1,,,,,2,1.30,9,0.5,0.5,2.05,0,,0.3,12.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000849,2,2017-12-15 00:43:01,2017-12-15 01:04:17,N,1,,,,,1,4.68,18,0.5,0.5,2.5,0,,0.3,21.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000850,1,2017-12-15 00:52:23,2017-12-15 01:16:27,N,1,,,,,1,4.00,17.5,0.5,0.5,1.2,0,,0.3,20,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000851,1,2017-12-15 00:50:17,2017-12-15 01:18:00,N,1,,,,,1,7.70,26,0.5,0.5,5.45,0,,0.3,32.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000852,2,2017-12-15 00:29:48,2017-12-15 00:47:32,N,1,,,,,5,4.13,15.5,0.5,0.5,4.2,0,,0.3,21,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000853,1,2017-12-15 00:20:05,2017-12-15 00:28:06,N,1,,,,,1,0.60,6.5,0.5,0.5,2,0,,0.3,9.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000854,1,2017-12-15 00:30:22,2017-12-15 01:10:08,N,1,,,,,1,5.80,27,0.5,0.5,7.05,0,,0.3,35.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000855,2,2017-12-15 00:30:08,2017-12-15 00:36:48,N,1,,,,,1,0.72,6,0.5,0.5,1.46,0,,0.3,8.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000856,2,2017-12-15 00:38:28,2017-12-15 00:57:44,N,1,,,,,1,1.65,13,0.5,0.5,0,0,,0.3,14.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000857,1,2017-12-15 00:11:26,2017-12-15 00:33:14,N,1,,,,,1,4.30,18.5,0.5,0.5,2,0,,0.3,21.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000858,1,2017-12-15 00:53:08,2017-12-15 01:24:53,N,1,,,,,2,8.90,31,0.5,0.5,5,5.76,,0.3,43.06,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000859,1,2017-12-15 00:53:46,2017-12-15 01:07:14,N,1,,,,,2,1.90,10.5,0.5,0.5,1.2,0,,0.3,13,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000860,2,2017-12-15 00:31:50,2017-12-15 00:37:17,N,1,,,,,1,1.77,7,0.5,0.5,1,0,,0.3,9.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000861,1,2017-12-15 00:08:40,2017-12-15 00:58:36,N,1,,,,,1,19.60,59.5,0.5,0.5,13,0,,0.3,73.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000862,2,2017-12-15 00:02:27,2017-12-15 00:06:04,N,1,,,,,5,0.60,4.5,0.5,0.5,1.16,0,,0.3,6.96,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000863,2,2017-12-15 00:17:42,2017-12-15 01:05:29,N,1,,,,,5,6.86,30,0.5,0.5,0,0,,0.3,31.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000864,2,2017-12-15 00:22:02,2017-12-15 00:40:51,N,1,,,,,1,3.48,15.5,0.5,0.5,0,0,,0.3,16.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000865,2,2017-12-15 00:42:18,2017-12-15 00:49:35,N,1,,,,,1,1.42,7,0.5,0.5,1.66,0,,0.3,9.96,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000866,2,2017-12-15 00:02:19,2017-12-15 00:24:09,N,1,,,,,1,10.38,31,0.5,0.5,6.46,0,,0.3,38.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000867,2,2017-12-15 00:21:42,2017-12-15 01:25:06,N,1,,,,,1,11.80,45.5,0.5,0.5,0,0,,0.3,46.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000868,2,2017-12-15 00:03:13,2017-12-15 00:11:15,N,1,,,,,1,1.05,7,0.5,0.5,1.66,0,,0.3,9.96,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000869,2,2017-12-15 00:13:04,2017-12-15 00:43:07,N,1,,,,,1,5.54,22,0.5,0.5,0,0,,0.3,23.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000870,2,2017-12-15 00:03:01,2017-12-15 00:17:43,N,1,,,,,1,1.14,10,0.5,0.5,0,0,,0.3,11.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000871,2,2017-12-15 00:19:26,2017-12-15 00:36:55,N,1,,,,,1,3.51,15,0.5,0.5,20,0,,0.3,36.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000872,1,2017-12-15 00:42:05,2017-12-15 00:46:23,Y,1,,,,,2,0.70,5,0.5,0.5,0,0,,0.3,6.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000874,1,2017-12-15 00:32:08,2017-12-15 00:38:53,N,1,,,,,1,0.60,6,0.5,0.5,0,0,,0.3,7.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000875,1,2017-12-15 00:52:04,2017-12-15 01:04:53,N,1,,,,,1,2.00,10.5,0.5,0.5,2.35,0,,0.3,14.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000876,2,2017-12-15 00:39:40,2017-12-15 01:22:10,N,2,,,,,6,21.28,52,0,0.5,6,5.76,,0.3,64.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000877,1,2017-12-15 00:04:14,2017-12-15 00:13:52,N,1,,,,,2,2.10,9.5,0.5,0.5,1.2,0,,0.3,12,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000878,1,2017-12-15 00:44:57,2017-12-15 00:57:59,N,1,,,,,2,1.40,9.5,0.5,0.5,2,0,,0.3,12.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000879,2,2017-12-15 00:12:01,2017-12-15 00:18:24,N,1,,,,,1,1.32,7,0.5,0.5,0.1,0,,0.3,8.4,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000880,2,2017-12-15 00:22:58,2017-12-15 00:40:06,N,1,,,,,1,4.69,16.5,0.5,0.5,3.56,0,,0.3,21.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000881,2,2017-12-15 00:47:43,2017-12-15 00:54:11,N,1,,,,,1,1.30,6.5,0.5,0.5,1.56,0,,0.3,9.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000882,2,2017-12-15 00:16:46,2017-12-15 00:24:04,N,1,,,,,1,1.86,8,0.5,0.5,1.86,0,,0.3,11.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000883,2,2017-12-15 00:29:13,2017-12-15 00:43:55,N,1,,,,,1,2.95,12.5,0.5,0.5,2.2,0,,0.3,16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000884,2,2017-12-15 00:48:04,2017-12-15 00:49:49,N,1,,,,,1,0.34,3.5,0.5,0.5,0,0,,0.3,4.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000885,2,2017-12-15 00:51:53,2017-12-15 00:55:13,N,1,,,,,1,0.86,5,0.5,0.5,1.89,0,,0.3,8.19,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000886,2,2017-12-15 00:21:27,2017-12-15 01:14:34,N,1,,,,,1,10.62,42.5,0.5,0.5,10.95,0,,0.3,54.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000887,1,2017-12-15 00:04:08,2017-12-15 00:36:58,N,1,,,,,2,5.10,23,0.5,0.5,0,0,,0.3,24.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000888,1,2017-12-15 00:40:51,2017-12-15 00:57:22,N,1,,,,,2,3.10,14,0.5,0.5,3.8,0,,0.3,19.1,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000889,1,2017-12-15 00:08:02,2017-12-15 00:29:01,N,1,,,,,1,2.50,14.5,0.5,0.5,1,0,,0.3,16.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000890,1,2017-12-15 00:48:54,2017-12-15 01:33:00,N,1,,,,,1,15.60,48.5,0.5,0.5,5.2,0,,0.3,55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000891,2,2017-12-14 23:59:19,2017-12-15 00:23:03,N,1,,,,,1,3.84,17.5,0.5,0.5,1,0,,0.3,19.8,1,,,,yellow,0.09,1,1.2,32,22,5.82,,,,,,,,,,,,,,,,,,,,
1460000892,2,2017-12-15 00:42:48,2017-12-15 01:03:15,N,1,,,,,1,2.45,14.5,0.5,0.5,1,0,,0.3,16.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000893,2,2017-12-15 00:16:33,2017-12-15 00:26:08,N,1,,,,,1,1.67,8.5,0.5,0.5,1.96,0,,0.3,11.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000894,2,2017-12-15 00:38:41,2017-12-15 00:56:22,N,1,,,,,1,2.95,13.5,0.5,0.5,0,0,,0.3,14.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000895,2,2017-12-15 00:57:11,2017-12-15 01:14:48,N,1,,,,,1,0.82,11.5,0.5,0.5,0,0,,0.3,12.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000896,2,2017-12-15 00:02:45,2017-12-15 00:36:35,N,2,,,,,1,18.85,52,0,0.5,11.71,5.76,,0.3,70.27,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000897,2,2017-12-15 00:43:23,2017-12-15 01:00:33,N,1,,,,,1,2.82,13.5,0.5,0.5,2.96,0,,0.3,17.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000898,2,2017-12-15 00:08:07,2017-12-15 00:41:12,N,1,,,,,1,5.36,24,0.5,0.5,6.32,0,,0.3,31.62,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000899,2,2017-12-15 00:48:01,2017-12-15 00:54:37,N,1,,,,,1,0.87,6.5,0.5,0.5,1.56,0,,0.3,9.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000900,2,2017-12-15 01:00:05,2017-12-15 01:04:23,N,1,,,,,1,1.04,5.5,0.5,0.5,2.04,0,,0.3,8.84,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000901,1,2017-12-15 00:43:08,2017-12-15 00:50:53,N,1,,,,,1,2.10,8.5,0.5,0.5,1.95,0,,0.3,11.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000902,1,2017-12-15 00:51:34,2017-12-15 00:54:56,N,1,,,,,1,1.10,5.5,0.5,0.5,0,0,,0.3,6.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000903,2,2017-12-15 00:53:48,2017-12-15 01:04:17,N,1,,,,,1,1.66,9,0.5,0.5,0,0,,0.3,10.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000904,1,2017-12-15 00:22:51,2017-12-15 00:50:12,N,2,,,,,1,18.40,52,0,0.5,10.55,0,,0.3,63.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000905,1,2017-12-15 00:02:29,2017-12-15 00:27:44,N,1,,,,,1,4.20,18,0.5,0.5,0,0,,0.3,19.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000906,1,2017-12-15 00:31:33,2017-12-15 00:50:28,N,1,,,,,1,3.40,15,0.5,0.5,4.05,0,,0.3,20.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000907,1,2017-12-15 00:53:36,2017-12-15 00:57:11,N,1,,,,,1,0.90,5,0.5,0.5,1.25,0,,0.3,7.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000908,2,2017-12-15 00:01:47,2017-12-15 00:35:22,N,1,,,,,1,6.78,26.5,0.5,0.5,2,0,,0.3,29.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000909,2,2017-12-15 00:55:50,2017-12-15 01:21:26,N,1,,,,,1,4.70,19,0.5,0.5,0,0,,0.3,20.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000910,1,2017-12-15 00:15:27,2017-12-15 00:20:38,N,1,,,,,1,1.40,6.5,0.5,0.5,0,0,,0.3,7.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000911,1,2017-12-15 00:34:52,2017-12-15 01:01:21,N,1,,,,,1,5.10,21,0.5,0.5,3.35,0,,0.3,25.65,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000912,1,2017-12-15 00:15:57,2017-12-15 00:22:46,N,1,,,,,1,1.80,7.5,0.5,0.5,1.75,0,,0.3,10.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000913,1,2017-12-15 00:31:27,2017-12-15 01:11:20,N,1,,,,,1,5.30,26.5,0.5,0.5,1,0,,0.3,28.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000914,1,2017-12-15 00:14:15,2017-12-15 00:29:52,N,1,,,,,1,3.00,13,0.5,0.5,1,0,,0.3,15.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000915,1,2017-12-15 00:48:36,2017-12-15 01:24:10,N,1,,,,,1,4.90,26,0.5,0.5,8.15,0,,0.3,35.45,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000916,1,2017-12-15 00:22:04,2017-12-15 00:32:41,Y,1,,,,,2,1.90,9,0.5,0.5,2.05,0,,0.3,12.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000917,1,2017-12-15 00:33:58,2017-12-15 00:47:41,N,1,,,,,1,3.30,12.5,0.5,0.5,0,0,,0.3,13.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000918,1,2017-12-15 00:54:47,2017-12-15 00:55:21,N,5,,,,,1,0.00,14,0,0,2.85,0,,0.3,17.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000919,1,2017-12-15 00:35:43,2017-12-15 01:08:42,N,1,,,,,1,11.70,37.5,0.5,0.5,9.7,0,,0.3,48.5,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000920,2,2017-12-15 00:13:56,2017-12-15 00:41:08,N,1,,,,,1,3.73,19,0.5,0.5,3,0,,0.3,23.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000921,2,2017-12-15 00:50:32,2017-12-15 01:06:42,N,1,,,,,1,2.81,12,0.5,0.5,2.66,0,,0.3,15.96,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000922,1,2017-12-15 00:39:58,2017-12-15 01:01:50,N,1,,,,,1,4.50,18,0.5,0.5,3.85,0,,0.3,23.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000923,1,2017-12-15 00:08:23,2017-12-15 01:07:29,N,1,,,,,1,7.70,38,0.5,0.5,0,0,,0.3,39.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000924,1,2017-12-15 00:00:04,2017-12-15 00:14:22,N,1,,,,,2,2.30,11.5,0.5,0.5,2.55,0,,0.3,15.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000925,1,2017-12-15 00:15:32,2017-12-15 00:39:24,N,1,,,,,2,3.10,16.5,0.5,0.5,2,0,,0.3,19.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000926,1,2017-12-15 00:58:47,2017-12-15 01:01:23,N,1,,,,,1,0.60,4,0.5,0.5,1.05,0,,0.3,6.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000927,2,2017-12-15 00:19:03,2017-12-15 00:26:33,N,1,,,,,1,1.07,7,0.5,0.5,1,0,,0.3,9.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000928,2,2017-12-15 00:27:47,2017-12-15 01:05:07,N,1,,,,,1,5.63,26,0.5,0.5,5.46,0,,0.3,32.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000929,1,2017-12-15 00:02:07,2017-12-15 00:28:16,N,1,,,,,0,6.70,23.5,0.5,0.5,0,0,,0.3,24.8,3,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000930,1,2017-12-15 00:33:18,2017-12-15 01:03:18,N,1,,,,,1,4.80,21.5,0.5,0.5,5.7,5.76,,0.3,34.26,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000931,1,2017-12-15 00:38:52,2017-12-15 00:38:57,N,2,,,,,1,4.60,52,0,0.5,0,0,,0.3,52.8,3,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000932,1,2017-12-15 00:43:53,2017-12-15 01:18:44,N,1,,,,,1,6.40,26.5,0,0.5,0,0,,0.3,27.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000933,1,2017-12-15 00:04:13,2017-12-15 00:51:06,N,1,,,,,2,9.20,36,0.5,0.5,7.45,0,,0.3,44.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000934,2,2017-12-15 00:09:49,2017-12-15 00:23:32,N,1,,,,,1,2.51,11.5,0.5,0.5,0,0,,0.3,12.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000935,2,2017-12-15 00:28:33,2017-12-15 01:04:59,N,1,,,,,1,6.32,25.5,0.5,0.5,5.36,0,,0.3,32.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000936,2,2017-12-15 00:06:03,2017-12-15 00:22:31,N,1,,,,,5,3.26,14,0.5,0.5,3.06,0,,0.3,18.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000937,2,2017-12-15 00:37:08,2017-12-15 01:03:40,N,1,,,,,5,2.83,17.5,0.5,0.5,0,0,,0.3,18.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000938,2,2017-12-15 00:57:16,2017-12-15 01:16:42,N,1,,,,,1,2.93,14,0.5,0.5,1,0,,0.3,16.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000939,2,2017-12-15 00:10:53,2017-12-15 00:22:45,N,1,,,,,2,1.44,9,0.5,0.5,2.5,0,,0.3,12.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000940,2,2017-12-15 00:50:00,2017-12-15 01:26:28,N,1,,,,,2,10.03,35.5,0.5,0.5,0,0,,0.3,36.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000941,2,2017-12-15 00:07:21,2017-12-15 00:20:07,N,1,,,,,1,1.85,10.5,0.5,0.5,2.36,0,,0.3,14.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000942,2,2017-12-15 00:29:10,2017-12-15 00:36:06,N,1,,,,,1,1.68,7.5,0.5,0.5,0,0,,0.3,8.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000943,2,2017-12-15 00:47:04,2017-12-15 01:20:23,N,1,,,,,1,15.51,45,0.5,0.5,9.26,0,,0.3,55.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000944,1,2017-12-15 00:19:02,2017-12-15 00:29:05,N,1,,,,,1,1.90,9.5,0.5,0.5,1,0,,0.3,11.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000945,1,2017-12-15 00:31:52,2017-12-15 00:44:24,N,1,,,,,1,2.20,10.5,0.5,0.5,0,0,,0.3,11.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000946,1,2017-12-15 00:46:42,2017-12-15 01:05:12,N,1,,,,,1,6.20,21,0.5,0.5,3.35,0,,0.3,25.65,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000947,2,2017-12-15 00:11:42,2017-12-15 00:43:20,N,1,,,,,1,6.04,25.5,0.5,0.5,5.36,0,,0.3,34.11,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000948,1,2017-12-15 00:21:34,2017-12-15 00:41:16,N,1,,,,,1,2.30,14,0.5,0.5,0,0,,0.3,15.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000949,1,2017-12-15 00:42:11,2017-12-15 00:55:13,N,1,,,,,2,3.50,13,0.5,0.5,2.85,0,,0.3,17.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000950,2,2017-12-15 00:00:16,2017-12-15 00:17:07,N,1,,,,,2,4.05,15.5,0.5,0.5,0,0,,0.3,16.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000951,2,2017-12-15 00:47:25,2017-12-15 01:08:08,N,1,,,,,2,4.73,18,0.5,0.5,0,0,,0.3,19.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000952,2,2017-12-14 23:58:52,2017-12-15 23:48:39,N,1,,,,,2,0.27,3.5,0.5,0.5,0,0,,0.3,4.8,1,,,,yellow,0.09,1,1.2,32,22,5.82,,,,,,,,,,,,,,,,,,,,
1460000953,2,2017-12-15 00:02:43,2017-12-15 00:31:33,N,1,,,,,1,3.81,19.5,0.5,0.5,3,0,,0.3,23.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000954,2,2017-12-15 00:08:07,2017-12-15 00:22:27,N,1,,,,,1,1.58,10.5,0.5,0.5,1.5,0,,0.3,13.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000955,2,2017-12-15 00:23:59,2017-12-15 00:43:51,N,1,,,,,1,3.09,14.5,0.5,0.5,4.74,0,,0.3,20.54,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000956,2,2017-12-15 00:53:01,2017-12-15 00:53:05,N,5,,,,,1,0.00,5,0,0.5,0,0,,0.3,5.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000957,2,2017-12-15 00:55:09,2017-12-15 01:07:32,N,1,,,,,1,2.52,11.5,0.5,0.5,0,0,,0.3,12.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000958,2,2017-12-15 00:22:57,2017-12-15 00:36:58,N,1,,,,,1,2.75,12,0.5,0.5,0.66,0,,0.3,13.96,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000959,2,2017-12-15 00:03:25,2017-12-15 00:13:02,N,1,,,,,1,0.99,7.5,0.5,0.5,0,0,,0.3,8.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000960,2,2017-12-15 00:17:06,2017-12-15 00:31:31,N,1,,,,,1,1.79,10.5,0.5,0.5,0,0,,0.3,11.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000961,2,2017-12-15 00:37:42,2017-12-15 01:37:47,N,5,,,,,1,16.84,80,0,0.5,0,0,,0.3,80.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000962,1,2017-12-15 00:24:46,2017-12-15 00:34:31,N,1,,,,,1,1.60,8.5,0.5,0.5,1.95,0,,0.3,11.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000963,1,2017-12-15 00:47:28,2017-12-15 01:07:04,N,1,,,,,1,4.20,16.5,0.5,0.5,5,0,,0.3,22.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000964,2,2017-12-15 00:01:11,2017-12-15 00:09:14,N,1,,,,,1,1.57,8,0.5,0.5,0,0,,0.3,9.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000965,2,2017-12-15 00:30:12,2017-12-15 01:01:05,N,1,,,,,1,4.68,22.5,0.5,0.5,4.76,0,,0.3,28.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000966,1,2017-12-15 00:24:00,2017-12-15 00:38:13,N,1,,,,,1,3.20,12.5,0.5,0.5,2.75,0,,0.3,16.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000967,2,2017-12-15 00:49:53,2017-12-15 01:17:25,N,1,,,,,3,6.37,22.5,0.5,0.5,4.76,0,,0.3,28.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000968,2,2017-12-15 00:05:30,2017-12-15 00:37:30,N,2,,,,,4,18.09,52,0,0.5,11.71,5.76,,0.3,70.27,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000969,2,2017-12-15 00:47:11,2017-12-15 01:02:37,N,1,,,,,4,3.35,13,0.5,0.5,0,0,,0.3,14.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000970,2,2017-12-15 00:32:31,2017-12-15 01:15:13,N,1,,,,,6,10.56,38.5,0.5,0.5,4,5.76,,0.3,49.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000971,1,2017-12-15 00:55:25,2017-12-15 01:00:03,N,1,,,,,1,0.50,5,0.5,0.5,0,0,,0.3,6.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000972,1,2017-12-15 00:00:54,2017-12-15 00:04:07,N,1,,,,,1,0.90,5,0.5,0.5,1.55,0,,0.3,7.85,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000973,1,2017-12-15 00:11:51,2017-12-15 00:24:10,N,1,,,,,1,1.70,10,0.5,0.5,2,0,,0.3,13.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000974,1,2017-12-15 00:33:26,2017-12-15 01:19:20,N,1,,,,,1,8.50,34.5,0.5,0.5,7.15,0,,0.3,42.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000975,1,2017-12-15 00:11:27,2017-12-15 00:54:09,N,1,,,,,2,7.90,31.5,0.5,0.5,6.55,0,,0.3,39.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000976,2,2017-12-15 00:10:38,2017-12-15 00:50:52,N,1,,,,,1,6.88,29.5,0.5,0.5,7.7,0,,0.3,38.5,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000977,2,2017-12-15 00:04:00,2017-12-15 00:21:02,N,1,,,,,6,1.51,12,0.5,0.5,3.99,0,,0.3,17.29,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000978,2,2017-12-15 00:21:43,2017-12-15 00:33:56,N,1,,,,,6,0.71,8,0.5,0.5,0,0,,0.3,9.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000979,2,2017-12-15 00:36:45,2017-12-15 00:48:13,N,1,,,,,6,1.00,8.5,0.5,0.5,2.94,0,,0.3,12.74,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000980,2,2017-12-15 00:50:35,2017-12-15 01:14:51,N,1,,,,,6,4.34,18,0.5,0.5,4.82,0,,0.3,24.12,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000981,2,2017-12-15 00:35:10,2017-12-15 00:40:56,N,1,,,,,1,1.21,6,0.5,0.5,1.46,0,,0.3,8.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000982,2,2017-12-15 00:53:17,2017-12-15 01:09:14,N,1,,,,,1,3.38,13.5,0.5,0.5,3.7,0,,0.3,18.5,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000983,2,2017-12-15 00:48:43,2017-12-15 00:58:44,N,1,,,,,1,1.34,8.5,0.5,0.5,1,0,,0.3,10.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000984,2,2017-12-15 00:37:51,2017-12-15 00:38:05,N,5,,,,,1,0.00,60,0,0,0,0,,0.3,60.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000985,2,2017-12-15 00:45:54,2017-12-15 00:52:31,N,1,,,,,1,1.92,7.5,0.5,0.5,2,0,,0.3,10.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000986,2,2017-12-15 00:07:43,2017-12-15 00:28:15,N,1,,,,,2,10.02,29.5,0.5,0.5,0,0,,0.3,30.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000987,2,2017-12-15 00:32:40,2017-12-15 00:41:01,N,1,,,,,2,1.29,7.5,0.5,0.5,1,0,,0.3,9.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000988,2,2017-12-15 00:41:52,2017-12-15 00:48:28,N,1,,,,,2,1.00,6.5,0.5,0.5,0,0,,0.3,7.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000989,2,2017-12-15 00:52:43,2017-12-15 01:37:15,N,5,,,,,2,4.32,70,0,0.5,0,0,,0.3,70.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000990,1,2017-12-15 00:03:36,2017-12-15 00:25:14,N,1,,,,,2,3.90,17.5,0.5,0.5,3.75,0,,0.3,22.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000991,1,2017-12-15 00:39:01,2017-12-15 01:05:04,N,1,,,,,2,4.70,19.5,0.5,0.5,3,0,,0.3,23.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000992,2,2017-12-15 00:04:21,2017-12-15 00:09:29,N,1,,,,,1,0.69,5,0.5,0.5,1.89,0,,0.3,8.19,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000993,2,2017-12-15 00:19:51,2017-12-15 00:24:23,N,1,,,,,1,0.79,5,0.5,0.5,1.26,0,,0.3,7.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000994,2,2017-12-15 00:25:57,2017-12-15 00:55:44,N,1,,,,,1,3.62,19.5,0.5,0.5,4.16,0,,0.3,24.96,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000995,1,2017-12-15 00:00:00,2017-12-15 00:15:00,N,1,,,,,1,2.20,12,0.5,0.5,2.65,0,,0.3,15.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000996,1,2017-12-15 00:16:51,2017-12-15 00:49:40,N,1,,,,,2,8.50,29,0.5,0.5,7.2,5.76,,0.3,43.26,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000997,2,2017-12-15 00:42:37,2017-12-15 01:24:17,N,1,,,,,1,8.46,31.5,0.5,0.5,8.2,0,,0.3,41,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000998,2,2017-12-15 00:06:02,2017-12-15 00:18:45,N,1,,,,,1,2.28,11,0.5,0.5,2.46,0,,0.3,14.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460000999,2,2017-12-15 00:21:04,2017-12-15 00:31:14,N,1,,,,,1,1.01,8,0.5,0.5,0,0,,0.3,9.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,
1460001000,2,2017-12-15 00:33:21,2017-12-15 00:42:19,N,1,,,,,1,1.87,8.5,0.5,0.5,0,0,,0.3,9.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,


================================================
FILE: examples/data/plasticc_test_set_1k.csv
================================================
object_id,mjd,passband,flux,flux_err,detected
13,59798.3205,2,-1.299735,1.357315,0
13,59798.3281,1,-2.095392,1.148654,0
13,59798.3357,3,-0.923794,1.763655,0
13,59798.3466,4,-4.009815,2.602911,0
13,59798.3576,5,-3.403503,5.367328,0
13,59801.3553,2,-1.778855,2.448943,0
13,59801.3629,1,2.491993,3.540421,0
13,59801.3705,3,1.644129,2.284999,0
13,59801.3815,4,-0.158192,2.515900,0
13,59801.3924,5,-6.457387,5.381231,0
13,59818.2740,0,1.962846,1.795587,0
13,59819.2541,0,-1.697929,2.433431,0
13,59820.2522,0,-1.698675,1.898612,0
13,59821.2478,0,-0.776626,2.435191,0
13,59822.2433,0,-3.826187,2.853957,0
13,59823.2659,0,-0.837001,2.690573,0
13,59826.3105,2,0.529480,0.920972,0
13,59826.3181,1,-0.702092,0.923219,0
13,59826.3258,3,-0.797231,1.508073,0
13,59826.3367,4,-3.898993,2.156529,0
13,59826.3477,5,-2.883462,5.535779,0
13,59842.2456,2,-0.838558,0.849664,0
13,59842.2532,1,-0.038403,0.813772,0
13,59842.2608,3,2.863780,1.322607,0
13,59842.2718,4,0.088068,2.166394,0
13,59842.2827,5,-1.672899,6.068114,0
13,59851.1792,0,-0.737309,2.574759,0
13,59854.1485,2,0.040905,1.224270,0
13,59854.1563,1,-0.265884,1.336087,0
13,59854.1640,3,-0.369843,1.901700,0
13,59854.1750,4,2.415236,2.771065,0
13,59854.1860,5,-4.745292,5.788470,0
13,59857.1408,2,4.814427,2.132096,0
13,59857.1485,1,-2.618018,2.915296,0
13,59857.1563,3,-2.852325,2.092946,0
13,59857.1673,4,-1.584260,2.676820,0
13,59857.1782,5,-12.249563,6.628258,0
13,59867.1112,2,-1.164210,1.083173,0
13,59867.1189,1,-1.176300,0.974351,0
13,59867.1267,3,0.451480,1.538879,0
13,59867.1377,4,0.359918,2.357359,0
13,59867.1487,5,-2.699421,5.830664,0
13,59870.1049,2,1.381781,0.971909,0
13,59870.1126,1,0.387448,0.822428,0
13,59870.1204,3,0.817313,1.373219,0
13,59870.1314,4,0.151035,2.283455,0
13,59870.1424,5,2.986818,5.280643,0
13,59873.0971,2,-1.240265,1.040900,0
13,59873.1049,1,0.931574,1.042814,0
13,59873.1126,3,2.134935,1.442823,0
13,59873.1236,4,-0.530458,2.068875,0
13,59873.1346,5,-0.314489,4.587787,0
13,59874.1461,0,-2.490195,1.611522,0
13,59875.0995,0,-1.276708,2.019888,0
13,59876.0980,0,3.425512,2.443817,0
13,59877.0976,0,2.518677,1.776579,0
13,59878.0964,0,1.872855,2.020787,0
13,59879.0895,0,0.522355,1.864210,0
13,59880.1017,0,-0.761489,1.371363,0
13,59884.1760,2,0.408623,0.768234,0
13,59884.1836,1,0.610256,0.887502,0
13,59884.1913,3,-0.134252,1.138571,0
13,59884.2022,4,-0.009705,1.609182,0
13,59884.2132,5,6.633543,4.214399,0
13,59887.2856,2,1.784338,1.826830,0
13,59887.2933,1,1.591402,2.206727,0
13,59887.3009,3,0.109085,2.350564,0
13,59887.3118,4,-2.874674,3.145338,0
13,59887.3228,5,-2.586321,7.947824,0
13,59896.1307,2,1.084506,0.822793,0
13,59896.1384,1,-0.561922,0.703150,0
13,59896.1460,3,0.284255,1.393400,0
13,59896.1569,4,0.466466,2.192684,0
13,59896.1679,5,0.433727,5.948182,0
13,59899.1519,2,0.102152,1.365073,0
13,59899.1595,1,1.624047,1.388812,0
13,59899.1672,3,1.820581,2.378815,0
13,59899.1781,4,-3.816161,3.709328,0
13,59899.1891,5,-8.805643,7.905914,0
13,59902.1384,2,0.196845,1.186146,0
13,59902.1460,1,0.993784,1.079431,0
13,59902.1537,3,1.891725,1.943785,0
13,59902.1646,4,-1.455820,2.762865,0
13,59902.1755,5,-0.622898,7.024046,0
13,59904.1053,0,-4.067552,2.281505,0
13,59905.0555,0,0.633017,2.504281,0
13,59906.0562,0,1.127558,2.030226,0
13,59907.0567,0,0.456890,1.749502,0
13,59908.0681,0,-1.925056,2.683702,0
13,59909.0582,0,-0.678187,1.834134,0
13,59910.0503,0,1.194135,1.869489,0
13,59914.0526,2,-1.644144,1.438881,0
13,59914.0602,1,-1.866351,1.956770,0
13,59914.0678,3,-0.710913,1.686324,0
13,59914.0788,4,5.825469,2.269697,0
13,59914.0897,5,3.513655,5.356808,0
13,59924.1060,2,-0.028742,1.131780,0
13,59924.1136,1,-1.108478,1.574127,0
13,59924.1212,3,-1.323133,1.728779,0
13,59924.1322,4,-5.513097,2.527147,0
13,59924.1431,5,4.266651,5.633382,0
13,59927.1074,2,-0.400232,1.190879,0
13,59927.1151,1,1.286833,1.135341,0
13,59927.1227,3,2.155155,1.593354,0
13,59927.1336,4,2.415218,2.299777,0
13,59927.1446,5,3.778454,5.891465,0
13,59930.1236,2,1.373614,0.942084,0
13,59930.1312,1,-1.140287,0.828822,0
13,59930.1388,3,0.832613,1.283629,0
13,59930.1498,4,2.345280,2.033006,0
13,59930.1607,5,-4.211092,5.330056,0
13,59933.1249,2,0.395000,0.691634,0
13,59933.1325,1,-0.423847,0.693980,0
13,59933.1401,3,-1.108444,1.268825,0
13,59933.1511,4,-0.328867,1.913908,0
13,59933.1620,5,7.551563,5.429599,0
13,59934.0638,0,-1.113758,1.141001,0
13,59935.0646,0,-0.818122,1.122888,0
13,59936.0642,0,2.141036,1.531045,0
13,59937.0650,0,-1.277371,1.639949,0
13,59938.0647,0,4.045178,2.015162,0
13,59939.0650,0,-0.050647,2.194245,0
13,60165.3032,2,-0.244917,1.195023,0
13,60165.3109,1,-0.051452,1.347307,0
13,60165.3186,3,-1.446614,1.518360,0
13,60165.3295,4,-1.239533,2.022870,0
13,60165.3405,5,-2.233357,4.782032,0
13,60168.2892,2,-0.272922,0.834000,0
13,60168.2970,1,-0.551885,0.787909,0
13,60168.3047,3,0.215221,1.300493,0
13,60168.3157,4,-0.354714,2.065893,0
13,60168.3267,5,9.800998,4.821743,0
13,60176.2820,0,-2.711282,2.363240,0
13,60177.2726,0,2.907485,2.777568,0
13,60181.4088,2,-0.778592,0.924176,0
13,60181.4164,1,0.776635,1.262891,0
13,60181.4232,3,5.557160,3.239328,0
13,60183.2660,2,0.008992,1.915637,0
13,60183.2736,1,3.495092,2.936934,0
13,60183.2812,3,-4.313775,2.548015,0
13,60183.2922,4,4.681122,2.996080,0
13,60183.3031,5,-0.644280,6.178194,0
13,60195.2812,2,-0.992008,0.933265,0
13,60195.2888,1,-0.199896,1.020470,0
13,60195.2964,3,1.175473,1.626211,0
13,60195.3073,4,-0.928975,2.339218,0
13,60195.3183,5,3.537798,6.276374,0
13,60198.2690,0,0.879443,2.172960,0
13,60199.2186,0,1.393119,2.161035,0
13,60200.2139,0,0.226138,2.589170,0
13,60201.2072,0,0.024500,2.490959,0
13,60202.2089,0,-1.152337,2.483400,0
13,60209.1811,2,-0.534034,1.565458,0
13,60209.1888,1,0.822128,1.410793,0
13,60209.1965,3,-0.684217,1.831758,0
13,60209.2075,4,-3.066642,2.909104,0
13,60209.2184,5,0.602669,6.696602,0
13,60212.1675,2,1.117115,1.936030,0
13,60212.1753,1,1.782217,2.752773,0
13,60212.1830,3,0.321750,2.295181,0
13,60212.1941,4,-1.936185,2.590936,0
13,60212.2050,5,11.339810,5.666715,0
13,60223.2416,2,0.123572,1.182341,0
13,60223.2493,1,0.568967,1.241492,0
13,60223.2569,3,0.040453,1.648758,0
13,60223.2678,4,0.459117,2.472937,0
13,60223.2788,5,-7.159738,5.794432,0
13,60226.3337,2,0.034159,0.944593,0
13,60226.3413,1,1.465278,0.946326,0
13,60226.3489,3,0.247305,1.793391,0
13,60226.3599,4,4.326116,3.071592,0
13,60226.3708,5,1.558216,8.109918,0
13,60238.3197,2,-0.738155,1.053325,0
13,60238.3273,1,-0.990783,1.152318,0
13,60238.3349,3,1.786266,2.171606,0
13,60238.3459,4,3.042687,3.829919,0
13,60238.3568,5,8.879921,11.257108,0
13,60241.0870,2,-0.147198,1.999162,0
13,60241.0948,1,-1.383542,3.066069,0
13,60241.1025,3,-0.347256,2.444988,0
13,60241.1136,4,2.242954,2.917921,0
13,60241.1245,5,4.122897,5.941785,0
13,60250.1708,2,2.348668,2.103043,0
13,60250.1957,1,1.762817,2.811031,0
13,60250.2034,3,-4.162498,2.927869,0
13,60250.2143,4,3.563345,3.983845,0
13,60250.2253,5,5.870506,7.937796,0
13,60261.1296,0,2.993462,2.083503,0
13,60262.0550,0,1.952810,2.311998,0
13,60263.0556,0,1.620923,1.977922,0
13,60264.0559,0,3.218953,2.187246,0
13,60265.0780,0,-4.046216,2.407065,0
13,60268.0449,2,1.923153,1.404630,0
13,60268.0525,1,-0.380431,1.849085,0
13,60268.0601,3,1.653331,2.130050,0
13,60268.0711,4,-2.881836,2.734059,0
13,60268.0820,5,7.530805,6.747447,0
13,60278.0993,2,-1.574151,1.643888,0
13,60278.1069,1,-4.148108,2.273913,0
13,60278.1145,3,-1.136458,2.169661,0
13,60278.1255,4,-4.712543,3.059077,0
13,60278.1364,5,-4.843711,7.638536,0
13,60281.1023,2,-0.579702,0.798074,0
13,60281.1099,1,0.559643,0.795803,0
13,60281.1175,3,0.985490,1.369862,0
13,60281.1285,4,-0.125175,1.990803,0
13,60281.1394,5,-6.211294,5.176151,0
13,60284.1027,2,0.920098,0.837512,0
13,60284.1104,1,-0.531105,0.761165,0
13,60284.1180,3,-1.038009,1.160178,0
13,60284.1289,4,-0.543047,1.702779,0
13,60284.1399,5,-5.946274,4.435276,0
13,60287.1047,2,-0.388991,1.250426,0
13,60287.1123,1,0.622866,1.083712,0
13,60287.1200,3,-1.365885,1.796073,0
13,60287.1309,4,-1.566869,2.963949,0
13,60287.1418,5,-12.680235,7.013420,0
13,60290.0761,0,-0.996401,1.912932,0
13,60291.0689,0,-1.639395,1.661116,0
13,60292.0699,0,-0.087568,1.855572,0
13,60293.0699,0,3.384405,1.578842,0
13,60294.0708,0,0.056788,1.397227,0
13,60532.3019,2,24.529644,1.046373,1
13,60532.3097,1,23.404964,0.992504,1
13,60532.3173,3,36.069386,1.568371,1
13,60532.3282,4,42.765503,2.305008,1
13,60532.3392,5,36.567162,5.439748,1
13,60535.2802,2,23.662449,1.422315,1
13,60535.2879,1,20.202259,1.361849,1
13,60535.2957,3,39.966290,2.106815,1
13,60535.3068,4,39.323189,3.034410,1
13,60535.3177,5,25.412567,7.170496,1
13,60538.2826,2,19.899044,1.856537,1
13,60538.2903,1,14.983138,2.683123,1
13,60538.2980,3,32.445835,1.936947,1
13,60538.3089,4,37.566696,2.327296,1
13,60538.3199,5,36.802090,5.025538,1
13,60554.2651,0,2.890699,2.258298,0
13,60555.2411,0,1.421973,2.038275,0
13,60556.2370,0,5.816270,2.553418,0
13,60557.2322,0,2.976124,1.849058,0
13,60558.2332,0,3.465247,2.503530,0
13,60559.2274,0,2.022449,2.213130,0
13,60560.2268,0,3.503388,3.087183,0
13,60567.3291,2,14.211590,0.945795,1
13,60567.3368,1,8.278180,1.052801,1
13,60567.3444,3,24.237070,1.263879,1
13,60567.3553,4,31.986134,1.675546,1
13,60567.3663,5,26.146296,4.267163,1
13,60580.1736,2,10.443220,1.455966,1
13,60580.1813,1,4.004329,1.395011,0
13,60580.1889,3,21.601105,2.202044,1
13,60580.1999,4,33.927616,3.210988,1
13,60580.2108,5,31.062853,7.647927,0
13,60582.1681,0,1.094513,2.405975,0
13,60583.1640,0,-1.259893,2.979695,0
13,60584.1591,0,1.617939,2.344479,0
13,60585.1601,0,1.637716,1.874226,0
13,60586.1564,0,0.117864,1.815223,0
13,60587.1540,0,0.302183,2.121386,0
13,60588.1461,0,3.094076,1.792624,0
13,60593.1209,2,10.800429,1.153115,1
13,60593.1287,1,4.406773,1.308495,0
13,60593.1365,3,16.092447,1.545535,1
13,60593.1476,4,24.343342,2.181189,1
13,60593.1585,5,19.054943,5.111032,1
13,60596.1351,2,12.383826,2.473228,0
13,60596.1427,1,0.372005,3.515397,0
13,60596.1504,3,16.204580,2.651616,1
13,60596.1613,4,26.244993,3.235833,1
13,60596.1723,5,20.389833,7.186456,0
13,60605.0908,2,7.542615,0.977457,1
13,60605.0986,1,3.585346,0.917699,0
13,60605.1063,3,15.296724,1.463083,1
13,60605.1174,4,21.654158,2.154012,1
13,60605.1283,5,12.261124,5.138537,1
13,60608.0836,2,7.212741,0.886919,1
13,60608.0913,1,3.103603,0.823795,0
13,60608.0991,3,15.016788,1.333418,1
13,60608.1101,4,22.194675,1.964862,1
13,60608.1211,5,16.844475,4.664538,0
13,60611.0756,2,5.219585,0.772730,1
13,60611.0833,1,2.315308,0.703447,0
13,60611.0911,3,13.987851,1.177967,1
13,60611.1021,4,23.238731,1.752712,1
13,60611.1130,5,13.764248,4.170574,0
13,60612.0813,0,-0.500034,1.440229,0
13,60613.0818,0,1.002482,1.524789,0
13,60614.0803,0,-1.957333,1.736466,0
13,60615.0761,0,-1.864921,1.981273,0
13,60616.0769,0,-0.823935,2.007182,0
13,60617.0737,0,4.029193,1.584846,0
13,60620.1444,0,1.135807,1.713108,0
13,60621.2673,2,5.227184,0.942700,1
13,60621.2749,1,0.979372,0.945305,0
13,60621.2825,3,10.753699,1.650229,1
13,60621.2934,4,15.025331,2.743479,1
13,60621.3044,5,20.601212,7.516092,0
13,60624.1290,2,4.608896,1.191301,0
13,60624.1366,1,3.302126,1.565634,0
13,60624.1442,3,13.692765,1.502649,1
13,60624.1551,4,22.563152,2.025444,1
13,60624.1661,5,17.466129,4.765408,0
13,60633.0541,2,5.740388,1.090800,1
13,60633.0617,1,-0.248447,1.055601,0
13,60633.0693,3,11.721886,1.678442,1
13,60633.0803,4,21.894281,2.489034,1
13,60633.0912,5,8.198497,5.988672,0
13,60636.0482,2,4.445689,1.029320,0
13,60636.0558,1,0.510712,0.992636,0
13,60636.0635,3,8.838101,1.589973,0
13,60636.0744,4,18.146425,2.370814,1
13,60636.0854,5,20.537880,5.701680,0
13,60640.0972,2,2.708286,0.871447,0
13,60640.1049,1,0.387127,0.847005,0
13,60640.1125,3,11.844381,1.395034,1
13,60640.1234,4,16.336960,2.129130,1
13,60640.1344,5,22.784824,5.287734,0
13,60642.0643,0,-0.045857,2.075920,0
13,60643.0521,0,-0.339986,1.668015,0
13,60644.0621,0,-0.061904,1.665856,0
13,60645.0625,0,-1.683454,1.768579,0
13,60646.0636,0,-1.090810,1.782456,0
13,60647.0635,0,-0.917836,1.968023,0
13,60648.0642,0,-0.471162,1.443392,0
13,60652.1289,2,2.063019,0.939241,0
13,60652.1365,1,0.914091,1.117558,0
13,60652.1441,3,8.505517,1.381162,1
13,60652.1550,4,20.247869,2.050198,1
13,60652.1660,5,4.584575,5.200393,0
14,59798.3205,2,14.465278,1.364599,1
14,59798.3281,1,13.748290,1.165200,1
14,59798.3357,3,8.555202,1.766522,1
14,59798.3466,4,7.253281,2.603756,0
14,59798.3576,5,1.134257,5.365180,0
14,59801.3553,2,9.855556,2.449258,0
14,59801.3629,1,10.256726,3.538008,0
14,59801.3705,3,10.469539,2.286622,0
14,59801.3815,4,4.839510,2.516017,0
14,59801.3924,5,6.016302,5.381228,0
14,59818.2740,0,-1.523922,1.781903,0
14,59819.2541,0,6.043163,2.455132,0
14,59820.2522,0,-3.440181,1.895149,0
14,59821.2478,0,-1.079028,2.430641,0
14,59822.2433,0,-0.827454,2.848530,0
14,59823.2659,0,5.701578,2.706588,0
14,59826.3105,2,2.735408,0.921596,0
14,59826.3181,1,2.361817,0.925133,0
14,59826.3258,3,4.882393,1.509568,1
14,59826.3367,4,5.898125,2.157517,0
14,59826.3477,5,7.172944,5.536250,0
14,59842.2456,2,1.210632,0.849698,0
14,59842.2532,1,0.480856,0.813325,0
14,59842.2608,3,3.207553,1.321817,0
14,59842.2718,4,4.451898,2.166812,0
14,59842.2827,5,4.518466,6.066945,0
14,59851.1792,0,4.186630,2.586060,0
14,59854.1485,2,0.507158,1.223279,0
14,59854.1563,1,1.509088,1.335596,0
14,59854.1640,3,3.079645,1.901563,0
14,59854.1750,4,2.476080,2.769468,0
14,59854.1860,5,5.571058,5.787987,0
14,59857.1408,2,2.902585,2.129320,0
14,59857.1485,1,0.076045,2.911045,0
14,59857.1563,3,5.179635,2.093322,0
14,59857.1673,4,4.421298,2.676636,0
14,59857.1782,5,4.276851,6.626348,0
14,59867.1112,2,-0.034593,1.082066,0
14,59867.1189,1,-1.146521,0.972931,0
14,59867.1267,3,4.685015,1.540005,0
14,59867.1377,4,3.080778,2.356951,0
14,59867.1487,5,-3.318216,5.827800,0
14,59870.1049,2,0.554243,0.970361,0
14,59870.1126,1,0.375991,0.821212,0
14,59870.1204,3,4.517774,1.374431,0
14,59870.1314,4,1.906460,2.282754,0
14,59870.1424,5,-0.145795,5.276641,0
14,59873.0971,2,2.108685,1.041195,0
14,59873.1049,1,0.095400,1.040191,0
14,59873.1126,3,3.909108,1.442765,0
14,59873.1236,4,9.230726,2.071694,0
14,59873.1346,5,5.649127,4.588614,0
14,59874.1461,0,-3.214277,1.608558,0
14,59875.0995,0,2.425352,2.028404,0
14,59876.0980,0,3.646141,2.440229,0
14,59877.0976,0,-1.473760,1.759196,0
14,59878.0964,0,0.576515,2.010597,0
14,59879.0895,0,0.572945,1.861002,0
14,59880.1017,0,-0.224757,1.368790,0
14,59884.1760,2,-0.132164,0.767148,0
14,59884.1836,1,-0.991678,0.885456,0
14,59884.1913,3,2.358108,1.139322,0
14,59884.2022,4,3.557226,1.610064,0
14,59884.2132,5,1.323540,4.209188,0
14,59887.2856,2,-2.655194,1.824434,0
14,59887.2933,1,-3.118680,2.202677,0
14,59887.3009,3,4.665102,2.350384,0
14,59887.3118,4,1.135563,3.143775,0
14,59887.3228,5,4.491710,7.945457,0
14,59896.1307,2,-0.963295,0.821164,0
14,59896.1384,1,-0.071138,0.702119,0
14,59896.1460,3,2.277665,1.393450,0
14,59896.1569,4,7.248254,2.194077,0
14,59896.1679,5,4.658720,5.946997,0
14,59899.1519,2,-1.774250,1.363683,0
14,59899.1595,1,-1.174699,1.385324,0
14,59899.1672,3,-0.953666,2.376407,0
14,59899.1781,4,8.315199,3.709089,0
14,59899.1891,5,-5.444651,7.901953,0
14,59902.1384,2,0.053353,1.184878,0
14,59902.1460,1,-0.247240,1.076702,0
14,59902.1537,3,3.547660,1.943007,0
14,59902.1646,4,3.456085,2.762274,0
14,59902.1755,5,-2.950504,7.020600,0
14,59904.1053,0,4.177117,2.292928,0
14,59905.0555,0,-1.216755,2.497270,0
14,59906.0562,0,2.410480,2.032158,0
14,59907.0567,0,-1.391141,1.743943,0
14,59908.0681,0,3.657772,2.691214,0
14,59909.0582,0,0.521167,1.833233,0
14,59910.0503,0,-0.100852,1.860389,0
14,59914.0526,2,1.198847,1.437903,0
14,59914.0602,1,-4.574095,1.954059,0
14,59914.0678,3,0.367522,1.685196,0
14,59914.0788,4,2.276051,2.267016,0
14,59914.0897,5,10.662569,5.357585,0
14,59924.1060,2,0.898902,1.131097,0
14,59924.1136,1,-0.533751,1.571826,0
14,59924.1212,3,0.148105,1.727512,0
14,59924.1322,4,4.691455,2.527090,0
14,59924.1431,5,-2.120271,5.628747,0
14,59927.1074,2,-0.466719,1.189681,0
14,59927.1151,1,-0.370304,1.132207,0
14,59927.1227,3,2.069784,1.592095,0
14,59927.1336,4,2.988137,2.298649,0
14,59927.1446,5,2.182713,5.887877,0
14,59930.1236,2,0.240297,0.940370,0
14,59930.1312,1,-0.920183,0.827606,0
14,59930.1388,3,1.243185,1.282906,0
14,59930.1498,4,3.796952,2.032441,0
14,59930.1607,5,4.695318,5.329674,0
14,59933.1249,2,0.006509,0.690589,0
14,59933.1325,1,1.157228,0.695202,0
14,59933.1401,3,2.223172,1.269250,0
14,59933.1511,4,2.050447,1.913679,0
14,59933.1620,5,6.551545,5.426427,0
14,59934.0638,0,-1.038826,1.138865,0
14,59935.0646,0,-0.855022,1.120789,0
14,59936.0642,0,4.664287,1.541755,0
14,59937.0650,0,0.068496,1.637206,0
14,59938.0647,0,-0.037584,1.994424,0
14,59939.0650,0,0.482500,2.191997,0
14,60165.3032,2,-0.497132,1.193823,0
14,60165.3109,1,2.598282,1.347751,0
14,60165.3186,3,-2.635130,1.517240,0
14,60165.3295,4,5.500406,2.024001,0
14,60165.3405,5,-1.459476,4.779670,0
14,60168.2892,2,-0.132310,0.833159,0
14,60168.2970,1,1.305878,0.789044,0
14,60168.3047,3,1.487449,1.300279,0
14,60168.3157,4,2.190609,2.065560,0
14,60168.3267,5,-2.313916,4.814189,0
14,60176.2820,0,-2.258721,2.358804,0
14,60177.2726,0,-0.240173,2.761198,0
14,60181.4088,2,-0.118050,0.923237,0
14,60181.4164,1,-1.757872,1.260326,0
14,60181.4232,3,-0.566288,3.235162,0
14,60183.2660,2,-0.732747,1.913709,0
14,60183.2736,1,0.012732,2.931251,0
14,60183.2812,3,1.402672,2.546405,0
14,60183.2922,4,-1.725346,2.993038,0
14,60183.3031,5,0.494266,6.175346,0
14,60195.2812,2,3.029382,0.934420,0
14,60195.2888,1,-0.670324,1.018990,0
14,60195.2964,3,-0.662677,1.624419,0
14,60195.3073,4,-3.872562,2.337922,0
14,60195.3183,5,2.712977,6.272959,0
14,60198.2690,0,2.956521,2.177715,0
14,60199.2186,0,1.331341,2.156719,0
14,60200.2139,0,1.351745,2.588871,0
14,60201.2072,0,2.853797,2.498285,0
14,60202.2089,0,1.906982,2.486693,0
14,60209.1811,2,-1.602512,1.563901,0
14,60209.1888,1,-0.671917,1.407978,0
14,60209.1965,3,0.374204,1.830516,0
14,60209.2075,4,2.568392,2.908100,0
14,60209.2184,5,6.678216,6.695596,0
14,60212.1675,2,3.776216,1.935000,0
14,60212.1753,1,0.021062,2.747988,0
14,60212.1830,3,-1.807372,2.293340,0
14,60212.1941,4,2.568171,2.590221,0
14,60212.2050,5,10.840719,5.663691,0
14,60223.2416,2,-0.194212,1.181089,0
14,60223.2493,1,-0.036469,1.239146,0
14,60223.2569,3,1.530641,1.648199,0
14,60223.2678,4,1.801575,2.471934,0
14,60223.2788,5,4.744702,5.793488,0
14,60226.3337,2,-0.744695,0.943627,0
14,60226.3413,1,-0.106145,0.942899,0
14,60226.3489,3,2.442077,1.793019,0
14,60226.3599,4,1.509347,3.068994,0
14,60226.3708,5,-6.627614,8.105484,0
14,60238.3197,2,-0.863180,1.052267,0
14,60238.3273,1,1.557186,1.152551,0
14,60238.3349,3,-4.336288,2.169387,0
14,60238.3459,4,4.612921,3.828061,0
14,60238.3568,5,-2.021824,11.249375,0
14,60241.0870,2,0.738195,1.997376,0
14,60241.0948,1,-1.971425,3.061613,0
14,60241.1025,3,-2.025038,2.443142,0
14,60241.1136,4,-0.312927,2.915593,0
14,60241.1245,5,3.056638,5.938406,0
14,60250.1708,2,-2.482518,2.100286,0
14,60250.1957,1,-0.029834,2.806254,0
14,60250.2034,3,4.678575,2.926827,0
14,60250.2143,4,-3.449543,3.980825,0
14,60250.2253,5,-3.656875,7.932066,0
14,60261.1296,0,-3.352771,2.067494,0
14,60262.0550,0,-3.894481,2.299585,0
14,60263.0556,0,0.061202,1.966700,0
14,60264.0559,0,-0.441779,2.168793,0
14,60265.0780,0,-0.070926,2.402439,0
14,60268.0449,2,-2.115148,1.402396,0
14,60268.0525,1,-1.156284,1.846402,0
14,60268.0601,3,-0.248761,2.127820,0
14,60268.0711,4,1.514031,2.732877,0
14,60268.0820,5,-11.142164,6.741534,0
14,60278.0993,2,0.058940,1.642234,0
14,60278.1069,1,1.004120,2.270966,0
14,60278.1145,3,-0.427660,2.168000,0
14,60278.1255,4,0.148053,3.057248,0
14,60278.1364,5,14.839427,7.639682,0
14,60281.1023,2,0.190632,0.797410,0
14,60281.1099,1,-0.334674,0.793767,0
14,60281.1175,3,-1.839893,1.368291,0
14,60281.1285,4,-1.943368,1.989664,0
14,60281.1394,5,3.423336,5.175146,0
14,60284.1027,2,0.880585,0.836640,0
14,60284.1104,1,-1.071148,0.760074,0
14,60284.1180,3,-0.493422,1.159284,0
14,60284.1289,4,-1.769790,1.701805,0
14,60284.1399,5,10.491048,4.439059,0
14,60287.1047,2,-2.440768,1.249228,0
14,60287.1123,1,-0.587990,1.081420,0
14,60287.1200,3,-0.658860,1.794693,0
14,60287.1309,4,6.071679,2.963972,0
14,60287.1418,5,-0.912052,7.009711,0
14,60290.0761,0,0.791311,1.912837,0
14,60291.0689,0,-0.916465,1.657989,0
14,60292.0699,0,-0.940145,1.852110,0
14,60293.0699,0,-0.065888,1.558097,0
14,60294.0708,0,-0.362213,1.394291,0
14,60532.3019,2,-0.128229,1.022716,0
14,60532.3097,1,-0.763027,0.950918,0
14,60532.3173,3,-0.986068,1.539248,0
14,60532.3282,4,-0.845234,2.279716,0
14,60532.3392,5,8.809470,5.421995,0
14,60535.2802,2,-0.482045,1.404915,0
14,60535.2879,1,1.358840,1.336113,0
14,60535.2957,3,-0.531306,2.081220,0
14,60535.3068,4,-3.251427,3.016502,0
14,60535.3177,5,-1.253254,7.157213,0
14,60538.2826,2,1.802223,1.846149,0
14,60538.2903,1,-0.026881,2.671491,0
14,60538.2980,3,1.533429,1.916902,0
14,60538.3089,4,-0.856327,2.306201,0
14,60538.3199,5,-1.663799,5.002286,0
14,60554.2651,0,-1.160009,2.241721,0
14,60555.2411,0,-2.432893,2.027424,0
14,60556.2370,0,2.398724,2.534231,0
14,60557.2322,0,-1.568709,1.829169,0
14,60558.2332,0,-4.116055,2.484439,0
14,60559.2274,0,0.190265,2.200325,0
14,60560.2268,0,1.347325,3.073875,0
14,60567.3291,2,0.910389,0.934006,0
14,60567.3368,1,-1.444133,1.041686,0
14,60567.3444,3,-1.500671,1.243064,0
14,60567.3553,4,2.684830,1.653721,0
14,60567.3663,5,0.345307,4.248466,0
14,60580.1736,2,0.807077,1.449346,0
14,60580.1813,1,2.870951,1.391824,0
14,60580.1889,3,-0.262873,2.190222,0
14,60580.1999,4,-6.388680,3.196808,0
14,60580.2108,5,9.181289,7.635880,0
14,60582.1681,0,3.604234,2.412230,0
14,60583.1640,0,-2.491946,2.974151,0
14,60584.1591,0,0.669646,2.335869,0
14,60585.1601,0,0.574023,1.865054,0
14,60586.1564,0,-0.297618,1.811183,0
14,60587.1540,0,-1.218754,2.115987,0
14,60588.1461,0,-1.992268,1.771815,0
14,60593.1209,2,-0.511945,1.144706,0
14,60593.1287,1,-0.914986,1.302187,0
14,60593.1365,3,-1.629705,1.534286,0
14,60593.1476,4,-3.500163,2.167956,0
14,60593.1585,5,-7.225812,5.098632,0
14,60596.1351,2,1.942903,2.467501,0
14,60596.1427,1,-3.828583,3.510221,0
14,60596.1504,3,-0.379595,2.643848,0
14,60596.1613,4,-4.746652,3.225241,0
14,60596.1723,5,-0.740908,7.175334,0
14,60605.0908,2,-0.240784,0.970734,0
14,60605.0986,1,1.228715,0.912676,0
14,60605.1063,3,-0.173487,1.451932,0
14,60605.1174,4,-3.157050,2.142289,0
14,60605.1283,5,-4.353776,5.129833,0
14,60608.0836,2,-1.290518,0.880033,0
14,60608.0913,1,0.897031,0.818780,0
14,60608.0991,3,-1.789510,1.321652,0
14,60608.1101,4,-0.283683,1.951819,0
14,60608.1211,5,-1.071998,4.652730,0
14,60611.0756,2,0.428420,0.767492,0
14,60611.0833,1,-0.175538,0.697855,0
14,60611.0911,3,-1.351069,1.165860,0
14,60611.1021,4,3.187355,1.739082,0
14,60611.1130,5,-2.026892,4.159950,0
14,60612.0813,0,1.714202,1.449054,0
14,60613.0818,0,-1.208979,1.515573,0
14,60614.0803,0,2.366692,1.746634,0
14,60615.0761,0,0.562962,1.980418,0
14,60616.0769,0,-0.108613,2.003422,0
14,60617.0737,0,1.100774,1.564082,0
14,60620.1444,0,-3.990349,1.704574,0
14,60621.2673,2,-1.006580,0.937842,0
14,60621.2749,1,-0.619488,0.942508,0
14,60621.2825,3,1.860522,1.643903,0
14,60621.2934,4,1.823893,2.737007,0
14,60621.3044,5,-2.549210,7.504488,0
14,60624.1290,2,-1.234065,1.187694,0
14,60624.1366,1,0.769079,1.561521,0
14,60624.1442,3,-1.651142,1.493277,0
14,60624.1551,4,-1.015730,2.012733,0
14,60624.1661,5,-1.555369,4.753550,0
14,60633.0541,2,0.136783,1.086145,0
14,60633.0617,1,-0.152341,1.054063,0
14,60633.0693,3,-0.510173,1.670818,0
14,60633.0803,4,0.315384,2.478524,0
14,60633.0912,5,-6.652580,5.982395,0
14,60636.0482,2,-0.451914,1.025376,0
14,60636.0558,1,0.286247,0.990897,0
14,60636.0635,3,0.255769,1.584027,0
14,60636.0744,4,-0.340420,2.361790,0
14,60636.0854,5,9.500138,5.693579,0
14,60640.0972,2,0.332550,0.868813,0
14,60640.1049,1,-0.473409,0.845206,0
14,60640.1125,3,0.729877,1.386701,0
14,60640.1234,4,-1.934210,2.120461,0
14,60640.1344,5,-4.023204,5.273717,0
14,60642.0643,0,6.371965,2.098372,0
14,60643.0521,0,0.069668,1.665262,0
14,60644.0621,0,-0.031334,1.662740,0
14,60645.0625,0,0.388892,1.767081,0
14,60646.0636,0,-0.740859,1.779115,0
14,60647.0635,0,3.793745,1.980666,0
14,60648.0642,0,-2.506170,1.440771,0
14,60652.1289,2,2.536718,0.938627,0
14,60652.1365,1,-0.345612,1.114977,0
14,60652.1441,3,-0.218325,1.374793,0
14,60652.1550,4,3.384219,2.040273,0
14,60652.1660,5,5.932649,5.198509,0
17,59750.4229,2,0.384775,1.502702,0
17,59750.4306,1,2.970657,2.100801,0
17,59750.4383,3,-3.890317,2.298941,0
17,59750.4450,4,-7.424517,10.310197,0
17,59752.4070,2,1.180267,1.063318,0
17,59752.4147,1,-0.427451,1.103617,0
17,59752.4224,3,0.804619,1.459435,0
17,59752.4334,4,0.082026,2.345975,0
17,59752.4435,5,-12.127212,9.469489,0
17,59767.2968,2,0.300545,0.879810,0
17,59767.3045,1,-0.789039,0.757829,0
17,59767.3122,3,-2.516992,1.335309,0
17,59767.3233,4,-0.305087,2.137903,0
17,59767.3343,5,-2.361575,4.981940,0
17,59770.2179,2,2.652607,2.031536,0
17,59770.2256,1,0.104747,2.751110,0
17,59770.2334,3,-1.251170,2.138434,0
17,59770.2445,4,1.627103,2.665143,0
17,59770.2557,5,3.234555,6.275570,0
17,59779.3188,2,-1.560282,2.314641,0
17,59779.3265,1,1.138921,2.800010,0
17,59779.3342,3,1.399184,2.246128,0
17,59779.3452,4,-4.086601,2.966733,0
17,59779.3562,5,3.600588,7.075419,0
17,59782.1897,2,-0.205182,1.441666,0
17,59782.1974,1,-0.008989,1.378359,0
17,59782.2051,3,-3.370768,2.336834,0
17,59782.2162,4,-1.506593,3.089562,0
17,59782.2274,5,-1.340868,7.552328,0
17,59797.2861,2,0.574704,1.059202,0
17,59797.2938,1,-1.925344,1.095736,0
17,59797.3015,3,-1.303792,1.916743,0
17,59797.3126,4,-0.918545,2.786953,0
17,59797.3237,5,-7.784748,6.963860,0
17,59800.3168,2,0.594011,2.277387,0
17,59800.3244,1,6.002035,3.310603,0
17,59800.3320,3,0.828272,2.636445,0
17,59800.3429,4,-2.143990,3.045394,0
17,59800.3539,5,2.182031,6.526192,0
17,59807.1738,2,0.345367,1.600768,0
17,59807.1815,1,1.497795,2.061084,0
17,59807.1892,3,-1.738689,1.721755,0
17,59807.2003,4,-2.008608,2.187723,0
17,59807.2114,5,-4.591490,5.073585,0
17,59810.1045,2,-0.190427,1.024519,0
17,59810.1122,1,1.409750,0.987204,0
17,59810.1200,3,2.097727,1.766024,0
17,59810.1311,4,4.661858,2.723413,0
17,59810.1422,5,1.852001,6.541757,0
17,59813.1044,2,-0.609008,0.979608,0
17,59813.1122,1,-1.283370,0.947661,0
17,59813.1199,3,0.443231,1.563090,0
17,59813.1310,4,0.463331,2.578759,0
17,59813.1422,5,-4.206830,5.891186,0
17,59819.1532,0,-0.111748,1.900487,0
17,59820.1047,0,-1.127300,1.845100,0
17,59821.1026,0,-2.983045,2.653126,0
17,59822.1105,0,-3.786463,3.135153,0
17,59823.1505,0,-6.251946,2.924953,0
17,59835.0600,2,-0.105554,1.651957,0
17,59835.0678,1,-3.813735,2.461054,0
17,59835.0755,3,-5.183056,2.248735,0
17,59835.0866,4,-3.032220,2.768835,0
17,59835.0978,5,-1.849971,5.832443,0
17,59839.0306,2,1.615393,1.178528,0
17,59839.0384,1,-0.088986,1.136387,0
17,59839.0461,3,-3.522223,1.587615,0
17,59839.0573,4,-2.323542,2.134739,0
17,59839.0684,5,2.161493,5.221899,0
17,59842.0207,2,1.318505,1.033401,0
17,59842.0285,1,0.922532,1.039005,0
17,59842.0362,3,1.569138,1.599077,0
17,59842.0473,4,3.807324,2.315629,0
17,59842.0585,5,-1.946610,5.253097,0
17,59851.1114,0,0.466809,1.833261,0
17,59854.0796,2,-0.432977,1.106505,0
17,59854.0873,1,-0.349620,1.186553,0
17,59854.0950,3,-1.028248,1.475277,0
17,59854.1061,4,0.624902,2.189718,0
17,59854.1172,5,-6.577788,5.223208,0
17,59857.0453,2,1.048279,1.770880,0
17,59857.0531,1,4.293002,2.442868,0
17,59857.0608,3,0.670081,1.885174,0
17,59857.0719,4,-1.393756,2.439591,0
17,59857.0830,5,7.867962,5.441516,0
17,59864.0162,2,1.446581,0.850881,0
17,59864.0239,1,1.881013,0.881634,0
17,59864.0316,3,1.176724,1.403741,0
17,59864.0428,4,1.061918,2.333516,0
17,59864.0539,5,7.476207,5.917978,0
17,59867.0178,2,0.380713,1.309081,0
17,59867.0255,1,-3.510653,1.302897,0
17,59867.0332,3,-2.578208,2.183999,0
17,59867.0443,4,1.046922,3.227464,0
17,59867.0554,5,-14.202744,9.457583,0
17,59870.0194,2,-1.554325,0.927003,0
17,59870.0272,1,-0.831158,0.920545,0
17,59870.0349,3,-1.187623,1.500118,0
17,59870.0459,4,2.125045,2.528746,0
17,59870.0571,5,2.077787,5.911781,0
17,59873.0212,2,-0.457801,0.738598,0
17,59873.0289,1,2.796051,0.760300,0
17,59873.0366,3,0.138800,1.232345,0
17,59873.0477,4,0.273784,1.886916,0
17,59873.0588,5,-5.356641,4.801971,0
17,59874.0599,0,-2.398834,1.584590,0
17,59875.0311,0,-0.540180,1.315395,0
17,59876.0231,0,0.411913,2.120848,0
17,59877.0238,0,-0.891288,1.592397,0
17,59878.0246,0,0.252658,1.625712,0
17,59879.0248,0,0.494818,1.668736,0
17,59880.0258,0,1.609941,1.357399,0
17,59884.0823,2,1.431002,1.091537,0
17,59884.0900,1,-0.669697,1.493729,0
17,59884.0976,3,0.632236,1.522321,0
17,59884.1085,4,2.388964,2.008811,0
17,59884.1195,5,-3.279281,4.883777,0
17,59887.0298,2,-1.951323,1.872626,0
17,59887.0375,1,-0.140049,2.947258,0
17,59887.0451,3,-0.987463,2.277290,0
17,59887.0562,4,-1.669648,2.256846,0
17,59887.0673,5,4.776433,5.354571,0
17,60118.4163,0,1.237189,1.560750,0
17,60124.2541,2,-0.027002,2.279922,0
17,60124.2618,1,-1.582832,2.932222,0
17,60124.2695,3,-2.243939,2.501337,0
17,60124.2807,4,-2.010008,2.976800,0
17,60124.2918,5,-3.812106,6.686803,0
17,60140.2290,0,2.889142,1.885088,0
17,60141.2225,0,-2.675623,3.532677,0
17,60142.2202,0,3.701951,3.092139,0
17,60143.2212,0,0.107055,2.421978,0
17,60144.2186,0,2.121859,1.934957,0
17,60145.2123,0,3.519005,2.431906,0
17,60153.2274,2,1.331808,1.381732,0
17,60153.2351,1,1.678943,1.884794,0
17,60153.2428,3,-0.669065,1.646313,0
17,60153.2539,4,-2.336847,2.069594,0
17,60153.2650,5,-3.040281,5.269622,0
17,60162.1477,2,0.274822,1.883733,0
17,60162.1554,1,-1.090832,2.591505,0
17,60162.1631,3,-4.857473,2.765956,0
17,60162.1742,4,-10.275881,3.141578,0
17,60162.1853,5,-9.252888,7.983027,0
17,60165.1369,2,-0.468279,0.846974,0
17,60165.1446,1,-0.430437,0.708165,0
17,60165.1524,3,-0.581242,1.364931,0
17,60165.1635,4,3.187243,2.182051,0
17,60165.1746,5,6.814413,5.636336,0
17,60168.1260,2,1.795461,0.881347,0
17,60168.1337,1,-0.350579,0.854263,0
17,60168.1414,3,-2.100255,1.659393,0
17,60168.1525,4,-1.526452,2.377539,0
17,60168.1637,5,2.514493,5.409767,0
17,60176.1332,0,-1.022222,2.365174,0
17,60177.1370,0,-3.289955,2.790762,0
17,60181.3147,2,-1.503265,1.258997,0
17,60181.3223,1,1.620242,1.237350,0
17,60181.3299,3,2.305993,2.212692,0
17,60181.3409,4,-0.697285,4.016526,0
17,60181.3518,5,3.620666,8.753998,0
17,60184.3625,2,-0.132555,2.135256,0
17,60184.3701,1,-0.614091,2.560603,0
17,60184.3777,3,0.653611,2.486900,0
17,60184.3887,4,1.677764,2.891815,0
17,60184.3996,5,7.855544,6.898280,0
17,60194.1575,2,1.309505,1.067185,0
17,60194.1652,1,-0.309205,1.053532,0
17,60194.1729,3,-0.334617,1.786434,0
17,60194.1839,4,0.655740,3.139240,0
17,60194.1926,5,5.044827,11.278636,0
17,60197.1181,2,0.552177,1.097758,0
17,60197.1258,1,0.541816,0.909236,0
17,60197.1335,3,0.639729,1.336890,0
17,60197.1446,4,-0.978055,2.037800,0
17,60197.1557,5,4.619338,4.987423,0
17,60198.1077,0,2.149141,2.452106,0
17,60199.0914,0,5.486719,3.143868,0
17,60200.0650,0,1.228997,2.524841,0
17,60201.0680,0,6.056491,2.897650,0
17,60202.0552,0,3.771713,2.094126,0
17,60206.1107,0,1.964452,1.559410,0
17,60207.1469,0,1.315419,2.819893,0
17,60208.0229,2,-0.600505,1.423023,0
17,60208.0307,1,0.017527,1.343573,0
17,60208.0384,3,-1.550505,1.831250,0
17,60208.0495,4,3.882280,2.726225,0
17,60208.0606,5,9.559813,7.414577,0
17,60211.0124,2,1.860704,2.250480,0
17,60211.0202,1,-2.090354,3.034887,0
17,60211.0279,3,0.410192,2.599820,0
17,60211.0390,4,-1.062310,3.468099,0
17,60211.0502,5,8.321412,9.169344,0
17,60221.0153,2,0.448097,0.997295,0
17,60221.0230,1,0.082163,0.883182,0
17,60221.0308,3,1.515744,1.365631,0
17,60221.0419,4,-0.955922,2.171834,0
17,60221.0530,5,6.189006,5.659595,0
17,60224.0140,2,-0.977404,1.146213,0
17,60224.0217,1,-1.527085,1.044619,0
17,60224.0294,3,-2.026714,1.896492,0
17,60224.0405,4,-2.270058,2.563563,0
17,60224.0516,5,1.532559,5.857242,0
17,60227.0151,2,0.269555,1.054280,0
17,60227.0228,1,1.115057,0.999069,0
17,60227.0305,3,1.552577,1.598891,0
17,60227.0416,4,0.899977,2.226238,0
17,60227.0527,5,-13.314797,5.586868,0
17,60228.0187,0,1.561344,2.716171,0
17,60229.0162,0,2.115411,2.732387,0
17,60234.0265,0,-0.632177,2.254489,0
17,60237.2206,2,0.991648,0.921157,0
17,60237.2283,1,1.011999,0.870146,0
17,60237.2359,3,1.932343,1.516595,0
17,60237.2468,4,0.683186,2.506894,0
17,60237.2578,5,-2.942844,7.042232,0
17,60240.0223,2,0.455927,1.997859,0
17,60240.0300,1,2.247968,2.647892,0
17,60240.0377,3,6.028008,2.647428,0
17,60240.0488,4,2.535219,3.012484,0
17,60240.0598,5,8.448805,5.811503,0
17,60249.0338,2,0.460854,0.866548,0
17,60249.0415,1,0.452554,0.772836,0
17,60249.0492,3,-1.191506,1.286252,0
17,60249.0602,4,2.830095,2.116775,0
17,60249.0712,5,-4.378712,5.177276,0
17,60260.0423,0,-7.464523,2.644202,0
17,60261.0361,0,0.930443,1.914282,0
17,60262.0367,0,4.964463,1.854471,0
17,60263.0373,0,0.783741,2.196650,0
17,60264.0465,0,-3.080768,1.916776,0
17,60490.2647,2,-0.977129,1.482041,0
17,60490.2725,1,-2.611819,1.794772,0
17,60490.2802,3,1.078630,2.071858,0
17,60490.2913,4,-0.492853,2.955702,0
17,60490.3024,5,3.792550,7.104066,0
17,60493.2372,2,0.021979,1.054925,0
17,60493.2450,1,-1.376170,0.980580,0
17,60493.2527,3,1.015628,1.668951,0
17,60493.2639,4,-0.002133,2.546342,0
17,60493.2750,5,-11.551120,6.173368,0
17,60499.2467,0,0.438197,2.600874,0
17,60500.2437,0,-1.686091,2.074696,0
17,60501.2385,0,0.020948,2.299457,0
17,60502.2355,0,1.068030,2.629417,0
17,60508.2638,2,-2.790141,2.436066,0
17,60508.2715,1,5.700086,3.490426,0
17,60508.2792,3,0.113332,2.677060,0
17,60508.2903,4,-1.245703,3.280095,0
17,60508.3014,5,16.609587,7.427779,0
17,60524.2390,0,-0.490797,2.080026,0
17,60525.1736,0,2.666769,2.941305,0
17,60532.3489,2,-0.207433,0.864004,0
17,60532.3565,1,-0.134437,0.845488,0
17,60532.3641,3,-0.515472,1.462743,0
17,60532.3751,4,0.897895,2.323136,0
17,60532.3860,5,1.356515,5.973135,0
17,60535.1253,2,3.357187,1.895133,0
17,60535.1330,1,1.788090,2.447693,0
17,60535.1408,3,-0.746670,2.393641,0
17,60535.1519,4,1.761451,3.298109,0
17,60535.1630,5,-3.134825,7.769337,0
17,60538.2351,2,7.960939,1.803240,0
17,60538.2428,1,6.926830,2.602727,0
17,60538.2505,3,3.353295,1.972350,0
17,60538.2615,4,5.710919,2.318584,0
17,60538.2725,5,0.716567,5.113697,0
17,60546.3406,2,12.973317,1.903937,1
17,60546.3482,1,11.756248,2.837219,1
17,60546.3558,3,10.891992,2.308349,0
17,60546.3668,4,10.706098,3.126808,0
17,60546.3777,5,1.998101,7.646282,0
17,60549.0879,2,8.457420,1.114208,1
17,60549.0956,1,10.614448,1.048073,1
17,60549.1034,3,9.131392,1.747332,1
17,60549.1145,4,12.109291,2.651627,0
17,60549.1256,5,-0.454903,6.443316,0
17,60554.0964,0,3.385612,2.492159,0
17,60555.0951,0,-0.653454,2.008215,0
17,60556.0879,0,4.359657,2.500103,0
17,60557.0831,0,0.208935,1.804377,0
17,60558.1093,0,2.002906,2.298758,0
17,60559.1097,0,-1.713623,1.974990,0
17,60560.1065,0,-4.030066,2.814673,0
17,60567.2821,2,5.086202,1.698282,0
17,60567.2897,1,4.862064,2.316888,0
17,60567.2973,3,2.821583,1.933260,0
17,60567.3083,4,8.017381,2.382588,0
17,60567.3192,5,5.673804,5.440264,0
17,60574.1118,2,4.409955,1.550580,0
17,60574.1195,1,2.795052,2.196265,0
17,60574.1272,3,4.197219,1.987329,0
17,60574.1383,4,10.014592,2.528254,0
17,60574.1493,5,12.793771,5.745250,0
17,60577.0186,2,4.357023,1.470498,0
17,60577.0263,1,1.377891,1.400257,0
17,60577.0340,3,3.097299,2.332945,0
17,60577.0451,4,5.029422,3.520210,0
17,60577.0563,5,9.703295,8.530192,0
17,60580.0095,2,0.080061,1.449560,0
17,60580.0173,1,0.636153,1.387539,0
17,60580.0250,3,1.814262,2.309473,0
17,60580.0361,4,2.268791,3.480996,0
17,60580.0472,5,3.766253,8.433430,0
17,60582.0840,0,2.160855,2.057876,0
17,60583.0169,0,-2.377121,2.935992,0
17,60584.0117,0,-7.030778,2.272231,0
17,60585.0117,0,-2.432227,1.843772,0
17,60586.0123,0,0.942960,1.782520,0
17,60587.0127,0,-0.601708,2.076788,0
17,60588.0131,0,-2.069814,1.647215,0
17,60593.0636,2,0.346279,0.950896,0
17,60593.0713,1,1.045972,1.126506,0
17,60593.0790,3,1.954885,1.349871,0
17,60593.0901,4,4.170722,1.979371,0
17,60593.1012,5,5.737381,4.821476,0
17,60596.0304,2,1.884329,2.578470,0
17,60596.0381,1,-2.693286,3.627853,0
17,60596.0458,3,0.705422,2.812787,0
17,60596.0569,4,1.202975,3.396812,0
17,60596.0680,5,16.761280,7.451248,0
17,60603.0208,2,0.386061,0.711484,0
17,60603.0286,1,-0.007518,0.663680,0
17,60603.0363,3,0.688574,1.169908,0
17,60603.0473,4,4.215033,1.826377,0
17,60603.0584,5,4.616831,4.548802,0
17,60606.0225,2,-0.884437,1.024675,0
17,60606.0303,1,-0.808401,0.984848,0
17,60606.0379,3,1.421415,1.656912,0
17,60606.0490,4,-0.165550,2.551519,0
17,60606.0601,5,-0.249700,6.335219,0
17,60609.0247,2,0.342698,0.744517,0
17,60609.0323,1,0.365452,0.700571,0
17,60609.0400,3,3.474475,1.220512,0
17,60609.0510,4,5.966081,1.906627,0
17,60609.0621,5,-4.503197,4.756433,0
17,60612.0266,0,-1.850411,1.158724,0
17,60613.0269,0,-1.095945,1.252873,0
17,60614.0276,0,1.658578,1.438634,0
17,60615.0375,0,-1.868202,1.603315,0


================================================
FILE: examples/data/plasticc_test_set_metadata_1k.csv
================================================
object_id,ra,decl,gal_l,gal_b,ddf,hostgal_specz,hostgal_photoz,hostgal_photoz_err,distmod,mwebv
13,34.453125,-5.229529,169.987075,-59.956185,1,0.3048,0.3193,0.0542,41.1123,0.019
14,33.398438,-4.331149,167.226341,-59.936551,1,nan,0.6323,0.0179,42.8774,0.018
17,348.529419,-61.755440,321.293980,-51.763351,1,nan,0.8297,0.0605,43.6000,0.016
23,34.804688,-5.829153,171.307861,-60.174401,1,nan,0.6533,0.1479,42.9640,0.023
34,351.321442,-64.198746,317.458993,-50.429931,1,0.4557,0.4617,0.0122,42.0540,0.023
35,35.332031,-5.979157,172.286722,-59.931743,1,nan,0.8388,0.0375,43.6290,0.022
43,0.574468,-45.981140,327.041068,-68.778764,1,nan,0.6669,0.0546,43.0186,0.006
50,0.574468,-45.981140,327.041068,-68.778764,1,nan,1.4663,0.0529,45.1281,0.006
60,346.562500,-63.448284,320.824720,-49.866957,1,nan,0.9462,0.0116,43.9519,0.021
69,349.160583,-64.760857,318.219706,-49.458924,1,nan,1.0432,0.1092,44.2138,0.020
88,349.160583,-64.760857,318.219706,-49.458924,1,0.1608,0.1650,0.0053,39.4929,0.020
96,151.171875,2.537361,237.288526,43.169764,1,0.3277,0.3680,0.0340,41.4711,0.024
106,1.666667,-44.399834,327.519190,-70.529554,1,nan,0.8532,0.0602,43.6747,0.009
114,351.259003,-64.386185,317.344860,-50.255113,1,nan,0.7996,0.2747,43.5011,0.020
115,151.347656,4.181528,235.568369,44.259942,1,nan,0.8979,0.0515,43.8114,0.016
116,150.468750,1.641510,237.714575,42.075234,1,nan,1.1244,0.0363,44.4151,0.017
130,34.277344,-5.679190,170.314930,-60.410322,1,0.3395,0.3368,0.0728,41.2464,0.020
142,1.694561,-45.191612,326.278557,-69.858253,1,nan,1.2710,0.0796,44.7444,0.011
147,150.820312,1.641510,237.994507,42.358984,1,nan,0.2904,0.1155,40.8738,0.020
151,151.171875,1.342993,238.602520,42.464379,1,nan,0.5090,0.0122,42.3075,0.026
168,349.429535,-62.508568,320.039643,-51.393745,1,nan,0.0000,0.0000,nan,0.020
171,52.910156,-27.953188,223.774083,-54.639214,1,nan,0.8623,0.0583,43.7031,0.007
173,150.996094,4.181528,235.291975,43.970869,1,nan,0.4490,0.0219,41.9820,0.015
176,52.910156,-27.953188,223.774083,-54.639214,1,0.3775,0.3642,0.0064,41.4450,0.007
184,352.711273,-63.823658,316.922299,-51.059403,1,nan,0.9112,0.0513,43.8508,0.024
186,0.929752,-44.597992,328.531426,-70.083244,1,nan,0.9302,0.0118,43.9062,0.011
195,152.050781,2.985506,237.495952,44.143927,1,nan,0.4658,0.0250,42.0768,0.019
198,1.694561,-45.191612,326.278557,-69.858253,1,0.4060,0.3959,0.0146,41.6579,0.011
204,349.046051,-61.943836,320.796530,-51.753706,1,0.5584,0.4997,0.0312,42.2594,0.017
211,53.613281,-27.953188,223.929533,-54.024772,1,0.5469,0.5644,0.0113,42.5781,0.007
216,150.820312,1.641510,237.994507,42.358984,1,nan,0.4056,0.0489,41.7202,0.020
236,2.457983,-45.389202,324.632685,-69.945696,1,0.3436,0.2885,0.0162,40.8574,0.011
240,151.171875,2.537361,237.288526,43.169764,1,nan,1.0936,0.0318,44.3405,0.024
260,150.820312,3.732834,235.666318,43.572109,1,nan,0.7554,0.0425,43.3496,0.016
268,149.589844,3.583322,234.885369,42.474696,1,nan,0.6234,0.0184,42.8401,0.024
272,149.414062,3.433834,234.919132,42.245550,1,nan,0.7059,0.0220,43.1693,0.027
277,348.595886,-63.072620,320.023289,-50.713060,1,nan,0.8751,0.0187,43.7426,0.021
289,53.613281,-28.630989,225.073365,-54.119461,1,nan,0.7442,0.0682,43.3099,0.006
306,148.886719,2.686724,235.347248,41.389003,1,0.7180,0.7265,0.0182,43.2458,0.028
316,32.871094,-4.780192,166.959493,-60.615132,1,nan,0.5615,0.1166,42.5647,0.017
337,150.117188,2.836105,236.124718,42.483719,1,nan,1.4098,0.0499,45.0228,0.016
349,34.453125,-5.229529,169.987075,-59.956185,1,nan,0.7679,0.0318,43.3934,0.019
357,349.966217,-62.696659,319.542989,-51.376556,1,nan,0.8937,0.0213,43.7988,0.021
366,53.613281,-26.944359,222.237403,-53.863858,1,nan,1.3577,0.2274,44.9217,0.009
384,359.816315,-44.003082,331.451340,-70.123054,1,nan,0.8134,0.0374,43.5469,0.013
402,349.891296,-64.573555,317.972107,-49.786192,1,nan,0.5684,0.0804,42.5965,0.023
406,32.695312,-4.929937,166.868469,-60.841230,1,nan,0.8989,0.0967,43.8145,0.018
409,347.861847,-61.943836,321.519104,-51.424048,1,nan,0.9110,0.0551,43.8503,0.017
413,349.429535,-62.508568,320.039643,-51.393745,1,0.6430,0.6411,0.0083,42.9139,0.020
443,150.996094,2.388015,237.313912,42.939977,1,0.3682,0.3649,0.0113,41.4497,0.021
451,349.615387,-63.636005,318.927246,-50.506542,1,nan,0.8853,0.0298,43.7734,0.018
455,0.929752,-44.597992,328.531426,-70.083244,1,nan,0.4671,0.0117,42.0842,0.011
466,34.277344,-5.079716,169.526841,-59.956640,1,0.4986,0.5527,0.0171,42.5229,0.019
467,358.665253,-45.783966,330.353593,-68.203652,1,nan,0.6573,0.0463,42.9800,0.009
478,358.636353,-46.768478,328.890146,-67.388837,1,nan,0.7283,0.0517,43.2524,0.008
483,348.529419,-61.755440,321.293980,-51.763351,1,nan,0.7835,0.0575,43.4469,0.016
489,0.574468,-45.981140,327.041068,-68.778764,1,nan,0.5798,0.0174,42.6490,0.006
524,34.277344,-5.079716,169.526841,-59.956640,1,0.4067,0.3530,0.0854,41.3660,0.019
561,349.891296,-64.573555,317.972107,-49.786192,1,nan,0.9267,0.0135,43.8959,0.023
565,152.050781,3.284369,237.157374,44.318466,1,nan,0.8253,0.0201,43.5857,0.019
568,53.789062,-27.784405,223.685697,-53.845803,1,nan,1.0649,0.2186,44.2692,0.009
583,149.414062,2.238686,236.239766,41.565558,1,0.7070,0.7129,0.0137,43.1952,0.017
607,349.160583,-64.760857,318.219706,-49.458924,1,0.2424,0.4948,0.3294,42.2339,0.020
611,149.589844,3.583322,234.885369,42.474696,1,0.5068,0.5222,0.0459,42.3744,0.024
613,150.820312,1.641510,237.994507,42.358984,1,0.3014,0.6138,0.3121,42.7991,0.020
622,358.648071,-46.375080,329.462659,-67.716008,1,nan,0.9127,0.3417,43.8552,0.009
639,52.207031,-28.630989,224.800211,-55.343637,1,nan,0.6900,0.0255,43.1086,0.009
662,33.750000,-4.630479,168.146242,-59.949072,1,0.4181,0.4889,0.0244,42.2029,0.019
670,51.855469,-28.630989,224.733260,-55.649872,1,nan,1.3168,0.1915,44.8395,0.009
672,350.230255,-61.943836,320.053946,-52.070537,1,nan,0.9496,0.0789,43.9616,0.017
674,32.695312,-4.929937,166.868469,-60.841230,1,nan,0.6032,0.0466,42.7531,0.018
680,32.871094,-4.780192,166.959493,-60.615132,1,nan,1.4883,0.2383,45.1681,0.017
683,1.708861,-45.586655,325.688716,-69.520253,1,nan,0.8370,0.0432,43.6235,0.011
686,358.665253,-45.783966,330.353593,-68.203652,1,nan,1.3769,0.2502,44.9594,0.009
687,150.468750,3.732834,235.392208,43.283244,1,nan,0.3617,0.2373,41.4273,0.020
694,34.453125,-5.229529,169.987075,-59.956185,1,0.4544,0.4391,0.0361,41.9241,0.019
699,0.589520,-47.161343,325.385896,-67.769893,1,0.5659,0.5592,0.0064,42.5537,0.009
721,358.665253,-45.783966,330.353593,-68.203652,1,0.4664,0.4355,0.3217,41.9033,0.009
725,359.446716,-44.201530,331.730015,-69.805709,1,nan,0.7186,0.0156,43.2165,0.010
729,52.207031,-26.610098,221.298836,-55.042928,1,nan,0.7994,0.0124,43.5005,0.014
731,346.276581,-64.011238,320.448031,-49.344136,1,0.5315,0.5418,0.0087,42.4710,0.019
734,34.804688,-5.829153,171.307861,-60.174401,1,nan,0.9281,0.0119,43.9001,0.023
747,54.667969,-27.615883,223.610785,-53.050840,1,nan,1.3670,0.0656,44.9399,0.009
759,151.347656,3.583322,236.252362,43.918627,1,nan,1.5633,0.2334,45.2997,0.015
779,347.861847,-61.943836,321.519104,-51.424048,1,0.7469,0.7381,0.0118,43.2877,0.017
793,359.058563,-45.191612,330.695783,-68.844915,1,nan,0.6826,0.0160,43.0802,0.011
810,35.332031,-5.979157,172.286722,-59.931743,1,nan,0.9306,0.0189,43.9073,0.022
830,1.694561,-45.191612,326.278557,-69.858253,1,nan,0.6893,0.0081,43.1060,0.011
833,150.292969,2.686724,236.427488,42.541447,1,nan,0.4998,0.0202,42.2604,0.016
834,359.058563,-45.191612,330.695783,-68.844915,1,0.5767,0.5866,0.0128,42.6794,0.011
843,53.789062,-27.784405,223.685697,-53.845803,1,nan,0.8523,0.0367,43.6717,0.009
868,35.332031,-5.979157,172.286722,-59.931743,1,0.3881,0.3855,1.3203,41.5900,0.022
883,53.261719,-27.615883,223.280041,-54.281374,1,nan,0.8622,0.0603,43.7027,0.006
886,33.574219,-4.780192,168.064587,-60.175886,1,0.4615,0.4476,0.7778,41.9738,0.019
887,358.648071,-46.375080,329.462659,-67.716008,1,nan,1.2642,0.2378,44.7300,0.009
888,359.814819,-44.399834,330.775011,-69.801007,1,nan,1.2159,0.1488,44.6254,0.009
905,0.189873,-45.586655,328.254458,-68.969298,1,nan,0.6630,0.0275,43.0028,0.007
916,150.292969,2.686724,236.427488,42.541447,1,0.4052,0.4393,0.0217,41.9253,0.016
917,1.666667,-44.399834,327.519190,-70.529554,1,nan,1.3559,0.2676,44.9180,0.009
943,2.457983,-45.389202,324.632685,-69.945696,1,0.8582,0.8663,0.0291,43.7153,0.011
946,359.446716,-44.201530,331.730015,-69.805709,1,nan,1.5205,0.0959,45.2254,0.010
960,351.382965,-64.011238,317.574052,-50.604657,1,nan,0.6480,0.0129,42.9424,0.023
962,150.820312,1.641510,237.994507,42.358984,1,nan,0.6779,0.0117,43.0616,0.020
965,2.457983,-45.389202,324.632685,-69.945696,1,nan,0.6636,0.0081,43.0054,0.011
968,52.910156,-27.953188,223.774083,-54.639214,1,nan,0.2472,0.0116,40.4743,0.007
978,34.101562,-5.829153,170.247753,-60.638325,1,0.4938,0.4954,0.0349,42.2373,0.019
979,148.710938,2.836105,235.050801,41.328739,1,nan,1.1407,0.1072,44.4539,0.031
983,1.753247,-46.768478,324.030235,-68.498041,1,nan,0.3106,0.0065,41.0429,0.014
1017,351.299988,-62.320400,319.038597,-52.026867,1,nan,1.4323,0.0481,45.0652,0.018
1018,349.285706,-62.884678,319.786163,-51.046461,1,nan,0.9262,0.0111,43.8945,0.018
1020,149.238281,3.882372,234.283829,42.351155,1,nan,1.0264,0.2278,44.1701,0.033
1030,352.711273,-63.823658,316.922299,-51.059403,1,nan,1.2671,0.0235,44.7362,0.024
1039,150.644531,3.583322,235.698235,43.342784,1,nan,0.7124,0.0358,43.1936,0.018
1049,1.723404,-45.981140,325.117958,-69.180825,1,nan,1.2628,0.1822,44.7271,0.010
1059,53.085938,-27.784405,223.525509,-54.460748,1,nan,1.0223,0.0247,44.1595,0.007
1063,53.789062,-27.784405,223.685697,-53.845803,1,nan,0.0000,0.0000,nan,0.009
1065,53.613281,-26.944359,222.237403,-53.863858,1,0.4795,0.4443,0.0390,41.9546,0.009
1067,2.071130,-45.191612,325.606223,-69.989264,1,0.7417,0.8350,0.0544,43.6171,0.011
1084,0.965665,-46.375080,325.845907,-68.579427,1,0.2126,0.5647,0.4351,42.5797,0.007
1087,352.132874,-63.636005,317.424173,-51.095855,1,nan,1.1039,0.1661,44.3657,0.021
1088,34.277344,-5.679190,170.314930,-60.410322,1,0.7550,0.7142,0.0477,43.2001,0.020
1100,34.101562,-5.829153,170.247753,-60.638325,1,0.9332,0.8409,0.0631,43.6357,0.019
1106,33.925781,-5.979157,170.179895,-60.866303,1,nan,0.9258,0.0219,43.8935,0.022
1108,51.855469,-26.276812,220.627031,-55.293792,1,0.6459,0.7378,0.0318,43.2867,0.014
1111,53.085938,-27.111860,222.384291,-54.355086,1,nan,0.3981,0.7822,41.6724,0.007
1114,33.398438,-4.331149,167.226341,-59.936551,1,0.5401,0.5514,0.0091,42.5169,0.018
1115,150.820312,3.732834,235.666318,43.572109,1,nan,1.1354,0.0545,44.4412,0.016
1123,151.171875,2.238686,237.619933,42.994783,1,0.5819,0.5516,0.3972,42.5177,0.024
1127,0.965665,-46.375080,325.845907,-68.579427,1,nan,1.0761,0.1787,44.2973,0.007
1128,149.414062,1.940072,236.565366,41.393323,1,nan,0.3618,0.0387,41.4282,0.018
1138,359.816315,-44.003082,331.451340,-70.123054,1,nan,1.4331,0.2745,45.0668,0.013
1151,33.574219,-4.780192,168.064587,-60.175886,1,nan,0.9335,0.2471,43.9155,0.019
1168,347.861847,-61.943836,321.519104,-51.424048,1,nan,1.5435,0.1436,45.2656,0.017
1174,0.949367,-45.586655,326.991548,-69.251686,1,nan,1.0215,0.1042,44.1572,0.013
1193,347.861847,-61.943836,321.519104,-51.424048,1,nan,0.7156,0.0493,43.2055,0.017
1216,53.964844,-28.630989,225.142950,-53.813613,1,nan,0.9028,0.0148,43.8260,0.009
1245,53.085938,-28.122234,224.100909,-54.509752,1,0.4937,0.5333,0.0166,42.4294,0.007
1254,34.980469,-6.279288,172.180075,-60.389399,1,nan,1.5079,0.3209,45.2031,0.023
1265,54.667969,-27.615883,223.610785,-53.050840,1,0.5310,0.6159,0.0352,42.8082,0.009
1266,149.414062,1.940072,236.565366,41.393323,1,nan,0.6497,0.0183,42.9492,0.018
1271,33.750000,-4.630479,168.146242,-59.949072,1,nan,1.0088,0.0104,44.1239,0.019
1274,0.189873,-45.586655,328.254458,-68.969298,1,nan,1.4221,0.2779,45.0460,0.007
1288,33.222656,-4.780192,167.515653,-60.396584,1,nan,0.9169,0.1056,43.8676,0.018
1289,346.276581,-64.011238,320.448031,-49.344136,1,nan,1.1128,0.0552,44.3872,0.019
1304,347.846710,-64.760857,318.929827,-49.143596,1,0.3102,0.3081,0.0119,41.0226,0.019
1321,151.523438,3.134927,236.900695,43.803170,1,nan,1.2033,0.1759,44.5975,0.019
1347,349.429535,-62.508568,320.039643,-51.393745,1,nan,0.7305,0.0458,43.2603,0.020
1354,53.085938,-28.122234,224.100909,-54.509752,1,nan,0.8265,0.0527,43.5896,0.007
1365,1.708861,-45.586655,325.688716,-69.520253,1,0.6978,0.7233,0.0170,43.2338,0.011
1380,351.259003,-64.386185,317.344860,-50.255113,1,nan,0.9291,0.0148,43.9029,0.020
1388,52.910156,-26.276812,220.926149,-54.363918,1,nan,0.8363,0.0411,43.6210,0.008
1393,152.050781,2.985506,237.495952,44.143927,1,0.4202,2.8977,0.8121,46.9354,0.019
1415,51.328125,-27.784405,223.130589,-55.999499,1,0.7371,0.7551,0.0216,43.3483,0.013
1421,358.648071,-46.375080,329.462659,-67.716008,1,nan,0.6717,0.0415,43.0374,0.009
1439,34.101562,-5.829153,170.247753,-60.638325,1,nan,1.0442,0.1545,44.2165,0.019
1450,53.964844,-28.630989,225.142950,-53.813613,1,nan,0.8016,0.0528,43.5077,0.009
1463,1.363636,-46.768478,324.669342,-68.371416,1,0.2333,0.5070,0.1130,42.2975,0.008
1467,151.523438,3.134927,236.900695,43.803170,1,nan,0.8282,0.1183,43.5952,0.019
1468,346.562500,-63.448284,320.824720,-49.866957,1,nan,0.8677,0.0290,43.7199,0.021
1469,149.414062,2.238686,236.239766,41.565558,1,nan,1.0850,0.0150,44.3193,0.017
1487,150.468750,3.732834,235.392208,43.283244,1,0.6295,0.5829,0.0311,42.6627,0.020
1492,347.812500,-63.448284,320.128971,-50.202348,1,nan,0.7396,0.0157,43.2934,0.021
1498,152.050781,3.284369,237.157374,44.318466,1,nan,0.8187,0.0238,43.5644,0.019
1500,149.414062,2.238686,236.239766,41.565558,1,nan,1.0392,0.0139,44.2035,0.017
1522,352.132874,-63.636005,317.424173,-51.095855,1,nan,1.2524,0.0277,44.7049,0.021
1523,150.468750,1.641510,237.714575,42.075234,1,0.2669,0.2407,0.0254,40.4086,0.017
1536,33.574219,-5.079716,168.448505,-60.407218,1,nan,0.5789,0.1445,42.6449,0.016
1545,346.562500,-63.448284,320.824720,-49.866957,1,0.6731,0.7236,0.0459,43.2351,0.021
1567,152.050781,2.985506,237.495952,44.143927,1,0.5581,0.5870,0.0381,42.6815,0.019
1570,347.861847,-61.943836,321.519104,-51.424048,1,nan,0.7967,0.0516,43.4914,0.017
1578,51.855469,-28.630989,224.733260,-55.649872,1,nan,0.5831,0.1709,42.6637,0.009
1589,51.679688,-27.447618,222.618229,-55.642263,1,0.3366,0.3400,0.0077,41.2708,0.010
1593,150.996094,4.181528,235.291975,43.970869,1,nan,1.2366,0.1482,44.6708,0.015
1597,351.299988,-62.320400,319.038597,-52.026867,1,0.3161,0.3182,0.0159,41.1033,0.018
1599,52.207031,-26.610098,221.298836,-55.042928,1,nan,1.0088,0.2519,44.1237,0.014
1600,34.101562,-5.829153,170.247753,-60.638325,1,0.5596,0.5490,0.0474,42.5054,0.019
1601,53.085938,-28.122234,224.100909,-54.509752,1,0.7155,0.7376,0.0244,43.2861,0.007
1619,2.097458,-45.783966,324.737840,-69.478613,1,nan,1.1271,0.0568,44.4216,0.011
1630,1.753247,-46.768478,324.030235,-68.498041,1,nan,0.7373,0.0283,43.2851,0.014
1636,0.965665,-46.375080,325.845907,-68.579427,1,nan,0.9029,0.0862,43.8264,0.007
1660,359.446716,-44.201530,331.730015,-69.805709,1,nan,0.8022,0.0194,43.5100,0.010
1667,33.574219,-5.079716,168.448505,-60.407218,1,nan,1.0792,0.0660,44.3050,0.016
1676,0.965665,-46.375080,325.845907,-68.579427,1,nan,0.9666,0.2691,44.0090,0.007
1678,0.929752,-44.597992,328.531426,-70.083244,1,nan,0.7378,0.0146,43.2866,0.011
1687,53.613281,-28.630989,225.073365,-54.119461,1,nan,0.9107,0.0140,43.8494,0.006
1691,152.050781,2.985506,237.495952,44.143927,1,0.7346,0.7037,0.0529,43.1609,0.019
1695,351.259003,-64.386185,317.344860,-50.255113,1,nan,0.9741,0.2405,44.0299,0.020
1702,150.996094,2.388015,237.313912,42.939977,1,nan,0.7806,0.0916,43.4371,0.021
1720,1.694561,-45.191612,326.278557,-69.858253,1,nan,0.6746,0.0276,43.0488,0.011
1729,51.328125,-27.447618,222.535046,-55.950727,1,0.6110,0.5932,0.0122,42.7092,0.013
1730,150.117188,3.732834,235.120533,42.993809,1,nan,0.6999,0.0471,43.1464,0.020
1754,148.710938,2.836105,235.050801,41.328739,1,nan,0.9109,0.0431,43.8500,0.031
1763,347.861847,-61.943836,321.519104,-51.424048,1,nan,0.8860,0.0855,43.7757,0.017
1770,0.190678,-45.783966,327.956322,-68.803772,1,0.2562,0.2577,0.0118,40.5774,0.005
1780,0.929752,-44.597992,328.531426,-70.083244,1,nan,0.3954,0.0285,41.6551,0.011
1781,351.382965,-64.011238,317.574052,-50.604657,1,0.4976,0.4790,0.0223,42.1493,0.023
1830,51.328125,-27.784405,223.130589,-55.999499,1,nan,0.7349,0.0181,43.2763,0.013
1833,32.871094,-4.780192,166.959493,-60.615132,1,0.3202,0.3262,0.0567,41.1659,0.017
1841,0.589520,-47.161343,325.385896,-67.769893,1,nan,0.9916,0.0165,44.0776,0.009
1871,33.574219,-4.780192,168.064587,-60.175886,1,nan,0.5952,0.0260,42.7180,0.019
1894,150.468750,1.641510,237.714575,42.075234,1,nan,1.2779,0.0607,44.7590,0.017
1914,359.816315,-44.003082,331.451340,-70.123054,1,0.6648,0.6345,0.0370,42.8867,0.013
1933,150.117188,2.836105,236.124718,42.483719,1,nan,0.4788,0.2350,42.1486,0.016
1934,351.734680,-62.884678,318.284128,-51.651217,1,0.6909,0.7567,0.0533,43.3540,0.019
1941,52.207031,-26.610098,221.298836,-55.042928,1,nan,0.9255,0.0135,43.8926,0.014
1948,359.814819,-44.399834,330.775011,-69.801007,1,nan,1.2610,0.0630,44.7233,0.009
1949,33.398438,-3.732834,166.492280,-59.466614,1,0.5070,0.5068,0.0084,42.2963,0.022
1971,149.414062,1.940072,236.565366,41.393323,1,nan,1.1763,0.1964,44.5365,0.018
1978,54.667969,-27.615883,223.610785,-53.050840,1,nan,0.8494,0.0510,43.6626,0.009
1999,349.160583,-64.760857,318.219706,-49.458924,1,0.6580,0.5147,0.5022,42.3368,0.020
2000,349.046051,-61.943836,320.796530,-51.753706,1,nan,0.6559,0.0240,42.9743,0.017
2017,51.679688,-27.447618,222.618229,-55.642263,1,nan,0.3325,0.1725,41.2145,0.010
2021,1.753247,-46.768478,324.030235,-68.498041,1,nan,0.6515,0.0269,42.9566,0.014
2046,150.644531,3.583322,235.698235,43.342784,1,0.7425,0.7898,0.0207,43.4684,0.018
2054,152.050781,2.985506,237.495952,44.143927,1,nan,0.7912,0.0208,43.4730,0.019
2074,359.446716,-44.201530,331.730015,-69.805709,1,0.5175,0.4921,0.0570,42.2198,0.010
2093,150.996094,4.181528,235.291975,43.970869,1,nan,0.3252,0.0452,41.1578,0.015
2097,151.347656,4.181528,235.568369,44.259942,1,nan,0.6327,0.0348,42.8789,0.016
2106,1.694561,-45.191612,326.278557,-69.858253,1,nan,1.1768,0.0524,44.5376,0.011
2109,358.665253,-45.783966,330.353593,-68.203652,1,0.4692,0.4797,0.0133,42.1532,0.009
2133,349.891296,-64.573555,317.972107,-49.786192,1,nan,0.6612,0.0157,42.9957,0.023
2145,51.328125,-27.447618,222.535046,-55.950727,1,nan,1.2076,0.0605,44.6070,0.013
2147,0.589520,-47.161343,325.385896,-67.769893,1,nan,1.5213,0.2618,45.2269,0.009
2160,34.277344,-5.079716,169.526841,-59.956640,1,nan,0.5716,0.0242,42.6114,0.019
2168,359.446716,-44.201530,331.730015,-69.805709,1,0.2214,0.2308,0.2438,40.3055,0.010
2172,0.189873,-45.586655,328.254458,-68.969298,1,0.6266,0.6681,0.0264,43.0232,0.007
2182,151.523438,3.134927,236.900695,43.803170,1,nan,0.8239,0.0320,43.5812,0.019
2183,148.710938,2.836105,235.050801,41.328739,1,0.2383,0.2618,0.0166,40.6165,0.031
2187,34.101562,-5.829153,170.247753,-60.638325,1,0.3599,0.3656,0.5424,41.4546,0.019
2193,347.846710,-64.760857,318.929827,-49.143596,1,nan,1.4268,0.0800,45.0550,0.019
2195,150.468750,3.732834,235.392208,43.283244,1,nan,0.4598,0.0125,42.0435,0.020
2198,351.382965,-64.011238,317.574052,-50.604657,1,nan,0.9277,0.2414,43.8988,0.023
2206,52.910156,-25.944481,220.366350,-54.301439,1,nan,1.5014,0.0616,45.1915,0.010
2208,352.398651,-62.696659,318.017427,-51.967966,1,0.7188,0.7126,0.0121,43.1942,0.020
2223,358.648071,-46.375080,329.462659,-67.716008,1,nan,1.1672,0.0321,44.5157,0.009
2228,1.753247,-46.768478,324.030235,-68.498041,1,nan,0.6837,0.0267,43.0844,0.014
2243,150.820312,3.134927,236.341348,43.230123,1,nan,1.3813,0.1932,44.9680,0.016
2246,348.529419,-61.755440,321.293980,-51.763351,1,0.6467,0.7143,0.0413,43.2008,0.016
2252,53.437500,-29.142223,225.908120,-54.336118,1,nan,0.5851,0.0120,42.6728,0.008
2265,359.058563,-45.191612,330.695783,-68.844915,1,0.2786,0.2719,0.0249,40.7105,0.011
2270,151.171875,2.238686,237.619933,42.994783,1,nan,0.0000,0.0000,nan,0.024
2276,346.655182,-63.260487,320.952196,-50.040935,1,nan,0.6827,0.0726,43.0806,0.019
2281,346.655182,-63.260487,320.952196,-50.040935,1,0.3442,0.5476,0.1871,42.4986,0.019
2287,351.953644,-62.132156,318.777388,-52.347124,1,nan,0.5387,0.0152,42.4558,0.019
2292,1.753247,-46.768478,324.030235,-68.498041,1,nan,0.9430,0.0086,43.9427,0.014
2293,51.679688,-27.447618,222.618229,-55.642263,1,nan,0.9148,0.0376,43.8614,0.010
2296,1.694561,-45.191612,326.278557,-69.858253,1,nan,0.8033,0.0310,43.5136,0.011
2299,352.132874,-63.636005,317.424173,-51.095855,1,nan,0.8109,0.0190,43.5386,0.021
2304,1.708861,-45.586655,325.688716,-69.520253,1,nan,1.3102,0.0895,44.8261,0.011
2333,349.966217,-62.696659,319.542989,-51.376556,1,0.4284,0.4611,0.0224,42.0509,0.021
2348,33.398438,-4.331149,167.226341,-59.936551,1,nan,0.5904,0.0092,42.6966,0.018
2351,32.695312,-4.929937,166.868469,-60.841230,1,nan,0.7537,0.1688,43.3435,0.018
2354,51.679688,-27.447618,222.618229,-55.642263,1,0.3829,0.3935,0.0180,41.6425,0.010
2370,35.683594,-5.379379,171.992947,-59.253501,1,nan,0.6099,0.0096,42.7820,0.020
2372,34.453125,-5.229529,169.987075,-59.956185,1,0.3353,0.4757,0.6757,42.1315,0.019
2388,54.667969,-27.615883,223.610785,-53.050840,1,nan,1.0298,0.0768,44.1791,0.009
2395,359.814819,-44.399834,330.775011,-69.801007,1,0.6315,0.6359,0.0276,42.8924,0.009
2407,0.190678,-45.783966,327.956322,-68.803772,1,0.5859,0.5249,0.0829,42.3879,0.005
2410,52.207031,-26.610098,221.298836,-55.042928,1,0.3848,0.4258,0.0241,41.8449,0.014
2451,359.814819,-44.399834,330.775011,-69.801007,1,nan,0.7416,0.0343,43.3003,0.009
2476,149.414062,1.940072,236.565366,41.393323,1,nan,0.4730,0.1009,42.1166,0.018
2538,346.130127,-63.072620,321.423103,-50.042305,1,0.6846,0.6643,0.0319,43.0081,0.020
2550,351.259003,-64.386185,317.344860,-50.255113,1,nan,0.3640,0.1891,41.4439,0.020
2603,0.965665,-46.375080,325.845907,-68.579427,1,nan,1.2175,0.4670,44.6290,0.007
2615,52.910156,-26.276812,220.926149,-54.363918,1,nan,1.2323,0.0534,44.6613,0.008
2660,151.523438,3.134927,236.900695,43.803170,1,1.0412,1.3254,0.1765,44.8570,0.019
2661,346.130127,-63.072620,321.423103,-50.042305,1,0.5160,0.5185,0.0319,42.3560,0.020
2683,0.965665,-46.375080,325.845907,-68.579427,1,0.6609,0.6757,0.0091,43.0530,0.007
2687,348.529419,-61.755440,321.293980,-51.763351,1,0.6669,0.6531,0.0406,42.9628,0.016
2693,358.665253,-45.783966,330.353593,-68.203652,1,nan,1.5860,0.2426,45.3384,0.009
2701,349.891296,-64.573555,317.972107,-49.786192,1,0.4057,0.3811,0.0322,41.5609,0.023
2702,35.683594,-5.379379,171.992947,-59.253501,1,0.5081,0.5489,0.0165,42.5049,0.020
2707,33.398438,-4.331149,167.226341,-59.936551,1,0.1061,0.1359,0.0193,39.0322,0.018
2744,348.595886,-63.072620,320.023289,-50.713060,1,nan,0.5735,0.0245,42.6203,0.021
2753,51.328125,-27.784405,223.130589,-55.999499,1,0.4302,0.3267,0.0389,41.1697,0.013
2759,148.886719,2.686724,235.347248,41.389003,1,nan,0.5974,0.0069,42.7274,0.028
2760,149.414062,3.433834,234.919132,42.245550,1,nan,0.3415,0.2635,41.2821,0.027
2766,150.468750,3.732834,235.392208,43.283244,1,nan,1.0362,0.0118,44.1956,0.020
2786,349.615387,-63.636005,318.927246,-50.506542,1,nan,0.3010,0.0504,40.9634,0.018
2790,349.285706,-62.884678,319.786163,-51.046461,1,nan,0.0000,0.0000,nan,0.018
2794,359.415588,-46.768478,327.729895,-67.686097,1,0.6983,0.7077,0.0266,43.1760,0.009
2814,150.117188,2.836105,236.124718,42.483719,1,nan,0.7215,0.0782,43.2272,0.016
2818,52.207031,-28.291550,224.208534,-55.300157,1,nan,0.8251,0.0216,43.5851,0.007
2856,53.964844,-28.630989,225.142950,-53.813613,1,nan,0.6348,0.0515,42.8877,0.009
2858,34.980469,-6.279288,172.180075,-60.389399,1,nan,1.1145,0.0322,44.3914,0.023
2860,348.529419,-61.755440,321.293980,-51.763351,1,nan,0.4398,0.2606,41.9285,0.016
2871,346.655182,-63.260487,320.952196,-50.040935,1,nan,0.4647,0.0229,42.0709,0.019
2882,151.171875,1.342993,238.602520,42.464379,1,nan,2.7824,0.2944,46.8290,0.026
2886,148.710938,2.836105,235.050801,41.328739,1,nan,0.3475,0.0073,41.3259,0.031
2909,359.446716,-44.201530,331.730015,-69.805709,1,nan,0.3229,0.3149,41.1406,0.010
2932,352.711273,-63.823658,316.922299,-51.059403,1,nan,0.1911,0.0104,39.8466,0.024
2933,348.586945,-64.573555,318.693903,-49.477869,1,0.6336,0.6157,0.0140,42.8070,0.018
2943,0.589520,-47.161343,325.385896,-67.769893,1,0.7816,0.7511,0.0242,43.3344,0.009
2945,52.910156,-25.944481,220.366350,-54.301439,1,nan,1.1553,0.0720,44.4880,0.010
2949,52.910156,-25.944481,220.366350,-54.301439,1,nan,1.2125,0.0257,44.6178,0.010
2958,358.648071,-46.375080,329.462659,-67.716008,1,nan,0.5145,0.0383,42.3356,0.009
2961,351.734680,-62.884678,318.284128,-51.651217,1,nan,0.3771,0.0426,41.5336,0.019
2962,149.414062,3.433834,234.919132,42.245550,1,nan,0.7453,0.0637,43.3138,0.027
2970,53.085938,-27.111860,222.384291,-54.355086,1,0.4506,0.5123,0.0647,42.3247,0.007
2975,53.085938,-28.122234,224.100909,-54.509752,1,nan,1.4981,0.2615,45.1857,0.007
3004,349.966217,-62.696659,319.542989,-51.376556,1,0.4053,0.4978,0.0481,42.2498,0.021
3008,358.312500,-44.993881,332.185785,-68.685906,1,nan,0.1344,0.7139,39.0065,0.009
3012,150.117188,2.238686,236.784618,42.139082,1,nan,1.4853,0.0818,45.1627,0.016
3022,52.207031,-28.630989,224.800211,-55.343637,1,0.4266,0.3630,0.0443,41.4365,0.009
3025,359.415588,-46.768478,327.729895,-67.686097,1,0.6636,0.6827,0.0129,43.0806,0.009
3028,32.871094,-4.780192,166.959493,-60.615132,1,nan,0.6075,0.2450,42.7719,0.017
3029,347.617462,-62.508568,321.121462,-50.904708,1,nan,0.7168,0.0344,43.2100,0.019
3037,150.820312,3.732834,235.666318,43.572109,1,0.3807,0.3856,0.0770,41.5907,0.016
3052,351.734680,-62.884678,318.284128,-51.651217,1,nan,0.9395,0.0475,43.9327,0.019
3073,1.753247,-46.768478,324.030235,-68.498041,1,nan,0.5457,0.7707,42.4896,0.014
3077,0.189873,-45.586655,328.254458,-68.969298,1,0.2853,0.3011,0.0497,40.9649,0.007
3133,0.574468,-45.981140,327.041068,-68.778764,1,nan,1.6167,0.0739,45.3897,0.006
3163,34.277344,-5.079716,169.526841,-59.956640,1,0.6388,0.6322,0.0085,42.8770,0.019
3170,33.398438,-4.331149,167.226341,-59.936551,1,nan,0.4754,0.0150,42.1298,0.018
3171,33.925781,-5.979157,170.179895,-60.866303,1,0.5550,0.5525,0.0096,42.5220,0.022
3175,347.617462,-62.508568,321.121462,-50.904708,1,nan,1.0217,0.0243,44.1579,0.019
3176,150.820312,3.134927,236.341348,43.230123,1,nan,0.7794,0.0377,43.4329,0.016
3190,33.925781,-5.979157,170.179895,-60.866303,1,nan,0.7705,0.0220,43.4023,0.022
3193,347.812500,-63.448284,320.128971,-50.202348,1,nan,0.8700,0.0495,43.7269,0.021
3196,358.648071,-46.375080,329.462659,-67.716008,1,0.3112,0.2901,0.0115,40.8716,0.009
3206,53.085938,-27.111860,222.384291,-54.355086,1,0.6472,0.6860,0.0225,43.0931,0.007
3209,1.363636,-46.768478,324.669342,-68.371416,1,0.4369,0.4994,1.2314,42.2578,0.008
3210,53.437500,-29.142223,225.908120,-54.336118,1,nan,0.7133,0.0739,43.1968,0.008
3214,348.908447,-63.823658,319.169886,-50.176186,1,0.2600,0.2876,0.3195,40.8500,0.018
3217,151.347656,4.181528,235.568369,44.259942,1,0.1826,0.4846,0.3727,42.1795,0.016
3247,1.708861,-45.586655,325.688716,-69.520253,1,nan,1.3152,0.1148,44.8363,0.011
3252,349.615387,-63.636005,318.927246,-50.506542,1,nan,0.8013,0.0111,43.5070,0.018
3278,53.613281,-27.953188,223.929533,-54.024772,1,nan,1.5343,0.1417,45.2497,0.007
3284,0.929752,-44.597992,328.531426,-70.083244,1,nan,0.4121,1.2419,41.7612,0.011
3286,33.574219,-4.780192,168.064587,-60.175886,1,nan,0.7985,0.0224,43.4974,0.019
3288,51.855469,-26.276812,220.627031,-55.293792,1,nan,1.2928,0.0725,44.7902,0.014
3325,53.613281,-28.630989,225.073365,-54.119461,1,0.4825,0.5158,0.0204,42.3423,0.006
3336,34.453125,-5.229529,169.987075,-59.956185,1,nan,0.7542,0.0326,43.3453,0.019
3347,52.910156,-27.953188,223.774083,-54.639214,1,nan,0.5940,0.0536,42.7127,0.007
3351,359.811707,-45.191612,329.485675,-69.150905,1,0.5478,0.5828,0.0191,42.6626,0.010
3362,347.617462,-62.508568,321.121462,-50.904708,1,nan,0.6700,0.1306,43.0306,0.019
3373,352.398651,-62.696659,318.017427,-51.967966,1,nan,0.4474,0.2471,41.9728,0.020
3395,53.613281,-27.953188,223.929533,-54.024772,1,0.3963,0.3657,0.8633,41.4551,0.007
3398,34.804688,-5.829153,171.307861,-60.174401,1,nan,1.0070,0.0407,44.1188,0.023
3410,349.615387,-63.636005,318.927246,-50.506542,1,nan,1.1667,0.0446,44.5144,0.018
3418,0.190678,-45.783966,327.956322,-68.803772,1,nan,1.0109,0.0153,44.1294,0.005
3441,0.189873,-45.586655,328.254458,-68.969298,1,nan,0.4974,0.0111,42.2477,0.007
3457,151.347656,4.181528,235.568369,44.259942,1,nan,0.7204,0.0318,43.2232,0.016
3475,149.414062,3.433834,234.919132,42.245550,1,nan,0.7214,0.0251,43.2270,0.027
3498,351.734680,-62.884678,318.284128,-51.651217,1,nan,0.9131,0.0795,43.8565,0.019
3499,150.996094,2.985506,236.647967,43.287350,1,0.9971,1.0190,0.0171,44.1508,0.020
3500,52.207031,-28.630989,224.800211,-55.343637,1,0.2306,0.4849,0.6744,42.1816,0.009
3504,351.734680,-62.884678,318.284128,-51.651217,1,nan,0.5829,0.0306,42.6630,0.019
3509,53.261719,-27.615883,223.280041,-54.281374,1,0.5035,0.5169,0.2812,42.3478,0.006
3522,52.910156,-27.279613,222.625192,-54.536648,1,0.8636,0.9140,0.0250,43.8590,0.007
3529,348.586945,-64.573555,318.693903,-49.477869,1,nan,0.9039,0.0247,43.8293,0.018
3535,359.814819,-44.399834,330.775011,-69.801007,1,nan,1.4756,0.1618,45.1450,0.009
3538,150.996094,2.985506,236.647967,43.287350,1,0.4615,0.4664,0.0103,42.0806,0.020
3547,2.097458,-45.783966,324.737840,-69.478613,1,nan,0.9467,0.0338,43.9533,0.011
3551,358.636353,-46.768478,328.890146,-67.388837,1,0.2573,0.2393,0.0145,40.3948,0.008
3566,151.171875,2.238686,237.619933,42.994783,1,0.6331,0.5448,0.6502,42.4852,0.024
3581,51.855469,-26.276812,220.627031,-55.293792,1,0.4844,0.4938,0.0183,42.2287,0.014
3585,150.820312,3.732834,235.666318,43.572109,1,nan,0.5231,0.1034,42.3791,0.016
3597,2.457983,-45.389202,324.632685,-69.945696,1,0.8409,0.8742,0.0306,43.7396,0.011
3620,35.859375,-4.630479,171.270769,-58.580806,1,nan,0.7190,0.0186,43.2181,0.022
3629,51.855469,-28.630989,224.733260,-55.649872,1,nan,1.3951,0.1453,44.9947,0.009
3641,149.414062,3.433834,234.919132,42.245550,1,nan,0.5965,0.0131,42.7237,0.027
3645,346.562500,-63.448284,320.824720,-49.866957,1,0.5351,0.5274,0.0205,42.4003,0.021
3652,149.238281,3.882372,234.283829,42.351155,1,nan,0.8499,0.0477,43.6644,0.033
3657,34.277344,-5.679190,170.314930,-60.410322,1,0.3050,2.7750,0.9745,46.8221,0.020
3661,347.812500,-63.448284,320.128971,-50.202348,1,nan,1.3297,0.0487,44.8658,0.021
3666,150.117188,2.836105,236.124718,42.483719,1,nan,1.1820,0.1059,44.5495,0.016
3672,348.529419,-61.755440,321.293980,-51.763351,1,0.6020,0.5785,0.0108,42.6431,0.016
3681,150.996094,2.985506,236.647967,43.287350,1,0.6459,0.6429,0.0113,42.9215,0.020
3702,151.171875,2.238686,237.619933,42.994783,1,nan,0.8082,0.1270,43.5299,0.024
3706,51.328125,-27.784405,223.130589,-55.999499,1,0.3284,0.3150,0.1745,41.0780,0.013
3709,151.171875,2.238686,237.619933,42.994783,1,nan,0.8753,0.0139,43.7432,0.024
3710,1.666667,-44.399834,327.519190,-70.529554,1,nan,0.8356,0.0335,43.6190,0.009
3735,0.574468,-45.981140,327.041068,-68.778764,1,nan,0.7982,0.1565,43.4967,0.006
3737,150.292969,2.686724,236.427488,42.541447,1,nan,0.7567,0.0404,43.3543,0.016
3748,52.910156,-25.944481,220.366350,-54.301439,1,nan,1.0812,0.0621,44.3099,0.010
3749,359.415588,-46.768478,327.729895,-67.686097,1,0.7033,0.7840,0.0371,43.4487,0.009
3751,352.132874,-63.636005,317.424173,-51.095855,1,0.6527,0.6351,0.0190,42.8890,0.021
3753,346.562500,-63.448284,320.824720,-49.866957,1,0.1191,0.0761,0.0195,37.6870,0.021
3763,347.861847,-61.943836,321.519104,-51.424048,1,nan,0.3349,0.0091,41.2320,0.017
3764,347.812500,-63.448284,320.128971,-50.202348,1,nan,0.8530,0.0218,43.6740,0.021
3777,33.574219,-5.079716,168.448505,-60.407218,1,nan,0.5510,0.0652,42.5152,0.016
3779,348.595886,-63.072620,320.023289,-50.713060,1,nan,0.9939,0.2200,44.0838,0.021
3785,359.415588,-46.768478,327.729895,-67.686097,1,0.5052,0.5168,0.0229,42.3475,0.009
3794,52.910156,-26.276812,220.926149,-54.363918,1,0.6236,0.6316,0.0331,42.8743,0.008
3803,151.171875,1.342993,238.602520,42.464379,1,nan,0.7038,0.0688,43.1613,0.026
3821,52.910156,-27.953188,223.774083,-54.639214,1,nan,1.4631,0.2823,45.1222,0.007
3822,358.648071,-46.375080,329.462659,-67.716008,1,0.3455,2.9896,1.2696,47.0170,0.009
3824,1.753247,-46.768478,324.030235,-68.498041,1,0.8063,0.8352,0.0339,43.6178,0.014
3841,2.071130,-45.191612,325.606223,-69.989264,1,nan,0.5971,0.0990,42.7263,0.011
3844,2.457983,-45.389202,324.632685,-69.945696,1,0.4789,0.3748,0.4652,41.5181,0.011
3855,349.891296,-64.573555,317.972107,-49.786192,1,0.4507,0.3704,0.5053,41.4880,0.023
3857,2.457983,-45.389202,324.632685,-69.945696,1,nan,0.7254,0.0296,43.2418,0.011
3875,52.207031,-28.630989,224.800211,-55.343637,1,0.6825,0.6468,0.0279,42.9374,0.009
3878,358.636353,-46.768478,328.890146,-67.388837,1,nan,0.9110,0.2538,43.8503,0.008
3880,351.382965,-64.011238,317.574052,-50.604657,1,nan,1.5002,0.2529,45.1894,0.023
3889,53.964844,-28.630989,225.142950,-53.813613,1,0.1582,0.2015,1.1876,39.9743,0.009
3925,150.996094,4.181528,235.291975,43.970869,1,nan,0.2421,1.1561,40.4235,0.015
3928,150.996094,2.985506,236.647967,43.287350,1,nan,0.7806,0.0538,43.4371,0.020
3929,149.589844,3.583322,234.885369,42.474696,1,nan,1.8897,0.1427,45.8062,0.024
3936,34.980469,-6.279288,172.180075,-60.389399,1,nan,1.0853,0.0192,44.3200,0.023
3950,352.132874,-63.636005,317.424173,-51.095855,1,0.7004,0.6924,0.0235,43.1180,0.021
3970,1.753247,-46.768478,324.030235,-68.498041,1,nan,1.4637,0.1090,45.1234,0.014
3972,33.574219,-5.079716,168.448505,-60.407218,1,nan,0.7985,0.0333,43.4976,0.016
4001,150.996094,4.181528,235.291975,43.970869,1,nan,1.5533,0.1631,45.2826,0.015
4007,150.820312,3.732834,235.666318,43.572109,1,nan,0.7855,0.0241,43.4536,0.016
4016,346.130127,-63.072620,321.423103,-50.042305,1,nan,1.2676,0.0738,44.7372,0.020
4023,0.589520,-47.161343,325.385896,-67.769893,1,nan,0.9574,0.1398,43.9835,0.009
4025,33.222656,-4.780192,167.515653,-60.396584,1,nan,0.0000,0.0000,nan,0.018
4038,35.859375,-4.630479,171.270769,-58.580806,1,nan,1.1349,0.0866,44.4401,0.022
4044,151.171875,2.238686,237.619933,42.994783,1,nan,0.6693,0.0222,43.0278,0.024
4054,34.453125,-5.229529,169.987075,-59.956185,1,0.5653,0.5478,0.0237,42.4998,0.019
4062,358.312500,-44.993881,332.185785,-68.685906,1,nan,1.1896,0.0988,44.5666,0.009
4063,348.586945,-64.573555,318.693903,-49.477869,1,nan,0.6406,0.1744,42.9119,0.018
4065,35.332031,-5.979157,172.286722,-59.931743,1,nan,1.3023,0.0984,44.8098,0.022
4077,150.468750,1.641510,237.714575,42.075234,1,nan,1.1662,0.1634,44.5133,0.017
4103,349.160583,-64.760857,318.219706,-49.458924,1,0.3113,0.3331,0.0201,41.2185,0.020
4109,52.910156,-26.276812,220.926149,-54.363918,1,nan,0.0000,0.0000,nan,0.008
4183,51.855469,-28.630989,224.733260,-55.649872,1,nan,0.8906,0.0209,43.7897,0.009
4197,54.667969,-27.615883,223.610785,-53.050840,1,nan,0.7358,0.0471,43.2794,0.009
4201,34.453125,-5.229529,169.987075,-59.956185,1,0.2193,0.2338,0.1899,40.3374,0.019
4202,33.398438,-3.732834,166.492280,-59.466614,1,0.6916,0.7529,0.0348,43.3408,0.022
4216,53.964844,-28.630989,225.142950,-53.813613,1,0.4807,0.4515,0.0246,41.9964,0.009
4224,33.925781,-5.979157,170.179895,-60.866303,1,nan,0.6240,0.0370,42.8426,0.022
4225,346.562500,-63.448284,320.824720,-49.866957,1,0.2704,0.3546,0.2924,41.3770,0.021
4249,51.679688,-27.447618,222.618229,-55.642263,1,nan,1.3003,0.1720,44.8058,0.010
4262,346.130127,-63.072620,321.423103,-50.042305,1,nan,0.7108,0.0343,43.1877,0.020
4267,33.222656,-4.780192,167.515653,-60.396584,1,0.4186,0.3797,0.0298,41.5512,0.018
4272,53.613281,-26.944359,222.237403,-53.863858,1,0.2892,0.2718,0.0268,40.7089,0.009
4274,351.321442,-64.198746,317.458993,-50.429931,1,0.6479,0.6553,0.0091,42.9721,0.023
4278,149.414062,2.238686,236.239766,41.565558,1,nan,0.8546,0.0815,43.6790,0.017
4282,53.085938,-27.784405,223.525509,-54.460748,1,nan,0.4929,0.0100,42.2237,0.007
4283,52.910156,-27.279613,222.625192,-54.536648,1,0.3708,0.3947,0.0245,41.6505,0.007
4293,1.753247,-46.768478,324.030235,-68.498041,1,nan,1.6305,0.1495,45.4125,0.014
4294,150.117188,2.836105,236.124718,42.483719,1,nan,0.8510,0.1009,43.6678,0.016
4300,51.855469,-26.276812,220.627031,-55.293792,1,0.6127,0.6028,0.0088,42.7511,0.014
4303,32.695312,-4.929937,166.868469,-60.841230,1,nan,0.9797,0.0098,44.0451,0.018
4304,34.277344,-5.679190,170.314930,-60.410322,1,nan,0.2127,0.0101,40.1063,0.020
4306,151.171875,2.537361,237.288526,43.169764,1,nan,1.4930,0.1628,45.1765,0.024
4313,149.414062,3.433834,234.919132,42.245550,1,nan,1.1395,0.1626,44.4510,0.027
4322,151.347656,4.181528,235.568369,44.259942,1,0.7816,0.7648,0.0184,43.3824,0.016
4325,34.980469,-6.279288,172.180075,-60.389399,1,nan,0.6802,0.0118,43.0708,0.023
4330,53.613281,-28.630989,225.073365,-54.119461,1,nan,1.1409,0.1366,44.4543,0.006
4337,34.804688,-5.829153,171.307861,-60.174401,1,nan,0.6082,0.0100,42.7747,0.023
4347,35.683594,-5.379379,171.992947,-59.253501,1,nan,1.5051,0.0376,45.1982,0.020
4369,53.964844,-28.630989,225.142950,-53.813613,1,nan,1.1974,0.0630,44.5843,0.009
4372,351.259003,-64.386185,317.344860,-50.255113,1,0.8588,0.8990,0.0160,43.8146,0.020
4376,33.222656,-4.780192,167.515653,-60.396584,1,nan,0.9810,0.0139,44.0488,0.018
4390,53.437500,-29.142223,225.908120,-54.336118,1,nan,1.0152,0.3139,44.1407,0.008
4424,151.699219,3.583322,236.533224,44.205648,1,nan,1.0317,0.0256,44.1840,0.016
4467,53.261719,-27.615883,223.280041,-54.281374,1,0.3489,0.3023,0.1233,40.9750,0.006
4469,150.820312,1.641510,237.994507,42.358984,1,nan,1.0352,0.2064,44.1930,0.020
4475,1.753247,-46.768478,324.030235,-68.498041,1,nan,0.5952,0.0666,42.7177,0.014
4489,35.332031,-5.979157,172.286722,-59.931743,1,nan,0.3478,0.0152,41.3280,0.022
4490,1.723404,-45.981140,325.117958,-69.180825,1,nan,1.1304,0.0573,44.4295,0.010
4492,358.636353,-46.768478,328.890146,-67.388837,1,0.2686,0.6256,0.1162,42.8492,0.008
4494,2.097458,-45.783966,324.737840,-69.478613,1,0.4329,0.4492,0.0147,41.9831,0.011
4507,35.683594,-5.379379,171.992947,-59.253501,1,nan,1.1883,0.0370,44.5636,0.020
4508,347.812500,-63.448284,320.128971,-50.202348,1,nan,0.7777,0.0513,43.4272,0.021
4525,149.589844,3.583322,234.885369,42.474696,1,nan,0.7945,0.0729,43.4840,0.024
4528,359.811707,-45.191612,329.485675,-69.150905,1,0.6181,0.5632,0.0527,42.5723,0.010
4533,150.996094,2.985506,236.647967,43.287350,1,nan,0.9019,0.2483,43.8233,0.020
4551,358.312500,-44.993881,332.185785,-68.685906,1,0.2666,0.6085,0.0540,42.7761,0.009
4559,0.929752,-44.597992,328.531426,-70.083244,1,0.3229,0.3027,0.6578,40.9778,0.011
4561,351.953644,-62.132156,318.777388,-52.347124,1,nan,0.7697,0.0339,43.3995,0.019
4576,150.292969,2.686724,236.427488,42.541447,1,0.5613,0.5636,0.0277,42.5742,0.016
4586,2.071130,-45.191612,325.606223,-69.989264,1,0.3810,0.3822,0.0308,41.5678,0.011
4589,150.820312,1.641510,237.994507,42.358984,1,nan,1.2748,0.1135,44.7525,0.020
4592,52.207031,-26.610098,221.298836,-55.042928,1,0.7073,0.7753,0.0429,43.4190,0.014
4597,349.285706,-62.884678,319.786163,-51.046461,1,nan,0.6120,0.0070,42.7913,0.018
4611,53.085938,-27.111860,222.384291,-54.355086,1,nan,0.8291,0.0669,43.5981,0.007
4625,347.812500,-63.448284,320.128971,-50.202348,1,nan,0.7723,0.0357,43.4086,0.021
4644,35.859375,-4.630479,171.270769,-58.580806,1,0.4138,0.4027,0.0331,41.7017,0.022
4645,358.665253,-45.783966,330.353593,-68.203652,1,nan,0.6546,0.1405,42.9690,0.009
4653,150.292969,2.686724,236.427488,42.541447,1,nan,0.9818,0.0169,44.0509,0.016
4673,352.711273,-63.823658,316.922299,-51.059403,1,nan,0.5303,0.0066,42.4145,0.024
4677,53.085938,-27.784405,223.525509,-54.460748,1,0.3093,0.1395,0.2071,39.0948,0.007
4695,349.966217,-62.696659,319.542989,-51.376556,1,0.6195,0.6168,0.0156,42.8120,0.021
4707,359.814819,-44.399834,330.775011,-69.801007,1,nan,0.8457,0.0478,43.6511,0.009
4712,0.190678,-45.783966,327.956322,-68.803772,1,0.1206,0.5167,0.0880,42.3468,0.005
4713,358.665253,-45.783966,330.353593,-68.203652,1,0.8042,0.7677,0.0483,43.3928,0.009
4719,0.965665,-46.375080,325.845907,-68.579427,1,0.3619,0.3727,0.0303,41.5035,0.007
4720,359.805206,-46.768478,327.135979,-67.829903,1,0.5530,0.5247,0.1218,42.3871,0.011
4724,352.398651,-62.696659,318.017427,-51.967966,1,0.5390,0.4826,0.1275,42.1692,0.020
4738,35.332031,-5.979157,172.286722,-59.931743,1,nan,1.0768,0.0572,44.2988,0.022
4739,51.855469,-26.276812,220.627031,-55.293792,1,nan,0.7661,0.0148,43.3871,0.014
4750,0.965665,-46.375080,325.845907,-68.579427,1,0.4822,0.2376,0.3459,40.3769,0.007
4759,359.811707,-45.191612,329.485675,-69.150905,1,nan,0.7922,0.2472,43.4764,0.010
4760,350.230255,-61.943836,320.053946,-52.070537,1,nan,1.1776,0.0799,44.5394,0.017
4762,51.679688,-27.447618,222.618229,-55.642263,1,nan,1.0371,0.2322,44.1980,0.010
4768,349.285706,-62.884678,319.786163,-51.046461,1,nan,1.0012,0.0457,44.1033,0.018
4776,150.117188,2.238686,236.784618,42.139082,1,0.4602,0.4465,0.0429,41.9677,0.016
4812,151.171875,1.342993,238.602520,42.464379,1,nan,0.8278,0.0161,43.5940,0.026
4822,149.589844,3.583322,234.885369,42.474696,1,nan,0.8752,0.0381,43.7427,0.024
4824,346.655182,-63.260487,320.952196,-50.040935,1,0.4116,0.3961,1.0457,41.6592,0.019
4825,150.468750,3.732834,235.392208,43.283244,1,nan,0.8466,0.0165,43.6540,0.020
4830,1.666667,-44.399834,327.519190,-70.529554,1,nan,0.8267,0.0561,43.5904,0.009
4833,2.457983,-45.389202,324.632685,-69.945696,1,nan,0.6975,0.0469,43.1373,0.011
4834,358.636353,-46.768478,328.890146,-67.388837,1,nan,0.6570,0.0487,42.9789,0.008
4840,151.171875,2.537361,237.288526,43.169764,1,nan,0.9905,0.0337,44.0747,0.024
4844,34.277344,-5.679190,170.314930,-60.410322,1,nan,1.0144,0.1816,44.1385,0.020
4853,347.861847,-61.943836,321.519104,-51.424048,1,nan,0.8895,0.1178,43.7862,0.017
4854,0.949367,-45.586655,326.991548,-69.251686,1,nan,0.7034,0.0178,43.1597,0.013
4864,151.171875,1.342993,238.602520,42.464379,1,0.1623,0.1825,0.0221,39.7354,0.026
4866,351.953644,-62.132156,318.777388,-52.347124,1,nan,0.6113,0.0972,42.7883,0.019
4903,151.171875,2.238686,237.619933,42.994783,1,nan,0.4125,0.0223,41.7635,0.024
4910,1.723404,-45.981140,325.117958,-69.180825,1,nan,0.6558,0.0453,42.9739,0.010
4934,52.031250,-26.443335,220.963669,-55.168557,1,nan,1.2051,0.0888,44.6015,0.014
4937,359.811707,-45.191612,329.485675,-69.150905,1,nan,0.5072,0.0257,42.2985,0.010
4948,151.171875,2.537361,237.288526,43.169764,1,nan,1.0614,0.0814,44.2604,0.024
4986,32.871094,-4.780192,166.959493,-60.615132,1,0.5554,0.4904,0.4394,42.2108,0.017
4990,149.414062,1.940072,236.565366,41.393323,1,nan,0.6203,0.0205,42.8268,0.018
4994,150.468750,1.641510,237.714575,42.075234,1,nan,0.8252,0.0451,43.5856,0.017
5024,352.711273,-63.823658,316.922299,-51.059403,1,0.3343,0.5197,0.1503,42.3618,0.024
5034,346.276581,-64.011238,320.448031,-49.344136,1,nan,0.2533,0.8554,40.5348,0.019
5057,150.468750,3.732834,235.392208,43.283244,1,0.8187,0.7582,0.0593,43.3595,0.020
5061,150.292969,2.686724,236.427488,42.541447,1,0.4812,0.4838,0.0111,42.1754,0.016
5084,52.207031,-26.610098,221.298836,-55.042928,1,nan,1.0849,0.0363,44.3190,0.014
5088,349.160583,-64.760857,318.219706,-49.458924,1,0.7677,0.7828,0.0261,43.4447,0.020
5094,2.071130,-45.191612,325.606223,-69.989264,1,nan,0.3655,0.0077,41.4542,0.011
5099,1.753247,-46.768478,324.030235,-68.498041,1,nan,0.3400,0.2409,41.2708,0.014
5135,33.925781,-5.979157,170.179895,-60.866303,1,0.7377,0.7425,0.0053,43.3035,0.022
5145,150.468750,1.641510,237.714575,42.075234,1,nan,1.1364,0.0385,44.4437,0.017
5153,149.414062,2.238686,236.239766,41.565558,1,0.7765,0.7695,0.0155,43.3989,0.017
5158,359.415588,-46.768478,327.729895,-67.686097,1,0.4004,0.4266,0.0302,41.8498,0.009
5162,52.558594,-27.279613,222.538937,-54.845107,1,nan,0.5872,0.0094,42.6823,0.008
5166,358.636353,-46.768478,328.890146,-67.388837,1,nan,0.7222,0.0331,43.2299,0.008
5176,359.814819,-44.399834,330.775011,-69.801007,1,nan,0.8147,0.0386,43.5513,0.009
5204,33.574219,-4.780192,168.064587,-60.175886,1,0.4879,0.5068,0.0211,42.2965,0.019
5210,33.574219,-4.780192,168.064587,-60.175886,1,0.4971,0.4954,0.0328,42.2371,0.019
5217,53.261719,-27.615883,223.280041,-54.281374,1,nan,1.6018,0.0726,45.3649,0.006
5234,51.328125,-27.784405,223.130589,-55.999499,1,0.7616,0.8177,0.0589,43.5609,0.013
5236,35.332031,-5.979157,172.286722,-59.931743,1,0.3651,0.4588,0.1365,42.0379,0.022
5249,348.586945,-64.573555,318.693903,-49.477869,1,nan,0.9021,0.0339,43.8239,0.018
5264,347.617462,-62.508568,321.121462,-50.904708,1,nan,0.6893,0.0103,43.1059,0.019
5278,53.789062,-27.784405,223.685697,-53.845803,1,0.4394,0.3893,0.1926,41.6151,0.009
5280,351.299988,-62.320400,319.038597,-52.026867,1,nan,1.2309,0.2137,44.6583,0.018
5283,347.013428,-62.508568,321.472056,-50.735330,1,nan,0.6296,0.1513,42.8661,0.018
5286,347.846710,-64.760857,318.929827,-49.143596,1,0.5384,0.4991,0.1472,42.2564,0.019
5307,2.457983,-45.389202,324.632685,-69.945696,1,0.4932,0.4967,0.0234,42.2437,0.011
5313,349.891296,-64.573555,317.972107,-49.786192,1,0.3924,0.4004,0.3110,41.6868,0.023
5315,359.805206,-46.768478,327.135979,-67.829903,1,nan,0.7107,0.0354,43.1872,0.011
5317,33.574219,-5.379379,168.838090,-60.637536,1,0.2383,0.2830,0.1436,40.8096,0.017
5319,148.710938,2.836105,235.050801,41.328739,1,nan,0.5440,0.0154,42.4816,0.031
5322,0.929752,-44.597992,328.531426,-70.083244,1,nan,1.0052,0.0257,44.1143,0.011
5335,348.908447,-63.823658,319.169886,-50.176186,1,0.2108,0.2283,0.1361,40.2787,0.018
5338,152.050781,3.284369,237.157374,44.318466,1,nan,0.8439,0.0429,43.6452,0.019
5354,34.980469,-6.279288,172.180075,-60.389399,1,0.2692,0.3663,0.1994,41.4598,0.023
5365,349.285706,-62.884678,319.786163,-51.046461,1,nan,0.9173,0.0221,43.8686,0.018
5370,358.636353,-46.768478,328.890146,-67.388837,1,0.7437,0.6297,0.0526,42.8666,0.008
5372,348.908447,-63.823658,319.169886,-50.176186,1,0.5219,0.5300,0.0576,42.4134,0.018
5386,34.453125,-5.229529,169.987075,-59.956185,1,nan,0.6316,0.0102,42.8746,0.019
5403,51.855469,-28.630989,224.733260,-55.649872,1,nan,0.9827,0.0233,44.0533,0.009
5412,151.347656,4.181528,235.568369,44.259942,1,0.3435,0.3761,0.2378,41.5268,0.016
5417,32.871094,-4.780192,166.959493,-60.615132,1,nan,0.7730,0.0296,43.4109,0.017
5419,358.636353,-46.768478,328.890146,-67.388837,1,nan,0.5571,0.0159,42.5440,0.008
5422,0.190678,-45.783966,327.956322,-68.803772,1,nan,1.1195,0.1674,44.4033,0.005
5433,53.085938,-28.122234,224.100909,-54.509752,1,nan,1.1700,0.1749,44.5221,0.007
5453,150.117188,3.732834,235.120533,42.993809,1,nan,0.5035,0.3843,42.2794,0.020
5473,53.437500,-29.142223,225.908120,-54.336118,1,nan,0.5390,0.0204,42.4575,0.008
5478,33.398438,-4.331149,167.226341,-59.936551,1,nan,0.9349,0.0204,43.9195,0.018
5520,152.050781,2.985506,237.495952,44.143927,1,0.2992,0.2421,0.0411,40.4233,0.019
5521,51.328125,-27.784405,223.130589,-55.999499,1,nan,0.9909,0.0426,44.0758,0.013
5535,53.261719,-27.615883,223.280041,-54.281374,1,nan,0.9054,0.0151,43.8337,0.006
5543,33.398438,-3.732834,166.492280,-59.466614,1,nan,0.8296,0.1240,43.5996,0.022
5550,151.171875,1.342993,238.602520,42.464379,1,nan,0.5898,0.0156,42.6937,0.026
5562,351.734680,-62.884678,318.284128,-51.651217,1,0.3430,0.4407,0.1538,41.9338,0.019
5564,351.321442,-64.198746,317.458993,-50.429931,1,nan,0.6240,0.0197,42.8426,0.023
5565,1.708861,-45.586655,325.688716,-69.520253,1,nan,0.7586,0.0376,43.3607,0.011
5566,347.013428,-62.508568,321.472056,-50.735330,1,nan,0.9684,0.0328,44.0142,0.018
5569,33.574219,-5.079716,168.448505,-60.407218,1,0.2696,0.2658,0.6279,40.6536,0.016
5570,347.861847,-61.943836,321.519104,-51.424048,1,nan,0.6613,0.0352,42.9962,0.017
5576,0.589520,-47.161343,325.385896,-67.769893,1,nan,1.4255,0.1860,45.0525,0.009
5591,350.230255,-61.943836,320.053946,-52.070537,1,nan,0.7811,0.0143,43.4387,0.017
5623,1.753247,-46.768478,324.030235,-68.498041,1,0.3915,0.4107,1.0343,41.7522,0.014
5635,359.415588,-46.768478,327.729895,-67.686097,1,nan,1.0468,0.1246,44.2231,0.009
5655,1.753247,-46.768478,324.030235,-68.498041,1,nan,1.4730,0.2564,45.1403,0.014
5656,2.457983,-45.389202,324.632685,-69.945696,1,nan,0.9835,0.0168,44.0555,0.011
5661,35.683594,-5.379379,171.992947,-59.253501,1,nan,0.2668,0.2983,40.6628,0.020
5696,349.285706,-62.884678,319.786163,-51.046461,1,nan,0.8087,0.0795,43.5316,0.018
5704,350.230255,-61.943836,320.053946,-52.070537,1,nan,0.5054,0.0636,42.2892,0.017
5723,351.321442,-64.198746,317.458993,-50.429931,1,nan,0.8582,0.1911,43.6902,0.023
5729,53.085938,-27.784405,223.525509,-54.460748,1,nan,1.3930,0.0637,44.9906,0.007
5758,52.910156,-25.944481,220.366350,-54.301439,1,nan,0.9711,0.0522,44.0216,0.010
5761,150.468750,1.641510,237.714575,42.075234,1,nan,0.9866,0.2871,44.0641,0.017
5763,349.891296,-64.573555,317.972107,-49.786192,1,nan,1.0138,0.2387,44.1370,0.023
5794,351.299988,-62.320400,319.038597,-52.026867,1,nan,1.3563,0.0554,44.9189,0.018
5798,51.328125,-27.784405,223.130589,-55.999499,1,nan,0.8854,0.0513,43.7737,0.013
5801,53.613281,-28.630989,225.073365,-54.119461,1,nan,0.8968,0.0208,43.8080,0.006
5817,34.277344,-5.079716,169.526841,-59.956640,1,0.5298,0.5217,0.0200,42.3721,0.019
5834,352.398651,-62.696659,318.017427,-51.967966,1,nan,0.8078,0.0389,43.5284,0.020
5844,2.097458,-45.783966,324.737840,-69.478613,1,nan,1.1469,0.0339,44.4683,0.011
5852,0.190678,-45.783966,327.956322,-68.803772,1,nan,0.8192,0.0304,43.5659,0.005
5864,51.855469,-28.630989,224.733260,-55.649872,1,nan,1.3871,0.1040,44.9791,0.009
5877,151.347656,4.181528,235.568369,44.259942,1,0.3096,0.3166,0.7338,41.0910,0.016
5881,33.398438,-4.331149,167.226341,-59.936551,1,nan,0.5213,0.0134,42.3702,0.018
5895,348.908447,-63.823658,319.169886,-50.176186,1,nan,0.8744,0.0299,43.7403,0.018
5911,33.750000,-4.630479,168.146242,-59.949072,1,0.2117,0.5363,0.4038,42.4441,0.019
5922,1.694561,-45.191612,326.278557,-69.858253,1,0.1845,0.2240,0.3763,40.2324,0.011
5924,51.855469,-28.630989,224.733260,-55.649872,1,0.7987,0.8068,0.0181,43.5253,0.009
5930,149.589844,3.583322,234.885369,42.474696,1,nan,1.3364,0.0830,44.8793,0.024
5937,2.457983,-45.389202,324.632685,-69.945696,1,nan,1.2012,0.0366,44.5926,0.011
5954,347.617462,-62.508568,321.121462,-50.904708,1,nan,1.1290,0.0618,44.4261,0.019
5956,51.855469,-28.630989,224.733260,-55.649872,1,nan,0.8904,0.0380,43.7889,0.009
5957,150.468750,3.732834,235.392208,43.283244,1,nan,0.7308,0.0756,43.2613,0.020
5978,346.655182,-63.260487,320.952196,-50.040935,1,nan,0.5152,0.0154,42.3393,0.019
5982,33.925781,-5.979157,170.179895,-60.866303,1,nan,0.4710,0.0617,42.1060,0.022
5995,359.058563,-45.191612,330.695783,-68.844915,1,nan,1.5541,0.1950,45.2840,0.011
6000,0.574468,-45.981140,327.041068,-68.778764,1,nan,1.2101,0.0361,44.6126,0.006
6001,53.437500,-29.142223,225.908120,-54.336118,1,nan,0.8961,0.0542,43.8060,0.008
6003,349.891296,-64.573555,317.972107,-49.786192,1,0.6069,0.5395,0.0894,42.4600,0.023
6017,151.347656,4.181528,235.568369,44.259942,1,nan,0.5050,0.5080,42.2869,0.016
6021,358.636353,-46.768478,328.890146,-67.388837,1,nan,0.9940,0.0435,44.0841,0.008
6022,352.711273,-63.823658,316.922299,-51.059403,1,nan,0.2460,0.2146,40.4626,0.024
6023,348.595886,-63.072620,320.023289,-50.713060,1,nan,0.4699,0.0341,42.0998,0.021
6028,358.648071,-46.375080,329.462659,-67.716008,1,nan,0.7000,0.0060,43.1467,0.009
6030,358.636353,-46.768478,328.890146,-67.388837,1,nan,0.7964,0.0420,43.4906,0.008
6076,0.190678,-45.783966,327.956322,-68.803772,1,nan,1.2728,0.0758,44.7483,0.005
6087,51.679688,-27.447618,222.618229,-55.642263,1,nan,0.8998,0.0507,43.8170,0.010
6091,359.058563,-45.191612,330.695783,-68.844915,1,0.1812,0.1971,0.0181,39.9205,0.011
6092,348.595886,-63.072620,320.023289,-50.713060,1,nan,1.2574,0.2139,44.7155,0.021
6096,351.953644,-62.132156,318.777388,-52.347124,1,nan,1.1012,0.0271,44.3592,0.019
6106,150.117188,2.836105,236.124718,42.483719,1,nan,0.5103,0.0193,42.3145,0.016
6119,1.708861,-45.586655,325.688716,-69.520253,1,0.3975,0.4588,0.5979,42.0377,0.011
6120,346.655182,-63.260487,320.952196,-50.040935,1,nan,1.1674,0.4165,44.5161,0.019
6126,53.085938,-27.111860,222.384291,-54.355086,1,nan,0.7902,0.0393,43.4696,0.007
6135,348.586945,-64.573555,318.693903,-49.477869,1,0.8477,0.8451,0.1229,43.6492,0.018
6151,150.292969,2.686724,236.427488,42.541447,1,nan,0.8598,0.0520,43.6952,0.016
6162,2.071130,-45.191612,325.606223,-69.989264,1,nan,1.3977,0.0957,44.9997,0.011
6164,34.980469,-6.279288,172.180075,-60.389399,1,nan,1.5785,0.3260,45.3258,0.023
6173,150.117188,2.836105,236.124718,42.483719,1,0.3881,0.3821,0.0142,41.5673,0.016
6179,348.586945,-64.573555,318.693903,-49.477869,1,0.5991,0.5030,0.0291,42.2767,0.018
6187,349.891296,-64.573555,317.972107,-49.786192,1,nan,1.1064,0.0831,44.3719,0.023
6191,53.613281,-28.630989,225.073365,-54.119461,1,nan,0.2328,0.0166,40.3267,0.006
6197,151.523438,3.134927,236.900695,43.803170,1,nan,1.0306,0.0174,44.1812,0.019
6205,33.574219,-6.579593,170.455585,-61.548219,1,0.5161,0.5873,0.0270,42.6825,0.021
6211,0.949367,-45.586655,326.991548,-69.251686,1,0.5940,0.5967,0.0095,42.7247,0.013
6217,35.683594,-5.379379,171.992947,-59.253501,1,nan,0.6309,0.0313,42.8717,0.020
6223,349.046051,-61.943836,320.796530,-51.753706,1,nan,0.2937,0.0805,40.9020,0.017
6287,358.648071,-46.375080,329.462659,-67.716008,1,nan,0.6517,0.0263,42.9572,0.009
6289,52.207031,-26.610098,221.298836,-55.042928,1,0.2186,0.3975,0.2967,41.6682,0.014
6293,33.925781,-5.979157,170.179895,-60.866303,1,nan,0.8837,0.0179,43.7686,0.022
6297,53.964844,-28.630989,225.142950,-53.813613,1,0.5674,0.6449,0.0377,42.9295,0.009
6308,347.812500,-63.448284,320.128971,-50.202348,1,nan,0.9701,0.0203,44.0187,0.021
6313,349.966217,-62.696659,319.542989,-51.376556,1,nan,0.3825,0.0215,41.5700,0.021
6343,33.398438,-4.331149,167.226341,-59.936551,1,nan,1.0126,0.0334,44.1339,0.018
6345,0.190678,-45.783966,327.956322,-68.803772,1,nan,1.0255,0.0178,44.1678,0.005
6347,149.414062,3.433834,234.919132,42.245550,1,nan,0.2243,0.0261,40.2360,0.027
6350,33.574219,-5.379379,168.838090,-60.637536,1,nan,1.2142,0.1253,44.6216,0.017
6352,150.996094,4.181528,235.291975,43.970869,1,nan,1.4079,0.0591,45.0191,0.015
6354,34.804688,-5.829153,171.307861,-60.174401,1,0.3701,0.4966,0.3999,42.2433,0.023
6368,351.299988,-62.320400,319.038597,-52.026867,1,nan,0.9252,0.0390,43.8918,0.018
6369,32.871094,-4.780192,166.959493,-60.615132,1,nan,0.5996,0.0136,42.7374,0.017
6372,359.805206,-46.768478,327.135979,-67.829903,1,0.7964,0.7850,0.0422,43.4520,0.011
6376,32.695312,-4.929937,166.868469,-60.841230,1,nan,0.7511,0.0307,43.3345,0.018
6378,348.529419,-61.755440,321.293980,-51.763351,1,nan,0.6002,0.0209,42.7397,0.016
6390,0.589520,-47.161343,325.385896,-67.769893,1,0.5421,0.4998,0.2593,42.2602,0.009
6391,34.101562,-5.829153,170.247753,-60.638325,1,nan,1.1253,0.1662,44.4173,0.019
6402,51.679688,-27.447618,222.618229,-55.642263,1,0.4580,0.4038,0.0453,41.7087,0.010
6405,34.804688,-5.829153,171.307861,-60.174401,1,nan,1.3943,0.2658,44.9931,0.023
6436,53.613281,-28.630989,225.073365,-54.119461,1,nan,0.4164,0.3756,41.7878,0.006
6437,33.925781,-5.979157,170.179895,-60.866303,1,0.4279,0.4344,0.0322,41.8968,0.022
6450,0.190678,-45.783966,327.956322,-68.803772,1,nan,0.7339,0.0231,43.2726,0.005
6458,352.711273,-63.823658,316.922299,-51.059403,1,1.1032,1.0971,0.1861,44.3491,0.024
6460,51.679688,-27.447618,222.618229,-55.642263,1,nan,0.3954,0.0255,41.6549,0.010
6470,358.636353,-46.768478,328.890146,-67.388837,1,nan,1.0362,0.0170,44.1958,0.008
6474,51.855469,-27.953188,223.543603,-55.561470,1,nan,0.7091,0.1441,43.1812,0.008
6498,348.586945,-64.573555,318.693903,-49.477869,1,nan,1.1153,0.1670,44.3933,0.018
6514,2.071130,-45.191612,325.606223,-69.989264,1,0.3119,0.3565,0.4554,41.3906,0.011
6515,34.804688,-5.829153,171.307861,-60.174401,1,nan,1.1225,0.0927,44.4106,0.023
6537,152.050781,2.985506,237.495952,44.143927,1,nan,1.2730,0.1522,44.7487,0.019
6546,53.085938,-28.122234,224.100909,-54.509752,1,0.2509,0.3826,0.2720,41.5706,0.007
6548,148.710938,2.836105,235.050801,41.328739,1,nan,1.1286,0.1579,44.4252,0.031
6555,151.699219,3.583322,236.533224,44.205648,1,0.2053,0.2236,0.0177,40.2274,0.016
6560,53.613281,-27.953188,223.929533,-54.024772,1,nan,0.7824,0.0662,43.4432,0.007
6567,32.871094,-4.780192,166.959493,-60.615132,1,nan,0.7820,0.0137,43.4419,0.017
6568,51.679688,-27.447618,222.618229,-55.642263,1,nan,1.0385,0.0242,44.2017,0.010
6575,33.398438,-3.732834,166.492280,-59.466614,1,0.5197,0.5580,0.0155,42.5483,0.022
6598,0.589520,-47.161343,325.385896,-67.769893,1,nan,0.7918,0.0238,43.4750,0.009
6606,151.523438,3.134927,236.900695,43.803170,1,nan,0.5715,0.0474,42.6108,0.019
6620,351.259003,-64.386185,317.344860,-50.255113,1,nan,0.9861,0.0153,44.0628,0.020
6623,33.750000,-4.630479,168.146242,-59.949072,1,nan,0.8838,0.0439,43.7691,0.019
6633,349.891296,-64.573555,317.972107,-49.786192,1,0.5312,0.5073,0.1220,42.2989,0.023
6654,150.468750,3.732834,235.392208,43.283244,1,0.8058,0.8088,0.0111,43.5318,0.020
6655,0.929752,-44.597992,328.531426,-70.083244,1,0.6631,0.6768,0.0337,43.0574,0.011
6663,52.910156,-26.276812,220.926149,-54.363918,1,nan,0.4449,0.3362,41.9584,0.008
6673,150.820312,3.732834,235.666318,43.572109,1,0.5028,0.5238,0.0312,42.3823,0.016
6682,352.711273,-63.823658,316.922299,-51.059403,1,0.2960,0.5328,0.7393,42.4271,0.024
6692,1.694561,-45.191612,326.278557,-69.858253,1,nan,0.4504,0.0611,41.9897,0.011
6704,348.586945,-64.573555,318.693903,-49.477869,1,0.3131,0.2939,0.0390,40.9044,0.018
6723,349.160583,-64.760857,318.219706,-49.458924,1,0.2401,0.2742,0.1134,40.7312,0.020
6729,2.457983,-45.389202,324.632685,-69.945696,1,nan,0.9192,0.0320,43.8743,0.011
6742,349.615387,-63.636005,318.927246,-50.506542,1,nan,1.7240,0.2208,45.5614,0.018
6756,359.816315,-44.003082,331.451340,-70.123054,1,nan,1.0236,0.0293,44.1629,0.013
6814,152.050781,3.284369,237.157374,44.318466,1,0.6160,0.6226,0.6151,42.8365,0.019
6821,1.708861,-45.586655,325.688716,-69.520253,1,nan,0.3834,0.0765,41.5757,0.011
6827,358.636353,-46.768478,328.890146,-67.388837,1,nan,1.1052,0.1737,44.3688,0.008
6833,150.996094,2.388015,237.313912,42.939977,1,nan,1.4382,0.0724,45.0762,0.021
6837,32.871094,-4.780192,166.959493,-60.615132,1,nan,0.8994,0.0420,43.8158,0.017
6847,33.574219,-5.079716,168.448505,-60.407218,1,nan,0.6653,0.0303,43.0120,0.016
6855,346.500000,-62.320400,321.951129,-50.736054,1,nan,0.9313,0.0387,43.9092,0.020
6856,1.363636,-46.768478,324.669342,-68.371416,1,0.3068,0.2935,0.0090,40.9003,0.008
6876,150.644531,3.583322,235.698235,43.342784,1,0.3246,0.3216,0.4661,41.1300,0.018
6878,32.871094,-4.780192,166.959493,-60.615132,1,0.5919,0.5848,0.1811,42.6713,0.017
6884,52.910156,-27.279613,222.625192,-54.536648,1,0.2612,0.2825,1.1744,40.8053,0.007
6897,359.814819,-44.399834,330.775011,-69.801007,1,nan,0.6130,0.0239,42.7954,0.009
6907,151.171875,1.342993,238.602520,42.464379,1,0.4791,0.5900,0.3591,42.6946,0.026
6911,0.929752,-44.597992,328.531426,-70.083244,1,0.3125,0.3006,0.0066,40.9604,0.011
6919,346.655182,-63.260487,320.952196,-50.040935,1,nan,1.6301,0.2474,45.4117,0.019
6927,151.171875,2.537361,237.288526,43.169764,1,nan,0.5389,0.2528,42.4568,0.024
6930,52.207031,-26.610098,221.298836,-55.042928,1,0.6478,0.5687,0.2240,42.5980,0.014
6932,52.910156,-27.953188,223.774083,-54.639214,1,nan,1.3042,0.0324,44.8137,0.007
6937,0.965665,-46.375080,325.845907,-68.579427,1,nan,0.8820,0.0370,43.7636,0.007
6949,346.130127,-63.072620,321.423103,-50.042305,1,nan,0.8422,0.0195,43.6399,0.020
6951,0.589520,-47.161343,325.385896,-67.769893,1,0.5372,0.5224,0.0077,42.3758,0.009
6952,150.117188,2.238686,236.784618,42.139082,1,nan,1.7307,0.2954,45.5719,0.016
6964,359.816315,-44.003082,331.451340,-70.123054,1,nan,1.1443,0.2764,44.4622,0.013
6982,358.312500,-44.993881,332.185785,-68.685906,1,0.3227,0.3189,0.0263,41.1089,0.009
6988,150.996094,2.985506,236.647967,43.287350,1,nan,0.8042,0.0261,43.5165,0.020
7013,34.277344,-5.679190,170.314930,-60.410322,1,nan,0.5236,0.0416,42.3817,0.020
7017,349.429535,-62.508568,320.039643,-51.393745,1,nan,0.3210,0.0095,41.1251,0.020
7021,358.665253,-45.783966,330.353593,-68.203652,1,nan,0.9959,0.0260,44.0892,0.009
7032,51.328125,-27.784405,223.130589,-55.999499,1,nan,0.8584,0.0214,43.6910,0.013
7055,34.277344,-5.679190,170.314930,-60.410322,1,nan,0.9347,0.0439,43.9190,0.020
7060,52.207031,-28.291550,224.208534,-55.300157,1,nan,0.6635,0.0179,43.0048,0.007
7065,52.910156,-26.276812,220.926149,-54.363918,1,nan,1.5901,0.1813,45.3453,0.008
7129,51.328125,-27.447618,222.535046,-55.950727,1,nan,1.1651,0.0362,44.5107,0.013
7140,150.820312,3.134927,236.341348,43.230123,1,nan,0.9363,0.0122,43.9237,0.016
7161,1.363636,-46.768478,324.669342,-68.371416,1,0.3784,0.3433,0.1675,41.2947,0.008
7162,52.910156,-27.953188,223.774083,-54.639214,1,0.3593,0.4691,0.9910,42.0956,0.007
7163,53.085938,-27.784405,223.525509,-54.460748,1,nan,1.1234,0.1962,44.4129,0.007
7172,151.171875,1.342993,238.602520,42.464379,1,nan,0.6339,0.0129,42.8842,0.026
7175,347.617462,-62.508568,321.121462,-50.904708,1,nan,1.0926,0.1310,44.3382,0.019
7224,53.964844,-28.630989,225.142950,-53.813613,1,nan,1.2376,0.0743,44.6730,0.009
7226,52.207031,-26.610098,221.298836,-55.042928,1,0.3847,0.3273,0.8615,41.1744,0.014
7233,35.683594,-5.379379,171.992947,-59.253501,1,nan,1.2494,0.2608,44.6985,0.020
7241,150.820312,3.732834,235.666318,43.572109,1,nan,0.7200,0.0438,43.2218,0.016
7246,151.347656,4.181528,235.568369,44.259942,1,nan,0.6039,0.0147,42.7560,0.016
7265,151.171875,2.238686,237.619933,42.994783,1,nan,0.7073,0.0647,43.1743,0.024
7275,1.708861,-45.586655,325.688716,-69.520253,1,0.4268,0.4094,0.0089,41.7441,0.011
7282,51.855469,-28.630989,224.733260,-55.649872,1,nan,1.4386,0.0998,45.0769,0.009
7292,33.574219,-5.379379,168.838090,-60.637536,1,nan,0.7780,0.0880,43.4282,0.017
7297,0.574468,-45.981140,327.041068,-68.778764,1,0.5290,0.5535,0.4055,42.5268,0.006
7326,351.299988,-62.320400,319.038597,-52.026867,1,0.2394,0.2364,0.0161,40.3644,0.018
7344,33.398438,-4.331149,167.226341,-59.936551,1,0.5794,0.5951,0.0111,42.7176,0.018
7378,347.846710,-64.760857,318.929827,-49.143596,1,nan,1.0318,0.1438,44.1843,0.019
7381,52.031250,-26.443335,220.963669,-55.168557,1,0.3373,0.5151,0.2782,42.3389,0.014
7385,52.910156,-26.276812,220.926149,-54.363918,1,0.7536,0.7669,0.0410,43.3898,0.008
7389,359.446716,-44.201530,331.730015,-69.805709,1,0.6180,0.6621,0.1010,42.9994,0.010
7447,0.190678,-45.783966,327.956322,-68.803772,1,nan,0.6276,0.0506,42.8577,0.005
7451,351.734680,-62.884678,318.284128,-51.651217,1,nan,0.7431,0.0243,43.3058,0.019
7464,346.500000,-62.320400,321.951129,-50.736054,1,0.5846,0.6343,0.0145,42.8857,0.020
7479,359.058563,-45.191612,330.695783,-68.844915,1,0.3899,0.3252,0.7307,41.1585,0.011
7496,0.190678,-45.783966,327.956322,-68.803772,1,nan,0.6583,0.0121,42.9840,0.005
7508,34.980469,-6.279288,172.180075,-60.389399,1,nan,1.1198,0.0531,44.4042,0.023
7515,150.820312,3.134927,236.341348,43.230123,1,nan,1.0715,0.0116,44.2857,0.016
7535,33.222656,-4.780192,167.515653,-60.396584,1,nan,1.4866,0.1394,45.1649,0.018
7556,53.085938,-27.784405,223.525509,-54.460748,1,0.4003,0.4870,0.4078,42.1925,0.007
7590,359.811707,-45.191612,329.485675,-69.150905,1,nan,0.7702,0.0277,43.4013,0.010
7593,53.085938,-27.784405,223.525509,-54.460748,1,nan,1.1395,0.0880,44.4511,0.007
7596,358.312500,-44.993881,332.185785,-68.685906,1,nan,1.0214,0.2333,44.1572,0.009
7597,152.050781,2.985506,237.495952,44.143927,1,0.7873,0.8649,0.0311,43.7112,0.019
7599,346.130127,-63.072620,321.423103,-50.042305,1,nan,0.6460,0.0576,42.9341,0.020
7616,151.347656,3.583322,236.252362,43.918627,1,nan,0.6020,0.0567,42.7478,0.015
7651,2.097458,-45.783966,324.737840,-69.478613,1,0.4739,0.4618,0.0214,42.0550,0.011
7655,150.117188,2.836105,236.124718,42.483719,1,nan,0.7487,0.0442,43.3259,0.016
7657,352.398651,-62.696659,318.017427,-51.967966,1,nan,1.1347,0.0316,44.4397,0.020
7667,53.613281,-27.953188,223.929533,-54.024772,1,nan,0.6882,0.0180,43.1017,0.007
7693,349.615387,-63.636005,318.927246,-50.506542,1,0.5467,0.5108,0.1636,42.3168,0.018
7705,347.013428,-62.508568,321.472056,-50.735330,1,0.6672,0.6780,0.0682,43.0621,0.018
7723,33.398438,-4.331149,167.226341,-59.936551,1,nan,0.7023,0.0297,43.1555,0.018
7764,349.429535,-62.508568,320.039643,-51.393745,1,nan,0.5974,0.0081,42.7277,0.020
7771,349.046051,-61.943836,320.796530,-51.753706,1,nan,0.5341,0.0966,42.4333,0.017
7775,349.615387,-63.636005,318.927246,-50.506542,1,0.3416,0.3512,0.0280,41.3526,0.018
7788,346.130127,-63.072620,321.423103,-50.042305,1,nan,1.3320,0.0596,44.8704,0.020
7793,33.925781,-5.979157,170.179895,-60.866303,1,nan,1.1112,0.0841,44.3835,0.022
7806,152.050781,2.985506,237.495952,44.143927,1,0.1438,1.9587,0.6082,45.9018,0.019
7809,358.648071,-46.375080,329.462659,-67.716008,1,nan,0.6914,0.0120,43.1142,0.009
7820,148.710938,2.836105,235.050801,41.328739,1,0.3550,0.2846,0.0753,40.8234,0.031
7830,1.708861,-45.586655,325.688716,-69.520253,1,nan,0.6696,0.0570,43.0291,0.011
7858,1.363636,-46.768478,324.669342,-68.371416,1,0.4673,0.4927,0.0100,42.2231,0.008
7867,359.811707,-45.191612,329.485675,-69.150905,1,nan,0.6117,0.0088,42.7899,0.010
7873,150.468750,1.641510,237.714575,42.075234,1,0.5708,0.6019,0.0233,42.7474,0.017
7920,358.665253,-45.783966,330.353593,-68.203652,1,nan,0.8342,0.0228,43.6144,0.009
7928,151.171875,1.342993,238.602520,42.464379,1,nan,0.8590,0.0235,43.6927,0.026
7931,33.574219,-6.579593,170.455585,-61.548219,1,nan,0.5225,0.4053,42.3762,0.021
7940,52.910156,-25.944481,220.366350,-54.301439,1,0.6969,0.7104,0.0284,43.1859,0.010
7942,0.190678,-45.783966,327.956322,-68.803772,1,0.2814,0.3115,0.5654,41.0497,0.005
7957,1.708861,-45.586655,325.688716,-69.520253,1,nan,0.9375,0.0504,43.9271,0.011
7977,33.398438,-3.732834,166.492280,-59.466614,1,nan,1.0780,0.0578,44.3020,0.022
7980,358.312500,-44.993881,332.185785,-68.685906,1,nan,1.2456,0.1373,44.6902,0.009
7982,346.276581,-64.011238,320.448031,-49.344136,1,0.4520,0.4716,0.0369,42.1091,0.019
7983,0.949367,-45.586655,326.991548,-69.251686,1,nan,1.0778,0.0319,44.3015,0.013
7984,34.453125,-5.229529,169.987075,-59.956185,1,nan,1.4448,0.2401,45.0886,0.019
7990,1.723404,-45.981140,325.117958,-69.180825,1,nan,0.8379,0.0368,43.6263,0.010
8019,33.574219,-6.579593,170.455585,-61.548219,1,0.2036,0.2032,0.0104,39.9944,0.021
8034,52.207031,-26.610098,221.298836,-55.042928,1,nan,1.5348,0.1021,45.2506,0.014
8036,51.328125,-27.447618,222.535046,-55.950727,1,0.3381,0.5336,0.3006,42.4311,0.013
8037,0.189873,-45.586655,328.254458,-68.969298,1,nan,0.8038,0.0994,43.5152,0.007
8054,52.910156,-25.944481,220.366350,-54.301439,1,nan,0.8669,0.0250,43.7173,0.010
8057,359.058563,-45.191612,330.695783,-68.844915,1,nan,0.7677,0.0225,43.3927,0.011
8058,52.031250,-26.443335,220.963669,-55.168557,1,nan,1.0909,0.0215,44.3339,0.014
8063,150.468750,3.732834,235.392208,43.283244,1,nan,1.0023,0.2042,44.1065,0.020
8094,149.414062,2.238686,236.239766,41.565558,1,nan,0.5647,0.0343,42.5793,0.017
8102,2.071130,-45.191612,325.606223,-69.989264,1,nan,1.1741,0.0373,44.5314,0.011
8129,359.814819,-44.399834,330.775011,-69.801007,1,0.4569,0.4721,0.2974,42.1119,0.009
8135,0.190678,-45.783966,327.956322,-68.803772,1,0.2615,0.2935,0.0129,40.9008,0.005
8153,350.230255,-61.943836,320.053946,-52.070537,1,0.2924,0.3076,0.0101,41.0184,0.017
8165,33.750000,-4.630479,168.146242,-59.949072,1,nan,1.0355,0.1353,44.1940,0.019
8166,150.292969,2.686724,236.427488,42.541447,1,nan,1.2423,0.1331,44.6832,0.016
8179,52.207031,-28.630989,224.800211,-55.343637,1,nan,0.6201,0.0376,42.8258,0.009
8181,0.965665,-46.375080,325.845907,-68.579427,1,0.6159,0.6147,0.0327,42.8026,0.007
8182,348.586945,-64.573555,318.693903,-49.477869,1,nan,1.0126,0.0872,44.1337,0.018
8196,53.085938,-27.111860,222.384291,-54.355086,1,nan,0.6223,0.0130,42.8354,0.007
8197,33.750000,-4.630479,168.146242,-59.949072,1,nan,0.8678,0.0526,43.7202,0.019
8201,53.261719,-27.615883,223.280041,-54.281374,1,nan,1.0299,0.0450,44.1792,0.006
8242,150.468750,1.641510,237.714575,42.075234,1,nan,0.9643,0.0352,44.0026,0.017
8250,51.855469,-27.953188,223.543603,-55.561470,1,nan,1.3103,0.1902,44.8263,0.008
8252,359.446716,-44.201530,331.730015,-69.805709,1,nan,0.8891,0.1482,43.7851,0.010
8263,32.695312,-4.929937,166.868469,-60.841230,1,0.7023,0.7653,0.0398,43.3844,0.018
8271,53.964844,-28.630989,225.142950,-53.813613,1,nan,1.2974,0.0994,44.7998,0.009
8283,349.160583,-64.760857,318.219706,-49.458924,1,nan,1.1567,0.0332,44.4912,0.020
8284,35.859375,-4.630479,171.270769,-58.580806,1,0.4045,0.4502,0.0352,41.9889,0.022
8315,152.050781,3.284369,237.157374,44.318466,1,0.7654,0.7712,0.0372,43.4049,0.019
8322,150.117188,2.836105,236.124718,42.483719,1,0.0718,0.0803,0.1025,37.8083,0.016
8329,52.031250,-26.443335,220.963669,-55.168557,1,nan,1.5343,0.1326,45.2496,0.014
8331,33.574219,-5.079716,168.448505,-60.407218,1,0.6347,0.6283,0.0176,42.8605,0.016
8332,35.859375,-4.630479,171.270769,-58.580806,1,0.5166,0.5551,0.0652,42.5347,0.022
8345,359.058563,-45.191612,330.695783,-68.844915,1,0.5651,0.5479,0.0133,42.5004,0.011
8361,51.855469,-28.630989,224.733260,-55.649872,1,nan,0.9078,0.0244,43.8409,0.009
8386,359.811707,-45.191612,329.485675,-69.150905,1,nan,0.9630,0.0958,43.9990,0.010
8388,33.750000,-4.630479,168.146242,-59.949072,1,nan,1.0142,0.0893,44.1381,0.019
8399,1.363636,-46.768478,324.669342,-68.371416,1,0.6489,0.6460,0.0193,42.9339,0.008
8401,152.050781,2.985506,237.495952,44.143927,1,0.5695,0.5583,0.0070,42.5495,0.019
8404,52.910156,-25.944481,220.366350,-54.301439,1,nan,0.9683,0.0275,44.0136,0.010
8413,349.966217,-62.696659,319.542989,-51.376556,1,nan,0.9296,0.2134,43.9042,0.021
8420,359.811707,-45.191612,329.485675,-69.150905,1,nan,1.2904,0.0367,44.7852,0.010
8430,51.855469,-26.276812,220.627031,-55.293792,1,nan,1.3057,0.0606,44.8168,0.014
8441,32.695312,-4.929937,166.868469,-60.841230,1,nan,0.6746,0.0423,43.0487,0.018
8442,351.953644,-62.132156,318.777388,-52.347124,1,nan,0.0000,0.0000,nan,0.019
8455,151.347656,3.583322,236.252362,43.918627,1,nan,1.0052,0.0667,44.1141,0.015
8463,150.644531,3.583322,235.698235,43.342784,1,nan,0.9599,0.0304,43.9903,0.018
8469,348.586945,-64.573555,318.693903,-49.477869,1,nan,0.7521,0.0183,43.3379,0.018
8487,53.613281,-26.944359,222.237403,-53.863858,1,nan,0.6150,0.0095,42.8042,0.009
8492,53.613281,-28.630989,225.073365,-54.119461,1,nan,0.9894,0.1246,44.0715,0.006
8512,52.558594,-27.279613,222.538937,-54.845107,1,0.8384,0.8562,0.0216,43.6841,0.008
8513,33.574219,-5.079716,168.448505,-60.407218,1,0.3943,0.4243,0.0320,41.8362,0.016
8521,33.750000,-4.630479,168.146242,-59.949072,1,0.4536,0.4521,0.0212,41.9996,0.019
8534,349.285706,-62.884678,319.786163,-51.046461,1,nan,0.8932,0.0144,43.7973,0.018
8548,151.347656,3.583322,236.252362,43.918627,1,0.4318,0.5090,0.0311,42.3075,0.015
8549,358.636353,-46.768478,328.890146,-67.388837,1,nan,0.7646,0.0436,43.3818,0.008
8553,347.812500,-63.448284,320.128971,-50.202348,1,nan,0.4066,1.0187,41.7265,0.021
8565,359.058563,-45.191612,330.695783,-68.844915,1,0.6011,0.5779,0.1187,42.6400,0.011
8592,53.613281,-27.953188,223.929533,-54.024772,1,nan,0.8871,0.0176,43.7788,0.007
8619,53.789062,-27.784405,223.685697,-53.845803,1,nan,1.4192,0.1271,45.0406,0.009
8625,351.734680,-62.884678,318.284128,-51.651217,1,0.3067,0.3063,0.0081,41.0077,0.019
8644,52.207031,-28.630989,224.800211,-55.343637,1,0.6866,0.7308,0.0554,43.2613,0.009
8646,1.694561,-45.191612,326.278557,-69.858253,1,nan,1.3300,0.1125,44.8664,0.011
8661,349.046051,-61.943836,320.796530,-51.753706,1,nan,0.9744,0.0308,44.0307,0.017
8664,52.558594,-27.279613,222.538937,-54.845107,1,nan,0.5444,0.0180,42.4836,0.008
8665,151.171875,2.537361,237.288526,43.169764,1,nan,1.2262,0.0861,44.6480,0.024
8672,35.683594,-5.379379,171.992947,-59.253501,1,nan,1.1123,0.0767,44.3861,0.020
8674,53.789062,-27.784405,223.685697,-53.845803,1,0.9353,0.7665,0.0891,43.3883,0.009
8682,349.046051,-61.943836,320.796530,-51.753706,1,nan,1.0543,0.0550,44.2423,0.017
8683,52.910156,-27.953188,223.774083,-54.639214,1,0.6644,0.6624,0.0227,43.0003,0.007
8684,51.855469,-27.953188,223.543603,-55.561470,1,nan,0.8518,0.0200,43.6702,0.008
8691,351.734680,-62.884678,318.284128,-51.651217,1,nan,0.6565,0.0183,42.9769,0.019
8697,351.734680,-62.884678,318.284128,-51.651217,1,nan,1.4889,0.0787,45.1691,0.019
8701,347.812500,-63.448284,320.128971,-50.202348,1,0.2506,0.2395,0.0127,40.3969,0.021
8702,1.753247,-46.768478,324.030235,-68.498041,1,nan,1.2922,0.0134,44.7890,0.014
8705,150.820312,3.732834,235.666318,43.572109,1,nan,0.9669,0.0557,44.0099,0.016
8717,52.207031,-28.291550,224.208534,-55.300157,1,nan,1.4656,0.3245,45.1269,0.007
8724,2.071130,-45.191612,325.606223,-69.989264,1,0.2852,0.2994,0.0072,40.9502,0.011
8730,348.586945,-64.573555,318.693903,-49.477869,1,0.4718,2.9626,1.3143,46.9933,0.018
8734,348.586945,-64.573555,318.693903,-49.477869,1,0.4341,0.4524,0.0446,42.0014,0.018
8738,52.910156,-26.276812,220.926149,-54.363918,1,nan,1.0394,0.1965,44.2041,0.008
8739,53.789062,-27.784405,223.685697,-53.845803,1,nan,1.1723,0.2076,44.5273,0.009
8767,150.820312,3.134927,236.341348,43.230123,1,nan,0.6681,0.0088,43.0230,0.016
8803,2.097458,-45.783966,324.737840,-69.478613,1,nan,1.0540,0.2342,44.2414,0.011
8805,0.965665,-46.375080,325.845907,-68.579427,1,nan,1.1091,0.1745,44.3785,0.007
8806,352.398651,-62.696659,318.017427,-51.967966,1,nan,1.3680,0.2494,44.9420,0.020
8812,151.347656,4.181528,235.568369,44.259942,1,nan,1.2164,0.1126,44.6264,0.016
8816,149.414062,3.433834,234.919132,42.245550,1,nan,0.8846,0.0380,43.7713,0.027
8821,35.859375,-4.630479,171.270769,-58.580806,1,nan,0.7988,0.0369,43.4987,0.022
8837,351.299988,-62.320400,319.038597,-52.026867,1,nan,0.5320,0.0097,42.4234,0.018
8850,346.562500,-63.448284,320.824720,-49.866957,1,nan,1.0271,0.0826,44.1720,0.021
8857,150.117188,2.238686,236.784618,42.139082,1,nan,1.0907,0.1270,44.3334,0.016
8861,351.321442,-64.198746,317.458993,-50.429931,1,nan,0.3476,1.1210,41.3264,0.023
8871,32.695312,-4.929937,166.868469,-60.841230,1,nan,1.8695,0.2279,45.7776,0.018
8883,359.446716,-44.201530,331.730015,-69.805709,1,nan,1.3015,0.2137,44.8082,0.010
8902,151.171875,2.238686,237.619933,42.994783,1,nan,0.4347,0.0093,41.8984,0.024
8904,347.812500,-63.448284,320.128971,-50.202348,1,nan,1.1116,0.0337,44.3843,0.021
8917,34.101562,-5.829153,170.247753,-60.638325,1,0.5976,0.5969,0.0088,42.7253,0.019
8919,51.679688,-27.447618,222.618229,-55.642263,1,nan,0.9152,0.0404,43.8626,0.010
8921,150.644531,3.583322,235.698235,43.342784,1,0.5296,0.5248,0.0115,42.3874,0.018
8933,349.046051,-61.943836,320.796530,-51.753706,1,nan,0.0000,0.0000,nan,0.017
8947,33.574219,-5.379379,168.838090,-60.637536,1,0.3757,2.9135,0.9540,46.9496,0.017
8962,2.071130,-45.191612,325.606223,-69.989264,1,nan,1.2366,0.1848,44.6708,0.011
8978,348.529419,-61.755440,321.293980,-51.763351,1,nan,0.5523,0.1275,42.5212,0.016
8983,53.085938,-28.122234,224.100909,-54.509752,1,0.4014,0.3910,0.0331,41.6261,0.007
8987,0.574468,-45.981140,327.041068,-68.778764,1,nan,1.0629,0.0133,44.2641,0.006
9033,0.190678,-45.783966,327.956322,-68.803772,1,0.3132,0.2593,0.0293,40.5923,0.005
9041,347.617462,-62.508568,321.121462,-50.904708,1,nan,1.0424,0.0183,44.2118,0.019
9051,358.665253,-45.783966,330.353593,-68.203652,1,nan,0.9828,0.0093,44.0536,0.009
9053,1.708861,-45.586655,325.688716,-69.520253,1,nan,0.5661,0.0926,42.5860,0.011
9080,33.925781,-5.979157,170.179895,-60.866303,1,nan,0.8653,0.0487,43.7124,0.022
9084,52.910156,-26.276812,220.926149,-54.363918,1,nan,0.8458,0.0301,43.6512,0.008
9099,352.398651,-62.696659,318.017427,-51.967966,1,nan,0.5841,0.0141,42.6684,0.020
9107,150.468750,1.641510,237.714575,42.075234,1,0.4687,0.5719,0.2556,42.6127,0.017
9110,0.929752,-44.597992,328.531426,-70.083244,1,0.2531,0.2978,0.0321,40.9371,0.011
9115,53.261719,-27.615883,223.280041,-54.281374,1,nan,0.5434,0.6581,42.4785,0.006
9124,34.277344,-5.679190,170.314930,-60.410322,1,nan,1.5382,0.2744,45.2564,0.020
9138,352.132874,-63.636005,317.424173,-51.095855,1,0.2703,0.2829,0.0150,40.8088,0.021
9165,349.429535,-62.508568,320.039643,-51.393745,1,nan,1.1058,0.0670,44.3703,0.020
9167,346.130127,-63.072620,321.423103,-50.042305,1,nan,1.1829,0.0376,44.5516,0.020
9170,346.500000,-62.320400,321.951129,-50.736054,1,nan,0.5880,0.0130,42.6856,0.020
9187,35.859375,-4.630479,171.270769,-58.580806,1,nan,0.3500,0.0275,41.3440,0.022
9197,2.071130,-45.191612,325.606223,-69.989264,1,nan,1.3572,0.2236,44.9207,0.011
9209,150.644531,3.583322,235.698235,43.342784,1,nan,0.5872,0.0397,42.6820,0.018
9210,359.415588,-46.768478,327.729895,-67.686097,1,nan,1.2507,0.0994,44.7013,0.009
9216,359.811707,-45.191612,329.485675,-69.150905,1,0.9399,1.5923,0.1936,45.3491,0.010
9220,52.207031,-28.630989,224.800211,-55.343637,1,nan,0.6692,0.0244,43.0276,0.009
9229,52.910156,-27.953188,223.774083,-54.639214,1,0.3075,0.3138,0.0076,41.0683,0.007
9237,0.929752,-44.597992,328.531426,-70.083244,1,nan,0.5799,0.2003,42.6494,0.011
9277,149.414062,1.940072,236.565366,41.393323,1,nan,0.7257,0.0452,43.2426,0.018
9293,151.171875,2.238686,237.619933,42.994783,1,0.5639,0.5209,0.5082,42.3682,0.024
9302,351.321442,-64.198746,317.458993,-50.429931,1,nan,1.3310,0.0582,44.8683,0.023
9303,34.453125,-5.229529,169.987075,-59.956185,1,nan,1.0260,0.0163,44.1691,0.019
9316,33.574219,-5.379379,168.838090,-60.637536,1,nan,0.6163,0.0159,42.8095,0.017
9322,33.574219,-5.079716,168.448505,-60.407218,1,nan,1.3730,0.1469,44.9518,0.016
9346,351.321442,-64.198746,317.458993,-50.429931,1,nan,1.3099,0.0995,44.8254,0.023
9362,53.437500,-29.142223,225.908120,-54.336118,1,nan,0.2692,0.0170,40.6858,0.008
9365,34.277344,-5.679190,170.314930,-60.410322,1,0.2935,0.2933,0.0217,40.8992,0.020
9374,53.085938,-27.111860,222.384291,-54.355086,1,nan,0.7189,0.0162,43.2179,0.007
9380,346.655182,-63.260487,320.952196,-50.040935,1,nan,0.7849,0.0428,43.4518,0.019
9408,346.500000,-62.320400,321.951129,-50.736054,1,nan,0.9664,0.0147,44.0085,0.020
9423,351.382965,-64.011238,317.574052,-50.604657,1,nan,0.5558,0.0329,42.5379,0.023
9444,349.046051,-61.943836,320.796530,-51.753706,1,nan,1.1578,0.0579,44.4939,0.017
9451,351.382965,-64.011238,317.574052,-50.604657,1,nan,0.9855,0.0096,44.0612,0.023
9461,358.665253,-45.783966,330.353593,-68.203652,1,nan,1.1498,0.0929,44.4752,0.009
9468,51.855469,-26.276812,220.627031,-55.293792,1,nan,0.7969,0.0460,43.4923,0.014
9469,32.871094,-4.780192,166.959493,-60.615132,1,0.8180,0.7700,0.0220,43.4006,0.017
9482,53.085938,-28.122234,224.100909,-54.509752,1,nan,0.7314,0.0368,43.2634,0.007
9497,51.328125,-27.784405,223.130589,-55.999499,1,0.4664,0.4234,0.0109,41.8306,0.013
9507,359.811707,-45.191612,329.485675,-69.150905,1,nan,0.7375,0.0178,43.2857,0.010
9550,346.130127,-63.072620,321.423103,-50.042305,1,0.6995,0.7070,0.0097,43.1732,0.020
9554,51.679688,-27.447618,222.618229,-55.642263,1,0.6646,0.6800,0.0238,43.0701,0.010
9562,349.160583,-64.760857,318.219706,-49.458924,1,nan,0.3891,0.0546,41.6136,0.020
9566,52.910156,-25.944481,220.366350,-54.301439,1,nan,0.8552,0.0311,43.6808,0.010
9588,51.855469,-28.630989,224.733260,-55.649872,1,nan,0.8782,0.0321,43.7520,0.009
9597,53.964844,-28.630989,225.142950,-53.813613,1,nan,0.8452,0.2344,43.6493,0.009
9603,349.046051,-61.943836,320.796530,-51.753706,1,nan,1.6743,0.3261,45.4832,0.017
9626,150.644531,3.583322,235.698235,43.342784,1,nan,1.0665,0.0241,44.2730,0.018
9652,53.613281,-28.630989,225.073365,-54.119461,1,nan,0.6624,0.0296,43.0006,0.006
9662,53.613281,-26.944359,222.237403,-53.863858,1,nan,1.4334,0.0635,45.0673,0.009
9676,1.753247,-46.768478,324.030235,-68.498041,1,0.6764,0.7221,0.0336,43.2295,0.014
9678,151.523438,3.134927,236.900695,43.803170,1,nan,1.1508,0.7055,44.4777,0.019
9699,2.097458,-45.783966,324.737840,-69.478613,1,nan,0.6400,0.1023,42.9095,0.011
9705,352.711273,-63.823658,316.922299,-51.059403,1,0.5977,0.5685,0.0384,42.5972,0.024
9720,351.734680,-62.884678,318.284128,-51.651217,1,0.6071,0.5353,0.0256,42.4395,0.019
9725,350.230255,-61.943836,320.053946,-52.070537,1,nan,1.2775,0.0723,44.7581,0.017
9726,358.312500,-44.993881,332.185785,-68.685906,1,nan,0.7580,0.0243,43.3588,0.009
9761,52.207031,-28.291550,224.208534,-55.300157,1,nan,1.4229,0.1718,45.0476,0.007
9772,53.613281,-26.944359,222.237403,-53.863858,1,nan,0.5526,0.0238,42.5224,0.009
9776,351.953644,-62.132156,318.777388,-52.347124,1,0.4229,0.4114,0.0259,41.7568,0.019
9782,149.589844,3.583322,234.885369,42.474696,1,nan,1.1776,0.1586,44.5394,0.024
9799,346.276581,-64.011238,320.448031,-49.344136,1,nan,0.9878,0.3118,44.0673,0.019
9806,0.965665,-46.375080,325.845907,-68.579427,1,0.4619,0.5359,0.0762,42.4424,0.007
9809,347.812500,-63.448284,320.128971,-50.202348,1,nan,0.6301,0.0342,42.8682,0.021
9839,53.613281,-27.953188,223.929533,-54.024772,1,nan,1.5628,0.1334,45.2989,0.007
9864,0.929752,-44.597992,328.531426,-70.083244,1,nan,0.7718,0.0734,43.4067,0.011
9865,150.117188,3.732834,235.120533,42.993809,1,0.4651,2.3854,0.8502,46.4240,0.020
9868,54.667969,-27.615883,223.610785,-53.050840,1,nan,0.7611,0.0247,43.3696,0.009
9884,51.328125,-27.447618,222.535046,-55.950727,1,0.6206,0.6001,0.0161,42.7395,0.013
9893,33.750000,-4.630479,168.146242,-59.949072,1,0.4883,0.5053,0.0956,42.2888,0.019
9916,349.891296,-64.573555,317.972107,-49.786192,1,nan,0.6122,0.0358,42.7920,0.023
9924,349.285706,-62.884678,319.786163,-51.046461,1,0.7986,0.8711,0.0680,43.7303,0.018
9950,51.328125,-27.447618,222.535046,-55.950727,1,nan,0.5462,0.0962,42.4921,0.013
9951,346.276581,-64.011238,320.448031,-49.344136,1,0.8417,0.7427,0.0547,43.3044,0.019
9961,53.789062,-27.784405,223.685697,-53.845803,1,nan,1.0492,0.2680,44.2292,0.009
9967,150.292969,2.686724,236.427488,42.541447,1,nan,1.4395,0.2623,45.0786,0.016
9989,346.130127,-63.072620,321.423103,-50.042305,1,nan,0.9517,0.2190,43.9673,0.020
9998,2.457983,-45.389202,324.632685,-69.945696,1,nan,0.5127,0.0376,42.3265,0.011
10021,52.207031,-28.630989,224.800211,-55.343637,1,nan,1.4337,0.0978,45.0679,0.009
10056,52.558594,-27.279613,222.538937,-54.845107,1,0.4511,0.3564,0.0517,41.3900,0.008
10071,0.949367,-45.586655,326.991548,-69.251686,1,0.3219,0.4590,0.0597,42.0387,0.013
10102,359.058563,-45.191612,330.695783,-68.844915,1,nan,0.7450,0.0248,43.3128,0.011
10103,359.805206,-46.768478,327.135979,-67.829903,1,nan,0.8130,0.0375,43.5455,0.011
10110,54.667969,-27.615883,223.610785,-53.050840,1,0.7638,0.8007,0.0367,43.5050,0.009
10119,351.953644,-62.132156,318.777388,-52.347124,1,0.6209,0.6434,0.0311,42.9234,0.019
10133,352.398651,-62.696659,318.017427,-51.967966,1,nan,0.9030,0.0216,43.8266,0.020
10139,350.230255,-61.943836,320.053946,-52.070537,1,nan,1.3210,0.0562,44.8481,0.017
10146,346.500000,-62.320400,321.951129,-50.736054,1,nan,0.5522,0.0066,42.5205,0.020
10150,359.814819,-44.399834,330.775011,-69.801007,1,0.4395,2.7379,0.4044,46.7868,0.009
10155,0.589520,-47.161343,325.385896,-67.769893,1,nan,1.0100,0.2301,44.1270,0.009
10180,359.811707,-45.191612,329.485675,-69.150905,1,nan,0.5829,0.0112,42.6631,0.010
10213,359.058563,-45.191612,330.695783,-68.844915,1,nan,0.5634,0.0192,42.5732,0.011
10218,52.558594,-27.279613,222.538937,-54.845107,1,0.3131,0.3414,0.0364,41.2809,0.008
10228,151.171875,1.342993,238.602520,42.464379,1,nan,0.7524,0.0449,43.3390,0.026
10244,53.789062,-27.784405,223.685697,-53.845803,1,nan,0.7687,0.0207,43.3962,0.009


================================================
FILE: examples/data/plasticc_training_set_1k.csv
================================================
object_id,mjd,passband,flux,flux_err,detected
615,59750.4229,2,-544.810303,3.622952,1
615,59750.4306,1,-816.434326,5.553370,1
615,59750.4383,3,-471.385529,3.801213,1
615,59750.4450,4,-388.984985,11.395031,1
615,59752.4070,2,-681.858887,4.041204,1
615,59752.4147,1,-1061.457031,6.472994,1
615,59752.4224,3,-524.954590,3.552751,1
615,59752.4334,4,-393.480225,3.599346,1
615,59752.4435,5,-355.886780,10.421921,1
615,59767.2968,2,-548.013550,3.462291,1
615,59767.3045,1,-815.188599,5.293019,1
615,59767.3122,3,-475.516052,3.340643,1
615,59767.3233,4,-405.663818,3.496113,1
615,59767.3343,5,-421.199066,6.377517,1
615,59770.2179,2,-554.903198,3.927843,1
615,59770.2256,1,-820.042786,5.875329,1
615,59770.2334,3,-477.004730,3.736262,1
615,59770.2445,4,-400.270386,3.834955,1
615,59770.2557,5,-415.286896,7.435979,1
615,59779.3188,2,-630.523682,4.333287,1
615,59779.3265,1,-921.002502,6.306800,1
615,59779.3342,3,-518.533997,3.915225,1
615,59779.3452,4,-422.184509,4.089213,1
615,59779.3562,5,-422.815094,8.124096,1
615,59782.1897,2,-280.039520,2.819228,1
615,59782.1974,1,-449.095612,4.028310,1
615,59782.2051,3,-316.704865,3.491153,1
615,59782.2162,4,-332.885437,4.021619,1
615,59782.2274,5,-365.075775,8.514805,1
615,59797.2861,2,391.399231,3.098059,1
615,59797.2938,1,35.511822,3.163646,1
615,59797.3015,3,330.623901,3.388776,1
615,59797.3126,4,360.397858,3.980607,1
615,59797.3237,5,369.439667,8.207490,1
615,59800.3168,2,168.739899,3.128495,1
615,59800.3244,1,129.541901,4.358776,1
615,59800.3320,3,30.120724,3.396606,1
615,59800.3429,4,-60.942333,3.704243,1
615,59800.3539,5,-128.920334,7.495701,1
615,59807.1738,2,-256.660980,2.781354,1
615,59807.1815,1,-420.796417,4.037735,1
615,59807.1892,3,-298.936859,3.041390,1
615,59807.2003,4,-311.977783,3.318007,1
615,59807.2114,5,-344.536072,6.367201,1
615,59810.1045,2,-342.819763,2.808321,1
615,59810.1122,1,-527.020325,4.204173,1
615,59810.1200,3,-363.282532,3.237536,1
615,59810.1311,4,-348.628662,3.774855,1
615,59810.1422,5,-391.271271,7.657067,1
615,59813.1044,2,-678.045715,4.032819,1
615,59813.1122,1,-1100.440063,6.709106,1
615,59813.1199,3,-506.687408,3.547398,1
615,59813.1310,4,-304.049713,3.590496,1
615,59813.1422,5,-187.285919,6.984019,1
615,59819.1532,0,6.878784,3.633152,0
615,59820.1047,0,39.364853,3.775619,1
615,59821.1026,0,-10.422381,4.172683,0
615,59822.1105,0,-65.485130,4.362876,1
615,59823.1505,0,-113.349159,4.069051,1
615,59835.0600,2,-54.949490,2.575779,1
615,59835.0678,1,-178.149399,3.809858,1
615,59835.0755,3,-140.818436,3.149077,1
615,59835.0866,4,-200.294128,3.578005,1
615,59835.0978,5,-263.578430,6.954262,1
615,59839.0306,2,-639.035950,3.928531,1
615,59839.0384,1,-953.883728,6.035410,1
615,59839.0461,3,-518.293274,3.595869,1
615,59839.0573,4,-418.723907,3.536483,1
615,59839.0684,5,-418.799927,6.580595,1
615,59842.0207,2,-502.215332,3.348443,1
615,59842.0285,1,-1003.971497,6.256784,1
615,59842.0362,3,-233.167755,2.872840,1
615,59842.0473,4,111.507675,3.229112,1
615,59842.0585,5,206.425323,6.615822,1
615,59851.1114,0,-68.502457,3.338555,1
615,59854.0796,2,459.452667,3.336711,1
615,59854.0873,1,217.894211,3.332742,1
615,59854.0950,3,361.023438,3.237847,1
615,59854.1061,4,374.446442,3.622074,1
615,59854.1172,5,370.346283,6.789766,1
615,59857.0453,2,599.812195,4.121032,1
615,59857.0531,1,646.523193,5.291624,1
615,59857.0608,3,354.961365,3.407785,1
615,59857.0719,4,293.879608,3.581862,1
615,59857.0830,5,232.535995,6.761845,1
615,59864.0162,2,-637.105347,3.818432,1
615,59864.0239,1,-942.167908,5.916004,1
615,59864.0316,3,-524.586548,3.538931,1
615,59864.0428,4,-414.447723,3.635253,1
615,59864.0539,5,-408.089233,7.119730,1
615,59867.0178,2,-332.763123,2.872951,1
615,59867.0255,1,-910.677734,5.852032,1
615,59867.0332,3,-62.065010,3.073413,1
615,59867.0443,4,202.288223,4.026751,1
615,59867.0554,5,270.584869,10.321785,1
615,59870.0194,2,604.344543,3.901750,1
615,59870.0272,1,659.486694,4.991051,1
615,59870.0349,3,373.986511,3.294667,1
615,59870.0459,4,322.604034,3.716555,1
615,59870.0571,5,263.481476,7.193131,1
615,59873.0212,2,4.656033,2.130510,0
615,59873.0289,1,-98.796974,3.034533,1
615,59873.0366,3,-93.732880,2.499724,1
615,59873.0477,4,-165.793457,2.906058,1
615,59873.0588,5,-233.501724,6.087882,1
615,59874.0599,0,-97.353195,3.133990,1
615,59875.0311,0,-97.523880,2.963075,1
615,59876.0231,0,-108.672577,3.449714,1
615,59877.0238,0,-116.913223,3.097836,1
615,59878.0246,0,-102.768921,3.135772,1
615,59879.0248,0,-52.407089,3.261559,1
615,59880.0258,0,55.567715,3.355268,1
615,59884.0823,2,-274.711029,2.572093,1
615,59884.0900,1,-437.425110,3.831595,1
615,59884.0976,3,-310.010925,2.957125,1
615,59884.1085,4,-317.630920,3.213168,1
615,59884.1195,5,-351.278198,6.231324,1
615,59887.0298,2,-491.146423,3.592675,1
615,59887.0375,1,-743.267212,5.624708,1
615,59887.0451,3,-449.714752,3.728483,1
615,59887.0562,4,-393.971649,3.532816,1
615,59887.0673,5,-406.549103,6.648589,1
615,60118.4163,0,-107.080536,3.102513,1
615,60124.2541,2,-588.397949,4.176047,1
615,60124.2618,1,-878.043396,6.210247,1
615,60124.2695,3,-495.472015,4.011444,1
615,60124.2807,4,-417.145325,4.094360,1
615,60124.2918,5,-413.673431,7.793959,1
615,60140.2290,0,-88.981155,3.468430,1
615,60141.2225,0,-50.179337,4.734193,1
615,60142.2202,0,50.008640,4.636651,1
615,60143.2212,0,110.753555,4.380840,1
615,60144.2186,0,120.867218,4.103332,1
615,60145.2123,0,111.464226,4.367030,1
615,60153.2274,2,-322.420471,2.833071,1
615,60153.2351,1,-917.875488,5.951387,1
615,60153.2428,3,-52.056461,2.698249,1
615,60153.2539,4,205.180893,3.169676,1
615,60153.2650,5,269.709167,6.682271,1
615,60162.1477,2,31.499735,2.753767,1
615,60162.1554,1,-62.120552,3.818642,1
615,60162.1631,3,-72.958771,3.515574,1
615,60162.1742,4,-151.126511,3.838288,1
615,60162.1853,5,-216.914032,8.832489,1
615,60165.1369,2,-568.408875,3.553168,1
615,60165.1446,1,-836.233154,5.405759,1
615,60165.1524,3,-483.071381,3.387615,1
615,60165.1635,4,-409.470642,3.541994,1
615,60165.1746,5,-412.820221,6.907444,1
615,60168.1260,2,-628.321350,3.804775,1
615,60168.1337,1,-1077.347900,6.591075,1
615,60168.1414,3,-421.859406,3.336656,1
615,60168.1525,4,-93.729095,3.211201,1
615,60168.1637,5,31.207939,6.615005,0
615,60176.1332,0,-49.905262,3.874426,1
615,60177.1370,0,-87.160583,4.078375,1
615,60181.3147,2,-180.729568,2.498579,1
615,60181.3223,1,-339.875153,3.626661,1
615,60181.3299,3,-249.205673,3.280824,1
615,60181.3409,4,-275.762329,4.694962,1
615,60181.3518,5,-330.891327,9.580047,1
615,60184.3625,2,-555.853943,3.979171,1
615,60184.3701,1,-1028.441528,6.719577,1
615,60184.3777,3,-306.200500,3.557627,1
615,60184.3887,4,49.555847,3.627351,1
615,60184.3996,5,154.876785,7.988054,1
615,60194.1575,2,469.654999,3.386857,1
615,60194.1652,1,276.757751,3.594162,1
615,60194.1729,3,374.669556,3.437137,1
615,60194.1839,4,374.948822,4.267094,1
615,60194.1926,5,363.130493,12.845472,1
615,60197.1181,2,607.786804,3.960346,1
615,60197.1258,1,650.984314,4.970811,1
615,60197.1335,3,365.408752,3.197298,1
615,60197.1446,4,305.330750,3.360043,1
615,60197.1557,5,256.966217,6.443069,1
615,60198.1077,0,100.129280,4.266314,1
615,60199.0914,0,86.776741,4.679742,1
615,60200.0650,0,82.078186,4.342434,1
615,60201.0680,0,41.947815,4.467065,1
615,60202.0552,0,9.061676,3.831397,0
615,60206.1107,0,-83.072884,3.130236,1
615,60207.1469,0,108.483109,4.458607,1
615,60208.0229,2,-672.681335,4.138056,1
615,60208.0307,1,-1094.027588,6.717340,1
615,60208.0384,3,-503.870422,3.665424,1
615,60208.0495,4,-284.747498,3.666287,1
615,60208.0606,5,-176.409851,8.308295,1
615,60211.0124,2,208.281052,3.229784,1
615,60211.0202,1,-370.189575,4.525907,1
615,60211.0279,3,269.200806,3.688238,1
615,60211.0390,4,326.272308,4.424663,1
615,60211.0502,5,358.320099,10.152412,1
615,60221.0153,2,-648.682312,3.906177,1
615,60221.0230,1,-1086.777710,6.620100,1
615,60221.0308,3,-455.588196,3.295532,1
615,60221.0419,4,-145.305023,3.087424,1
615,60221.0530,5,2.939076,6.798505,0
615,60224.0140,2,491.748383,3.509416,1
615,60224.0217,1,346.335083,3.835582,1
615,60224.0294,3,384.185303,3.529593,1
615,60224.0405,4,381.953735,3.885009,1
615,60224.0516,5,378.118225,7.311360,1
615,60227.0151,2,341.057709,2.940147,1
615,60227.0228,1,356.632690,3.856145,1
615,60227.0305,3,153.004929,2.847803,1
615,60227.0416,4,52.912033,3.109148,1
615,60227.0527,5,-19.384567,6.713308,0
615,60228.0187,0,6.768485,4.174600,0
615,60229.0162,0,-35.149330,4.086384,1
615,60234.0265,0,-52.922794,3.681808,1
615,60237.2206,2,-676.669189,4.009161,1
615,60237.2283,1,-1098.651489,6.689435,1
615,60237.2359,3,-511.148254,3.546333,1
615,60237.2468,4,-347.090027,3.624199,1
615,60237.2578,5,-240.316895,8.035271,1
615,60240.0223,2,85.162651,2.829378,1
615,60240.0300,1,14.526012,3.783879,0
615,60240.0377,3,-24.350578,3.397041,1
615,60240.0488,4,-111.062698,3.698180,1
615,60240.0598,5,-180.234787,6.894514,1
615,60249.0338,2,611.984558,3.908728,1
615,60249.0415,1,660.626343,4.961018,1
615,60249.0492,3,386.311920,3.240422,1
615,60249.0602,4,325.401184,3.454910,1
615,60249.0712,5,280.721069,6.623785,1
615,60260.0423,0,108.020546,4.337497,1
615,60261.0361,0,125.182808,3.909554,1
615,60262.0367,0,107.649780,3.796611,1
615,60263.0373,0,61.068066,3.877589,1
615,60264.0465,0,-9.100937,3.518127,0
615,60490.2647,2,-408.570984,3.169784,1
615,60490.2725,1,-624.518799,4.704853,1
615,60490.2802,3,-405.614258,3.513195,1
615,60490.2913,4,-371.286377,3.985296,1
615,60490.3024,5,-395.406128,8.139952,1
615,60493.2372,2,-680.489441,4.065931,1
615,60493.2450,1,-1031.102905,6.378702,1
615,60493.2527,3,-530.644592,3.672556,1
615,60493.2639,4,-406.733521,3.772714,1
615,60493.2750,5,-358.876160,7.310321,1
615,60499.2467,0,106.447296,4.481476,1
615,60500.2437,0,67.234062,4.020935,1
615,60501.2385,0,24.868933,4.027500,1
615,60502.2355,0,-15.392517,4.142292,0
615,60508.2638,2,365.607056,3.696270,1
615,60508.2715,1,-32.986282,4.440859,0
615,60508.2792,3,319.249847,3.828632,1
615,60508.2903,4,360.507599,4.336362,1
615,60508.3014,5,370.305267,8.601955,1
615,60524.2390,0,89.070496,3.901179,1
615,60525.1736,0,118.935989,4.737393,1
615,60532.3489,2,510.690094,3.489832,1
615,60532.3565,1,566.281433,4.607503,1
615,60532.3641,3,271.663910,3.007311,1
615,60532.3751,4,204.409866,3.353202,1
615,60532.3860,5,128.521851,7.134325,1
615,60535.1253,2,-664.729675,4.282414,1
615,60535.1330,1,-1084.891113,6.952323,1
615,60535.1408,3,-488.010925,3.928481,1
615,60535.1519,4,-222.254257,4.034600,1
615,60535.1630,5,-85.524307,8.625449,1
615,60538.2351,2,113.021248,2.712380,1
615,60538.2428,1,51.060081,3.762334,1
615,60538.2505,3,-4.268328,2.895656,0
615,60538.2615,4,-96.020035,3.141703,1
615,60538.2725,5,-175.912643,6.308159,1
615,60546.3406,2,178.624359,2.905459,1
615,60546.3482,1,142.089966,4.065646,1
615,60546.3558,3,41.418739,3.163731,1
615,60546.3668,4,-52.460590,3.784039,1
615,60546.3777,5,-112.286079,8.527776,1
615,60549.0879,2,-629.010254,3.867215,1
615,60549.0956,1,-1076.652100,6.604701,1
615,60549.1034,3,-435.558533,3.417534,1
615,60549.1145,4,-111.499573,3.424588,1
615,60549.1256,5,30.267401,7.478198,0
615,60554.0964,0,82.168922,4.318140,1
615,60555.0951,0,49.886921,3.917516,1
615,60556.0879,0,9.075453,4.103900,0
615,60557.0831,0,-30.764908,3.555157,1
615,60558.1093,0,-101.419899,3.653430,1
615,60559.1097,0,-110.688477,3.426444,1
615,60560.1065,0,-114.774445,4.013463,1
615,60567.2821,2,-447.681580,3.368270,1
615,60567.2897,1,-972.201111,6.347886,1
615,60567.2973,3,-176.163651,2.960412,1
615,60567.3083,4,140.860611,3.302721,1
615,60567.3192,5,228.033112,6.797573,1
615,60574.1118,2,-143.843872,2.547544,1
615,60574.1195,1,-812.792908,5.570116,1
615,60574.1272,3,86.606873,2.966459,1
615,60574.1383,4,257.570221,3.575394,1
615,60574.1493,5,302.167328,7.100554,1
615,60577.0186,2,-425.988464,3.239578,1
615,60577.0263,1,-963.216980,6.134610,1
615,60577.0340,3,-148.178238,3.247297,1
615,60577.0451,4,161.872543,4.228243,1
615,60577.0563,5,238.576889,9.461221,1
615,60580.0095,2,586.178345,3.996895,1
615,60580.0173,1,655.284058,5.148244,1
615,60580.0250,3,445.737061,3.952905,1
615,60580.0361,4,361.595764,4.508256,1
615,60580.0472,5,328.836731,9.460338,1
615,60582.0840,0,-51.614189,3.517908,1
615,60583.0169,0,20.364273,4.460314,0
615,60584.0117,0,-24.682575,3.866380,1
615,60585.0117,0,-63.546600,3.497667,1
615,60586.0123,0,-101.819290,3.383004,1
615,60587.0127,0,-110.978699,3.555624,1
615,60588.0131,0,-113.588432,3.241369,1
615,60593.0636,2,226.696259,2.514855,1
615,60593.0713,1,205.029755,3.258004,1
615,60593.0790,3,73.384720,2.584785,1
615,60593.0901,4,-19.212976,2.899512,0
615,60593.1012,5,-83.394951,6.073453,1
615,60596.0304,2,-224.917938,3.388916,1
615,60596.0381,1,-388.231476,4.931039,1
615,60596.0458,3,-274.108429,3.720238,1
615,60596.0569,4,-292.558990,4.188871,1
615,60596.0680,5,-354.074280,8.392479,1
615,60603.0208,2,404.391388,3.043772,1
615,60603.0286,1,70.494507,3.060846,1
615,60603.0363,3,338.994537,3.051842,1
615,60603.0473,4,362.888550,3.381572,1
615,60603.0584,5,378.188141,6.295821,1
615,60606.0225,2,422.610779,3.198191,1
615,60606.0303,1,457.502197,4.173640,1
615,60606.0379,3,205.937546,2.957614,1
615,60606.0490,4,123.048210,3.402847,1
615,60606.0601,5,33.726837,7.368811,0
615,60609.0247,2,-355.611389,2.720825,1
615,60609.0323,1,-537.169312,4.090708,1
615,60609.0400,3,-372.485565,2.985755,1
615,60609.0510,4,-350.518677,3.225662,1
615,60609.0621,5,-371.873230,6.150734,1
615,60612.0266,0,-110.649872,2.844200,1
615,60613.0269,0,-89.973892,2.937887,1
615,60614.0276,0,-10.015225,3.212408,0
615,60615.0375,0,99.438087,3.662484,1
615,60616.0290,0,120.849113,3.776495,1
615,60617.0295,0,121.411896,3.569777,1
615,60621.1734,2,56.559818,2.259825,1
615,60621.1810,1,-607.040771,4.452463,1
615,60621.1886,3,208.770279,2.959783,1
615,60621.1996,4,297.624725,3.718585,1
615,60621.2105,5,332.919006,8.157172,1
615,60624.1760,2,552.150269,3.917989,1
615,60624.1836,1,607.047668,5.140991,1
615,60624.1913,3,296.946533,3.475000,1
615,60624.2022,4,235.489929,3.926538,1
615,60624.2132,5,157.080200,8.453112,1
713,59825.2600,2,9.110147,1.013889,0
713,59825.2676,1,7.615042,1.160329,1
713,59825.2752,3,6.673631,1.932316,1
713,59825.2862,4,5.214194,3.018003,0
713,59825.2971,5,12.060948,7.163382,0
713,59839.2161,2,4.953065,1.196956,0
713,59839.2236,1,3.131028,1.351706,0
713,59839.2313,3,6.108739,1.789895,0
713,59839.2422,4,8.283792,2.527953,0
713,59839.2532,5,9.686500,5.882469,0
713,59842.1987,2,6.472355,1.110572,0
713,59842.2064,1,5.914848,1.134476,1
713,59842.2140,3,5.311658,1.683777,0
713,59842.2250,4,4.680908,2.349234,0
713,59842.2359,5,6.921503,5.573885,0
713,59851.2006,0,7.267655,2.866838,0
713,59854.2089,2,3.945918,0.922779,0
713,59854.2165,1,2.956484,0.953529,0
713,59854.2242,3,4.768611,1.422675,0
713,59854.2351,4,4.065430,1.943197,0
713,59854.2461,5,1.290383,3.980583,0
713,59857.1879,2,3.937931,2.088610,0
713,59857.1956,1,-2.223347,2.861396,0
713,59857.2032,3,-0.476698,2.280299,0
713,59857.2141,4,8.054095,2.712542,0
713,59857.2251,5,-9.332252,5.174713,0
713,59867.1600,2,1.380378,1.105223,0
713,59867.1676,1,1.569406,1.369900,0
713,59867.1753,3,1.433712,1.730335,0
713,59867.1862,4,5.299760,2.530279,0
713,59867.1971,5,2.355590,5.574841,0
713,59870.1521,2,3.404463,0.889940,0
713,59870.1597,1,2.961649,0.946383,0
713,59870.1673,3,2.594970,1.322884,0
713,59870.1782,4,2.805032,1.753258,0
713,59870.1892,5,3.101222,4.041600,0
713,59873.1442,2,3.869869,0.763644,0
713,59873.1519,1,4.354049,0.790372,1
713,59873.1595,3,2.949366,1.101570,0
713,59873.1704,4,3.117238,1.623630,0
713,59873.1814,5,4.010789,3.832515,0
713,59874.1612,0,4.171277,2.180456,0
713,59875.1175,0,2.671449,2.536783,0
713,59876.1160,0,-0.354117,2.566333,0
713,59877.1178,0,2.332870,2.217193,0
713,59878.1127,0,3.797837,2.270967,0
713,59879.1104,0,4.900619,2.339577,0
713,59880.1181,0,1.331082,1.865762,0
713,59884.1292,2,3.091794,0.995209,0
713,59884.1368,1,1.712878,1.129837,0
713,59884.1444,3,2.552051,1.229161,0
713,59884.1554,4,2.099711,1.737169,0
713,59884.1663,5,5.788035,3.869600,0
713,59887.0951,2,5.011691,1.668699,0
713,59887.1027,1,3.719429,2.314904,0
713,59887.1103,3,10.036420,1.852814,0
713,59887.1213,4,5.964674,2.277389,0
713,59887.1322,5,8.822542,4.767565,0
713,59896.0839,2,5.064992,0.999215,0
713,59896.0915,1,5.780192,1.083338,0
713,59896.0992,3,3.587355,1.642954,1
713,59896.1101,4,6.185760,2.277758,0
713,59896.1211,5,-4.762829,5.042903,0
713,59899.0854,2,4.823127,1.244829,0
713,59899.0930,1,6.899071,1.246326,0
713,59899.1007,3,3.249064,2.133093,0
713,59899.1116,4,7.382133,3.388385,0
713,59899.1226,5,-4.677240,9.115748,0
713,59902.0445,2,9.166100,1.426165,0
713,59902.0522,1,8.076466,1.395627,0
713,59902.0598,3,11.330316,2.051576,1
713,59902.0707,4,9.245844,2.876306,0
713,59902.0817,5,0.942024,7.248375,0
713,59904.0584,0,3.223553,2.679078,0
713,59905.0468,0,14.509829,3.098125,0
713,59906.0474,0,5.995616,2.589032,0
713,59907.0480,0,5.440472,2.469325,0
713,59908.0487,0,5.961231,3.348282,0
713,59909.0494,0,10.137896,2.151001,0
713,59910.0590,0,8.248549,2.160179,0
713,59913.2446,2,5.475236,0.822163,1
713,59913.2522,1,6.833441,0.969664,0
713,59913.2599,3,6.275328,1.430679,0
713,59913.2708,4,4.298039,2.311868,0
713,59913.2818,5,3.143612,5.875287,0
713,59916.0544,2,10.529041,1.787002,0
713,59916.0621,1,9.129021,2.415574,0
713,59916.0697,3,5.509865,2.141148,0
713,59916.0806,4,9.827934,2.274502,0
713,59916.0915,5,2.627945,4.551546,0
713,59924.0589,2,5.190053,0.786980,1
713,59924.0665,1,6.531730,0.851491,1
713,59924.0742,3,9.141804,1.210878,1
713,59924.0851,4,9.810373,1.739901,0
713,59924.0961,5,2.349317,4.040898,0
713,59927.0604,2,5.366942,0.863455,0
713,59927.0680,1,4.619713,0.947374,0
713,59927.0756,3,6.296741,1.472587,0
713,59927.0866,4,2.465199,2.073566,0
713,59927.0975,5,-0.702472,6.396966,0
713,59930.0619,2,2.780317,0.759708,1
713,59930.0695,1,4.959312,0.809846,0
713,59930.0771,3,4.033259,1.196190,0
713,59930.0881,4,4.485665,1.901773,0
713,59930.1063,5,6.218721,4.791905,0
713,59933.0632,2,1.830853,0.639458,0
713,59933.0709,1,1.716145,0.707228,0
713,59933.0785,3,4.893567,0.968482,0
713,59933.0944,4,3.197614,1.429430,0
713,59933.1150,5,3.335699,3.523145,0
713,59935.0739,0,0.554208,1.573855,0
713,59936.0735,0,2.584441,1.804314,0
713,59937.0743,0,6.470248,1.848658,0
713,59938.0754,0,0.724684,2.076312,0
713,59939.0808,0,2.375108,2.243821,0
713,59942.0746,2,-0.148046,0.800387,0
713,59942.0889,1,0.648101,0.878962,0
713,59942.0965,3,0.328905,1.080046,0
713,59942.1074,4,-4.550706,1.669870,0
713,59942.1184,5,2.364145,4.108390,0
713,59945.0770,2,-3.002108,1.474453,0
713,59945.0846,1,-1.725136,2.059556,0
713,59945.0922,3,-1.422123,1.846779,0
713,59945.1032,4,-6.208874,2.036851,0
713,59945.1141,5,-2.945050,4.744831,0
713,60192.2930,2,7.250862,1.972519,0
713,60192.3006,1,7.834616,2.653803,0
713,60192.3082,3,6.543319,2.216304,0
713,60192.3192,4,7.960829,2.695718,0
713,60192.3301,5,3.404367,5.894906,0
713,60195.2343,2,4.326025,1.101823,0
713,60195.2419,1,3.302556,1.100982,1
713,60195.2496,3,5.533146,1.655807,0
713,60195.2605,4,5.854890,2.367182,0
713,60195.2715,5,3.825871,5.774144,0
713,60198.2332,0,2.208139,3.192551,0
713,60199.2358,0,8.620851,2.547614,0
713,60200.2314,0,3.770694,2.643626,0
713,60201.2281,0,6.634655,2.975509,0
713,60202.2255,0,9.813441,2.750465,0
713,60209.2281,2,3.984369,1.161990,0
713,60209.2357,1,1.381281,1.139709,0
713,60209.2433,3,1.715379,1.724909,0
713,60209.2543,4,0.545876,2.622813,0
713,60209.2652,5,6.504875,6.293293,0
713,60212.2147,2,2.873843,1.790648,0
713,60212.2223,1,1.546698,2.668681,0
713,60212.2300,3,5.084908,2.077699,0
713,60212.2409,4,3.087726,2.654123,0
713,60212.2519,5,-2.272981,5.844298,0
713,60223.1948,2,0.405613,1.246678,0
713,60223.2024,1,1.120193,1.277229,0
713,60223.2100,3,1.011539,1.974625,0
713,60223.2210,4,3.507817,2.882992,0
713,60223.2319,5,14.770886,6.656366,0
713,60226.2721,2,1.071414,0.746168,0
713,60226.2797,1,1.648819,0.776689,0
713,60226.2931,3,1.727918,1.133994,0
713,60226.3129,4,-0.916487,1.736045,0
713,60226.3238,5,3.996732,4.304620,0
713,60236.1862,0,4.067199,2.328237,0
713,60238.2696,2,-0.087907,0.758784,0
713,60238.2803,1,-0.829578,0.881391,0
713,60238.2879,3,-0.576265,1.265385,0
713,60238.2988,4,0.329135,1.983817,0
713,60238.3098,5,-4.923808,4.832184,0
713,60241.1342,2,2.333379,1.636026,0
713,60241.1418,1,-0.293893,2.097461,0
713,60241.1495,3,3.571144,1.816384,0
713,60241.1604,4,1.146531,2.471305,0
713,60241.1713,5,-7.436915,5.122927,0
713,60260.0773,0,1.232121,3.040076,0
713,60261.0632,0,2.412768,2.796987,0
713,60262.0637,0,-1.678317,2.631186,0
713,60263.0643,0,-0.390618,2.617705,0
713,60264.0716,0,1.131548,2.522264,0
713,60265.0867,0,-2.189290,2.832789,0
713,60267.0443,2,-2.785007,1.335972,0
713,60267.0519,1,-1.797494,1.524965,0
713,60267.0595,3,-4.881196,1.920069,0
713,60267.0705,4,3.231234,2.734247,0
713,60267.0814,5,-12.699218,5.767424,0
713,60270.1082,2,0.393975,2.344530,0
713,60270.1158,1,2.370688,3.545214,0
713,60270.1234,3,2.489378,2.970400,0
713,60270.1344,4,-7.822262,3.554679,0
713,60270.1453,5,-10.877887,6.829591,0
713,60278.0525,2,-0.607012,0.969379,0
713,60278.0601,1,-0.027766,1.022582,0
713,60278.0677,3,2.024312,1.532588,0
713,60278.0786,4,-2.256550,2.154194,0
713,60278.0896,5,-1.112494,5.843420,0
713,60281.0552,2,-1.695972,1.034572,0
713,60281.0629,1,-1.833499,1.044974,0
713,60281.0705,3,-0.919016,1.470630,0
713,60281.0814,4,-1.391540,2.003621,0
713,60281.0924,5,-3.945375,4.607381,0
713,60284.0557,2,-2.572076,0.783297,0
713,60284.0633,1,-2.387862,0.801296,0
713,60284.0709,3,-5.832908,1.240340,0
713,60284.0819,4,-1.649157,1.857165,0
713,60284.0928,5,8.627832,4.482957,0
713,60287.0577,2,-0.764727,0.906658,0
713,60287.0653,1,-1.477176,0.966182,0
713,60287.0729,3,-0.536819,1.463827,0
713,60287.0839,4,-0.667864,2.361719,0
713,60287.0948,5,-5.286497,6.810267,0
713,60290.0641,0,-1.816348,2.038470,0
713,60291.0599,0,-5.132619,1.989085,0
713,60292.0607,0,-5.080487,1.908693,0
713,60293.0607,0,-5.075594,1.939040,0
713,60294.0616,0,-0.566193,1.833754,0
713,60295.0621,0,-3.857503,2.114682,0
713,60297.1169,2,-4.830737,0.921245,0
713,60297.1245,1,-4.334117,1.068175,0
713,60297.1321,3,-4.767125,1.234546,0
713,60297.1431,4,-4.473659,1.723665,0
713,60297.1540,5,-10.414721,4.258311,0
713,60300.0641,2,-5.492156,1.607434,0
713,60300.0717,1,-3.599649,2.103016,0
713,60300.0793,3,-3.476922,1.725975,0
713,60300.1031,4,-2.745461,2.269754,0
713,60300.1160,5,7.738044,4.946638,0
713,60554.2916,0,-9.100129,2.393532,0
713,60555.2620,0,-8.218450,2.308315,1
713,60556.2548,0,-5.576579,2.770439,0
713,60557.2501,0,-9.173389,2.218352,0
713,60558.2534,0,-13.083604,2.663738,0
713,60559.2490,0,-9.237353,2.428750,0
713,60560.2424,0,-10.050170,3.275514,0
713,60567.2231,2,-8.265152,1.515329,0
713,60567.2308,1,-8.954789,2.105672,1
713,60567.2384,3,-8.418892,1.630414,0
713,60567.2493,4,-12.286801,1.978125,1
713,60567.2603,5,-11.054881,4.445991,0
713,60578.2746,2,-4.951467,1.229683,0
713,60578.2822,1,-7.403615,1.305094,0
713,60578.2898,3,-5.050255,1.839125,1
713,60578.3008,4,-7.385537,2.667687,0
713,60578.3117,5,-6.356452,6.387929,0
713,60581.1779,2,-5.760825,1.288651,1
713,60581.1855,1,-7.428378,1.275975,0
713,60581.1931,3,-6.902376,1.927237,0
713,60581.2041,4,-9.594004,2.818656,0
713,60581.2150,5,-14.211164,6.624023,0
713,60582.2087,0,-11.829331,2.358846,0
713,60583.1842,0,-9.363182,3.042286,0
713,60584.1807,0,-9.220502,2.544668,0
713,60585.1757,0,-3.587870,2.280919,0
713,60586.1765,0,-9.129416,2.146863,0
713,60587.1702,0,-5.876253,2.481174,0
713,60588.1666,0,-9.116284,2.157747,0
713,60593.1682,2,-9.569608,0.985850,1
713,60593.1758,1,-8.809836,1.078624,1
713,60593.1834,3,-9.553467,1.347112,1
713,60593.1944,4,-9.193518,1.914358,1
713,60593.2053,5,-8.280509,4.493694,0
713,60596.1820,2,-8.760753,2.037911,0
713,60596.1896,1,-4.396494,2.863201,0
713,60596.1972,3,-11.907238,2.252078,1
713,60596.2081,4,-4.786119,2.784098,0
713,60596.2191,5,-5.489277,6.255779,0
713,60605.1380,2,-9.696579,0.873996,1
713,60605.1456,1,-11.159884,0.883977,1
713,60605.1532,3,-12.394593,1.305202,1
713,60605.1642,4,-9.511388,1.906236,0
713,60605.1751,5,-6.906372,4.587698,0
713,60608.1308,2,-9.163915,0.800012,1
713,60608.1384,1,-11.715749,0.823976,1
713,60608.1460,3,-11.449253,1.202452,0
713,60608.1569,4,-12.221146,1.749559,1
713,60608.1679,5,-2.633516,4.132709,0
713,60611.1227,2,-10.067919,0.717739,1
713,60611.1303,1,-9.289042,0.761477,1
713,60611.1380,3,-10.801243,1.080986,1
713,60611.1489,4,-11.623042,1.560488,1
713,60611.1599,5,-7.861447,3.710802,0
713,60612.1183,0,-11.605895,1.778605,1
713,60613.1019,0,-11.340659,1.930082,1
713,60614.0960,0,-10.934606,2.143276,1
713,60615.0917,0,-14.735178,2.326417,0
713,60616.0927,0,-12.353376,2.357691,1
713,60617.0896,0,-6.599936,2.023456,0
713,60620.1350,0,-6.110061,2.056073,0
713,60621.1263,2,-5.537477,0.829998,1
713,60621.1339,1,-7.972793,0.877838,1
713,60621.1416,3,-7.565215,1.233034,1
713,60621.1525,4,-6.638791,1.780862,0
713,60621.1635,5,-9.333499,4.242186,0
713,60624.0821,2,-4.490414,1.314625,0
713,60624.0897,1,-5.545699,1.695655,0
713,60624.0974,3,-7.286825,1.600662,0
713,60624.1083,4,-6.478677,2.086655,0
713,60624.1193,5,-7.099849,4.810002,0
713,60627.2801,2,-6.219934,2.530638,0
713,60627.2877,1,-5.039655,3.228468,0
713,60627.2954,3,-0.950650,2.782719,0
713,60627.3063,4,-4.143107,3.379841,0
713,60627.3173,5,0.243241,7.639313,0
713,60632.0400,2,-5.855491,1.492071,0
713,60632.0476,1,-6.847743,1.453141,0
713,60632.0552,3,-9.374930,2.217679,0
713,60632.0662,4,-10.557325,3.237312,0
713,60632.0771,5,-2.921649,7.857955,0
713,60635.0469,2,-6.480945,1.091159,1
713,60635.0545,1,-6.966879,1.087843,1
713,60635.0621,3,-6.185159,1.629475,1
713,60635.0731,4,-5.490345,2.377301,0
713,60635.0840,5,-2.292507,5.646507,0
713,60640.0504,2,-7.312206,0.974580,1
713,60640.0580,1,-7.250492,0.991461,0
713,60640.0656,3,-10.161006,1.479101,1
713,60640.0766,4,-6.631466,2.145102,0
713,60640.0875,5,-10.591419,5.138685,0
713,60643.0609,0,-9.289350,1.992813,1
713,60644.0533,0,-8.482151,2.118450,0
713,60645.0537,0,-2.605739,2.197297,1
713,60646.0548,0,-8.104684,2.135281,0
713,60647.0546,0,-7.506279,2.275638,0
713,60648.0553,0,-10.602926,1.838902,1
713,60649.0561,0,-12.232555,1.708795,0
713,60651.1265,2,-9.331477,0.865811,1
713,60651.1451,1,-10.061421,0.932510,1
713,60651.1527,3,-9.335849,1.315029,1
713,60651.1637,4,-6.167844,1.952829,0
713,60651.1746,5,-10.171921,4.815349,0
713,60654.0597,2,-9.607999,1.647062,1
713,60654.0673,1,-6.258916,2.222855,0
713,60654.0749,3,-9.524345,1.901351,0
713,60654.0859,4,-9.513783,2.389906,0
713,60654.0968,5,-9.744430,5.404162,0
713,60662.1451,2,-5.698765,1.334831,0
713,60662.1527,1,-4.317381,2.017339,0
713,60662.1603,3,-6.093997,1.675434,0
713,60662.1713,4,-5.760686,2.093239,0
713,60662.1822,5,-5.400730,4.721159,0
713,60665.0637,2,-6.826318,0.676851,1
713,60665.0713,1,-6.498077,0.740772,1
713,60665.0789,3,-5.938825,1.048616,1
713,60665.1017,4,-6.440791,1.543503,1
713,60665.1156,5,-13.727009,3.770338,0
713,60668.0647,2,-6.938087,0.920544,1
713,60668.0723,1,-8.995543,0.954973,1
713,60668.0893,3,-10.263328,1.437371,1
713,60668.1055,4,-5.455149,2.164149,0
713,60668.1165,5,-9.138229,5.354884,0
713,60671.0655,0,-10.165054,1.726118,1
713,60672.0693,0,-10.828177,1.470152,1
713,60673.0745,0,-12.148479,2.243120,0
713,60674.0798,0,-8.669188,2.216094,0
730,59798.3205,2,1.177371,1.364300,0
730,59798.3281,1,2.320849,1.159247,0
730,59798.3357,3,2.939447,1.771328,0
730,59798.3466,4,2.128097,2.610659,0
730,59798.3576,5,-12.809639,5.380097,0
730,59801.3553,2,0.111235,2.460576,0
730,59801.3629,1,-3.393080,3.564052,0
730,59801.3705,3,-1.899219,2.292693,0
730,59801.3815,4,2.284906,2.523534,0
730,59801.3924,5,5.203419,5.395980,0
730,59818.2740,0,-2.342200,1.801066,0
730,59819.2541,0,3.380978,2.469600,0
730,59820.2522,0,-2.230815,1.915426,0
730,59821.2478,0,1.159034,2.461736,0
730,59822.2433,0,5.942166,2.901580,0
730,59823.2659,0,-0.180970,2.714361,0
730,59826.3105,2,0.521923,0.925337,0
730,59826.3181,1,-1.421768,0.929596,0
730,59826.3258,3,0.972355,1.513987,0
730,59826.3367,4,-0.570261,2.162375,0
730,59826.3477,5,-2.301237,5.548611,0
730,59842.2456,2,0.156290,0.853800,0
730,59842.2532,1,-0.567360,0.819375,0
730,59842.2608,3,-0.251899,1.325633,0
730,59842.2718,4,2.019500,2.173066,0
730,59842.2827,5,10.142254,6.086383,0
730,59851.1792,0,-1.472170,2.597541,0
730,59854.1485,2,0.368931,1.230250,0
730,59854.1563,1,0.664051,1.345911,0
730,59854.1640,3,3.201455,1.909905,0
730,59854.1750,4,3.012713,2.778862,0
730,59854.1860,5,3.750187,5.803461,0
730,59857.1408,2,1.076537,2.141015,0
730,59857.1485,1,5.693109,2.937809,0
730,59857.1563,3,-2.640246,2.100464,0
730,59857.1673,4,0.402461,2.684283,0
730,59857.1782,5,-4.509360,6.643411,0
730,59867.1112,2,-0.449365,1.088300,0
730,59867.1189,1,0.282022,0.981426,0
730,59867.1267,3,0.026595,1.544194,0
730,59867.1377,4,0.956947,2.364042,0
730,59867.1487,5,2.724518,5.845339,0
730,59870.1049,2,1.070328,0.976301,0
730,59870.1126,1,0.511964,0.828288,0
730,59870.1204,3,-0.505236,1.377689,0
730,59870.1314,4,0.251195,2.289763,0
730,59870.1424,5,4.119082,5.293428,0
730,59873.0971,2,-0.211154,1.045822,0
730,59873.1049,1,-1.287062,1.048773,0
730,59873.1126,3,-1.557674,1.446841,0
730,59873.1236,4,-0.739414,2.074561,0
730,59873.1346,5,5.151175,4.601235,0
730,59874.1461,0,0.412505,1.627923,0
730,59875.0995,0,-2.200486,2.037783,0
730,59876.0980,0,-2.931559,2.450620,0
730,59877.0976,0,2.024089,1.789397,0
730,59878.0964,0,-1.250103,2.029308,0
730,59879.0895,0,-0.671039,1.877854,0
730,59880.1017,0,0.189355,1.384724,0
730,59884.1760,2,-0.108323,0.771566,0
730,59884.1836,1,-1.113737,0.892852,0
730,59884.1913,3,-0.427802,1.142666,0
730,59884.2022,4,1.402694,1.614300,0
730,59884.2132,5,-1.060647,4.220271,0
730,59887.2856,2,1.474370,1.835391,0
730,59887.2933,1,-3.203188,2.221069,0
730,59887.3009,3,-5.435799,2.359130,0
730,59887.3118,4,0.192088,3.154000,0
730,59887.3228,5,-1.082339,7.966248,0
730,59896.1307,2,1.145174,0.826742,0
730,59896.1384,1,-0.032153,0.707979,0
730,59896.1460,3,-0.357363,1.398256,0
730,59896.1569,4,-2.788487,2.198583,0
730,59896.1679,5,2.883538,5.962979,0
730,59899.1519,2,-1.456884,1.371527,0
730,59899.1595,1,-0.707794,1.396877,0
730,59899.1672,3,5.298447,2.388603,0
730,59899.1781,4,6.412822,3.720956,0
730,59899.1891,5,1.091714,7.924479,0
730,59902.1384,2,-0.887660,1.191683,0
730,59902.1460,1,0.168580,1.085883,0
730,59902.1537,3,-1.369444,1.950019,0
730,59902.1646,4,0.303218,2.770533,0
730,59902.1755,5,11.777126,7.044582,0
730,59904.1053,0,-0.620050,2.301550,0
730,59905.0555,0,0.922903,2.527480,0
730,59906.0562,0,-1.153271,2.043133,0
730,59907.0567,0,0.449173,1.764913,0
730,59908.0681,0,0.837362,2.710272,0
730,59909.0582,0,-0.985495,1.850359,0
730,59910.0503,0,-0.355463,1.880359,0
730,59914.0526,2,-0.833646,1.445693,0
730,59914.0602,1,0.370377,1.970406,0
730,59914.0678,3,-1.719942,1.692403,0
730,59914.0788,4,1.004354,2.274112,0
730,59914.0897,5,0.138586,5.367689,0
730,59924.1060,2,-0.787230,1.137160,0
730,59924.1136,1,-1.572903,1.584968,0
730,59924.1212,3,0.555294,1.735223,0
730,59924.1322,4,-2.475216,2.533980,0
730,59924.1431,5,-0.816748,5.644574,0
730,59927.1074,2,-0.474538,1.196533,0
730,59927.1151,1,0.973025,1.142775,0
730,59927.1227,3,-2.039601,1.598035,0
730,59927.1336,4,-1.036243,2.305239,0
730,59927.1446,5,-3.438392,5.903537,0
730,59930.1236,2,-0.114812,0.945627,0
730,59930.1312,1,0.475511,0.835235,0
730,59930.1388,3,0.226621,1.287869,0
730,59930.1498,4,-3.755495,2.037717,0
730,59930.1607,5,2.542647,5.343603,0
730,59933.1249,2,0.603719,0.695106,0
730,59933.1325,1,-0.226574,0.698751,0
730,59933.1401,3,0.106692,1.273440,0
730,59933.1511,4,0.993756,1.919590,0
730,59933.1620,5,5.318815,5.441072,0
730,59934.0638,0,1.190260,1.159169,0
730,59935.0646,0,-0.320948,1.132809,0
730,59936.0642,0,-1.230814,1.533033,0
730,59937.0650,0,-0.751357,1.654440,0
730,59938.0647,0,-3.109122,2.015928,0
730,59939.0650,0,1.571790,2.219707,0
730,60165.3032,2,-0.502432,1.200698,0
730,60165.3109,1,-2.832010,1.356671,0
730,60165.3186,3,1.843434,1.524752,0
730,60165.3295,4,3.196369,2.029726,0
730,60165.3405,5,-8.889149,4.793297,0
730,60168.2892,2,-0.176546,0.837958,0
730,60168.2970,1,0.201754,0.793672,0
730,60168.3047,3,0.369397,1.305260,0
730,60168.3157,4,-2.235131,2.071596,0
730,60168.3267,5,-5.047883,4.827778,0
730,60176.2820,0,1.753881,2.391554,0
730,60177.2726,0,-1.260694,2.790846,0
730,60181.4088,2,1.103341,0.929294,0
730,60181.4164,1,1.391831,1.272189,0
730,60181.4232,3,-4.766650,3.249354,0
730,60183.2660,2,-2.848838,1.924783,0
730,60183.2736,1,-3.133007,2.955767,0
730,60183.2812,3,2.159384,2.557742,0
730,60183.2922,4,-5.836310,3.003132,0
730,60183.3031,5,-8.716421,6.192660,0
730,60195.2812,2,-0.162802,0.937681,0
730,60195.2888,1,0.864197,1.028533,0
730,60195.2964,3,-2.294667,1.631539,0
730,60195.3073,4,-2.693345,2.345676,0
730,60195.3183,5,-0.570636,6.289552,0
730,60198.2690,0,1.026459,2.192766,0
730,60199.2186,0,0.527036,2.176080,0
730,60200.2139,0,-1.516695,2.611164,0
730,60201.2072,0,0.150572,2.513518,0
730,60202.2089,0,-1.982165,2.505382,0
730,60209.1811,2,-0.462435,1.572888,0
730,60209.1888,1,2.153122,1.421764,0
730,60209.1965,3,0.765511,1.838664,0
730,60209.2075,4,-1.591831,2.917069,0
730,60209.2184,5,0.939232,6.712256,0
730,60212.1675,2,-2.669531,1.944909,0
730,60212.1753,1,3.706729,2.772595,0
730,60212.1830,3,-1.340184,2.303333,0
730,60212.1941,4,-3.525083,2.598093,0
730,60212.2050,5,7.831807,5.678200,0
730,60223.2416,2,1.328195,1.188578,0
730,60223.2493,1,-0.298775,1.249490,0
730,60223.2569,3,2.987647,1.656098,0
730,60223.2678,4,4.241424,2.481011,0
730,60223.2788,5,9.333996,5.811805,0
730,60226.3337,2,-0.679090,0.949060,0
730,60226.3413,1,1.282680,0.952563,0
730,60226.3489,3,0.970081,1.800158,0
730,60226.3599,4,1.366870,3.079182,0
730,60226.3708,5,-4.552550,8.128254,0
730,60238.3197,2,-0.599053,1.058323,0
730,60238.3273,1,0.734427,1.161131,0
730,60238.3349,3,2.553997,2.179708,0
730,60238.3459,4,1.377842,3.840058,0
730,60238.3568,5,-19.159811,11.281384,0
730,60241.0870,2,0.823192,2.008905,0
730,60241.0948,1,3.386674,3.088520,0
730,60241.1025,3,0.043122,2.453789,0
730,60241.1136,4,-1.052531,2.925313,0
730,60241.1245,5,-8.036972,5.953956,0
730,60250.1708,2,-0.962673,2.112349,0
730,60250.1957,1,0.580816,2.829899,0
730,60250.2034,3,6.845217,2.940232,0
730,60250.2143,4,0.204509,3.994097,0
730,60250.2253,5,1.290714,7.954757,0
730,60261.1296,0,1.237353,2.094631,0
730,60262.0550,0,3.469973,2.338792,0
730,60263.0556,0,2.352035,1.998888,0
730,60264.0559,0,-2.396658,2.192123,0
730,60265.0780,0,3.070599,2.439756,0
730,60268.0449,2,-0.465229,1.410433,0
730,60268.0525,1,0.174091,1.861911,0
730,60268.0601,3,0.538344,2.137292,0
730,60268.0711,4,-3.556071,2.741589,0
730,60268.0820,5,-3.639747,6.760314,0
730,60278.0993,2,2.521567,1.652593,0
730,60278.1069,1,-2.468382,2.289480,0
730,60278.1145,3,-1.407348,2.177464,0
730,60278.1255,4,3.475310,3.068326,0
730,60278.1364,5,8.474236,7.658961,0
730,60281.1023,2,-1.139811,0.801878,0
730,60281.1099,1,-1.247972,0.800422,0
730,60281.1175,3,-1.347594,1.374244,0
730,60281.1285,4,-0.890039,1.996277,0
730,60281.1394,5,2.285095,5.189152,0
730,60284.1027,2,-0.679968,0.840813,0
730,60284.1104,1,-0.530991,0.766401,0
730,60284.1180,3,-1.148911,1.164351,0
730,60284.1289,4,0.493227,1.707686,0
730,60284.1399,5,-4.683412,4.445528,0
730,60287.1047,2,0.288175,1.256500,0
730,60287.1123,1,-2.067724,1.090506,0
730,60287.1200,3,-0.846692,1.802521,0
730,60287.1309,4,-5.051833,2.972183,0
730,60287.1418,5,-13.252449,7.029711,0
730,60290.0761,0,-3.000368,1.929932,0
730,60291.0689,0,1.081815,1.681175,0
730,60292.0699,0,2.211185,1.882060,0
730,60293.0699,0,0.182480,1.575780,0
730,60294.0708,0,1.605139,1.418435,0
730,60532.3019,2,20.994711,1.047298,1
730,60532.3097,1,1.504146,0.960956,0
730,60532.3173,3,31.523088,1.569497,1
730,60532.3282,4,41.159981,2.310168,1
730,60532.3392,5,46.795868,5.458707,1
730,60535.2802,2,20.880348,1.426747,0
730,60535.2879,1,2.271271,1.348233,0
730,60535.2957,3,30.361010,2.107024,1
730,60535.3068,4,40.715591,3.043571,1
730,60535.3177,5,47.310059,7.197146,1
730,60538.2826,2,19.450977,1.865142,1
730,60538.2903,1,3.462672,2.695356,0
730,60538.2980,3,33.572102,1.944897,1
730,60538.3089,4,38.518837,2.334413,1
730,60538.3199,5,40.146099,5.039364,1
730,60554.2651,0,0.190944,2.266587,0
730,60555.2411,0,0.098122,2.049620,0
730,60556.2370,0,-0.253067,2.551228,0
730,60557.2322,0,-2.200897,1.848830,0
730,60558.2332,0,-3.459960,2.511074,0
730,60559.2274,0,0.328893,2.224590,0
730,60560.2268,0,2.453341,3.110694,0
730,60567.3291,2,15.044784,0.951184,1
730,60567.3368,1,-0.142653,1.050350,0
730,60567.3444,3,18.416132,1.262663,1
730,60567.3553,4,28.234451,1.676854,1
730,60567.3663,5,31.623583,4.281011,1
730,60580.1736,2,12.164557,1.463993,1
730,60580.1813,1,2.065962,1.402610,0
730,60580.1889,3,10.053763,2.203885,1
730,60580.1999,4,19.975168,3.213686,1
730,60580.2108,5,24.093925,7.662856,0
730,60582.1681,0,-0.473370,2.422541,0
730,60583.1640,0,-3.070249,3.006098,0
730,60584.1591,0,0.970706,2.362254,0
730,60585.1601,0,-0.533032,1.881978,0
730,60586.1564,0,-0.049936,1.830623,0
730,60587.1540,0,-2.202578,2.138732,0
730,60588.1461,0,1.361049,1.798501,0
730,60593.1209,2,6.307311,1.155241,1
730,60593.1287,1,0.462838,1.313489,0
730,60593.1365,3,8.789671,1.545997,1
730,60593.1476,4,10.031554,2.179338,0
730,60593.1585,5,10.850924,5.118365,0
730,60596.1351,2,8.231540,2.483539,0
730,60596.1427,1,1.623348,3.539990,0
730,60596.1504,3,9.673650,2.658536,0
730,60596.1613,4,8.778720,3.238315,0
730,60596.1723,5,10.870938,7.199404,0
730,60605.0908,2,5.607800,0.980471,1
730,60605.0986,1,-0.587054,0.918425,0
730,60605.1063,3,6.155015,1.461859,1
730,60605.1174,4,7.274523,2.152366,0
730,60605.1283,5,2.950838,5.145659,0
730,60608.0836,2,5.938226,0.889949,0
730,60608.0913,1,0.898013,0.825604,0
730,60608.0991,3,5.509429,1.330889,0
730,60608.1101,4,9.166319,1.962560,0
730,60608.1211,5,3.346682,4.667600,0
730,60611.0756,2,2.112415,0.773398,1
730,60611.0833,1,0.247475,0.704158,0
730,60611.0911,3,1.898379,1.172223,0
730,60611.1021,4,4.244992,1.745410,0
730,60611.1130,5,6.172510,4.175368,0
730,60612.0813,0,1.228119,1.461220,0
730,60613.0818,0,1.540095,1.541647,0
730,60614.0803,0,1.231758,1.758784,0
730,60615.0761,0,-0.502854,1.998764,0
730,60616.0769,0,-2.247711,2.024976,0
730,60617.0737,0,-1.035569,1.574103,0
730,60620.1444,0,-1.018565,1.722706,0
730,60621.2673,2,2.330264,0.944892,0
730,60621.2749,1,-0.153496,0.950369,0
730,60621.2825,3,3.589653,1.651967,0
730,60621.2934,4,1.950011,2.746167,0
730,60621.3044,5,6.258384,7.527862,0


================================================
FILE: examples/data/plasticc_training_set_metadata_1k.csv
================================================
object_id,ra,decl,gal_l,gal_b,ddf,hostgal_specz,hostgal_photoz,hostgal_photoz_err,distmod,mwebv,target
615,349.046051,-61.943836,320.796530,-51.753706,1,0.0000,0.0000,0.0000,nan,0.017,92
713,53.085938,-27.784405,223.525509,-54.460748,1,1.8181,1.6267,0.2552,45.4063,0.007,88
730,33.574219,-6.579593,170.455585,-61.548219,1,0.2320,0.2262,0.0157,40.2561,0.021,42
745,0.189873,-45.586655,328.254458,-68.969298,1,0.3037,0.2813,1.1523,40.7951,0.007,90
1124,352.711273,-63.823658,316.922299,-51.059403,1,0.1934,0.2415,0.0176,40.4166,0.024,90
1227,35.683594,-5.379379,171.992947,-59.253501,1,0.0000,0.0000,0.0000,nan,0.020,65
1598,347.846710,-64.760857,318.929827,-49.143596,1,0.1352,0.1820,0.0304,39.7279,0.019,90
1632,348.595886,-63.072620,320.023289,-50.713060,1,0.6857,0.7014,0.0100,43.1524,0.021,42
1920,149.414062,3.433834,234.919132,42.245550,1,0.3088,0.3229,0.3360,41.1401,0.027,90
1926,149.414062,1.940072,236.565366,41.393323,1,0.0000,0.0000,0.0000,nan,0.018,65
2072,0.965665,-46.375080,325.845907,-68.579427,1,0.1516,0.1900,0.0104,39.8317,0.007,90
2103,346.500000,-62.320400,321.951129,-50.736054,1,0.1695,0.5409,0.2283,42.4667,0.020,42
2300,359.446716,-44.201530,331.730015,-69.805709,1,0.2360,2.7474,0.5335,46.7959,0.010,42
2330,359.805206,-46.768478,327.135979,-67.829903,1,0.4541,0.5736,0.2827,42.6207,0.011,90
2624,346.655182,-63.260487,320.952196,-50.040935,1,0.0000,0.0000,0.0000,nan,0.019,65
2677,53.964844,-28.630989,225.142950,-53.813613,1,0.0000,0.0000,0.0000,nan,0.009,16
2922,352.398651,-62.696659,318.017427,-51.967966,1,0.1539,0.1469,0.0094,39.2171,0.020,67
3041,346.130127,-63.072620,321.423103,-50.042305,1,0.1069,0.1274,0.0198,38.8800,0.020,67
3285,150.820312,1.641510,237.994507,42.358984,1,0.1610,0.1818,0.0079,39.7258,0.020,42
3423,349.615387,-63.636005,318.927246,-50.506542,1,1.9876,1.1213,0.1591,44.4078,0.018,95
3489,150.117188,2.836105,236.124718,42.483719,1,1.1330,1.4377,0.2168,45.0753,0.016,88
3910,0.589520,-47.161343,325.385896,-67.769893,1,0.1969,2.6766,0.5926,46.7274,0.009,62
4088,0.965665,-46.375080,325.845907,-68.579427,1,0.4833,0.4644,0.0321,42.0691,0.007,88
4132,359.811707,-45.191612,329.485675,-69.150905,1,0.0561,0.0556,0.0301,36.9750,0.010,42
4171,2.097458,-45.783966,324.737840,-69.478613,1,0.0000,0.0000,0.0000,nan,0.011,16
4173,152.050781,3.284369,237.157374,44.318466,1,0.5149,0.5512,0.0221,42.5158,0.019,15
4220,358.648071,-46.375080,329.462659,-67.716008,1,0.1197,0.1322,0.3351,38.9679,0.009,42
4389,151.699219,3.583322,236.533224,44.205648,1,0.2333,0.2205,0.9667,40.1939,0.016,90
4595,349.615387,-63.636005,318.927246,-50.506542,1,0.5919,0.5995,0.0127,42.7370,0.018,90
4819,35.332031,-5.979157,172.286722,-59.931743,1,0.3053,0.2870,0.0076,40.8445,0.022,90
5527,347.861847,-61.943836,321.519104,-51.424048,1,0.1315,0.2487,0.8604,40.4896,0.017,42
6180,33.222656,-4.780192,167.515653,-60.396584,1,0.3201,0.2685,0.5211,40.6793,0.018,90
6266,0.929752,-44.597992,328.531426,-70.083244,1,0.0000,0.0000,0.0000,nan,0.011,65
6762,348.595886,-63.072620,320.023289,-50.713060,1,0.3863,0.3983,0.0132,41.6735,0.021,90
6947,34.277344,-5.679190,170.314930,-60.410322,1,0.5680,0.5667,0.0181,42.5888,0.020,90
7033,52.207031,-28.291550,224.208534,-55.300157,1,0.0826,0.0850,0.0073,37.9414,0.007,42
7164,347.861847,-61.943836,321.519104,-51.424048,1,0.4299,0.4245,0.0288,41.8371,0.017,90
7315,2.071130,-45.191612,325.606223,-69.989264,1,0.1330,0.1337,0.0171,38.9942,0.011,88
7409,352.398651,-62.696659,318.017427,-51.967966,1,3.4451,0.5176,1.2609,42.3516,0.020,88
7566,359.446716,-44.201530,331.730015,-69.805709,1,0.0000,0.0000,0.0000,nan,0.010,16
7698,347.013428,-62.508568,321.472056,-50.735330,1,0.2628,0.1876,0.0216,39.8011,0.018,90
7703,53.085938,-28.122234,224.100909,-54.509752,1,0.0830,0.0820,0.2257,37.8568,0.007,62
7756,149.414062,2.238686,236.239766,41.565558,1,0.0000,0.0000,0.0000,nan,0.017,16
8328,1.694561,-45.191612,326.278557,-69.858253,1,0.3779,0.4808,0.2970,42.1592,0.011,90
8688,32.695312,-4.929937,166.868469,-60.841230,1,0.0000,0.0000,0.0000,nan,0.018,65
8745,349.966217,-62.696659,319.542989,-51.376556,1,0.6276,0.6136,0.0129,42.7983,0.021,90
8784,34.101562,-5.829153,170.247753,-60.638325,1,0.0000,0.0000,0.0000,nan,0.019,16
9006,34.277344,-5.079716,169.526841,-59.956640,1,0.0000,0.0000,0.0000,nan,0.019,65
9172,346.655182,-63.260487,320.952196,-50.040935,1,0.0000,0.0000,0.0000,nan,0.019,65
9184,0.949367,-45.586655,326.991548,-69.251686,1,1.4031,1.2719,0.4971,44.7463,0.013,88
9203,51.855469,-27.953188,223.543603,-55.561470,1,0.2138,0.1111,0.0626,38.5591,0.008,90
9543,352.132874,-63.636005,317.424173,-51.095855,1,0.0000,0.0000,0.0000,nan,0.021,65
9936,32.871094,-4.780192,166.959493,-60.615132,1,0.1633,0.0719,0.0389,37.5580,0.017,42
9985,150.820312,3.732834,235.666318,43.572109,1,0.0000,0.0000,0.0000,nan,0.016,65
10321,358.312500,-44.993881,332.185785,-68.685906,1,1.0833,1.1162,0.1020,44.3954,0.009,95
10337,54.667969,-27.615883,223.610785,-53.050840,1,0.6830,0.6725,0.0089,43.0404,0.009,90
10349,34.980469,-6.279288,172.180075,-60.389399,1,0.0000,0.0000,0.0000,nan,0.023,65
10478,52.910156,-27.953188,223.774083,-54.639214,1,0.5552,0.2233,0.2002,40.2248,0.007,90
10586,358.636353,-46.768478,328.890146,-67.388837,1,0.6052,0.6017,0.0153,42.7467,0.008,88
10757,52.910156,-26.276812,220.926149,-54.363918,1,0.1699,0.1711,0.0185,39.5801,0.008,52
10796,52.910156,-25.944481,220.366350,-54.301439,1,0.0000,0.0000,0.0000,nan,0.010,65
10798,351.299988,-62.320400,319.038597,-52.026867,1,0.1778,0.1872,0.0121,39.7959,0.018,42
11165,150.996094,2.985506,236.647967,43.287350,1,0.0000,0.0000,0.0000,nan,0.020,16
11359,349.966217,-62.696659,319.542989,-51.376556,1,0.1529,0.1415,0.0072,39.1281,0.021,42
11507,53.085938,-28.122234,224.100909,-54.509752,1,0.3312,0.5095,0.0718,42.3102,0.007,90
11770,346.130127,-63.072620,321.423103,-50.042305,1,0.1415,0.2171,0.4350,40.1560,0.020,62
11773,150.644531,3.583322,235.698235,43.342784,1,0.2207,0.5279,0.1679,42.4027,0.018,52
11931,149.589844,3.583322,234.885369,42.474696,1,0.0000,0.0000,0.0000,nan,0.024,65
11978,358.648071,-46.375080,329.462659,-67.716008,1,0.4920,0.4605,0.0179,42.0472,0.009,90
12695,51.855469,-28.630989,224.733260,-55.649872,1,0.0000,0.0000,0.0000,nan,0.009,92
12872,347.861847,-61.943836,321.519104,-51.424048,1,0.0000,0.0000,0.0000,nan,0.017,65
13079,151.699219,3.583322,236.533224,44.205648,1,0.2019,2.4470,1.0434,46.4913,0.016,90
13138,346.655182,-63.260487,320.952196,-50.040935,1,0.0756,0.5192,0.2158,42.3596,0.019,52
13194,53.789062,-27.784405,223.685697,-53.845803,1,0.5195,0.5624,0.2843,42.5685,0.009,90
13459,150.117188,2.836105,236.124718,42.483719,1,0.3495,0.3449,0.6556,41.3068,0.016,90
13482,33.750000,-4.630479,168.146242,-59.949072,1,0.2929,0.3115,0.0205,41.0501,0.019,90
13504,1.363636,-46.768478,324.669342,-68.371416,1,0.4469,0.3816,0.0766,41.5643,0.008,90
14080,150.996094,4.181528,235.291975,43.970869,1,0.0000,0.0000,0.0000,nan,0.015,65
14156,53.085938,-27.111860,222.384291,-54.355086,1,0.0000,0.0000,0.0000,nan,0.007,65
14279,54.667969,-27.615883,223.610785,-53.050840,1,0.3434,0.5728,0.4518,42.6167,0.009,52
14398,2.071130,-45.191612,325.606223,-69.989264,1,0.2812,0.2634,1.0581,40.6310,0.011,90
14539,150.644531,3.583322,235.698235,43.342784,1,0.2882,0.2359,0.0434,40.3590,0.018,88
14553,359.805206,-46.768478,327.135979,-67.829903,1,1.1897,1.1667,0.1717,44.5143,0.011,95
14601,32.695312,-4.929937,166.868469,-60.841230,1,0.3837,0.3653,0.2005,41.4527,0.018,90
14674,33.750000,-4.630479,168.146242,-59.949072,1,0.2012,0.0567,0.4176,37.0171,0.019,90
14983,349.615387,-63.636005,318.927246,-50.506542,1,0.3391,0.3238,0.0255,41.1476,0.018,90
15002,349.046051,-61.943836,320.796530,-51.753706,1,0.3409,0.3512,0.0531,41.3530,0.017,90
15251,32.871094,-4.780192,166.959493,-60.615132,1,0.4653,2.3270,0.6097,46.3585,0.017,90
15475,351.382965,-64.011238,317.574052,-50.604657,1,0.0000,0.0000,0.0000,nan,0.023,65
15626,346.130127,-63.072620,321.423103,-50.042305,1,0.0000,0.0000,0.0000,nan,0.020,16
15674,0.965665,-46.375080,325.845907,-68.579427,1,0.2927,0.2727,0.3286,40.7172,0.007,90
15700,359.415588,-46.768478,327.729895,-67.686097,1,0.0000,0.0000,0.0000,nan,0.009,16
15718,51.855469,-27.953188,223.543603,-55.561470,1,0.1193,2.3179,0.7672,46.3482,0.008,52
15845,53.789062,-27.784405,223.685697,-53.845803,1,0.3174,0.3471,0.8216,41.3232,0.009,90
15968,149.414062,2.238686,236.239766,41.565558,1,0.3509,0.4729,0.4544,42.1164,0.017,90
16339,51.328125,-27.447618,222.535046,-55.950727,1,0.0000,0.0000,0.0000,nan,0.013,16
16349,150.820312,3.134927,236.341348,43.230123,1,0.0000,0.0000,0.0000,nan,0.016,16
16463,151.699219,3.583322,236.533224,44.205648,1,0.2023,0.1805,0.0254,39.7082,0.016,90
16496,359.415588,-46.768478,327.729895,-67.686097,1,0.3391,0.3895,0.2635,41.6162,0.009,52
16802,53.437500,-29.142223,225.908120,-54.336118,1,0.3145,0.3319,0.0234,41.2094,0.008,90
16983,150.117188,3.732834,235.120533,42.993809,1,0.2899,0.2762,0.1879,40.7495,0.020,90
17094,52.207031,-28.291550,224.208534,-55.300157,1,0.0000,0.0000,0.0000,nan,0.007,16
17172,53.437500,-29.142223,225.908120,-54.336118,1,0.0000,0.0000,0.0000,nan,0.008,16
17285,148.710938,2.836105,235.050801,41.328739,1,0.3073,0.3057,0.0484,41.0025,0.031,90
17366,349.285706,-62.884678,319.786163,-51.046461,1,0.2387,0.2024,0.0247,39.9853,0.018,90
17370,0.949367,-45.586655,326.991548,-69.251686,1,0.3138,0.3391,0.4176,41.2636,0.013,62
17515,52.207031,-28.630989,224.800211,-55.343637,1,0.3577,0.3487,0.0073,41.3345,0.009,90
18029,359.415588,-46.768478,327.729895,-67.686097,1,0.3525,0.3609,0.0112,41.4219,0.009,90
18507,352.711273,-63.823658,316.922299,-51.059403,1,0.3755,0.3457,0.0230,41.3125,0.024,88
18556,51.855469,-26.276812,220.627031,-55.293792,1,0.0000,0.0000,0.0000,nan,0.014,6
18645,358.636353,-46.768478,328.890146,-67.388837,1,0.1640,2.3025,1.1022,46.3306,0.008,62
18706,34.277344,-5.679190,170.314930,-60.410322,1,0.1706,0.1766,0.0158,39.6556,0.020,62
18937,348.595886,-63.072620,320.023289,-50.713060,1,0.2142,0.2222,0.0102,40.2123,0.021,90
18952,151.699219,3.583322,236.533224,44.205648,1,0.2800,0.2658,1.1944,40.6541,0.016,90
19154,351.382965,-64.011238,317.574052,-50.604657,1,0.2354,2.4138,0.5022,46.4553,0.023,67
19213,1.753247,-46.768478,324.030235,-68.498041,1,0.1254,0.1484,0.0086,39.2403,0.014,62
19866,359.814819,-44.399834,330.775011,-69.801007,1,0.2608,0.2877,0.0235,40.8505,0.009,90
20567,351.259003,-64.386185,317.344860,-50.255113,1,0.1549,0.1481,0.2206,39.2350,0.020,62
20934,348.908447,-63.823658,319.169886,-50.176186,1,0.0999,2.5704,1.2137,46.6209,0.018,42
21335,33.574219,-5.379379,168.838090,-60.637536,1,0.1542,0.2082,0.4220,40.0542,0.017,90
22184,358.312500,-44.993881,332.185785,-68.685906,1,0.3508,0.3850,0.6064,41.5869,0.009,90
22574,150.996094,2.985506,236.647967,43.287350,1,0.0000,0.0000,0.0000,nan,0.020,16
22901,151.171875,1.342993,238.602520,42.464379,1,0.2581,0.2502,0.0061,40.5039,0.026,90
23116,53.261719,-27.615883,223.280041,-54.281374,1,0.8237,0.7520,0.0300,43.3376,0.006,15
23127,149.414062,3.433834,234.919132,42.245550,1,0.3221,0.4025,0.7933,41.7004,0.027,52
23299,33.222656,-4.780192,167.515653,-60.396584,1,0.5869,0.5400,0.0151,42.4624,0.018,88
23373,150.117188,3.732834,235.120533,42.993809,1,0.5442,0.5636,0.2043,42.5744,0.020,88
23396,359.811707,-45.191612,329.485675,-69.150905,1,0.5667,0.6192,0.1193,42.8220,0.010,90
23409,348.595886,-63.072620,320.023289,-50.713060,1,0.1407,0.1392,0.0136,39.0882,0.021,52
23539,34.277344,-5.079716,169.526841,-59.956640,1,0.4550,0.2524,0.3112,40.5254,0.019,95
23795,51.855469,-26.276812,220.627031,-55.293792,1,0.0000,0.0000,0.0000,nan,0.014,65
23822,2.457983,-45.389202,324.632685,-69.945696,1,0.2411,0.2420,0.9270,40.4218,0.011,52
23848,33.925781,-5.979157,170.179895,-60.866303,1,0.3316,0.3185,1.0181,41.1057,0.022,90
23857,151.699219,3.583322,236.533224,44.205648,1,0.2988,0.4769,0.0894,42.1379,0.016,90
23931,32.695312,-4.929937,166.868469,-60.841230,1,0.6282,0.6337,0.0073,42.8832,0.018,88
24193,152.050781,2.985506,237.495952,44.143927,1,2.0958,1.3937,0.2518,44.9919,0.019,88
24236,346.655182,-63.260487,320.952196,-50.040935,1,0.0000,0.0000,0.0000,nan,0.019,65
24592,349.966217,-62.696659,319.542989,-51.376556,1,0.2901,0.2846,0.0249,40.8234,0.021,90
24849,53.085938,-27.111860,222.384291,-54.355086,1,0.0000,0.0000,0.0000,nan,0.007,16
24903,52.031250,-26.443335,220.963669,-55.168557,1,0.0000,0.0000,0.0000,nan,0.014,65
24947,150.117188,2.238686,236.784618,42.139082,1,0.4723,0.4521,0.0193,41.9998,0.016,90
24989,34.804688,-5.829153,171.307861,-60.174401,1,0.4468,0.4763,0.0117,42.1349,0.023,90
25003,359.814819,-44.399834,330.775011,-69.801007,1,0.3137,0.2996,0.0218,40.9523,0.009,90
25039,346.562500,-63.448284,320.824720,-49.866957,1,0.3161,0.2675,1.1577,40.6696,0.021,90
25474,151.523438,3.134927,236.900695,43.803170,1,0.5236,0.5626,0.0155,42.5697,0.019,90
25529,358.312500,-44.993881,332.185785,-68.685906,1,0.2835,0.5789,0.2180,42.6448,0.009,90
25577,348.529419,-61.755440,321.293980,-51.763351,1,0.4028,0.3918,0.0170,41.6314,0.016,90
25783,150.820312,3.134927,236.341348,43.230123,1,0.1040,0.1439,0.0116,39.1669,0.016,42
25920,150.644531,3.583322,235.698235,43.342784,1,0.0000,0.0000,0.0000,nan,0.018,16
25925,35.332031,-5.979157,172.286722,-59.931743,1,1.7327,1.7075,0.1320,45.5358,0.022,88
26161,359.415588,-46.768478,327.729895,-67.686097,1,0.0000,0.0000,0.0000,nan,0.009,92
26338,151.171875,2.537361,237.288526,43.169764,1,0.1892,0.2250,0.0141,40.2436,0.024,62
26352,1.708861,-45.586655,325.688716,-69.520253,1,0.0000,0.0000,0.0000,nan,0.011,65
26401,151.699219,3.583322,236.533224,44.205648,1,0.0000,0.0000,0.0000,nan,0.016,16
26531,351.259003,-64.386185,317.344860,-50.255113,1,2.5314,2.4324,0.2792,46.4755,0.020,88
26660,347.846710,-64.760857,318.929827,-49.143596,1,0.0000,0.0000,0.0000,nan,0.019,65
26783,150.820312,1.641510,237.994507,42.358984,1,0.0000,0.0000,0.0000,nan,0.020,92
27124,351.299988,-62.320400,319.038597,-52.026867,1,0.0000,0.0000,0.0000,nan,0.018,16
27339,51.855469,-26.276812,220.627031,-55.293792,1,0.1432,0.1625,0.0226,39.4561,0.014,90
27941,149.414062,1.940072,236.565366,41.393323,1,0.3632,0.3746,0.0319,41.5166,0.018,90
28220,1.694561,-45.191612,326.278557,-69.858253,1,0.2985,0.3605,0.3149,41.4193,0.011,90
28301,0.189873,-45.586655,328.254458,-68.969298,1,0.3606,0.2852,1.3620,40.8288,0.007,90
28391,351.953644,-62.132156,318.777388,-52.347124,1,0.0000,0.0000,0.0000,nan,0.019,92
28636,51.855469,-28.630989,224.733260,-55.649872,1,0.1743,0.4412,0.3366,41.9364,0.009,67
28843,151.171875,2.537361,237.288526,43.169764,1,0.3664,0.3611,0.0225,41.4234,0.024,90
28915,53.789062,-27.784405,223.685697,-53.845803,1,0.0000,0.0000,0.0000,nan,0.009,16
29088,52.558594,-27.279613,222.538937,-54.845107,1,0.3037,0.3244,0.0203,41.1521,0.008,90
29252,51.855469,-28.630989,224.733260,-55.649872,1,0.1439,0.1421,0.0233,39.1376,0.009,42
29416,1.694561,-45.191612,326.278557,-69.858253,1,0.2168,0.1921,0.0349,39.8588,0.011,90
29420,2.097458,-45.783966,324.737840,-69.478613,1,0.5849,0.5559,0.0102,42.5385,0.011,90
29576,346.655182,-63.260487,320.952196,-50.040935,1,0.2362,2.5224,1.0484,46.5713,0.019,90
29668,151.699219,3.583322,236.533224,44.205648,1,0.1461,0.1584,0.0175,39.3960,0.016,42
29670,1.694561,-45.191612,326.278557,-69.858253,1,0.1135,0.1208,0.0198,38.7544,0.011,62
30066,351.259003,-64.386185,317.344860,-50.255113,1,0.0000,0.0000,0.0000,nan,0.020,65
30172,33.574219,-5.379379,168.838090,-60.637536,1,0.5444,0.5455,0.0094,42.4889,0.017,90
30191,150.117188,2.238686,236.784618,42.139082,1,1.5405,1.3073,0.1521,44.8201,0.016,88
30505,151.171875,2.238686,237.619933,42.994783,1,0.0000,0.0000,0.0000,nan,0.024,16
30545,2.071130,-45.191612,325.606223,-69.989264,1,0.2160,0.2221,0.0338,40.2113,0.011,90
30576,51.855469,-27.953188,223.543603,-55.561470,1,0.0000,0.0000,0.0000,nan,0.008,65
30673,349.966217,-62.696659,319.542989,-51.376556,1,0.0000,0.0000,0.0000,nan,0.021,16
30895,349.429535,-62.508568,320.039643,-51.393745,1,0.0000,0.0000,0.0000,nan,0.020,16
31033,349.891296,-64.573555,317.972107,-49.786192,1,0.0000,0.0000,0.0000,nan,0.023,65
31100,1.694561,-45.191612,326.278557,-69.858253,1,0.3614,0.3471,1.2589,41.3231,0.011,90
31310,149.238281,3.882372,234.283829,42.351155,1,0.0000,0.0000,0.0000,nan,0.033,92
31569,346.655182,-63.260487,320.952196,-50.040935,1,1.0133,1.0031,0.0118,44.1084,0.019,95
31605,150.292969,2.686724,236.427488,42.541447,1,0.0000,0.0000,0.0000,nan,0.016,65
31824,352.398651,-62.696659,318.017427,-51.967966,1,0.1838,0.0844,0.3311,37.9246,0.020,42
32238,34.101562,-5.829153,170.247753,-60.638325,1,0.0000,0.0000,0.0000,nan,0.019,16
32309,34.804688,-5.829153,171.307861,-60.174401,1,0.2251,0.2258,0.9011,40.2521,0.023,42
32375,53.964844,-28.630989,225.142950,-53.813613,1,0.0000,0.0000,0.0000,nan,0.009,65
32695,358.636353,-46.768478,328.890146,-67.388837,1,0.7689,0.7806,0.0113,43.4371,0.008,90
33088,351.259003,-64.386185,317.344860,-50.255113,1,0.3437,0.3495,0.0200,41.3400,0.020,90
33179,51.855469,-27.953188,223.543603,-55.561470,1,0.4407,0.4765,0.4079,42.1357,0.008,90
33191,151.171875,2.238686,237.619933,42.994783,1,0.4030,0.4039,0.0174,41.7094,0.024,42
33409,33.222656,-4.780192,167.515653,-60.396584,1,0.0000,0.0000,0.0000,nan,0.018,65
33419,150.820312,3.732834,235.666318,43.572109,1,0.7462,0.7461,0.0356,43.3165,0.016,90
33422,33.574219,-6.579593,170.455585,-61.548219,1,1.1111,1.1054,0.0101,44.3693,0.021,88
34012,35.683594,-5.379379,171.992947,-59.253501,1,0.0853,0.0793,0.0210,37.7805,0.020,52
34166,0.189873,-45.586655,328.254458,-68.969298,1,0.0873,0.0909,0.0246,38.0959,0.007,42
34243,34.101562,-5.829153,170.247753,-60.638325,1,0.1416,0.5642,0.3631,42.5771,0.019,88
34299,346.276581,-64.011238,320.448031,-49.344136,1,0.1901,0.2016,0.0086,39.9759,0.019,62
34437,152.050781,2.985506,237.495952,44.143927,1,0.2657,0.2629,1.0604,40.6263,0.019,67
35197,51.679688,-27.447618,222.618229,-55.642263,1,0.2509,0.2366,0.1269,40.3670,0.010,42
35315,150.468750,3.732834,235.392208,43.283244,1,1.8476,1.5239,0.2256,45.2314,0.020,95
35555,359.805206,-46.768478,327.135979,-67.829903,1,0.0000,0.0000,0.0000,nan,0.011,65
35743,34.277344,-5.679190,170.314930,-60.410322,1,0.0781,0.0752,0.0197,37.6598,0.020,42
35772,150.117188,2.836105,236.124718,42.483719,1,0.2385,0.2588,0.0217,40.5879,0.016,90
35855,0.929752,-44.597992,328.531426,-70.083244,1,0.3815,0.4401,0.5094,41.9300,0.011,90
36085,352.398651,-62.696659,318.017427,-51.967966,1,0.1689,0.1759,0.5357,39.6465,0.020,42
36153,150.468750,1.641510,237.714575,42.075234,1,0.2547,0.2589,0.0256,40.5887,0.017,52
36337,52.558594,-27.279613,222.538937,-54.845107,1,0.0000,0.0000,0.0000,nan,0.008,65
36362,53.085938,-27.784405,223.525509,-54.460748,1,0.0000,0.0000,0.0000,nan,0.007,65
36671,149.589844,3.583322,234.885369,42.474696,1,0.0000,0.0000,0.0000,nan,0.024,65
36783,349.966217,-62.696659,319.542989,-51.376556,1,0.1287,0.1431,0.0129,39.1539,0.021,90
37149,359.816315,-44.003082,331.451340,-70.123054,1,0.9435,0.9017,0.0524,43.8228,0.013,90
37168,53.613281,-27.953188,223.929533,-54.024772,1,0.0000,0.0000,0.0000,nan,0.007,65
37661,32.871094,-4.780192,166.959493,-60.615132,1,0.1226,0.0973,0.0168,38.2528,0.017,52
37776,346.655182,-63.260487,320.952196,-50.040935,1,0.0000,0.0000,0.0000,nan,0.019,16
37865,151.171875,2.238686,237.619933,42.994783,1,0.2263,0.2221,0.0172,40.2112,0.024,90
37872,150.820312,3.134927,236.341348,43.230123,1,0.2517,0.2448,0.0217,40.4506,0.016,67
38174,1.694561,-45.191612,326.278557,-69.858253,1,1.6152,1.7388,0.1564,45.5843,0.011,88
38205,33.750000,-4.630479,168.146242,-59.949072,1,0.2945,0.2311,1.2272,40.3089,0.019,42
38244,346.655182,-63.260487,320.952196,-50.040935,1,0.0000,0.0000,0.0000,nan,0.019,65
38690,33.222656,-4.780192,167.515653,-60.396584,1,0.1801,0.1274,0.0307,38.8795,0.018,90
38730,53.261719,-27.615883,223.280041,-54.281374,1,0.0000,0.0000,0.0000,nan,0.006,16
38754,33.574219,-6.579593,170.455585,-61.548219,1,0.2646,0.2656,0.0093,40.6515,0.021,90
38899,1.666667,-44.399834,327.519190,-70.529554,1,0.4828,0.4754,0.0332,42.1297,0.009,90
39223,150.996094,2.388015,237.313912,42.939977,1,0.0000,0.0000,0.0000,nan,0.021,65
39305,346.562500,-63.448284,320.824720,-49.866957,1,0.4045,0.2986,0.1602,40.9435,0.021,90
39398,51.679688,-27.447618,222.618229,-55.642263,1,0.3347,0.5555,0.5802,42.5365,0.010,90
39597,53.085938,-28.122234,224.100909,-54.509752,1,0.1280,0.1327,0.0064,38.9763,0.007,62
39626,149.414062,2.238686,236.239766,41.565558,1,0.5197,0.4293,0.7344,41.8662,0.017,90
39846,351.382965,-64.011238,317.574052,-50.604657,1,0.1886,0.2780,0.6915,40.7654,0.023,62
40290,35.859375,-4.630479,171.270769,-58.580806,1,0.3153,0.5118,0.8469,42.3221,0.022,42
41515,358.648071,-46.375080,329.462659,-67.716008,1,0.5720,0.5797,0.0188,42.6484,0.009,90
41738,150.117188,3.732834,235.120533,42.993809,1,0.1206,0.1277,0.0222,38.8865,0.020,42
42118,0.574468,-45.981140,327.041068,-68.778764,1,0.1801,0.1977,0.0131,39.9287,0.006,62
42224,51.328125,-27.784405,223.130589,-55.999499,1,0.1119,0.0888,0.1482,38.0396,0.013,42
42288,359.415588,-46.768478,327.729895,-67.686097,1,0.3487,0.3864,0.0241,41.5962,0.009,90
42333,346.562500,-63.448284,320.824720,-49.866957,1,0.1921,0.2046,0.0101,40.0116,0.021,67
42469,2.071130,-45.191612,325.606223,-69.989264,1,1.5989,1.4913,0.1216,45.1735,0.011,95
42689,346.562500,-63.448284,320.824720,-49.866957,1,0.0000,0.0000,0.0000,nan,0.021,65
42776,152.050781,3.284369,237.157374,44.318466,1,0.0000,0.0000,0.0000,nan,0.019,16
42852,351.321442,-64.198746,317.458993,-50.429931,1,0.6771,0.6680,0.0223,43.0226,0.023,88
43028,51.679688,-27.447618,222.618229,-55.642263,1,0.1366,0.1364,0.0092,39.0408,0.010,42
43151,34.980469,-6.279288,172.180075,-60.389399,1,0.1096,0.1352,0.0222,39.0199,0.023,52
43211,34.980469,-6.279288,172.180075,-60.389399,1,0.3321,0.3275,0.0164,41.1762,0.023,90
43337,51.328125,-27.447618,222.535046,-55.950727,1,0.1775,0.2488,0.0180,40.4902,0.013,90
43413,348.595886,-63.072620,320.023289,-50.713060,1,0.0000,0.0000,0.0000,nan,0.021,16
43509,51.855469,-27.953188,223.543603,-55.561470,1,0.0000,0.0000,0.0000,nan,0.008,92
43812,150.820312,3.134927,236.341348,43.230123,1,0.2783,0.2821,0.0219,40.8022,0.016,90
43962,347.861847,-61.943836,321.519104,-51.424048,1,0.6595,0.6813,0.0340,43.0750,0.017,90
44102,152.050781,3.284369,237.157374,44.318466,1,0.2450,0.2640,0.2125,40.6366,0.019,42
44217,51.855469,-27.953188,223.543603,-55.561470,1,0.4288,0.1772,0.6584,39.6645,0.008,90
44309,34.980469,-6.279288,172.180075,-60.389399,1,0.3131,0.3059,1.2366,41.0045,0.023,90
44480,53.964844,-28.630989,225.142950,-53.813613,1,0.0000,0.0000,0.0000,nan,0.009,16
44836,151.171875,1.342993,238.602520,42.464379,1,0.0000,0.0000,0.0000,nan,0.026,65
45060,346.276581,-64.011238,320.448031,-49.344136,1,0.3613,0.3300,0.1387,41.1948,0.019,62
45115,2.097458,-45.783966,324.737840,-69.478613,1,0.0000,0.0000,0.0000,nan,0.011,65
45127,35.859375,-4.630479,171.270769,-58.580806,1,0.1378,0.1359,0.0125,39.0331,0.022,90
45203,150.820312,3.134927,236.341348,43.230123,1,0.0000,0.0000,0.0000,nan,0.016,16
45319,348.595886,-63.072620,320.023289,-50.713060,1,0.1270,0.0737,0.0272,37.6138,0.021,42
45349,32.695312,-4.929937,166.868469,-60.841230,1,0.2821,2.6404,0.9837,46.6916,0.018,67
45549,52.207031,-28.291550,224.208534,-55.300157,1,0.6733,0.7639,0.0513,43.3795,0.007,42
46210,51.855469,-27.953188,223.543603,-55.561470,1,0.0000,0.0000,0.0000,nan,0.008,65
46567,149.414062,3.433834,234.919132,42.245550,1,0.0000,0.0000,0.0000,nan,0.027,16
46804,352.398651,-62.696659,318.017427,-51.967966,1,0.2933,2.3248,0.6338,46.3561,0.020,62
46958,150.820312,3.732834,235.666318,43.572109,1,0.0000,0.0000,0.0000,nan,0.016,65
47148,53.085938,-27.784405,223.525509,-54.460748,1,0.3153,0.3234,0.0167,41.1439,0.007,90
47725,351.259003,-64.386185,317.344860,-50.255113,1,0.3615,0.2950,0.7526,40.9133,0.020,90
48187,52.207031,-26.610098,221.298836,-55.042928,1,0.5466,0.5379,0.1063,42.4521,0.014,90
48260,52.207031,-28.291550,224.208534,-55.300157,1,0.1677,0.1746,0.4651,39.6283,0.007,42
48426,351.734680,-62.884678,318.284128,-51.651217,1,0.0000,0.0000,0.0000,nan,0.019,16
48473,51.855469,-27.953188,223.543603,-55.561470,1,0.0809,0.0825,0.0165,37.8722,0.008,42
48575,32.695312,-4.929937,166.868469,-60.841230,1,0.7796,0.7698,0.0437,43.4000,0.018,90
48687,347.812500,-63.448284,320.128971,-50.202348,1,0.7916,0.8371,0.0551,43.6237,0.021,88
48725,53.613281,-28.630989,225.073365,-54.119461,1,0.3658,0.3599,0.1804,41.4145,0.006,67
48749,348.529419,-61.755440,321.293980,-51.763351,1,1.6645,1.5782,0.0657,45.3252,0.016,88
48817,348.586945,-64.573555,318.693903,-49.477869,1,0.0962,0.1446,0.6309,39.1788,0.018,90
48981,358.648071,-46.375080,329.462659,-67.716008,1,0.0000,0.0000,0.0000,nan,0.009,16
49219,33.398438,-3.732834,166.492280,-59.466614,1,0.0000,0.0000,0.0000,nan,0.022,65
49389,349.285706,-62.884678,319.786163,-51.046461,1,0.0000,0.0000,0.0000,nan,0.018,92
49529,53.613281,-27.953188,223.929533,-54.024772,1,0.1974,0.2117,0.0123,40.0947,0.007,62
49783,51.328125,-27.784405,223.130589,-55.999499,1,0.0000,0.0000,0.0000,nan,0.013,65
49937,151.347656,3.583322,236.252362,43.918627,1,0.0947,0.1091,0.0218,38.5175,0.015,52
50277,349.046051,-61.943836,320.796530,-51.753706,1,0.3009,0.2478,1.3214,40.4807,0.017,67
50395,349.891296,-64.573555,317.972107,-49.786192,1,0.1552,0.1574,0.3022,39.3801,0.023,90
51178,52.558594,-27.279613,222.538937,-54.845107,1,0.9668,0.9534,0.0188,43.9721,0.008,88
51279,1.708861,-45.586655,325.688716,-69.520253,1,0.1760,0.1824,0.3979,39.7343,0.011,90
51318,34.277344,-5.679190,170.314930,-60.410322,1,0.2987,0.3038,0.0273,40.9868,0.020,62
51490,0.574468,-45.981140,327.041068,-68.778764,1,0.3707,0.4717,0.3810,42.1099,0.006,90
51987,352.711273,-63.823658,316.922299,-51.059403,1,0.0000,0.0000,0.0000,nan,0.024,92
52150,52.207031,-26.610098,221.298836,-55.042928,1,0.2746,0.3069,0.0656,41.0122,0.014,90
52320,52.910156,-25.944481,220.366350,-54.301439,1,0.1656,2.0324,0.6458,46.0000,0.010,88
52370,352.711273,-63.823658,316.922299,-51.059403,1,0.0000,0.0000,0.0000,nan,0.024,16
52425,52.910156,-26.276812,220.926149,-54.363918,1,0.1587,0.1182,0.6266,38.7042,0.008,42
52740,53.613281,-27.953188,223.929533,-54.024772,1,0.3653,0.4721,0.8955,42.1118,0.007,90
52854,149.414062,1.940072,236.565366,41.393323,1,0.4383,0.4343,0.0335,41.8957,0.018,90
53025,358.636353,-46.768478,328.890146,-67.388837,1,0.0000,0.0000,0.0000,nan,0.008,65
53249,349.285706,-62.884678,319.786163,-51.046461,1,0.3941,0.4211,0.4203,41.8166,0.018,90
53354,34.980469,-6.279288,172.180075,-60.389399,1,0.2231,0.2134,0.0125,40.1138,0.023,62
53525,349.615387,-63.636005,318.927246,-50.506542,1,0.0000,0.0000,0.0000,nan,0.018,16
53574,0.574468,-45.981140,327.041068,-68.778764,1,0.4176,0.4427,0.0420,41.9454,0.006,90
53782,53.261719,-27.615883,223.280041,-54.281374,1,0.3798,0.3714,0.0189,41.4948,0.006,90
53938,53.437500,-29.142223,225.908120,-54.336118,1,0.0000,0.0000,0.0000,nan,0.008,16
54416,347.812500,-63.448284,320.128971,-50.202348,1,0.3708,0.3734,0.0162,41.5087,0.021,42
54883,347.617462,-62.508568,321.121462,-50.904708,1,0.7844,0.8578,0.0392,43.6891,0.019,88
54915,148.886719,2.686724,235.347248,41.389003,1,0.0000,0.0000,0.0000,nan,0.028,65
55002,52.207031,-28.291550,224.208534,-55.300157,1,0.4345,0.4175,0.0286,41.7946,0.007,90
55018,350.230255,-61.943836,320.053946,-52.070537,1,0.0000,0.0000,0.0000,nan,0.017,65
55033,151.171875,1.342993,238.602520,42.464379,1,0.4920,0.5345,0.0204,42.4355,0.026,42
55060,51.855469,-28.630989,224.733260,-55.649872,1,0.0234,0.0824,0.0202,37.8680,0.009,42
55141,2.097458,-45.783966,324.737840,-69.478613,1,0.0000,0.0000,0.0000,nan,0.011,65
55155,34.101562,-5.829153,170.247753,-60.638325,1,0.0000,0.0000,0.0000,nan,0.019,65
55354,350.230255,-61.943836,320.053946,-52.070537,1,0.1958,2.3870,0.2904,46.4258,0.017,90
55419,150.820312,3.732834,235.666318,43.572109,1,0.0967,0.1092,0.0137,38.5190,0.016,62
55946,347.617462,-62.508568,321.121462,-50.904708,1,0.0000,0.0000,0.0000,nan,0.019,65
56053,150.292969,2.686724,236.427488,42.541447,1,0.8507,0.8283,0.0406,43.5954,0.016,90
56245,1.708861,-45.586655,325.688716,-69.520253,1,0.0000,0.0000,0.0000,nan,0.011,16
56334,358.312500,-44.993881,332.185785,-68.685906,1,0.1955,0.1948,0.0100,39.8925,0.009,62
56349,1.694561,-45.191612,326.278557,-69.858253,1,0.1646,0.2000,0.1961,39.9565,0.011,52
56461,347.812500,-63.448284,320.128971,-50.202348,1,0.1760,2.4036,1.0097,46.4440,0.021,90
56769,33.574219,-5.079716,168.448505,-60.407218,1,0.0000,0.0000,0.0000,nan,0.016,65
56821,52.207031,-26.610098,221.298836,-55.042928,1,0.0775,0.0884,0.0181,38.0309,0.014,62
56893,150.117188,2.836105,236.124718,42.483719,1,0.0000,0.0000,0.0000,nan,0.016,65
56987,33.574219,-6.579593,170.455585,-61.548219,1,0.2910,0.3152,0.0132,41.0794,0.021,62
57205,359.816315,-44.003082,331.451340,-70.123054,1,0.5891,0.6057,0.0306,42.7639,0.013,90
57237,52.207031,-26.610098,221.298836,-55.042928,1,0.2544,0.3807,0.6581,41.5580,0.014,42
57263,351.299988,-62.320400,319.038597,-52.026867,1,0.0000,0.0000,0.0000,nan,0.018,65
57561,152.050781,3.284369,237.157374,44.318466,1,0.0000,0.0000,0.0000,nan,0.019,65
57666,1.666667,-44.399834,327.519190,-70.529554,1,0.4399,0.4648,0.0183,42.0716,0.009,90
57784,348.529419,-61.755440,321.293980,-51.763351,1,0.0000,0.0000,0.0000,nan,0.016,65
58174,348.529419,-61.755440,321.293980,-51.763351,1,1.1032,1.1018,0.1226,44.3607,0.016,42
58265,0.190678,-45.783966,327.956322,-68.803772,1,0.0650,0.1027,1.1126,38.3778,0.005,67
58323,349.615387,-63.636005,318.927246,-50.506542,1,1.0155,1.0187,0.0471,44.1500,0.018,88
59068,34.277344,-5.079716,169.526841,-59.956640,1,0.5716,0.5429,0.1404,42.4762,0.019,42
59128,348.529419,-61.755440,321.293980,-51.763351,1,0.6794,0.4288,0.2152,41.8629,0.016,90
59163,150.644531,3.583322,235.698235,43.342784,1,0.0000,0.0000,0.0000,nan,0.018,65
59427,151.171875,1.342993,238.602520,42.464379,1,0.0000,0.0000,0.0000,nan,0.026,65
59463,52.031250,-26.443335,220.963669,-55.168557,1,0.1439,0.1438,0.0169,39.1654,0.014,42
59580,1.753247,-46.768478,324.030235,-68.498041,1,0.4178,0.5020,0.7485,42.2716,0.014,42
59644,34.980469,-6.279288,172.180075,-60.389399,1,0.4238,0.3347,0.1863,41.2308,0.023,90
59732,347.812500,-63.448284,320.128971,-50.202348,1,0.2489,2.7125,0.9243,46.7623,0.021,90
60023,359.814819,-44.399834,330.775011,-69.801007,1,0.6857,0.6858,0.0077,43.0925,0.009,90
60098,53.261719,-27.615883,223.280041,-54.281374,1,0.2529,0.2478,0.0697,40.4804,0.006,90
60340,148.710938,2.836105,235.050801,41.328739,1,0.3521,0.3472,0.0096,41.3237,0.031,67
60350,52.558594,-27.279613,222.538937,-54.845107,1,0.5295,0.5982,0.1284,42.7310,0.008,62
60376,51.855469,-27.953188,223.543603,-55.561470,1,0.0000,0.0000,0.0000,nan,0.008,16
60407,346.130127,-63.072620,321.423103,-50.042305,1,0.0000,0.0000,0.0000,nan,0.020,65
60554,347.013428,-62.508568,321.472056,-50.735330,1,0.2356,0.2869,0.0805,40.8438,0.018,67
60742,346.562500,-63.448284,320.824720,-49.866957,1,0.0000,0.0000,0.0000,nan,0.021,65
60976,358.648071,-46.375080,329.462659,-67.716008,1,0.0615,0.0821,0.0078,37.8606,0.009,52
61101,151.171875,2.537361,237.288526,43.169764,1,0.0000,0.0000,0.0000,nan,0.024,16
61165,53.789062,-27.784405,223.685697,-53.845803,1,0.1089,0.1122,0.0140,38.5832,0.009,62
61407,53.613281,-27.953188,223.929533,-54.024772,1,0.0000,0.0000,0.0000,nan,0.007,92
61763,52.207031,-28.291550,224.208534,-55.300157,1,0.5749,0.5237,0.0263,42.3819,0.007,90
62078,0.965665,-46.375080,325.845907,-68.579427,1,0.0000,0.0000,0.0000,nan,0.007,65
62187,33.398438,-3.732834,166.492280,-59.466614,1,0.1391,0.1301,1.1896,38.9300,0.022,64
62230,53.085938,-27.784405,223.525509,-54.460748,1,0.0000,0.0000,0.0000,nan,0.007,16
62253,51.328125,-27.447618,222.535046,-55.950727,1,0.7671,0.7610,0.0367,43.3693,0.013,90
62254,53.085938,-27.111860,222.384291,-54.355086,1,0.2738,0.2895,0.0180,40.8666,0.007,90
62384,351.321442,-64.198746,317.458993,-50.429931,1,0.0000,0.0000,0.0000,nan,0.023,65
62541,347.013428,-62.508568,321.472056,-50.735330,1,0.5490,0.3280,0.8012,41.1797,0.018,90
62908,150.820312,3.134927,236.341348,43.230123,1,0.2372,0.2111,0.0189,40.0878,0.016,62
63561,359.814819,-44.399834,330.775011,-69.801007,1,0.7386,0.7247,0.0129,43.2390,0.009,90
63718,0.965665,-46.375080,325.845907,-68.579427,1,0.2891,0.4200,0.0613,41.8100,0.007,90
63860,351.382965,-64.011238,317.574052,-50.604657,1,0.1617,0.1988,0.0128,39.9412,0.023,42
64248,2.097458,-45.783966,324.737840,-69.478613,1,0.1653,0.1710,0.0196,39.5787,0.011,52
64485,348.586945,-64.573555,318.693903,-49.477869,1,0.0000,0.0000,0.0000,nan,0.018,16
64854,1.723404,-45.981140,325.117958,-69.180825,1,0.1163,0.0705,0.0098,37.5117,0.010,62
64888,52.031250,-26.443335,220.963669,-55.168557,1,0.3802,0.4086,0.7097,41.7390,0.014,90
64896,347.846710,-64.760857,318.929827,-49.143596,1,0.0991,0.1224,1.1874,38.7854,0.019,42
64911,150.820312,1.641510,237.994507,42.358984,1,0.2375,2.2621,1.0009,46.2837,0.020,90
65745,53.085938,-28.122234,224.100909,-54.509752,1,0.0680,0.0552,1.1581,36.9555,0.007,90
65749,33.398438,-4.331149,167.226341,-59.936551,1,0.0000,0.0000,0.0000,nan,0.018,65
65877,349.891296,-64.573555,317.972107,-49.786192,1,0.0000,0.0000,0.0000,nan,0.023,65
66126,348.908447,-63.823658,319.169886,-50.176186,1,0.0601,0.0718,0.0163,37.5531,0.018,42
66325,53.261719,-27.615883,223.280041,-54.281374,1,0.0000,0.0000,0.0000,nan,0.006,65
66536,150.996094,2.388015,237.313912,42.939977,1,0.3144,0.3150,0.2975,41.0779,0.021,90
66548,347.617462,-62.508568,321.121462,-50.904708,1,0.3713,0.4282,1.3857,41.8598,0.019,90
66852,2.071130,-45.191612,325.606223,-69.989264,1,0.5802,0.5679,0.0103,42.5945,0.011,42
66904,349.285706,-62.884678,319.786163,-51.046461,1,0.0000,0.0000,0.0000,nan,0.018,16
66967,150.996094,2.388015,237.313912,42.939977,1,0.4580,0.4551,0.0163,42.0168,0.021,88
66999,33.222656,-4.780192,167.515653,-60.396584,1,0.3635,0.4147,0.0332,41.7770,0.018,90
67245,150.292969,2.686724,236.427488,42.541447,1,0.0482,0.0598,0.0147,37.1379,0.016,42
67480,1.753247,-46.768478,324.030235,-68.498041,1,0.0000,0.0000,0.0000,nan,0.014,65
67514,34.101562,-5.829153,170.247753,-60.638325,1,0.2756,0.5256,0.1081,42.3913,0.019,90
67686,359.415588,-46.768478,327.729895,-67.686097,1,0.1785,0.2040,0.0109,40.0042,0.009,90
67730,52.910156,-25.944481,220.366350,-54.301439,1,0.3927,0.3926,0.0321,41.6368,0.010,90
67898,347.812500,-63.448284,320.128971,-50.202348,1,0.3824,0.3802,0.0081,41.5546,0.021,42
67981,151.523438,3.134927,236.900695,43.803170,1,0.0000,0.0000,0.0000,nan,0.019,65
68003,150.820312,3.732834,235.666318,43.572109,1,0.4201,0.4099,0.0162,41.7474,0.016,90
68276,33.398438,-3.732834,166.492280,-59.466614,1,0.0251,0.0342,0.0167,35.8818,0.022,42
68298,349.429535,-62.508568,320.039643,-51.393745,1,0.6264,0.6544,0.1479,42.9683,0.020,90
68667,348.595886,-63.072620,320.023289,-50.713060,1,0.0000,0.0000,0.0000,nan,0.021,16
68835,348.908447,-63.823658,319.169886,-50.176186,1,0.0177,0.0504,0.0318,36.7530,0.018,42
68886,2.457983,-45.389202,324.632685,-69.945696,1,0.3991,0.4463,0.3629,41.9666,0.011,52
69271,148.710938,2.836105,235.050801,41.328739,1,0.4269,0.4491,0.8217,41.9826,0.031,90
69490,51.855469,-26.276812,220.627031,-55.293792,1,0.2919,0.4825,0.1693,42.1687,0.014,42
69767,1.694561,-45.191612,326.278557,-69.858253,1,0.1603,2.4521,1.2066,46.4968,0.011,62
70046,351.321442,-64.198746,317.458993,-50.429931,1,0.3441,0.3129,0.0280,41.0610,0.023,90
70135,0.949367,-45.586655,326.991548,-69.251686,1,0.4500,0.4728,0.0167,42.1160,0.013,42
70171,53.789062,-27.784405,223.685697,-53.845803,1,0.4708,0.5059,0.0358,42.2916,0.009,90
70272,34.277344,-5.079716,169.526841,-59.956640,1,0.3088,0.3314,0.0357,41.2058,0.019,88
70276,151.171875,1.342993,238.602520,42.464379,1,0.8102,0.7948,0.0774,43.4853,0.026,88
70430,150.820312,1.641510,237.994507,42.358984,1,0.0451,0.0858,0.0255,37.9624,0.020,42
70571,149.414062,1.940072,236.565366,41.393323,1,0.3902,0.4137,1.1078,41.7706,0.018,90
70816,349.429535,-62.508568,320.039643,-51.393745,1,0.0000,0.0000,0.0000,nan,0.020,65
70898,151.171875,2.238686,237.619933,42.994783,1,0.5346,0.5478,0.0157,42.4996,0.024,90
70977,51.328125,-27.784405,223.130589,-55.999499,1,0.3428,0.3404,0.0099,41.2737,0.013,90
71068,34.453125,-5.229529,169.987075,-59.956185,1,0.2726,0.2679,1.0038,40.6730,0.019,88
71080,53.964844,-28.630989,225.142950,-53.813613,1,0.0000,0.0000,0.0000,nan,0.009,92
71084,52.558594,-27.279613,222.538937,-54.845107,1,0.1522,0.1472,0.0208,39.2216,0.008,42
71126,53.964844,-28.630989,225.142950,-53.813613,1,0.3021,0.5146,0.5741,42.3363,0.009,62
71438,52.558594,-27.279613,222.538937,-54.845107,1,0.0000,0.0000,0.0000,nan,0.008,65
71676,53.437500,-29.142223,225.908120,-54.336118,1,0.4396,0.4103,0.0209,41.7496,0.008,88
71890,348.595886,-63.072620,320.023289,-50.713060,1,0.7036,0.7024,0.0078,43.1558,0.021,88
71954,1.666667,-44.399834,327.519190,-70.529554,1,0.2483,0.2571,0.6448,40.5711,0.009,90
72053,53.613281,-27.953188,223.929533,-54.024772,1,0.2832,0.2616,0.0190,40.6141,0.007,88
72256,358.636353,-46.768478,328.890146,-67.388837,1,0.0000,0.0000,0.0000,nan,0.008,65
72337,34.277344,-5.079716,169.526841,-59.956640,1,0.2449,0.2107,0.1165,40.0824,0.019,90
72385,150.117188,2.836105,236.124718,42.483719,1,0.3029,0.2983,0.3068,40.9409,0.016,67
72426,51.679688,-27.447618,222.618229,-55.642263,1,0.5166,0.3846,0.3149,41.5839,0.010,90
72428,33.574219,-5.379379,168.838090,-60.637536,1,0.2214,2.4663,0.4396,46.5120,0.017,90
72489,53.613281,-26.944359,222.237403,-53.863858,1,0.8358,0.8312,0.0197,43.6047,0.009,90
72525,34.101562,-5.829153,170.247753,-60.638325,1,0.2407,0.2580,0.0145,40.5802,0.019,90
72735,151.699219,3.583322,236.533224,44.205648,1,0.2648,0.2313,0.0265,40.3104,0.016,67
73031,34.277344,-5.079716,169.526841,-59.956640,1,0.3151,0.5833,0.2442,42.6648,0.019,52
73236,33.398438,-3.732834,166.492280,-59.466614,1,0.1398,0.5280,0.4528,42.4036,0.022,90
73339,351.299988,-62.320400,319.038597,-52.026867,1,0.5183,0.2010,0.3771,39.9686,0.018,90
73433,349.966217,-62.696659,319.542989,-51.376556,1,0.0000,0.0000,0.0000,nan,0.021,65
73509,34.453125,-5.229529,169.987075,-59.956185,1,0.2953,0.2879,0.0259,40.8522,0.019,42
73610,34.277344,-5.079716,169.526841,-59.956640,1,0.8134,1.5079,0.3508,45.2031,0.019,95
74093,351.259003,-64.386185,317.344860,-50.255113,1,0.3940,0.4643,1.0625,42.0686,0.020,90
75116,349.891296,-64.573555,317.972107,-49.786192,1,0.0000,0.0000,0.0000,nan,0.023,16
75223,351.382965,-64.011238,317.574052,-50.604657,1,0.5652,0.5648,0.0157,42.5800,0.023,90
75562,346.562500,-63.448284,320.824720,-49.866957,1,0.0000,0.0000,0.0000,nan,0.021,65
75598,1.723404,-45.981140,325.117958,-69.180825,1,0.1652,0.1477,0.0151,39.2287,0.010,42
75646,348.529419,-61.755440,321.293980,-51.763351,1,0.2103,0.2089,0.0083,40.0620,0.016,67
75754,151.347656,3.583322,236.252362,43.918627,1,0.0000,0.0000,0.0000,nan,0.015,16
75792,148.886719,2.686724,235.347248,41.389003,1,0.3699,0.3602,0.0451,41.4171,0.028,90
75886,358.636353,-46.768478,328.890146,-67.388837,1,0.3985,0.3735,0.0375,41.5091,0.008,90
75987,54.667969,-27.615883,223.610785,-53.050840,1,0.9014,0.8506,0.0414,43.6664,0.009,88
76242,152.050781,2.985506,237.495952,44.143927,1,0.4916,0.5572,0.3767,42.5443,0.019,90
76304,348.595886,-63.072620,320.023289,-50.713060,1,2.4303,2.6811,1.0262,46.7317,0.021,95
76305,349.891296,-64.573555,317.972107,-49.786192,1,0.0000,0.0000,0.0000,nan,0.023,16
76639,346.500000,-62.320400,321.951129,-50.736054,1,0.5322,0.5077,0.0148,42.3012,0.020,90
77010,34.453125,-5.229529,169.987075,-59.956185,1,0.0000,0.0000,0.0000,nan,0.019,65
77041,346.276581,-64.011238,320.448031,-49.344136,1,0.3793,0.4070,0.6742,41.7290,0.019,90
77157,51.328125,-27.784405,223.130589,-55.999499,1,0.0000,0.0000,0.0000,nan,0.013,6
77192,151.347656,4.181528,235.568369,44.259942,1,0.3227,0.2764,0.0329,40.7513,0.016,62
77222,151.171875,1.342993,238.602520,42.464379,1,0.4342,0.3228,0.3114,41.1398,0.026,90
77292,34.277344,-5.079716,169.526841,-59.956640,1,0.1250,0.1890,0.2309,39.8194,0.019,62
77306,148.710938,2.836105,235.050801,41.328739,1,0.7167,0.6876,0.0165,43.0996,0.031,90
77340,346.500000,-62.320400,321.951129,-50.736054,1,0.8207,0.8217,0.0401,43.5741,0.020,88
77391,346.130127,-63.072620,321.423103,-50.042305,1,1.3214,1.4667,0.1459,45.1288,0.020,95
77518,53.437500,-29.142223,225.908120,-54.336118,1,0.3531,0.3522,0.0125,41.3602,0.008,67
77623,0.190678,-45.783966,327.956322,-68.803772,1,0.2765,0.2994,0.0135,40.9507,0.005,42
77825,349.046051,-61.943836,320.796530,-51.753706,1,0.1071,0.1065,0.0160,38.4603,0.017,42
77906,359.811707,-45.191612,329.485675,-69.150905,1,0.1265,0.0738,0.0297,37.6154,0.010,90
77952,358.665253,-45.783966,330.353593,-68.203652,1,0.1258,0.0858,0.0192,37.9614,0.009,90
78095,151.699219,3.583322,236.533224,44.205648,1,0.0000,0.0000,0.0000,nan,0.016,65
78233,148.710938,2.836105,235.050801,41.328739,1,0.2391,0.2060,0.0706,40.0283,0.031,90
78677,53.437500,-29.142223,225.908120,-54.336118,1,0.0000,0.0000,0.0000,nan,0.008,65
78702,349.891296,-64.573555,317.972107,-49.786192,1,0.0000,0.0000,0.0000,nan,0.023,92
78705,350.230255,-61.943836,320.053946,-52.070537,1,0.0000,0.0000,0.0000,nan,0.017,92
78727,51.679688,-27.447618,222.618229,-55.642263,1,0.1592,0.1479,0.0203,39.2318,0.010,90
78974,152.050781,3.284369,237.157374,44.318466,1,0.6592,0.6572,0.0085,42.9797,0.019,90
79002,2.097458,-45.783966,324.737840,-69.478613,1,0.4446,0.2938,0.8441,40.9035,0.011,90
79155,1.666667,-44.399834,327.519190,-70.529554,1,0.4097,0.4154,0.0160,41.7815,0.009,42
79235,52.031250,-26.443335,220.963669,-55.168557,1,0.1639,0.1669,0.0564,39.5206,0.014,42
79428,33.398438,-3.732834,166.492280,-59.466614,1,0.0000,0.0000,0.0000,nan,0.022,65
79515,53.964844,-28.630989,225.142950,-53.813613,1,0.2207,0.2188,1.1526,40.1744,0.009,90
79743,32.695312,-4.929937,166.868469,-60.841230,1,0.6266,0.6062,0.0094,42.7661,0.018,90
79819,150.820312,1.641510,237.994507,42.358984,1,0.1742,0.1725,0.0134,39.5994,0.020,42
79921,352.132874,-63.636005,317.424173,-51.095855,1,0.1638,0.1709,0.0219,39.5766,0.021,42
80155,53.085938,-28.122234,224.100909,-54.509752,1,0.3628,0.3633,0.0418,41.4386,0.007,90
80205,33.925781,-5.979157,170.179895,-60.866303,1,0.8134,0.7508,0.1176,43.3333,0.022,95
80780,152.050781,3.284369,237.157374,44.318466,1,0.1684,0.1902,0.0581,39.8341,0.019,42
80832,150.117188,2.238686,236.784618,42.139082,1,0.3950,0.3665,0.0146,41.4614,0.016,90
80852,151.347656,3.583322,236.252362,43.918627,1,0.2569,0.2659,0.0082,40.6545,0.015,62
80903,52.910156,-26.276812,220.926149,-54.363918,1,0.4140,0.4279,0.0171,41.8576,0.008,90
81000,150.996094,2.985506,236.647967,43.287350,1,0.2989,0.3106,0.0100,41.0422,0.020,42
81252,33.574219,-4.780192,168.064587,-60.175886,1,0.1494,0.1737,0.6985,39.6157,0.019,62
81464,149.414062,3.433834,234.919132,42.245550,1,0.2854,0.2818,0.3207,40.7987,0.027,42
81665,53.789062,-27.784405,223.685697,-53.845803,1,0.0000,0.0000,0.0000,nan,0.009,65
82302,346.276581,-64.011238,320.448031,-49.344136,1,0.0000,0.0000,0.0000,nan,0.019,65
82401,51.855469,-28.630989,224.733260,-55.649872,1,0.0000,0.0000,0.0000,nan,0.009,16
82409,348.595886,-63.072620,320.023289,-50.713060,1,0.1147,2.6274,1.1973,46.6786,0.021,42
82702,35.683594,-5.379379,171.992947,-59.253501,1,0.3194,0.2986,0.0204,40.9437,0.020,90
82740,349.615387,-63.636005,318.927246,-50.506542,1,1.0263,1.0228,0.2611,44.1609,0.018,88
83348,349.429535,-62.508568,320.039643,-51.393745,1,0.0000,0.0000,0.0000,nan,0.020,6
83410,51.855469,-26.276812,220.627031,-55.293792,1,0.5138,0.5071,0.0093,42.2980,0.014,90
83462,150.820312,3.134927,236.341348,43.230123,1,0.3424,0.3239,0.0179,41.1483,0.016,90
83634,349.285706,-62.884678,319.786163,-51.046461,1,2.1107,1.3813,0.4158,44.9680,0.018,95
83821,359.415588,-46.768478,327.729895,-67.686097,1,0.0000,0.0000,0.0000,nan,0.009,65
83872,149.589844,3.583322,234.885369,42.474696,1,0.2160,0.2054,0.2819,40.0211,0.024,90
83954,346.276581,-64.011238,320.448031,-49.344136,1,0.4390,0.4535,0.0428,42.0079,0.019,90
83961,1.753247,-46.768478,324.030235,-68.498041,1,0.2177,0.2211,0.0151,40.2004,0.014,90
84306,151.171875,2.537361,237.288526,43.169764,1,0.0000,0.0000,0.0000,nan,0.024,16
84716,151.523438,3.134927,236.900695,43.803170,1,0.4303,0.2925,1.0152,40.8923,0.019,90
84758,349.615387,-63.636005,318.927246,-50.506542,1,0.0000,0.0000,0.0000,nan,0.018,65
85125,53.085938,-28.122234,224.100909,-54.509752,1,0.0000,0.0000,0.0000,nan,0.007,65
85470,33.398438,-4.331149,167.226341,-59.936551,1,0.0000,0.0000,0.0000,nan,0.018,65
85490,348.908447,-63.823658,319.169886,-50.176186,1,0.2537,0.5506,0.2880,42.5130,0.018,90
85789,53.437500,-29.142223,225.908120,-54.336118,1,0.2990,0.2893,0.0367,40.8644,0.008,62
86456,33.574219,-5.379379,168.838090,-60.637536,1,0.2127,0.2191,0.0141,40.1785,0.017,42
86487,0.574468,-45.981140,327.041068,-68.778764,1,0.3850,0.3847,0.0333,41.5849,0.006,90
86759,348.529419,-61.755440,321.293980,-51.763351,1,0.0000,0.0000,0.0000,nan,0.016,65
86834,149.589844,3.583322,234.885369,42.474696,1,0.3160,0.4622,0.4742,42.0568,0.024,90
87180,150.820312,1.641510,237.994507,42.358984,1,0.1782,0.1820,0.0103,39.7288,0.020,62
87467,150.820312,3.134927,236.341348,43.230123,1,0.0000,0.0000,0.0000,nan,0.016,65
87498,152.050781,2.985506,237.495952,44.143927,1,0.0000,0.0000,0.0000,nan,0.019,16
87608,358.665253,-45.783966,330.353593,-68.203652,1,0.1894,0.2101,0.0179,40.0763,0.009,90
87685,347.861847,-61.943836,321.519104,-51.424048,1,0.3743,0.3612,0.0219,41.4237,0.017,90
87703,51.855469,-26.276812,220.627031,-55.293792,1,0.3246,0.5019,0.3014,42.2711,0.014,90
88073,347.013428,-62.508568,321.472056,-50.735330,1,0.0000,0.0000,0.0000,nan,0.018,92
88180,149.414062,1.940072,236.565366,41.393323,1,0.2996,2.6936,1.3051,46.7439,0.018,67
88195,53.964844,-28.630989,225.142950,-53.813613,1,0.3403,0.3448,0.0304,41.3063,0.009,90
88511,53.613281,-28.630989,225.073365,-54.119461,1,0.0000,0.0000,0.0000,nan,0.006,65
88587,352.711273,-63.823658,316.922299,-51.059403,1,0.7839,0.7789,0.0120,43.4314,0.024,90
88600,351.321442,-64.198746,317.458993,-50.429931,1,0.3303,0.3150,0.3295,41.0778,0.023,90
88627,32.871094,-4.780192,166.959493,-60.615132,1,0.3934,0.4406,0.0433,41.9329,0.017,67
88980,351.382965,-64.011238,317.574052,-50.604657,1,0.0000,0.0000,0.0000,nan,0.023,16
89157,348.908447,-63.823658,319.169886,-50.176186,1,0.0000,0.0000,0.0000,nan,0.018,92
89298,359.811707,-45.191612,329.485675,-69.150905,1,0.0000,0.0000,0.0000,nan,0.010,16
89387,346.130127,-63.072620,321.423103,-50.042305,1,0.6210,0.4739,0.0648,42.1217,0.020,90
89455,0.189873,-45.586655,328.254458,-68.969298,1,0.2603,0.2485,0.0142,40.4873,0.007,90
89709,52.910156,-25.944481,220.366350,-54.301439,1,0.0000,0.0000,0.0000,nan,0.010,92
89999,149.238281,3.882372,234.283829,42.351155,1,0.2640,0.3194,0.0576,41.1125,0.033,90
90399,51.328125,-27.784405,223.130589,-55.999499,1,0.0000,0.0000,0.0000,nan,0.013,65
90534,152.050781,3.284369,237.157374,44.318466,1,0.1759,0.1913,0.0131,39.8481,0.019,62
90645,51.855469,-28.630989,224.733260,-55.649872,1,0.4496,0.4486,0.0173,41.9797,0.009,90
90814,348.595886,-63.072620,320.023289,-50.713060,1,0.2252,0.2208,0.0172,40.1971,0.021,62
90892,152.050781,3.284369,237.157374,44.318466,1,0.0322,0.0365,0.0161,36.0320,0.019,52
91219,150.820312,3.134927,236.341348,43.230123,1,0.2921,0.5086,0.1231,42.3058,0.016,90
91291,352.711273,-63.823658,316.922299,-51.059403,1,0.1826,0.1746,0.0132,39.6288,0.024,90
91335,151.699219,3.583322,236.533224,44.205648,1,1.0655,1.4889,0.2658,45.1692,0.016,88
91337,53.613281,-27.953188,223.929533,-54.024772,1,0.0000,0.0000,0.0000,nan,0.007,65
91460,53.613281,-28.630989,225.073365,-54.119461,1,0.2782,0.5294,0.2771,42.4103,0.006,90
91610,346.130127,-63.072620,321.423103,-50.042305,1,0.2326,0.2302,0.0070,40.2992,0.020,42
91644,349.891296,-64.573555,317.972107,-49.786192,1,0.1893,0.1839,0.1322,39.7535,0.023,90
91917,152.050781,2.985506,237.495952,44.143927,1,0.2448,0.2740,0.0117,40.7289,0.019,90
91988,150.468750,1.641510,237.714575,42.075234,1,0.0000,0.0000,0.0000,nan,0.017,65
92334,350.230255,-61.943836,320.053946,-52.070537,1,0.0000,0.0000,0.0000,nan,0.017,65
92354,51.328125,-27.784405,223.130589,-55.999499,1,0.5449,0.5807,0.0374,42.6530,0.013,88
92566,1.753247,-46.768478,324.030235,-68.498041,1,0.0000,0.0000,0.0000,nan,0.014,16
92577,351.734680,-62.884678,318.284128,-51.651217,1,0.4426,0.3854,0.6831,41.5893,0.019,90
92904,347.812500,-63.448284,320.128971,-50.202348,1,0.2536,0.2814,0.0325,40.7958,0.021,62
92929,348.908447,-63.823658,319.169886,-50.176186,1,0.0000,0.0000,0.0000,nan,0.018,65
93333,151.171875,1.342993,238.602520,42.464379,1,0.4029,0.3217,0.4659,41.1310,0.026,90
93362,51.855469,-28.630989,224.733260,-55.649872,1,0.0000,0.0000,0.0000,nan,0.009,92
93509,51.855469,-26.276812,220.627031,-55.293792,1,0.0587,0.0644,0.0151,37.3066,0.014,42
93663,53.964844,-28.630989,225.142950,-53.813613,1,0.4196,0.4099,0.0367,41.7471,0.009,90
94004,52.910156,-27.953188,223.774083,-54.639214,1,0.2354,0.2257,0.0162,40.2510,0.007,62
94107,34.980469,-6.279288,172.180075,-60.389399,1,0.0000,0.0000,0.0000,nan,0.023,16
94229,52.207031,-28.291550,224.208534,-55.300157,1,0.3853,0.4477,0.0268,41.9745,0.007,90
94613,2.457983,-45.389202,324.632685,-69.945696,1,0.0739,0.0616,0.0188,37.2044,0.011,62
94704,349.966217,-62.696659,319.542989,-51.376556,1,0.2722,0.2658,0.0080,40.6537,0.021,90
95127,351.299988,-62.320400,319.038597,-52.026867,1,1.8136,1.6691,0.2276,45.4751,0.018,95
95147,149.238281,3.882372,234.283829,42.351155,1,0.0000,0.0000,0.0000,nan,0.033,65
95369,53.613281,-28.630989,225.073365,-54.119461,1,0.1313,0.5834,0.7684,42.6652,0.006,62
95455,351.259003,-64.386185,317.344860,-50.255113,1,0.4942,0.5590,0.1899,42.5528,0.020,90
95483,150.820312,3.732834,235.666318,43.572109,1,0.8512,0.8164,0.0436,43.5568,0.016,88
95508,149.414062,2.238686,236.239766,41.565558,1,0.6107,0.5480,0.0306,42.5009,0.017,88
95566,33.574219,-5.079716,168.448505,-60.407218,1,0.3880,0.4580,0.0992,42.0331,0.016,90
95580,34.101562,-5.829153,170.247753,-60.638325,1,0.4200,0.4311,0.0132,41.8770,0.019,90
95690,351.734680,-62.884678,318.284128,-51.651217,1,0.3442,0.3507,0.0052,41.3494,0.019,42
95741,35.332031,-5.979157,172.286722,-59.931743,1,0.5064,0.5368,0.0209,42.4469,0.022,52
95864,53.085938,-27.111860,222.384291,-54.355086,1,0.0000,0.0000,0.0000,nan,0.007,92
96284,152.050781,3.284369,237.157374,44.318466,1,0.1593,2.4014,0.4125,46.4417,0.019,42
97053,150.117188,3.732834,235.120533,42.993809,1,0.0000,0.0000,0.0000,nan,0.020,65
97406,347.846710,-64.760857,318.929827,-49.143596,1,0.1163,0.0897,0.8592,38.0649,0.019,15
97687,346.655182,-63.260487,320.952196,-50.040935,1,0.4603,0.4626,0.0271,42.0594,0.019,90
97850,351.259003,-64.386185,317.344860,-50.255113,1,0.3090,0.3005,0.0119,40.9599,0.020,90
97920,150.820312,3.134927,236.341348,43.230123,1,0.3588,0.3524,0.0238,41.3611,0.016,67
97957,1.723404,-45.981140,325.117958,-69.180825,1,0.2947,0.2906,0.0093,40.8754,0.010,90
98533,349.615387,-63.636005,318.927246,-50.506542,1,0.0000,0.0000,0.0000,nan,0.018,65
98570,1.708861,-45.586655,325.688716,-69.520253,1,0.6197,0.5296,0.0328,42.4113,0.011,42
98749,33.750000,-4.630479,168.146242,-59.949072,1,0.1473,0.0973,0.6573,38.2516,0.019,67
99013,350.230255,-61.943836,320.053946,-52.070537,1,0.5255,0.5303,0.0220,42.4150,0.017,90
99050,52.207031,-26.610098,221.298836,-55.042928,1,0.2972,0.3036,0.1492,40.9851,0.014,42
99261,53.613281,-27.953188,223.929533,-54.024772,1,0.1411,0.0857,0.0282,37.9579,0.007,90
99280,359.811707,-45.191612,329.485675,-69.150905,1,0.2037,0.2150,0.0079,40.1324,0.010,62
99293,347.846710,-64.760857,318.929827,-49.143596,1,0.3106,0.3644,0.0251,41.4467,0.019,90
99294,348.529419,-61.755440,321.293980,-51.763351,1,0.5552,0.5204,0.0185,42.3656,0.016,90
99452,352.711273,-63.823658,316.922299,-51.059403,1,0.8420,0.8479,0.0465,43.6581,0.024,88
99642,347.846710,-64.760857,318.929827,-49.143596,1,0.2320,0.2369,1.1397,40.3699,0.019,90
99862,52.207031,-28.291550,224.208534,-55.300157,1,0.5810,0.5733,0.0141,42.6194,0.007,42
99932,51.855469,-28.630989,224.733260,-55.649872,1,0.0000,0.0000,0.0000,nan,0.009,65
100057,346.130127,-63.072620,321.423103,-50.042305,1,0.8320,0.8006,0.0197,43.5045,0.020,90
100097,348.529419,-61.755440,321.293980,-51.763351,1,0.1566,0.1745,0.0186,39.6271,0.016,42
100133,346.655182,-63.260487,320.952196,-50.040935,1,0.2309,0.2215,0.0190,40.2046,0.019,42
100331,52.207031,-26.610098,221.298836,-55.042928,1,0.1684,0.1999,0.0173,39.9556,0.014,42
101050,32.695312,-4.929937,166.868469,-60.841230,1,0.2222,0.2529,0.0906,40.5303,0.018,90
101298,34.277344,-5.079716,169.526841,-59.956640,1,0.1848,0.1983,0.0069,39.9355,0.019,90
101374,51.855469,-26.276812,220.627031,-55.293792,1,0.0000,0.0000,0.0000,nan,0.014,16
101489,359.816315,-44.003082,331.451340,-70.123054,1,0.0721,0.0695,0.0025,37.4803,0.013,42
101508,358.648071,-46.375080,329.462659,-67.716008,1,0.0656,0.0249,0.0130,35.1822,0.009,90
101890,346.276581,-64.011238,320.448031,-49.344136,1,0.0000,0.0000,0.0000,nan,0.019,65
102036,53.789062,-27.784405,223.685697,-53.845803,1,0.2331,0.2269,0.0197,40.2635,0.009,42
102330,148.886719,2.686724,235.347248,41.389003,1,0.0000,0.0000,0.0000,nan,0.028,16
102343,51.328125,-27.447618,222.535046,-55.950727,1,0.1081,0.1422,0.0164,39.1394,0.013,67
102363,349.160583,-64.760857,318.219706,-49.458924,1,0.8046,0.7835,0.0167,43.4470,0.020,90
102745,349.615387,-63.636005,318.927246,-50.506542,1,0.2460,0.2333,1.0359,40.3315,0.018,90
102823,347.861847,-61.943836,321.519104,-51.424048,1,0.0000,0.0000,0.0000,nan,0.017,65
102864,0.574468,-45.981140,327.041068,-68.778764,1,0.2123,0.2254,0.0097,40.2473,0.006,42
103023,359.811707,-45.191612,329.485675,-69.150905,1,0.3221,0.2994,0.0125,40.9500,0.010,90
103026,348.529419,-61.755440,321.293980,-51.763351,1,0.1949,0.2006,0.0174,39.9637,0.016,42
103100,34.101562,-5.829153,170.247753,-60.638325,1,0.5057,0.4350,0.0240,41.9001,0.019,90
103145,349.160583,-64.760857,318.219706,-49.458924,1,0.6603,0.6380,0.0433,42.9010,0.020,90
103162,350.230255,-61.943836,320.053946,-52.070537,1,0.1101,0.1207,0.0096,38.7539,0.017,90
103171,1.753247,-46.768478,324.030235,-68.498041,1,0.0000,0.0000,0.0000,nan,0.014,92
103350,359.446716,-44.201530,331.730015,-69.805709,1,0.6777,0.6935,0.0507,43.1220,0.010,90
103354,150.117188,2.836105,236.124718,42.483719,1,2.7124,2.7655,0.0801,46.8131,0.016,88
103572,52.207031,-28.291550,224.208534,-55.300157,1,0.0000,0.0000,0.0000,nan,0.007,65
103927,150.820312,1.641510,237.994507,42.358984,1,0.2130,0.2282,0.0218,40.2779,0.020,90
103948,33.574219,-4.780192,168.064587,-60.175886,1,0.2257,0.2319,0.1182,40.3169,0.019,52
103967,0.190678,-45.783966,327.956322,-68.803772,1,0.5094,0.4789,0.0608,42.1493,0.005,42
104212,53.789062,-27.784405,223.685697,-53.845803,1,0.3895,0.3850,1.0136,41.5869,0.009,90
104397,349.966217,-62.696659,319.542989,-51.376556,1,0.3220,0.3038,0.0187,40.9870,0.021,90
104476,51.855469,-28.630989,224.733260,-55.649872,1,1.4820,1.4819,0.1602,45.1565,0.009,95
104498,149.238281,3.882372,234.283829,42.351155,1,0.1312,0.1100,0.0094,38.5371,0.033,67
104523,152.050781,3.284369,237.157374,44.318466,1,0.0000,0.0000,0.0000,nan,0.019,16
104526,349.615387,-63.636005,318.927246,-50.506542,1,0.2353,0.2389,0.0148,40.3902,0.018,42
104701,352.132874,-63.636005,317.424173,-51.095855,1,0.0000,0.0000,0.0000,nan,0.021,65
105744,348.908447,-63.823658,319.169886,-50.176186,1,0.3087,0.3318,1.0382,41.2090,0.018,90
106177,358.312500,-44.993881,332.185785,-68.685906,1,0.6593,0.5956,0.1033,42.7194,0.009,90
106429,152.050781,2.985506,237.495952,44.143927,1,0.0000,0.0000,0.0000,nan,0.019,16
106434,33.574219,-4.780192,168.064587,-60.175886,1,0.2861,0.2795,0.0098,40.7789,0.019,62
106594,0.589520,-47.161343,325.385896,-67.769893,1,0.2403,0.2370,1.0591,40.3710,0.009,88
106730,347.013428,-62.508568,321.472056,-50.735330,1,0.1576,2.2437,0.3333,46.2621,0.018,52
106743,0.574468,-45.981140,327.041068,-68.778764,1,0.2154,0.2539,0.0150,40.5405,0.006,90
106818,348.595886,-63.072620,320.023289,-50.713060,1,0.0000,0.0000,0.0000,nan,0.021,92
106937,53.085938,-28.122234,224.100909,-54.509752,1,0.3656,0.3882,0.8841,41.6079,0.007,52
107193,359.058563,-45.191612,330.695783,-68.844915,1,0.2109,0.1761,0.0082,39.6492,0.011,90
107439,150.468750,3.732834,235.392208,43.283244,1,0.9187,1.5247,0.3061,45.2328,0.020,88
107451,347.861847,-61.943836,321.519104,-51.424048,1,0.0000,0.0000,0.0000,nan,0.017,65
107568,33.750000,-4.630479,168.146242,-59.949072,1,0.1042,0.0802,0.0143,37.8074,0.019,67
107615,150.117188,2.836105,236.124718,42.483719,1,0.1323,2.3872,0.9055,46.4260,0.016,42
107712,53.261719,-27.615883,223.280041,-54.281374,1,0.1282,0.1450,0.0286,39.1863,0.006,90
107901,359.058563,-45.191612,330.695783,-68.844915,1,0.0000,0.0000,0.0000,nan,0.011,65
108021,150.996094,4.181528,235.291975,43.970869,1,0.2815,0.3756,0.8142,41.5232,0.015,62
108141,53.789062,-27.784405,223.685697,-53.845803,1,0.2179,0.2239,0.0151,40.2317,0.009,90
108229,351.321442,-64.198746,317.458993,-50.429931,1,0.1858,0.2022,1.1539,39.9826,0.023,90
108358,349.891296,-64.573555,317.972107,-49.786192,1,0.0769,0.2433,1.0869,40.4350,0.023,52
108487,359.816315,-44.003082,331.451340,-70.123054,1,0.1787,0.2401,0.6645,40.4026,0.013,42
108554,33.222656,-4.780192,167.515653,-60.396584,1,0.4478,0.4542,0.0258,42.0120,0.018,90
108693,0.574468,-45.981140,327.041068,-68.778764,1,0.4868,0.4183,0.0364,41.7991,0.006,90
108739,53.085938,-28.122234,224.100909,-54.509752,1,0.4453,0.4388,0.0272,41.9226,0.007,90
108888,358.648071,-46.375080,329.462659,-67.716008,1,0.6247,0.5951,0.0599,42.7172,0.009,90
109036,1.753247,-46.768478,324.030235,-68.498041,1,0.1924,0.1862,0.2684,39.7838,0.014,90
109057,348.595886,-63.072620,320.023289,-50.713060,1,0.4499,0.3307,1.0499,41.2005,0.021,90
109294,359.814819,-44.399834,330.775011,-69.801007,1,0.3014,0.3643,0.8070,41.4461,0.009,90
109516,1.753247,-46.768478,324.030235,-68.498041,1,0.3828,0.3941,0.0095,41.6466,0.014,95
109654,347.013428,-62.508568,321.472056,-50.735330,1,0.1395,0.1231,0.0131,38.8003,0.018,90
109860,0.929752,-44.597992,328.531426,-70.083244,1,0.2708,0.2639,0.0184,40.6357,0.011,42
109903,150.996094,2.388015,237.313912,42.939977,1,0.3892,0.3579,0.0137,41.4006,0.021,62
109937,149.414062,3.433834,234.919132,42.245550,1,0.6620,0.6423,0.0188,42.9190,0.027,88
110241,34.453125,-5.229529,169.987075,-59.956185,1,0.1072,0.0924,0.4007,38.1327,0.019,42
110257,148.886719,2.686724,235.347248,41.389003,1,0.9318,0.8495,0.0379,43.6631,0.028,88
110270,54.667969,-27.615883,223.610785,-53.050840,1,0.8259,0.8541,0.0446,43.6776,0.009,90
110304,53.261719,-27.615883,223.280041,-54.281374,1,0.2475,0.5457,0.2676,42.4899,0.006,62
110387,151.347656,4.181528,235.568369,44.259942,1,0.4318,0.4218,0.6713,41.8205,0.016,90
110551,51.328125,-27.447618,222.535046,-55.950727,1,0.4861,0.3027,0.3931,40.9783,0.013,88
110768,351.734680,-62.884678,318.284128,-51.651217,1,2.9378,2.8626,1.1139,46.9035,0.019,88
110958,349.615387,-63.636005,318.927246,-50.506542,1,0.4630,0.4742,0.5031,42.1236,0.018,90
111281,0.589520,-47.161343,325.385896,-67.769893,1,1.4967,1.6014,0.0895,45.3643,0.009,42
111283,150.468750,1.641510,237.714575,42.075234,1,0.6565,0.6363,0.0252,42.8941,0.017,90
111448,51.855469,-27.953188,223.543603,-55.561470,1,0.0000,0.0000,0.0000,nan,0.008,65
111650,351.734680,-62.884678,318.284128,-51.651217,1,0.0000,0.0000,0.0000,nan,0.019,65
111795,34.453125,-5.229529,169.987075,-59.956185,1,0.4070,0.3077,1.0584,41.0187,0.019,90
111799,52.558594,-27.279613,222.538937,-54.845107,1,0.2813,0.5741,0.4749,42.6227,0.008,42
112151,349.966217,-62.696659,319.542989,-51.376556,1,0.5341,0.5419,0.0204,42.4716,0.021,90
112462,33.574219,-4.780192,168.064587,-60.175886,1,0.4045,0.4356,0.0299,41.9040,0.019,90
112629,346.276581,-64.011238,320.448031,-49.344136,1,0.0000,0.0000,0.0000,nan,0.019,16
112717,35.683594,-5.379379,171.992947,-59.253501,1,0.2867,0.3138,0.8216,41.0680,0.020,67
112764,347.812500,-63.448284,320.128971,-50.202348,1,0.3208,0.2995,0.0255,40.9517,0.021,90
112782,32.871094,-4.780192,166.959493,-60.615132,1,0.4211,0.4038,0.0224,41.7088,0.017,90
112886,351.953644,-62.132156,318.777388,-52.347124,1,0.0000,0.0000,0.0000,nan,0.019,65
113028,151.699219,3.583322,236.533224,44.205648,1,0.0000,0.0000,0.0000,nan,0.016,65
113206,52.207031,-28.291550,224.208534,-55.300157,1,0.2446,0.2208,0.0078,40.1969,0.007,42
113335,358.636353,-46.768478,328.890146,-67.388837,1,0.0000,0.0000,0.0000,nan,0.008,65
113625,149.589844,3.583322,234.885369,42.474696,1,0.2615,0.2381,0.0160,40.3824,0.024,90
113669,351.299988,-62.320400,319.038597,-52.026867,1,0.2914,0.2905,0.0061,40.8748,0.018,15
113982,359.805206,-46.768478,327.135979,-67.829903,1,0.7438,0.9216,0.1482,43.8813,0.011,90
114191,33.574219,-6.579593,170.455585,-61.548219,1,0.0000,0.0000,0.0000,nan,0.021,65
114341,151.699219,3.583322,236.533224,44.205648,1,0.1709,2.3232,0.5051,46.3542,0.016,42
114626,2.071130,-45.191612,325.606223,-69.989264,1,0.5702,0.5408,0.0178,42.4659,0.011,90
114670,150.996094,2.388015,237.313912,42.939977,1,0.2930,0.2607,0.0670,40.6057,0.021,90
114715,0.965665,-46.375080,325.845907,-68.579427,1,0.2141,0.2020,0.0362,39.9807,0.007,67
114808,52.207031,-26.610098,221.298836,-55.042928,1,0.3037,0.2944,0.0104,40.9082,0.014,90
115053,33.925781,-5.979157,170.179895,-60.866303,1,0.1649,0.1424,0.2329,39.1427,0.022,42
115079,1.694561,-45.191612,326.278557,-69.858253,1,0.2207,0.2325,0.0072,40.3232,0.011,90
115157,2.457983,-45.389202,324.632685,-69.945696,1,0.6382,0.5628,0.0442,42.5706,0.011,90
115336,351.734680,-62.884678,318.284128,-51.651217,1,1.7312,1.7123,0.0766,45.5432,0.019,95
115638,358.665253,-45.783966,330.353593,-68.203652,1,0.6365,0.6239,0.0079,42.8420,0.009,90
115670,151.347656,3.583322,236.252362,43.918627,1,0.0000,0.0000,0.0000,nan,0.015,16
115792,352.711273,-63.823658,316.922299,-51.059403,1,0.2389,0.2859,0.0505,40.8350,0.024,90
115859,148.886719,2.686724,235.347248,41.389003,1,1.4248,1.2871,0.1757,44.7782,0.028,88
115937,51.679688,-27.447618,222.618229,-55.642263,1,0.0000,0.0000,0.0000,nan,0.010,65
116132,150.292969,2.686724,236.427488,42.541447,1,0.0878,0.0965,0.0207,38.2336,0.016,62
116212,150.644531,3.583322,235.698235,43.342784,1,0.2344,0.2757,1.1756,40.7443,0.018,90
116570,51.855469,-27.953188,223.543603,-55.561470,1,0.0000,0.0000,0.0000,nan,0.008,16
116720,34.980469,-6.279288,172.180075,-60.389399,1,0.4081,0.3212,0.0367,41.1269,0.023,90
116818,359.805206,-46.768478,327.135979,-67.829903,1,0.6528,0.7091,0.0211,43.1811,0.011,88
117016,349.966217,-62.696659,319.542989,-51.376556,1,0.0000,0.0000,0.0000,nan,0.021,92
117104,346.130127,-63.072620,321.423103,-50.042305,1,0.0000,0.0000,0.0000,nan,0.020,92
117184,352.132874,-63.636005,317.424173,-51.095855,1,0.0000,0.0000,0.0000,nan,0.021,16
117393,52.910156,-25.944481,220.366350,-54.301439,1,0.8259,0.8295,0.0204,43.5993,0.010,90
117461,53.261719,-27.615883,223.280041,-54.281374,1,0.0000,0.0000,0.0000,nan,0.006,92
117513,151.347656,3.583322,236.252362,43.918627,1,0.8515,0.8845,0.0148,43.7712,0.015,90
117774,346.276581,-64.011238,320.448031,-49.344136,1,0.5589,0.5608,0.1532,42.5611,0.019,90
118211,151.699219,3.583322,236.533224,44.205648,1,0.2420,0.2139,1.1778,40.1199,0.016,90
118422,149.589844,3.583322,234.885369,42.474696,1,0.0000,0.0000,0.0000,nan,0.024,65
118455,347.861847,-61.943836,321.519104,-51.424048,1,0.1346,0.1396,0.3139,39.0951,0.017,90
118770,53.085938,-28.122234,224.100909,-54.509752,1,0.2642,0.6375,0.0267,42.8991,0.007,90
118868,350.230255,-61.943836,320.053946,-52.070537,1,0.0000,0.0000,0.0000,nan,0.017,65
118979,2.457983,-45.389202,324.632685,-69.945696,1,0.2374,0.5280,0.1420,42.4036,0.011,67
119215,52.031250,-26.443335,220.963669,-55.168557,1,0.2490,0.2486,0.0069,40.4887,0.014,90
119383,0.949367,-45.586655,326.991548,-69.251686,1,0.3501,0.2602,0.6723,40.6012,0.013,42
119494,150.117188,2.836105,236.124718,42.483719,1,0.2113,0.2256,0.0219,40.2502,0.016,90
119647,151.523438,3.134927,236.900695,43.803170,1,0.1844,0.1571,0.0210,39.3762,0.019,62
119811,348.586945,-64.573555,318.693903,-49.477869,1,0.7287,0.7966,0.0308,43.4912,0.018,90
119882,358.312500,-44.993881,332.185785,-68.685906,1,0.0000,0.0000,0.0000,nan,0.009,65
120356,32.695312,-4.929937,166.868469,-60.841230,1,0.0582,0.0543,0.0185,36.9203,0.018,42
120927,152.050781,3.284369,237.157374,44.318466,1,0.4807,0.4897,0.0334,42.2071,0.019,90
121107,35.332031,-5.979157,172.286722,-59.931743,1,0.5440,0.2209,0.2405,40.1982,0.022,90
121182,53.085938,-28.122234,224.100909,-54.509752,1,0.0000,0.0000,0.0000,nan,0.007,65
121224,359.811707,-45.191612,329.485675,-69.150905,1,0.1244,2.3005,0.4304,46.3283,0.010,42
121266,151.347656,3.583322,236.252362,43.918627,1,0.4062,0.5002,0.0481,42.2623,0.015,42
121301,52.207031,-28.630989,224.800211,-55.343637,1,0.1459,0.1449,1.1250,39.1845,0.009,62
121440,53.613281,-28.630989,225.073365,-54.119461,1,0.2230,0.2219,0.1530,40.2089,0.006,42
121447,0.189873,-45.586655,328.254458,-68.969298,1,0.2571,0.2465,0.0106,40.4674,0.007,90
121704,359.805206,-46.768478,327.135979,-67.829903,1,1.5357,1.5245,0.1053,45.2324,0.011,88
121705,352.132874,-63.636005,317.424173,-51.095855,1,0.1125,0.1219,0.0187,38.7767,0.021,42
121783,35.332031,-5.979157,172.286722,-59.931743,1,0.1324,0.0956,0.0132,38.2109,0.022,90
121803,348.586945,-64.573555,318.693903,-49.477869,1,0.2563,0.3703,1.1769,41.4870,0.018,90
121883,150.117188,2.836105,236.124718,42.483719,1,0.0000,0.0000,0.0000,nan,0.016,65
122235,53.085938,-28.122234,224.100909,-54.509752,1,0.0000,0.0000,0.0000,nan,0.007,65
122275,54.667969,-27.615883,223.610785,-53.050840,1,0.3172,0.5059,0.1684,42.2916,0.009,90
122716,33.398438,-3.732834,166.492280,-59.466614,1,0.0000,0.0000,0.0000,nan,0.022,65
122965,53.085938,-28.122234,224.100909,-54.509752,1,0.7135,0.7416,0.0359,43.3006,0.007,90
123035,150.468750,3.732834,235.392208,43.283244,1,0.2740,0.3135,0.5471,41.0660,0.020,52
123151,52.207031,-28.291550,224.208534,-55.300157,1,0.2548,0.2657,0.0184,40.6528,0.007,62
123211,150.996094,2.985506,236.647967,43.287350,1,1.6177,1.6654,0.0205,45.4691,0.020,88
123244,1.363636,-46.768478,324.669342,-68.371416,1,0.4582,0.3682,0.3963,41.4728,0.008,42
123437,348.595886,-63.072620,320.023289,-50.713060,1,0.1987,0.2137,0.0164,40.1178,0.021,52
123493,34.804688,-5.829153,171.307861,-60.174401,1,1.0395,0.9736,0.2348,44.0284,0.023,95
123743,152.050781,2.985506,237.495952,44.143927,1,0.2016,0.2129,0.0188,40.1084,0.019,90
123926,151.171875,1.342993,238.602520,42.464379,1,0.0000,0.0000,0.0000,nan,0.026,16
123927,150.468750,3.732834,235.392208,43.283244,1,0.0000,0.0000,0.0000,nan,0.020,16
124006,2.457983,-45.389202,324.632685,-69.945696,1,0.3574,0.3496,0.7517,41.3410,0.011,90
124183,33.574219,-6.579593,170.455585,-61.548219,1,0.0000,0.0000,0.0000,nan,0.021,65
124188,348.908447,-63.823658,319.169886,-50.176186,1,0.0000,0.0000,0.0000,nan,0.018,65
124361,51.855469,-27.953188,223.543603,-55.561470,1,0.3256,0.3420,0.8779,41.2856,0.008,88
124394,52.207031,-26.610098,221.298836,-55.042928,1,0.9198,1.4761,0.2825,45.1459,0.014,90
124679,346.276581,-64.011238,320.448031,-49.344136,1,0.3602,0.3409,0.0345,41.2771,0.019,52
124762,347.861847,-61.943836,321.519104,-51.424048,1,0.1442,2.8043,0.6276,46.8496,0.017,42
125095,352.711273,-63.823658,316.922299,-51.059403,1,0.0000,0.0000,0.0000,nan,0.024,65
125242,349.891296,-64.573555,317.972107,-49.786192,1,0.2721,0.5662,0.2621,42.5866,0.023,90
125258,349.615387,-63.636005,318.927246,-50.506542,1,0.0000,0.0000,0.0000,nan,0.018,65
125426,349.429535,-62.508568,320.039643,-51.393745,1,1.1327,1.1093,0.0315,44.3789,0.020,88
125470,34.277344,-5.079716,169.526841,-59.956640,1,0.5731,0.5848,0.0092,42.6713,0.019,90
125518,359.805206,-46.768478,327.135979,-67.829903,1,0.0000,0.0000,0.0000,nan,0.011,65
125743,347.013428,-62.508568,321.472056,-50.735330,1,0.9950,1.0046,0.0133,44.1126,0.018,42
125762,1.753247,-46.768478,324.030235,-68.498041,1,0.0000,0.0000,0.0000,nan,0.014,65
126061,349.966217,-62.696659,319.542989,-51.376556,1,0.0000,0.0000,0.0000,nan,0.021,65
126084,149.238281,3.882372,234.283829,42.351155,1,0.1250,0.1223,0.4578,38.7846,0.033,90
126970,150.996094,2.985506,236.647967,43.287350,1,0.0000,0.0000,0.0000,nan,0.020,16
127056,34.453125,-5.229529,169.987075,-59.956185,1,0.1629,0.1385,0.0373,39.0772,0.019,42
127488,348.908447,-63.823658,319.169886,-50.176186,1,0.0000,0.0000,0.0000,nan,0.018,65
127773,53.085938,-27.111860,222.384291,-54.355086,1,0.0000,0.0000,0.0000,nan,0.007,65
127942,348.595886,-63.072620,320.023289,-50.713060,1,0.3899,0.5026,0.2876,42.2748,0.021,90
127996,346.562500,-63.448284,320.824720,-49.866957,1,0.6993,0.6759,0.0121,43.0539,0.021,95
128339,0.965665,-46.375080,325.845907,-68.579427,1,0.2192,2.1719,1.1420,46.1760,0.007,67
128405,352.398651,-62.696659,318.017427,-51.967966,1,0.0000,0.0000,0.0000,nan,0.020,65
128488,150.468750,1.641510,237.714575,42.075234,1,0.1333,0.1194,0.0257,38.7277,0.017,42
128518,34.101562,-5.829153,170.247753,-60.638325,1,0.5644,0.5449,0.0420,42.4859,0.019,90
128564,150.117188,2.238686,236.784618,42.139082,1,0.4541,2.8846,1.1987,46.9235,0.016,90
128737,52.558594,-27.279613,222.538937,-54.845107,1,0.4204,0.4270,0.0598,41.8523,0.008,90
128746,152.050781,2.985506,237.495952,44.143927,1,0.3408,0.3671,0.5585,41.4654,0.019,52
128967,150.644531,3.583322,235.698235,43.342784,1,0.3145,0.3149,0.0153,41.0770,0.018,90
129179,346.130127,-63.072620,321.423103,-50.042305,1,0.1422,0.1600,0.0245,39.4198,0.020,52
129490,34.453125,-5.229529,169.987075,-59.956185,1,0.3803,0.4821,0.2384,42.1664,0.019,42
129503,150.292969,2.686724,236.427488,42.541447,1,0.1939,0.1882,0.0140,39.8092,0.016,52
129637,33.925781,-5.979157,170.179895,-60.866303,1,0.0000,0.0000,0.0000,nan,0.022,92
129648,348.529419,-61.755440,321.293980,-51.763351,1,0.3613,2.9518,0.4926,46.9838,0.016,62
129861,351.321442,-64.198746,317.458993,-50.429931,1,0.5180,0.5215,0.0294,42.3710,0.023,90
130220,346.276581,-64.011238,320.448031,-49.344136,1,0.5002,0.4635,0.0365,42.0643,0.019,42
130404,150.820312,3.732834,235.666318,43.572109,1,0.2531,0.2629,0.0072,40.6262,0.016,42
130502,35.683594,-5.379379,171.992947,-59.253501,1,0.2141,0.1909,0.0935,39.8439,0.020,90
130625,1.708861,-45.586655,325.688716,-69.520253,1,0.0000,0.0000,0.0000,nan,0.011,16
130750,359.816315,-44.003082,331.451340,-70.123054,1,0.2536,0.2917,0.7396,40.8850,0.013,90
131075,52.910156,-25.944481,220.366350,-54.301439,1,0.1709,0.1776,0.0174,39.6689,0.010,42
131181,34.101562,-5.829153,170.247753,-60.638325,1,0.0000,0.0000,0.0000,nan,0.019,16
131305,351.382965,-64.011238,317.574052,-50.604657,1,0.2846,0.3113,0.0407,41.0484,0.023,90
131368,150.996094,2.388015,237.313912,42.939977,1,0.0000,0.0000,0.0000,nan,0.021,65
131488,0.965665,-46.375080,325.845907,-68.579427,1,0.2691,0.2621,0.0183,40.6187,0.007,90
131492,150.820312,3.732834,235.666318,43.572109,1,0.0000,0.0000,0.0000,nan,0.016,16
131629,33.574219,-5.079716,168.448505,-60.407218,1,1.1119,1.0817,0.0329,44.3113,0.016,95
131814,151.699219,3.583322,236.533224,44.205648,1,0.2823,0.5387,0.2038,42.4557,0.016,90
131815,32.695312,-4.929937,166.868469,-60.841230,1,0.3521,0.3520,0.0254,41.3584,0.018,90
132021,359.058563,-45.191612,330.695783,-68.844915,1,0.2525,0.6394,0.1855,42.9069,0.011,90
132278,359.415588,-46.768478,327.729895,-67.686097,1,0.0000,0.0000,0.0000,nan,0.009,92
133074,347.617462,-62.508568,321.121462,-50.904708,1,0.2610,0.2672,0.8507,40.6666,0.019,90
133191,346.276581,-64.011238,320.448031,-49.344136,1,0.4648,0.4423,0.0247,41.9430,0.019,90
133234,151.523438,3.134927,236.900695,43.803170,1,0.2288,0.2866,0.0891,40.8416,0.019,15
133354,358.665253,-45.783966,330.353593,-68.203652,1,0.4990,0.5076,0.0086,42.3007,0.009,90
133513,34.980469,-6.279288,172.180075,-60.389399,1,0.4337,0.4221,0.0715,41.8224,0.023,90
133773,149.414062,3.433834,234.919132,42.245550,1,0.0000,0.0000,0.0000,nan,0.027,53
134380,150.996094,2.985506,236.647967,43.287350,1,0.3943,0.3670,0.0425,41.4645,0.020,90
134824,351.734680,-62.884678,318.284128,-51.651217,1,0.1196,0.1143,0.0103,38.6270,0.019,42
135054,350.230255,-61.943836,320.053946,-52.070537,1,0.3893,0.4001,0.3373,41.6849,0.017,90
135067,148.886719,2.686724,235.347248,41.389003,1,0.5285,0.5059,0.6484,42.2917,0.028,90
135097,151.171875,1.342993,238.602520,42.464379,1,0.2066,0.3032,0.0238,40.9819,0.026,42
135357,51.855469,-28.630989,224.733260,-55.649872,1,0.0000,0.0000,0.0000,nan,0.009,65
135588,151.523438,3.134927,236.900695,43.803170,1,0.0000,0.0000,0.0000,nan,0.019,16
135790,150.117188,2.238686,236.784618,42.139082,1,0.4136,0.3284,0.7703,41.1829,0.016,90
135813,0.589520,-47.161343,325.385896,-67.769893,1,0.2577,0.2445,0.0139,40.4473,0.009,52
136110,53.261719,-27.615883,223.280041,-54.281374,1,0.3785,0.4183,0.7565,41.7995,0.006,90
136352,51.328125,-27.784405,223.130589,-55.999499,1,0.0000,0.0000,0.0000,nan,0.013,65
136407,33.574219,-4.780192,168.064587,-60.175886,1,0.1110,0.0954,0.0100,38.2070,0.019,42
136704,150.820312,3.732834,235.666318,43.572109,1,0.2725,1.2302,1.1813,44.6568,0.016,62
136931,52.558594,-27.279613,222.538937,-54.845107,1,0.3304,0.3783,0.1057,41.5421,0.008,52
136949,351.321442,-64.198746,317.458993,-50.429931,1,0.0000,0.0000,0.0000,nan,0.023,65
137510,346.562500,-63.448284,320.824720,-49.866957,1,0.0000,0.0000,0.0000,nan,0.021,65
137645,51.855469,-26.276812,220.627031,-55.293792,1,0.1590,0.1754,0.0130,39.6390,0.014,42
138010,52.207031,-28.630989,224.800211,-55.343637,1,0.2411,0.2709,0.0169,40.7009,0.009,42
138068,150.292969,2.686724,236.427488,42.541447,1,0.2270,2.7496,0.8355,46.7979,0.016,42
138263,51.328125,-27.784405,223.130589,-55.999499,1,0.4085,0.4242,0.0398,41.8353,0.013,90
138415,349.429535,-62.508568,320.039643,-51.393745,1,0.2761,0.2500,0.0203,40.5026,0.020,42
138553,347.812500,-63.448284,320.128971,-50.202348,1,0.3010,0.2608,0.2763,40.6065,0.021,90
138947,349.160583,-64.760857,318.219706,-49.458924,1,0.5965,0.5575,0.2970,42.5456,0.020,42
139016,151.523438,3.134927,236.900695,43.803170,1,0.0000,0.0000,0.0000,nan,0.019,16
139329,151.699219,3.583322,236.533224,44.205648,1,0.5029,0.5454,0.2843,42.4885,0.016,90
139362,149.238281,3.882372,234.283829,42.351155,1,0.0809,0.0779,0.0097,37.7408,0.033,64
139405,1.666667,-44.399834,327.519190,-70.529554,1,0.3701,0.4458,0.8403,41.9636,0.009,90
139637,359.816315,-44.003082,331.451340,-70.123054,1,0.4142,0.4028,0.3993,41.7023,0.013,90
140096,2.457983,-45.389202,324.632685,-69.945696,1,0.2211,0.2342,0.0540,40.3413,0.011,90
140472,52.558594,-27.279613,222.538937,-54.845107,1,0.0000,0.0000,0.0000,nan,0.008,65
140948,1.666667,-44.399834,327.519190,-70.529554,1,0.2048,0.1926,0.0117,39.8645,0.009,62
141212,53.613281,-28.630989,225.073365,-54.119461,1,0.1256,2.3091,0.5229,46.3382,0.006,90
141302,150.292969,2.686724,236.427488,42.541447,1,0.4316,0.4620,0.3119,42.0560,0.016,42
141334,149.238281,3.882372,234.283829,42.351155,1,0.0000,0.0000,0.0000,nan,0.033,65
141686,53.085938,-28.122234,224.100909,-54.509752,1,0.3431,0.3179,0.2942,41.1008,0.007,67
141937,151.523438,3.134927,236.900695,43.803170,1,0.4487,0.4658,0.0138,42.0770,0.019,90
142099,52.207031,-26.610098,221.298836,-55.042928,1,0.0000,0.0000,0.0000,nan,0.014,65
142254,52.910156,-25.944481,220.366350,-54.301439,1,0.4858,0.5210,0.3055,42.3685,0.010,90
142368,347.013428,-62.508568,321.472056,-50.735330,1,0.2586,0.0878,0.7050,38.0148,0.018,90
142866,150.996094,2.388015,237.313912,42.939977,1,0.6327,0.6476,0.0215,42.9408,0.021,42
142867,349.966217,-62.696659,319.542989,-51.376556,1,0.1548,0.1591,0.0118,39.4056,0.021,42
142885,52.207031,-28.291550,224.208534,-55.300157,1,0.4057,0.3852,0.0218,41.5882,0.007,90
143066,0.189873,-45.586655,328.254458,-68.969298,1,0.1937,0.5270,0.1240,42.3983,0.007,42
143275,2.071130,-45.191612,325.606223,-69.989264,1,0.1114,0.1030,0.0117,38.3830,0.011,42
143651,34.101562,-5.829153,170.247753,-60.638325,1,0.3590,0.3440,0.0396,41.3003,0.019,90
143865,33.925781,-5.979157,170.179895,-60.866303,1,0.0000,0.0000,0.0000,nan,0.022,16
144204,53.085938,-27.111860,222.384291,-54.355086,1,0.4825,0.5378,0.3853,42.4513,0.007,90
144244,33.222656,-4.780192,167.515653,-60.396584,1,0.2120,0.6014,0.1268,42.7453,0.018,90
145107,34.980469,-6.279288,172.180075,-60.389399,1,0.0000,0.0000,0.0000,nan,0.023,16
145160,0.189873,-45.586655,328.254458,-68.969298,1,0.0000,0.0000,0.0000,nan,0.007,65
145257,52.207031,-28.291550,224.208534,-55.300157,1,0.2570,0.2708,0.2312,40.7001,0.007,90
145675,51.855469,-27.953188,223.543603,-55.561470,1,0.8655,0.8841,0.0293,43.7698,0.008,90
145859,34.980469,-6.279288,172.180075,-60.389399,1,0.2213,0.2411,0.0230,40.4126,0.023,90
145926,152.050781,3.284369,237.157374,44.318466,1,0.2061,0.2269,0.0092,40.2640,0.019,42
145990,33.222656,-4.780192,167.515653,-60.396584,1,0.4648,0.4366,0.0319,41.9096,0.018,90
146187,33.398438,-3.732834,166.492280,-59.466614,1,0.0000,0.0000,0.0000,nan,0.022,92
146410,359.805206,-46.768478,327.135979,-67.829903,1,0.2555,0.4043,0.6020,41.7117,0.011,90
146429,150.117188,2.238686,236.784618,42.139082,1,0.1260,0.1220,0.0168,38.7782,0.016,62
147214,51.855469,-27.953188,223.543603,-55.561470,1,0.1677,0.1843,0.0127,39.7584,0.008,90
147571,0.190678,-45.783966,327.956322,-68.803772,1,0.1885,0.2575,0.1606,40.5748,0.005,90
147642,52.910156,-26.276812,220.926149,-54.363918,1,0.2453,0.2263,0.0090,40.2576,0.008,62
147752,151.523438,3.134927,236.900695,43.803170,1,0.2042,0.1823,0.0158,39.7324,0.019,90
147816,34.277344,-5.079716,169.526841,-59.956640,1,0.0000,0.0000,0.0000,nan,0.019,92
148204,151.171875,2.238686,237.619933,42.994783,1,0.0000,0.0000,0.0000,nan,0.024,65
148466,151.171875,2.238686,237.619933,42.994783,1,0.5915,0.5684,0.0143,42.5965,0.024,90
148535,34.277344,-5.679190,170.314930,-60.410322,1,0.1966,0.1835,0.0107,39.7482,0.020,42
148543,349.046051,-61.943836,320.796530,-51.753706,1,0.4717,0.5098,0.0652,42.3120,0.017,90
148976,53.964844,-28.630989,225.142950,-53.813613,1,0.6993,0.6715,0.0353,43.0365,0.009,90
148996,51.855469,-26.276812,220.627031,-55.293792,1,0.0258,0.0954,0.0390,38.2066,0.014,15
149129,0.589520,-47.161343,325.385896,-67.769893,1,0.1440,2.5349,0.7043,46.5843,0.009,90
149130,151.171875,2.537361,237.288526,43.169764,1,0.1925,0.1847,0.0130,39.7633,0.024,90
149478,2.071130,-45.191612,325.606223,-69.989264,1,0.1744,0.5308,0.1019,42.4174,0.011,42
149673,33.398438,-4.331149,167.226341,-59.936551,1,0.3411,0.3499,0.0077,41.3436,0.018,90
150266,349.615387,-63.636005,318.927246,-50.506542,1,0.0000,0.0000,0.0000,nan,0.018,16
150344,349.966217,-62.696659,319.542989,-51.376556,1,0.0000,0.0000,0.0000,nan,0.021,65
150561,53.261719,-27.615883,223.280041,-54.281374,1,0.2265,0.2307,0.0085,40.3046,0.006,42
150765,51.855469,-27.953188,223.543603,-55.561470,1,0.0000,0.0000,0.0000,nan,0.008,65
150818,53.613281,-26.944359,222.237403,-53.863858,1,0.1401,0.1231,0.0150,38.8002,0.009,90
150880,351.382965,-64.011238,317.574052,-50.604657,1,0.1168,0.0932,0.0179,38.1533,0.023,42
151356,51.679688,-27.447618,222.618229,-55.642263,1,0.0000,0.0000,0.0000,nan,0.010,16
151427,0.574468,-45.981140,327.041068,-68.778764,1,0.0370,0.0313,0.0117,35.6883,0.006,42
151458,53.261719,-27.615883,223.280041,-54.281374,1,0.0000,0.0000,0.0000,nan,0.006,65
151462,33.574219,-4.780192,168.064587,-60.175886,1,0.2901,0.3886,0.4161,41.6102,0.019,42
151498,359.446716,-44.201530,331.730015,-69.805709,1,0.4319,0.4487,0.5711,41.9804,0.010,90
151694,54.667969,-27.615883,223.610785,-53.050840,1,0.0000,0.0000,0.0000,nan,0.009,92
151704,150.292969,2.686724,236.427488,42.541447,1,0.1558,0.1295,0.6463,38.9183,0.016,52
151973,150.996094,4.181528,235.291975,43.970869,1,0.0000,0.0000,0.0000,nan,0.015,92
152079,33.574219,-5.379379,168.838090,-60.637536,1,0.3503,0.3429,0.0254,41.2924,0.017,90
152083,150.468750,3.732834,235.392208,43.283244,1,0.5001,0.5041,0.0165,42.2827,0.020,90
152300,33.574219,-6.579593,170.455585,-61.548219,1,0.7306,0.6981,0.0380,43.1398,0.021,42
152425,150.820312,3.732834,235.666318,43.572109,1,0.0000,0.0000,0.0000,nan,0.016,65
152453,52.910156,-27.953188,223.774083,-54.639214,1,0.2741,0.5064,0.2047,42.2944,0.007,90
152567,150.996094,4.181528,235.291975,43.970869,1,0.0000,0.0000,0.0000,nan,0.015,16
152618,33.574219,-5.079716,168.448505,-60.407218,1,0.0000,0.0000,0.0000,nan,0.016,16
152640,33.574219,-4.780192,168.064587,-60.175886,1,0.0000,0.0000,0.0000,nan,0.019,65
152682,51.679688,-27.447618,222.618229,-55.642263,1,0.0000,0.0000,0.0000,nan,0.010,16
152756,150.996094,4.181528,235.291975,43.970869,1,0.1326,0.1837,0.4894,39.7513,0.015,62
152787,351.299988,-62.320400,319.038597,-52.026867,1,0.0000,0.0000,0.0000,nan,0.018,65
152812,32.695312,-4.929937,166.868469,-60.841230,1,2.2378,2.2813,0.0673,46.3061,0.018,88
153089,35.332031,-5.979157,172.286722,-59.931743,1,0.0000,0.0000,0.0000,nan,0.022,16
153539,54.667969,-27.615883,223.610785,-53.050840,1,0.3371,0.3157,0.0155,41.0832,0.009,88
153880,51.679688,-27.447618,222.618229,-55.642263,1,0.3501,0.3152,0.0731,41.0793,0.010,90
154053,34.101562,-5.829153,170.247753,-60.638325,1,0.1526,0.1479,0.0167,39.2327,0.019,42
154402,151.171875,2.238686,237.619933,42.994783,1,0.0294,0.0619,0.0172,37.2157,0.024,90
154631,358.648071,-46.375080,329.462659,-67.716008,1,0.9576,0.8148,0.0688,43.5515,0.009,90
154648,359.811707,-45.191612,329.485675,-69.150905,1,1.4174,1.4408,0.0548,45.0811,0.010,88
154762,35.859375,-4.630479,171.270769,-58.580806,1,0.3187,0.2788,1.0862,40.7724,0.022,90
154986,150.996094,2.985506,236.647967,43.287350,1,0.0000,0.0000,0.0000,nan,0.020,16
155110,148.886719,2.686724,235.347248,41.389003,1,0.0000,0.0000,0.0000,nan,0.028,65
155380,150.996094,2.388015,237.313912,42.939977,1,0.2147,0.2288,0.0127,40.2844,0.021,88
155468,150.996094,4.181528,235.291975,43.970869,1,0.0000,0.0000,0.0000,nan,0.015,65
155541,151.347656,4.181528,235.568369,44.259942,1,0.5115,0.4788,0.0312,42.1482,0.016,90
155613,148.710938,2.836105,235.050801,41.328739,1,0.3585,0.3694,0.0090,41.4811,0.031,90
155778,53.085938,-27.111860,222.384291,-54.355086,1,0.2234,0.5912,0.2372,42.6999,0.007,42
156386,151.523438,3.134927,236.900695,43.803170,1,1.4308,1.2425,0.1528,44.6834,0.019,88
156537,352.132874,-63.636005,317.424173,-51.095855,1,2.0260,2.3090,0.0769,46.3380,0.021,88
156739,351.321442,-64.198746,317.458993,-50.429931,1,0.3405,0.4842,0.8467,42.1775,0.023,90
157120,51.328125,-27.447618,222.535046,-55.950727,1,0.2126,0.2539,0.5364,40.5402,0.013,42
157299,34.453125,-5.229529,169.987075,-59.956185,1,0.5504,0.6351,0.0335,42.8890,0.019,42
157477,359.805206,-46.768478,327.135979,-67.829903,1,0.9586,0.7920,0.0736,43.4756,0.011,95
157746,149.414062,1.940072,236.565366,41.393323,1,0.2167,0.5603,0.2926,42.5588,0.018,52
158042,1.666667,-44.399834,327.519190,-70.529554,1,0.1965,0.2093,0.0089,40.0667,0.009,90
158241,347.013428,-62.508568,321.472056,-50.735330,1,0.0424,0.0344,0.0149,35.8982,0.018,62
158507,351.299988,-62.320400,319.038597,-52.026867,1,0.6109,0.7017,0.0936,43.1535,0.018,90
158515,52.910156,-26.276812,220.926149,-54.363918,1,0.3265,0.3196,0.0097,41.1146,0.008,42
158573,33.574219,-4.780192,168.064587,-60.175886,1,0.2736,0.5337,0.1790,42.4315,0.019,90
158697,35.332031,-5.979157,172.286722,-59.931743,1,0.3830,0.4049,0.0394,41.7159,0.022,90
158731,347.846710,-64.760857,318.929827,-49.143596,1,0.0000,0.0000,0.0000,nan,0.019,65
158813,150.820312,3.134927,236.341348,43.230123,1,0.1966,0.1962,0.0134,39.9093,0.016,62
158904,2.097458,-45.783966,324.737840,-69.478613,1,1.8591,1.6874,0.1207,45.5041,0.011,88
159277,359.805206,-46.768478,327.135979,-67.829903,1,0.6452,0.5890,0.0356,42.6903,0.011,90
159316,151.699219,3.583322,236.533224,44.205648,1,0.0000,0.0000,0.0000,nan,0.016,65
159491,151.171875,1.342993,238.602520,42.464379,1,0.0000,0.0000,0.0000,nan,0.026,16
159665,33.574219,-4.780192,168.064587,-60.175886,1,0.3253,0.3336,0.0122,41.2221,0.019,90
159925,150.820312,3.134927,236.341348,43.230123,1,1.6095,1.6763,0.0691,45.4866,0.016,88
160048,51.855469,-28.630989,224.733260,-55.649872,1,0.3638,0.4409,0.3427,41.9350,0.009,90
160426,351.321442,-64.198746,317.458993,-50.429931,1,0.3511,0.3353,0.0637,41.2356,0.023,90
160527,2.097458,-45.783966,324.737840,-69.478613,1,0.3162,0.2266,0.6723,40.2609,0.011,90
160737,52.207031,-28.291550,224.208534,-55.300157,1,0.3593,0.5374,0.2042,42.4496,0.007,88
160921,349.160583,-64.760857,318.219706,-49.458924,1,0.0000,0.0000,0.0000,nan,0.020,65
161135,52.207031,-28.291550,224.208534,-55.300157,1,0.4293,0.4326,0.0209,41.8860,0.007,90
161411,150.468750,3.732834,235.392208,43.283244,1,0.0000,0.0000,0.0000,nan,0.020,65
161432,149.414062,2.238686,236.239766,41.565558,1,0.0000,0.0000,0.0000,nan,0.017,16
161521,346.130127,-63.072620,321.423103,-50.042305,1,0.1147,0.1640,0.9394,39.4785,0.020,90
161591,150.996094,4.181528,235.291975,43.970869,1,0.0761,0.1416,0.0294,39.1295,0.015,62
161877,150.468750,1.641510,237.714575,42.075234,1,0.0000,0.0000,0.0000,nan,0.017,65
161988,51.679688,-27.447618,222.618229,-55.642263,1,0.5641,0.5875,0.0180,42.6834,0.010,90
162093,51.855469,-28.630989,224.733260,-55.649872,1,0.1580,0.1822,0.0192,39.7311,0.009,62
162139,150.820312,3.134927,236.341348,43.230123,1,0.1036,0.0926,0.0114,38.1367,0.016,42
162152,348.529419,-61.755440,321.293980,-51.763351,1,0.0000,0.0000,0.0000,nan,0.016,16
162531,347.861847,-61.943836,321.519104,-51.424048,1,0.5290,0.4535,0.0438,42.0076,0.017,90
162538,150.996094,4.181528,235.291975,43.970869,1,0.2795,0.5154,0.2561,42.3402,0.015,90
162994,53.085938,-28.122234,224.100909,-54.509752,1,0.5827,0.5520,0.0825,42.5199,0.007,90
163208,33.398438,-4.331149,167.226341,-59.936551,1,0.1546,0.1227,0.0022,38.7928,0.018,42
163680,0.574468,-45.981140,327.041068,-68.778764,1,0.3407,0.3516,0.0201,41.3557,0.006,90
163894,52.207031,-28.630989,224.800211,-55.343637,1,0.3027,0.3162,0.0110,41.0874,0.009,88
164582,32.871094,-4.780192,166.959493,-60.615132,1,0.6341,0.6230,0.0125,42.8383,0.017,90
164805,51.679688,-27.447618,222.618229,-55.642263,1,0.0000,0.0000,0.0000,nan,0.010,92
165406,351.321442,-64.198746,317.458993,-50.429931,1,0.0000,0.0000,0.0000,nan,0.023,65
165494,349.615387,-63.636005,318.927246,-50.506542,1,0.2616,0.2665,0.0118,40.6604,0.018,90
165507,151.171875,1.342993,238.602520,42.464379,1,0.2323,0.2251,0.0187,40.2445,0.026,67
165821,149.414062,2.238686,236.239766,41.565558,1,0.2458,1.0072,0.4491,44.1195,0.017,42
165985,51.679688,-27.447618,222.618229,-55.642263,1,0.4551,0.4933,0.2305,42.2259,0.010,88
166103,0.589520,-47.161343,325.385896,-67.769893,1,0.2871,0.2079,0.4627,40.0507,0.009,90
166165,150.117188,2.836105,236.124718,42.483719,1,0.0000,0.0000,0.0000,nan,0.016,16
166186,51.679688,-27.447618,222.618229,-55.642263,1,0.5156,0.5329,0.0107,42.4278,0.010,42
166195,149.238281,3.882372,234.283829,42.351155,1,0.0000,0.0000,0.0000,nan,0.033,16
166330,149.414062,1.940072,236.565366,41.393323,1,0.0000,0.0000,0.0000,nan,0.018,65
166697,53.613281,-28.630989,225.073365,-54.119461,1,1.1664,1.1160,0.0368,44.3951,0.006,95
166727,34.453125,-5.229529,169.987075,-59.956185,1,0.2659,0.2946,0.0220,40.9098,0.019,62
166956,359.811707,-45.191612,329.485675,-69.150905,1,0.0000,0.0000,0.0000,nan,0.010,65
167123,149.414062,2.238686,236.239766,41.565558,1,0.0000,0.0000,0.0000,nan,0.017,65
167220,359.446716,-44.201530,331.730015,-69.805709,1,0.7136,0.7235,0.0416,43.2348,0.010,90
167260,2.071130,-45.191612,325.606223,-69.989264,1,0.3449,0.3554,0.0193,41.3826,0.011,62
167310,349.615387,-63.636005,318.927246,-50.506542,1,0.3079,0.3246,0.0181,41.1538,0.018,42
167417,349.046051,-61.943836,320.796530,-51.753706,1,0.5774,0.5558,0.0410,42.5377,0.017,90
167436,350.230255,-61.943836,320.053946,-52.070537,1,0.1918,0.2263,0.0138,40.2576,0.017,90
167488,348.586945,-64.573555,318.693903,-49.477869,1,0.3928,0.3939,0.0171,41.6449,0.018,90
167910,348.908447,-63.823658,319.169886,-50.176186,1,0.4761,0.4646,0.0434,42.0701,0.018,90
168146,51.855469,-26.276812,220.627031,-55.293792,1,0.0000,0.0000,0.0000,nan,0.014,65
168465,149.414062,1.940072,236.565366,41.393323,1,2.1492,2.4337,0.1988,46.4769,0.018,95
168659,53.613281,-27.953188,223.929533,-54.024772,1,0.2663,0.3135,0.2502,41.0656,0.007,67
168952,358.312500,-44.993881,332.185785,-68.685906,1,0.0000,0.0000,0.0000,nan,0.009,65
168957,53.085938,-27.784405,223.525509,-54.460748,1,0.0000,0.0000,0.0000,nan,0.007,65
168967,347.812500,-63.448284,320.128971,-50.202348,1,0.1845,0.2142,0.1608,40.1234,0.021,90
168989,151.171875,1.342993,238.602520,42.464379,1,0.0000,0.0000,0.0000,nan,0.026,16
169133,347.013428,-62.508568,321.472056,-50.735330,1,0.4656,0.4799,0.2303,42.1546,0.018,90
169203,347.861847,-61.943836,321.519104,-51.424048,1,0.1833,0.1791,0.1849,39.6900,0.017,90
169282,149.414062,3.433834,234.919132,42.245550,1,0.3181,0.3458,0.3165,41.3133,0.027,90


================================================
FILE: examples/docker/modin-ray/Dockerfile
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

# Build image from this dockerfile like this:
# docker build -t modin-ray:latest .

# Base image for the benchmark container.
FROM ubuntu:20.04

# Proxy settings
# These are exported for the build steps below and reset to empty values near
# the end of this file.
ENV http_proxy=${http_proxy}
ENV https_proxy=${https_proxy}
ENV no_proxy=${no_proxy}

# Install wget (used below to download the Miniconda installer) and remove the
# apt package lists afterwards.
RUN apt-get update --yes \
    && apt-get install wget --yes \
    && rm -rf /var/lib/apt/lists/*

# Account whose home directory hosts Miniconda and the benchmark scripts.
ENV USER modin
ENV UID 1000
ENV HOME /home/$USER

# NOTE(review): the account is created here, but no `USER` instruction follows
# in this file, so the build steps and the container presumably still run as
# root -- confirm whether that is intended.
RUN adduser --disabled-password \
    --gecos "Non-root user" \
    --uid $UID \
    --home $HOME \
    $USER

# Conda settings
# Miniconda is installed under the user's home directory and its bin/ directory
# is put first on PATH.
ENV CONDA_DIR=${HOME}/miniconda
ENV CONDA_ENV_NAME=modin-ray
ENV PATH="${CONDA_DIR}/bin:${PATH}"

# Download the Miniconda installer, run it non-interactively into CONDA_DIR,
# register conda with bash, and delete the installer.
RUN wget -nv https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /tmp/miniconda3.sh \
    && bash /tmp/miniconda3.sh -b -p "${CONDA_DIR}" -f -u \
    && "${CONDA_DIR}/bin/conda" init bash \
    && rm -f /tmp/miniconda3.sh

# Update conda itself, create the ${CONDA_ENV_NAME} environment from conda-forge
# with the packages the benchmark scripts import, then clean the conda caches.
RUN conda update -n base -c defaults conda -y \
    && conda create -n ${CONDA_ENV_NAME} --yes -c conda-forge --strict-channel-priority \
        modin-ray \
        ray-dashboard \
        scikit-learn \
        scikit-learn-intelex \
        xgboost \
    && conda clean --all --yes

# Activate ${CONDA_ENV_NAME} for interactive shells
RUN echo "source ${CONDA_DIR}/bin/activate ${CONDA_ENV_NAME}" >> "${HOME}/.bashrc"
# Activate ${CONDA_ENV_NAME} for non-interactive shells
# The following line comments out the line that prevents ~/.bashrc execution in
# non-interactive mode.
RUN sed -e 's,\(^[[:space:]]\+[*]) return;;$\),# \1,' -i "${HOME}/.bashrc"
# BASH_ENV makes non-interactive bash source ~/.bashrc on startup.
ENV BASH_ENV="${HOME}/.bashrc"

# Set up benchmark scripts
# The scripts are copied into the home directory, which is also the working
# directory; /dataset is created as the place for the input data (the usage
# text of the benchmark scripts mounts the host dataset directory there).
COPY nyc-taxi.py "${HOME}"
COPY census.py "${HOME}"
COPY plasticc.py "${HOME}"
RUN mkdir /dataset
WORKDIR ${HOME}

# Clean up proxy settings to publish on Docker Hub
ENV http_proxy=
ENV https_proxy=
ENV no_proxy=

# Set entrypoint with arguments expansion
# The arguments given to `docker run <image> ...` reach bash as $0, $1, ...;
# `exec $0 $*` runs them as a command from a non-interactive bash, which
# sources BASH_ENV first.
ENTRYPOINT ["/bin/bash", "-c", "exec $0 $*"]


================================================
FILE: examples/docker/modin-ray/build-docker-image.sh
================================================
#!/bin/bash -e

# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

# Switch to the directory that contains this script so that `docker build .`
# uses it as the build context regardless of where the script is invoked from.
cd "`dirname \"$0\"`"

# Build the image from the Dockerfile in this directory and tag it `modin-ray`.
docker build -t modin-ray .

# Print run instructions for the three benchmark scripts. `-e` makes echo
# interpret the \n and \t escapes embedded in the message.
echo -e '\nNYC TAXI BENCHMARK
User is responsible for preparing the dataset.
It Can be generated by following the instructions on the link:
https://github.com/toddwschneider/nyc-taxi-data#instructions
To run the benchmark execute:
\tdocker run --rm -v /path/to/dataset:/dataset modin-ray python nyc-taxi.py <name of file starting with /dataset>

CENSUS BENCHMARK
User is responsible for preparing the dataset.
It can be downloaded from the following link:
https://rapidsai-data.s3.us-east-2.amazonaws.com/datasets/ipums_education2income_1970-2010.csv.gz
To run the benchmark execute:
\tdocker run --rm -v /path/to/dataset:/dataset modin-ray python census.py <name of file starting with /dataset>

PLASTICC BENCHMARK
User is responsible for preparing the datasets.
The datasets must include four files: training set, test set,
training set metadata and test set metadata.
To run the benchmark execute:
\tdocker run --rm -v /path/to/dataset:/dataset modin-ray python plasticc.py <training set file name starting with /dataset> <test set file name starting with /dataset> <training set metadata file name starting with /dataset> <test set metadata file name starting with /dataset>\n'


================================================
FILE: examples/docker/modin-ray/census.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import sys
import time

import sklearnex
from sklearn import config_context

import modin.pandas as pd

sklearnex.patch_sklearn()
import numpy as np
import sklearn.linear_model as lm
from sklearn.model_selection import train_test_split


def read(filename):
    """
    Read the census CSV file using a fixed column schema.

    The first line of the file is skipped (``skiprows=1``) and the column
    names and dtypes defined in this function are applied instead.

    Parameters
    ----------
    filename : str
        Location of the CSV file; passed as-is to ``pd.read_csv``.

    Returns
    -------
    DataFrame
        Frame with 45 columns in the order defined by the schema below.
    """
    # The schema is a single ordered mapping so that a column name and its
    # dtype cannot drift apart, as they could with two index-paired lists.
    leading_dtypes = {
        "YEAR0": "int64",
        "DATANUM": "int64",
        "SERIAL": "int64",
        "CBSERIAL": "float64",
        "HHWT": "int64",
        "CPI99": "float64",
        "GQ": "int64",
        "QGQ": "float64",
        "PERNUM": "int64",
        "PERWT": "int64",
        "SEX": "int64",
        "AGE": "int64",
        "EDUC": "int64",
        "EDUCD": "int64",
        "INCTOT": "int64",
    }
    # The remaining 30 columns are named "<ATTRIBUTE>_<SUFFIX>", grouped by
    # attribute, and are all parsed as float64.
    attributes = ("SEX", "AGE", "EDUC", "EDUCD", "INCTOT")
    suffixes = ("HEAD", "MOM", "POP", "SP", "MOM2", "POP2")
    suffixed_dtypes = {
        f"{attribute}_{suffix}": "float64"
        for attribute in attributes
        for suffix in suffixes
    }
    dtypes = {**leading_dtypes, **suffixed_dtypes}

    return pd.read_csv(
        filename,
        names=list(dtypes),
        dtype=dtypes,
        skiprows=1,
    )


def etl(df):
    """
    Prepare the census frame for model training.

    Keeps a fixed subset of columns, filters rows by three marker values,
    rescales ``INCTOT`` by ``CPI99``, fills missing values with -1 and casts
    every kept column to float64.

    Parameters
    ----------
    df : DataFrame
        Frame containing at least the columns selected below.

    Returns
    -------
    tuple
        ``(frame, X, y)`` where ``frame`` is the processed frame, ``y`` is its
        ``EDUC`` column and ``X`` is ``frame`` without ``EDUC`` and ``CPI99``.
    """
    selected = [
        "YEAR0", "DATANUM", "SERIAL", "CBSERIAL", "HHWT", "CPI99",
        "GQ", "PERNUM", "SEX", "AGE", "INCTOT", "EDUC", "EDUCD",
        "EDUC_HEAD", "EDUC_POP", "EDUC_MOM",
        "EDUCD_MOM2", "EDUCD_POP2",
        "INCTOT_MOM", "INCTOT_POP", "INCTOT_MOM2", "INCTOT_POP2", "INCTOT_HEAD",
        "SEX_HEAD",
    ]  # fmt: skip
    frame = df[selected]

    # Row filters (the excluded values are presumably missing-data markers;
    # confirm against the dataset description).
    frame = frame[frame["INCTOT"] != 9999999]
    frame = frame[frame["EDUC"] != -1]
    frame = frame[frame["EDUCD"] != -1]

    frame["INCTOT"] = frame["INCTOT"] * frame["CPI99"]

    # Column by column: replace missing values with -1, then cast to float64.
    for name in selected:
        frame[name] = frame[name].fillna(-1).astype("float64")

    target = frame["EDUC"]
    features = frame.drop(columns=["EDUC", "CPI99"])
    return frame, features, target


def mse(y_test, y_pred):
    """
    Compute the mean squared error between targets and predictions.

    Parameters
    ----------
    y_test : array-like
        Reference values; must support elementwise arithmetic and ``.mean()``.
    y_pred : array-like
        Predicted values, compatible with `y_test` for subtraction.

    Returns
    -------
    scalar
        Mean of the squared elementwise differences.
    """
    squared_errors = (y_test - y_pred) ** 2
    return squared_errors.mean()


def cod(y_test, y_pred):
    """
    Compute the coefficient of determination of the predictions.

    Parameters
    ----------
    y_test : array-like
        Reference values; must support elementwise arithmetic, ``.mean()``
        and ``.sum()``.
    y_pred : array-like
        Predicted values, compatible with `y_test` for subtraction.

    Returns
    -------
    scalar
        One minus the ratio of the residual sum of squares to the sum of
        squared deviations of `y_test` from its mean.
    """
    deviations_from_mean = y_test - y_test.mean()
    total_sum_of_squares = (deviations_from_mean**2).sum()
    residual_sum_of_squares = ((y_test - y_pred) ** 2).sum()
    return 1 - (residual_sum_of_squares / total_sum_of_squares)


def ml(X, y, random_state, n_runs, test_size):
    """
    Fit a Ridge regression on repeated random splits and summarize the scores.

    Parameters
    ----------
    X : array-like
        Feature matrix; converted to a contiguous float64 array.
    y : array-like
        Target values; converted to a contiguous float64 array.
    random_state : int
        Seed used for the first split; increased by 777 after every run.
    n_runs : int
        Number of split / fit / predict rounds.
    test_size : float
        Forwarded to ``train_test_split`` as ``test_size``.

    Returns
    -------
    dict
        Scores under the keys ``mse_mean``, ``cod_mean``, ``mse_dev`` and
        ``cod_dev``.
    """

    def _sample_dev(values, mean):
        # Square root of the summed squared deviations divided by (count - 1).
        return pow(
            sum([(value - mean) ** 2 for value in values]) / (len(values) - 1),
            0.5,
        )

    regressor = lm.Ridge()

    features = np.ascontiguousarray(X, dtype=np.float64)
    targets = np.ascontiguousarray(y, dtype=np.float64)

    mse_per_run = []
    cod_per_run = []

    print("ML runs: ", n_runs)
    for _ in range(n_runs):
        X_train, X_test, y_train, y_test = train_test_split(
            features, targets, test_size=test_size, random_state=random_state
        )
        # Every run gets its own seed.
        random_state += 777

        with config_context(assume_finite=True):
            fitted = regressor.fit(X_train, y_train)

        predicted = fitted.predict(X_test)

        mse_per_run.append(mse(y_test, predicted))
        cod_per_run.append(cod(y_test, predicted))

    mse_mean = sum(mse_per_run) / len(mse_per_run)
    cod_mean = sum(cod_per_run) / len(cod_per_run)

    return {
        "mse_mean": mse_mean,
        "cod_mean": cod_mean,
        "mse_dev": _sample_dev(mse_per_run, mse_mean),
        "cod_dev": _sample_dev(cod_per_run, cod_mean),
    }


def measure(name, func, *args, **kw):
    """
    Call `func` and print how long the call took.

    The duration is taken from ``time.perf_counter()``, a monotonic clock, so
    system clock adjustments during the call cannot distort the reported value.

    Parameters
    ----------
    name : str
        Label printed in front of the elapsed time.
    func : callable
        Function to time.
    *args : tuple
        Positional arguments forwarded to `func`.
    **kw : dict
        Keyword arguments forwarded to `func`.

    Returns
    -------
    object
        Whatever `func` returned.
    """
    started = time.perf_counter()
    res = func(*args, **kw)
    elapsed = time.perf_counter() - started
    print(f"{name}: {elapsed} sec")
    return res


def main():
    """
    Run the census benchmark for the data file named on the command line.

    Prints a usage message and returns when the script did not receive exactly
    one command-line argument; otherwise times the reading, ETL and ML steps.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print(
            "USAGE: docker run --rm -v /path/to/dataset:/dataset python census.py <data file name starting with /dataset>"
        )
        return
    (data_file,) = cli_args

    # ML specific
    n_runs = 50
    test_size = 0.1
    random_state = 777

    frame = measure("Reading", read, data_file)
    _, features, target = measure("ETL", etl, frame)
    measure(
        "ML",
        ml,
        features,
        target,
        random_state=random_state,
        n_runs=n_runs,
        test_size=test_size,
    )


if __name__ == "__main__":
    main()


================================================
FILE: examples/docker/modin-ray/nyc-taxi.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import sys
import time

import modin.pandas as pd


def read(filename):
    """
    Load the headerless NYC taxi CSV file with explicit column names.

    Parameters
    ----------
    filename : str
        Path of the CSV file to read.

    Returns
    -------
    modin.pandas.DataFrame
        Frame with 51 named columns; ``pickup_datetime`` and
        ``dropoff_datetime`` are parsed as dates.
    """
    trip_columns = [
        "trip_id",
        "vendor_id",
        "pickup_datetime",
        "dropoff_datetime",
        "store_and_fwd_flag",
        "rate_code_id",
        "pickup_longitude",
        "pickup_latitude",
        "dropoff_longitude",
        "dropoff_latitude",
        "passenger_count",
        "trip_distance",
        "fare_amount",
        "extra",
        "mta_tax",
        "tip_amount",
        "tolls_amount",
        "ehail_fee",
        "improvement_surcharge",
        "total_amount",
        "payment_type",
        "trip_type",
        "pickup",
        "dropoff",
        "cab_type",
    ]
    weather_columns = [
        "precipitation",
        "snow_depth",
        "snowfall",
        "max_temperature",
        "min_temperature",
        "average_wind_speed",
    ]
    # The location columns appear once per trip endpoint with the same suffixes.
    location_suffixes = [
        "nyct2010_gid",
        "ctlabel",
        "borocode",
        "boroname",
        "ct2010",
        "boroct2010",
        "cdeligibil",
        "ntacode",
        "ntaname",
        "puma",
    ]
    location_columns = [
        f"{endpoint}_{suffix}"
        for endpoint in ("pickup", "dropoff")
        for suffix in location_suffixes
    ]

    return pd.read_csv(
        filename,
        names=trip_columns + weather_columns + location_columns,
        header=None,
        parse_dates=["pickup_datetime", "dropoff_datetime"],
    )


def q1(df):
    """
    Count the ``cab_type`` entries of every ``cab_type`` group.

    Parameters
    ----------
    df : modin.pandas.DataFrame
        Frame with a ``cab_type`` column.

    Returns
    -------
    modin.pandas.Series
        Counts indexed by ``cab_type``.
    """
    by_cab_type = df.groupby("cab_type")
    return by_cab_type["cab_type"].count()


def q2(df):
    """
    Compute the mean ``total_amount`` for every ``passenger_count`` value.

    Only the two needed columns are selected before grouping. Averaging every
    column first would process the whole frame for no benefit and raises a
    ``TypeError`` on non-numeric columns with pandas >= 2.0 semantics.

    Parameters
    ----------
    df : modin.pandas.DataFrame
        Frame with ``passenger_count`` and ``total_amount`` columns.

    Returns
    -------
    modin.pandas.DataFrame
        Frame with the columns ``passenger_count`` and ``total_amount``, one
        row per distinct ``passenger_count``.
    """
    needed = ["passenger_count", "total_amount"]
    return df[needed].groupby("passenger_count", as_index=False).mean()


def q3(df):
    """
    Count the trips per pickup year and passenger count.

    Parameters
    ----------
    df : modin.pandas.DataFrame
        Frame with a datetime ``pickup_datetime`` column and a
        ``passenger_count`` column.

    Returns
    -------
    modin.pandas.DataFrame
        One row per (year, passenger count) pair with the group size.
    """
    pickup_year = df["pickup_datetime"].dt.year
    passengers = df["passenger_count"]
    yearly = pd.DataFrame(
        {"pickup_datetime": pickup_year, "passenger_count": passengers}
    )
    grouped = yearly.groupby(["pickup_datetime", "passenger_count"], as_index=False)
    return grouped.size()


def q4(df):
    """
    Count the trips per passenger count, pickup year and whole-number distance.

    Parameters
    ----------
    df : modin.pandas.DataFrame
        Frame with ``passenger_count``, datetime ``pickup_datetime`` and
        ``trip_distance`` columns.

    Returns
    -------
    modin.pandas.DataFrame
        Group sizes ordered by ascending year and, within a year, by
        descending size.
    """
    keys = ["passenger_count", "pickup_datetime", "trip_distance"]
    passengers = df["passenger_count"]
    pickup_year = df["pickup_datetime"].dt.year
    # The distance is cast to int64 before it is used as a grouping key.
    whole_distance = df["trip_distance"].astype("int64")
    prepared = pd.DataFrame(
        {
            "passenger_count": passengers,
            "pickup_datetime": pickup_year,
            "trip_distance": whole_distance,
        }
    )
    sizes = prepared.groupby(keys, as_index=False).size()
    return sizes.sort_values(by=["pickup_datetime", "size"], ascending=[True, False])


def measure(name, func, *args, **kw):
    """
    Call `func` and print how long the call took.

    The duration is taken from ``time.perf_counter()``, a monotonic clock, so
    system clock adjustments during the call cannot distort the reported value.

    Parameters
    ----------
    name : str
        Label printed in front of the elapsed time.
    func : callable
        Function to time.
    *args : tuple
        Positional arguments forwarded to `func`.
    **kw : dict
        Keyword arguments forwarded to `func`.

    Returns
    -------
    object
        Whatever `func` returned.
    """
    started = time.perf_counter()
    res = func(*args, **kw)
    elapsed = time.perf_counter() - started
    print(f"{name}: {elapsed} sec")
    return res


def main():
    """
    Run the NYC taxi benchmark for the data file named on the command line.

    Prints a usage message and returns when the script did not receive exactly
    one command-line argument; otherwise times the read step and the four
    queries.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print(
            "USAGE: docker run --rm -v /path/to/dataset:/dataset python nyc-taxi.py <data file name starting with /dataset>"
        )
        return
    (data_file,) = cli_args

    frame = measure("Reading", read, data_file)
    # The queries run in a fixed order, each one against the same frame.
    for label, query in (("Q1", q1), ("Q2", q2), ("Q3", q3), ("Q4", q4)):
        measure(label, query, frame)


if __name__ == "__main__":
    main()


================================================
FILE: examples/docker/modin-ray/plasticc.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import sys
import time
from functools import partial

import numpy as np
import sklearnex
import xgboost as xgb

import modin.pandas as pd

sklearnex.patch_sklearn()
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder


################ helper functions ###############################
def create_dtypes():
    """
    Build the column-to-dtype mappings used when reading the PLAsTiCC files.

    Returns
    -------
    tuple of dict
        ``(dtypes, meta_dtypes)``: the mapping for the data files and the
        mapping for the metadata files, both from column name to dtype name.
    """
    dtypes = {
        "object_id": "int32",
        "mjd": "float32",
        "passband": "int32",
        "flux": "float32",
        "flux_err": "float32",
        "detected": "int32",
    }

    # load metadata
    meta_columns = [
        "object_id",
        "ra",
        "decl",
        "gal_l",
        "gal_b",
        "ddf",
        "hostgal_specz",
        "hostgal_photoz",
        "hostgal_photoz_err",
        "distmod",
        "mwebv",
        "target",
    ]
    # One dtype per metadata column, listed in the same order as the names.
    meta_types = ["int32", *["float32"] * 4, "int32", *["float32"] * 5, "int32"]
    meta_dtypes = dict(zip(meta_columns, meta_types))

    return dtypes, meta_dtypes


def ravel_column_names(cols):
    """
    Flatten a two-level column index into ``"<level0>_<level1>"`` names.

    Parameters
    ----------
    cols : MultiIndex
        Column index providing ``get_level_values`` for levels 0 and 1.

    Returns
    -------
    list of str
        One joined name per column, in the original order.
    """
    outer = cols.get_level_values(0)
    inner = cols.get_level_values(1)
    return [f"{top}_{bottom}" for top, bottom in zip(outer, inner)]


def measure(name, func, *args, **kw):
    """
    Call `func` and print how long the call took.

    The duration is taken from ``time.perf_counter()``, a monotonic clock, so
    system clock adjustments during the call cannot distort the reported value.

    Parameters
    ----------
    name : str
        Label printed in front of the elapsed time.
    func : callable
        Function to time.
    *args : tuple
        Positional arguments forwarded to `func`.
    **kw : dict
        Keyword arguments forwarded to `func`.

    Returns
    -------
    object
        Whatever `func` returned.
    """
    started = time.perf_counter()
    res = func(*args, **kw)
    elapsed = time.perf_counter() - started
    print(f"{name}: {elapsed} sec")
    return res


def all_etl(train, train_meta, test, test_meta):
    """
    Run ``etl`` on the training pair and then on the test pair.

    Parameters
    ----------
    train : modin.pandas.DataFrame
        Training data.
    train_meta : modin.pandas.DataFrame
        Training metadata.
    test : modin.pandas.DataFrame
        Test data.
    test_meta : modin.pandas.DataFrame
        Test metadata.

    Returns
    -------
    tuple
        ``(train_final, test_final)`` as returned by the two ``etl`` calls.
    """
    return (etl(train, train_meta), etl(test, test_meta))


def split_step(train_final, test_final):
    """
    Turn the processed frames into arrays and split off a validation part.

    Parameters
    ----------
    train_final : modin.pandas.DataFrame
        Processed training frame with ``object_id`` and ``target`` columns.
    test_final : modin.pandas.DataFrame
        Processed test frame with an ``object_id`` column.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_test, y_test, Xt, classes, class_weights)``.
        ``Xt`` holds the test features, ``classes`` the sorted unique targets
        and ``class_weights`` maps every class to 1, except 64 and 15 which
        map to 2.
    """
    features = train_final.drop(["object_id", "target"], axis=1).values
    Xt = test_final.drop(["object_id"], axis=1).values

    target = train_final["target"]
    assert features.shape[1] == Xt.shape[1]
    classes = sorted(target.unique())

    class_weights = dict.fromkeys(classes, 1)
    for heavier in (64, 15):
        class_weights[heavier] = 2

    # Replace the raw class labels by consecutive integer codes.
    encoded = LabelEncoder().fit_transform(target)

    X_train, X_test, y_train, y_test = train_test_split(
        features, encoded, test_size=0.1, stratify=encoded, random_state=126
    )

    return X_train, y_train, X_test, y_test, Xt, classes, class_weights


def multi_weighted_logloss(y_true, y_preds, classes, class_weights):
    """
    Compute the class-weighted multi-class log loss for the PLAsTiCC challenge.

    Refactored from the implementation by olivier,
    https://www.kaggle.com/ogrellier.

    Parameters
    ----------
    y_true : numpy.ndarray
        True class label of every sample.
    y_preds : numpy.ndarray
        Predicted probabilities; reshaped in column-major order to
        ``(len(y_true), len(classes))``.
    classes : list
        Known classes; only the length is used.
    class_weights : dict
        Weight per class; the weights are applied in sorted key order.

    Returns
    -------
    float
        The weighted loss.
    """
    n_samples = y_true.shape[0]
    probabilities = y_preds.reshape(n_samples, len(classes), order="F")
    one_hot = pd.get_dummies(y_true)
    # Keep the probabilities away from 0 and 1 before taking the logarithm.
    probabilities = np.clip(probabilities, 1e-15, 1 - 1e-15)
    log_probabilities = np.log(probabilities)
    # Per class: summed log-probability of the samples that belong to it.
    log_sum_per_class = np.sum(one_hot.values * log_probabilities, axis=0)
    samples_per_class = one_hot.sum(axis=0).values.astype(float)
    weights = np.array([class_weights[key] for key in sorted(class_weights)])
    weighted = log_sum_per_class * weights / samples_per_class

    return -np.sum(weighted) / np.sum(weights)


def xgb_multi_weighted_logloss(y_predicted, y_true, classes, class_weights):
    """
    Adapt ``multi_weighted_logloss`` to a (predictions, data) call signature.

    Parameters
    ----------
    y_predicted : numpy.ndarray
        Predicted probabilities.
    y_true : object
        Data container whose ``get_label()`` returns the true labels.
    classes : list
        Known classes.
    class_weights : dict
        Weight per class.

    Returns
    -------
    tuple
        ``("wloss", loss)``.
    """
    labels = y_true.get_label()
    weighted_loss = multi_weighted_logloss(labels, y_predicted, classes, class_weights)
    return "wloss", weighted_loss


################ helper functions ###############################


def read(
    training_set_filename,
    test_set_filename,
    training_set_metadata_filename,
    test_set_metadata_filename,
    dtypes,
    meta_dtypes,
):
    """
    Read the PLAsTiCC training/test data and their metadata files.

    The test metadata is read without a dtype for ``target``. The reduced
    mapping is built as a copy, so the caller's `meta_dtypes` is never
    modified (not even temporarily, and not when a read fails).

    Parameters
    ----------
    training_set_filename : str
        Path of the training data CSV file.
    test_set_filename : str
        Path of the test data CSV file; its header row is replaced by the
        keys of `dtypes`.
    training_set_metadata_filename : str
        Path of the training metadata CSV file.
    test_set_metadata_filename : str
        Path of the test metadata CSV file.
    dtypes : dict
        Column name to dtype mapping for the data files.
    meta_dtypes : dict
        Column name to dtype mapping for the metadata files.

    Returns
    -------
    tuple
        ``(train, train_meta, test, test_meta)``.
    """
    train = pd.read_csv(training_set_filename, dtype=dtypes)
    test = pd.read_csv(
        test_set_filename,
        names=list(dtypes),
        dtype=dtypes,
        header=0,
    )

    train_meta = pd.read_csv(training_set_metadata_filename, dtype=meta_dtypes)
    # Same dtypes minus "target", without touching the caller's dictionary.
    test_meta_dtypes = {
        column: dtype for column, dtype in meta_dtypes.items() if column != "target"
    }
    test_meta = pd.read_csv(test_set_metadata_filename, dtype=test_meta_dtypes)

    return (train, train_meta, test, test_meta)


def etl(df, df_meta):
    """
    Aggregate the per-observation data by object and join it onto the metadata.

    Two helper columns (``flux_ratio_sq`` and ``flux_by_flux_ratio_sq``) are
    added to `df` itself before the aggregation.

    Parameters
    ----------
    df : modin.pandas.DataFrame
        Observations with ``object_id``, ``passband``, ``flux``, ``flux_err``,
        ``detected`` and ``mjd`` columns.
    df_meta : modin.pandas.DataFrame
        Metadata with ``object_id``, ``ra``, ``decl``, ``gal_l`` and ``gal_b``
        columns.

    Returns
    -------
    modin.pandas.DataFrame
        Metadata without the four coordinate columns, left-joined with the
        aggregated features on ``object_id``.
    """
    # workaround for Modin_on_ray. Eventually this should be fixed:
    # the ratio is squared by multiplication instead of np.power(..., 2.0)
    flux_ratio = df["flux"] / df["flux_err"]
    df["flux_ratio_sq"] = flux_ratio * flux_ratio
    df["flux_by_flux_ratio_sq"] = df["flux"] * df["flux_ratio_sq"]

    aggregations = {
        "passband": ["mean"],
        "flux": ["min", "max", "mean", "skew"],
        "flux_err": ["min", "max", "mean"],
        "detected": ["mean"],
        "mjd": ["max", "min"],
        "flux_ratio_sq": ["sum"],
        "flux_by_flux_ratio_sq": ["sum"],
    }
    per_object = df.groupby("object_id", sort=False).agg(aggregations)

    # Collapse the (column, aggregation) pairs into flat column names.
    per_object.columns = ravel_column_names(per_object.columns)

    flux_range = per_object["flux_max"] - per_object["flux_min"]
    per_object["flux_diff"] = flux_range
    per_object["flux_dif2"] = per_object["flux_diff"] / per_object["flux_mean"]
    weighted_sum = per_object["flux_by_flux_ratio_sq_sum"]
    per_object["flux_w_mean"] = weighted_sum / per_object["flux_ratio_sq_sum"]
    per_object["flux_dif3"] = per_object["flux_diff"] / per_object["flux_w_mean"]
    per_object["mjd_diff"] = per_object["mjd_max"] - per_object["mjd_min"]

    # Only the difference of the two mjd extremes is kept.
    per_object = per_object.drop(["mjd_max", "mjd_min"], axis=1).reset_index()

    trimmed_meta = df_meta.drop(["ra", "decl", "gal_l", "gal_b"], axis=1)

    return trimmed_meta.merge(per_object, on="object_id", how="left")


def ml(train_final, test_final):
    """
    Train an XGBoost classifier and return its weighted validation loss.

    Parameters
    ----------
    train_final : modin.pandas.DataFrame
        Processed training frame.
    test_final : modin.pandas.DataFrame
        Processed test frame.

    Returns
    -------
    float
        ``multi_weighted_logloss`` of the predictions on the validation part.
    """
    X_train, y_train, X_test, y_test, Xt, classes, class_weights = split_step(
        train_final, test_final
    )

    cpu_params = dict(
        objective="multi:softprob",
        eval_metric="merror",
        tree_method="hist",
        nthread=16,
        num_class=14,
        max_depth=7,
        verbosity=1,
        subsample=0.7,
        colsample_bytree=0.7,
    )

    # Custom evaluation function with the class information bound in.
    weighted_metric = partial(
        xgb_multi_weighted_logloss, classes=classes, class_weights=class_weights
    )

    dtrain = xgb.DMatrix(data=X_train, label=y_train)
    dvalid = xgb.DMatrix(data=X_test, label=y_test)
    dtest = xgb.DMatrix(data=Xt)

    booster = xgb.train(
        cpu_params,
        dtrain=dtrain,
        num_boost_round=60,
        evals=[(dvalid, "eval"), (dtrain, "train")],
        feval=weighted_metric,
        early_stopping_rounds=10,
        verbose_eval=None,
    )

    validation_pred = booster.predict(dvalid)
    cpu_loss = multi_weighted_logloss(
        y_test, validation_pred, classes, class_weights
    )
    # The test set is predicted as well; the result itself is not used.
    booster.predict(dtest)

    return cpu_loss


def main():
    """
    Run the PLAsTiCC benchmark for the four files named on the command line.

    Prints a usage message and returns when the script did not receive exactly
    four command-line arguments (training set, test set, training metadata,
    test metadata); otherwise times the reading, ETL and ML steps and prints
    the validation loss.
    """
    if len(sys.argv) != 5:
        # Plain literal (no placeholders needed); "startin" typo corrected.
        print(
            "USAGE: docker run --rm -v /path/to/dataset:/dataset python plasticc.py <training set file name starting with /dataset> <test set file name starting with /dataset> <training set metadata file name starting with /dataset> <test set metadata file name starting with /dataset>"
        )
        return

    dtypes, meta_dtypes = create_dtypes()

    train, train_meta, test, test_meta = measure(
        "Reading",
        read,
        sys.argv[1],
        sys.argv[2],
        sys.argv[3],
        sys.argv[4],
        dtypes,
        meta_dtypes,
    )
    train_final, test_final = measure(
        "ETL", all_etl, train, train_meta, test, test_meta
    )
    cpu_loss = measure("ML", ml, train_final, test_final)

    print("validation cpu_loss:", cpu_loss)


if __name__ == "__main__":
    main()


================================================
FILE: examples/jupyter/Modin_Taxi.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "cc4bd9e9",
   "metadata": {
    "slideshow": {
     "slide_type": "skip"
    }
   },
   "outputs": [],
   "source": [
    "# To run this notebook as done in the README GIFs, you must first locally download the 2015 NYC Taxi Trip Data.\n",
    "import urllib.request\n",
    "url_path = \"https://modin-datasets.intel.com/green-taxi/green_tripdata_2015-01.csv\"\n",
    "urllib.request.urlretrieve(url_path, \"taxi.csv\")\n",
    "\n",
    "from modin.config import Engine\n",
    "Engine.put(\"dask\")\n",
    "from dask.distributed import Client\n",
    "client = Client(n_workers=12)\n",
    "\n",
    "from modin.config import BenchmarkMode\n",
    "BenchmarkMode.put(True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "97b245e5",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "outputs": [],
   "source": [
    "import modin.pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "b65b121c",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 1.57 s, sys: 683 ms, total: 2.26 s\n",
      "Wall time: 14.2 s\n"
     ]
    }
   ],
   "source": [
    "%time df = pd.read_csv(\"taxi.csv\", parse_dates=[\"tpep_pickup_datetime\", \"tpep_dropoff_datetime\"], quoting=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "c48193b2",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 138 ms, sys: 27.3 ms, total: 166 ms\n",
      "Wall time: 404 ms\n"
     ]
    }
   ],
   "source": [
    "%time isnull = df.isnull()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "1d32ed7c",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 175 ms, sys: 28.4 ms, total: 203 ms\n",
      "Wall time: 663 ms\n"
     ]
    }
   ],
   "source": [
    "%time rounded_trip_distance = df[[\"pickup_longitude\"]].applymap(round)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3ef271dc",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}


================================================
FILE: examples/jupyter/Pandas_Taxi.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "5d674ce8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# To run this notebook as done in the README GIFs, you must first locally download the 2015 NYC Taxi Trip Data.\n",
    "import urllib.request\n",
    "url_path = \"https://modin-datasets.intel.com/green-taxi/green_tripdata_2015-01.csv\"\n",
    "urllib.request.urlretrieve(url_path, \"taxi.csv\")\n",
    "\n",
    "import warnings\n",
    "warnings.filterwarnings(\"ignore\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "27f7321c",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "8de98215",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 30.7 s, sys: 4.25 s, total: 35 s\n",
      "Wall time: 35.3 s\n"
     ]
    }
   ],
   "source": [
    "%time df = pd.read_csv(\"taxi.csv\", parse_dates=[\"tpep_pickup_datetime\", \"tpep_dropoff_datetime\"], quoting=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "14422c3f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 1.37 s, sys: 300 ms, total: 1.67 s\n",
      "Wall time: 1.67 s\n"
     ]
    }
   ],
   "source": [
    "%time isnull = df.isnull()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "f8f87974",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 3.07 s, sys: 305 ms, total: 3.37 s\n",
      "Wall time: 3.37 s\n"
     ]
    }
   ],
   "source": [
    "%time rounded_trip_distance = df[[\"pickup_longitude\"]].applymap(round)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2c7d62bf",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}


================================================
FILE: examples/jupyter/integrations/NLTK.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Demonstrating NLTK Modin Interoperability"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## All the examples in this section are taken / adapted from https://www.kirenz.com/post/2021-12-11-text-mining-and-sentiment-analysis-with-nltk-and-pandas-in-python/text-mining-and-sentiment-analysis-with-nltk-and-pandas-in-python/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import modin.pandas as pd\n",
    "import pandas\n",
    "import nltk\n",
    "from nltk.tokenize import RegexpTokenizer\n",
    "from nltk.corpus import stopwords"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Import some Tweets from Barack Obama \n",
    "modin_df = pd.read_csv(\"https://raw.githubusercontent.com/kirenz/twitter-tweepy/main/tweets-obama.csv\")\n",
    "modin_df.head(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "modin_df['text'] = modin_df['text'].astype(str).str.lower()\n",
    "modin_df.head(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "regexp = RegexpTokenizer('\\w+')\n",
    "\n",
    "modin_df['text_token']=modin_df['text'].apply(regexp.tokenize)\n",
    "modin_df.head(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "nltk.download('stopwords')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Make a list of english stopwords\n",
    "stopwords = nltk.corpus.stopwords.words(\"english\")\n",
    "\n",
    "# Extend the list with your own custom stopwords\n",
    "my_stopwords = ['https']\n",
    "stopwords.extend(my_stopwords)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Remove stopwords\n",
    "modin_df['text_token'] = modin_df['text_token'].apply(lambda x: [item for item in x if item not in stopwords])\n",
    "modin_df.head(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "modin_df['text_string'] = modin_df['text_token'].apply(lambda x: ' '.join([item for item in x if len(item)>2]))\n",
    "modin_df[['text', 'text_token', 'text_string']].head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "nltk.download('punkt')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "all_words = ' '.join([word for word in modin_df['text_string']])\n",
    "tokenized_words = nltk.tokenize.word_tokenize(all_words)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from nltk.probability import FreqDist\n",
    "\n",
    "fdist = FreqDist(tokenized_words)\n",
    "fdist"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "modin_df['text_string_fdist'] = modin_df['text_token'].apply(lambda x: ' '.join([item for item in x if fdist[item] >= 1 ]))\n",
    "modin_df[['text', 'text_token', 'text_string', 'text_string_fdist']].head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#lemmatization\n",
    "nltk.download('wordnet')\n",
    "nltk.download('omw-1.4')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from nltk.stem import WordNetLemmatizer\n",
    "\n",
    "wordnet_lem = WordNetLemmatizer()\n",
    "\n",
    "modin_df['text_string_lem'] = modin_df['text_string_fdist'].apply(wordnet_lem.lemmatize)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# check if the columns are equal\n",
    "modin_df['is_equal']= (modin_df['text_string_fdist']==modin_df['text_string_lem'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# show level count\n",
    "modin_df.is_equal.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "all_words_lem = ' '.join([word for word in modin_df['text_string_lem']])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "import matplotlib.pyplot as plt\n",
    "from wordcloud import WordCloud\n",
    "\n",
    "wordcloud = WordCloud(width=600, \n",
    "                     height=400, \n",
    "                     random_state=2, \n",
    "                     max_font_size=100).generate(all_words_lem)\n",
    "\n",
    "plt.figure(figsize=(10, 7))\n",
    "plt.imshow(wordcloud, interpolation='bilinear')\n",
    "plt.axis('off');"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Replicating NLTK workflow with pandas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Import some Tweets from Barack Obama as pandas df\n",
    "pandas_df = pandas.read_csv(\"https://raw.githubusercontent.com/kirenz/twitter-tweepy/main/tweets-obama.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pandas_df['text'] = pandas_df['text'].astype(str).str.lower()\n",
    "pandas_df.head(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "regexp = RegexpTokenizer('\\w+')\n",
    "\n",
    "pandas_df['text_token']=pandas_df['text'].apply(regexp.tokenize)\n",
    "pandas_df.head(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Remove stopwords\n",
    "pandas_df['text_token'] = pandas_df['text_token'].apply(lambda x: [item for item in x if item not in stopwords])\n",
    "pandas_df.head(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pandas_df['text_string'] = pandas_df['text_token'].apply(lambda x: ' '.join([item for item in x if len(item)>2]))\n",
    "pandas_df[['text', 'text_token', 'text_string']].head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "all_words = ' '.join([word for word in pandas_df['text_string']])\n",
    "tokenized_words = nltk.tokenize.word_tokenize(all_words)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from nltk.probability import FreqDist\n",
    "\n",
    "fdist = FreqDist(tokenized_words)\n",
    "fdist"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pandas_df['text_string_fdist'] = pandas_df['text_token'].apply(lambda x: ' '.join([item for item in x if fdist[item] >= 1 ]))\n",
    "pandas_df[['text', 'text_token', 'text_string', 'text_string_fdist']].head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from nltk.stem import WordNetLemmatizer\n",
    "\n",
    "wordnet_lem = WordNetLemmatizer()\n",
    "\n",
    "pandas_df['text_string_lem'] = pandas_df['text_string_fdist'].apply(wordnet_lem.lemmatize)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# check if the columns are equal\n",
    "pandas_df['is_equal']= (pandas_df['text_string_fdist']==pandas_df['text_string_lem'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# show level count\n",
    "pandas_df.is_equal.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "all_words_lem = ' '.join([word for word in pandas_df['text_string_lem']])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "import matplotlib.pyplot as plt\n",
    "from wordcloud import WordCloud\n",
    "\n",
    "wordcloud = WordCloud(width=600, \n",
    "                     height=400, \n",
    "                     random_state=2, \n",
    "                     max_font_size=100).generate(all_words_lem)\n",
    "\n",
    "plt.figure(figsize=(10, 7))\n",
    "plt.imshow(wordcloud, interpolation='bilinear')\n",
    "plt.axis('off');"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "9752fa87da8bf164654ccc33a595e9110c8fc9bb15d763374a7037fd32519b1f"
  },
  "kernelspec": {
   "display_name": "Python 3.9.7 ('base')",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}


================================================
FILE: examples/jupyter/integrations/altair.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Demonstrating Altair Modin Interoperability\n",
    "### Currently Altair is not interoperable with Modin. Each visualization is created with a Modin and then pandas dataframe for comparison."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import modin.pandas as pd\n",
    "import altair as alt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from vega_datasets import data\n",
    "pandas_cars = data.cars()\n",
    "modin_cars = pd.DataFrame(data.cars())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a visualization with Modin df\n",
    "alt.Chart(modin_cars).mark_point().encode(\n",
    "    x='Horsepower',\n",
    "    y='Miles_per_Gallon',\n",
    "    color='Origin',\n",
    ").interactive()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a visualization with pandas df\n",
    "alt.Chart(pandas_cars).mark_point().encode(\n",
    "    x='Horsepower',\n",
    "    y='Miles_per_Gallon',\n",
    "    color='Origin',\n",
    ").interactive()"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "9752fa87da8bf164654ccc33a595e9110c8fc9bb15d763374a7037fd32519b1f"
  },
  "kernelspec": {
   "display_name": "Python 3.9.7 ('base')",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.18"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}


================================================
FILE: examples/jupyter/integrations/bokeh.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Demonstrating Bokeh Modin Interoperability\n",
    "### Currently Bokeh is not interoperable with Modin. Each visualization is created with a Modin and then pandas dataframe for comparison."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import modin.pandas as pd\n",
    "import pandas\n",
    "from bokeh.plotting import figure\n",
    "from bokeh.models import ColumnDataSource\n",
    "from bokeh.io import show"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "UserWarning: `from_dict` is not currently supported by PandasOnRay, defaulting to pandas implementation.\n",
      "Please refer to https://modin.readthedocs.io/en/stable/supported_apis/defaulting_to_pandas.html for explanation.\n",
      "2023-04-06 12:14:58,510\tINFO worker.py:1544 -- Started a local Ray instance. View the dashboard at \u001b[1m\u001b[32m127.0.0.1:8265 \u001b[39m\u001b[22m\n",
      "UserWarning: When using a pre-initialized Ray cluster, please ensure that the runtime env sets environment variable __MODIN_AUTOIMPORT_PANDAS__ to 1\n"
     ]
    },
    {
     "ename": "ValueError",
     "evalue": "expected a dict or pandas.DataFrame, got    x_values  y_values\n0         1         6\n1         2         7\n2         3         2\n3         4         3\n4         5         6",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "\u001b[0;32m/var/folders/qj/jybppsbd2jl75s8y2q8s2xx80000gn/T/ipykernel_5953/1336630338.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[0;31m# create a ColumnDataSource by passing the dict\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0msource\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mColumnDataSource\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodin_data\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      6\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      7\u001b[0m \u001b[0mp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfigure\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/opt/anaconda3/lib/python3.9/site-packages/bokeh/models/sources.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m    229\u001b[0m                 \u001b[0mraw_data\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_data_from_groupby\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mraw_data\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    230\u001b[0m             \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 231\u001b[0;31m                 \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"expected a dict or pandas.DataFrame, got {raw_data}\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    232\u001b[0m         \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    233\u001b[0m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mraw_data\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mValueError\u001b[0m: expected a dict or pandas.DataFrame, got    x_values  y_values\n0         1         6\n1         2         7\n2         3         2\n3         4         3\n4         5         6"
     ]
    }
   ],
   "source": [
    "# Create a visualization with Modin df \n",
    "modin_data = pd.DataFrame.from_dict({'x_values': [1, 2, 3, 4, 5], 'y_values': [6, 7, 2, 3, 6]})\n",
    "\n",
    "# create a ColumnDataSource by passing the dict\n",
    "source = ColumnDataSource(modin_data)\n",
    "\n",
    "p = figure()\n",
    "p.circle(x='x_values', y='y_values', source=source)\n",
    "show(p)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a visualization with pandas df \n",
    "pandas_data = pandas.DataFrame.from_dict({'x_values': [1, 2, 3, 4, 5], 'y_values': [6, 7, 2, 3, 6]})\n",
    "\n",
    "# create a ColumnDataSource by passing the dict\n",
    "source = ColumnDataSource(pandas_data)\n",
    "\n",
    "p = figure()\n",
    "p.circle(x='x_values', y='y_values', source=source)\n",
    "show(p)"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "9752fa87da8bf164654ccc33a595e9110c8fc9bb15d763374a7037fd32519b1f"
  },
  "kernelspec": {
   "display_name": "Python 3.9.7 ('base')",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}


================================================
FILE: examples/jupyter/integrations/huggingface.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Demonstrating Hugging Face Modin Interoperability\n",
    "## All the examples in this section are taken/ adapted from https://www.kaggle.com/code/satyampd/imdb-sentiment-analysis-using-bert-w-huggingface/notebook"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import modin.pandas as pd\n",
    "import numpy as np # linear algebra"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "import sklearn\n",
    "from tqdm import tqdm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import urllib.request\n",
    "url_path = \"https://modin-datasets.intel.com/testing/IMDB_Dataset.csv\"\n",
    "urllib.request.urlretrieve(url_path, \"imdb.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "modin_df = pd.read_csv(\"imdb.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "modin_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "type(modin_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "modin_df.sample()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import BertTokenizer, TFBertForSequenceClassification"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Loading the BERT Classifier and Tokenizer along with Input module\n",
    "from transformers import InputExample, InputFeatures\n",
    "\n",
    "model = TFBertForSequenceClassification.from_pretrained(\"bert-base-uncased\")\n",
    "tokenizer = BertTokenizer.from_pretrained(\"bert-base-uncased\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# changing positive and negative into numeric values\n",
    "\n",
    "def cat2num(value):\n",
    "    if value=='positive': \n",
    "        return 1\n",
    "    else: \n",
    "        return 0\n",
    "    \n",
    "modin_df['sentiment']  =  modin_df['sentiment'].apply(cat2num)\n",
    "train = modin_df[:45000]\n",
    "test = modin_df[45000:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# But first see BERT tokenizer exmaples and other required stuff!\n",
    "\n",
    "example='In this Kaggle notebook, I will do sentiment analysis using BERT with Huggingface'\n",
    "tokens=tokenizer.tokenize(example)\n",
    "token_ids = tokenizer.convert_tokens_to_ids(tokens)\n",
    "print(tokens)\n",
    "print(token_ids)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "type(train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def convert_data_to_examples(train, test, review, sentiment): \n",
    "    train_InputExamples = train.apply(lambda x: InputExample(guid=None, # Globally unique ID for bookkeeping, unused in this case\n",
    "                                                          text_a = x[review], \n",
    "                                                          label = x[sentiment]), axis = 1)\n",
    "\n",
    "    validation_InputExamples = test.apply(lambda x: InputExample(guid=None, # Globally unique ID for bookkeeping, unused in this case\n",
    "                                                          text_a = x[review], \n",
    "                                                          label = x[sentiment]), axis = 1,)\n",
    "  \n",
    "    return train_InputExamples, validation_InputExamples\n",
    "\n",
    "train_InputExamples, validation_InputExamples = convert_data_to_examples(train,  test, 'review',  'sentiment')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def convert_examples_to_tf_dataset(examples, tokenizer, max_length=128):\n",
    "    features = [] # -> will hold InputFeatures to be converted later\n",
    "\n",
    "    for e in tqdm(examples):\n",
    "        input_dict = tokenizer.encode_plus(\n",
    "            e.text_a,\n",
    "            add_special_tokens=True,    # Add 'CLS' and 'SEP'\n",
    "            max_length=max_length,    # truncates if len(s) > max_length\n",
    "            return_token_type_ids=True,\n",
    "            return_attention_mask=True,\n",
    "            pad_to_max_length=True, # pads to the right by default # CHECK THIS for pad_to_max_length\n",
    "            truncation=True\n",
    "        )\n",
    "\n",
    "        input_ids, token_type_ids, attention_mask = (input_dict[\"input_ids\"],input_dict[\"token_type_ids\"], input_dict['attention_mask'])\n",
    "        features.append(InputFeatures( input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids, label=e.label) )\n",
    "\n",
    "    def gen():\n",
    "        for f in features:\n",
    "            yield (\n",
    "                {\n",
    "                    \"input_ids\": f.input_ids,\n",
    "                    \"attention_mask\": f.attention_mask,\n",
    "                    \"token_type_ids\": f.token_type_ids,\n",
    "                },\n",
    "                f.label,\n",
    "            )\n",
    "\n",
    "    return tf.data.Dataset.from_generator(\n",
    "        gen,\n",
    "        ({\"input_ids\": tf.int32, \"attention_mask\": tf.int32, \"token_type_ids\": tf.int32}, tf.int64),\n",
    "        (\n",
    "            {\n",
    "                \"input_ids\": tf.TensorShape([None]),\n",
    "                \"attention_mask\": tf.TensorShape([None]),\n",
    "                \"token_type_ids\": tf.TensorShape([None]),\n",
    "            },\n",
    "            tf.TensorShape([]),\n",
    "        ),\n",
    "    )\n",
    "\n",
    "\n",
    "DATA_COLUMN = 'review'\n",
    "LABEL_COLUMN = 'sentiment'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_InputExamples"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_data = convert_examples_to_tf_dataset(list(train_InputExamples), tokenizer)\n",
    "train_data = train_data.shuffle(100).batch(32).repeat(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "validation_data = convert_examples_to_tf_dataset(list(validation_InputExamples), tokenizer)\n",
    "validation_data = validation_data.batch(32)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=3e-5, epsilon=1e-08, clipnorm=1.0), \n",
    "              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), \n",
    "              metrics=[tf.keras.metrics.SparseCategoricalAccuracy('accuracy')])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.fit(train_data, epochs=2, validation_data=validation_data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pred_sentences = ['worst movie of my life, will never watch movies from this series', \n",
    "                  'Wow, blew my mind, what a movie by Marvel, animation and story is amazing']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tf_batch = tokenizer(pred_sentences, max_length=128, padding=True, truncation=True, return_tensors='tf')   # we are tokenizing before sending into our trained model\n",
    "tf_outputs = model(tf_batch)                                  \n",
    "tf_predictions = tf.nn.softmax(tf_outputs[0], axis=-1)       # axis=-1, this means that the index that will be returned by argmax will be taken from the *last* axis.\n",
    "labels = ['Negative','Positive']\n",
    "label = tf.argmax(tf_predictions, axis=1)\n",
    "label = label.numpy()\n",
    "for i in range(len(pred_sentences)):\n",
    "    print(pred_sentences[i], \": \", labels[label[i]])"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "9752fa87da8bf164654ccc33a595e9110c8fc9bb15d763374a7037fd32519b1f"
  },
  "kernelspec": {
   "display_name": "Python 3.9.7 ('base')",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}


================================================
FILE: examples/jupyter/integrations/matplotlib.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Demonstrating Matplotlib Modin Interoperability"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import modin.pandas as pd\n",
    "import pandas\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a visualization with Modin df\n",
    "# Example modified from https://matplotlib.org/3.1.1/gallery/lines_bars_and_markers/xcorr_acorr_demo.html#sphx-glr-gallery-lines-bars-and-markers-xcorr-acorr-demo-py\n",
    "\n",
    "# Fixing random state for reproducibility\n",
    "np.random.seed(19680801)\n",
    "\n",
    "x = pd.DataFrame(np.random.randn(100, 1),columns=[\"Col_1\"])\n",
    "y = pd.DataFrame(np.random.randn(100, 1),columns=[\"Col_1\"])\n",
    "\n",
    "fig, [ax1, ax2] = plt.subplots(2, 1, sharex=True)\n",
    "ax1.xcorr(x[\"Col_1\"], y[\"Col_1\"], usevlines=True, maxlags=50, normed=True, lw=2)\n",
    "ax1.grid(True)\n",
    "\n",
    "ax2.acorr(x[\"Col_1\"], usevlines=True, normed=True, maxlags=50, lw=2)\n",
    "ax2.grid(True)\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a visualization with pandas df\n",
    "# Example modified from https://matplotlib.org/3.1.1/gallery/lines_bars_and_markers/xcorr_acorr_demo.html#sphx-glr-gallery-lines-bars-and-markers-xcorr-acorr-demo-py\n",
    "\n",
    "# Fixing random state for reproducibility\n",
    "np.random.seed(19680801)\n",
    "\n",
    "x = pandas.DataFrame(np.random.randn(100, 1),columns=[\"Col_1\"])\n",
    "y = pandas.DataFrame(np.random.randn(100, 1),columns=[\"Col_1\"])\n",
    "\n",
    "fig, [ax1, ax2] = plt.subplots(2, 1, sharex=True)\n",
    "ax1.xcorr(x[\"Col_1\"], y[\"Col_1\"], usevlines=True, maxlags=50, normed=True, lw=2)\n",
    "ax1.grid(True)\n",
    "\n",
    "ax2.acorr(x[\"Col_1\"], usevlines=True, normed=True, maxlags=50, lw=2)\n",
    "ax2.grid(True)\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a visualization with Modin df\n",
    "# Example modified from https://matplotlib.org/stable/tutorials/introductory/pyplot.html#sphx-glr-tutorials-introductory-pyplot-py\n",
    "\n",
    "names = ['group_a', 'group_b', 'group_c']\n",
    "values = [1, 10, 100]\n",
    "\n",
    "modin_df = pd.DataFrame({'names':['group_a', 'group_b', 'group_c'],'values':[1, 10, 100]})\n",
    "\n",
    "plt.figure(figsize=(9, 3))\n",
    "\n",
    "plt.subplot(131)\n",
    "plt.bar(modin_df['names'], modin_df['values'])\n",
    "plt.subplot(132)\n",
    "#plt.scatter(df['names'], df['values'])\n",
    "#plt.subplot(133)\n",
    "plt.plot(modin_df['names'], modin_df['values'])\n",
    "plt.suptitle('Categorical Plotting')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a visualization with pandas df\n",
    "# Example modified from https://matplotlib.org/stable/tutorials/introductory/pyplot.html#sphx-glr-tutorials-introductory-pyplot-py\n",
    "\n",
    "names = ['group_a', 'group_b', 'group_c']\n",
    "values = [1, 10, 100]\n",
    "\n",
    "pandas_df = pandas.DataFrame({'names':['group_a', 'group_b', 'group_c'],'values':[1, 10, 100]})\n",
    "\n",
    "plt.figure(figsize=(9, 3))\n",
    "\n",
    "plt.subplot(131)\n",
    "plt.bar(pandas_df['names'], pandas_df['values'])\n",
    "plt.subplot(132)\n",
    "\n",
    "plt.plot(pandas_df['names'], pandas_df['values'])\n",
    "plt.suptitle('Categorical Plotting')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a visualization with Modin df\n",
    "# Example modified from https://matplotlib.org/stable/tutorials/introductory/pyplot.html#sphx-glr-tutorials-introductory-pyplot-py\n",
    "\n",
    "names = ['group_a', 'group_b', 'group_c']\n",
    "values = [1, 10, 100]\n",
    "\n",
    "modin_df = pd.DataFrame({'names':['group_a', 'group_b', 'group_c'],'values':[1, 10, 100]})\n",
    "\n",
    "plt.figure(figsize=(9, 3))\n",
    "\n",
    "plt.subplot(131)\n",
    "plt.barh(modin_df['names'], modin_df['values'])\n",
    "plt.subplot(132)\n",
    "#plt.scatter(df['names'], df['values'])\n",
    "#plt.subplot(133)\n",
    "plt.plot(modin_df['names'], modin_df['values'])\n",
    "plt.suptitle('Categorical Plotting')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a visualization with pandas df\n",
    "# Example modified from https://matplotlib.org/stable/tutorials/introductory/pyplot.html#sphx-glr-tutorials-introductory-pyplot-py\n",
    "\n",
    "names = ['group_a', 'group_b', 'group_c']\n",
    "values = [1, 10, 100]\n",
    "\n",
    "pandas_df = pandas.DataFrame({'names':['group_a', 'group_b', 'group_c'],'values':[1, 10, 100]})\n",
    "\n",
    "plt.figure(figsize=(9, 3))\n",
    "\n",
    "plt.subplot(131)\n",
    "plt.barh(pandas_df['names'], pandas_df['values'])\n",
    "plt.subplot(132)\n",
    "plt.plot(pandas_df['names'], pandas_df['values'])\n",
    "plt.suptitle('Categorical Plotting')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.figure(figsize=(9, 3))\n",
    "\n",
    "plt.subplot(131)\n",
    "plt.hlines(pandas_df['values'], 1, 3)\n",
    "plt.suptitle('Categorical Plotting')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a visualization with Modin df\n",
    "# Example modified from https://matplotlib.org/stable/tutorials/introductory/pyplot.html#sphx-glr-tutorials-introductory-pyplot-py\n",
    "\n",
    "names = ['group_a', 'group_b', 'group_c']\n",
    "values = [1, 10, 100]\n",
    "\n",
    "modin_df = pd.DataFrame({'names':['group_a', 'group_b', 'group_c'],'values':[1, 10, 100]})\n",
    "\n",
    "plt.figure(figsize=(9, 3))\n",
    "\n",
    "plt.subplot(131)\n",
    "plt.bar(modin_df['names'], modin_df['values'])\n",
    "plt.subplot(132)\n",
    "#plt.scatter(df['names'], df['values'])\n",
    "#plt.subplot(133)\n",
    "plt.plot(modin_df['names'], modin_df['values'])\n",
    "plt.suptitle('Categorical Plotting')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a visualization with pandas df\n",
    "# Example modified from https://matplotlib.org/stable/tutorials/introductory/pyplot.html#sphx-glr-tutorials-introductory-pyplot-py\n",
    "\n",
    "names = ['group_a', 'group_b', 'group_c']\n",
    "values = [1, 10, 100]\n",
    "\n",
    "pandas_df = pandas.DataFrame({'names':['group_a', 'group_b', 'group_c'],'values':[1, 10, 100]})\n",
    "\n",
    "plt.figure(figsize=(9, 3))\n",
    "\n",
    "plt.subplot(131)\n",
    "plt.bar(pandas_df['names'], pandas_df['values'])\n",
    "plt.subplot(132)\n",
    "#plt.scatter(df['names'], df['values'])\n",
    "#plt.subplot(133)\n",
    "plt.plot(pandas_df['names'], pandas_df['values'])\n",
    "plt.suptitle('Categorical Plotting')\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
  },
  "kernelspec": {
   "display_name": "Python 3.9.10 64-bit",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}


================================================
FILE: examples/jupyter/integrations/plotly.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Demonstrating Plotly Modin Interoperability"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Currently Plotly is not completely interoperable with Modin. Each visualization is created with a Modin and then pandas dataframe for comparison."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import modin.pandas as pd\n",
    "import pandas\n",
    "import numpy as np\n",
    "import plotly.express as px\n",
    "import plotly.io as pio\n",
    "pio.renderers.default = \"notebook\"\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "modin_df = pd.DataFrame(dict(a=[1,3,2,4], b=[3,2,1,0]))\n",
    "pandas_df = pandas.DataFrame(dict(a=[1,3,2,4], b=[3,2,1,0]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a visualization with Modin df\n",
    "fig2 = px.bar(modin_df)\n",
    "fig2.show()\n",
    "# py.iplot(fig2 , filename='jupyter-basic_bar')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a visualization with pandas df\n",
    "fig2 = px.bar(pandas_df)\n",
    "fig2.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a visualization with Modin df\n",
    "fig = px.line(modin_df)\n",
    "fig.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a visualization with pandas df\n",
    "fig = px.line(pandas_df)\n",
    "fig.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a visualization with Modin df\n",
    "fig = px.area(modin_df)\n",
    "fig.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a visualization with pandas df\n",
    "fig = px.area(pandas_df)\n",
    "fig.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a visualization with Modin df\n",
    "fig = px.area(modin_df)\n",
    "fig.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a visualization with pandas df\n",
    "fig = px.area(pandas_df)\n",
    "fig.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a visualization with Modin df\n",
    "fig = px.violin(modin_df)\n",
    "fig.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a visualization with pandas df\n",
    "fig = px.violin(pandas_df)\n",
    "fig.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a visualization with Modin df\n",
    "fig = px.box(modin_df)\n",
    "fig.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a visualization with pandas df\n",
    "fig = px.box(pandas_df)\n",
    "fig.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a visualization with Modin df\n",
    "fig = px.histogram(modin_df, opacity=0.5, orientation='h', nbins=5)\n",
    "fig.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a visualization with pandas df\n",
    "fig = px.histogram(pandas_df, opacity=0.5, orientation='h', nbins=5)\n",
    "fig.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a visualization with Modin df\n",
    "# Example from https://plotly.com/python/mapbox-county-choropleth/#choropleth-map-using-plotlyexpress-and-carto-base-map-no-token-needed\n",
    "from urllib.request import urlopen\n",
    "import json\n",
    "with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response:\n",
    "    counties = json.load(response)\n",
    "import modin.pandas as pd\n",
    "modin_df = pd.read_csv(\"https://raw.githubusercontent.com/plotly/datasets/master/fips-unemp-16.csv\",\n",
    "                   dtype={\"fips\": str})\n",
    "fig = px.choropleth(modin_df, geojson=counties, locations='fips', color='unemp',\n",
    "                           color_continuous_scale=\"Viridis\",\n",
    "                           range_color=(0, 12),\n",
    "                           scope=\"usa\",\n",
    "                           labels={'unemp':'unemployment rate'}\n",
    "                          )\n",
    "fig.update_layout(margin={\"r\":0,\"t\":0,\"l\":0,\"b\":0})\n",
    "fig.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a visualization with pandas df\n",
    "# Example from https://plotly.com/python/mapbox-county-choropleth/#choropleth-map-using-plotlyexpress-and-carto-base-map-no-token-needed\n",
    "from urllib.request import urlopen\n",
    "import json\n",
    "with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response:\n",
    "    counties = json.load(response)\n",
    "import pandas\n",
    "pandas_df = pandas.read_csv(\"https://raw.githubusercontent.com/plotly/datasets/master/fips-unemp-16.csv\",\n",
    "                   dtype={\"fips\": str})\n",
    "\n",
    "fig = px.choropleth(pandas_df, geojson=counties, locations='fips', color='unemp',\n",
    "                           color_continuous_scale=\"Viridis\",\n",
    "                           range_color=(0, 12),\n",
    "                           scope=\"usa\",\n",
    "                           labels={'unemp':'unemployment rate'}\n",
    "                          )\n",
    "fig.update_layout(margin={\"r\":0,\"t\":0,\"l\":0,\"b\":0})\n",
    "fig.show()\n"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "9752fa87da8bf164654ccc33a595e9110c8fc9bb15d763374a7037fd32519b1f"
  },
  "kernelspec": {
   "display_name": "Python 3.9.7 ('base')",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}


================================================
FILE: examples/jupyter/integrations/seaborn.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Demonstrating Seaborn Modin Interoperability"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### All the examples in this section are taken / adapted from https://seaborn.pydata.org/tutorial/introduction.html. Each visualization is created with a Modin and then pandas dataframe for comparison."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "import seaborn as sns\n",
    "import modin.pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "UserWarning: Distributing <class 'pandas.core.frame.DataFrame'> object. This may take some time.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "modin.pandas.dataframe.DataFrame"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Apply the default theme\n",
    "sns.set_theme()\n",
    "\n",
    "# Load an example dataset\n",
    "pandas_tips = sns.load_dataset(\"tips\")\n",
    "modin_tips = pd.DataFrame(pandas_tips)\n",
    "\n",
    "type(modin_tips)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>total_bill</th>\n",
       "      <th>tip</th>\n",
       "      <th>sex</th>\n",
       "      <th>smoker</th>\n",
       "      <th>day</th>\n",
       "      <th>time</th>\n",
       "      <th>size</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>16.99</td>\n",
       "      <td>1.01</td>\n",
       "      <td>Female</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10.34</td>\n",
       "      <td>1.66</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>21.01</td>\n",
       "      <td>3.50</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>23.68</td>\n",
       "      <td>3.31</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>24.59</td>\n",
       "      <td>3.61</td>\n",
       "      <td>Female</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   total_bill   tip     sex smoker  day    time  size\n",
       "0       16.99  1.01  Female     No  Sun  Dinner     2\n",
       "1       10.34  1.66    Male     No  Sun  Dinner     3\n",
       "2       21.01  3.50    Male     No  Sun  Dinner     3\n",
       "3       23.68  3.31    Male     No  Sun  Dinner     2\n",
       "4       24.59  3.61  Female     No  Sun  Dinner     4"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "modin_tips.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<seaborn.axisgrid.FacetGrid at 0x7fc3bbb1a430>"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAwEAAAFcCAYAAACQkLIVAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAACZvElEQVR4nOzdZ4BU1dnA8f+902dntvdCWXrvKEXBggXEAhp7iRo1+qoxMUaNpvhGo4mvRGOKJiZGYxcQsKNYqErvvSxsYXubXu59PywsLLsLy/bdeX5fYO7MnHvO7O6Z+9xzznMUXdd1hBBCCCGEEBFD7egKCCGEEEIIIdqXBAFCCCGEEEJEGAkChBBCCCGEiDASBAghhBBCCBFhJAgQQgghhBAiwkgQIIQQQgghRISRIEB0GbfeeitlZWUA/OhHP2LPnj0dWp+HH36YV155pc3Pc+ONN/Lpp5+2+XmEEN1TZ+w7zzrrLC677DIuu+wypk+fzq9+9SuKi4sBKCws5JprrunQOgoRCYwdXQEhmmr58uW1///HP/7RgTURQoiuozP2nbfccgu33XYbALqu89JLL3H77bczb948UlJSePvttzu4hkJ0fxIEiC7hkUceAeDmm2/m5Zdf5vrrr+f555/H4/Hw3HPPkZaWxv79+7HZbNxxxx28/vrr7N+/nwsuuIBHH30UgCVLlvC3v/2NYDCI1WrlF7/4BaNGjapznj179vCzn/2s3vlvuukmZs+e3aS65ubmMnPmTNavX1/v8bx581i8eDGqqpKTk4PVauWZZ56hT58+FBcX8+tf/5p9+/ahqirXXHMNN910EwBffvklr7zyCiUlJUyYMIHf/e53qKoM5AkhTq4r9J2KonDXXXcxf/58li9fTnZ2dm2f+ec//5m8vDyKi4vJy8sjJSWFP/7xjyQnJ3PuuedyxRVXsHLlSgoKCrjsssv4yU9+ctI6//nPf2bDhg0UFRUxYMAAnn322Vb4lIXoonQhuoj+/fvrpaWluq7r+jnnnKNv2rRJX7VqlT5o0CB969atuq7r+m233aZfffXVut/v10tLS/UhQ4bohw8f1vfv369fcsklellZma7rur5r1y590qRJutvtbnZ9fvGLX+j//Oc/6x0/dOiQPnLkyAYfz507Vx8zZoxeUFCg67quP/HEE/pDDz2k67qu33PPPfozzzyj67quV1VV6TNmzNAPHDig33DDDfqPf/xjPRQK6R6PR580aZK+evXqZtdbCBFZukrfee+99+r/+Mc/6vSZL7zwgn7eeefp1dXVuq7r+p133qk///zztW15+umndV3X9cOHD+vDhg3TDx48eNI6v/DCC/qFF16oB4PBZtdfiO5CRgJEl5eZmcngwYMB6NGjB06nE7PZTHx8PFFRUVRWVrJ69WqKioq45ZZbat+nKAoHDx5k4MCBtcdaYyTgVIYMGUJqaioAgwcPZvHixQCsWLGCn//85wA4nU4+/PDD2vdMnz4dg8GAzWajV69elJaWtkpdhBCRq7P1nYqiYLPZ6h0fP348DocDqOkzKysra58777zzAEhJSSEhIYHKyko2btzYaJ0BRo4cidEolz9CyF+B6PLMZnOdxw117pqmMWHCBP70pz/VHisoKCA5ObnO6/r27cuCBQtaVB9FUdB1vfZxMBis87zVam3wtUajEUVRap87dOgQcXFxtc81Vr4QQjRHZ+o7dV1n69at3HDDDfWea6zPBLBYLPWeO1mdFy9ejN1ub3Y9hehOZFKx6DIMBgOhUKhZ750wYQLLly9n7969AHzzzTdceuml+Hy+1qwiANHR0QSDwdoMHB999FGT6zh37lwAqqurufnmmzlw4ECr108IEVk6e98ZDof5y1/+QlxcHOPGjWtxee3Z3wvRlclIgOgyLrroIm688Ub+/Oc/n/Z7+/btyxNPPMFPf/pTdF3HaDTyt7/9jaioqBbVac6cObz44ou1j8855xyee+45
fv7zn/OjH/2I+Ph4LrrooiaV9atf/Yrf/OY3zJw5E13XufPOOxk6dGiL6ieEEJ2x73z11VdZuHAhiqIQDocZNmwYL7/8covKbOs6C9HdKLrMKxBCCCGEECKiyHQgIYQQQgghIowEAUIIIYQQQkQYCQKEEEIIIYSIMBIECCGEEEIIEWEkCBBCCCGEECLCdLkUoaWlLjSt+yY0iouzU17u6ehqtLtIbHckthmk3a0hKcnZrPd19/4TIvP3KxLbDJHZ7khsM7R+u5vbh3Y3MhLQyRiNho6uQoeIxHZHYptB2i3aViR+zpHYZojMdkdimyFy293WJAgQQgghhBAiwkgQIIQQQgghRISRIEAIIYQQQogII0GAEEIIIYQQEUaCACGEEEIIISKMBAFCCCGEEEJEGAkChBBCCCGEiDBtGgS4XC4uueQScnNzAVixYgUzZ87kggsuYM6cOW15aiGEEEII0Y0oCpg1d+1jix55G6e1pjYLAjZu3Mi1117LgQMHAPD5fDz66KP89a9/5eOPP2bLli188803bXV6IYQQQgjRTSgKmMr2UP3Rc1j8pVjceVR+8Hss7vyOrlqX1WZBwLvvvsuvf/1rkpOTAdi0aRM9e/YkKysLo9HIzJkz+fTTT9vq9EIIIYQQoptQtRDBgl0ECvZSsfBZyuY9Q7D4EFp5PorS0bXrmoxtVfCTTz5Z53FRURFJSUm1j5OTkyksLGyr0wshhBBCiG4irBgxDjqfKFc57g2LAYg55ybCmaPQ9Q6uXBfVZkHAiTRNQzkuVNN1vc7jpkpIcLRmtTqlpCRnR1ehQ0RiuyOxzSDt7iiR0H9Cx3/OHSES2wyR2e5IbDPUtNtfsJfKXd/XHvNu+ZrkAWMxJ2R0YM26rnYLAlJTUykuLq59XFxcXDtV6HSUlrrQtO4b8iUlOSkuru7oarS7SGx3JLYZpN2tVVZzdPf+EyLz9ysS2wyR2e5IbDPUtLu8pJzgxq/QPJXETrudcHUZ1avm4d6zkSo9+rRGAyI1kDpRuwUBI0aMYP/+/eTk5JCZmcmHH37I7Nmz2+v0QgghhBCiiwrpRswjZpDUZwyB2N4Y9TBJWQMJxfSU6UDN1G5BgMVi4emnn+bee+/F7/czZcoULrroovY6vRBCCCGE6MKCRgfB2L4AaIqBUFz/Dq5R19bmQcCSJUtq/z9hwgQWLlzY1qcUQgghhBBCnITsGCyEEEIIIUSEkSBACCGEEEKICCNBgBBCCCGEEBFGggAhhBBCCCEijAQBQgghhBBCRBgJAoQQQgghhIgwEgQIIYQQQggRYSQIEEIIIYQQIsJIECCEEEIIIUSEkSBACCGEEEKICCNBgBBCCCGEEBFGggAhhBBCCCEijAQBQgghhBBCRBgJAoQQQgghhIgwEgQIIYQQQggRYSQIEEIIIYQQIsJIECCEEEIIIUSEkSBACCGEEEKICCNBgBBCCCGEEBFGggAhhBBCCNGuFKWjayCMHV0BIYQQQggRGTyBMPsLqjlwuIqEaCv9s2KJd5g7uloRSYIAIYQQQgjR5jz+MC8v2sqmPSW1x6KsRn55y3hSY60dWLPIJNOBhBBCCCFEm9uTX1knAABw+0J8uHw/egfVKZJJECCEEEIIIdqUoijsOlTR4HOrtxfiDYTbt0JCggAhhBBCCNG2dF0nKdbW4HNJsTZMRrkkbW/yiQshhBBCiDY3uFc8FpOh3vEfnNcPkyrpgtqbBAFCCCGEEKLNJcdYeOyH4xjeLxGApFgr9101gsE9Yzu2YhFKsgMJIYQQQog2p+uQEW/nvlnD8QTCmI0qZoOMAHQUCQKEEEIIIUS7URVwWOpPCxLtS6YDCSGEEEIIEWEkCBBCCCGEECLCSBAghBBCCCFEhJEgQAghhBBCiAgjQYAQQgghhBARRoIAIYQQQgghIowEAUIIIYQQQkQYCQKEEEIIIYSIMBIECCGEEEIIEWEkCBBCCCGEECLCSBAghBBCCCFEhJEgQAghhBBC
iAgjQYAQQgghhBARRoIAIYQQQgghIowEAUIIIYQQQkQYCQKEEEIIIYSIMBIECCGEEEIIEWEkCBBCCCGEECLCSBAghBBCCCFEhJEgQAghhBBCiAjTIUHAggULmDFjBjNmzOCZZ57piCoIIYQQQggRsdo9CPB6vTz55JO8/vrrLFiwgDVr1rBixYr2roYQQgghhBARq92DgHA4jKZpeL1eQqEQoVAIi8XS3tUQQgghhBAiYhnb+4QOh4P777+fiy++GJvNxrhx4xg9enR7V0MIIYQQQoiIpei6rrfnCXfs2MHDDz/MK6+8gtPp5MEHH2T48OHcfvvt7VkNIYQQQgghIla7jwQsW7aMCRMmkJCQAMCsWbN48803mxwElJa60LR2jVvaVVKSk+Li6o6uRruLxHZHYptB2t1aZTVHd+8/ITJ/vyKxzRCZ7Y7ENkPrt7u5fWh30+5rAgYOHMiKFSvweDzous6SJUsYNmxYe1dDCCGEEEKIiNXuIwGTJ09m27ZtzJo1C5PJxLBhw7jjjjvauxpCCCGEEEJErHYPAgDuuOMOufAXQgghhBCig8iOwUIIIYQQQkQYCQKEEEIIIYSIMBIECCGEEEII0QkcTaPfHjpkTYAQQgghhGhYSNPJL/OQX+zGYjbQOy2aWLupo6sluhkJAoQQQgghOomQprN4zSHeW7Kn9pjNYuSXN4+T/PadiNvt5pFHHiEnJwdVVRkyZAgzZszgT3/6E2lpaezfvx+bzcYdd9zB66+/zv79+7ngggt49NFHAXjnnXd4/fXXUVWVxMREHn/8cXr37l3nHE899RQ7d+7kr3/9KyaTiWeffZbVq1cTDocZPHgwjz32GA6Hg3PPPZfhw4ezc+dOfvrTnzJt2rQmtUGmAwkhhBBCdBL5ZZ46AQCA1x/i3x9tw+0NdlCtxIkWL16M2+1mwYIFvP/++wDk5uayefNm7rjjDhYsWIDD4eDll1/mpZdeYt68ebz55psUFhaycuVK/vnPf/Laa6+xcOFCLrnkEu655x50vWYzR13XeeKJJ8jPz+cf//gHUVFRvPzyyxgMBubNm8fChQtJTk7m2Wefra1Pv379+OSTT5ocAICMBAghhBBCdBr5Je4Gj+/Nq6Sw3IPTJPdvO4MxY8YwZ84cbrzxRiZOnMjNN99MWVkZmZmZDB48GIAePXrgdDoxm83Ex8cTFRVFZWUlS5cuZfr06cTHxwMwa9YsnnzySXJzcwF49dVXKS0t5YMPPsBsNgPw9ddfU11dzYoVKwAIBoMkJCTU1mfs2LGn3QYJAoQQQgghOgmLydDgcaNBwWKUAKCzyMrKYvHixXz33XesWrWKH/7whzzxxBO1F+1HGY31L7U1Tat3TNd1QqEQAOPGjWP06NE88sgjvPPOO5hMJjRN49FHH2XKlClAzXQkv99f+3673X7abZDfJiGEEEKITqJ3WjQ2S/0Lx4sn9iIt0dEBNRINefPNN3nkkUeYPHkyP//5z5k8eTLbtm1r0nvPOussPv74Y8rKygCYO3cusbGx9OzZE4ChQ4dyww034HQ6efHFFwGYPHkyb7zxBoFAAE3TePzxx3nuueda1AYJAoQQQgghOolYu4lf3jKOvpmxQM0IwMyzejNtbBaqqnRs5UStyy+/nHA4zPTp05k1axbV1dUMGDCgSe+dNGkSt9xyCzfffDMzZszggw8+4KWXXkJVj12WK4rCU089xZtvvsm6deu4++67ycjI4IorrmD69Onous7DDz/cojYo+tFVCF1EaakLTetSVT4tSUlOiourO7oa7S4S2x2JbQZpd2uV1Rzdvf+EyPz9isQ2Q/dvd0jTqXAHMBlVYuwm0Lt/mxvT2u2WLEs1ZE2AEEIIIUQnY1QVEp2WmgfdO3YXHUSmAwkhhBBCCBFhJAgQQgghhBAiwkgQIIQQQgghRISRIEAIIYQQQogII0GAEEIIIYQQEUaCACGEEEIIISKMBAFCCCGEEEJ0Erm5uQwYMIDly5fX
OX7uueeSm5vbaueRIEAIIYQQQohOxGQy8fjjj+NyudrsHLJZmBBCCCGEEKfh67WHeO2T7ZSUe0mMs3HTxYOYOiar1cpPTk5m4sSJPPPMM/zv//5vnef+/ve/s3DhQgwGA5MmTeLnP/85BoPhtM8hIwFCCCGEEEI00ddrD/HiexspLveiA8XlXl58byNfrz3Uqud5+OGHWbZsWZ1pQd9++y1Llixh7ty5zJ8/n5ycHN5+++1mlS9BgBBCCCGEEE302ifb8QfDdY75g2Fe+2R7q57H4XDwv//7v3WmBa1atYoZM2Zgs9kwGo3Mnj2blStXNqt8CQKEEEIIIYRoopJy72kdb4nJkyfXTgsC0DSt3mtCoVCzypYgQAghhBBCiCZKjLOd1vGWOjotqKioiDPPPJOPPvoIn89HKBRi7ty5nHnmmc0qV4IAIYQQQgghmuimiwdhMdVdiGsxGbjp4kFtcr6j04KCwSBTp05l6tSpzJ49mxkzZpCens4NN9zQrHIlO5AQQgghhBBNdDQLUFtlB8rMzGTJkiV1jk2ePJmdO3cCcPfdd3P33Xe3+DwSBAjRzRkMCgDhsN7BNRFCCCFAA0IhDbNJhS761TR1TFarpgTtCBIECNFNufwh9uZVsXp7IaqqMG5QCtlp0URZTj+XsBBCCNFSYV1nV24lC5buo6jMy5lDUzl3TCaJTktHVy0iSRAgRDdU5Q3ywvsb2ZdXVXts2cZ8BvWK567Lh+K0yp++EEKI9rXtQAVz3llf+/jTVTl8t+Uwv7p1PDF2UwfWLDLJwmAhuhlFUVi2qaBOAHDU9gNlrNtVjKJ0QMWEEEJErEBY47+f7ah3vNzlZ09eZQfUSEgQIEQ34/aH+HD5/kafX7h0H75Q/TzDQgghRFvxBzWKKxrJr1/Z+vn1xalJECBENxMK6/gC4Uafr/YEZJGwEEKIdmUzG+ibGdvgcxmJjvatjAAkCBCi27FbDPTJiGn0+aF9ErGa5U9fCCFE+zGqCtdd0B+DWnc+6sCecfROc3ZQrTqfJ554gvvuu6/OsWXLlnHeeefhcrla9VxyJSBEN2NUFa48t2+jz18yqTcqsihACHGMMeTGGD42JcMarmrXtUOyTiky9Ep28Nvbz2TW1D5MGp7G3bOGc/cVw4iySLKKo372s5+xZcsWvvzySwA8Hg+/+c1veOqpp3A4WnfERD51IbqhvunR3HfVCP790TaqPUEAYh0Wbps5mN4pMuwqhDjGFHLjW/0+itGCZezlqK4iyhbNIW7GfQRie6O34exBRQFT0VYwmgkl9MdUeQDNVUYobSS63KzoltLjbWRM6IWqdt39a/RwkMPvPg1AyuwHKZz7LACpP3gYxdCyLEdRUVH87ne/49FHH2XChAm88MILnHvuudhsNq699lp8Ph9xcXH89re/JSsri3//+9/Mnz8fVVUZPnw4TzzxRJPPJUGAEN2QQVEY1TeBfndO5HC5B1WBlDg7drPsESCEqMvgL8OzbSmEQ2ieCvwHNqH5PXg2fYll8o2ElLbL4W4NlFK86HkA4s7/IaVLXkMP+km+6Wm8luQ2O6/oWLquE2586Vqnd/jdp/Ed3AbAwRfuQA+Hao+nXft4i8ufOHEikydP5pFHHmHfvn28+eabXH/99fz9738nPT2dpUuX8vjjj/PKK6/w0ksvsXTpUgwGA7/85S8pLCwkJSWlSeeRIECIbkrXIcpioE+qzLUUQjTO7+hBwuUPUjr/Wbw7VwFg6zsW64QfEGzDAADAa0og/tIHKP3gWco+fQlQiJ95Pz5LUpueV4jWoIcC6KEAAIrR3KplP/zww0ydOpW//OUvFBQUcOjQIX784x/XPu9yuTAYDIwaNYorr7yS8847jx/+8IdNDgBAggAhhBAioum6jmK2o6gq+pHswWpULLraPps3KRY7iqKiEwZFQbU5auYJdc2ZIiICpMx+sGYE4EgAAKAYjKTM/nmrncPhcBAdHU1GRgYul4vMzEwWLFgA
QDgcpqSkBIC//vWvbNiwgW+//Zbbb7+dZ599lvHjxzfpHLIwWAghhIhgVnceZfOeRg8FMCX3BtWIe+MXBNcvwqD72/bcgVJK5z2DroVxnnk5KCql8/6A1VvYpucVoiUK5z5bOwXoKD0conDuH9vkfNnZ2VRWVrJmzRoA5s6dy4MPPkhZWRnTp0+nf//+3H///UyaNImdO3c2uVwZCRBCCCEimGZ2YErri2q0EDX1ZvTyXMoWPIc5axAh1dymd+QD5lhiz7kJxWRFyxhBQvoAwuWHCVgT2u6kQrQSxWhGMRjrBQStzWw28/zzz/Pkk0/i9/txOBw888wzxMfHc/XVV3PllVdis9no3bs3s2fPbnK5iq635br/1lda6kLTulSVT0tSkpPi4uqOrka7i8R2R2KbQdrdWmU1R3fvPyEyf79ao83mQAW6ohI0RaMoCtZACT5zPLre9hl6FMKAWpsNSCWMxqmTGMjPOnK0drub24ce1ZbZgdqTjAQIIYQQES5gjq39v67reE0J7TYnXz/hgr8pAYAQHUkxmOpkAWqNjEAdQdYECCGEEEIIEWEkCBBCCCGEECLCSBAghBBCCCFEhJEgQAghhBBCiAgjQYAQQgghhBARRoIAIYQQQkQERQGjUUVV2z71qRCdXYcEAUuWLGHWrFlcfPHF/O53v+uIKgghhBAigpS6Any2JpenXl/L3G/3UVDuRZFYQLSizZs3c99993V0NZqs3fcJOHToEL/+9a957733SEhI4Oabb+abb75hypQp7V0VIYQQQkSAMleAp15dTbnLD8COnHI+W5XD47eOJyPe3sG1E93FsGHDeOGFFzq6Gk3W7kHA4sWLmT59OqmpqQDMmTMHi8XS3tUQQgghRCdX6Q1iNKg4LEZ0vXm7lykKbNpbUhsAHBUIaXz+/UF+ePHAdtsYTXQfbrebRx55hJycHFRVZciQIcyYMYMnn3ySDz/8kNtuu42SkhIAPB4Phw4d4tNPPyU9PZ1nn32W1atXEw6HGTx4MI899hgOh6Pd29Du04FycnIIh8PcddddXHbZZbz55pvExMS0dzWEEEII0UlVeIK88vF2fvrCUh76y3K+3VRAMKw1qyxVVdl2oLzB57buKyMQal65IrItXrwYt9vNggULeP/99wHIzc2tff6VV15hwYIFvPfee6SkpPDTn/6UXr168fLLL2MwGJg3bx4LFy4kOTmZZ599tkPa0O4jAeFwmDVr1vD6669jt9v58Y9/zPz585k1a1aT3p+Q0P6RUntLSnJ2dBU6RCS2OxLbDNLujhIJ/Sd0/OfcEbpTmwOhMP/4aC0rNxcA4PWH+PdH20hOsDN5REad1za13QN6xrFme2H94z3iSE50YDIaWl7xdtKdftano7O1e8yYMcyZM4cbb7yRiRMncvPNN1NWVlbnNZqm8eCDD5Kdnc0dd9wBwNdff011dTUrVqwAIBgMkpCQ0O71hw4IAhITE5kwYQLx8fEAnH/++WzatKnJQUBpqQtN677jdklJToqLqzu6Gu2uPdtdXOVnT14FBoNK/8xYYu2mdjnvieRnHVlas93N/TLs7v0nRObvV3drc7knWBsAHO+bdbkMzoolfGRE4HTaPaJPAvOtRjy+UO0xVVW48MweVJR7Wqfi7aC7/aybqrXb3RoBRVZWFosXL+a7775j1apV/PCHP+SJJ56o85onn3wSr9fLnDlzao9pmsajjz5auxbW7Xbj99edqtZe2j0IOOecc/jFL35BVVUVUVFRLF26lPPOO6+9qyEiVEm1n9+88h1ef80XQXKcncduGYvD0u5/CkIIIRpgVBUsJgP+YLjO8VinpdnrAhKdFn71w/F8uzGfDbuK6ZMRw/nje5CVaJf1AKJZ3nzzTdauXcuzzz7LWWedRWlpKdu2bat9/uWXX2b9+vW8/vrrGAzHRpomT57MG2+8wYQJEzAajTz++OPY7fYOyZbZ7lc+I0aM4Pbbb+e6664jGAwyadIkZs+e
3d7VEBFqf0FVbQAAUFTuIbfIzcAsWZcihBCdQbTNyJXn9OWNz3fWHjOoChOHprVoJCs5xspVU7KZdXY2BkWpCSgkABDNdPnll/P9998zffp0bDYbaWlpDBgwgE8//ZTCwkKee+45evfuzQ033ICm1Yxe3Xfffdx9990888wzXHHFFYTDYQYNGsTDDz/cIW3okNufV155JVdeeWVHnFpEOJOx/lp4k0n2zBNCiM5C1+Gs4Wkkxtr4dkMeCTFWzhqZQWYr3LXX9ZqMKM0dURDiKLvdzp/+9Kd6x6dNmwbAjh07Gn3vr3/967aq1mmRORAiovRJj6FPRgx78yoBmDA0lczEqA6ulRBCiOOZjSojsuMZ3S8RXddrRgDkul2IViVBgIgoTquRn10zkrwSNwaDSnqCHbNBRgKEEAJAOTpNhpr8+h19wzzczLSgQohTkyBARByryUCftOiOroYQQnQq5mAleuEu9IzhGDzF6D43gYQBHV2tRgVDYUqq/fj8IRKirdjMXSfNpxCdgQQBQgghhECpzKP0478QNXQq3j1rMMam4LzkAQJq59tfwuUP8dbcjXyx+hC6DrEOC3ddMYz+mdEybUiIJpJ5EEIIIYQglDiA6AmzcG/5Gj3oJ/biuztlAKAo8NW6PBZ/f6h2ulKFy88f3lhLYbmvYysnRBfS5CCgsrISl8vVlnURQgghRAcxugpwrf8c1WJHDwfxbluKQet8F9XVvhAfLd9f77im6ewrqOyAGgnRNZ0yCNi3bx+zZ89mwoQJnHHGGdxwww3k5+e3R92EEEII0U4UdExJWSRc9ztip90GWrhTTq3RNJ1gIwuGAyFZSCy6B5fLxSWXXEJubm6bneOUQcAjjzzCVVddxcaNG1m/fj0XXnghv/zlL9usQkIIIYRofz5HFo6L7sdnikfvPRHDiBmEDdaOrlY9TpuJCUPTGnyuV4qznWsjROvbuHEj1157LQcOHGjT85wyCPB6vVxzzTWYTCbMZjM33ngjJSUlbVopIYQQQrS/gFJz0a9hIKx2vgAAQAGumNKHlHh7nePXXtBf9n0R7SoUClFSUkIoFGrVct99911+/etfk5yc3KrlnuiU2YGys7NZt24do0ePBmDXrl1kZma2aaWEEEIIIRqT4DDz9D2T2bG/FI8/RFqCnaQYKwZF6eiqiQixceNG7r//fgKBAGazmeeff54RI0a0StlPPvlkq5RzKqcMAvLz87nxxhsZMGAARqORbdu2kZSUxMyZMwFYtGhRm1dSCCGEEOJ4ibE2BmTGdHQ1RAQKhULcf//9tQlzAoEA999/P19++SUGQ9fZr+KUQcCDDz7YHvUQQgghhBCi06uoqCAQCNQ5FggEKC8vJzExsYNqdfoaDQL27t1Lnz59iIpqeH7dkCFD2qxSQgghhBBCdEaxsbGYzeY6gYDZbCYuLq4Da3X6Gg0C/vCHP/DSSy9x1VVXkZaWhq4fyxPm9XpZuXJlu1RQCCGEEKIzUBTQO2HaVNG+jEYjzz//fL01AV1pKhCcJAh45plnqKiooE+fPrz++uvouo6iKASDQW644Yb2rKMQopWoqoKiKIQbybEthBCivgpPkN25FRSVe8lMdtAnPRqH5ZQzqkU3NmLECL788kvKy8uJi4trkwBgyZIlrV7m8Rr9Df7Zz37G8uXLURSFCRMm1B43GAxceOGFbVopIUTr0tE5WOxh1dbDFJV5OGNIKoN7xuGwypeYEKJ1WL2HCVpiCatWDJoPk78Cny211cpXFHD7wxRVeDEaVGxRllYr+2RKqv089Z/VVLiOTf3okxHDvVcNJ9pqapc6iM7JYDB0qTUAJ2r0CuCVV14BajYL+/3vf99uFRJCtL5duVX84Y21tcPY63cVM7h3PPdcMQybuWsNX4ruRVF0LMFKfMZYjJoXJRwgaJKML12NpTqHkrlP4xg5DevIi/Bt+oTKjV+SMOsX+J09W+EMOlsPVvKPBVuoPHIx3ivNyR2XDSM1tg33
M1Dg63V5dQIAgL15lWw/UM4ZA9s2j7sQbemUm4VJACBE1+YPabz68bZ681i37S/jQJGrYyolxBHm0l2Uvv0rrK5DBNYuwPPNq5jD7o6uljgNigK6z4Ue9FP9/ULK3/kN1d8vQg/60X0uWiN1/8ESD//35rraAADgQEE1T/1nNVXeYMtP0IhASGPV1sMNPrd+VzEGwykvo4TotOS3V4huzuULUVjmbfC50sqGjwvRbqISMUTFUvzm47jXf4p94ERCBltH10qcBl2HYPJg4mf+BIBQZREA8TPvJ5g8uMULaVVVYcXmggbLcXmD7C+obtkJTsJkUOvtTHxUepIDTZNVwqLrkiBAiCYqdwdYv7eUFdsK2ZlXiTcY7ugqNYndYiDW0fDc2Zh2mlN7SgqUuQLklrip9oVQTnHr8PinFYVWudMoOkbYZMec2rfmgcGIIS4NTb6amkeBck/N31GFJ9iufxdq2E/w8O46x4IFe1DD/haXrSgK+/KqGn2+3NXyczR6buCys7LrHTeoCmMGJAMSBJwOX0hjZ14VK7YVsn5vKWXuwKnfJNqMrAoU4lQU2H6okj+9vZ5g6FhWnYwkBw9cPZJ4h7ndq2RQwoR1Q73/N8RuNnD9RQP4y/ub6hxPT4yid5qzTevZFC5/iI9X5fD5dwfRNB2bxcjV5/fjzMEpmBsYajcHK9EObUTpOYaQMQpT6U5AJZjQT1L3dUFqyV7cm5cQe/6tuDd+QcUnfyNm9qP4lYb3qBEN8wTCLFmXx8Kl+wiFNcxGlSum9OHskenYTHX7B0WpuRhToMG/sdNhUMLoihElZy3V3y8C1UD0GZdRteoDqr9fSFxcCmrvyS26Y67rOkOyE9iTW9Hg8zbzsUsZTdc5XOHjcKmHWIeZzKSoFrexT7qTB68bzZuf7yS/xE3/HrGcMyaLtxbvYNygVEb1SyLaJpdTp1LmDvCndzaQe9w0VJNR5SdXj2JQjxiJpzqA/NYKcQqFFT6ee3Md4RO+xPKKXfxj0VYevGYUhna842YKuQhu+gTLoLMA8G9fimn4xQSNjgZfr+swIjuBn18/mg+XH6CsysdZI9KZMDStw1Pc6cDcb/bxzbrc2mNef4hXP9qO1Wxk/ICkOq83aT7cy/6Lb/dq7MP2Y+s7ltKFc1BQSLz2N/jsGe3cAtFSWmJfkq77LQFHFjEZQyAckADgNGmazpJ1ucz7em/tsUBI450vd6OoKheNy6y9CA/rOut3l/LmZztRVYVbpg9icM841Gb0YRZfMf6tS7CMnI4am4IlcxCOsTMIpw4lIaUPrnUfo6YNJtTCKTOapjN2YDIfr9hf50YMQEq8HU3XUFUFTddYsbWIfy3aWvv8xOFp3DCtP1ZT8xMgGBSFwT1i+fWt4zlY7OLDZQd4+YPN6Dps21/Oyh4F3H/ViHrBljgmrOu8smhrnQAAIBjS+L+31vHknRNIiWnDBd6iQRIECHEKO3LK6wUAR+3MKSe/1ENWYsNzRtuCGvLg2b0az47lNQcMZuIGT4VGggAAo6owKCuWgdeOJBzWMRnUOhsAdpSiCl+dAOB4b3+xi2HZ8XW+WEMGK1EjL8S/fyOezV/h2fwVAPZRFxKydK2dGkWNkGol5KjJHuO3JHRwbbqmvGIXC5fub/C5+d/s4czBKbV3qg8Vu/nr3GOjgs+9vZ7/vWMCGfHNWIfhq6R6w2J8B7cSLM0jbur16Am9CGugpQzGMa0XAUPrBHQZCTZuu3QIi787yN68SlQFRg9MoVdaNAo1gUJJdYBXP9pW530rNhUweXg6AzNbnnEqENR44Z0NVHvqLkTedbCCg0UuBmRIVqvG5Jd52X6gvMHnNE1ne04ZKcPT27lWndeLL77IJ598AsCUKVN46KGH2uQ8MvFSiJNQVYX8kpNnKqn2tO+cRr81mfiL7iTsqiDsqiD+orvwW5NO/UZA0WsCgs4QAABUneSzq6j24wvUXXeh6xBK6EvM2dfUHlNtTuxjZxIytF8g
JkRnUuUOEGpkA0B/IIzbd+yitbC8fjKA4gpPs84bjO9LzMQrCRYfBEXB1GsUQUPNzQhdV1otAKgpENLi7cQ4zFx6VjaXTM6mqNzDt+tyGdgzHoDyan+D045KK32tUgW3L1gvADiqvKrt1iV0B65TfE/mF7tRmzMc1Q2tWLGCZcuWMX/+fD744AO2bt3K4sWL2+RcEgQIcRKappOZ1PgddoDoqPZdE2DxFVH20YsYnIkYnImUffwiFl9Ru9ahtcQ4zI0uXoyPttaZ6ws1c5mNJTup+ObN2mOatxrPqnmYJK2kiFCxDjNmY8Nf5zaLkajj5qunnpDpRlEgJa55AbSpdDeVy9/DnNYHgOpv38AUaru0w1mJUcya0pdwWGP3oXLOHZPFL24cQ6y9ZsOueKcVQwMXkomxrZNtymEzNdrfx0fLVJaTcdpP/j2Zkdz1Mi2Vl5fzt7/9jXvuuYe///3vlJc3PNJxupKSknj44Ycxm82YTCb69OlDfn5+q5R9IpkOJMQpDOgRi9GgEArX76CGZMeT1pxh9BbQjFFEDZ2KuX/NTt6B3SvRTF1zDnVStJXzxmbxxepD9Z67dlp/rKa6FzbGsBfv5q8gHCJq1IXY+oyh9INn8exYjn3E+QTtXfNzEKIl0pMcXD6lD+9+ubvec1ee25cYm7l29C8z0c79PxjJm4t3YlJVrr9oIKlxzbyAtcfhHHcJ5mEXQHkumquMsKFtL4bT421cNbUPqqoQFxdFcfGx9KAJTjO3XzqElxdsqU0SMHV0Jj2TW6dfiLIYuHn6IP783sY6x4dkx5OVfPKbRZEuPd7GsD4JbN5bWu85o0FhUM+uNZ2zvLycq6++murqaoLBIOvXr2f+/Pm8/fbbxMW1rC39+vWr/f+BAwf45JNPeOutt1pa5QYpemeZF9BEpaWuLhctno6kJGedTi1SdPZ2786v4rm31+M/bnpKr7Ro7r1yOHHNHAloSZsNikZYV+v9vys4sd1uf5gv1h7io+UHCIU1HDYT110wgDEDkjA1cFfPFKpCz98KGSMIG+2YyvagqCqB2OxOnR2oNX/Hk5Kal9Wpu/ef0Pn7kraQlOTkYH4FSzcdZv43e/AHwtgsRq48ty8Th6RiaWCUIKjpKNRMD2yJo/2PogC6ht6OEwwa+lnr1CRzKCr3EOMwkx5vx9SKG3ppus6+w9UsWZtLeZWfs0amMyw7Aae1fe6pduXf73J3gBfnbmJ//rF0rxazgQeuGUX/9OiTvre1293cPvSov/3tb7z22msEg8emh5lMJm666SZ+/OMft7R6AOzevZs777yTe++9lyuuuKJVyjyRjAQI0QT9M6J5+scTOXi4Gk8gTEK0laykqAa/XNvD8Rf9XSkAaEiUxcBlk3oxZWQGXn+IaLsZh9XQ6AV90BiN0nNizZ1NvWZesq4j6eVERLOZDFw0LpMzB6fg9gVrpq7YTI2u/2kowG6Oo/1PzWk6vi9SgNRYK6mxbTMioSoKfdOiGXDZUADCjazFaAtGjl1wKgqoWpCwYmq387dUXJSZh64bTW6xm5IqHzazgZ4pTmKjTJ36Bk5DtmzZUicAAAgGg2zZsqVVyl+7di333Xcfjz76KDNmzGiVMhsiQYAQTaDrEGMzMax3fEdXpXvSIdZuqp3be6ovhOMvbLral4cQbUXTdKJtxtpMQG090K/pUFzpI7/UjcVkoEeKo8PTDreX9rz4BzAFK/Gvno9//HQUUwqm0l0EDm3BOOwiQmrX2WHbYlTpk+akz3F71HTFPnzYsGGsX7+eQODYgmez2cywYcNaXHZBQQH33HMPc+bMYcKECS0u72Qi469VCCGEEK1G03WWby3k1Y+21V7ExTksPHTjmIjL964oCsGwhtGgtMmIpKrohHYtxb3la7z71hN7zo2UffoSejhIQmo2SuqILnkh3ZVdffXVzJ8/n6qqKoLBIGazGafTydVXX93isl955RX8fj9PP/107bFrrrmGa6+9tsVln0iC
ACGEEEKclsPlPv79Yd2c/OUuP699up2f/WAkamNpv7qZSm+Q5ZsKWLoxn8xkBzMn96ZHYusmKNB0BXP/yVgObcd/cCtlH70IQNSI89ES+0oA0AHi4uJ4++23efvtt9myZQvDhg3j6quvbvGiYIDHHnuMxx57rBVqeWoSBAghhBDitBwqajgV6Pb95ZS5AiQ6Le1co/YX1nVe/3QH63YWA1BY5mHj7mKe+NGZpLZSWtKjguZYHGOm4z94bDdk+7Bz8LXmXgzitMTFxbXaIuCO0vGreIQQQgjRpZiMDd/pV1UFQytm4+nMiip8tQHAUaGwzq5DFa16HkWp2ZOhbOGfah4batZOlc77AxZP2+SPF5EhMv5ShRBCCNFqeqZGN7hB2TljMomzd52MNS3R2MLrcGun4dU1dG8VejiEc9Q0km7+A5YeQ9ADXgjU3wFaiKaS6UBCCCGEOC2JTjMP3zSWv83bTHGFF0WBScPTmTmxV0dXrVYwrJFX6sHtDZIUZyMp2kprrlRIjrUxsGccO3KO7RSrKtA/K7YVzwI6KqHMUSRd+2tsKVmUeww4zvsRqr8Kv7OHpEcWzSZBgBBCCCFOi65Dr2QHT9x+BsWVPiwmA4nRlla9yG6JSm+QfyzayrZ9ZUDNxfm1Fwxgyoj0Fm+QdpRRVfjRpUP57Psclm7IJz0ximum9Scj3t4q5R9P11V8zl44o5zgqSZgigVTrAQAokUkCBBCCCFEs1iMKpkJrX/R2xKKAl+vy6sNAKBmT4M3PttJr7Ro+qS2bLfY48VFmbj23H5cNjkbs1HF0FmiICGaQNYECCGEEKLb8ATCfPpdToPP7cwpp7Wzl+q6js0kAYBoPc8//zzTp09nxowZ/Pvf/26z88hIgBBCCCFaRFFAVVU0TevwvPUKCoZGpvyorTQVSAgAn89HSUkJiYmJWK2ts0ne999/z6pVq1i4cCGhUIjp06czZcoUsrOzW6X840kQIIQQQohmK6n2s2F3CXvzKhjQI57hfROIjzJ3WH1sZgMzJ/XmnS9313tuUK/4Dg9SRNcXCoV44YUXmDt3bm3wO3v2bO677z6MxpZdWo8fP57XXnsNo9FIYWEh4XAYu71tptxJECCEEEJEAFVV0HW9VS+Ci6v8PPGv73D7QgB8t7WQOIeFR28ZR4KjYwIBXdeZNCyNvBI3yzbW5NG3mAzcMmMwWYmda/2C6JpeeOEF5s2bh9/vrz02b948FEXhgQceaHH5JpOJF154gX/9619cdNFFpKSktLjMhsiaACGEEKKNmANlWMp2oipgqdyPxXu43esQ0nS251byn093sGR9PpWeYKuUaw6UUVFUQFjTeeTSNPql1Vxgl7v8rN1Z1Opz70+Hw2rklosG8NSdE/jlzWN5+scTOXNQMmpHVkp0Cz6fj7lz5+Lz+eodf//99+sdb6777ruPlStXUlBQwLvvvtsqZZ5IggAh2pE3qLE7v4otOeXsPlgu2d2E6MZUFcL711Iy9xnY9iml857Gs/ZDTErrXIQ31a7cSv7437V8tS6P1z/dwT8XbSPUwg2tzJoH11f/Jm3XPH4zI5q075/nx+M1HLaajcI27Czu8J2DVUUhNc5Gn7RoYuymRjf3EuJ0lJSUoKoN/26rqkpJSUmLyt+7dy/bt28HwGazccEFF7Bz584WldkYmQ4kRDtQFNhX6OLP722kotpfe+ycMZlcflY2Dov8KQrR3WgamPtNxpKzmcqlb2OMTsI+4Sr8evvtqGswqHyzIa/Osa37Symt9pMS0/yFjEGDHee4SymZ9wy23E1oSX3YWObA5a25AOrbIxattXfOFaITSExMRNO0Bp/TdZ3ExMQWlZ+bm8sLL7zAW2+9BcCXX37J7NmzW1RmY2QkQHQIgx7E5slDQcccrMBfeKCjq9SmCit9PPPamtoAAGo221myJpdPvztIp9lhRwjRahQFlOoCAnk7QTUSqi5FK9qP0o5jgJqmk5XsqHPMYjZgM7fsxoOu
A0YTypHpNbrRgjtQc2FkMqpMGJIqQYDolqxWK7Nnz66XDaix46drypQpTJ06lcsvv5zZs2czatQoZsyY0aIyGyO3H0WHMJbtpWjeH4i76C5c276lsrqM2FmP4lcdp35zF6MosHlvKYFQw3cOPl2VwzmjMklwdlw2DSFEW9DR3BWYU/sSM+02XKsXEa4sQk0NEVbaZzRA13UmDk1j9fZCDhW6MBlV7rp8WIunx5h1L1VL38KUmo111AyqP/kzZ/WfTPnYLKaOziA93iZZeES3dd9996EoCu+//36d7ED33ntvq5R/7733tlpZJ6PoXWySXGmpq1vfXUhKclJcXN3R1WhzprAb38q38WxbCopK6nW/wuPo1S2/NAwGlefnbmTdjuJGX/PLm8fSJy26HWvVcSLld/xErdnupKTm7Xja3ftP6Hy/Xwo6prCbgMGBMeRGN5gIK60b8Delzf6QRkmljyibibgoU6v0tWZfCagqQUs8Vk8BbnMcqtFKew10dLafdXuIxDZD67e7uX3oidpin4D2JCMBokMoWpCw68iW7rqO5qmCKJ3uOC9G13XS4qOAxoMAm6X95ggLIdqPjkLAUDPCGTJGneR1OsVVfkorfaTE2Ulwmpt9oe4LahwqcWEyqKQn2DEbVCxGlYyEmuw9rXWzJWBNPFp5vLa0mvnF3TvGFKIOq9VKZmZmR1ej2SQIEB1CqS4iULCHxKsexbNjBZXfLcI5oy9+5WgOZx1FUTrlyIAOhDUds1Ft0l1VTdMZPSCZj1YcaPD5Qb3jSI3rencQhBCtQ1Fgy/4K5ryzHl2vmVP/6M3j6JnUeNBwIkv5bjBH4TLEoh3cxLaDFhasKeesEelcf0F/zB2cqUcI0fl0aK/wzDPP8PDDD3dkFcRpMFGT1k5RwKgHTvraoKbjD/ix+Q6jKDoKOjbvYVTCAATi+5F0/ZME4vtjHXcVCRfeRnDHt5hCLhRFx3R4M6aSHbWLzjqDkKaz41Alz769gV/+YxUfLDtAmevkn8NRPZKjuOGiAfWOx0db+OH0wZK7WohmMOjHUm0e7Z86+zmNR96j6iFUvWaDLW9Q45+Lttbe9AiGNN7/aneTb6oHq0qp/Pq/lM1/Bn3dXMJf/52ze2iYjSpLN+aTX+I5ZRlW72GMmhcAi68IU7DytNrlC4bZmVfJ9zuLySl2EW7kBomR9v+ZdUaBsMbew9V8v7OYfYerCYQbXjMmRFvqsJGAlStXMn/+fKZOndpRVRCnwRwow/vdXKLGz0L3VuDbvwHTiOmEVFu91+aXeZj/1S6u7V1EYO3bxF76U1Rdo2jRn4i74EfoPc9ER8FrTgQdFFWlbPEb+HI2Yy/Lw9prJGUf/wVUlaRrn8BnT++AFte39UA5z7+7ofbxB9/uZcXmAn5581ic1pP/KRkUhSnD0+mfGcfOQ+VUuQP0y4qjZ7IDp00G5IQ4XUbNT3j7YszJ2Shx6XhWvod93OVA68z1bfSc2xZjTslGiU3Hs6rmnH5rUpPeb3Hn4t28BNv42YQObqy5ydFjHGFNwesP1XltlSuApoOhCfcHTNEJxM24l6LXHsa/5Uu04Zfw8nfh2mQEgVD4pO+3unMpef8p7EPOJmrYuZQt+D+MCRnYp/6QoPHUn6c3GOZfH21n7Y6i2mPXTuvP+WMy60zwNIVcBNYtwDpoMhgteNZ9hG38bAKm2FM3shsJhjXe/3ovX6w+VHvsojN7csVZ2Zia8gMXopV0yNVHRUUFc+bM4a677mLHjh0dUQVxmpSQD1/OFvyHtqH53JgzBmAO+cFcNwjwBTX+9M4G3L4QroFJxBtNVCx4tqYMsxVDXBqhE+b9Bw024s66ksP5u/BsXYpn61IAnOMuJ2RrWb7d1uILabz+af3f1aJyD/vyqxiRHX/KMgyqQmaincxEO4qikJjoiMgFXkK0BkULECzJpWrlfAzOeDSvC/uoC9vhnIeoWnX8OS9q2nsV0L1VuLd8g//QVkLlhTjHz8SQ
FcJhsTJral/e+WJX7etnTe3bpAAAIOzz4N2xArRQTT+7+1suH/kj/ljgJi3BTmaSA1WtmV7ZUC4QzRSFKTEL97pPca/7FICoUdPQDE2bpnig0FUnAAB4+4tdjOibSPJxexEoYT/+w3txb1uGYjSjGM3Yx/ghwpZE5ZV66gQAUJMl7syhqfRIbPoUMCFaqkOCgF/96lc88MADFBQUnPZ7ExK6XwrJE7XWqvXWNQBl2i0UL3gegMTzbsSSllXvVfvyKimprNky+5XvAjw4/BJYNxeAuLOvwZk9hGil/iw0PWEwsZOvovyr/wJgiIohdtyFmKIT2qpBp+VQYTVlVQ1vBV7lCTT7Z9Y5f9ZtT9rdMbpX/+nEPuUacnd+R7iymIRpt+LsNRhoy8/ZiX3qteTuOnLOC27D2WsQ0U2czqfFjiIw4jyq13+OanUQO346ppiaUYRLz+5DdmYMRWUespKdDOmTgLWJufzD7ioIeki44HZsvYdSuPBFeiXbufeqEQzqlUCFy8+n3x/CZFSYNDyDAT3j6rXLMu0W8l99BABjfDoxI87B6Iht0vm/21k/6YGugycQPuFn4cR8wW0UvPZL9ICXlB88QlRW3yadozEd/TfVHJtzKho87vKGmtSertjm1hCp7W5L7R4EvPfee6SlpTFhwgTmzZt32u/v7inuOmv6L6s7l5KP/oY5rS+hymIKP3ie2MsexG+q+2ViNSj0THVS7fZx97gwrJqPYqxJh1f25WsojiQCyYM5PguQouhYS7ZR/vWbtcfC7kpKl/wX6xlXEzR2/IWLQdfJTHKQW+yq91xCtLVZP7PO+rNua9Lu1imrObpT/2kOVVH9yYsoZivmlN6Ufvkf1PgsnH2Ht9nv17Fz2mrO+cWrqHGZ+GJ6n/K9iqJgzFtL9frPsfYeiS9nM6VfvoF10g0E1Zq75f1SnfRLrfnZVld6aWorkpKiMY68lJDBTKVuJOaSn+FXbIwAiiq9PPbyKkJH5px/ujKH/73jTOKjjqUptXiLKF84BwDV6iBUlk/pt+9hHn0ZIUP9KZ8nSoiuP2JgNChE20x1fhYWfwnl8/8PQ3QiqiWKogXPk3jV4/jsaU1s6Ynt7pp9SVwje8LEOcynbE9XbXNLddYUoW3tmWeeoby8nKeffrpNym/3IODjjz+muLiYyy67jMrKSjweD0899RSPPvpoe1dFnIawOYboCbMw9p2AEnQTPryHUAMX5yaDwj2zh/PlmoMYDSWoZiuxl/8cFZ3SD56lofxxxrAX9/YVoGs4z5yFpccQSuf/Ad/edUSNnt4pggCTQeGm6QN5+rU1HH8NNbBnHL1Su0ZnIkR3EjLYiRoxDUNcKro9AeuuZWj2E+9wt8U5z8cQl3ba59R1HTU+i9ipN6D2P4uo4j2AQkBtncxgIYO99v9+5diF+8HC6toAAMDrD5Fb7CI+6rgpjAqg68RMvQFL9lgqPv87NX110wLGHklRXDKpNx8u3w+AqircecUw4h11L3ZDZifOsTMwZg1DV42E9q0mbI6M/VGOlx5r4/oLB/DGZzuBmqliN140iORYyRLXlbhcLj777DNycnLo2bMnF154IQ5H612vtMfa2Q7dLGzevHl8//33pxXhdKc7WQ3pzFG+qlB7AawoOrp+kiFwBcKahiNUidcUh6KAJVCG3xzX4PviLAHcOdsIpwxCUy2YK/aBwYTfUX/KUUc6VOLm+22FFJS5GT8olUE94065KLgxnfln3Zak3a1TVnN0t/5TQUc/Mqp4tH9q69+vhs55Ok71HgUN/bjEfQZFI6yfPJHfydq8/VAFf3xjXZ1jj90yjuwTbl5YAqWETNGEFRPmQDmawVInsGiwrke6cl2vSZtcWOGlwhUgOc5GgtOM0sC+L8e3vzmf3/G6cl+io1NY4aOsyk9CjJXkaCtNmVXWldvcEp1tJGDNmjU88MAD6LqOz+fDarWiKApz5sxh7NixLa5fRUUFd9xxB9OnT2fHjh3dZyRA
dF3Hd9YnDQAAdDAoKt4j04V0HXym+EZvLBmjEwimjaxJkaeDPya7dSrdyrISo+g5tQ+KohCWlG5CdCj9uIvM9optWnrOk71HJYx6aC3+6EyqjIkkmP2Et3yGeej5zc6g0zvVydTRmXy9LheASyb1IrOB/Qf85mPrrwLmk49uhHWdQ8Vuth8oR9d1BvaMp0dyFOnxdtLjTx44HN/+bhSPnjYFhdRYG6mxp55uJToXl8vFAw88gNfrrT3m89WsGXzggQf45JNPWjwi0JK1s6ejQ4OAWbNmMWvWrI6sguhEOuPGYA2puZPaRSorhOhwvmAYtz9MjN2EUW38BoqlOpedVUb+9vEBCst3MLhHNDcOjCPxu7lYJ99EsBlpdKwmA9dP68eFZ2TVZCVzWjlJFU5Nge+2FfHPhVvrHL5p+iCmjkiFU90gEqKL++yzzxrMsgU10/4+++wzZs+e3ezyW7p29nTISIAQQgjRJnS2Hazknwu3UuHy0zPVyY8uHUp6fMN3f6ttqby8Mp/C8pq7itsOVvFVfCI3nzsCfwvyaBoUhZSY1rnjXO4K8OpH2+sd/++nOxjaO55Ep6VVztNZKGhYqg4SticRNEZhDlag+irxR/fsMjeuROvKycmpvfN/Ip/Px8GDB1tUfnuunZUgQAghhGgD+WU+/u+tdbUXizmHq3nmv2t48s4JOCz1v35dQZX8Em+dY9vy/fjMiQ3Mru8YxZW+OguNj9I0neIKb7cKAhR0jPkbKP7wz9iHTcUx7lIql7xCIHcn8bMeIhDXr6OrKDpAz549sVqtDQYCVquVHj16tKj8f//737X/P7p2tq2S55x8tZEQQgghmuVQUXW9u8XVniCHSz0Nvj7e6GNUn9g6x87ub8NatqvB17cXDQiENdyBMKFw47e/o2zdbNcvRUG1x6AYTXg2f0XRf36OP2cLisWOYpZNvSLVhRdeWLPbdwMUReHCC9t208LWJEGAEEII0QYsZkMjxxsehDf7K7mmTxkzxiTSM9XJjedmMTq0nkDeTgx6oC2r2gidPQXV/PGt9fzy5VV8uHw/+cVuRvRLqvfKkf0SSTvFouCuRtchENeH+EsfqDkQDgGQcMVD+KPSO7BmoiM5HA7mzJmDzWbDaq1J62q1WrHZbMyZM6dV04TOmjWrzTIDgUwHEkIIIdpE77Ro4qMtlFX5a4+NHpBEaiNrAnyOTDL7hbluiB2/NRmT5kUtDRKKzyasNLzBVFs6WOzh96+trh3N+Oy7g4wdlEL/HrEkxdlYvjEfXde5eEIvpozMwNSiFcedkylQgWvtR3WOuTd9gXXclQSNMhoQqcaOHcsnn3zCZ599xsGDB+nRo0er7xPQHiQIEEIIIdpAjM3EIzeNY+3OIvYcqmD0gCSG9E7AoCi4/GEsJrXehXPYmUEII+g6umrEnzi4fsFK7f5e9Z9SQNWC+HUjvqBGlClMTkmASpefuGgr6fH2k2YoOlaOwvrdxfXOsXZHIRlJUazZXsjkEenEx1iZPr5Ht0yZrCoQ2rMKf84WVHs0cefeRNmnL+PZ/BW2XsNR0kdF1OJgI0FCRxaoG7QgmsF46nTh3ZjD4WhRFqDOQIKAbk1H1cNoihEDIcJt8OP2BsPsL6ii3BUgK9lBVmJUp1nAJoQQHS3BYebCsZlcfORCubDSxz8WbqW4wsuFZ/ZEUSAh2krfjGhivLn4dq7EPGomatCNZ91H2MZdUbtHgKLAwRIPC5buw+0JctnZ2QzIOLbjrqKAuXwvnv0b+cw1iPE9jBgPLmdzYATzV5cDcNbIdK45tx92k8b+Ej95RS6yU+0kxDowG06YIXzcBa5BVZh2Rk8sJgOxTgtnj8zg8+9yGDsopdF0iV2dpoO1z3iiynIxJ2TgWr+Y2EmzCbur0FMGRlQAYAq7CWz4EGu/MwlHpxPetQRTfCbBlMERHQh0dRIEdFOKomMq3EbYU46l11gCmz/H0nskfkfLVq3XPQl8vDKHj1YcqD30yE1j
6ZceedvACyFEY3QdwmENtz/M06+vodIV4LoLB/D6J9trLyTvubQvI4o/wbtzFTZ3JYHDewlXFmHLHoVyZCPF0uoAT/77ewKhmrvuf/jvWn5163iSk2v6XJPmpWrVPAIHt3JWnxIMS7cQ9FQx+ZwRfLheJRjSWLohn5F9Exjm/R6UDAweIzHb11HW6xzSsrJq66PrOiP7J7Jw2T50Ha6Y2pcvVh+korpmapPNYmT2uf3ITo/uVrtQn8hnjscx4SqK/vtLdJ8LS88hmMZcTkDvZougT8HgLcW17jPcm7/GPmgS7o1fYErMJOayX+A3tGz3XdFxZGFwN6WGgwQP76Xi839SueBpqlbOI1x+GEVpeWd9dFV8hSvAJysP1HnuoxUHULvhvFAhhGip/FI3la4AvdKi2bqvrM6d5L8u2oMy7geYMwfh3bmScGUR0WddSzhlSO3rCso8tQHAUXnFrtr/BxQb/rE3QFwmyt4VaJ4qvKOv4YWlPoLHve+rdflolSUkrvk7Q/b/F33vKnzVlbj94Tpl90iK4uEbxzJuUDKHy9y1AQCA1x9iw64ieiQ1PgfaoPux+IsBUPUQtkAxjSRV6bRMYTdVS99CD3ixZA2iasV8KNrV5drRUn5HFvEz/gc94MW98QsMzgRiZvxEAoAuToKAbiqsmrEMOx9jdBLBwgPY+o9D6TG6RcN2bl+I73cWM2/pPnbmVoKqYDxh+Nhpj6y7I0KIrqW02s/WnAoOV3jbfd9vo6Gm//UHwlhPyBxkNKiYtADhqpLaY6GKAtTwsaxA8Q3k4I+PrrvI2KoEUHxVtY9N7mLSY+v2y3abkcCg89EDPqg8THDcdbyywlPvBo6CQr/0aH506RD251Vxop055fiD4XrHAQyan/DmTymf+xRW32HUg99T/MZjmCv2Nfj6zipksGPrfyYJl/0Mx0X3ET35KpTo5IiaCgSg6kFCFYW1jzW/BwLuDqyRaA0SBHRTBj2Ab8MnhKqKsWQMwLtrNXrOmmaPBOjA3G/38ff5m1m0bD/P/HctBaVubpo+qPY1FrOBi87s2a2HhoUQXVdOkZtH/r6C/3trHY/+fSUb95W26x3d9IQo+mTEUFDqJjsjBttxG4Y9cs1g/KveJlxVTPTkq7H0GoFn89coRTtr65gSa+XHs4ZhsxgxqApXTO1DdtqxO7Ehn4vw8v+ge6swT7oeQ2pfjDu/5KrhKibjsa/7c0akYlj2T1SLHeIyMH7/X359eTI2U8OXBCZVYUS/xHrHB2cnYG3kPbpqxOCIJ+wqp+TNX1H+2csoJiuYrM345DqOjkIobQSBpMEEFBvKkIvwW+qnSO3uzK58qpa9gzE+nbgL70AP+qn4+EWsWnVHV61buvHGG5kxYwaXXXYZl112GRs3bmyT88iagG4qrJgx9xhCXEImao+RWLd/hSEhi1AzRwIqPEG+WZ9b59jCb/fz8+tH0TPVSaUrQFqCnXiHOeLukAghOj8deHfJ7jqbXf1r0TaeuXtSoxe/rc1iVLln9nBWbT3M2h1F3DZzCAZVITHGSlqcDVvSDdgHTEDLHI2j30TseZvRjluAqioK4wckM7hXHOGwTozdVPtcSbWfJ/61jjumziI9u5znvjNyz8W3EFO9j39t1AiGNCwmA9dc0J/+6U4M3omQ0p9qzYotZxmW6FiCx/fdCri8IVRVIcpi4NzRmazZVkhRRc2OxlE2E1ef1w+1kShKw4C5zwRsBzbi3bMGgPjpd+O3d738+vpx6S40PTLvnQYcmcRf/GPU5GwClkQSLnWgOOLxqZE5HSgUCrFw4ULeeustiouLSUpK4tprr+XSSy/FaGzZpbWu6xw4cICvvvqqxWWdigQB3VggYSBKgkYIFXXwBfhb0HmZDSp2ixG3L1R7LDXRjlFRyIi3k3FkkxgJAIQQnZGug8cfrHMsEAyjtXOnFWs3cfH4Hlx8Rg8UqB05VRRw6QlYeiWhhXXCphjU7LPqjazquk7Ukc3Gjq/6
pr0luLwh/vRpEdEOCxXVXn75xh6eu/8sfpAR5uKzg0RHmYmLOhI4DDwPTVeJAgwJVxA87vvBH9L4cm0uH3y7F4vZyG0zBzMiO4HHbx3PoSIXYU0nKymK6JPsEKzqIcIHvq8JAAxGCIco//TvxF3+EH57Wmt9nKKdhBUjes8zCR75fQymjui2WaFOJRQKcd9997Fp0yZ8Ph8ALpeL5557ji+++IIXXnihRRfv+/bVTJm79dZbqaio4Ac/+AE33HBDq9T9RJEZ0kYQ/ciPuKV3L+JNPu65cjhGg8IV4xLonxnNRWc0PPUnpOnkl3nYk1+FO9DwfFEhhGhPqgKzp/atc2z2Of1wWBre1bfJ5arKaSdD0HUdXdNr+89qX4gPlh/gl/9YyWuf7aTcXbMO4HSmVhYfuUOv6dQu4A2GNFzuIHFRZnokRRF73MjB0e+EkKaTU+JjT0EVniP99b6CKt7/ag+hsI7bG+TP722ksMJLlNnAwMwYhvSIPWkAAKDoYcIVhaj2GJKu+19ip92G5nND0Nv0D0p0Ksf/PkZqAACwcOHCOgHAUT6fj02bNrFo0aIWlV9VVcWECRP4y1/+wquvvsrbb7/N8uXLW1RmY2QkQJySxVdM5cd/Zvg5N/HS/4zE/80rXD71HLQYS72FdZqus3hNLu8t2Q1AcqyNh24YQ7yj/Xe7FEKI4w3qEcuvbh1PXombhGgr2WnOFo1eFlb6WL6pALcvyNkjMuiZfPo7yCoKLFmXy4Jva+7+LVmTS4XLz92XD0Vt4q4rIVc5o/on8+36fC4eFc+8VUUA9Ex1Eh9dfzFx7fs0nYUrDvDhsv0AZCRF8bNrR1NSWffiRtehvNpPamzDOx03JKxaMAy/mIRBk/FZklB7J5OYPgC/NZl2X5EtRCt666236gUAR/l8Pt58802uuOKKZpc/atQoRo0aVfv4yiuv5JtvvmHSpEnNLrMxEgSIU/NUECzLp2z+HzDGJhMsySUYl4Y5fTAhte6XQkmVvzYAACiq8LJhTzHnjsxo71oLIUQdBkWhV7KDXsmNp7VsqkpvkN+9uhq3t2aK0Tfrcvntj86snRrZVIGwzopNBXWOrdtRjMcfxmFp/Cs6pOkUVXhJMLop2PAdg/qM5I9XJaGvnUv81Ev5fLufH88aXm9H4uMVVfhqAwCAvGI323PKyUysG8yYjSopcafXLoCwaiVsqVkIrGHAZ2n/AMAcrCRstBFWzFjDlfiNMTJtVbRIcXFxi54/lTVr1hAMBpkwYQJQM+rSVmsDZDqQOCV/fD8SLrkXPRQgWJKLpddwrONn1wsAAPzB+lvHuzzB2r0FhBCiO8gvcdcGAFAzDSfncE2mFIuvEEvlflQFLOW7MAfKGi3HZFAY3rdu5p3+PWKxmRv/0g+GdT5Yto9vN+bzyaZqfvt9InM+LeRwRQCjM55xg9N49KaxJJ9kFABoML2nxxeiR4qDn107iuyMaEb0S+SXt4wnwdn80VxNC1NZWcnGfaWUl5cTDLfPNFFzsBLXFy+jbfsCa3UOpW89jqUqp13OLbqvpKSTZ4c61fOnUl1dzR/+8Af8fj8ul4v58+czbdq0FpXZGBkJEKdkDlbgWv957eNA7g6oOIQS36/eHZWUOBuDe8Wz7UDNl56qKozsnxTR8weFEN2Pw1b/othpN2NSQnjWfYx3x0qiJ86iZOm7xEz+Aergi2hwir8O0yf0pNIdYM32Qvr3iOW2S4ZgOMl9k8PlXvblV2E1G9mwq+auY2EZbN6v8MRNV5Iek9Sku91pCTayM6LZd2QPAKNBYVDPOAyKwpCecQy6cSyqUlPH5nbhgWCQ0L7v0PN2oyRPRt37BZ6sURh7jyHK0rx9Zazew2gGC+DEoAcxewvx29PRTrivGTZFYe0zmsqvX4flYE7rh2apyWajoGMOVeM3RmMMe4CaPQGEOJVrr72W5557rsEpQVarleuuu65F5Z9zzjls3LiRyy+/HE3TuO666+pMD2pNEgSI
U1Iq8/Ef2krUiPOx9h5F6cI5uNd/hn1qJsETRgPMBoU7LhvKrtwKXN4AfTNiyUy0yxxQIUSXp6hKbUaftHgb10wbwLtf7kLTdM4akU6f9GiCuoGoM2YTyN1B5bdvY+kxBOPAKQSO9IEuf4i9eVVUeQKkJUTRM9lBXJSZ+2b0JDQthbAtAXuglLAGIbXhO/m+QIgBPeJZ8O3eOsfDms7m3YVkx+l4LSmnbI/VaODeK0ew61AlHn+QfpmxpMcd69NVaHHffbDITbRPw7bnW/ocWo0W8OFOHcnuncWcPSL9tIMLi6eAsvnPYIhPxz7zHrSdyyn69i3ip9+NnjmmNhkGQBgj1rR+x97bYwhhkwNFAdPhLVR8+xbxl/4E94bPAQXzuFkNjnALcbxLL72UL774ot7iYKvVyvDhw5k5c2aLz/GTn/yEn/zkJy0u51QkCBCnFEwcQNI1vyIUlUrAYCfp6sfQrHEEGukso21Gxh6/sYwEAEKILkxHZ29+NYuWH8BuNTBjYm+yEu1MG5PBmAFJgE5slBmDoqCgEy4+QKiyGFQjgfzdKJX5KHF9qPKGmPPOBg4UHNt99+rz+jF9XCrB9QsJHN5H7Dk3UbroeZxjZxDMnopBVTlxWn96YhT7C+ru4BtlMzF6QDKpGfEErXFN7ndjbCbG9a+/EVhrUFWFZZsLyYxOZEJ8BlpZLmpqf74rcbJsxwHOGJKKxahiCVbgM8YCNXfoLaHK2seNCRzaRt6/foHmc0EjC6it4SrKFs3BnNYPa59RVC17l6SeQ/HF9kV1JqIHvBT95yFAIX7G/xBS228jM4OiET6SoUlFQ1GofXzi86JzMRqNvPDCCyxatIg333yzdp+A66+/nksuuaTNc/u3pq5TU9HuKj1B9uVX4QuG6ZGcSrrBhgL4nL2aXaaCjqoFajJHaD40gwW9kQ3MFEWh2hdE0+puilP/dTV1VVUFp9UkU4+EEK0qt8TLc++sZ8LQdJx2E+98sYtbZgzGZlY5UFBFhTtAdno0PeONOPCgqeAYdwlRQyYRLjqA7qsGXWdvfhUHC6uZPqk3FpNKOKzTK9FMiUcndfBZhN3lFL35K9ToRNzx/Vi4dB+KonDGkFSSYqzYTDXpTB0WI+MGpbJ1Xymb95ZyzphMLGYj3289zJa9JRRP7MWEgQnYbVaMmr/REYX20CfVxijXt2jl+SiDp6Ft+4JzU3dRmNYXg0HFVLqT0o9eJP6ynxGM6YUhfyNlS/5D/OUPErIlElItddrgt6eRMOthiv776JEAAOIuuI1w1hj0Ey6afYZo4i/9KZo5irA5muTMQQQdNZuVha2xWLIG4tmxEtVqx5DUk2ATszG1lMVXRGDXSkxDzidssGI8vIlQWT6WHsPxO7KweAsJ7FmFafA0go1MUTIRIIgZBQ1VCxLuwJ9xJDIajVxxxRUtygLUGUgQIBpU6vLz1H/WUH4k37SiwAPXjGJYr7hmzw1V0DEe3kgwfzfWURfjWbsIa8/hBJIH13ttWNf5fnsRr32ynVBI44opfThvTCYWY91O3h/SWLwmlwXf7sVoVLn54kGMH5jU6C6WQghxug6XebjynH58uHw/FdV+RvZPosLl58udRVQdyemfrpZh08NUbfoKa88hmKMT8G5dij9/L9FnX4uOQkW1j5mTs1m6IY+yKh+3T00ka+d8VOeVKEoVChA18EzcRXkcLguSmeJk0+4SfvPP74h1WLh79nCy0xys3l7Eqx9v58IzejK0TyI7c8pZt/PYju5vfLaTfQcTuGtmPwKr52HtM4ZA4qB2/9w0TSczNY5DhSOIn9iP1zcZuWVSH/ICUYwYlEpUqJKyT/+O5q2mdN7TRE+YTdm3b4GuUb30LaKGnY01fTCeNQuw9h5FIGkwBj2IL2cT6MeSULi3r8CZPoSAOa5eHXyOrNr/e6N7AzU3mMjbiGfHKmLPvZnq7xZS9cU/cUz/CQHadjTAormo/OgFgqW52N3lmNP6ogU8VC57
D9XyEfEz7qV88SuEq0uItUShDjwf7YR8GxZPPq4V7+GcchPhor0ES/MwDL2QcDuOZIjuQcaaRD2KAmt3FNcGAFCzKOyVRVtbuPmXjh7wU73mI8refAz3+s/Rgz6UBsatC8q8/GPBFvyBMGFN5/2v9nDgSOaN4+0/XM28r/cQ1nT8gTAvL9hCQfmxzWg0HQ6VuPlyQz6L1+axv9BF6DQ24BFCiKQ4G/O/3lO7CdeGXcXsya1kxaZ8kuPsrNpcgMmgo3kq8eXuoGr1R5R98W/CrgrMKb3QLNEAZCU78QdClFXVzCO2mRS0inzCHz9NyQd/IuypRut7FoqrhFTPLipdAb7fVghAhcvPn95Zz+EKHy99UNM3Lly6D13XWbezqF6dV24vZf+mTbg3LUEP+hvsZ9tDz2QH1dZU/vh1iG05lTy5JESOL5oR2fH4jDHEXfYgqs2BHvBR+c0boGuYknriHDed0o/+WvNdseEL9FAABR1TZQ5V374JKMSfdxOGqFgCh7bhX/8RRqVp30+6rkPaEJKufhy931Tir3iI6Kk3t3kAABAwOIg574coRjOezV9T8fk/CZbmYes9Es3voWTeM4SrSzCn9cXQa3S9AABA0cL483ZS9u5vKPvoL2h+N4rewAuFOAUZCRD1qKrKnrzKescrXQG8/hB2U/N22NRRMWQNx5Tci2DRAcwZ/VEyhtZZyHXU8QHIUaVVPiCmzrGjX6Z13lvlJyPejqIorN9Twl/e31jn+ZsuHsSUEWntNPArhOjqTAYVty9U59ieQxX07xFHtSeApsOrK108frYRx/BzqF77CYrJgiUtG0NKX3ymWAB6pzlZvOZQbRmvLq/gN+dfgXHZyzXnGTeb+948zAPT/odd5Qb2l9Tthz2+EBXVgTrHTqzX8ar9Oqk9BqOkDULvoB5PVRXOGJjMwB5xVLj8RNmMJDgttWsWgo4MoideScWXr9a+J+bs69ATemBKzCJYcghL1pDaNoSiM3COm4kpqQcxw89GTRuEa+mbWEdehF9v+ndT0OgkGOMEHXz2tFZudeN0HUJxvYgacT6utR8D4MvfTcLFd+Pdv6H2dTFnXYvPHN9gGQFnBtHjL6Ny6VugGogaeSFeyWwkmkFGAkQ94bDG6P7189z2THUSbW9+rmiDEsa/8ROCRTlEDTmbQN4uQlu/QFXq36FKT7BjNNT99cxKdtZ7XWZS3U1/TEaVtCMb3VS4A/xz4ZZ67/nvp9spqaofZAghREMSYyxknNDXnDksjfPHZpEQbUVVFX56bjROc5jqtZ9g6z0CUHBvX4lv5wosnprNwFSo07f+/MJ4TCv/DakDMMQk4/78r9x6dgK/W1jI3OUFDOtTd8FuaoKd1HgbpuOmRaqKgqGBDcFUVSE5MRp/zhZCO7+t7WctngIs/hIURcPqysGoeeu9t7Xpuk60zUiPpCgSHMcCAAUdQ8FGKpa8Vuf1pYvmQP5WQpXFRA05G/+hrYS2f42qaIRUG4YRlxDOGoNiMOK3pxF14f/gt7TN4ubWphKGvStqAwAUFcegSZQseK7O60oXPofFdaiBEsBUupvKpW9j6zcexWSh4tO/YQ672rrqohuSkQDRoKG94xk3KIXV22uGoqOjzNxx2dCT7j55KmHdgKXfGZhSekPGcCy9RqDGpePXFSpcfg5XeHHazUSZDSRGW3jsh+NZuGwfPn+ISydn16QaPUFmYhS/uHEsC77di91m4rLJ2SQ6zeg6lFT58DcwfUnToajcS9IpNtIRQggAi0Hl/h+M4PPvD7Int5ILz+jB0F5xWIwqyXE2EmNt7CovJqVXInFTr0Pze3COuQjN66Jy3WdYhpxXW9aQXnGcOTSVVVsO881uH5ePvxpTz5FEmzUCh/eSm2umb2YMZwxNQ1F0brx4EEs35NE7PZqLz+xJgsPMY7eMZ8GyfXi8QYb3SSDGYea1j7fXqfO15/YirV9PFMs9qAlZ+HUFi+6h8rO/omsaztEXUbz4XyRc
+hOUtBEdsouuOVRFxXFTgGoyI81B87pwb19B4qwHCcf1xtJzGIaELHxHFv6GVUud7EcBuk5fbgh58R7cDEDstNsxJmYSLthVM3UsrS+xF95F9bK38e5ZQ7g8H8WZWS95hu5MIW7arSg9xxLlLkb3VBA0RDV0OiFOStG7WCqV0lIXWjee052U5KS4uP7c944Q0nQOl3vxB8KkxtuJsjRvGtCJFEVB1/Xaf4ur/Tz/znrySzxER5l54JpR9Ew60qEpoKCcMuOPoiig6MevFeNwuZdHX1rZ4Ot/9cPx9EpxNPhce+lMP+v2JO1unbKao7v3n9C2v1+KqtRcLDfQHykqqIqKEvahBL2EzLGoeghjqBr/Cekuj+9b0xPttVl/1CM3WVz+EIeKXOw4UEZ8jJURfROJsZvRj/vZKSqg1/SNDqeNDbsK2bC7BHSdEf2SyE5zYlAUVFWp8zO3evIpefs36KEA9iFTsJx5dYdukmXxFuJe8S5Rk6/Db0nA4s7DvXoh9knXEjTHoet6vTYc1VX7ElOoGrUyj2BCP3TFgLnqIErAheZMJWCOxxSsQq0qIBjfB02pe6/2aJuP/0yOfpd2Z639s25uH3qivLy82hShGRkZrVImwJIlS3jxxRfxer1MmjSJxx57rNXKPp6MBHRnioJywoXx6TCqCpkJrf/lcLSz0nUdFPhw2X7yS2p2bKxyB/jb/E389rYzsBjUmp0qm7CgTdf1enmxk2KtTByWxorNBXWOD+gZS3qi3DURQpwe/SQBlK5BGA0wg8kMuk4YA+EG8t0f7VuDYR1/SEOH2k3IAOwmAwMyYhiUFVt77MRz1/TrNcdsViP9MmLonxkLul7ngvn4/yuKRri8AD0UBCBweA/2kLtDgwC/LQXbeXfip2aqqT8qA9vU2whgrg22ulvgGjQ6IWFgzQMd/M4edZ83RUNC9EnLOP4z6e4BQGe0bds2nnrqKfbv34/JZCIYDNK7d28effRRBg+un/HwdBw6dIhf//rXvPfeeyQkJHDzzTfzzTffMGXKlFaq/TESBHRDIU1n64FyPly+n5R4O5dM6k1qbNtlPWjsLk1TaDrsy6+76U1xuZdAUKsJAlrAoChcfV4/UhPsfLTiAOGwzrTxPZg2PguzQZYFCyEapygK3mAYg6Jgau3+QoE9+VW8+tEO8ktcjB6QxNXn9yfJWXdaS5P6VQXW7Szi/SW70TSdy8/Opk9qw3c5jSEv3l2riBp5PvahUyib/yx6dQmKNalDpgMdFcR80sdCdCbbtm3jjjvuqN0t2O+vWWO4Y8cO7rjjDl5++eUWBQKLFy9m+vTppKamAjBnzhwslraZ8iZBQDe0t6Ca59/dUPP/vEo27y3hyTsn4LC07o+7whNg455Siso9jB6QTO8Ux2nn5zcocMEZPfj3h9tqj501IgOntXXq6rQauXRiL84dnYkONVOa5KaJEOIkAmGNlVsLef+rPURHmfjhjCH0TXe2Wt9RUObl6dfX1l7kr9tZTG6xi9/eeka9vVBOpbDCx2//uaq2rO37S3nijgmkx9Xf0T1oiMI2+UZ01YjPYCf+6t/gN8Z0aAAgRFfz1FNP1QYAJ/L5fPz+97/n9ddfb3b5OTk5mEwm7rrrLgoKCpg6dSo/+clPml3eyUh2oG5GUWDXwfI6x6o9QQrLWzcDRCCs8dd5m/nPx9v5ZGUOT766mv2Fp5+dQNdh/MBk/ueqEYzqn8T1Fw5k1tTsVq2rpunYzQaizBIACCFObW9+Ff/5eDtub5CCEg9/+O8aSk9IzdkSOYer693lLyrzklfiPu2y8kvcdaf/6JBX3HhfHDRF107/8RljOyx1qBBdUV5eHvv37z/pa/bt20deXl6zzxEOh1m5ciVPPfUU77zzDps2bWL+/PnNLu9kJAjoZnSdeqnsDKpCrKN1h5KKK3zsya2bw3r19sLahW2nw2JUufDMXvzkqhGcPzqdaKuptaophBCnLf+Ei/FQWKe0svVupJyY
/vioPbmV5JZ6TqusOGf9vj0+WnaOFaItFBcXYzKd/BrFZDJRXFzc7HMkJiYyYcIE4uPjsVqtnH/++WzatKnZ5Z2MBAHd0MAeMVxwRg9UBZx2E/dfPYoER+vOsbRajPVyUyfF2lq0QCkc1mRYWgjRbCGtZvfc73YWc7DY3aSkAg058UaK0aAS42i9C+vs9GjsJ0x5HNAjjt25FTzz37W4TrIB2ImyEqO4beYQLCYDJqPK9RcMoEdSx2Y+E6K7SkpKIhgMnvQ1wWCQpKT6ey011TnnnMOyZcuoqqoiHA6zdOlShgwZ0uzyTkbWBHRDdrORa87ty8Vn9sRkUImyGFr94jrBYeLWmUN4ZdFWNE2nZ6qT0f07dnFZR1MUsATL8BnjMehBjMFq/I3s+CiEaF2KovD9jkL+uXBr7bGfXjuaoT1jT7usvunRXHfhAD5ZcQCn3cyU0Zks35THrLNaZ6pivMPM47eMZ/Hqg+zLr2Jgzzh04LNVOQAUlHnol37y7DBHGVSFy6b0YUSfBHQg2mZs1X5Y03WqfSHsFmOL9okRojvIyMigd+/e7Nixo9HXZGdntyhd6IgRI7j99tu57rrrCAaDTJo0idmzZze7vJORIKCb8IU0yqp8OO3m2i+BGFvNkFXbXJgrTBiURL/MSXj9QZJjbae9oK27sbjyKHn/SeIuvptweQEVm74k9rKHJBAQoh14AiHe/HxnnWPvfrGLgbeOw3iaCQs0XeebdbkM65uI2xvkjc92oCpw0Rk9sZtbZ7+UlFgrYwcmU1ThZdnGfFzeY3cXDQaFjfvKCGs6WckOEqPNKCeZu68oCo4jIwut2d+XuQL8++NtbN1XRq+0aO66YijJMtWoW1MUMGh+QkrNNDMTAcnWdIJHH320Tnag41mtVh555JEWn+PKK6/kyiuvbHE5pyJBQDdQUu1nztsbKCh1Y7MY+ck1o+iX1nCKuLCms+9wNTmHq0mJt9EvIwarqfEvNVXRsFTloKhGQtZYjJ5iNJMdnzUVUEh0msEpHQRA2BKNtdcIyj54FoDos64hZJJheSHag6Io9ebam4wqiqKgoGNxHSRkT0bRNAy+UvzOrNqdWF3+EAVlHnIOV2NQFcZmR/PoxXHMWVzGpL52pvdP5L2NIaq8QVZuPUyc00L/zNjaC++m1Q9UVSUc1mofa8DhUnedAGDsoBQ+XZnDmh1FAKgK3HvVSEb0iW9SYoOQpuMNhLGZDRhbcudegUXL9rN1XxkABwqqeGXRNh66bjSSYbl7UhQwl+/Du/UbLGdchRqoxr3qfeyTrycgN7NqDR48mJdffpnf//737Nu3r3afgOzsbB555JEW7xPQniQI6OIUFT777iAFpTUL2bz+EH+du5Gn7pyIzVT/zvzmA2W88O7G2sdXTOnDpRN7Nnr3yOIppOitJ7APGA+Kgmf3GhKm3Yol04FflQvc42kGK6akLLw7a3YpNiZkElQkQBKiPdhMKrfMGFTbv6kKXHNBfwyA1VdI0Vu/JWrE+eihAJ5ty0i+8Sm8lmTCus6mvaW8v2QPFS4/qqow+soEQl88xz2jr0TN3YDhYBk/mflL7vnrCo5cwzNuUAp3XDoYQxNGGaq8QdbtKmbXoQrGDUphSM84jEaVd77YxcRh6QRDGkXlHjKTHWQmO/nL+8f6aE2Hv8zdyNM/nkT8SdZ26cDuvCre+HwHhwpd9Ex1cv2FA+mT5jjpKEJjAiGNzftK6xzbk1uBLxButd3jRediCPvxbPkKz7alhDwVhIoOEnaXYx8yBSU1PqKn+55o8ODBvP766222Y3B7kSCgy1M4UFB3s61KVwBfIFwvCNCBBd/uq3Ns0bJ9nDMqo9E7Wn5bCnHTbqV88SsAxE6ajZrQE58EAPWYPYcpWvYu0ROvIliaS/nHfyHx+ifxmeQOihBtTddhRO94nvmfyeQXVZMSH1W7SaLfmkzcBT+i/LOXAIif8T/4LDUL98qq/RRX+Khw1Wz4o2k6/13r50ej
puNf9z4Asdf8hv+uKa8NAKAmG9qsKX1IOcVGjLoO73y5m5VbDgOwasth7rpiGOMHJpGdHsPCpfuwWYzEOi3kF7soq/LXKyMU1skvcZ80CDhQ6OLp19fUPs45XM3vX1vNr249g55Jp79DusWocvaoDD74Zm/tsTOHpGKXAKDbCqkWrBN+QNhTiX9/TSAae+7NhFKGSADQiIyMjC558X9UZE/i7gZ0TeeCM+puOT5haCqx9voprBQF4k6Yz2m3mjCcZGdeQ9hLIG9X7eNgWQG6qwRVb3r2ikjht6eRdN0TqEOmYZt0HYlX/VLWAwjRjhRFYXDvBEZkJ9TZJd0Q8uLPP9aP+fN3Y9Bq5vOajAbUE7rAoekWwoV7ah+HS3LISKibitOgKpiasA6q0husDQCO+uy7HEBhxsReZCQ58PpDuL1BbrlkCKu2FDRYjvkk0zbDms7n3x+sd1zX4dsNeRiaMX9H1+HcURlceW4/0hOjmD6xF1ed2092FejmDH4XoeJDtY/9uTsxhE4vba3oOmQkoBsY0SeB+68eyfdbC+mXFcPo/kk0OEKtw+wpfdhxoAxfIIyqwI8uG4rdrDYa5RsC1XgPbCT2rB+g2qKpXPkBlqxBGMJeNGPD6w4ilaYY8TlqArKw0UzQ2Ho7jAohms8QdOHft574S+5HDwepWvYOtmHnE7LaiI0y0TstmhH9kti4uxhVVRiRYUDbl0/SNb/Gd2Aznh0rOWPKMOY5LZRX19ypv+GigcQ7Taf8G7eYVGIdltqRBoDs9BgUBRKdFh6/eSyFlTUjERt2F9MnI4YdOXU3fIx3Wsg4yd38UEijoJGNxnKLXIBCczojh9XIjDN6cOG4LAxqs4oQXYiJANUr3yPsLif2nJvx5+3Au2sV9gHjUdJHy2hAN6ToLUns3gFKS131dlrsTpKSnBQXV5/0NRo6Fa4gFpMBh/VY+k+DQUXTTp5rX1Gg3BOkqNxDnMN6yqwTANZQBaAQNEdj8ZcRxEjQFHN6DTuFprS7u4nENoO0u7XKao7u3n9C45+zNVSB3xiDgo45VIXPGFv7nK7rFFcHKKn0YjKq9Ex2EK3XvMaoB1CDHgLm2JoFxKUeHDYTKbFW1CZmHdpzuJo/vb0ejy9EWkIUD1wzksQjm3y5/SH++OY6Dh7Zcf2KqX3ZfaicHQfKuXhiL2xmA3arkd5p0WQmRjXYWyclOfnvJ9t454vd9Z676eKBnDMyvVtewEViX9LWbbYEytDLDhFKHYoh6EEp3kU4dQhhtWOzQrV2u5vbh3Y3MhLQxVT7Qrz9xS5WbjlMlM3Ej68YxuAesQC1WSdORtch1mYi1tb0i3ivMYZ9h118uHwzFpOB6RN70TOprVKPCiFE6zt60a+j1AkAoGYaUXK0heToY1N+fNS8JqSYwVwzF99hMTY5f//x+qU5+f1dE6n2BkhwWuukU95fUF0bAADM/3oPA3vFce9VI3hl0Vaq3IHa5x64ZhTDe8c12PeOH5TCN+vyOFx2bOpGRpKDkf0ie/8WcXr85niUtJpFwJrJiZIxRn5/ujEJArqYTXtLaueXur1Bnn93A8/cPanBNQCtJb/My1P/WV3bEazbWcxTd02ovZMlhBDdgaIoLdr1vE5ZKuSXesk5XI3ZZCA7PZr0OHu915VV1881nl/sZvO+0joBAMC/PtzaaOa3uCgzj9w4ltxSF8GQTpTNRFqcrdX2NRCR4/g/AQkAujcJAroQVVXIOXxsOExVICPRRrUnQKz96I+y9Zdt5Ra563QEobBGfom7UwUBqqqgKEqTRkOEEOJE5kA5evE+9IwRGCprFtkGYrNPOb3SH9IwqirHp+QvrvaxN6+KfyzYUvv+mCgzj94yjqQT+s34BjbfinNaKCqrvxiz0hXA4w9hMzWcJchsUtmTW8nCb/dhMqrcPH0Q4wYkNXnakhAiskh2oC5E03RG9qtJa6cqcN+FSfxs6GF6xqmYCjdjKtyMorR+2B7XwMV+bCcJABQF
8su9/HfxLn732hqWbz2M2y+Zi4QQTacoCnrRHso++jOB79+mdN4zuFYvwqjVv0t/VCCk8e2mAn758ir+750N5B25aHcHwny1Lp/3l+ypE0BUugN8t/UwygkX5L1To+mTUXeKUX6Jm9EDkuuds09GDDEnGfXdV1DN/K/3EtZ0fIEwL32whcPl3qZ8BEKICCQjAV1M/8xo7v/BSJZuzKO/00fwy/n4inbgz9uJffBkLEn9CKm2Vj1nrxQHs6b2ZcG3e1FVhesuGEB6fP1h7Y5QWOHjf//1Pf5gGIDdhyqYMiqDGy8YQEs2yxRCRA5d19GzRhE1/DzcG75AtTqIPucWvErjiyF3HKrg3x9tB6C00sfTr63hqbsm1i4uLquqH0DsOliBYbJCKHQsOoiyGPifK0ewaU8pyzblkxpvZ+roTBJjrew8mMbKzTUpQ+Ojrdw2c8hJdwFu6Jzl1f5O018LIToXCQK6GKOqMiI7nlH9EjCgESi7ANf6z1GtDqImXImvlQMAALNR5ZIJPThreBqKohBjN3aaeYI7D1bUBgBHfbM+j4sn9CS5gWF2IYRoiKHiIJ7ty1CtUWg+F/69qzH0m0JYqX/nXVUVdh6sqHPM7QtRWuXDaTeTX+yid3o0+/PrbuQ4fnAKoVD9KYsxNhNnD09l6sh04FiSh1unD2TGxF74A2FS4+0NrgU4XmZy3TSiZqNKWsLpbxQmhIgMEgR0UbqmoxRtw7XhC0yJmQRLcvGs+xjT6MvbJpWXTu0wdGcJAAC8jUz9aeiLVgghGqXrWDIHEX3OLfh2r0IPh1AaSYyvaTp9M+pmWLOYDSSavCSYPEwclkaFy095tZ+KI/sKjOyfxIi+iSc7fb01TQZFIT2u6Td2shId/OLGsSz4di8Om4mZZ2WT4DR3qj77lBSo9oYwGBSizMZWW6gthKhPgoAuStdBiYrDMepCrGNmouVtBSCsNr6tfHMEwzrBsEaUxdApv0gGHEmPerzs9GiSYlt/REQI0X0F4voQdd6deFUbhoHnAhBWGu9PB/aM5cpz+rBw6X4SYyzcfkEWykf/i3/wZMaNvYpyT5hhfRKpdPkxG1VS4u0EQxpBTcfUSnMVPYEwhuPKUhUYkBHNL64bDYqOrrXspo2igKqF0FQjqh5CUWrWQvg1A1aTodXTUFR5Q3y86gBfrD6E1Wzg6vP6ccbgFMyN7GofKM1DJQqNmgxI1mAZflMcuuxrLESTdEgQ8OKLL/LJJ58AMGXKFB566KGOqEbrU6DCFUCnJl3b6TLrXvSQFT3gxR8MYrcY0U02dL2mQ9OAclcAk0Ehxm7Cb0/HNG42pT4IxwwjyezCEPJgNULQ48ZliCHW4MFrjEdRwBT24FdtlLuCGAwKBlXFFwgT5zBhUBQUNMxlu9EdSWgGC5TsY68vga15AUb3spOUnFgnv3VnkJXk4KfXjOI/H2+ntMrHuEEp/OC8fq32JSuE6CQUqHAH0TSdOIf5pJd5iqJjDHsJqnaMmhdNNddeKJ7sBKUBE/6AnzinhUp3AE33owDJdo3ygJFgWCfFoaOHdaJMJmac2ZOZY+Jxbf2W0OJXMCRmYBk5A7+m4LQacaLj8YXYlVvBlv1lhMM663cVMWtKHwZlxaIe108Zwj50gwkNA2bdS0Bp/EZGSXWAT1YdYOmGPKJsJmZN6Utqgh2zSSUjIQpjE3f3VRQwaTXnMug16UiPBj6KAqbSnYQK92POGIhWeRh/4QFM6QMorAjzVX4054/vcVrTLlU9hKKFCBusmDQvIYO19vsNBT5edYDPv6vJzOTxhfj3R9uJdlgY0Tv+WBlomF25KEYThz/8M/Hn3oAWnQGeckrm/4G4C+4glDZCAgEhmqDdg4AVK1awbNky5s+fj6Io3H777SxevJhp06a1d1VaVTCss2R9Hu8v2Y2u68w8K5uLx/fA3MSLZouvmOov/4l6xqUYq8sJ7F1POGMAhtgUSB+O2x/mva/38c26XMxGlR9eMpjx
A1PYfNDNX+Zu5Ipx8Zx5+F3iz5hO+foviBpwJk6bneq963GOmU44FMa1ehGHsi/jqbkHMKgqF0/sxeY9JaQnRnHNef1INFRRsuh5TAmZGGJT8W5bysDLH8QXYyJhw7uYxs2CpL5t/EmeHlWBob3i+N2dZxIIakRZjZLySohuRtN0lm09zH8/3UlY05g2vgeXTe6NzVT/wl7XNYwFm/Ht/g7HxKvxrl2EMSUbQ+ZQDL5K/I7Mmqk3mk5RpY9gWCM13s6OnDJe+mALg3rFE+s089XaXAyqwvUX9MOgBXn9yxyCIY3bz09nAhswJmRhzhhI1eKXcPYbQ4XRRLD4EHrJfpSUIei6wvbcKp59Y23t3Xir2cBlZ/fh/95az8+vH8OgrJopRcawh8D6RZgSs7BmDKDq85dxTrkRvyOzXvtc/hB/fGMtxRU1WX8qXQH+/dE2LjqzJ99vK2T6hF6cOzq9SUGAuXwf1d/NJ+a82/Dt+R7CQQyDpxFWzFj9JRR/8H/ooQDR42YQLMvHu3c9ht2rSTvnh+z9rpTvtxfxm9vOaNI+NaoeQsn5nmDJIaJGXYxr+VtYB0wkmDIUUKj2hvhi9aF671uy5hCj+yYQDusoChiLtlE8/1kcI87DOWwKRfOfw953DCgKut9DxZf/IuGa3+Izxp36AxAiwrV7EJCUlMTDDz+M+cgOjH369CE/P7+9q9HqckvcvPPFrtrHC77dx+Ce8fTLaOLukoqCFvBQsugF9HAIa9Yg9LJDVFkScSoqu3LL+WZdLlAzHPvygi30SHXywnsb8AfCfLqxkiFnXUR0VQlhVxkVK+aiB3zY+4+neu0nePeuw5wxiF2HqtD1mlz/i5bu44qpfZn/9R6G901k/IAkEi5/kJJ3noD8XURPuZ6qr16ll89NGAhqYFU615qAoywGFUsjQ8ZCiK6toNzLq0cy8QB8/t1BhmUnMKRnQxd6CorBgGfnKnz71qP5vcSfewPBzYspX/cpSTc8RZUxgQXL9vPxigMADOmdQGqiHa8/RK/0aOZ9tQeAUFinyqsx/+v9taVv3F/FuN4GXIv/gWqxg2oASzQFEx4k9dBiAq4K1GQNf1jhtY+31ekvfYEwBwurSU2wM/frPTxywxgMCuiKimo0UvH5y0fKNNbcim9AzuHq2gDgeF+tzeW88T14+4tdjBmQdNJUorVUlUDBbkreeBTN7yH67Os4uteMz5xA3MU/pmzRC1St/qjmkzWaiRk/k0/2m9hfWFOH/QVVjOqTcMpT6YqKajDgXvcJ3q3foAX92AadXXs+g6pgMRnqrfNy2s1HXqPXTIONSceUko1r45c1LzAYsaT2pvK7D1FtTuKv+IUEAEI0UbsHAf369av9/4EDB/jkk0946623mvz+hARHW1SrxbYeqqh3zOULkZTkBCBYWYwpJolgZTFGZzyKWvcOlq47CJ9xBeUfvgBAVL+xlO9cy2qjk0tHWalw+U94Pbi8QfyBmsw4gZCGplpxbVmKY8hZVK5aAIBj9AUUvfMk6BrGkdNZ+GZRnXKOLkQrrvASZw9T/v0KQAFVxbPre+z9x1O1+iP0QdPICcQyKdHZ4s+qMUc/q0gSiW0GaXdH6az956nsPm6TxKOqvaFGP09Hr8G4MwfiO7gVU0I6YVc51d9/SNLMe4lK68H+fWW1AQDA1v2l9D8SUJyYVODExbqr97q49fzJsOEzNL8H+5mz+Tw/hv9+k8P5w8ZRuBp+NtCCIaRRWFb/Yr2wzENijI39+ZWYrUbio22Ak8DI86he8zGa30PsWT/AkdmHaGP9C3nv7pIG2+wPhjEc2fHYajU16XdNi+mLf8jZVK//HNViJ2bIREzxx6behGzDqU7pSbCw5rOyZY/Cu38DYx1lrMnIYFeeG4+/8Z/DiUKWUVTHpREqL8DaeziOXoMw2I+997oLBvDKoq21jxUFLp7Yi/j44zMcOVEnXUHRvP8DwJo1iEDhATSfi9izrsKRkU10I5updRcd
3Y90lEhtd1vqsIXBu3fv5s477+Shhx6iV69eTX5faakLTet8t6KTY2yYjCrBI18gBlUhNcFGcXE11nAl5XOfImbK9VQue5v46f+D15Ze5/1WTx7lH/8VS8+haNVllC9/n5jxMzkrsAtPdV96ptb95XfYTKTE2emR4uRgYTWzz0ggZutrxJ51FSWL/oKt9wgCpbmUfvRX4s+/BdeWb/F++jw/vegn/H5hzciL9bjt5Af0iMNdUUWgNJ/4S+5FsUdTufgVVHs0ev8pKNsXM6jPEEpKEttkJCApyUlxcf0v+u4sEtsM0u7WKqs5Omv/eSpJ0VZsFmPtXWJFgYykqAY/z8QEOxWrP8V3cBuOMRfhWr+YkKsca9Zg/KX5hIrLqaz213vfURazAVWBox+TAkRZjbh9Nee+7qwUtK//hmqxYes7FvequYw65z7eNap8sbkSgPwiF8mxVsYOTGbNjro3Xgb0iOOb9bmMG5xK2B+iuLgai+aicsEcVEsUtr6jqVj6LsaUPvjjB9RvX0zDc/CT42xUuv1cdnYfLAZO+bumKArGvDVUr/+cqJHT8GxbStHHL+G44G4Cig1T2I1v1Ts1AYCigq7j2fUdjnEzCccN5NDKmmAkKcbWpN9rI0ECq94mVFmEY8zFuNZ+QsXaz2DQhbWf9bgBSThsI/lyzSGio8ycPy6L9DhrnfIt5bspWfA8isGEGhWD78BmHEPPxpI5kIql74EtFr3XmU1YA9I1Sf/ZeuWJDgoC1q5dy3333cejjz7KjBkzOqIKrS45xsLjPxzP4u8PEgxrXHBGD9LibKCD3xhL9OSrKV3wHI6R5xO01d8JMmSNJ/7iOzHEpRPyenBXlhOyR2GNTiSkKfRMiuKRm8by2Xc5xEdbOW9sFtFWI/deNYKlG/NZd6iSsefeg+o0E3/h7YRjs7CiYaguRE8dgDNjGHrJPtyWNMYNCmO3mRjeJ5FVWwp46PoxZKc4CKgKjgvuJmiIAkUhdvrd5Be72OwcwDkXD8WQ1Itw17t+EEJ0cbF2E4/dMo4v1hzC5QlywRk9yEpoeOGsohowZY8lISEDY1w6Jmc8poQsAhWFVK9aQFy/CWQkxhIfbaGsqiYYMBpUhvdJIBAIcbjExU+vG8O3G3IxGQxkpTr45TUD+XJLBRWuAP16JhE74BqwxRC2J0LqAD7apxA4cgMoKdZKrMOMCvzg/H4cLvWQW+wCYFifRAIhDUVRuOiMHrV19qsOos++FsXqJGxLxNJjCFp0WoPty0iwc/64rDrz52s2cRyI3WqgR7KjSesBdF1HScwmfsa96Bn/396dx0lVn/ke/5xau3pf6I1eAWn2HRVwAST2NTar0YhmMIbMJLnXV5wk9w4xhmtmMiZRbl5xifEmNyZmcV7jK8YtMmKcIRCNC8giKCog0DQNTdP0vtZ2fvePhoJm7dbqharv+y+quuqc8zTVT53n/H7n+U3BN/YqsG0Cju7vLSvURfB4NZbbS9ZN3ybQ2kjLy4/RVlfD68dH0ekPMWdSPiW5vRtdCuHGN2kBCaOvIJw9hqyCsVgZw/GfdqweV/c6ONNHZwHWWaMwAJY3GVdSBmlzlhFuqaN97xa8hWMI292j4q2bXiSjYAJ+d3qvjksknllmgJvw1tTUsGzZMh566CFmz57d5/cP9StZzhPz0k9PXt5QM00vPIC3YAydezaTdctquhKHn/Xe+jY/2/bUkZeZyIQRmTg5e/690+nAGNPjd2BOdM041tBBXmZid+eMyHxSQ/jEmbt1Yqj45Dag+8vjQj31T8Zj23a/3gsQj1c34jFmUNzR2tYnMdTz58U4HBaWde6Tw5NO/p6tE/cvJYQa8LsycJgw7mAzXZ7u+ev1bQHe23ecrkCYyaOGUZDlw7KsSE7szn0msj/L6t539+/PRPJhVyjMX7Ye5tXNVZTkpXD79WPITT91tT4Qsqmqa8cfDNPY2oXTYVFWlEFWcs8pK9Zp91udzNXn4w/ZVNa2sqeq
ieREN5MvyyYr2d2rk/8zXWhf3kADlr8Vf2oJFgZHw34q23xsPRSgrCid0ryUc96cfeH9nWhx/SnuL0sIt2DjJLDtRRInzqPLl4cr2ELXOy/gm34j/oSzL7TFCuXP6G1PBqEIuP/++3n22WcpLj51FWT58uXcdtttvXr/pfglZlk2Ce01BJNycbXXEkrIJHTGyr7tgTD3/b+3aTwx9/8fFk9g9vjcXmwc/vbeUX699gMAEhNcfPfOy8m/xPrkx2Nii8eYQXFHa1ufxKWYP/vqXL9np90JlpOw5cFjtxN0Jp12wv3pmx1YFnQFbdxOBxfqTnyhfVkWBG2D02Fh9fJ4Tm5voP+movE7i4asNDf1zcHIY5cVImRie/kj5c/obU8GYTrQ6tWrWb169UDvdlAZ46AzsQAMhBILzvp5IGRz+Hg7c2cUcriujXc+qOXI8faLXg2C7hGA3637KPK4oyvEX7ZUs6K8LOa/7EVEzsXpdERGC5x2J6HtL+FISMZXdiWNLz9G2twv4E/vbnccjZNZY+jVGirn21cwbLNt73FefH0/2ek+Pr9gNAWZiZ94e/1tKBQAAA5PAnCqCIj1AkAk2vQXM8g6A2F+9R8fsG13HQDjSjOZP72QOZPze7VcejhsEzpjaLy5PXBiOpDBckAoDC7HxQsKEZFLWXNHkG1vV+IPhMnPSqQwKwmnw4UzJYvmjb+nddMLWG4vuIfWSOmewy384oX3ATha38G+w8388KtzSPXpK1pE+o8aqw+yg7VtkQIA4MPKBqaNyen1dJ6MFC/XzTy1oIxlQfkVxYTDNo3tAZ7+y8fc98QmXt5cRdsZ/ZdFRGJFyDZ8cLCRjVur+eWL7/PYH3fy7v56QpYbz4ipWO4ETChA4rg5hJOGzpxxy4KPq5t6PNfRFeJ489ntRUVEokmXGQaZPxg+67lQ+OznztTSGaLqWCsel5NFV49g8qhsmtv9lOSmUJSdhG0M//bq7kiB8cz6vWDgxiuLNSIgIjGnpTPIurcqqT7WRrKvu53o48/u5Gd3zyCw/v9iudwkTy+nddOfyCoch5U/NarTWtzBZhwtRwhlleFqPwrGxp9cdNH3GQPFuT3nJ7ucDjJSvNE7OBGRc1ARMMhK8lJITfLQ0h4AICXRTUnehVcZbmgL8MPfvkPDiV7XM8fm8PcLx+M5bU5qmz/MgcPNFA1L4NDxLsYUJvP2rhquv7wQ13lWohQRuVQ1tfmpPtZGWrKbVfM8BC0Pf620cB/aRur8FYQtF6GkXIYNL8POKI5qAWBZFtTu5vjLj5N61S00bF2HO7uI5Bv+kYB17r7+pxtbnM6yuaN46W/7SUvy8pWlE8lIiu0Fr0Rk8KkIGGTpiW7+952Xs/tQEwaYMjqbZPeFZ2ntrW6KFAAAWz46xo1zSinNOdWv2ed18N3PeLA7mtncXsBV7g8wwyfhdlhD5qYuEZFoCNk2gZDN4mtGMj7PRcpH/w61e1laOJnOv24hcen/JJg7CWMgnD0x6vs3xmAKppAwYS4tbzyDIyEJz9yVBB0JvWrb6XM7WTSnhOtmFOJyWnidmqkrIv1PRcAAuVBLtawUL3NOtAM9sw3W6R2CTv7bPseGzuwE5DUh0v01tG95nrnDSrGPV5I2vAiDoXsNzMExVFrLiUhsMMB/bT3MH9bvBeBVj5MH7/g7rJf+BSq3kDhzIeHcCRfNO582N9ktxwjs2woOF7a/k/rdO3CVXU1a0sVHAk4GkuSJvVVue9PlTkQGh4qAftbUEeS9/fU0tvqZMCKTEXnJOHpxEt7eFWLXwUYO17UxtSyb9s4gew41UZqXyojhaST53LR3drdGG1eayfBhST3eH3Z4SJx8Pf4P/0boeCW+sstxlswgaAanAGjpDPH+gXrqmjqZNHIYpblJODQtSUQ+pca2AH/8y97I44lFPhwfv4Yd7MKdXYhduw9XUxXhtFK8/uMQ6CCUXoLn+EeQkErAm0VlbQs7DraTluRl0qgs0hPd
fToGy4JgezMmOQt71p0MsxppPHIEn78WLy78SWe3ho51YWM4cLSN9/fXk5fpY8KILFISdMohMpToLzLKDBAM2SR4nHQEwjz6zLtU1nRf2X/xtX38w9JJTCzJIPkCydAAz762j43bDuPzunA6Hbzw132Rn980bxTf//srOVDTgsftZEReKgln9Kh2mgBd764j1FKHp6CMzj3vkDByOlbJLMwAFwJB2/DLP+1i14F6AF58bT/3fvFyLsvXYh0i0pPDYbBti+4ux2EM5786HrINbf4Qpw+EdgQMeJNh8b+wsQp2flzP1YfczEgMYG9bS8fut0m/ZjnHNj5F6rRyavOu4p9/vzsyClCcm8w9K2aelVMvxBiodpWwv2A5M1qO0vDaL3BdcQtdr/6UkC+V1MX/hN8aWm1J+9vHh1t48KmtkcfTx2bz35dMxKmLPyJDhoqAKKpr8fPMho/5uLqJz8wsYvLoYZECALq/KPZVN+HAcMWY87eoa2oP8tfthwGYdNkw3n6/psfP//T6fuZPL2T6ZcPOu42w5cFTPIGMrEIcxVMJfbgBZ1YxoUEYCTje3BUpAE56670jlBWM1YJmIhLh9dcT2PMGngkLoOkIdnsDdtFM7HMUAu3+EL97ZTcNrV3MnpTPW+9158mPDnfgXzyPZ/96kLfeP9r9XFUTQdtB+RXL8B/6iMb1v8GTfxkGw+bd9T2mAVXVtnG0oaPHPVa9kZ+ZxNu73PyxxsPnRl1FaPMzWO4E0pauoivOCgCn08GG7dU9ntv2UR318/3kpPVyepSI9DsVAVESCNs8+swODte1AfDHDR/j9bhI8rlo7zzVnz/B42JfdROzxuWe9wTY43KQ6HXR3hWiuc1PZmoCR+s7Ij9PT/bivtDa9CePKWssVpZNCAeO8eX4zcDfbBYMGyzL4pqpw3nrvaORhc2yMxI1T1REeupooOXtF/AcfJ9g7QE8xeNJGj4B23n2CfnHh1t458NaADJTElg27zIyUryMK83E6bQiBcBJ//XOIeaPKCDUchyAYH01SWOuJDN4dl70efr+1ehxObh57igcbTV0vLwbGzDBLkLH9uMozjpnIROrjDHkZfacoupxOfC64+d3IHIpUAuCKGluD0YKgJPe+bCWlQvHc3L0s/DElaVJo7IveAU8OcHJV5ZOwuW02H2wkcvH55J0YvqQx+XgK0sn9WgHeiHmxH+xPQgFwOGGDn74+y3c+/M32XOwkeXXl5Ge7CU/K4krxuXqBmER6SGYNZq0a24lcGQPxhjS5t9J8BwFAEDbiXuioDvXPr/xY4alJTC2NBOfy0FeVmKP15cVpcHxSnwjp5J354O4M4eDZTFt/HBKc7tfa1lw+/VjyE77ZD36XU6LhGALDoeTnC+uIXnGjfgPfYgj7L/4m2OIbRuunpxPdnr3VX+Hw+LLiyeQntS3ey1EpH9Z5hK7HFtf3zYkp5D4wzb3/XITdU2nVnm87foxXD9zOFV1HVTVttLQ3MXw7GQmj8zEc54WcJHuQBbUtwZobQ+Qk+EjbBuON3eRkewh/RLoH93SGeTeX7xFR9epURCnw+K7X7qc3HQfvjOuCJ3ZFSkexGPMoLijta1PYqjmz5O8zfup/+OPcKZlE2o8im/MLHxXfYGgM+ms1x5u6OT//NtWFlzevUK61+NkzsQ8LivOpK6ulUP1HTz89HYaW/2U5qfyP26aTG4yOMJdBF2p+AJ1hNypBC0v4UAn1c02Po+T7DQv1qfooGZZBm+olS5nKi67E8sOE3T1bWpRXw3Vv6nOQJjapk6SfW6GpXiIdme6oRp3f4rHmCH6cX/SHBprNB0oSrxOB99cPo1fr/2Ayppm5s8oYtaEXDAWxcOSKMlJ7r7y3duay0BWsoes5FMn/CkJ/ftFEk1Hjnf0KAAAwrahvrmL0uxLJw4RGUDeZJImziVh5hJMfRXhlmPYjnNflS/I8vH1W6aw5qmtBEPd0wxDIZuS4ekAFGUlcv9XZtHWGSI9
2YPbYREGwq7unNrpyY5sy+nxUZJ95h4+GWMsupzdCz6GHL64Hm/3eZx9vrdCRAaOioAoyktP4NtfmI4/GMbncUauefhDNofq2uj0hyjOSSGtj+3nLkXu80xX8ro0J1REzs2fkIN71m34jRMrdzxWzhjs83xNOSyLN9+riRQAcyblY4DXtlczPCuJo/VtJHhdFGUn9+oeKhGReKMiIMqcFiSetuBLyDY8vX5vpNtPSqKb1V+6guyUTzbn9FJRmJ3E6OJ09lY1RZ7LSfdRkqchOBE5v5Dpzp/GWJiLfEWdnNo0a2Ie9S1dvHmiQ1BWWgKzJ+Wz9m8HmD+9kNuuH41LrSlFRHqI44HKgVHb2BkpAABaO4K8v7/+Au+IDV6Xg7uWTeKOz45l4sgsbv3MaFb93QwtFiMiUWHbhutmFOFyWuRlJbH7YGPkZ/XNXbhO3He1YVs1tY1dg3WYMkQ57S4cdI8ieUx83bgtcpLOyPqZfY57AMLhoXtjXjSl+tzMnzqcz8woJBw2agkqIlFVnJ3Ev35lNofr2i/4OuUeOZ3L7iT07lrcOaV4ckfSsv7XpFz7BfyJwwf70EQGlIqAfpaX4WPm2By2fHQMgASPk0kjswb5qAaOMd0364mIRJsxhty0BFITPYwuTGNvdTMAqUmeSA+Gy8flkpveu8W6LMvgCPsJOxJw2n5shwcT5Y42MgQYgx0O0fDy4zh8SZHnROKNioB+5nY6uPPGscybXkinP0RJXgrZqV7lGxGRKPG5HXz95ikcONqCwSI73UfN8Tb+1+3TKc1Lwe3szYm8wV37AV0Hd5I4czFdO9bhzh9NKG+yCoEYE3ImkjS1nPZ3/xO7s420a28nmJQ/2IclMuBUBAyARI+L8cXpkccqAEREois5wcWk0sxIP/H8EwtV9ZaFwQS7aN/+Kl17NhFubyLjhq8BNsTRar/xwBtupfE/HsWRkEhCyWSaX/t3huWOwJ8xerAPTWRAqQgQEZF+4TAhnLafoDMJd7idsMOLbQ29rx3LAne4EwomkjxlAaHGIziS0nEWTyHUDwVAbXMX1cfaSE30UJybjLeXK8BLdARcyaTOuQUrKQPbl4m3dBJ2cu5gH5bIgBt62VhERIYEb8cRcHq6hy/tYJ9unLSwcVS9Q2flTlKuvpXWvz1NQukUTMmVmDMa09U2d/He/nqy03yMK0k/74rq/cGywF2/h9Y3/kD61TeDHcSVkQ8NR/C/uw7ntCWETfQKgUPHO/jXJzcROtEgomJOKTddO+JTrVIsfWOMRTB3YmRU3iqZo5vHJS6pCBARkbM4LAgd3Uvb1nUAJM/4LI5Rw7F7vei5A0diGp17N9O1byvGDpM4/lpCZxQArV0hfvjbd2jtCALwj5+fypSRmVGN5YLHacDyJBJqrKHu+Z9gQgEyPvMlvCOnYyWm449iAeBwWGzcXh0pAABefquSudMKGZbiucA7JdpOP+dXASDxSmOQIiJyFtuAp3AC4dYGwq0NeAon9LoAiMgqwT2sGBMK4B5WDFklZ72k0x+KFAAAxxo7P+WR952dlIOv7EpMKIDDm4iraBLBnIn4kwqivi/HGYuWWXSPRoiIDDQVASIichaHBf4D20iZ/TlSZn8O/4HtOPpwsuq0bAI71hE8XkXqVTcTPF5FYMc6nFbPlsEZyR7+25XdxUGSz82kUQPbQtmygCM7ad/5F1JmLQHLouU/f4nb7oj6vmzbMG96Ae7T7gFYfM1IsjQKICKDQNOBRETkLLYB1+hrMCdv5DUh+rLOYdg48I6bi7d0MsGsMobll0FiBn7T89qT2+ngpmtHMG9aAYkJrgFfVdwYsHJGM2zZPxHKGUvmyMvBhPFbvVtboK8KsxL5wVdnc+hYG6lJHoqyk0CzUURkEKgIEBGRcwo5TjsRttx9fr8/IRsSssEGf2bZeV/ndjrI7WNLz2gKuNMgOw0MhJML+3VfxsCwFC/DUrz9uh8RkYvRdCARERERkTijIkBERERE
JM6oCBARERERiTMqAkRERERE4oyKABERERGROKMiQEREREQkzqgIEBERERGJMyoCRERERETizCW3WJijL+vWX6LiIcZzice44zFmUNzxuv+BEi9xni4eY4b4jDseY4b4jbs/WcYYLVguIiIiIhJHNB1IRERERCTOqAgQEREREYkzKgJEREREROKMigARERERkTijIkBEREREJM6oCBARERERiTMqAkRERERE4oyKABERERGROKMiQEREREQkzqgIGGRtbW0sXLiQ6upqAN58800WLVpEeXk5Dz300CAfXf947LHHqKiooKKigjVr1gDxEfcjjzzCjTfeSEVFBU8++SQQH3EDPPjgg9xzzz1AfMS8YsUKKioqWLJkCUuWLGHHjh1xEfdgiLccqvyp/BnrMSt/DiAjg+bdd981CxcuNBMmTDCHDh0ynZ2dZu7cuaaqqsoEg0GzcuVKs3HjxsE+zKh64403zK233mr8fr8JBALmjjvuMC+99FLMx71p0yazfPlyEwwGTWdnp5k/f7758MMPYz5uY4x58803zZVXXmm+/e1vx8Vn3LZtc/XVV5tgMBh5Lh7iHgzxlkOVP5U/Yz1m5c+BpZGAQfSHP/yB733ve+Tk5ACwc+dOSkpKKCoqwuVysWjRIl555ZVBPsroys7O5p577sHj8eB2uxk1ahSVlZUxH/cVV1zB7373O1wuF/X19YTDYVpaWmI+7qamJh566CG+9rWvAfHxGd+/fz8AK1euZPHixTz11FNxEfdgiLccqvyp/BnrMSt/DiwVAYPoBz/4ATNnzow8PnbsGNnZ2ZHHOTk51NbWDsah9ZvRo0czdepUACorK1m3bh2WZcV83ABut5tHH32UiooKZs+eHRf/3/fddx/f/OY3SU1NBeLjM97S0sLs2bP52c9+xm9+8xuefvppjhw5EvNxD4Z4y6HKn8qfsR6z8ufAUhEwhNi2jWVZkcfGmB6PY8nevXtZuXIlq1atoqioKG7ivvvuu3nrrbeoqamhsrIypuN+5plnyM/PZ/bs2ZHn4uEzPm3aNNasWUNKSgqZmZncfPPNPProozEf91AQD58vUP5U/uwWazGD8udAcw32AcgpeXl51NXVRR7X1dVFhrljydatW7n77ru59957qaioYPPmzTEf9759+wgEAowbNw6fz0d5eTmvvPIKTqcz8ppYi/vll1+mrq6OJUuW0NzcTEdHB4cPH47pmAG2bNlCMBiMfHkbYygoKIj5z/hQEA85VPlT+fOkWIsZlD8HmkYChpApU6Zw4MABDh48SDgcZu3atVx77bWDfVhRVVNTw1133cWPf/xjKioqgPiIu7q6mtWrVxMIBAgEAqxfv57ly5fHdNxPPvkka9eu5cUXX+Tuu+/muuuu44knnojpmAFaW1tZs2YNfr+ftrY2nn/+eb71rW/FfNxDQaznEuVP5c9YjhmUPweaRgKGEK/XywMPPMDXv/51/H4/c+fO5YYbbhjsw4qqX/3qV/j9fh544IHIc8uXL4/5uOfOncvOnTtZunQpTqeT8vJyKioqyMzMjOm4zxQPn/H58+ezY8cOli5dim3b3H777UybNi3m4x4KYv3zpfyp/Bnr/9fKnwPLMsaYwT4IEREREREZOJoOJCIiIiISZ1QEiIiIiIjEGRUBIiIiIiJxRkWAiIiIiEicUREgIiIiIhJnVASIiIiIiMQZFQES01auXElDQ8Onfs2mTZtYuHDhRfc3ZsyYc25r/fr13H///QCsWLGCV155herqaqZNm3bRbYqIDAblT5HYpsXCJKa98cYbUXnNp7VgwQIWLFjQ7/sREYkW5U+R2KaRAIlZ3/nOdwD44he/yObNm1mxYgWLFi1i8eLFvPDCC2e9pqamhg0bNrB8+XJuuukm5s2bx8MPP9zn/T788MMsW7aMJUuWsGHDBgCee+45vvrVr0YlLhGR/qb8KRL7NBIgMetHP/oRzz33HL/97W/5/Oc/z6pVqygvL6e2tpZb
brmFkpKSHq/JyMhg1apVPPDAA5SWllJbW8v8+fO54447+rTfwsJCvv/977Nnzx5WrFjBunXr+ilCEZH+ofwpEvtUBEjM27dvH36/n/LycgByc3MpLy/n9ddf7zGn1LIsfv7zn7Nx40bWrl3Lvn37MMbQ2dnZp/3ddtttAJSVlTFq1Ci2b98evWBERAaQ8qdI7NJ0IIl5lmVhWVaP54wxhEKhHs91dHSwbNkydu3axfjx41m1ahUulwtjTJ/253Cc+rOybRuXS7W2iFyalD9FYpeKAIlpTqeTgoICXC4Xr776KgC1tbX8+c9/Zs6cOZHXhEIhDh48SFtbG9/4xje47rrr2LRpE4FAANu2+7TP559/HoBdu3ZRVVXFlClTohuUiMgAUP4UiW0qsSWm3XDDDdx55508/vjj3H///fz0pz8lHA5z1113MWvWrMhrVqxYwSOPPMK8efP47Gc/i8fjoaysjMsuu4yDBw/i8Xh6vc9Dhw6xdOlSLMviJz/5Cenp6f0UnYhI/1H+FIltlunrWJ2IiIiIiFzSNBIg0gdPPPEEL7300jl/9uUvf5nFixcP8BGJiFwalD9FhhaNBIiIiIiIxBndGCwiIiIiEmdUBIiIiIiIxBkVASIiIiIicUZFgIiIiIhInFERICIiIiISZ/4/+yMhMUUXWKIAAAAASUVORK5CYII=",
      "text/plain": [
       "<Figure size 777.475x360 with 2 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Create a visualization with Modin df\n",
    "sns.relplot(\n",
    "    data=modin_tips,\n",
    "    x=\"total_bill\", y=\"tip\", col=\"time\", col_order=[\"Lunch\", \"Dinner\"],\n",
    "    hue=\"smoker\", style=\"smoker\", size=\"size\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<seaborn.axisgrid.FacetGrid at 0x7fc3bbd7fac0>"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAwEAAAFcCAYAAACQkLIVAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAACckUlEQVR4nOzdeXxU1d348c+9c2fNTPaVfd8RUFEBFVErCiIC7ntbH7X601bbWvXRLj5q9amP1K2ttrZWq+IGgigoihuLCMoi+x4IBJJM1klmudvvj2ggJkDInsz3/XrxejF35p57zmTmzP2eVbFt20YIIYQQQggRN9S2zoAQQgghhBCidUkQIIQQQgghRJyRIEAIIYQQQog4I0GAEEIIIYQQcUaCACGEEEIIIeKMBAFCCCGEEELEGQkCRIfxk5/8hOLiYgD+67/+i+3bt7dpfu655x5eeOGFFr/Otddey8KFC1v8OkKIzqk91p1nnHEGU6dOZerUqUyaNInf/va3FBYWAnDw4EGuuOKKNs2jEPFAa+sMCNFQS5curfn/3//+9zbMiRBCdBztse684YYb+OlPfwqAbds899xz3HjjjcyePZusrCxmzZrVxjkUovOTIEB0CPfeey8A119/Pc8//zxXX301Tz75JFVVVTzxxBPk5OSwa9cuvF4vN910Ey+//DK7du3ivPPO47777gNg8eLF/PWvf0XXdTweD7/5zW8YNWpUrets376dX/7yl3Wuf9111zFjxowG5TUvL48pU6awevXqOo9nz57NokWLUFWV3NxcPB4Pjz32GH379qWwsJDf/e537Ny5E1VVueKKK7juuusA+Pjjj3nhhRcoKipizJgxPPTQQ6iqdOQJIY6uI9SdiqJwyy23MGfOHJYuXUqfPn1q6synn36affv2UVhYyL59+8jKyuJPf/oTmZmZnH322UybNo3ly5eTn5/P1KlT+cUvfnHUPD/99NOsWbOGgoICBg4cyOOPP94M77IQHZQtRAcxYMAAOxgM2rZt2xMmTLDXrVtnf/nll/bgwYPtDRs22LZt2z/96U/tyy+/3I5Go3YwGLSHDh1qHzhwwN61a5d94YUX2sXFxbZt2/bWrVvtcePG2ZWVlY3Oz29+8xv7H//4R53je/futUeOHFnv47fffts+6aST7Pz8fNu2bfvBBx+07777btu2bfu2226zH3vsMdu2bbu8vNyePHmyvXv3bvuaa66xf/azn9mGYdhVVVX2uHHj7JUrVzY630KI+NJR6s7bb7/d/vvf/16rznzqqafsc845x66oqLBt27Zvvvlm+8knn6wpy6OPPmrbtm0fOHDAHj58uL1nz56j5vmpp56yJ06caOu63uj8C9FZSE+A6PC6devGkCFDAOjRoweBQACXy0VqaioJCQmUlZWxcuVKCgoKuOGGG2rOUxSFPXv2MGjQoJpjzdETcCxDhw4lOzsbgCFDhrBo0SIAli1bxq9//WsAAoEA8+fPrzln0qRJOBwOvF4vvXr1IhgMNktehBDxq73VnYqi4PV66xw/5ZRT8Pv9QHWdWVZWVvPcOeecA0BWVhZpaWmUlZWxdu3aI+YZYOTIkWia3P4IId8C0eG5XK5aj+ur3C3LYsyYMfz5z3+uOZafn09mZmat1/Xr14+5c+c2KT+KomDbds1jXddrPe/xeOp9raZpKIpS89zevXtJSUmpee5I6QshRGO0p7rTtm02bNjANddcU+e5I9WZAG63u85zR8vzokWL8Pl8jc6nEJ2JDCoWHYbD4cAwjEadO2bMGJYuXcqOHTsA+Oyzz7jooouIRCLNmUUAEhMT0XW9ZgWO9957r8F5fPvttwGoqKjg+uuvZ/fu3c2ePyFEfGnvdadpmjz77LOkpKQwevToJqfXmvW9EB2Z9ASIDuP888/n2muv5emnnz7uc/v168eDDz7IXXfdhW3baJrGX//6VxISEpqUp5kzZ/LMM8/UPJ4wYQJP
PPEEv/71r/mv//ovUlNTOf/88xuU1m9/+1t+//vfM2XKFGzb5uabb2bYsGFNyp8QQrTHuvPFF19k3rx5KIqCaZoMHz6c559/vklptnSehehsFFvGFQghhBBCCBFXZDiQEEIIIYQQcUaCACGEEEIIIeKMBAFCCCGEEELEGQkChBBCCCGEiDMSBAghhBBCCBFnOtwSocFgCMvqvAsapaT4KCmpautstLp4LHc8lhmk3M0hIyPQqPM6e/0J8fn5iscyQ3yWOx7LDM1f7sbWoZ2N9AS0M5rmaOsstIl4LHc8lhmk3KJlxeP7HI9lhvgsdzyWGeK33C1NggAhhBBCCCHijAQBQgghhBBCxBkJAoQQQgghhIgzEgQIIYQQQggRZyQIEEIIIYQQIs5IECCEEEIIIUSckSBACCGEEEKIONOiQUAoFOLCCy8kLy8PgGXLljFlyhTOO+88Zs6c2ZKXFkIIIYQQnYxl2xSURSiqiNK5tz5seS22Y/DatWu5//772b17NwCRSIT77ruPl19+mZycHG6++WY+++wzxo8f31JZEEIIIYQQnURplc6L721i3Y4iFAXGj+rGjPF9SXDLZmKN0WI9AW+88Qa/+93vyMzMBGDdunX07NmT7t27o2kaU6ZMYeHChS11eSGEEEII0VkoMH/ZLtbtKALAtuHTb/JYtaUARWnjvHVQLdYT8PDDD9d6XFBQQEZGRs3jzMxMDh482FKXF0IIIYQQnURV1OTz1fvqHP/k6zzOHJGDxAHHr8WCgB+yLAvlsFDNtu1ajxsqLc3fnNlqlzIyAm2dhTYRj+WOxzKDlLutxEP9CW3/PreFeCwzxGe547HMAKnJXtKTvRwIVtU63jXTT1pKAg6HrHVzvFotCMjOzqawsLDmcWFhYc1QoeMRDIawrM47FSQjI0BhYUVbZ6PVxWO547HMIOVurrQao7PXnxCfn694LDPEZ7njscxQXe5IVYxrJg7i8Ve/qTnuUBUmntqD4uLK405PtGIQMGLECHbt2kVubi7dunVj/vz5zJgxo7UuL4QQQgghOrBB3ZP57Y9P4dtdQVyag+F90uiS6m3rbHVYrRYEuN1uHn30UW6//Xai0Sjjx4/n/PPPb63LCyGEEEKIDkxVoFeWn97Z1UMb7c7dsdniWjwIWLx4cc3/x4wZw7x581r6kkIIIYQQopOSm//mIbMohBBCCCGEiDMSBAghhBBCCBFnJAgQQgghhBAizkgQIIQQQgghRJyRIEAIIYQQQog4I0GAEEIIIYQQcUaCACGEEEIIIeKMBAFCCCGEEELEGQkChBBCCCGEiDMSBAghhBBCCBFnJAgQQgghhBAizkgQIIQQQgghRJyRIEAIIYQQQog4I0GAEEIIIYQQcUaCACGEEEIIIeKMBAFCCCGEEELEGQkChBBCCCGEiDMSBAghhBBCCBFnJAgQQgghhBAizkgQIIQQQgghRJyRIEAIIYQQQrQKT2Ue7lgQAIcVwVu+CxWjjXMVnyQIEEIIIYQQLc4d2kvRW49Q9v7TePUg5rr3KJj1B5Qdy1BtCQRam9bWGRBCCCGEEHFAc+PwJqIX7Kbgpd+AaYCi4gikEVMcbZ27uCM9AUIIIYQQosVFPZmkTv0lKGp1AACkTroVPWsIoLRt5uKQBAFCCCGEEKLFOawI4U2fg23VHKtY+S6uaHEb5ip+SRAghBBCCCFanLNsLxVfvQuKSuoFt6IlZ6MX5BJZsxBNMds6e3FH5gQIIYQQQogWpyf3JPHMK3GmdkPPGkJyRi+qvp6Pe+QkYrbMCWhtEgQIIYQQQogWZyouHAPPRlc1bFsh6snEffp1xGy5HW0L8q4LIYQQQohWYSpOsA89NiQAaDMyJ0AIIYQQQog4I0GAEEIIIYQQcUaCACGEEEIIIeKMBAFCCCGEEELEGQkChBBCCCGEiDMSBAghhBBCCBFnJAgQQgghhBAizkgQIIQQQgghRJyRIEAIIYQQQog4I0GAEEIIIYQQcUaC
ACGEEEIIIeKMBAFCCCGEEELEGQkChBBCCCGEiDMSBAghhBBCCBFnJAgQQgghhBAizkgQIIQQQgghRJyRIEAIIYQQQog4I0GAEEIIIYQQcUaCACGEEEIIIeKMBAFCCCGEEELEmTYJAubOncvkyZOZPHkyjz32WFtkQQghhBBCiLjV6kFAOBzm4Ycf5uWXX2bu3LmsWrWKZcuWtXY2hBBCCCGEiFutHgSYpollWYTDYQzDwDAM3G53a2dDCCGEEEKIuKW19gX9fj8///nPueCCC/B6vYwePZoTTzyxtbMhhBBCCCFE3FJs27Zb84KbN2/mnnvu4YUXXiAQCPCrX/2KE044gRtvvLE1syGEEEIIIUTcavWegCVLljBmzBjS0tIAmD59Oq+++mqDg4BgMIRltWrc0qoyMgIUFla0dTZaXTyWOx7LDFLu5kqrMTp7/Qnx+fmKxzJDfJY7HssMzV/uxtahnU2rzwkYNGgQy5Yto6qqCtu2Wbx4McOHD2/tbAghhBBCCBG3Wr0n4PTTT2fjxo1Mnz4dp9PJ8OHDuemmm1o7G0IIIYQQQsStVg8CAG666Sa58RdCCCGEEKKNyI7BQgghhBBCxBkJAoQQQgghhIgzEgQIIYQQQgjRDny/jH5rkCBACCGEEKIdcUWDuCOFACjYeKv247D1Ns6V6GzaZGKwEEIIIYSoyxUrpnzhs9hGlJQpd2EG91Aw/2mSxl+FlXxBW2dPfKeyspJ7772X3NxcVFVl6NChTJ48mT//+c/k5OSwa9cuvF4vN910Ey+//DK7du3ivPPO47777gPg9ddf5+WXX0ZVVdLT03nggQfo3bt3rWs88sgjbNmyhb/85S84nU4ef/xxVq5ciWmaDBkyhPvvvx+/38/ZZ5/NCSecwJYtW7jrrrv40Y9+1KAySE+AEEIIIUQ7oVgmthHDCO4j+PrvKZ7/NNgWdiyKbVltnT3xnUWLFlFZWcncuXN56623AMjLy+Pbb7/lpptuYu7cufj9fp5//nmee+45Zs+ezauvvsrBgwdZvnw5//jHP3jppZeYN28eF154Ibfddhu2Xb2Zo23bPPjgg+zfv5+///3vJCQk8Pzzz+NwOJg9ezbz5s0jMzOTxx9/vCY//fv3Z8GCBQ0OAEB6AoQQQggh2o2oJ4OUKXcSfP33WOHqXXIDo6fgGHIuDo8PKuJvx+D26KSTTmLmzJlce+21jB07luuvv57i4mK6devGkCFDAOjRoweBQACXy0VqaioJCQmUlZXxxRdfMGnSJFJTUwGYPn06Dz/8MHl5eQC8+OKLBINB3nnnHVwuFwCffvopFRUVLFu2DABd10lLS6vJz8knn3zcZZAgQAghhBCinVCwMYN7sSKVNcfCO78hech4IL3tMiZq6d69O4sWLWLFihV8+eWX/PjHP+bBBx+suWn/nqbVvdW26unRsW0bwzAAGD16NCeeeCL33nsvr7/+Ok6nE8uyuO+++xg/fjxQPRwpGo3WnO/z+Y67DDIcSAghhBCinfBU5VM8/ymwLQKjp6CldcUI7qPi479jVJW3dfbEd1599VXuvfdeTj/9dH79619z+umns3Hjxgade8YZZ/D+++9TXFwMwNtvv01ycjI9e/YEYNiwYVxzzTUEAgGeeeYZAE4//XReeeUVYrEYlmXxwAMP8MQTTzSpDNITIIQQQgjRTsS8GSSNvwo7FsUx5FySh4yn4uO/Exh/HZovESplOFB7cPHFF/PVV18xadIkvF4vOTk5DBw4kIULFx7z3HHjxnHDDTdw/fXXY1kWqampPPfcc6jqobZ5RVF45JFHuPjiixk/fjy33norjz32GNOmTcM0TQYPHsw999zTpDIo9vezEDqIYDCEZXWoLB+XjIwAhYXx9wWPx3LHY5lByt1caTVGZ68/IT4/X/FYZujc5a5eDtTGVKqHlriIEMPTqct8NM1d7sbWoZ2N9AQIIYQQQrQjpuKs9TiGp41yIjozmRMghBBCCCFEnJEg
QAghhBBCiDgjQYAQQgghhBBxRoIAIYQQQggh4owEAUIIIYQQQsQZCQKEEEIIIYSIMxIECCGEEEII0Q48+OCD3HHHHbWOLVmyhHPOOYdQKNSs15IgQAghhBBCiHbgl7/8JevXr+fjjz8GoKqqit///vc88sgj+P3+Zr2WbBYmhBBCCCHEcfj06728tGATRSVh0lO8XHfBYM46qXuT001ISOChhx7ivvvuY8yYMTz11FOcffbZeL1errzySiKRCCkpKfzhD3+ge/fu/Otf/2LOnDmoqsoJJ5zAgw8+2OBrSRAghBBCCCFEA3369V6eeXMtUd0EoLAkzDNvrgVolkBg7NixnH766dx7773s3LmTV199lauvvpq//e1vdOnShS+++IIHHniAF154geeee44vvvgCh8PBf//3f3Pw4EGysrIadB0JAoQQQgghhGiglxZsqgkAvhfVTV5asKlZggCAe+65h7POOotnn32W/Px89u7dy89+9rOa50OhEA6Hg1GjRnHJJZdwzjnn8OMf/7jBAQBIECCEEEIIIUSDFZWEj+t4Y/j9fhITE+natSuhUIhu3boxd+5cAEzTpKioCIC//OUvrFmzhs8//5wbb7yRxx9/nFNOOaVB15CJwUIIIYQQQjRQeor3uI43VZ8+fSgrK2PVqlUAvP322/zqV7+iuLiYSZMmMWDAAH7+858zbtw4tmzZ0uB0pSdACCGEEEKIBrrugsG15gQAuJ0OrrtgcItcz+Vy8eSTT/Lwww8TjUbx+/089thjpKamcvnll3PJJZfg9Xrp3bs3M2bMaHC6EgQIIYQQQgjRQN+P+2+J1YEOt3jx4pr/jxo1irfeeqvOa2644QZuuOGGRqUvQYAQQgghhGgVqmpjWQoAigLYFnYHHJ1+1kndm/2mv7V1vHddCNEg7uItuCMFNY89Vfm4S7ZXV7pCCCFEK3PpZdjr3sWll6IoCs6iLWj7VqMoVltnLS5JT4AQnZCrcANFc59AC6STPPWXKJZJ8Zz/xYqESJ12N3paf2y7rXMphBAiXjjRCa94i6qNX+DeuwX/iecTnP8kmCYZV/6eSKBnW2cx7kgQIEQn47BjxPK3gWVilB2kZM5j2KaBVVUGgFGwEyWlN7YiX38hhBCtQ8eJ78QLiOxeR3TvBqJ7NwCQMGoihi+jjXMXn2Q4kBCdjKm4cAw7n8ApU6ofVwRrAoDEcZehDJyAJQGAEEKIVqb7u5I8/uqax4rmImHU+RgOXxvmKn5JECBEJ2SqHryDxtY+qGp4+o3GVFxtkykhhBBxS1EUtMJNFH/4fM0x24hR9tE/cOmlbZexOCZBgBCdkKcqn+Db/1v7oGVQ/M7jtSYLCyGEEK1BtaLE9m4E0yBh1EQyrnkE1ZdELH87arSirbPXruTl5TFw4ECWLl1a6/jZZ59NXl5es11HxgQI0ck40QmtmodZWQJAYOwloEepWPkuRtlBIusW4Tz1Cgzb0cY5FUK0F4oC5RGDotIIHpeDrGQvaiuuJKYoYNrgUJBFCzopU3GhnXABaV36Y6X1JeLwkTr9N6BHiAa6g/zda3E6nTzwwAPMmzcPv9/fIteQIECITkbHie+0yzAry/D0GIY65FywLQKKgl6Uh/vEKcQkABBCHGZ9bil/m72OyogBwOjBWVx93gASvc4Wv3YwFOOz1Xl8s6WQYX3TOOek7mQkulv8uqL1GaoXMofXPI76ulT/p4MFALapc+CNRwHImvErDr79OADZl92D4mie70xmZiZjx47lscce43/+539qPfe3v/2NefPm4XA4GDduHL/+9a9xOI7/d12GAwnRCcVcKQTOvRl1yLmYigtT9eA4YRK+8dcT0xLbOntCiHbkQGmEma99UxMAAKzcdJCFX+1p8X1FqmImT7z2DfOX7mZ/USUfrtjDH19aSXnYOPbJQrSRA288SmTPRiJ7NrLnqZtq/v99YNBc7rnnHpYsWVJrWNDnn3/O4sWLefvtt5kzZw65ubnM
mjWrUelLECBEJxVzJtWaBGyqHnRnUhvmSAjRHu0+UI5VT0vsR1/tafGb8bzCSvKDVbWOlYZi7DkoY8RF+2cbMaxoFbYRa5H0/X4///M//8MDDzxAKBQC4Msvv2Ty5Ml4vV40TWPGjBksX768UelLECCEEELEMdOsfyyGadlYLTxA37Tq3ynWqC8qEaKdyJrxKxRH7RH1ikMja8avm/1ap59+es2wIACrnu+MYTQuWJcgQAghhIhjPbMD9R4fO7xLi88J6JbhJ+CrfQ23y0HPrJaZCClEczj49uPYZu0bb9s0OPj2n1rket8PCyooKOC0007jvffeIxKJYBgGb7/9Nqeddlqj0pUgQAghhIhjXVK9/HTKUNTDlgPqnuln2pl9aOkFggIejXuvG83IARloDpUhvVO5/4bRpPplPxPR/imaC9XtQ9Fa9vP6/bAgXdc566yzOOuss5gxYwaTJ0+mS5cuXHPNNY1KV7HtjrUYVzAYwurE3YQZGQEKC+NvLGQ8ljseywxS7uZKqzE6e/0J8fn5ao4y29gEK2LkB6vwuTW6ZiTg0VqvndDCJhKzcDsdOBoYecjfOn40d7kbW4d+rzVWB2oNskSoEEIIEecUFNIDbtIDbbM0p4qCzyVLF4uOQXE4ybnygZrHh/+/I5HhQEIIIYQQQsQZCQKEEEIIIYSIMxIECCGEEEIIEWckCBBCCCGEECLOSBAghBBCCCFEnJEgQAghhBCdnqL88HFL74IgRPvWJkHA4sWLmT59OhdccAEPPfRQW2RBCCGEEHFCUSycB9bhjhQA4NJL0fJW4rBjbZwz0Zl8++233HHHHW2djQZr9SBg7969/O53v+Mvf/kL8+bNY+PGjXz22WetnQ0hhBBCxAFFAWfBJoJzZ1I69//wVu0n9NHfKX7vGdixBFXp3BvoidYzfPhwnnrqqbbORoO1+mZhixYtYtKkSWRnZwMwc+ZM3O622ZxECCGEEO2PooDDimIo1fcHTmLouBqVlm2DkpiNM7MXesEuCv5zHwCqLwmty2CitgwLEsevsrKSe++9l9zcXFRVZejQoUyePJmHH36Y+fPn89Of/pSioiIAqqqq2Lt3LwsXLqRLly48/vjjrFy5EtM0GTJkCPfffz9+v7/Vy9DqPQG5ubmYpsktt9zC1KlTefXVV0lKSmrtbAghhBCiHVIUcJXsILb8NVxmJe6q/YQXP48rVtzoNKPuNFIuuKXWsbQLbyfqy2lqdkWcWrRoEZWVlcydO5e33noLgLy8vJrnX3jhBebOncubb75JVlYWd911F7169eL555/H4XAwe/Zs5s2bR2ZmJo8//niblKHVewJM02TVqlW8/PLL+Hw+fvaznzFnzhymT5/eoPPT0lo/UmptGRmBts5Cm4jHcsdjmUHK3Vbiof6Etn+f20JnKrNZFaLoi8VUblyKFS4jmr8Tq6qMwIgJZAzqWeu1DS23XnKQgg9ernWs5MO/k3XpPbgzezRb3ltDZ/pbH4/2Vu6TTjqJmTNncu211zJ27Fiuv/56iotrB6qWZfGrX/2KPn36cNNNNwHw6aefUlFRwbJlywDQdZ20tLRWzz+0QRCQnp7OmDFjSE1NBeDcc89l3bp1DQ4CgsEQltV5x+9lZAQoLKxo62y0utYqt4qFK5SH4ctCsXQc0RKi/u7YbfCRkr91fGnOcjf2x7Cz158Qn5+vzlhmz2mXY4RKCe9YDUDyOT8mmjqQ8GHlbGi5FQWcB3cQzV2P6ksi7aJfULr4JfSCXYS2rCSkJmN1kCFBnfFv3RDNXe7mCCi6d+/OokWLWLFiBV9++SU//vGPefDBB2u95uGHHyYcDjNz5syaY5Zlcd999zF+/HigelhRNBptcn4ao9WDgAkTJvCb3/yG8vJyEhIS+OKLLzjnnHNaOxsiTrmr8il49bcETp2KWREkvPUrMq55hLArva2zJoQQ4jtKrBK96NDQiti+rXh6noSlHX9vlm2DkTmYlPNvwZHeg4iv
C0kX/D+MvetQ+pyG0UECANG+vPrqq3z99dc8/vjjnHHGGQSDQTZu3Fjz/PPPP8/q1at5+eWXcTgcNcdPP/10XnnlFcaMGYOmaTzwwAP4fL42WS2z1YOAESNGcOONN3LVVVeh6zrjxo1jxowZrZ0NEaeiviySzrqWsk+ru4VTp/yCiDsNOnfjqBBCdBhOooSWzsKqKiP5nBuI5m6gavNSvANGo+SMbFTPrYWG3fM0jO/OjbrTUPpPaJNeYNE5XHzxxXz11VdMmjQJr9dLTk4OAwcOZOHChRw8eJAnnniC3r17c80112BZFgB33HEHt956K4899hjTpk3DNE0GDx7MPffc0yZlUGy7Y30FOnt3tnT1tSynESKy4nWqNnwBQGDMdLShEzHU1l+hSv7W8UWGA7WOePx8dcYyu2NB7OI8jKyhOMwqlIJtmDnDMJVDdXVnLPexxGOZoX0OB+oMWr0nQIi25IhVENnxDakX3YlVWULFyvdIGTgWwyXL1AohRHsRdaWh5KRh22BpiShdT5JWeyGamQQBIq5EfDmkX/UQEWcyim2S0mMEUWdKW2dLCCHaDU1TsSy7zXuNDr/plwBAiOYnQYCIO2EtBWyw0SQAEEKI7+imzcY9JXy0ci+9sgOcdWI30vyN26CrNRjlQfhuAzHNioJtYjh8bZspITqQVt8sTAghhBDtz5a8Up58fQ0bdgZ5b9lunnxjDVHDauts1csd2sv+l3+Lu3Ifmh3D2PABsW/moplVbZ01IToM6QkQQggh4pzDobByU0GtY3kFIQrLInRLa1+t6xoxKlcvwCg9QPHsP+LtfyqV6z4GRcU3eByGv2Nt/iVEW2lwT0BZWRmhUKgl8yKEEEKINmBZNr1zaq+Y4nY5CHidbZSjIzNw4Tvtcrx9RmGFQzUBQOqUXxD1d2/r7AnRYRwzCNi5cyczZsxgzJgxnHrqqVxzzTXs37+/NfImhBBCiFZg2zCqfwajBmQAkODRuP3SESQntM85AZbDgyvzUIu/4tBwBNKwkY2/ROcQCoW48MILycvLO/aLG+mYQcC9997LpZdeytq1a1m9ejUTJ07kv//7v1ssQ0IIIYRofUk+J7dOG8b/3jaOP/5sLEN7JNMetxJyWDHMDR9S9uVcUFS01C7YRozg7D/irpJGStHxrV27liuvvJLdu3e36HWOGQSEw2GuuOIKnE4nLpeLa6+9lqKiohbNlBBCCCFan0NRSA+48bu1drssp6U60dK7geogdcovSJ52L+7eI9GSc7A1T1tnT8QRwzAoKirCMIxmTfeNN97gd7/7HZmZmc2a7g8dc2Jwnz59+OabbzjxxBMB2Lp1K926dWvRTAkhhBBC1MdGwcgZSdef/C8Vaio2Cv6zbgDbIuZKbevsiTixdu1afv7znxOLxXC5XDz55JOMGDGiWdJ++OGHmyWdYzlmELB//36uvfZaBg4ciKZpbNy4kYyMDKZMmQLAu+++2+KZFEIIIYT4no2CO6sX5YUVAMScyW2bIRFXDMPg5z//ec2CObFYjJ///Od8/PHHOByONs5dwx0zCPjVr37VGvkQQgghhBCi3SstLSUWi9U6FovFKCkpIT09vY1ydfyOGATs2LGDvn37kpCQUO/zQ4cObbFMCSGEEEII0R4lJyfjcrlqBQIul4uUlJQ2zNXxO2IQ8L//+78899xzXHrppeTk5NRaISAcDrN8+fJWyaAQQgghhBDthaZpPPnkk3XmBHSkoUBwlCDgscceo7S0lL59+/Lyyy9j2zaKoqDrOtdcc01r5lEI0QwUhXa72ocQQrRXqmLhKsvFTMhEdyTgipWgxiqIBnpInRrHRowYwccff0xJSQkpKSktEgAsXry42dM83BGDgF/+8pcsXboURVEYM2ZMzXGHw8HEiRNbNFNCiOblNCtRCrZgZQ3GUL24y3MBm2hir7bOmhCik4gaFnmFlRRXREgJuOme4cetHXMl8gZzGhVYDhem4gZALyvEgYZ57OmN
jaZg4dj7DYXvP0vC8AkkjJ5C2cf/IJa/jbTpvyGa3LfFri3aP4fD0aHmAPzQEb85L7zwAlC9Wdgf//jHVsuQEKJ5aVaE6FdvUfntJ/hPmoS3/ykUzX4MsEm/7AEivq5tnUUR50JRg/xgFQleJ1nJHhyK7Pra0YQiBi/M38ja7Yf2ERrZP4OfXDgYv7vpN+lOo4LIslk4kjNxDD8fhxGmaNE/cfUcgaP/eEylhQIBRUVJSEJxaFR+u5jKjZ+DaeBISAaXr2WuKUQrOea3RgIAITo2Q/Xg6XcKlRu+IPT1+4S+fh8A74BTMV2JbZw7Ee+KKqL88d+rKAlFAbj2gkFMGNkFZJhFh7JuZ7BWAACwZlsh63dmctrgrCalrSgK9v71VG1eCkDA0AkX5hLN/ZbwrnVkdBmA6e/epGsciW2Dntqf1It+QXDOn8Cs3hQqddrdRHw5LXJNIVpL8/XTCSHaLT1zMMlnX1fzWPUkEDjzanQt0Ia5EvFOURS+3HCgJgAAeOWDLQQrokc5S7Q3qqrw6Td59T73ydf7UNSm9ezYto3S5QQShp8FQMWq+URzvwUgZcK1GP6mBRnH4oyWEFr1fq1jVd9+jNOsatHrCtHSJAgQogGcRghP1f6axw7bwFu1D1Wx2jBXDecqy6Xs89dqHluRSqpWL0Szwm2Yq0PcVmXN/xVFwW2GjnmOogKKgowc6bgUBcpCtdfatiwb3egY36v25vDvUfXjY3+PmksgwVX/cb+zWb6jltNLwsDTUBzOmmOu7N4YJftxFO9q+gWOQFVs9G1Lie7dgCMhmdQLb0fRXFSu/RilaBuKYksd1ECKYuGt2ofDNmqOear24zRa73MqapMgQIhjcBohIstfo+jNh3BX5OLAhB1LKPjP/Tj2ft0mgYCiQNS0iJnWMX+AnHaUyq/fxY6F8Q44lbSpd4GqEVr9AVo42DoZPgp3+W5KZz+EO3wARVFwFm2mbN6fcEcL6329adls3V/O02+v5w//+opP1uynPKy3cq5Fc7Asm7En5NT6DJ88KJP0RE/bZaqDclftp/Tth3CH9lQH0uW7KZ39MO6q/DqvdShmvf9vDEWByqjBuSfXPxzn3JN7YJlNH9vliJVTvvJdbPPQdz12YBegQCCj5phTL8MVra7XNLMKd6T+eqShLFtBG3AmCcMnkDLtN6hZ/Um74BYCp07FyhiAs3AzrpIdEggcg6JYaHnfUPCf+7G3f44DA3fFHorefIjIstckEGgjim13rAWugsEQltWhsnxcMjICFH63DXo8ac/ldpdspejNRwBQ3D78J5xNxcr5AKgeP2lXPkjEmXrc6Ta2zKZts25nMS8v3IxC9RjqE3qnoh7lV8ill2LuWIHabyy6FsBVuAmw0DOHtvoSd4eX22OUEZz1AFZVOQ5/Cknjr6Z44d/ANPD0HonvR7eic6iFUVHgm+1Bnn5zba00+3ZN4s7LR+Jztd81mpvzM56R0bhhXO2x/rSx2VtYxY79ZSQnuOnfLQm/p/GTPNtzXdJSUjw6+1/+LUZJPqrbR/KPbqTkw+exYxGcmT1Jmvobokr1JFanUYG+9n3cQyeAZRDd+iXO4RPRHfVvDHo0VTGTxd/ksWB5LqMHZdK3ezKzFm0lHDXwujWumTiI0YMy0Jo4HEhRFLTc5dV1A5B48gUYZYVUbVsFKGRe/SDhhO449TKqPvs3ZlUZKZNuo3L1B0S2ryTl4t8Q9TZtyJCGjmrpVH78PLH8bQRGT0FL60pw3pMoqkr6Fb8n4uvSpGs0REf9fHv0kuq6Plx9sx8YfSGhdYuxo9VDqtIvuZdo6sAjnt/c5W5sHdrZSBDQznTUL3hTtedyq5gou5ZTuugftY97Ekidfi9Rf7dGpdvYMu8tquR3/1hR69gfbjyN7ulHX6lCVaA9fHUOL7eigLN4O8Vz/hfbODQsxJGYQcrFvybqyax1bli3uO+5ZXWGkAD8+uqTGNw9qWUz3wQSBLSO9lyXtJSMjADl
uzZQ/PajWNFD49RVr7+6jko4tAKYO3yQknceAxRsQ0f1+kmechdR9/Evc7hiSwHPzVlf87hLegJ3XTGKSMwg4HOR6HXSXLcYDiuCtfZdFIeDqu3f4EztAti4u/RH7X0yMWcqmhXGWPMeFavmg6qBZeDqNojAuTcTczV9J1dFAWdwK8Vz/lSrR8J/0iSco6ZgqN4mX+NYOvLn212ZV/0ZjdRu9U8+9yfYfcZhceRGnHgLAp555hkWLFgAwPjx47n77rtb5DoyHEiIY7BwoPY5FU+vEbWOp/zoRmKBxgUATXGwpO44/oKSY09Qa4/3frYNRlo/ks68otbx1AtuqRMAAFRG9HoDAICSikiL5FGIjiAW6Enyj26sdSzlvJuI+WsvARz1ZpEy8WbMiiBWuJyUiTc3KgBQVYUNO4trHdtfVElV1KBLqo+AR2u2AADAdrhwpHahatvXaElZJJwyFcXhpvybRSih6uE/hurFM+oCHP4UsKrHnSef85NmCQDgu/oqfSCJp19ac8yRkIz3xMmtEgB0dDF/N1LO+69axzy9huPoO+aoAUC8WbZsGUuWLGHOnDm88847bNiwgUWLFrXItSQIEOIYHLaBtW0pkd21h6AUf/h3XOW5rZ6frJS6Lf5ZqR1zvWpFAa1oC6WfvVrrePF7z+AJH6jz+gSPRnLAXW9aqTKOXMQxV9kuSj58vtaxkoV/w1Wxt9YxT/ggxe//pXq9fX8Kxe8/e8T5N0djWTbD+6bVOpaY4CLlCN/PprJsFbv7SQROm4b3zOuJ+LqSPPZiks+5gVhqPwA0K0zVqnmYoRIUrXoYYemHz+OKFR8t6eOiFWyk7Is3ah6blaVUffUOmqwUdEyu8j0Uf/BcrWOR3d9ibP2i1mThjqKkpIS//vWv3Hbbbfztb3+jpKSkWdLNyMjgnnvuweVy4XQ66du3L/v37z/2iY0gQYAQx6CV7qB08YtA9RCgxDHTAbCjVRTPfgyP3nw/MA3RNc3HXVeMIjvVR3aqj7uuHEWXDhoEuPUySt5/tnrzncQMUqf8HEVzYYZKqFj6Ok5qt/p7nQ5+PGlInXQG9UyhZ5a/tbItRLtiVBRT+uFz2LEIqtdP2tS7UN0+rGgVZR+9gNs+dINqOhNIGH4WKVPvJmXab0gYegamdvzzAQCG9U7lih8NJDHBxeBeKdx9zUkktOC8HFP1YHQ7Gd1Zvb+JO7v3d/OaquccKKaOVVmKq9sgMq57jMDJF2JWlaOYzbNwgNOqonLdx2AZ+E+aRPqMe1AcTqo2fYEWK2uWa3RWHr2E4jmP1cwBSBwzHdVTXWeXffISWsmOtszecSspKeHyyy/npZdeYsWKFbz00ktcccUVzRII9O/fn5EjRwKwe/duFixYwPjx45ucbn1kTkA705HH+zVFey6306gksvJNIttWkjr9Xgx/DuxaTulH/yT1wtsxckZgNyKebmqZ9e++B84mTrprbT8st7tiD2Ufv0DSxFuJ+bJwFm+jYukbJJ53C1FXWp3zLdtm18EQn3ydR2FpmDNHduWEvmkEmjCZtDXInIDW0Z7rkpaSkRGgYs82Shc+S9J5txDzd8VVsYeyj/5J0vm31hla51AsTFut8//GUBSFiG6iOVQcrVwV1fe3durlKJZBzF09R8ChVzZquNORuPQyrPxN0G0EpsOLM7gVRXMRTezVbNc4mo76+Vaw0fLXUjz/KZLP+TH0GYtWmU/x24/i6XsSntMuO+rk9PY2J+Cvf/0rL730Erp+KMB0Op1cd911/OxnP2tq9gDYtm0bN998M7fffjvTpk1rljR/SIKAdqajfsGbqr2X22lU4tAriHizgerJwu5wARFvVqMCAGj/ZW4p9ZXbZVcR+271EkUBl1lFVD1674bDUf2+m2bHWFNegoDWEY/fq+/L7LaralYBAuo87mza6m+tKErNfAdFoVVXWHP7XARLqvC6tFYPuppKwcYTPkDUm1kzB8AdPoDlChxzdar2FgTcdttt
rFixos7xU089lWeffbZJaQN8/fXX3HHHHdx3331Mnjy5yekdSftuOhOindC1BPTDuswtHIS9smV8c4kddqNi2xwzAICOc/MvRGv54Q1/SwcALr0ULIuYu3qJZG+0gKgrFUvp3LcWh7edtlYAYNo2m/eU8sqHWzgQrKJ/92Su/NEAemV2nGGQNkqd383odw1rHc3w4cNZvXo1sdihIasul4vhw4c3Oe38/Hxuu+02Zs6cyZgxY5qc3tHInAAhhBBCHBeXXkbok39S8eFfceuluIKbKfjPfah7VqJ2wEmeTaFwKBJoqdGZ2/eX83+vreZAsHpM/ba9pTz84kr2F7ePXd/jzeWXX04gEMDprN7B2uVyEQgEuPzyy5uc9gsvvEA0GuXRRx9l6tSpTJ06lddee63J6danc4frQgghhGh2ihHBCO7DrAhSOvthjIoSsAz0oj1o3UdCJ+8N+J67PBezvACr24k4w0UY+zah9RmDoTbfKkk2MO+LXXWOm5bNys0HuXhc72ZdjlUcW0pKCrNmzWLWrFmsX7+e4cOHc/nll5OS0vTlaO+//37uv//+ZsjlscXHt1QIIYQQzSbqzSJ12t0E33gQo6x6idGEEefiHHVR3KyZ77FClCx4FqOsgKSzrqHkm4WY5UVkZvXCCPRqtuvopsXB4vqXIM0vqkRVFUxTgoDWlpKS0myTgNuKDAcSQgghxHGzKkuwYtGax3pBLqoRP5v2RVQ/yRf+HNXrp+zT/2CWF5E04TpijdxF/khcmsppw+ofOz9qQKbMjxKNJkGAEEIIIY6Lp2o/wTmPg2XgGzYeRyCNWP42Qp+8gMuOo3HqDg3F4ax5qLi82EozTwyw4axRXetsxNavWxJDejXPbsgiPslwICGEEEIcF92bRuCUi7BiVbhOnIpvVDll7z9J4LQZxFQvtPHoFEUBdyxI1JWKbStoZhWqGSXmar6bZrcZonT+k5ihEgKnXkzluo8p/eA5Mq/sQjjQs9muA5AecPPbH5/CrgMV5BdV0jUzgT45ifjdchsnGk8+PUIIIYQ4LqbixjFsIpptYqheDK+XpGn3EXP4W3Xd/PooCrhKdlD0zp9IueBWlMwBRFbNRi/IJTDx1mYLBGKan6RzfoxZvB/6jiO132hiu75B92Ue++RGSPI5+dGpPeNuHwzRciQIEEIIIcRxM3+wAk5U9bd5DwCAahvE9m3GjkUonvdnXF36E9u3BcXhRI2WQTMFAbYNemp/lOTeWGiYCV1xDMvG+G4jLCHaO5kTIIQQQohOw0RDHXIugVMuAtsitm8LKCqp0+4mmtirWa9l29TaHM2UAEA0gyeffJJJkyYxefJk/vWvf7XYdaQnQAghhBCNpiitt3NuQymWiRU9bFlN28bWw1R3VbTQjl4i7kQiEYqKikhPT8fj8TRLml999RVffvkl8+bNwzAMJk2axPjx4+nTp0+zpH846QkQQgghRKOomLgKNuDSSwFwxYpxF21Coe2WrXRgYKx7n8q1H6E4nHgHngbYFM/7M+7SuptuCXG8DMPgiSee4JxzzuHKK6/knHPO4YknnsAwmr5b9imnnMJLL72EpmkEg0FM08Tn8zVDruuSngAhhBCihamqgmW1s+byJlIUUPesomjBX3H3GErShOso+eA59AO7SJt6J3r2CW3SQ2Ci4e5/KlUbl5B8wa3Yqb3QUnLQC3IxfWmtnyHR6Tz11FPMnj2baPTQPhmzZ89GURTuvPPOJqfvdDp56qmn+Oc//8n5559PVlZWk9Osj/QECCGEEC0kalis2Rnkn+9vZtnGg1RGmt5SeLycRgXu4CYcmLir8nFX5DZLujHDZoejH9tG/pxg5skUzfoD+oGdOBLTUZKy23SIUNTfndQr/oCeNgAdF47h5+Mbfz26M6ntMiU6hUgkwttvv00kEqlz/K233qpzvLHuuOMOli9fTn5+Pm+88UazpPlD0hMgRCvRrDCqESbmSgXANg08epCIU1qmhOiMFAVW
bSnkhXc3APD5mn2cd2oPrpjQr1XzoBTtoGjen0kcM53itR+jJaaReOGviKqNH2JgAx9/k8cbH28Dqne1vXfiT0hd/gxpF/2CsKdllsk8HhFHUs1qRabixnS6j36CEA1QVFSEqtbfhq6qKkVFRXTr1vhdo3fs2EEsFmPw4MF4vV7OO+88tmzZ0uj0jkZ6AoRoBZoVJvbNXMrfexJ3NIiqWIQ2LKHolftxl+9u6+wJIVqAYcH7y3fXOvbRV3uoiLZeb4Btg5U1hISR51G+fDa2HiX5/FubFAAAlFbGeOuT7TWPY4bFp7kaWko2ZV+8VjNHQIjOJj09Hcuqf86Lbdukp6c3Kf28vDzuv/9+YrEYsViMjz/+mJNOOqlJaR6JBAGizRwoDbNicwHrc0sIlnXebeYVBdSCLVR+sxC9MJey959C2bGEwnefwY6FKXl3Jh6zvK2zKYRoZg4FemT6ax1LS/LidrTuT68jHCS8dQWoGrYeIbZ3PardtEDEsqtveA5nqi4cviSiueuxD25DUWQVHtH5eDweZsyYUWc1oCMdP17jx4/nrLPO4uKLL2bGjBmMGjWKyZMnNynNI5HhQKJN5AWrePCfKzDM6h+RsSfkcMMFg9A64Y+GbYOdMYCEEedQufZj9MJcSj/6JwCK5iLlwp8T1RLbxSY7QojmdfH4vmzLK6O4PILXrXHztOG4tFZuf4uGcCQkkX7ZA4S3LEcvzsfVS8dyNP4WINXvYtKYXry3bDcADlXhnNE9Cfiux7NvI3aXE+oECUJ0FnfccQeKovDWW2+hqiqWZTFjxgxuv/32Zkn/9ttvb7a0jkaxO9i3NBgMdboVFg6XkRHo9FuCK4rC25/vZP7S2ku1PXzzGHJSvG2Uq5bntsOUzXsM/eDummNp036Nnjm03a2x3ZLi4TNen+Ysd0ZGoFHndfb6E9rn5yscMwmWR0jyu0n0as3+fT9WmRUFXGaIqOpHs6JgmxiOpi85GDUscg+GKK+M0i3TT06KF9sGVbGx7JZv0GmPf+uWFo9lhuYvd2Pr0B9qiX0CWpMMBxJtwMbtqrurokPtfL0A31MVC2vfevSDtVflKF/yBq5IsI1yJYRoDV6Xg27pCQQ8Rw4ANLMKV6wUAHesBIetH/d1FEVB01Q8RjGKYuOwddyxEmwbomr1sCRDdTdLAADg1lQGdE3k5AEZZCd7a8rWGgGAEO2Bx+OhW7duHTIAAAkCRBuwbRg9KJMEr7Pm2NTxfUlP9GADhm2327Gkh8cp1Vk8dpOeooBWsJHi9/8C2CiaC3evEQDVcwQWPIVbL2uR/Aoh2j/NjqGvmU9o8d/xhvMpnfsY7FrO8bSLBEMx3lm6m6feXMvK9fugaCf2ts8ofecx3LHilsu8EKLDatM5AY899hglJSU8+uijbZkN0UCKAqGoiUNV8DrVI7ZofT/hzFY1Sioi7DpYCUCv7ERSEqpv/DOTPDz0X6eRV1RJgkejT9ck1m4rYv7SXZSFoow/sRtjhmaT7HPWf5E24NLLMLYvw9lvHIYrCWfRJmw9hpEzHPsoLV+2DUpSDs7MHhjF+aTNuAc7MQfXV29TsfYjEkach+70H/F8IcSRRU0L3bCO2srenq5pA6GogdfpQPvuLt9UXXj6nkTl2o8oePleHAnJaNn9iTYw7dKKCE++sYa8ghAAq7bALyem0WPlKySefhlGA+qXsG6SeyBEeVWMLmkJ5KR5cRxHY4wnfBDTmYCu+XFHi7AVBzFXSp3XKQpUxSws2ybg0Tr98LQjcUcKsB1uYs4kXHopiqkT9WS0dbZEnGmzIGD58uXMmTOHs846q62yII5D1LD4dPU+Zn+2A59H48aLhjGkRxIKtX8kVNtAyf0KRVHICwzjwRe/IRIzAfC6NR748SlkJ1d3myX5nCT1SEZRYPn6Azw359uadN78eBsrNhzg7qtOxFfP0KHW5iRG1fJZhDcvx5u/A9+w8QTnPQm2ReaVvyfs73HU86OuNJIm3YES
DRFN7IltQ8qEK/EMHEMsuRcWbV9GITqabfkVPD/nW0pDUSaN68XE0Uf/HrbUNRtaR5WHdd7+dAdL1u2nW6afmy8eTpfvxtHjTUJ1J2AaMdRAOraz4UN28ouqagKA732zz6ZPUgZaWjcM1XnUTsvKqMFf5qxn0+5DPQY/nTKUccOyGrRggacqn+DsR/H0HoF/9EWUvP8UittH4NxbiLmSa15n2zZfbw/y4nubMEyLK84dwNih2Tgd7bPnt6W4I4WUvvsEjuQsks66jrLF/8IMlZB84Z1E3U1bXlKI49Emw4FKS0uZOXMmt9xyS1tcXjTC9n1lvP7xNnTDoiwUY+Zr31BYHq3zOsU2MEvyKVv3KW8t3loTAACEowbvLtlZ5zelImLwnwWb6qS150AFuQfbxwQoHRcJJ05C9SQQ3r6K4Dv/B5ZBwqjz0H0Na72JutKIBHrWtBxqCclEkvtKACBEI5RUxvi/V74mWB7BtGze/WIXm3JLWvyaj//gmpv3lDboXEVRWLr+AF+s3Y9tw96DIf48azURw8Kp6FR9NRuwSZvyc4yiPZg7vmzwcKCAz4nzBysOdc/y4+o2iJIFf8EdO/r7sn1/ea0AAODf72+iJBRr0PUtpw9ndh+qNnxOwYu/Qi/cg7vbEKwfBDL7SyI8+9Y6KsM60ZjJv9/fxO6C9lHHtybL6cPVdRDRXWsp+Ncvieaux91tMKbWeRfGEO1Tm/QE/Pa3v+XOO+8kPz//uM9NS+v8wyaaa9Z6cwquP1DrsWVDKGIwtO8P8xpAP2USFTHYtqKqTjpb9pTi9rpI8h/aubE8r5TKSP1rVofCRvt5PzKGYE+4luCCvwGguDyknjYFZ0rjd8ZsN2VrZVLuttGZ6s/9JYXEjNob9uw5WN0a3lLv8/6SQvQfXDP3YAUTx/Q65rmWZbNma2GtY0VlEQwLktNTiZ11JXYsiju7N+60bLRAGlpiw8ph2zZ3XD6SZ95cSzRmMrxvKmNO7E16Qk/0k8/HqgqSkhXANg1iRXvx9ToBRTs01LJ4bd3fYsO0iOhWA9/LAJ6zr2HfztUAOBKSSR49EWdi7d3QN+XVnftUFoo16e/V1t+pxgkQO30GVZuWgGmgaC5Sx16MMzW7QWd3zDI3XbyWuyW1ehDw5ptvkpOTw5gxY5g9e/Zxn9/Zl7hrr8t/dUmr3aLj0lRSElx18uq0wkSWvIwjuJ8T+0zns/VFtZ4/aVAmekSnMHyohcmpQkrATUlF3Z6FZH/da7QFRVFwFm0i+OE/ao7ZsQhFH/4L7xnXoTsTjzvN9vq3bmlS7uZJqzE6U/2Z6NVI8DqpDB9aRWdAjySAFvt81XvN7kkNup6iwLgTurBtb2nNsV45ibg15bvzA+AIQGEFuHIgSvX/GyAjI8CwHik89rOxRGImaYluHIpCScSB11IoeOOPePuPxoqFieVtJuPqhwm7D/VgZqfWbYH2ujUS3FqDyuaOFlH2/pMAqB4/ZmUpwcWv4Dn1MnTtUOCZFnCjqkqtz2BGsrfRf6+OWpe4YiWEFv8DTAPV48eKhChY+Hf8439ca/hUfTpqmZuqvS4R2tJaeu5sqwcB77//PoWFhUydOpWysjKqqqp45JFHuO+++1o7K+I49MoKcMelI5j92Q6S/C4uPbs/qX5XnUlxuurFO3gcXhQu9PRmw94Kisqqb+6zUn38aHT3OhvIeJ0Obpw6jD/95+tax08cmEGPjPbRcqmaYSI7vgHTIGHU+fgGjyU4+zEiud/iP7W8UUGAEKLx/B6Ne687mbc+2c7B4iouOr03/bsmtdtr2jacNDCdaGwgi7/eS//uyVx0em+czbQ0sm3bJHqdJHprL6YQ8WWTeuHtFL9bfZOePuM3RDwZtcb6984OMOGkbnzydR4AmkPh1hknkORr4MRnu/pfYMx0vEPOovyTF8G2+OGEgsxkD/ddfzJvfLyNqG5y6YT+9EhvnuVKOxwbEkacS8LoqYRWvI1ZISs4dTShUIgP
PviA3NxcevbsycSJE/H7m++epTXmzrbpZmGzZ8/mq6++Oq4IpzO1ZNWnvUf5pl29TGZDf7bCukleYSUo0DU9AZ+z/vHvySkJrNlykC83HKC4PMppQ7MZ2COZhHYwKfh7TrMKpXArVuZADNWLO7QXTINYcu9GrUrS3v/WLUXK3TxpNUZnrD9twLJtNFXBtlvn8/XDax4PRVHQLat6ZaAjnatAJGbi1Bw0ZM7s0crs0ssIffpPorurF17wDjwN79gr0bXanyHDsskvDhMKx8hK8ZEacB3XLubuWDGm04+huHDqZaA4avUCHO77AVVNnZTYkesSV6wUy+HEcCTgNEIolnHMXgDo2GVuivbWE7Bq1SruvPNObNsmEong8XhQFIWZM2dy8sknNzl/paWl3HTTTUyaNInNmzd3np4A0bEd7yIOXqeD/l2O3Uru1FR6Zfrpk90fRVEwTeuY57Q23eFDyRlZ86Mf9Xev/k/nuqcSokNRAIdy/DfjbXVN27bRlCMHAKWhKr7ZeIA5y/I4b1QG/fvm0K9LUs1yosedVzOKVVFM2iX3gh6j4ss5KGaszq+/pip0T/cB37XMH6VsLr0MNVxMNKkXtq3gKd+N5UnCUFwA6M6j947IBkXUuuE/UrAk2qdQKMSdd95JOByuORaJRAC48847WbBgQZN7BJoyd/Z4tOl3cfr06bJHgKjFsux2GQB8r+36zYQQHZFmRVCU6orDYcdQqX8RBIBQJIK1fTkjzbVcOz6b8cYX+Mt2sPmweQTHK+rJJOnie9BT+6NnDSFw4S+IudOOfeIROPUyQp+8QOEbD+E8uBFXcDOFbzxExUfP4TrGKkQdXTvdw1K0sg8++KDOsObv2bbNBx980KT0D58729KkJ0AIIYRoAU6zkuiqOXh6j8TOGoixaTGq24faZyxWPT+/+w5WkB4L4/p2LsMSPsMMV6BljWD2p9sYcPXJuLTGtdtF1YTvWvYVYmrTWihthxtXZk+iu9cRfOfx6jtj28KZ2QtL8zQp7faqrEpna14pBSVhumX46dslEb9Hbp/iVW5ubk3L/w9FIhH27NnTpPRbc+6sfIqFEEKIFuCIllG1cQmV6xbj7T+a8NYVaKk5JHc/gagzuc7rdxVEKFb7MyKQillRjNLrZBbu9rA7P0jUsBodBDQnQ/XgHHkhnuC+6sUSbBt3j6F4Tr6YGJ0vCCiqiPLHl1bVWr2ud5dEfn7pSBK9cgsVj3r27InH46k3EPB4PPTo0bRNC//1r3/V/P/7ubMttXhO29coQgghRCcUTehC+ozfgG0R3roC1Rsg5aJf1hsAAIzolcCw8s8xK8tQ+o7B3r2Kad0LOG1oFj532yySoCqHhj14w/twmpUowV1Edq2rOR7N2wwFO2qGPXUWigJfrN1fZ/nqXfvL2bA72Ea5Em1t4sSJKEcYG6YoChMnTmzlHDWeBAFCCCFEC1CtGNF9W2oeW5FKrNIDR7xZTk5KItJjDKHTbuLVilMxx1zPbiuH88f0wtEGA9I1swo2LsQdLcRdup3oli9xVuZTsuAvYBn4Bp5C4MTzwDIpWfDsMXcm7mhips2XP9go83tfby7A4ZBbqHjk9/uZOXMmXq8Xj6e698vj8eD1epk5c2azLhPa0nNnpS9LCCGEaAGucBEFS96oHgI0/hqC85+m5MPnSbviQSJacp3Xe10OrB6D2Lm/Am9CId8q2fTvnUROiu+IExFbkqNsLyVfvI624XN8/U+h/Kt3ieRtJvXs64js2YAZCeEdfDqKx4+r+3CirtROtVqaU1XITvNRWBqu81zXDH+b/E1E+3DyySezYMECPvjgA/bs2UOPHj2afZ+A1iBBgBBCCNECIgk5pE27GyWQRsyTQdr036A43cQOW6PfYeuYyqENvvxuJ326JDKgezJep4JlUetmU1EOrVJ2+P8Pp2JQEQPNoZJsBjG0ALrqwWFHccbKiRy2W/DRGCm9STz9MsqXvEH5irk4EtPx9j6B4MK/
48rsTuD0Kwkn9MAxrAsxpYEbi3UwF47rzbc7ag/9cagKpwzJ7nR7bjREzLTQDRuf29Hg/YI6K7/fz4wZM9o6G00iQUAnZ9k2UcPG61JbpIXGHS0EG2LeTNyVezHcqRiOON0BUgghDmPbCnrGoOqbYxv0pB6oe1ehFubi7D4CR8V+wgd24+k5HBuFQjuVhStyWfTVHhK8Tq780UBOGpBes0eAwzZQcr/CkdUP2+HG2rMGpdfJwKGgoqwywtzPd/D5uoMk+d1cPSaFUYmb8PY7mci3H1K+7hNSp9+D5fKj6wZRZxJJZhBL0Yi5Un5QAgXU6rkIjsR0ksdOxwpXgEMjYfBYorvW4ByeVb3OfSe9H+6akcDV5w9i8aq95BdV0qdrEhed0Yfu6V6s9ruadbOzsdmSV86L72+ksCTMqUOymTGhH2l+V1tnTTSBDGjrxIoqojw1+1vufPJzXvpgK2VhvVnTd9lRKj55kdL5M9H2f0Pha7+HfWuPOGFGCCHizeGt4y69jLLPXqXs81exdn/FwTf/iMPSKX3/GUIr5zNvyU4WfpmLadmUV8Z47p1v2ZpXVnO+Uy+jfOmblMx9nIqP/kbpp//BESmted6y4eUPt/Hp2oNYNpRURHnmwwNsC/kpnfMoFV++gxWuwK4sZuv+EPe9vIlfP7uUhasLKd+yEie1fyO0kp2Uf/4azpy+JJ16EcEP/wmqSsaFt1Hy2SwULDi4GbWRG5l1BOWVMWZ9uIWe2YlMOb0PSQkuPlm1B1WNr9unvKIwf3rlawqKw9g2fLnhAM+8tRbd7KTRX5yIr09xHDFteOHdjazbVkTMsPj0mzw+XpVHc/bfxRQ3gTOvwQyVUPzuk7i7D0HpMlTGSQohRD0izlRSp/8G2zSI5e/A03UAJV+8jlFWSGzEdD79Jq/OOZ+v3VczATXiSiNt6i8xywqI7d1EyqTbiPq71ry2uCLKN1sK66Sx5qCjugUfSJ1yBwW+fjwyazMHiiOEwjovfXqA3KQT0XHWOs9K6k7i2EtIOu9naKldUDSNkk9epWD249iWiTM1B7IHH3FYjKIoFIdifLu7mG37y4noZqPfu7aS7HeTnuzhy/X5vLtkJ6u3FjJ6SDaGEUfdAMD2faV1hnvlHqigoJ75EqLjkCCgk6qMGGzZU3ulhpWbDmI0YQyjO3wAd9V+VMXGU7YTlxnCDpdhG9WtR2Z5IaoRPUYqQgjRNhx2DHeseny3K1aC0wi1eh7scDm2oeMIpGJWllYfM3UUI4pWz2ozCR5nTcOKUy+lbNmbKA4nqsdP2eev4gofrHmtw6FQX6O816lgm9X1tFFaQFVlGPMHvwX5wao65+kOH+rwyUTd6URT+pNy9vWADbZF4ikXQa9T0R0JRyzr9vxy7vnrMmbOWsMfX1rF/81aQ1lV8/ZItzSPpnLnFSdy0qBMUhM9XHHuAEb1S2/rbLU6l1b/ErWyQlLHJn+9TirB42Bwr9rjO08dmo2zkd22LqKEPn+F4tmPoWz7lMLX/wcluJPwxs/x9hlJxlX/gxWpwgrmynAgIUS7oyg27FxOyexH8VbmUfHRc8RWv4tG692UeowSiuc/hcMbQEvJwQyHSPvRj/H1Own31g+ZflafH+QZzhzZ5VBLu6KhJaSQOv03pF32AM60rqBUT+1TMck0D3D+qd1rpeFQFU5Ii5AwaCygUP75q3SPbiM10V3rdTk+o973wrQVFAWcxTso+ehfoKjg0ChfMRf7wKYjLncaMSyem/MthnmoxXzHvjK+3dnx1tfPTHRz68XD+OPNY5g4ujteV9vs2dCW+ndPwu2sXe5Th2aRmdT5NohrD6699lomT57M1KlTmTp1KmvXrm2R68jE4E7KoSj85MKhvLV4G+t3FXP6iBzOPrFbo1dviOEmMOEGgq//ntLF/8bb/xSs9H54UnuBbRNxJpF22e+IuRJlOJAQot2xbQWtyyCw51Lwyv0oTjeJ4y4npjhbbVJr1JlC6kV3gW1j
pvYh45LuGGVFeE6/Gmw4kwApiT4+WrmHlICb80/tSY+MQy3tuubHNeZKdNWNbYPv3J8RpXpiprNkJ0VvPsz5p/+EbpP68fHaQrJT3JzTT6F/so7d5QpSc/oTWrMIR1Y/bpvh4KUFW6io0rnkrN7075OEcdhwICc6Ok4UBRxWDDtWBbZN2sW/RHH5CM55DCtcgWKZ2ErdW4lQWKeorO6Oqpt2lzB+RBdMs2MNp1EAp0OJ29+3rCQPv/3JKXy0ai+788s5Y2QXThqQWW/PUzwwDIN58+bx2muvUVhYSEZGBldeeSUXXXQRmta0W2vbttm9ezeffPJJk9M6FgkCOrE0v4ubLhpK1LDwOh1NqrwULMzgHqxwdfd5dN8WEiKlRH1dal4TcaU2Oc9CCNFiHC7UhBTMimIUpwfcCa26rKVtg57ar+b/Vd6uKL5u2LaNqoDHhlMGpHHq4AywqXesvaG4a4IWnUMrs1j+bLz9RxNe8k+GaS5Gdh1M4tgZKAkp6Jofy1JQup9EIHsQMS1A7yz47+tOxrJt3JpaewJzNEjVsln4x1yKFS4nsm0F7pMuJuPaPxJ2VQ+FSb/qIWKuFCzqbxX3e52kJXoIltcOBAb1TO5wAYCo/rzmpHi5fuJAbOzqUWHxGQ9hGAZ33HEH69atIxKp/nyHQiGeeOIJPvroI5566qkm3bzv3LkTgJ/85CeUlpZy2WWXcc011zRL3n9IhgN1cgrVYxqb2nqh2Dr5US9lE35N4IancCQkY5cX1jv0xxUrxWFVfzE8RufaQVII0TGpKhi7VmEE95F28V0oDo2qle/gVBo/HOjw6q+hoyDtH9w82baNuyofZftnOBUdbd/XOIM7jrvO1p0BfEPPrE7TiKHoYUhII6oGsCzlu2uptfYo0FQFv1mGasVQlOr6WlFAsXRiB3YQfOshiuf8L0bpQbD0mgAAIOJKP2IAANW/OzdPG47mOPTG9OqSyAl94288fWdiWTa2Fb8BAMC8efNqBQDfi0QirFu3jnfffbdJ6ZeXlzNmzBieffZZXnzxRWbNmsXSpUublOaRSE+AOKaIbvLe8n0sWL4fy4auGSF+ccndeJPq7mLpMiqo+Ph53F364+kziuA7j5N60V1Ek/ocIXUhhGh5lgXOvmNI7z6UaEIXUi7+NbbDRcx2HvvkeiiKhbNwMyRmYzvcKEU7MDMHYiruY598GFUB48A2Sj/+F55da4jsXIN/9GScyd1rDc85Gt2wKC0OEt5bSGJWPzTVJrZ/K+Elr+AZd/URJ++69FLKFz6Nt+/JuLoMIPjun0mbfjfRQA+Sz7qW4LtPApB0+hWEncnHVS6AfjkB/njLWPIKK/G4HPTI8uN1xt94etG5vPbaa3UCgO9FIhFeffVVpk2b1uj0R40axahRo2oeX3LJJXz22WeMGzeu0WkeiQQB4pg27y3jvWW7ax7vKwzx3LtbuPuqUWg/6EsyNB++weMo/fDvVKyYiyunP7b3hxvQCCFE69M1f83GVlFvdpPScsdKCb73DI6UbLTkbMJbviTzqgcJJ3SreY2qKsfcVdayq4MTz641RHZ8g5aShWfkZKLHCADceimG5idmK/zn/Q3M/WIn2WleRvW7nKljuuJY+iKevidhaN4jznkwnX58g8ZR9unL1Wn2HIblTsQdyqNowV9xdR2IWV5IcN5MUqbfS7QRQz7TAm7SAscXGDUn07YpKI0QjhrkpPkkCBFNVlhYdxne43n+WFatWoWu64wZMwao7i1sqbkBEgSIo3I4FL7efLDO8e15pZRWxkj/QeVu4cCT1q16BQnbwpnTD1PztlZ2hRCiVUScqaRN+zWFs/6AfmAnKRfcSiShes3+yqjBup1BNueWMrJ/OkN6puD+YYvJdxRsyFtLZMdqtJRsjJID6FuX4BhwFqZSfyDg0kso/+CvJJ1yIUaogtWbHdx2bgZ9yOOVnRo7i7IYcub1GJoX2z7yqF9L0XBl9jqUbk5/TM2HrWgknXE5
jl4noxhhzIM7MJyBI6ZzLO5oISE1kZIqmxS/k0SjmIgno8WHlFi2zYcr83hz8Tagevffu64cRYpPdrkVjZeRkUEodOTlhTMyMpqUfkVFBU899RSzZs1C13XmzJnDH/7whyaleSQSBIijsiybLhn+Ose9bg1PPS0qHquC4vlP4crui3fgaZR9+jIZfUdhpgxojewKIUSrcJqVVH77SXWDh6oSWr2QxKx+xNypvP9lLguW5wLwxZp9/PjCIZw5PLv+m15FQU1IxT96Mp6Rk6sDgKQsjHpW3Pme5fCReMpkSj9/Db04n9tHTUfbugS7vIArzr2LnZUxdEf6MVc9cutlBOc/ibvnMNzdh1K+5HUye59AONAbZcDZmLYCziSU3lnYduOWgXFHCtiZe5CnPtlGYWmE7DQft4910KtbiFhy70YFAlHDYm9hiC83F+JxqfTJSSLRW/f9KiyL1gQAAPsKK9mws5jTh1X3AlVGTQpKwyT6XKQFJDAQDXPllVfyxBNP1DskyOPxcNVVVzUp/QkTJrB27VouvvhiLMviqquuqjU8qDlJECCOyrbhpIEZvLd0F1URo+b41RMHEfA668wJiKgBUi+6C8vpw3AlkpnTDz2had3uQgjR1g4f2qMoCoqpY1aWkDrlF6gJyZR99A8UUycUNfhgxZ5a576/bDdjhmbhVBXcoTxMdxK6FsBTlY+leYil9EFJ6sHOEp1i14lkeRPIVJQjD+NxuHE4ffiHnkHp1wtxrJ6NDURPuJiXV0S4alLdhpv6RLQk0qbdjeXyYzr9ZHYfQsxfveLb4Tf9jQ0AAGK6wYtfhiksrb5hOhCs4s0NifwivRJsi+NdnySim7yyaBtL1+2vOZbid3PPdSeT8YO9DyKxujsUl1fpKAocLIvy2MurKKmI4lAV/t8lIxjZNy1ulwAVDXfRRRfx0Ucf1Zkc7PF4OOGEE5gyZUqTr/GLX/yCX/ziF01O51gkCBDHlJno4fc/PZXNuSWUV8YY0COF3ln+I1aW33eJA4QDPVsrm0II0SI0Kwy5q9G6DEWxYliFuzC7jsJ/zi3oDh82CkkX30NU8eEyLTKSPRwsDtec3y3Tj/ZdAFD0+oN4+40m4aQLCM75E1pSJoFJd7B8V5Rn31oHVAcc910/mj5Z1TfzDsXCPGxYjydWQtG8P5M8/nJUhxPL4URLzUFNCHDNBUPISnbR0M3hI4fNYWiJ+rrKk82ewq21ju0uiBJJG4nruwDAsuFAaZhQOEaK30NGkvuIAdDO/IpaAQBASSjK+1/mcv3EAbXOy0710rdrEjv2lQHV7+vwPmkoisJHK/dQUlG9w71p2Tw/dz2P3jqWgLv1bot0y8aybLwuR/WqO1T3crgcatyuv98RaJrGU089xbvvvsurr75as0/A1VdfzYUXXtjia/s3p46TU9GqFGw8lfuIeTOwHG66qUXkDE7G0Jq+rrZu2YTCOn6v86g7GGt2FPO7TXGcxGqtiX24759TFHCYUQy17SahCSE6Hy1STNm3n6DlbSJ2YCfuLv3xZfbDtgxc4RLsQAZK+UEcCZlEdRc3XTyc4vIIBSVhqiIG407IARtMdyLegWPxdu1DxeevYOtRks68gvywxp6DRST5XZSFYliWzZuLt/KbK0fi04NUbVqKa+i5mIoTU3VjaD4yL7+f4Pt/waoqJ23SLcTytxHb9zXpvfsR3bQEV9ehFDiy8HucaG14R5nggnNGZbNw5aEb93NHppOgmeholIV1XvlwK6s2Vc89UxU4/7ReTBrTE6dDJRTRCXiry6AoClv21L/s9NK1+7n0rL74DtvN162p/L8ZJ7B5TwnllTqDeqXQPd2HolT3SBwuHDXQdQta4ecjopus3h5k7uc7iMRMJo/tRf/uKby/fDebdgXp1SWRaeP70TsrAYX6/3Y2UFal43Y68LnUuF6ysy1omsa0adOatApQeyBBgKhDUcBZtJmC2X8iccwMXN0HUzT7MXyDxuI69XIMtfHb
hBeUR/nL2+vYc7CCPl0SuWXa8DqTiwGcRjmRL9/EN+wscPkILXuDhDOuIeZOq/U6VzRI5Rf/wT/uMuxIJeGNn+M55VL0JkxiE0KIwxneDLy9hoOp4xo8luiBnSixEKGv38OT1Yvwnk0YQyay4UAlX23azapNB0kOuJk8rjfrthVy3sndAdC1RBIGnEThOzPxDzsTZeQUXv+6ivmrvsSpqUwa15uvNhwgryBETDfxlO2kfOV8fP1OxNr5JVYshmvAWMJfzUYZdhbJZ12FYpkULXgOK1xBYNR5RHaspnzFO6heP3tG/D/mr6vipouHku5vm8YRZ8EmzvFsJG3cAL7ZZzG6u4MTjJWYG3fCkInMWnQoAIDqXoH3l+8m0e9izdZCNueW0K9bEjdNHUZ6wE3yEcqR5HehOeoOLUryOTl1UOahAzaYps3E03qyfmew5vDpI7qQktDy8wIUBT76Oo/Zn+6oOWZa8Mi/V2J8t4nahp3FbNz1FQ/8+BR6ZdYd2hWKGLz16Q4+X7OPxAQXt04fzsBuyTKUSRw32SxM1GHboPgz0FK7UL7sTYpefxDb0PH0PRnT0fgfEtOGlxZsYs/BCgB27i/njcXbqW/vSMUyMStLCM5+jOI3/wej9CCKXfeVim1ilORT/Mb/UDznfzErS1HsQ+NAnXoFnordKIpdvRlOxR5cMdnATAjRcFq0FKMiSNmKd6lYvYhI7npsI4Z/7BWgOYkV7WVDgcKBkkjNDW1pRZR3Pt1O7y5JbNtXCoC7Kh+9MBdnSjahbz9l074wc78qxLRsIjGT2Z9s55Sh1XOoZpzoh3AFkbzNlC6dTckn/8GOVqJYBmZlKcVv/ZHiD/5B+ablpP3oJ6CoVKz+kPIV74CiEhp5Ff9ZWsT2vFLe+WznseYItxg7uRuByr1M7Bnm/mtHcd4QD97CjTj7nMSB0hgrNtZdfQ5g7uc76ZmTCMD2vDLmfLYDGxjcK6XWBmTfu/Ts/rjqOX4kA7slce91JzP1zD7cMm04l07o1+AN35qiOBRj3hc7ax6nBNwUlFTVBADfs21Y+GUu9XUErN1RxOdr9gFQXhlj5qw1lFbGWjTfonOSIEDUK+ZJJ/nMK2see/udBFkDmzRBLKqbbN9bWuvYltxidKPuzX3MlULiuMuwjRhWtIqkM64g5q277FbMm0nSmVdhRauwjRiJYy8l5koGQNPLCS99mcJZD+I8uBFXyQ6K3nyIio+ek0BACNFgtuLA23MYitODWVmKf9h4SMzELMmjYtUHJJ44kdKYk9355bXOq4wYOJ0OCkq+mx+guaja9S3J42YAUKrXbXlOS1C5f2ICQ9wHqdi4lMDIc6sbNzQX3hPOJepOJ3HspdimjlkRJGHAqST0PxlP7xE1aajdhvLJ/gBFZdU3hptzS9DNtgkDYs4kEs7+L6weo4nqoKcOJHnafUQ9mVRGjrxbczhq4DpsBbpNuSXohkV2spd7rxtN9+/mS/g8Gj++cDAj+qYdKal6aapC/y6JXDyuF6cMzCDgaZ2BEZVhHeOwv0VqooeCkqp6X5t7oAKjnskdO/fX/pxFdZPyKgkCxPGTIEDUy1W6k+D8p0BRUX2JhLetRF//IQ6r8RWNz+XgjFFdax0bP6obbmfdj6EnVkTwnf9DS8nG1XUQxe89g7tyf53XuSv3U/zeM7i6DUZLyaZ47v/hiVV38doOF1pyNtgWwbn/R9Fbj2AbMbSUbGy1cbuECiHij+30EPr2M8DGN3AMFas/xC7ajVG4h5Txl1P6xRt0DVgM7V37RjQnLYGSigg9s6uHJ0ZdaSSffhlFC/8OikpXv1Hr9S5Npa+5k275iwkte4PA4NMo/2o+3r4ngqJStujveGMFFM89VDdWrllEybK3iexcXZOOtfdbJiZs5OS+1dc968SuuLXqBpxQ1OCb7UE+Xr2P3IJQq/QQ6FoAi+obetu2iTqqb+BTAu56W/UBMpK9lH43cRfgrFFda34remf5+e/rRvP0
L8/isZ+N48zhOUfch+FYWnsETXLATYL30O/P/qJKemYn1vvakwZm1jufY+SA2g1i6Uke0pMaP0xXxC+ZEyDqp7lRXR6SJ9+BmpxNybtPoPoC2GrTdlucMrY3SQkuVm0u4LSh2YwbnoOCjREqBRw4rDAoTnRnEklnXoEjqx+26sTK+xbDnVwnPcOdTPKEa1C7jUAxY5gF29Gd1RWqqXpwjrwQX2kBVVu/BBtcXfrjO+1yoopsYCaEaJiYmkDCSZPwn+bFSuyCu8dQ7OTuqOkDQA+Rcs4NJHXLwllkc92kwXy+eh/ZaQl0y/QTqorRO/vQHCXL6UV1eUmadBspKT34tXqAd9dV4fc5mTo6g16JVZjp52CGSnAkppNy9rXECveQPvlWLMVBzJVM4hmX48jsh+1woZbuoXzFPFBUUqf8AhQonvdn3EVbOaXvCfTvk8PYYTnYdvWQzFc+3MKKDYeG4Pz3DaPpm902c6jS/C4uHt+Ptw5by/97V503kH2FIXpkBxg7PIexw2rvs+ByKHTNTqSwsKJDTYoNeDR+OmUoT72xBqju8dAcKhnJnpplVAH8XidnjupS7ypJg7olcduME1iwfDc9sgNccFov2QlZNIpid7CZJMFg6JjbsHdkGRkBCgsr2jobAHiMUqLOJGxbwW2U1mrNaQpFUbCpHupo2zaugvVUfbuYwFnXUbXmQxyBNOh/JjhcNX/rw9forpveodacH/7fVbKD4OzHsI3vejAUlbSpv0TPGtKkoU3NoT39rVuTlLt50mqMzl5/Qst9vg6vW35YH9V6zqEQipgUlYZxaiqZyR4cPxhs7jHKiDoTsW0Fl16K4UrEVtTvbvhsXGW7wZdCVEvGbVdB8R70tP7YigPbrn19VVUIKBVEivYTS+kPCriKt4M/nagrtaaeBSgoi3DPX5fVyssZI7pw44WDMdtouFDUsPh6ayFvfLyN8soYXdITuPJHAxjUIxmnQ8WyqVWGw3XUusSyYW9hJWu2F1IZ1hk1IIPMVB9b95SwfW8pPXISGdY7jTR/3eFitcqsKNWfvU7+nYbm/1s3tg79oX379tUsEdq1a9djn9BAixcv5plnniEcDjNu3Djuv//+Zkv7cNIT0Ekd6cb4eES05JpWiKiW3FxZq6nMbarzhtNDJHc9kZfuwdYjpJz/MyzFUetH9mg3LoeX7fD/u41yit97GtuI4Rs2HtXjJ7TqPYrfe5qMax4m7Dy+MaRCiPh1eN3yw/qo1nOmjc+p0iMj4YhpRbQkHJaOpTqJOZNRbQNssFEBhWhi75rXRhUfpA367kJ1r29ZNs70bMrM6iE2ChBN6V+TscNz6tRUNIdSa0x6SqKnTVvS3ZrKuKFZnNg/nYhu4XM7cH23ys/35exst7iqAj0zE+iV5UdRDpVzzOAsxg3NbnigbtsdqhekM9m4cSOPPPIIu3btwul0ous6vXv35r777mPIkCFNSnvv3r387ne/48033yQtLY3rr7+ezz77jPHjxzdT7g+ROQGdkNMox7H9M1xmJe7KfWj7vkatdw2etmfbYCd3x9trOLYewRFIR+s6GKsZPpoRRyIpF92J/8Tz8Zx6GdrIKSSOmUbq1LuIuCQAEEIcmaYcWmXMgXGUVzYibSuCuW4+rpIdaIqJsn0JWv5alEbU05oVpfzrD3BHg7hiJTh2fI5mRep9bUqCi2vPH1zzONnvYtzwnDbvHbJt8DgdJPucNQFAPLBtu85739Z/C3FsGzdu5KabbmLz5s1Eo1FCoRDRaJTNmzdz0003sXHjxialv2jRIiZNmkR2djZOp5OZM2cyYsSIY5/YCNIT0Ak5qoopXPxv3L3XoOfvQPUlkjy1H1EtqdmuoSjgKs8FpxfDnYJWshMzqRuG48itX/WnY2NtX0rV9q8JnDqV0NcLqFz2Op4zbkCn6ZN3o4GeOE/uUpOWY9gkdNUprSdCiCNyxUqIrl2AZ8QFEC4ltm8TjsHnYCrNs9a+Fi6iZOV8Ql8v
IGH4BEKrP8CRlEnKjD7HXU9rsTKCn72Gw58KDgdGyQHSuwzE8NQ/UXTcsCz6d0+iokonJ82HvxV3yBWiM3jkkUeIROoPtCORCH/84x95+eWXG51+bm4uTqeTW265hfz8fM466yx+8YtfNDq9o5FvfycUTepJ8jk3UPrRPwFIv/wBws0YAAC4zBClC/4Kto1vyDiKls8mfcZvMNIGH/vkw9i2gtZjBFkz7iaaPoi0nsPB6SXaDAHA94zD0jIVZ+frWxZCNCtVr6Ry4xIiu9dhhkpxZvYk0H8sprN5goBIQjfSpt5FcM6fCK3+ANUbIHXqL9FVDw5br66nGpqWJ5OsGb8i/5XfA5B+yb1EPFlHfL2qKGQne8lOlsURhDhe+/btY9euXUd9zc6dO9m3b1+j5wiYpsmqVat4+eWX8fl8/OxnP2POnDlMnz69UekdTfz0u8URV2U+5UvfQvUlVm8as2IuTqN5J09FVT8pU36BURGkfPlsAqdchJHat3FpudJIGHQqpq0STe5HNKH5JtcIIcTxivq7k3LuTzBKDmDrEZLP/QkxZ3Kzpe+wDcyygprHth6FSAX2lk+xt31+XMOPXNEgwU/+g+L0oLi8lH32Kq5oUbPlVQhxSGFhIU7n0YN0p9NJYWFho6+Rnp7OmDFjSE1NxePxcO6557Ju3bpGp3c0EgR0RqqGK6cvaZf9jtSL7kRNSMRWmrfTx2HrxPauB8sARSW89Su0aGmzXkMIIY6HSy8jWpALgCd8AKdZ2ah03BV7KPnwHzjTu6G4fZQs/Bveqrzmy2f4AKWL/40jMZ3k0y/BtkyK33saFZPKbz9B00MNT0zVcCZlknbJvaRd+t9oKZmgSie/EC0hIyMDXT/yJncAuq6TkVF3c9OGmjBhAkuWLKG8vBzTNPniiy8YOnRoo9M7GqkpOqGoJxPfuT8jggslOxUtcwBGM41l/Z5qRokc2EXi6Zfj6TOK4nkzIVoJcb5fSVXMZF+wEqdDpUuqD1cjN7ARQhwfDYPI13Mp3f0tKefdSHD+0/hPvgB16CSs41wO2HYH8J9wForqwJXVi9iBncT2bkIblI1hN/1nM+rLIXXy7ahp3VBMnRRVA1Wl/JsPSZ1+L5Hj6HWIOZNIv+BmikPV4xy9428k1kz1vaKAw4phKC4cdgxLcWLTtksrC9GWunbtSu/evdm8efMRX9OnT58mLRc6YsQIbrzxRq666ip0XWfcuHHMmDGj0ekdjewT0M40Zi1cRQGnWUlMTcBhhkFRMdXmvemvj9MIYTlcmIoLj1lBVAs0esJtR13v+XDlYZ0nZq1mz8HqVrwzRnbh6nMHHDEQ6Axlbgwpd/Ok1Ridvf50hw9SPPuPWJWlaGldSb7wLqLuxq0E5lEilC94htj+bbh7DcN/1k+Jqr5my6uiKLijhUQ3f0FozUfYho5tGqRNuQM1pSsRd3VLojtSgKX50DX/EdNqie+UooAzuJXozm/wnjSF6IZP0NK6YeSMaDeBQDzWJa1RZpvq3zPNoeB3O+vdo6G1tad9Ar5fHai+ycEej4fnn3++ycuEthZppuzgFAWcBRuoWPg0XqMYY+18rI0f4bBi9b7eaVTgCe1FVRXcVfm4GzB2tLA8yqdr9/PRN/vYur+cikj1eFVd82Mq1ZuZRByNDwA6i+37ymoCAIAv1uxnf3FVG+ZIiDhj6tX/ADsWrR6u+J3CiihfrD/A0g0HCVZEa53miR7EV7ETlxHCW7kXT+UeduZX8IX/fDYM+3+Uebtj7V+PtyoPVbFwR4twV+U3KouH9g2zUUKFlH81H9WXRMr4K1Gcbko++ifG7m9wl+/GEzlIyZzHiCx/Dadx7CFCGrWHKWh2/b8DDWLb2JEQoW8+oHjW7yhf9hZWVRkK5rHPFR1WWZXOvxZs5q6nvuDuZ5fx6dr96Gb7XGK8rQwZMoTnn3+ewYMH43a78fv9uN1uBg8e3KECAJDhQB2ebYPiTkAv2E3hS/dgGzFSzr8Z
S627s6+qgrVzBcVfzCL1vJso/uwV3N0G4T3rxiMuxxkMxfjDP1dQ9d2Nf5LfxdQz+zJuWDZOtX20BrUXMb1uRakbUnkK0Ro0DCKbl6AF0kmadjfF859Cz12DOvg8iip0HnxhBZXf12MJLn7/01NJ8jlx22HsolwOLnweX7+TUYA9KSfz0MKKml6TAd268es+AWLzn8Z/8mTK1iwCbJKm3Ve9mdcxuCv3galjJPVAK9iEndSFmCsFI60Paef9BKOskPJvPiRz+q8wyosIfvB3FFVDcbqxqsowSgtQzNhRf7GdRojYN3PxDDmDWKAnrrJdRLZ+iWvkFHTt+JZuBrBRULoMw9NrOJHd69BSc9D6jCYqtw2dlg289ekOlq7bD0A4avDv9zeRmuhheK+Uts1cOzNkyBBefvnlFtsxuLVIT0AnYAWy8fYehW3EUH2JaF0GY1E3CLAscPQ5FVdOf4oX/AVFUfGPveyo6/HvPlBeEwAAlIVilIWiFJSEW6QsHVmfrol4XIfe964ZfrqmH/+PrxDi+BlouEZeSOaMXxIJ9CRl+r2oA87EshV2H6ioCQAAyipj5BZUDy2IKl6U9J4kj7mYqi1fEs79lg0VybWGTW3NK2e/ko2r+xBKP/onRskBks+/tUEBgJMYlStmE3z7Ucy171I0509Y+9ajqqAU76Xk09co/3ohRkk+sbIilJwheHuNwNYjWFVlaKldSDz/NmLu1CNew7Zt7D1fE1qziOK3H0XLW0Vw9qOEvlmIvX8dinL8DTaqYmNu+4LI7nV4B56GUXyAyOr30Jp54zTRfpSFdZZ9u7/O8S/W7sMRR5u4HY+uXbsycuTIDhkAgPQEdHiqqmDvXEXVluUkjplOxdfvU/HZS/jOuRmduvMClFgFRnH1l9yMVGCFilDS0o44lMftrK9HQcFVz/F4l5Xk4fc/PZUd+8twaQ76dU3C55L3SYjWomt+XGkBKKwg6k6vOe6p53vo1qqPKVjYoWL0YHW9aBs6fmfdHjyPBnr+9urXmAZm8T7UhGws++g3Rzou/GdeS+z131Hx5Rx8A09D6XUKlgUYUVxdB5B41nWE136EFQ6h6VXECnbXnG9WFEO4FI4yWVhRFJQeo/D2W094+0qK33saAO+gsShdhjVqTLdlK3i6DiFl4k0ovU7B2/8UVH+a9AR0Ypqq4HY6iMRqD/lK9rvbxbwA0fwktOvgLMtG7TqM9Ol3o55wIWnT78V/2oz6AwDFxizMRfX6ybz+MTw9TyC6ex0OM1pPytV6ZwcY1ufQxLoT+qfTNd1PemLLTzzuaGwbMpM8jBmcxUn900nyNd+GZ0KIxuuVFeCEfoeCglOGZNEjq3qirWaGsStLiORtJfW8n5I8ZhqDXflkpx7aTOuSCf3IVkswq8rIuPohEkadR3jrVziMY/eIqpgY+zdjVpahaC7CO9egVlTPJ9Azh5Aw4UbM0gN4+5+Mu+/JlMz7P8yKIK6cfngHnIKtRwjOfhRv7Ojzt3QtkYQTJ9Y65h95HrrW+AmQEV8OZq8x6LaG3mUU0cSejU5LtH8Bj5NLz+lf65iqKpwxokunXlAgnsnqQO1MQ2bAu+woMcWNioVixTDVhq/LqWLg1ENEnck49TJshwtDPfrOkVHDIq+okqhukpbkISPgobmnA8gqD/FDyt08aTVGZ68/4cjvc9SwyC+uQlEgJ9WH67DhDU69As2OEHWl4jJDKLZF0EokP1iF162RmezB6QC3XkZES0Yzq1AsHd157J3YHVYUa9PHWNFKfCecS9miv+M/eTJ65lBsGzyVeyl87Q8oqkratLuxy/Kp+PZTAqMmoiVnEN71LVpKNlpKDoY7pd5VgjIyApTv+Jbg7EexYxEUhxPb1FHdPlJn3EPU36NJ72l7FY91SUuXOWZYbN5bymer95Ga6OHMkV3onpFQPWGgDbWn1YE6E+nX62Dclfuo+OJVEs/5KcaBbRhFe3GMmIx5jBv571loRL/rVm7IDxhA
ghplYPRb1C5DUKwQVt5arG4nHtfW9kII0ZbcmkqvzPqX2dSdAXSqbwoijup60Qf0zTl0o2DbENGSATAcPuqZdlUvU3XjGHIuqmUSdnjxT7wN3ZFQMwTTdKfiG3o6Vd9+StGbD6H6EvEPOwPV6eLgqw+SeNpUVF8ShbMexDdkHK5Tr8D4QcOP/d1KPrYewztoLIFxl1Hx2SuEd36DHalECRD3q7eJhnFpKif0TmVUv3Rs265uNJDPTqclQUAHpBfkEpz1W6xwiIQTz0dr4S+oFiml6OMXcWb3xY6EMEMlpF3eB9Pd+B3xhBCiPdI0FdO0mzQGWlHAYVShRUuI+buBCa5IkEhCV2Jq7VZVXUvAf+p0wlu/wo5WYVWV40jugp3SHWdGN8q/fAd4B1QNz4DTiNXT86soCnrWUDKu+B0kpBBWE/GNv47EMRdT5e0qN3HiuJmyLGhckDkBHYzuzyEw+kKscAgUlYSRE6my3S1ax0cTckidcgexvE3oRXtJu/hXNRvZtAeHr3zRiEUwhBCCqGGxcmsRj/zna+Yt201p5bHX2FfV6ppXUaonGAMo2LiLt6F/8w5Fs/6AK3819pbFFLzyAM6D6+vUUU4jRGjZG9jRQ3uKlC7+N3a4nMSxl9Yc8/Y7CTu97xHzYtsKtstPxeJ/4QkfgPIDhL75EKcRX8NlhBANJz0BHYyzeDtFX8zCN3gs5Y5U3ly6n8XrN9KvWzKXTuhHRgtM2HVFiihb8jqKywOmSfnyt/BPuJGYK7nZr3W8HLaBum8NanpvYu7U6h9ZfzpRX05bZ00I0YGs3Rnkb7O/BWDz7hK27CnhzstG4DhCy4IrVoy+ZQmuIRNQQoWYJflYvU7DaYQo/XwW/sGnYlsWhXP/DICWnI2SlF1nWI4jWkp4ywoUp5u0ab+matNSwpuWopQfIPjhP0HVcPgSCW9dgTO9G44h5x15R3jLxCjOJ/jGg9iGjqv7ENyWbO4lhKifBAEdjO3PIuX8m1G6n8iStQXMXrgNgFWbDnKgqJL7bzi51oS3ZrkmCo6EZJLOuwX0CJVrPsRuJ03uroq9FLz3DM6MnvhPuoDgB8+jJWWSfMn9RNX6x/8KIcThHA6VT1fl1Tq2cVcxJaEY6YH6b7iVcCnlK+bh2v0tetEenNl9CXQbTlRLIuX8m6lYMgtf35FUbVsFQNKZVxLzZtQZmhP1dyN12t0oDo1Ycm88o3Nwdx2ImtoF1ZdI8nn/hRrIpPS9mTgSM7DUI/9sx3yZJJ15JcF5MwFIHHsJkXbQWCOEaJ9kOFAHE3MmYfYci6V5WbahsNZzeYUhyir1I5zZhGu60/Cd9/+IJnQlltIX7/gfN3hScUuLBboQOG06emEuJQv/huJ0k3z+zyQAEEI0mG3bDOhZe0fUxAQXPvdRbriT+5A84Rpi+duw9RgpP7qRmJaIZlYR2b4SLSmdqm2rUH2JoGoUv/8srpLt9Vwb9LT+RJN6V/9fS8DsOYZwQg9SL/sdsbRBRFypJE35FVaP0fVuBPk9d+W+6ut0G4yWlEXx3CfwxIKNf2OEEJ2a9AR0QN/P2D9pUCbb9pbWHM9M9RJoobXpv993wLZBVxq+JGlLsxQXzvRuNY8d3kTwyNJfQoiGsyyb8SO7sGVPCVtyS0hMcHHHpSOPutmfqyyX4OevVS/dWVZI4eL/UDrkEtLSkvEYOs6UHLTkLJJOuRDVl0Tww3+AVn+vwg+HCH0/KTniSKw5FtWO3fBiuFNInnANarcRKGYUq2Anuqt9NNg0lGbHMBQXAE70o+5oL4RoGgkCOijbhjFDswmWRfjk6730ykniJxcOwaM1vXNHUZTqiW6WjqVo2DZoGBjt8OPirsil8L1nUFxefP1PoXLDZ5QteJqkC39J1CG9AUKIhklJcPHLy0dSGorh82gkuB1HX1bTnUDC4HEc7HEOVvF+PFX5vP5FHv176Uw/7XwsRSO554nYDjc6DtKufKjmpl5TTAy78buJK1gotoWl
VNfJll694aPh8KH2HY9h2eAEpWdm05cGVaAqaqIoCqpi4XRoqN9NhFawj9ozcbzcFXsIr1+Me/QMVCNM5YrZ+MZcRsyVWu/rY7rJgdIwxeVRfB6NLmkJuBztY6iqEB1Bm9zVPfPMMyxYsACA8ePHc/fdd7dFNpqdy6pCd/iwbXDZYWJKw9bu/56Nze59JURjJjaQ7AHHYS1HLiLE8KBgo1lhAh4fV0/ozsWn98TldKKYEXYcqMC2LNxuJ163Rk4ACkKgAMl+Fy67Og3NDqPaFjE1oSZdgMLyKCs2HqAqojN2QCLpAQdVlovU8F7cKVlE3Wn1Z76NmN50Ek44G++QMzADOTiSMnBm9iamtf3mJkKI5uO0wujf7YdyeJ11JFHToiwUIzHB1aDGEc0Ko6ou0hPdeO0KorYXh22i2BaqrRM0PER1yPFb6IqHIpKJDLmUdz7dyebcCJmBDLYfCLHjQIRzT+6G1+lAUTRcoX2YB3ZjZ/XGpwRRNCehdZ/iPnEKscOGVSqKQnlEJxo1SQm4jjghWcFCy1+LFa1C7XUaTr2Mog/m4xlxPrbqBJSaRRsaEgAYlk3Jd4GP/weBT2mVzser9rLoqz04NZXTR3Yl2e/i5EGZZJetxzZj0OOU4woEwrpJRZVe/Xt0+IZtdozwtx9Tuf4zzMoy9OA+zLICvP1ORumWVmvJ1uJQjK15pRSXVwc/X67PJ68gRJ+uidw6/QRSE1wNzo8Q8azVg4Bly5axZMkS5syZg6Io3HjjjSxatIgf/ehHrZ2VZuWKlRD69EX8o6egOJxULH0d/9k/JdbAm+aIbjJ3yS4+/GoPDlXl/DE9ORis5LJx2aRnpOOqOkD5xy+QdM5PMcsOEtm2Et9p04lu+AR/IB16jOKVxblEcFEZ1vl6cwFup4PLzh3AohW5FJaGuWRCH840lpE6fBxmcA9V21eTOGY6JYtfJPHsn1CoZvDQi19RUVU9r+DDrxQuO6c/sxatZWjPRG6eBP7mX3yoSXTNj+vUy4l+12XsGHoeuurEtqU1SIjOwqmXEf7iP/iGT0D1JVH2yb9JPPdGop7Mel9fWB7lyTfWsr8oRHqSh59cNJRQlU7/rkkkfTdk0m2UVW/apWp49GIq136EK6MHnvRulC7+N0ljp2NWlmGWB6ks3ENl+kjKCeDf/xHbB1zDX+ZsJBw1GNY3jdOG5fDhij0AdM1IwKWpaOgYa+ZT8s0HJI+bTnTLcipWLyL5zMsJ71iF4vLiOHEapq2iKPDt7mL+OvtbwlGDEwdlcN3EQSR66w6F8YQPUjD/abAtkifECO38mkjuemIFuSQMPJVw3lZ8Z92I3oCd5EsqdZ6ft54tuSUEfE5uv3Qk/bsEsO3qJVP/Onsd2/LKgOqdZD/4MpfThuVQVhFhctVirH0byLiqK5EG7ki8v7iKJ2atobg8QrcMP7dfOqJmRTtdceE+5RLMyjIiO1cDkHj65VhdRtQKAAorovzPP78iFD40/+3Sc/rz3tJd7NxXzvNz1/OrK0ahNfe29kJ0Qq0+MTgjI4N77rkHl8uF0+mkb9++7N+/v7Wz0QJUwCY4+1GCbz2MbejHtWj9zvwKPlixB9sGw7SYv2QX3bISeXNpfnWDtqJiVVVQ9PrvKX73SVDV6ms6nJR+/C/2rF/HR2sKSU308PXmAgCiuskrCzdx2vAcTMvm9Y93kO8fTOGbDxNc9C9sI0po7SLMiiAoCvsKQzUBAFSPk62KGABsyC1n6dbKdrkOv3HYmFFTcUkAIESno4CqEpz7fxS98SB2LAxK/T9fumHy5ifb2V8UAqCoLMKsD7fy1caDPPqfrwlFDVyxYsreexJ1z1e4ircTfON/8HbtR8nCvxHevhIrFqbwnZkEP/gHlm1hWgq+5X8nZ9UzlGeN4um31hOOVteN63cEcTkdeFwOkvwufnxON5yKhYETT7/RKJqL0iVvUbF6EVpSJhg6akIq7qETMO3qMhRXxnj6zbU1aX6zuZC1
2+uf0Bv1ZpB8zg0AlH7ybyK561G9fhL6j6Z02Rz8J07CcBw7AFAUhSXr9rMltwSAiiqdp99cQyhavaRoXlFlTQBwuBUb8vF63ejp/Uk84wr0hOxjXguqexyem7ue4vJIdfqFId75fEetDlvVCKMXH7of0At2o5qRWnleufFgrQAAYOHy3Yw9oQsAW/eUkl9chRDi2Fq9J6B///41/9+9ezcLFizgtddea/D5aWntdZx3AOeYizmwex02kHL6dBK69qx5tqi0iv1FlaQmeuiWWXfiasWWwjrHTNNi+74KXB4XgdReMPZiit7/GwDJp0zG06U7Mdd4QivnY1ombpdGVaR25WjZYFqHqtmQK52M78aPJvQ/meAn/yFtwrUEuvYmVa9b4TsO667dllfGNZNabtJtRkb8TeiNxzKDlLuttN/681gCeE6bQnjrCmzLJHnsdPxde5FYT6tEeSjGzn2167K8whDD+6WzatNB8ovD9OifitFrGCUfPA+At/cIXKldQFEJrf2YlPFXEVxY/ZyV1psyT18Stn+JHYsQCvQiZuyolX6ooorfXZj8/9u788Co6nPh499zZp/JZJnsCZusYgREREVBFiWiQRZFBb2oxdtXrS2320ut5XbxtS16e0Wteu29Vmtr+7oU0UoFrQhqQUGQRZEdExISkpB9JrOcOefcPyKBQIQEJwszz+cfMoeZ3znPZObJPPPbcNTsI6WsmNTBt2BxezFShhIquJymrasBSDp/Ak3b15A+9RskHff3oepADVq07Q6tpVVNZGYOoT2abSxNH72K7q9vafe88QR2bwDTwO5ykpzRsdfZnuMWloCWQsCg5XW6s50CAFqGGHmdFuzle0m+7B7svo71dlfWBjhU5W9zbF9ZAy63Ha/HgR5upvr9V9Ebqki7Yh6RygMEdm/AM+QiMi+Y0vqYksMnb37W1KzhOm4lp6jZ8++1rhTPsZ1KosbdlXpspufevXu56667WLRoEQMGDOjw42pq/BhG7xvs7QxXcWTZw9hzB6PYHFQtX0rGvF8QcuUSCOv89q/b2FNaT5LLxuI7LiYrpe24mvwMD4pybAyn025BUWDqhdlEwxr+6n0cWfnfuIZeglZZTOUrS8i4cTF1q/4LxWoj16Vx4TlppHrbfgOUkmRH01q+2bFZVTKb9+McNBq97jB16/5KxjV3c2TFk6jpfUn3nsONU4bw1zV7MU24YEgG1fXHvlG5YmQW1dVds/tkZqa3y9rurRIxZpC4Y9XWmeit+fN0nFoNNa8swZbRF0tKFtUrnoDkLELu/JPum5npZerFfXnpnb2txy4tyGH7viNAy8o7dQGw9x0BG/7W0v7AC6j94CVUl5f0qXdQveJJnP3PR/fXYWssx7vp75gpOejJuaSXryMvYyDlR47lxosGenCv/QWK3YF7/kPUBlQs/lrYvZqmrauxJKVihEPUr19O2oSbOPLW79FVO1r6MEwTkl1WctM9VNQEWtscPaz9fGvXGvCvfbalAFAtYOg0bl5F2pW3ESzdTe17/x/P1HuJdGAVt0mj89lx4FiPw7n903BZVaqrm0j7irGfLoeV4b4Q+sYdVL32GN6r7/nKibvHUxWYeGEf1mw+th/D1LH9CAc1Qs0tuzO7L70J56AxGHkX4Bh8GY6Bo9FzCto8D2POzeLjnZVt2h6Qm9xaYKiqgtdpjdscI/kzdu2JHioCNm/ezMKFC7n//vspKirqiUuIOc2eRlrhN1EyBoKiYlbtJepoSYxV9cHWb1z8QY3iw41kpWS2eXyfdBc/uX0sb20owWG3cu4AH2o0woWDfBiGSdSVju+aeyD3PDxaM2ZDBSGHD+/lN6HY3RieLOZnlFGmJ/HtOSP5YFs5vmQnl4/MZc3mMiaMymPq2Hz6R/fBiG9gCVRhBOoxc4bju/Zb6J4srKrCtIv7MHZ4Fno0ipMQ7+9sZMQgH1NGZnLeCetoCyFEd4jYUkmb+q8oaX0xLHbcw3YRdX71B88J
I3PxOG18+FkFA/NTCEV0SiubuHBYJn0zPTgitRz52yM4+o/Amp5P3ernyb75J+g2D4bDS/pV38Ca2Q8zGsEIh0iedDulWioH63TG+Wr5/sgRrN1WScnhRqaOzqLPzj9hzxtEpOIA/vUv47zsVqJWF7bUXKy+PJIKxqM6PDRs+BuWpDTMUACj8QiKbzAmFpxWle/NvYD3t5a3tHlxP4bkJbcbmxINEa0pbylYbvgx0cN7qXvnObTaKrzj5mCgdqgAABgx0Me354xi7SdlDO2XyuUj8lrH0melOLjpyiG8vHpvm8d885qBDMh1UOP1EW2oQomGoSPzcE2YNWEgueketuyp4rIReVw4JKPNeP+w3dc6CVi32lD6jWvz/wAFA3xMHJ3Pe1sOAZCZ6uLykXn85e1dAMy6YiAZyQ5ZGEKIDlDME99hXayiooLZs2ezdOlSxo0b1+nH9+ZvshRFaU1Yx/9c64/w7//zUet4z8XfGMvAbO8JjzVxNBTjzO5HuKEGbG7CliQM46vab+k1UBSwRhpBUYnavDgiNWj2VKx2O7puoOsGqtoyX0HXzdY2jvait7ShnJRoj55PUVq+WdF1s937xEoifruRiDGDxB2rts5Eb86fp3N8T+nxP5/o6POsKC3DGf2hKAcr/VhUhfwMN05by0o2zsYvMFw+DIsda+Mhoqn9W+cXqaqCYRzLky3/KihKy3HTNFFVBVVVsPgriexZj31EIWb1AUwtRLTPaExTRVFMXFodRnMDpisVi6KgGwZKsI5IyoCTVtU52mb0hKFBJ3KEqkDXCHvyUYniDpQScmYTtbjP6Lm1WtV2c3zUMCmp9LP7YB12q8rwfAd9s1MxFBuOYCWYOmF3XqfOdfT3croYT8UwTaoaQtQHNIoPNbBs7T7SvA7mTBnMyIHprb/jeCT5M3btiR4oAh588EGWLVtGv37HVhOYO3cu8+bN69Djz9Y/YofrgxRXNJKV5mZAtpcTFy6wGiEiG15CO1JKtLYc38wfEE4ddNp2bUYzwfefR7FY8Vw4jZrlvyFt6p1oOSO//vrQ3SgRE1sixgwSd6zaOhNna/7sjPaeZ0VRqA9ECISiZKQ42ixNGQsWxfhykq+JgoF5Bmvnq0rLHC4ABROTji9w0NXvqVMVXT0pM9PL4apGQhEDm1WJ+e+1N5L8Gbv2RA8MB1q8eDGLFy/u7tP2uJxUFzmp7e8b4NTqiNhTcBVcga3scwJ6FLztL313oqjFhfu8CdS8/gjNO9dhTc9HSc3rlQlbCCG6y7HeUoVPi2t58q/bCWs6A3K9fHvOqJiuJX90lZ+Wj++dLwDsWj3RfR9iHzoBpbkWvfYQRv/Orb/flXrz3xOLouBx9I7nSYizTfyXzb2cI1RF7SsPYCndhH54L01b/kHKxUXQWNGhx5umgprkQ7F+uc26Lw/jy+XhWrqwjw4f6sVZXAghYsARKCN0aC8WRcde/TnOaD0NzRpPLWspAACKK5r48LPDKL1ovWM1VE/DumU0vfUENa8uwb/tHaya//QPFEKIr0GKgB6m25JwDhlL7ZtPUbf2L7iHjoXMIWhpgzv0eIcZouHdZ7Ekp+O75lsE921GqdyFqoK9/gC2w9uxKRqWA+taxnAKIUQcshNEr9hN05Z/oH7xEXrNQYIfv0YkFCQU0dvct7iiEbWLNpM6k3bDyQNIu+oOImW7MMPNpE27p81uwkII0RV6bIlQ0UK3uLDnDCbA2wA48oYQsSdjmqeuz1zhakKOdHTTJG3y7ehWJxFnBpnzcom6M1D1CKE9H+Hf+g6uIWMI7vmY1KsWoAzKks20hBBxx6qHqP3sfbSqEtxDGojUV2N6fGTb/Qzvn8bOLzfFApg4Oh9dP/OJqe2JGga7yxrZtKuSATnJjB6S0e6Ov+1xNJVx5L2/YEnJRG+qo+mfL+GaMB/N1v4KQUIIEQtSBPQwZ7iKqrd+h3vkFBQUjrz5NFnzf03Qmf3Vj/GXUvXSL/BdfTdadQmN
u9bhm/PvmKZCKKlv6/1cF80kFAjgd2XgvfAaLIMuQ5cCQAgRh0IWL2njb6Rm9fNUpY1idbOX/eUaVx9UuHNGAZ/srqbkcCOXnp/LsD6x/3D9eUk9j760FYD3OMSn52Zyz8wRWDqQck2bA9egMbgvuQGz8TBaxV4Ma/vr9AshRKxIEdDDQo4sMuf+jKgrA4DkCyYTcJ56UrDu8uEePp7aN58AILXwm0SsbWe6WwyNssO1vHB4NLvK/IwblsL11VV4M3KhE6tOCCFEb2eP1KEoJmY0gmPSnfzXG/WUHakF4L9f/5x7bxjJ1DH5qGofdD3286OsVpV3Pi5tc+yTXdXUXxki3Xv6NfvDjkycE24nbFohPRWLbxC60rFeBCGEOFMyJ6CHmSiEkvoRtbiJWty4+g4/7dJwpqKiOo6tB63aXZhK219lRLHw+7VVbC9uJBI1eG9HHf/Y0YzSReNghRCiJ9iNZvxrnqN53YsYIT/lZZWUHbebL8CHn1WgqmprAXD8pOBYzA82DJN+2Z42x7J9LjxKqMNtaOax7+SkABBCdAcpArqQooCzqQRnuBorEZz1+3AYHVvxwRk8jKO5HFUxcTcV42rYj6KAw1+KPVCBf/NKUq9agHvEZOpXP4dDa2jz+EDIYPfBtsc+3n0ErQu+BesIp78MR6gaFR1n/T6sevPpHySEEKcRUd0kjZlG895N1Lz9LMl2gySXDZtVxeVo+WA9cnAmhmEQ1HS27K9h2QcH2Hqglq0HatiyswxLzR4sZgRHuAZnY3GnCwPDMJk8pi95GW765XiZVziMMedmsbU0TFMo2gVR9342PYCzfh8qOo5gJY7AoZ6+JCHECWQ4UAwpivnlesoKqmJgjwaoXfEYqsODa+Ao6jauIKPoXhxp+afcZdFOmKb3/0yk+iApk24lWFVM4ydvkzZlPjXvv4hnxCSyb/sVIUcmzv5jcF9wNSFbWps2PE4LwweksbP42GS4SwpysKlKt6/57DAD1L/1FKZu4BkxieoPXiR91g8h6/zuvRAhRK+nKArhqIHVAuppekUdhp+wmoRqd6OoFkzdIKl+Dz+YNxelthSrHuKzQDqTfeVYw1ZWbGlm2Zp9rY+fMWEgk/sEqF/2H7gvnkWgZDtGoBbfzQ8QsnRu3kCOU+MnE032kcfSZbtbj194bhb3zCzA0ouWJO1qigJK1R6q33iMlIm3UrflLSwuL8kzFhFW2t8vRwjR/aQnIEYUxcRa8Sm2ys+wE4YdqzC1IOnXfRftSCmNG1eQNGoKTZ+uBf3U3wxFcOCdfAeYOnVvPoUlOQN7/mDq3nkO1ZWEc9TVNDtzMRQrmsVD2JVzUhsWRWHB9AIuOS8bj8vG1Zf2Y8qFfXpk05ew4iH12u+gN9XQ8MGLJF04DT2jY0ugCiESR0gzeHfrIe7/3Yc8+sp2KuqCX3lfR3MF9ct+iTtUTsOa57Fl5JM+/ds0797IMEc1+TUb8H70NFPVj6j/++NoBz9j4462+698+GkF/zzkJHr+dJo3voZWeQDfjB90ugAAUPQQ9vKtrPu8rs3xT3ZVUdMY7nR7ZzPTBCN7OJ4Rk2l4788YQT8pV98jBYAQvYz0BMSIRQ8R2r+J5s//iSN/GOGynaT78gj761vvEy7bTcr4G9GS+xzdw6tdCgZ6zUGMYADF5kAxDbTKEgD0pjrM+grISD3tNaUn2fk/MwoIRw1cNgtmN1cAqqnBl3MVokdKsOcMJHxoN6EDW0k9/0p05+knzAkhEsfnJbX8aeUuAGobQzx0eDO/vGscHvvJO8KaVgeqy0vlCz8l5eIiLKnZGDkF5N3+K4KWVDzjbiJcugv/J2/hHHwRDLyUyLtb2rSRluykv0/BuuPT1mPh0h1Yzs3p9Lj8iCMDzxW3kb2uqs1xu1XFYUu8HW0t4QZCJS3Pq6mFiFYdQO2X3mt2QRZCSE9AzERVF0njbkR1uAmX7SRp9NWoWYMJf7GF5IuuwTd1AUYkiNF4BFvN3lO2
pRoRIof24Bkxmcx/+TWGFsKWnkf2N/4T1+AxaJUHsJhah65LAZxWtdsLAIsZhX0foHyxHrsZQgk3Y8/uT/ZtSzANHYL13Xo9QojeTVUVPjtQ0+ZYYyBCbWP7k2s1eyqe8yeCEaXho9dRU3KIqi6ceYMxFAvawe3ojdWoTg+hL7ZirS/h7tkjWj+Qu51WLj4vmxQzgCXSRNrcB0iZeCvh0s9Ro1/dA3EqIcXDhFF5ZKY6W2P61xkFpHoSa6KvogDNdSiqhazbH8Z7URHh0p2oemL1iAjR2ylmd386/JpqavwYRu+7ZKsRJLL5dQJbVmHL6INWU076rB+Arx9KQznYXVicXiKHdqHmDSdiS223ncxML9XVTVj1IJg6UWsSTr0R0zQJW1OwRhtBdRBVe/ca0s5IDbV/fRDdX4en4AoCn/8TxeYg4+afYdrdhE/obj8adyJJxJhB4o5VW2eit+bPozbsruJ3yz9rve1xWvnVPZfhdZzcae0KlFK/+llSLplB49Z3safn4SqYSFK/YdRU16HsX4feVItrxBSa3v8z7hGT0TLPpS4Qpa4pjC/ZQSSiE44anJOsodlSsJgaqh5Es369fQSCEZ3K+iBJLhsZXjtdvSxzb3xPKYqJI9pEyJKM1QiiGDqaNSmm5+iNcXe1RIwZYh/3mebQeCNFQIyoGKilH6MoKmpeAaFP3sA5fAJhT9sJwKqqnPL64+kN7gwe5shLP8eMhACFzHk/J+Tt3+594ynujkrEmEHijlVbZ6K35s+jmiNR/rGpjDfXF5Pjc3PnjAL6Z3ranctkMyOo5duoeet/yJjxbzRtXolp6GRf/0PqghZUoihGFF11YjOCaGr8jkeX91TiSMSYQYqAriJzAmLEQMXsOxYAExXb2BtaNn4BHOEj6FYXUYsHR7CKkCPjtHsBnO0UTPSGw5ha5MsjJlr1QVRvHxkTKoRol9tuZeblA5g6ti92i4r1FKuZaYode9YQbLmDqX7tEdLGz8GaPxzV4cJT+wXN7j5YzDDWcDVhR2b3BiKEEGcBmRMQQyYq5pdPafRoARBtoP5v/0lk03JsldupeuHHOBqLe/Aqu4dDq6Vu1dMoNgfps36AJSmN+nf/gCNU3dOXJoTozUxw2yxYO7SxoYKiWki7/AYaNq0kvH8TTVtXU/mXn+Oq20144zLq31iKI9rY5ZcthBBnG+kJ6GIRazLJl82h9u+/JbDtHVxDL0F3Z/T0ZXW5sD2d9NmLQLEQSemPb/aPMAN1hFzZp1wZSQghOkJRgJoviBzajXfsDLwjIzR89BoAnuGX07h5FcH9n+CbvpBIjMeii7OfokBTKEpEM0jz2GOyc7QQZxspArqYiYJiP7YUpuJwYyrx3wFjmhBOGfjlDQi5clDcOT2yT4EQIv6YJujZBWTe8v8IuzKx+I+0/p9id2IG6o79LJ3e4jiKAp8V1/P08u0EQlEuG5HL3CuHkOSUj0QisUhm7GKOaAP17zyLa9g40qbdQ/Ona7EGKnv6snqEFABCiFjSFRtBZzb2SD31a/6EZ3QhaZNuwb99DSmXzMQ19BLq33kWh97x4UCaYVIXiKD14gnU4uup9Ud47OWtBEItG3eu/7SC7ScsTytEIpCyt4uFrSmkzVqEYXMTtSaRNb8fYVdWT1+WEELEjZDNR/pNPyVqT8abmoQj/1yCnnxc4/PwaM0nLUn8VY40hXl6+accKG+kX7aXb90wkqzk3r0cs+i8pmaNqG60OXawsonx5+f06tWzhIg16QnoBmFXNprVi4lC0JUrq+MIIUSMhVw5RC1urE4PAU8/DCxoVi9hV3aHHm8AL7+7jwPlLb0GByub+NOqnejymTDuZKY4yUhpu2P9qMEZUgCIhCM9AUIIIbpMc0SnrilMmteB2957vwAJajr9crxk+dz8c+shGgMR9h6sJ6zpMb1ui6lhD9cQcuVgC9eBqRNxxP9iEb2Jy27hh7eO4Z2PD1JWFWDapf0Ykv/1NogT4mwk
RYAQQoh2NQajbN/fMlZ65KB0kl2d+5NRXhfkP/68mQZ/hJQkO//31jHkpbXdtEvBxNF0ECMpCzVQjeFIIWJLiVkMHVHjD/P4K9sorfTjsFu4ftJg/vbBAS49PyemBYCigOXw51T//Ql8M/4N/+aVmFGN5Gu/S1h1x+w84vSykh38y9ShKIqCfsLQICEShRQBQggh2vXKmr2s214BwOUjc7mzaHiHl/jVTZPn39xJg79lw8AGf4Q/rdzFD+ddgOW49RitejP+TSswQgG0qmLSr/8RdGMRoKoK728tp7TSD0A4ovP6+/u5c0YBg3Jjex2mCaQPwN7nXGqW/wYsVjLm3E/E4palk3tAy/AfeeJF4pI5AUIIIU4SNU32H2povX3gUCN6J8ZMa7pJebW/zbGyqia0EwbZR60e3COmEC79HFtWf4yk7l04QVUViivarh7UHIrSNzOp0z0fHWNiGvrRHzF1vQvOIYQQpydFgBBCiJPYVIW5Vw1FVUBV4OarhnRwF98WLpvKNeMGtDl2zbgBuGxt/+zY9Gb8m/5O+qwfojqSUJsOx+LyOywaNZhyUd82x0YOziAtyR7zcykKUFuCVrGPjBvuw9H/fPwfLcOmN8f8XEIIcToyHEgIIcRJTBNGnuPjoW+NByDda+/UXh+mCZNG5+NLcbJ93xFGDs5g5MD0k9qIqG6SCu9Bs3hwpZ9D2NL9Y+PP65fG9+eN5qPPDnNOXjIXDctqM2QpVkwT9KzzyLj1QcLOLJIm5YChE5H5AEKIHiBFgBBCiK+U7j3zb8TddguXnpvF5QU5p5x8GVE9YIJm8Zzxub4Om0Xh/P5pjBrowzDMLt3YUFds6I4sMCFiS+26EwkhxGlIESCEEKJLnS2rr+iyKYAQIoHInAAhhBBCCCESjBQBQgghhBBCJBgpAoQQQgghhEgwUgQIIYQQQgiRYKQIEEIIIYQQIsFIESCEEEIIIUSCkSJACCGEEEKIBHPW7ROgdmLb+rNVIsTYnkSMOxFjBok7Uc/fXRIlzuMlYsyQmHEnYsyQuHF3JcU0u3JvRCGEEEIIIURvI8OBhBBCCCGESDBSBAghhBBCCJFgpAgQQgghhBAiwUgRIIQQQgghRIKRIkAIIYQQQogEI0WAEEIIIYQQCUaKACGEEEIIIRKMFAFCCCGEEEIkGCkChBBCCCGESDBSBPQwv9/P9OnTKSsrA2D9+vVcd911FBYWsnTp0h6+uq7xxBNPUFRURFFREQ8//DCQGHE/9thjXHvttRQVFfHcc88BiRE3wEMPPcR9990HJEbM8+fPp6ioiJkzZzJz5ky2bduWEHH3hETLoZI/JX/Ge8ySP7uRKXrM1q1bzenTp5sFBQVmaWmpGQwGzYkTJ5oHDx40NU0zFyxYYK5du7anLzOm1q1bZ958881mOBw2I5GIedttt5lvvPFG3Me9YcMGc+7cuaamaWYwGDQnT55s7ty5M+7jNk3TXL9+vXnJJZeYP/rRjxLiNW4Yhjl+/HhT07TWY4kQd09ItBwq+VPyZ7zHLPmze0lPQA96+eWX+dnPfkZWVhYA27dvp3///vTt2xer1cp1113HqlWrevgqYyszM5P77rsPu92OzWZj0KBBFBcXx33cF198MX/84x+xWq3U1NSg6zqNjY1xH3d9fT1Lly7l7rvvBhLjNX7gwAEAFixYwIwZM3jhhRcSIu6ekGg5VPKn5M94j1nyZ/eSIqAH/fKXv+Siiy5qvV1VVUVmZmbr7aysLCorK3vi0rrMkCFDuOCCCwAoLi5m5cqVKIoS93ED2Gw2Hn/8cYqKihg3blxC/L5/+tOf8r3vfY/k5GQgMV7jjY2NjBs3jieffJI//OEPvPjii5SXl8d93D0h0XKo5E/Jn/Ees+TP7iVFQC9iGAaKorTeNk2zze14snfvXhYsWMCiRYvo27dvwsS9cOFCPvzwQyoqKiguLo7ruF955RVyc3MZN25c67FEeI2PHj2ahx9+GK/Xi8/nY86c
OTz++ONxH3dvkAivL5D8KfmzRbzFDJI/u5u1py9AHJOTk0N1dXXr7erq6tZu7niyefNmFi5cyP33309RUREbN26M+7j3799PJBJh+PDhuFwuCgsLWbVqFRaLpfU+8Rb3m2++SXV1NTNnzqShoYHm5mYOHToU1zEDbNq0CU3TWv94m6ZJfn5+3L/Ge4NEyKGSPyV/HhVvMYPkz+4mPQG9yKhRo/jiiy8oKSlB13VWrFjBFVdc0dOXFVMVFRXce++9/OY3v6GoqAhIjLjLyspYvHgxkUiESCTC6tWrmTt3blzH/dxzz7FixQpef/11Fi5cyJQpU3jmmWfiOmaApqYmHn74YcLhMH6/n+XLl/P9738/7uPuDeI9l0j+lPwZzzGD5M/uJj0BvYjD4WDJkiV85zvfIRwOM3HiRKZNm9bTlxVTv//97wmHwyxZsqT12Ny5c+M+7okTJ7J9+3ZmzZqFxWKhsLCQoqIifD5fXMd9okR4jU+ePJlt27Yxa9YsDMPglltuYfTo0XEfd28Q768vyZ+SP+P9dy35s3sppmmaPX0RQgghhBBCiO4jw4GEEEIIIYRIMFIECCGEEEIIkWCkCBBCCCGEECLBSBEghBBCCCFEgpEiQAghhBBCiAQjRYAQQgghhBAJRooAEdcWLFhAbW3t177Phg0bmD59+mnPN2zYsHbbWr16NQ8++CAA8+fPZ9WqVZSVlTF69OjTtimEED1B8qcQ8U02CxNxbd26dTG5z9d15ZVXcuWVV3b5eYQQIlYkfwoR36QnQMStH//4xwDcfvvtbNy4kfnz53PdddcxY8YMXnvttZPuU1FRwZo1a5g7dy7XX389kyZN4tFHH+30eR999FFmz57NzJkzWbNmDQCvvvoqd911V0ziEkKIrib5U4j4Jz0BIm79+te/5tVXX+X555/npptuYtGiRRQWFlJZWcmNN95I//7929wnLS2NRYsWsWTJEgYMGEBlZSWTJ0/mtttu69R5+/TpwwMPPMCePXuYP38+K1eu7KIIhRCia0j+FCL+SREg4t7+/fsJh8MUFhYCkJ2dTWFhIR988EGbMaWKovD000+zdu1aVqxYwf79+zFNk2Aw2KnzzZs3D4ChQ4cyaNAgtmzZErtghBCiG0n+FCJ+yXAgEfcURUFRlDbHTNMkGo22Odbc3Mzs2bPZsWMH5513HosWLcJqtWKaZqfOp6rH3laGYWC1Sq0thDg7Sf4UIn5JESDimsViIT8/H6vVyttvvw1AZWUlb731FpdddlnrfaLRKCUlJfj9fr773e8yZcoUNmzYQCQSwTCMTp1z+fLlAOzYsYODBw8yatSo2AYlhBDdQPKnEPFNSmwR16ZNm8Ydd9zBU089xYMPPshvf/tbdF3n3nvv5dJLL229z/z583nssceYNGkS11xzDXa7naFDhzJ48GBKSkqw2+0dPmdpaSmzZs1CURQeeeQRUlNTuyg6IYToOpI/hYhvitnZvjohhBBCCCHEWU16AoTohGeeeYY33nij3f+78847mTFjRjdfkRBCnB0kfwrRu0hPgBBCCCGEEAlGJgYLIYQQQgiRYKQIEEIIIYQQIsFIESCEEEIIIUSCkSJACCGEEEKIBCNFgBBCCCGEEAnmfwEf1V+EKeAwDAAAAABJRU5ErkJggg==",
      "text/plain": [
       "<Figure size 777.475x360 with 2 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Create a visualization with pandas df\n",
    "sns.relplot(\n",
    "    data=pandas_tips,\n",
    "    x=\"total_bill\", y=\"tip\", col=\"time\", col_order=[\"Lunch\", \"Dinner\"],\n",
    "    hue=\"smoker\", style=\"smoker\", size=\"size\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<AxesSubplot:xlabel='total_bill', ylabel='tip'>"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAEJCAYAAACdePCvAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAA4f0lEQVR4nO2de3wU5fX/P3tPdjebkBsESEADAVTuogaUi5KgxBCIAoGKWqWKl9JiLVDqr/bnCyva/kqlSu23+rWtV6AFEYsIilokFAUF1IKBcAmRkCy5bXazt+zO74/NDnuZ2Uv2Mrs75/16+ZLdnZnnnJnJc57nPM85R8IwDAOCIAhCdEiFFoAgCIIQBjIABEEQIoUMAEEQhEghA0AQBCFSyAAQBEGIFDIABEEQIoUMAEEQhEiRCy1AuLS3m+B0Bg5dyMnRorXVGCeJhEMsegLi0VUsegLi0VVoPaVSCfr103D+lnQGwOlkghoA93FiQCx6AuLRVSx6AuLRNVH1JBcQQRCESCEDQBAEIVLIABAEQYgUMgAEQRAihQwAQRCESCEDQBAEIVLIABAEQYgUMgAEQRAihQwAQRCESCEDQBAEIVLIABAEQYiUmBoAo9GI22+/HY2NjQCA2tpaVFZWory8HOvXr49l0wRBEEQQYmYAjh49ikWLFuHs2bMAAIvFgjVr1mDjxo3YuXMnvvnmG3z66aexap4gCCLxkAAGsx0NehMMlh5AIqw4MTMAmzdvxpNPPon8/HwAwLFjxzBkyBAUFhZCLpejsrISu3btilXzBEEQiYUEON7QiVUba/HrVw5i1Yv7cbyhU1AjEDMD8PTTT+Paa69lP7e0tCAvL4/9nJ+fj+bm5lg1TxAEkVAYuu3YsPkIrHYHAMBqd2DD5iMwdNsFkylu9QCcTickksumjmEYr8+hsvJPtWhpN3P+tqh8BBbPGgkAyMvL6JugSYZY9ATEo6tY9ATEo2teXgYuntKznb8bq92BbrsDxUXZgsgVNwMwYMAA6PV69rNer2fdQ+Hw3EOTAxZX0Ou7kJeXAb2+q09yJhNi0RMQj65i0RMQj65uPdUqOVQKmZcRUClkUCtkMb0PUqkEOTla7t9i1qoPY8eOxZkzZ3Du3Dk4HA689957mDp1aryaJwiCEBRduhzLF4yDSiED4Or8ly8YB51aIZhMcZsBqFQqrFu3Dj/+8Y9htVoxbdo03HrrrfFqniAIQlgYYFRRJp59eDI6TDZkaZSuzl/AapExNwB79+5l/11aWop333031k0SBEEkJgygS1dAl65gPwsJRQITBEGIFDIABEEQIoUMAEEQhEghA0AQBCFSyAAQBEGIFDIABEEQIoUMAEEQhEghA0AQBCFSyAAQBEGIFDIABEEQIoUMAEEQRDgkWFWvSIhbMjiCIIikp7eql7uwizuj56iiTMHz+vQFmgEQBEGESCJW9YqEpJsBBKoINmfKUMy96co4S0QQhFjoMNo4q3p1mGyXM3wmEUlnAIJVBCMIgogVWRkqzqpeWRqlgFL1HXIBEQRBhEgiVvWKhKSbARAEQQhGAlb1igQyAARBEOGQYFW9IoFcQARBECKFDABBEIRIIQNAEAQhUsgAEARBiBQyAARBECKFDABBEIRIIQNAEAQhUsgAEARBiBQyAARBECKFDABBEIRIEcQAbN++HRUVFaioqMCzzz4rhAgEQRCiJ+4GwGw24+mnn8Zrr72G7du349ChQ6itrY23GARBEKIn7gbA4XDA6XTCbDajp6cHPT09UKlU8RaDIAhC9MQ9G6hWq8VPfvIT3HbbbUhPT8ekSZMwYcKEeItBEAQheiQMw8Q1memJEyewevVqvPLKK8jIyMDjjz+OMWPGYOnSpfEUgyAIQvTEfQbw2WefobS0FDk5OQCA6upqvPnmmyEbgNZWY9CSkHl5GdDruyKWNdERi56AeHQVi56AeHQVWk+pVIKcHC33b3GWBSNHjkRtbS26
u7vBMAz27t2L0aNHx1sMgiAI0RP3GcCNN96I//73v6iuroZCocDo0aPxwAMPxFsMgiAI0SNIScgHHniAOn2CEAsSwNBtR4fRhqwMFXTp8qQuo5hKUE1ggiBihwQ43tCJDZuPwGp3QKWQYfmCcRhVlCm0ZAQoFQRBEDHE0G1nO38AsNod2LD5CAzddoElI4AknAGs/FMtWtrNnL/NmTIUc2+6Ms4SEQTBR4fRxnb+bqx2BzpMNoEkIjxJOgPw3EOTg24DJQgiMcjKUEGlkHkZAZVChiyNUkCpCDfkAiIIImbo0uVYvmAcVAoZALBrADq1QmDJCCAJZwAEQSQRDDCqKBPPPjwZHSYbsjRKV+dPk/iEgAwAQRCxhQF06Qro0hXsZyIxIBcQQRCESCEDQBAEIVLIABAEQYgUMgAEQRAihQwAQRCESCEDQBAEIVLIABAEQYgUMgAEQRAihQwAQYgFCWAw29GgN8Fg6QEkQguUpKTQfaRIYIIQA4Hy8lNkbuik2H2kGQBBiADKyx8dUu0+Jt0MIFA9gJLCTKz+wcQ4S0QQiU+gvPxsjh4iKKl2H5POAFA9AKJPCFWXNkHq4VJe/uiQavcx6QwAQYSNUH7bBPIXu/Py+8pCqZnDI9Xuo4RhmKQSu7XVGHQGkJeXAb2+K04SCYdY9AQi09VgtmPVxlq/UduzD0+O6bS9L+3G9Jm6ZyMJkpc/ad/fMO+j0HpKpRLk5Gg5f0u6GQCtARDhIpTfNuH8xZSXPzqk0H1MOgNAawBEuAjlt001fzGRetA2UCLlEaouLdXDJRKdpJsBEETYCFWXlurhEgkOGQBCHAjlt00hfzGRepALiCAIQqQIYgD27t2L6upq3HbbbVi7dq0QIhBEapBCicmI+BN3F9D58+fx5JNPYsuWLcjJycE999yDTz/9FNOmTYu3KASR3CRQoBmRnMR9BrBnzx7Mnj0bAwYMgEKhwPr16zF27Nh4i0EQwhGlUXuqJSYj4k/cZwDnzp2DQqHAsmXL0NTUhOnTp+OnP/1pyOcHCgRbVD4Ci2eNBOCKvhMDYtETiL2uTieDpksmtBnMyNaloyBXA6k0uj4Vp5PBga+bsP6tL9lR+4pFE1A6uoBtK1Q9L57ScwaaddsdKC7KjqrcsUIs72+i6hl3A+BwOHDo0CG89tprUKvVeOihh7Bt2zZUV1eHdH6wQDC9vkvw0Ot4IRY9gTjoGid3isFsZzt/wNVhr3/rSwzo50oPEY6eapWcM9BMrZAlxXshlvdXaD0TKhVEbm4uSktLkZ3tGqHMnDkTx44dC9kABJoBzJkyFHNvujJqshLigc+dEu18QdFMD5FqicmI+BN3AzBjxgysWrUKBoMBGo0G+/btwy233BJvMQjCi3jl7YlqeggKNCMiJO4GYOzYsVi6dCkWL14Mu92OKVOm4I477gj5fMoFRPSZALn545W3J+qjdgo0IyKA0kEnMWLRE4iCrsF8/PHcUhkgnXBeXgb0l7oSoohMrBHL+yu0ngm1BkAQQhDUxx9Pd0qAUbvTydDefiJuJJ0BoEVgoi+E5ONPAHdK0yVTXBajCQKgXECESHD7+D1JxNz8bQYzr6EiiGiTdDMAWgQm+kKybJnM1qVTERkibiSdASCIPpEkWyYLcjVJYaiI1IAMACEeEsDHHwypVJIUhopIDcgAEESiEQ9DJQGMlh4YzD2wWHuQm5mWsttNCX5CNgCdnZ2QyWTQarn3kxJhECAgiUhQYvnMPK5tYyRQShHb90EC1F/oQqPeiLf31NF2UxET1ACcPn0aP//5z3H8+HFIJBKMHz8ezz33HAYOHBgP+VIPyuGefMTymXFc+9H5Y3H10CzAGQ3h/TF021F/wYBtn5yi7aYiJ6gB+MUvfoH58+fjjjvuAMMw2LRpE375y1/i1VdfjYd8fgSKAygpzMTqH0yMs0ThEa+kYylLKCPxaI3We69jtDoCP7MI2uN6H17YchRr7p2Ewlz15esEayMMGTqM
NjgZhnO76aUuq7BrDuHcSyFm0ik2ew9qAMxmM2pqatjPS5YswebNm2MqVF+51GkRWoSgxCvpWEoSykg8WqN1j+tUTSvmf2ZqRUTt8b0Px8+2IVOtYA1MNNNYZGWoIJVIOLebnjrfCavVIcyMNBw9hJhJp+DsPWgg2JVXXokvv/yS/VxXV4fBgwfHVKi+cuPoAqFFCEqyBCQlIqFUwIpWlSzf6/A9s0jb43sfnE6wwV/B2ghXBl26HMUDdagpK2HbVilkWDxrBD784pxgVcXC0UOIamipWIEtqAG4cOEClixZgurqaixYsADV1dU4ceIEKisrUVlZGQ8ZQ+bd/WeFFiEo7oAkzz88dp83EZBAs6dwjgm3rb2HGrDQp7N0P7NI29Oly/Ho/LFe115YVoJ9RxrZQUGwNsKWgQGKB2Zg4og8/PKHk/Dj+eOwYOZw7PjsNC51WASLPA5Hj2g951jJlywEdQE9/vjj8ZAjZJI+EjhJApISkVBSNkcrrbPndS51WLCz9gzmTR+GYYMzkatTsc8s4vYY4OqhWVhz7yQcP9sGpxPYc/Ac7rp1VMht9EkGBtCq5HAywNOvfpEQkcfh6BGv9N1CtxlreGcA9fX1AACNRsP533XXXYfrrrsuboKmFL37vItyNWwmSiI4ocyeojXD8r1Ol8mOwnwtrhyg9XpmUWnPCRTmqnHDVf0xbkQuVt010cuvHKyNSGQQbEYqAb5vMaJBb4LB0gNIwpNFCLlTcfbOWw/gwQcfxJ///GdcddVVKCgogOdhZrMZBw4ciJuQnlA9gMuIRU/AQ9cAufRZQjkmFKRAq8GGVoMFObo05OiU3Fszo9UeAjzTYG1EIkMU5Q+1Pd7FVIQhS7zl7mObQv+dBqoHwGsAOjo6ALh2/bz22mtgGAYSiQR2ux133XUXPvjgg5gJHAgyAJcRi56AALoKtOND8Gcah22OBrMdqzbW+rlSUnUrtNDPtE8FYX72s59h//79kEgkKC0tZb+XyWSYNWtW9KUMEaoHQMQDUcZrxMno0VboxIHXALzyyisAXIFgzzzzTNwEIohEQIydVLyMXioupiYrQbeBUudPiJGEjdeQuFwonoun0SJe2xxTcTE1WUm6bKBJvw2USAoSsoBMjF00cRuZ926Ffv6x6bjYaqSt0AKSdAaAIOJCAsZrxNpFE1ejxwCD8rVQShj2MxF/ks4A0CJwgiJ0kqxot++x3U+TpoDRbAckEkGTf8V8XSIBjR4RW5LOAJALKAEROklWtNvnuN7CshI2Qleo5F9xcdEkQdU0InoEXQQmiGDELEmWx4Ln9y1G3gXPaLfPdb1Ne+pw07jBsUn+FaKetHhKRJukmwEEcgGlKWXY+Ni0OEtExMQ1EcaoPtrt810PkhhsBQ1n9kIuGiLKJJ0BIBdQ4hGWayJEX33ABU+1wusa2ZlpUXWN8OkDJvoul7AXdslFQ0QRQQ3As88+i/b2dqxbty7kc5K9IlgqEvLukSiM6i8ZrPi+tRvnm4348Itz6DLZsXzBODy+eAJ+9+aXUdm9wqWPew0g2rtixBhwRiQOghmAAwcOYNu2bZg+fXpY54l6BuAzes6J9X0IdWeNh2vCaLFDpZSj02hDU7sFGWoFtCrX6NlztJublYabJxbhUmc3WgxpsFh7kKW93AbXKLwgRw2b3YlzF7sglUhQPX0Ytn5yih0xB3SNhLNLyMfVkqFWwmp3oKh/BnIz07jP8Uwcl5mGnAyexHE+pERUrNA7wIg+I4gB6OjowPr167Fs2TKcOHEirHNFOwPgGD2vWDQBJYMyYvPHFu7OGgbQqRX4/lI3Nmz+gj2npqwEg/O0KB6YwY52c7PSMHvyFdhz8BzKrh+CJ//nP35t+I7CC3LUuGPGcDy/6Suva1dMvgJ/23kclwxWyKUS3nq5Ye8ScrtaQin5KAWO1rfhpa1fs8csqx6NscXZQY1AQgachYPQO8CIiBBkF9CvfvUrrFixAjqdLqrXHVnUL6rX
SyS4fMXr3/oyZuXo+rKzhuuct/fUof6CAYZuOzvavXliEburZtOeOu42PEbhv156PZYvHI+/bP/G79r52WpXLdvGTvz6lYNY9eJ+HG/o9NpJE8kuoVDObTXY2M7ffcxLW79GqyGEFAo+ej7/2PSk6jxTsUyimIj7DGDLli0oKChAaWkptm7dGtVrv7v/LH5UPRaAKwVrKnHxlJ7TV9xtd6C4KDsh2uM7x8kw6LY7cPUVuVixaALONnV67aoJ1EZe7/eH/nuR81irzYGashL8q/YM+92GzUfw/GPTMShf22ddwrkPJy9e4Dym3WjFyCtyAl7fTV7wQxKSSN/LVPs75SNR9Yy7Adi5cyf0ej2qqqrQ2dmJ7u5u/OY3v8GaNWsivvacKUOh13cJnn87FqhVck5fsVohi4mufWmP7xypRAK1QobWViNKBmUgR6fCp182YuiAjJDbGJCj4Ty2IFeDN3efwKUOC/u91e7AxVYjm2YgbF08fNoatQIFOWo0tXbznpvN48fvp1WF/WyS7d2N5L1MNl37itB69qkgTDzYunUrPv/887B2AYm2IEyirwFIAKOlB2cudmHjP45xrgGw58mAIyfbsOWjOpRdP4R1AwVqIydHi8+OfO8nT2F/LX7+x88CFxcJRxeOY5dVj8bmD+vQ1Nod9TUAX5Lu3Y1gDSDpdO0jQutJBiBV8ClHd8XgfmhtNcatPd6FSY9OIEOjwOzJV2BgnhbadAV06XJo07wXZVuNNjzx5wNeu4GkUmBcSR7ydSrONvLyMqC/1OUvD0LsgELUha9a1doHS2Gy2PnPDbV8ZBDi8u7GMG9SOMFpKft36oPQevapIlg8qK6uRnV1dVjnBNoFlK1T4XcPT4mGaImJTxCQVBrFZPAhtMf3R+25EGjtcODvO4+zo3Ctyr9zaTVY2A72UocFmz+qAwAMLdAhP0MVtjwhRcfy6eLTGbZ2WTl92iaLHUW5Gv774ARytErkaJXs54QkFrt2KDgtaUm6SOBA3Di6QGgRREm4wUw5PJG7OTqePfbB6GsHxNEZPrZ4QvLvyw+AKEtdEryklAEg+kiELgG+oC1NmgIX2rqRnqaA2eaAxdqD3Mw05GQqserua1HX0A4nA+z7qhELZpZcdptwycMh7yWDFekqOXRaBRx2p/fxkuCBWVyBaY0tXVi5ZCJe3v6Nl88/afblB6HDJPLIYwpa8yKlDMCJhnahRUg+ouAS4AraWjCzBL9/60tU3nQlrDbXnn3P3zwXTB+ZPxbXXJEFOPjlycnW8sr7o6pr8M+PT3p12JAw2LDpaMBFWd/ANM/F6PvnXA2TxY7hhf0wsF9aynQSmjQF5wxHkyaOzp+C1rxJqXTQqRwI1idCqB8blUAej2CmtQ/egJ/UjEdzWzfuum0UDCY72/kDwE3jBvsFTb245SgMRntAeZoumXh//8v2b3DTuMFex59vNgUNzPINTPM8/pV3v4XV5mTTWPTl3iYiVlsPFpaVeKWUXlhWAqu9R2DJYg8FrfmTUjOAd/efpYpgbkIc7UQtGRlHKoiashI4GZ9gL57gL3d7fPK0dZkxQJfG68Lw7IDdwWe+x7QaLK5F2l43gNFsx6Pzx+J7vZHzmoX9tdyunyQeSWrVSuw5eA5VU4td94wB9hw8hwnDUzSFigeUeM+fpDMAok4GFwahLvZFMxmZb5tOBpBKJJzX52uPT57sjHRAwkDCcz3PjtcdfOYJu8js03kX5Kjx6Pxx2PZJvd81B+Wow09VneAdiS5djrtuHZW8uYciICUS70WZpDMAot4GGgahjnZCSkYW4sKZ0Wy/PLIE8NV3zZgxsRA1ZSWsG2jfV4340dxr8Jd3vmHbe3T+WOg0CsDJL0//bDVOnW/H2aZO3D/napeLpvf3B+aNxj96t5J6rgG4/9jdawAqpQxGS49X593U2o0XthzBo/PH4oUtR0PqFJN6JCniojJJn3gvBiSdAQgEb6peERLyaCdYhxDI3YHLhiE7
Mw3tJhu2/7uePW5hWQn2fdWI++ZcgycKr4O+3Yz2Lgv+8/UFNieQ0wm8+cEJr1q7vqmYbT0O1H59ga0BoJBJsWLRBDS2dMFmd6J/dhpW3TXRW34JsHZZKZrbzGhsMeKND06gy2THQ3eMQYZGAWvH5fvS1NqNfhnKkDtFwUeSke5kEeu+fREbPz5SygDQIvBlwhrtBOgQ+Nwdv310Cs63mNjfaspGYNsnp7yO27SnDmvunYQcrRINvccCwIJbSrD+rS+9OlAvF0rveoK+w4Jjpy6xswe3UdlZewbr3/oSVVOLsf3f9Zh8TX9/+RlAIZXgjx6yA8Cf/nkM86YPw9t7vmO/Uylk0KYpQu4Uue7tsuoxcACubRV9DQKTAN+3GHHxkom/Yw/RINMWRx7Eavx4SCkD8K8D52gR2E1fRzs+o0uj2c7p7jCYe3z8/QzncRab61h32cYMjQL9s9VBXSiGbjvqLxg4jUrV1GJs/qgOUin65Kop7K/1cg+F7QZw39tHJqNRb0JjiwlvfHAcXSZ7n3MAhbqwHKpBTqaFaUI4UsoAKOQptas1csId7fR2Qq/vOo6bxg2GVAqMHJKNUUOyoO+0oGLyFcjPVoNhGNh7HFgwcziydWnI1KqgkEmguf0q7PjsNJuZU6WQocfhxNFTegwb3A+PLR4PQIKLrSYvF0puVhpmThqCHgcDg6UHOrUc3TYHBuVpUDWtGHsPNQAAbp5YBEiAoQUZGDUkCxNG5MNg8q885obPVTMoRx25G4AB7D0M/rj5qNf1X9r6NdY+WOq128g3oI1rlB7qwjKfUfM1yHFdmPbVUy2HwRTiTIQCswQlpQwAERmGbjte33XcL0Pnw3eOgcPhRKfRhtffP47Km66ExebA5g9PssfUlJUgTSnDHdOH4Z+fnEKXyY6lVdfgvX31mHR1AZ565SB77OJZI3BPxSj87V/HkaFRoGLyFV5uHt/sm/dUjIK9x4k3P/iOPebB6tFY/9aX7DFcWUd1ajmWVY/2y9J5ecE5MjeAZ04jN+x20wwl54heKZf61S4eVZQZ8sIyn1GzWHuEWZjm2FXlG+jHNxNxOpmk3U6bKqTUkFmdRvYsEjqMNs4qXRv/cQxpSgXe7q3iZTDZ2V087mPe3lOHTpMdnb1F2pcvHIetH5/E6GH5ftd784PvYLb2oGpqMX54+9VegWLuoC3PwC6Dyc52/u7v/uxzjLvymNHSwwZotRps2P2fs6iaWowFM0tcrqMP62AweQT+RBDQ5c5p5Il7uynfiL7+goEzEMndsftey3dh2b3+4BnItXzBOOTyyBLrhWlfPbkC/fiCrZoumTjvkZgDs+JNSvWYbQar0CIkNVkZKkil3IFaZlsPG3DF5+93B1/Zepw439zlKqLCE/hltTux+aM61JSVBA3sksskIQV/qZRSnG7qwp/+ebkegXvR2LNgDDsqjjCgKydDyTnDyNEp0eARicx1jzy/6zDZUJSnCW3Rnm9tBxBki6PfzCVIoJ8nbQZz8m6nTRGSzgBQIFjs0KXLMWpoNqeLIV0pZ0eYfMFd7uCr3AwVFL3Xcf/GFbilUshwxcDMgIFduVlpKOzPXTnMN/irfz8NWzQe8F80dh/nHhVHHNDlBMYWZ2Ptg6V+dQD4XDVcAWpZGiXbsT//2HRcbDUGXpuIJC12lOHTM5Qtstm6dArMEpikcwGt/FMt7lu3l/O/dW8cFlq85EMCGK09uNBhwemLRmRqlXhs0Xg/F4Na5fKz7/uqETqNAotnjfQ6pqasBJkaBYoH6qBTK1CYp8aj88fi3IUOVxCWx7FLq65BepoM86YPg9Fs87vWsurR2HekEQAwc9IQ/PW9b/3y1zx05xj2GHf7Ep7Rp7T3LfcaFSNwQFfI9NYBKBmocy389u7+4XPVFA/U+X3nlgcMMChfi6JcDbsdNix6DUOfz+8Dvnru+6rR73l76ehBQa6G8x5xHUvEBkErgvWF+9fu5o0EnjNlKObedKXgFXji
RcR6SoD6C11o1Bu9FmGXLxiHwnwN2rqsXi4Gl3+9Bw6HA9p0Jax2B0yWHqQpZZDLpdAoZd7Vv2TA0VNt2PxhHburqKSoH/Kz03Cp3QJNmgLdVjuUSjnkUglM5t6KWxoFDCZXumeHk8Ezf/uCTdfszl9TUpSF3EwVLnVakaaSQ5cuh5MBVr24329EyVfNy2Dp4Tw+ajtnuCplIXD1rKR8d3317H1+wWYivFXekqpHCo7QzzRhS0L2BVGXhPQhJ0eLM43tfd5CZzDb8Z//tnjttQei1wl6ln70vPb/feAG3tKPfjLydNLzpg/DDVfle8vYhzrGibYLRSzvLiAeXYXWM2FLQvaFQLmAZFIJ/rJyRpwlEggJcODrJjaiti+dV4fRxrvAGo2FOL5tkhf0JrR2WEKSVZcux0N3jOFc2B05JMtbxj4EvxXma/B/7r8eFlsPcjNUsRmB0l53IkFJOgNAi8AuDN12r3QKXguYagV/hyMF2o12mCx22BxODC/K4lyIS1PJcbrZiHSVnDPIyg+fKl0ZagXy+nEv8qmUMmzYdMTlmjHbA3eKDHDlQB3mTR/m2kHDADtrz6DLZOdeLAw1+I1n9K/rTakQNRJwliEKyOiGRNIZAMoG6oJvAdNosffm5OfocCTA8YYOtLSZWZ//qCFZeGDeaPzPtstbGZdWXYNzTZ3409ZveIOsvODo5NznLF841qsy19Kqa/DOpy6X0+HvWvD2nrqgnaJWJcPQAh3Wv/UlMjQKzJw0BIX9tYBEwq4JhEu8Ujonc+ropIWMbsgk3S6gQIgpGyhf4JBKIecNrmk12HC+2eQVeDV6WD7+8VGdV7DU1o9PQqtWsee7g6z4AnS4Ojn3Of2z1Vhz7yQ2OGz3wbM4eb4TKoUMzt4dM0EDgBigdHQBfvvoFNx16yhs++QUfvv6Yax6cT+ON3T2qRpXVHYAJVA7xGWo8lfopJQBqDvfKbQIcUOXLseKRRP8ttCZLNzJ2zpMrgLpfkFcElc65M0f1WHzh3XY/JErBYO7BKP7fCfD8HZafJ2ck2HQ2mnFH7ccQYfRig2bjrCd//1zrsbeww1+MvIhlUrgdDLsWoD7nL7+YYcaeRsp8WqHuAwZ3dBJOhdQIEoKo+y/TWR6R8UD+nkveBrMPazf3b11Uip1FQPXpCtwtqkrpMAdm93p9VkqkfB2WoGCntRpcnSZ7NhZe4YtFiOVSGC29uBSh8VPxkAunWgWYolXcRAqQhJ/BK/XkETQNtAkhlNPj4yevkndHl88AXanE82t3dh98BxuGjcYmnQZMjPS8Kd/XN5l45uMjXcNoHehzWzrgb7Tihc9KmrVlJUgNzMNQwfq0HDR6NUBLp41Agq5FDv2nfaTkc9Xm5eXgfrz7Xj2tUOuHEC9bp99XzVi1V0T++ZP59qnH4u/hjDaEcu7C8RQ1wRbAxD6maZUHAAFgl2GV08J0NrlvQffnXJ5eGEmMjRKtHaa8Ye3LmdwXFY9BgzD+AViuYOsAFeuJc+0xp6GJk0pg8XmQKZGhTSVDC1tLiOz6q6J0KkVuNDuKu4CBqzr54e3X40Nm474jdS4Fkjz8jKgb+3C0fo2v9w7AfPvJ9luELG8u0CMdY2XcQ8BoZ9pSsUBBOKzr5uoIAwAMIDJo5BLblYaZk++wmukXVNWwpZGbGrtxm/++sXljtfpsZWSZzRVmK/Bhs1HUDW12JVvZ5or06YvbveMNk2O7Z96F14/39wVlkvHYLL7ZZp8aevX/DtqEmwkSMQRqvwVEim1CNxt6RFahITBc/Hx5olFfimZ395T50qt0AvfIhnfjgqjxYEMjcIr+2OgxU6u3DijPBLGcZ3jS7iLewm1GySCtNMEESsEmQG88MILeP/99wEA06ZNw8qVK6Ny3aSrBxDIPRHMdSFxjaC/b+mCSimDWiWHxdYDbboSOrUcYBgsXzgO55uNIaVTdgd/ndEboU1TwGSxQ5OmgM3egzX3TkKbwQqVUoptn5xCe5cV
HV0W/GDWKGTrlFi7rBQ2mwNX/3ASWjssSFPJoG83Y2CO5vLUuzdK97ePToHB3AOLtQeZGiUeXzzBr0CKV86cXv3teiMUCllYi3tGs51deAaAvYcacKnD4pUO2u0mcOclkstkoQW+hfksQ5qJuGsCt5qgSVPAauuBVq1MeLcVkbzEvcesra3FZ599hm3btkEikWDp0qXYs2cPysrKQjo/ZSKBgxT3Dthh8AReqZQy7Nh3GvNnlmCLxyLu6nuuDZia2H3+me872WpeC8tK8MW3Tbh50hC8vP0btp1H7hwDg8mG5zd5LuqORJpSiv/d8V8vebjgqlv720en+CWe49Kv9usLqCkr8Utcx+nflQDtRhu2/7veK4XEnoPnXAaD4x66fy+/fkjgwLdwnmWo5R4DyHPXraPIbUXEhLgvAp88eRImkwnjxo0DADz11FMYMmQI7rnnnpDOD7QIXFKYidU/mCj4oksoGMx2rNpYy7kAKpVKUPtNM1s8ZO+hBnSZ7GyHwXfuvOnD4HQy2P7veq8c+K4yfSPw0tbLO31+VHUNTBY7TJYedmG2y2Rnz1MpZFi+cJzfIm1N2QjO5HHzpg/D23u+8/vOM2FbIJ09ffh8x1VNLcbeww2YOWkIhg3ORK6OP3cP3zXW3DsJhblqGLr529j+73ruZHN9eJa6dAUa9Cb8+pWDfuf9eun1KMrVBNV5+7/rUzJyOBn+TqOB0Hom1CLw8OHD2X+fPXsW77//Pt56662oXLvufCfy8jIAgP1/onLxlJ7TLWN1OnGxpZvtZD2Tn3XbHSguyuY918kwl33yHu6dptZuKGQSVE0tRv8cNZpbu9FltuFv/zruL5jk8vUsNodfO8Gqgfl+55Y5kM6exwQ6DhLgUocFb+/5Dr95eLLXOb7wXUMulyIvNyNgG1yyByKYXjaGu4DOgBwt8vK0QXXmukepQqL/nUaLRNVTMKf5yZMn8eCDD2LlypUYOnRoVK6ZrVNBr+8S3OKGglol53bLMBJs/Id3tOumPXWYN30Y1AoZ9Pou/nMlrmhZlcK/WpZUKnHNDKYVs//nuob7PJVChjSlv889WDUw3+/cMgfS2fOYQMd5yuZ7Tqj3V6MMfA/dlcp8ZQ9EML2UUu5yjUopE/TeuOUJVZZkIhn+TqOB0HoGmgEIsgvo8OHDuPfee/Gzn/0M8+bNi9p1bxxdELVrxRq+ilF8qRwK+2td7g4JIJUAD90xxuvcmrIS6DQK7DvSiKVV1/hVy9r26SksnjUC+75qxMLeyl6+VbZqykqw93ADO+t4b189llZd43XM4HwNHpjr/d3iWSPRL0Ppdy13dTBWZ7Uca+6dhJqyEiyYWYKCHDVnBSiue+MpWyhVo/jur/s8rt8XlpVg35FGTtkjacszTfWvl16PZx+e7OfTDyQPVckiYkXc1wCampowb948rF+/HqWlpWGfn1KRwBzBKgZzgCpVagW7UJihUWD25CtQkKtBmlIGhVwKm92JuoYOHD5xERNHDkD/bDW6La5dKRs2He0twO6ESilFfj81urqtGJibgW6LHTqtCjIZ0GWyQ5uuQLelB+o0ORjGCYlEik6jDdp0BV5//7/Qd1rYjJyD8tSw9zDoNFqhVSthMNmQrpIjU63wrg7Gscj56PyxuHpoFncQl8+9SU9XoulSkFq5Idxfrkhmz11AMpkMunS5t+x9fJZhL9pKAJtTgoutRtcuIHsPtGmKlE0bkTR/pxEitJ4JFQm8du1a/POf/0RR0eU96DU1NVi0aFFI56fKIjAvAXaUBFq43PxRHX5+10TkZ6Xj8HctcDovR9zOnnwFCvO1rjz8m4/gUoeFPT83Kw133TrKq+AKb7BUbydntNihUshdZRa1oUXXhroAzEdSP9MwEIuegHh0FVrPhFoEfuKJJ/DEE0/E5NqXOi3BD0p0AlS14guESk+ToaZsBKw2Byx2Bz78ooFNtMYVAfyv2jOsEZg5aQg27fnOa7/867uOc+fXYQCd
WtFbb+CLsKJro5nIjSCI6JBkkVMigSeMnSvLYUGOGtp0Jd7e/Y1fJ88XAezesqlSyFA8SAelwjsh28KyEhgtdu50DH0scEIZGgki8SADkGhwlFZ0R6Xq1HI8Mn+sV9bNpXOuwXOvH0aGRoGqicW92wadWDZ3NCw2B2ckbPHgTKy++1rk6FwFdH73hndpyU176rD2Qe71mUsGq1eOoZsnFgESwGjpCeirDpoWOZmStiWTrAQRgJQyAEmXC6i343SnRsjNTIO+3eyVGoFNxTwoA8fPdaLdYL7cqTPA+ZYudkHYcxT/o7nXQCmXckbC1jd2sjOAn9aM53TNmCx25Gj9R+fpvdsVfdvc/mkQV1Cggu3JlLQtmWQliCAkXTrolNkFJAHqL3ShUW/0Sm2weNZI7PisnvXRuyNqJ47IwxN/PuDax++RVXPlXRPR0GwMOTp3xaIJePndr9nr80X28rl0jDYHDp9ogdXuDOu8QPehtcvmWrhmLs9SfK+VKM800sXsYCSKnvFALLoKrWdCLQJHSqCi8ADwv6tvjqM0fcfQbUf9BYNXJ2q1O/DmBye80ji4o1JbDRZY7Q589V0z7p9zNV5591tY7Q7oO7rRP1sdcnRuY0uX1y6gD784h4fuGOO3C4jPnaNVyTA4T4suM3/pyZA7Qp78Nzt7F6kTcYGYFrOJVCLpDEAgkqkkZIfRxptWwS9Lp1KGHF0aCnLUKB09ENs+OYWashL0z1ZDJpcCjGsxuKm12+s8rujcHod3r95lsuPKggxu1wwXDFA8MAOtXbaIF3W5FpQ37alj898k4gIxLWYTqURK1QMYWdRPaBFCJitDxaZV8IQrS2f/7HTkZCqxrHoMNu2pg93hBAPg+U1H8Mxfv8Dv3/wSd8wYjoIcNXve4lkjkKlR+ETs+n+3fME4aNPk0KUrUJSrcY1igzkFGSAnQxk4+jUE+EbT0t7UCYkY/Ro06pcgkoiUmgEkE7p0OYoH6vzSGz+6YCzaOi1YMLMEYIB/1Z5hM4EyvTOGqonFfts7/7L9G6y5dxLau6wYkKNGT48DOrWSHdm7I0s9v4uoVF6gRd0Q4RtNTxyRj5wMZWIuqkZB75hBu5OIMEkpA5BUJSF7XSn9s9MxvKifaxeQTgWj2Y7/98aXfod3mGzI0vZW+fKowuXGanfg2zNt2PxhnVeaYQAevunLbgqdWgFDtx0NLaa+dxYRlt3j2xqasJ2/m0QsN0i7k4g+kHQGIGUKwgAAA2hVcmhVHo+BJ9tmlkbJdpjnW4wBM0cG9UcnSmeRyKPpJKOvAXqEuEk6AxBoF9CcKUOjMwOIdCodQjlHvt+DBUyNKspEYb4GA3LUXjt33Hv8A+3gcWO09OB8ixFV04oBuLZeRr2zCPUeJuJoOgmh3UlEX0g6AxBzIh0dBzs/2O/BRsW9s4YxV/TD849N98ocOWH4xOAjaAlwuqmLs+BM1DqLRJlhiAjanUT0BQoE8yHSQJ9g50czkKgvegYqJxlqCcS+thHJDEPoYJp40Wc9k9Do0jOND6IJBJNJJfjLyhkRXT/SqXSw84WeqvO1zxaciUJnIbSOooTWU4g+kHQGINaLwJFOpYOdL/RUna/9QTnqqHUWQusoWmg9hQiTlAoEiwaRBvr0pRRhPAOJ4tG+0DoSBBEatAbARaTl/cIoRRjJVD0Sf3E02o9nG0L7UeOFWPQExKOr0Hqm1BpAXIh0Kh3sfKGn6vFoX2gdCYIICrmACIIgRAoZAIIgCJFCBoAgCEKkkAEgCIIQKWQACIIgRAoZAIIgCJFCBoAgCEKkkAEgCIIQKWQACIIgRIogBmDHjh2YPXs2ysvL8cYbbwghAkEQhOiJeyqI5uZmrF+/Hlu3boVSqURNTQ2uv/56DBs2LN6iEARBiJq4zwBqa2txww03ICsrC2q1GrNmzcKuXbviLQZBEIToibsBaGlpQV5eHvs5Pz8fzc3N8RaDIAhC9MTdBeR0OiGRSNjPDMN4fQ4GX1pT
X/LyMsKWLRkRi56AeHQVi56AeHRNVD3jbgAGDBiAQ4cOsZ/1ej3y8/NDPj8u9QCSBLHoCYhHV7HoCYhHV6H1DFQPIO4uoMmTJ+PAgQNoa2uD2WzG7t27MXXq1HiLQRAEIXriPgPo378/VqxYgbvvvht2ux133nknxowZE28xCIIgRI8gFcEqKytRWVkpRNMEQRBELxQJTBAEIVLIABAEQYgUMgAEQRAihQwAQRCESCEDQBAEIVLIABAEQYgUQbaBJgvv7DuNuTddGbPz171xGKt/MBHr3jiMkUX9vI59Z99pfPZ1E24cXeB3jXVvHAYATBw1AB/85yzaDVZUThnKHu/ms6+b8LuHp+Dxjftx4+gCnGhox8iifgCA3V+cBwAU9dfiUqcFuZlpqDvfiTlThmLuTVfi8Y37kZuZBgAYWdQPJxra2X+7r/vw7z9F+aRC9lrlkwrxrwPnAADFg3TssTeOLsBnXzcBAPvvbksPyicVYsf+s36yu/V16+nW9fDxi6ysDc1GFPXXsjLNvelKr/v4zr7TAMDq7JbZ9xj3eb56rv7BRL9ruHHL5/7ds213O77vgSee8nk+23VvHGb1dL8Xq38w0e9aXOd7fn+iod3vPK5ruGX2lMn3euHgK6/vNbiu6Sl3sGOD6RPOb9H42w52n4OdH0n70YIMQADe3X82oocU7Py6853s/+vOd3od++7+s7zX8DyP63hf2gxW9nvPczw/txmsXu21Gazsd57neP7bYnN4tef5b7dOvt9z/dv3/259+dr1lct97zzvo68svnK5j3Gfx3Vv+K7hls9TXk99ffF9Jp7yeT5b32twXc8ts+/5vt8HwlNvdxue98PzmHDgun++77TvNT3lDnZsMH3C+S0af9uREGn70YJcQARBECKFDABBEIRISToXkFQaWuroUI8LRH6/9IiuE+x89+/5/dIBeMvs/s73e9/fguF5/Wie05frhtM+0Hc9+WTjOiaUY4PJ53tssOcV6jPnen/43plA75IvXHr7tteXd9/3nGCffWUJdmyo7YbyWzT+toG+90eRth8OgdqRMAwTOLcyQRAEkZKQC4ggCEKkkAEgCIIQKWQACIIgRAoZAIIgCJFCBoAgCEKkkAEgCIIQKWQACIIgRAoZAIIgCJFCBoAgCEKkpJQB2LFjB2bPno3y8nK88cYbQosTdYxGI26//XY0NjYCAGpra1FZWYny8nKsX79eYOmixwsvvICKigpUVFTgueeeA5C6uj7//POYPXs2Kioq8OqrrwJIXV0B4Nlnn8Xq1asBpKaeS5YsQUVFBaqqqlBVVYWjR48mtp5MinDx4kVmxowZTHt7O2MymZjKykrm5MmTQosVNY4cOcLcfvvtzNVXX82cP3+eMZvNzLRp05iGhgbGbrcz9913H/PJJ58ILWbE7N+/n1m4cCFjtVoZm83G3H333cyOHTtSUteDBw8yNTU1jN1uZ8xmMzNjxgzm+PHjKakrwzBMbW0tc/311zOrVq1KyffX6XQyN954I2O329nvEl3PlJkB1NbW4oYbbkBWVhbUajVmzZqFXbt2CS1W1Ni8eTOefPJJ5OfnAwCOHTuGIUOGoLCwEHK5HJWVlSmhb15eHlavXg2lUgmFQoHi4mKcPXs2JXW97rrr8Pe//x1yuRytra1wOBwwGAwpqWtHRwfWr1+PZcuWAUjN9/f0aVeRnvvuuw9z5szB66+/nvB6powBaGlpQV5eHvs5Pz8fzc3NAkoUXZ5++mlce+217OdU1Xf48OEYN24cAODs2bN4//33IZFIUlJXAFAoFNiwYQMqKipQWlqass/1V7/6FVasWAGdTgcgNd9fg8GA0tJSvPjii/jrX/+Kt99+GxcuXEhoPVPGADidTkgkl9OeMgzj9TnVSHV9T548ifvuuw8rV65EYWFhSuu6fPlyHDhwAE1NTTh79mzK6bplyxYUFBSgtLSU/S4V39/x48fjueeeQ0ZGBrKzs3HnnXdiw4YNCa1n
0tUD4GPAgAE4dOgQ+1mv17PuklRkwIAB0Ov17OdU0vfw4cNYvnw51qxZg4qKCnz++ecpqWt9fT1sNhtGjRqF9PR0lJeXY9euXZDJZOwxqaDrzp07odfrUVVVhc7OTnR3d+P7779POT0PHToEu93OGjqGYTBo0KCEfndTZgYwefJkHDhwAG1tbTCbzdi9ezemTp0qtFgxY+zYsThz5gzOnTsHh8OB9957LyX0bWpqwiOPPILf/e53qKioAJC6ujY2NuKJJ56AzWaDzWbDRx99hJqampTT9dVXX8V7772H7du3Y/ny5bj55pvx8ssvp5yeXV1deO6552C1WmE0GrFt2zY89thjCa1nyswA+vfvjxUrVuDuu++G3W7HnXfeiTFjxggtVsxQqVRYt24dfvzjH8NqtWLatGm49dZbhRYrYl555RVYrVasW7eO/a6mpiYldZ02bRqOHTuGuXPnQiaToby8HBUVFcjOzk45XX1Jxfd3xowZOHr0KObOnQun04nFixdj/PjxCa0nVQQjCIIQKSnjAiIIgiDCgwwAQRCESCEDQBAEIVLIABAEQYgUMgAEQRAihQwAQRCESCEDQIiS++67D21tbREfc/DgQdx+++1B2xsxYgTntT766COsXbsWgCuV8K5du9DY2Ijx48cHvSZBRErKBIIRRDjs378/KsdEyi233IJbbrkl5u0QBBc0AyBExy9+8QsAwD333IPPP/8cS5YsQWVlJebMmYN33nnH75impiZ8/PHHqKmpQXV1NaZPn44//OEPYbf7hz/8AfPmzUNVVRU+/vhjAMDWrVvx4IMPRkUvgggXmgEQouOZZ57B1q1b8be//Q0LFizAypUrUV5ejubmZsyfPx9DhgzxOqZfv35YuXIl1q1bh6FDh6K5uRkzZszA3XffHVa7gwcPxlNPPYW6ujosWbIE77//fow0JIjQIANAiJb6+npYrVaUl5cDcOWTKi8vx759+7x88BKJBC+99BI++eQTvPfee6ivrwfDMDCbzWG1t2jRIgBASUkJiouL8dVXX0VPGYLoA+QCIkSLRCLxy83OMAx6enq8vuvu7sa8efPw7bff4qqrrsLKlSshl8sRbhotqfTyn5vT6YRcTuMvQljIABCiRCaTYdCgQZDL5di9ezcAoLm5GR988AEmT57MHtPT04Nz587BaDTipz/9KW6++WYcPHgQNpsNTqczrDa3bdsGAPj222/R0NCAsWPHRlcpgggTGoIQouTWW2/Fvffei40bN2Lt2rX44x//CIfDgUceeQQ33HADe8ySJUvw/PPPY/r06bjtttugVCpRUlKCYcOG4dy5c1AqlSG3ef78ecydOxcSiQS///3vkZWVFSPtCCI0KB00QRCESKEZAEFEgZdffhk7duzg/O3+++/HnDlz4iwRQQSHZgAEQRAihRaBCYIgRAoZAIIgCJFCBoAgCEKkkAEgCIIQKWQACIIgRMr/B0x5tiafPYALAAAAAElFTkSuQmCC",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Create a visualization with Modin df\n",
    "sns.scatterplot(data=modin_tips, x=\"total_bill\", y=\"tip\")\n",
    "sns.rugplot(data=modin_tips, x=\"total_bill\", y=\"tip\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<AxesSubplot:xlabel='total_bill', ylabel='tip'>"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAEJCAYAAACdePCvAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAA4f0lEQVR4nO2de3wU5fX/P3tPdjebkBsESEADAVTuogaUi5KgxBCIAoGKWqWKl9JiLVDqr/bnCyva/kqlSu23+rWtV6AFEYsIilokFAUF1IKBcAmRkCy5bXazt+zO74/NDnuZ2Uv2Mrs75/16+ZLdnZnnnJnJc57nPM85R8IwDAOCIAhCdEiFFoAgCIIQBjIABEEQIoUMAEEQhEghA0AQBCFSyAAQBEGIFDIABEEQIoUMAEEQhEiRCy1AuLS3m+B0Bg5dyMnRorXVGCeJhEMsegLi0VUsegLi0VVoPaVSCfr103D+lnQGwOlkghoA93FiQCx6AuLRVSx6AuLRNVH1JBcQQRCESCEDQBAEIVLIABAEQYgUMgAEQRAihQwAQRCESCEDQBAEIVLIABAEQYgUMgAEQRAihQwAQRCESCEDQBAEIVLIABAEQYiUmBoAo9GI22+/HY2NjQCA2tpaVFZWory8HOvXr49l0wRBEEQQYmYAjh49ikWLFuHs2bMAAIvFgjVr1mDjxo3YuXMnvvnmG3z66aexap4gCCLxkAAGsx0NehMMlh5AIqw4MTMAmzdvxpNPPon8/HwAwLFjxzBkyBAUFhZCLpejsrISu3btilXzBEEQiYUEON7QiVUba/HrVw5i1Yv7cbyhU1AjEDMD8PTTT+Paa69lP7e0tCAvL4/9nJ+fj+bm5lg1TxAEkVAYuu3YsPkIrHYHAMBqd2DD5iMwdNsFkylu9QCcTickksumjmEYr8+hsvJPtWhpN3P+tqh8BBbPGgkAyMvL6JugSYZY9ATEo6tY9ATEo2teXgYuntKznb8bq92BbrsDxUXZgsgVNwMwYMAA6PV69rNer2fdQ+Hw3EOTAxZX0Ou7kJeXAb2+q09yJhNi0RMQj65i0RMQj65uPdUqOVQKmZcRUClkUCtkMb0PUqkEOTla7t9i1qoPY8eOxZkzZ3Du3Dk4HA689957mDp1aryaJwiCEBRduhzLF4yDSiED4Or8ly8YB51aIZhMcZsBqFQqrFu3Dj/+8Y9htVoxbdo03HrrrfFqniAIQlgYYFRRJp59eDI6TDZkaZSuzl/AapExNwB79+5l/11aWop333031k0SBEEkJgygS1dAl65gPwsJRQITBEGIFDIABEEQIoUMAEEQhEghA0AQBCFSyAAQBEGIFDIABEEQIoUMAEEQhEghA0AQBCFSyAAQBEGIFDIABEEQIoUMAEEQRDgkWFWvSIhbMjiCIIikp7eql7uwizuj56iiTMHz+vQFmgEQBEGESCJW9YqEpJsBBKoINmfKUMy96co4S0QQhFjoMNo4q3p1mGyXM3wmEUlnAIJVBCMIgogVWRkqzqpeWRqlgFL1HXIBEQRBhEgiVvWKhKSbARAEQQhGAlb1igQyAARBEOGQYFW9IoFcQARBECKFDABBEIRIIQNAEAQhUsgAEARBiBQyAARBECKFDABBEIRIIQNAEAQhUsgAEARBiBQyAARBECKFDABBEIRIEcQAbN++HRUVFaioqMCzzz4rhAgEQRCiJ+4GwGw24+mnn8Zrr72G7du349ChQ6itrY23GARBEKIn7gbA4XDA6XTCbDajp6cHPT09UKlU8RaDIAhC9MQ9G6hWq8VPfvIT3HbbbUhPT8ekSZMwYcKEeItBEAQheiQMw8Q1memJEyewevVqvPLKK8jIyMDjjz+OMWPGYOnSpfEUgyAIQvTEfQbw2WefobS0FDk5OQCA6upqvPnmmyEbgNZWY9CSkHl5GdDruyKWNdERi56AeHQVi56AeHQVWk+pVIKcHC33b3GWBSNHjkRtbS26
u7vBMAz27t2L0aNHx1sMgiAI0RP3GcCNN96I//73v6iuroZCocDo0aPxwAMPxFsMgiAI0SNIScgHHniAOn2CEAsSwNBtR4fRhqwMFXTp8qQuo5hKUE1ggiBihwQ43tCJDZuPwGp3QKWQYfmCcRhVlCm0ZAQoFQRBEDHE0G1nO38AsNod2LD5CAzddoElI4AknAGs/FMtWtrNnL/NmTIUc2+6Ms4SEQTBR4fRxnb+bqx2BzpMNoEkIjxJOgPw3EOTg24DJQgiMcjKUEGlkHkZAZVChiyNUkCpCDfkAiIIImbo0uVYvmAcVAoZALBrADq1QmDJCCAJZwAEQSQRDDCqKBPPPjwZHSYbsjRKV+dPk/iEgAwAQRCxhQF06Qro0hXsZyIxIBcQQRCESCEDQBAEIVLIABAEQYgUMgAEQRAihQwAQRCESCEDQBAEIVLIABAEQYgUMgAEQRAihQwAQYgFCWAw29GgN8Fg6QEkQguUpKTQfaRIYIIQA4Hy8lNkbuik2H2kGQBBiADKyx8dUu0+Jt0MIFA9gJLCTKz+wcQ4S0QQiU+gvPxsjh4iKKl2H5POAFA9AKJPCFWXNkHq4VJe/uiQavcx6QwAQYSNUH7bBPIXu/Py+8pCqZnDI9Xuo4RhmKQSu7XVGHQGkJeXAb2+K04SCYdY9AQi09VgtmPVxlq/UduzD0+O6bS9L+3G9Jm6ZyMJkpc/ad/fMO+j0HpKpRLk5Gg5f0u6GQCtARDhIpTfNuH8xZSXPzqk0H1MOgNAawBEuAjlt001fzGRetA2UCLlEaouLdXDJRKdpJsBEETYCFWXlurhEgkOGQBCHAjlt00hfzGRepALiCAIQqQIYgD27t2L6upq3HbbbVi7dq0QIhBEapBCicmI+BN3F9D58+fx5JNPYsuWLcjJycE999yDTz/9FNOmTYu3KASR3CRQoBmRnMR9BrBnzx7Mnj0bAwYMgEKhwPr16zF27Nh4i0EQwhGlUXuqJSYj4k/cZwDnzp2DQqHAsmXL0NTUhOnTp+OnP/1pyOcHCgRbVD4Ci2eNBOCKvhMDYtETiL2uTieDpksmtBnMyNaloyBXA6k0uj4Vp5PBga+bsP6tL9lR+4pFE1A6uoBtK1Q9L57ScwaaddsdKC7KjqrcsUIs72+i6hl3A+BwOHDo0CG89tprUKvVeOihh7Bt2zZUV1eHdH6wQDC9vkvw0Ot4IRY9gTjoGid3isFsZzt/wNVhr3/rSwzo50oPEY6eapWcM9BMrZAlxXshlvdXaD0TKhVEbm4uSktLkZ3tGqHMnDkTx44dC9kABJoBzJkyFHNvujJqshLigc+dEu18QdFMD5FqicmI+BN3AzBjxgysWrUKBoMBGo0G+/btwy233BJvMQjCi3jl7YlqeggKNCMiJO4GYOzYsVi6dCkWL14Mu92OKVOm4I477gj5fMoFRPSZALn545W3J+qjdgo0IyKA0kEnMWLRE4iCrsF8/PHcUhkgnXBeXgb0l7oSoohMrBHL+yu0ngm1BkAQQhDUxx9Pd0qAUbvTydDefiJuJJ0BoEVgoi+E5ONPAHdK0yVTXBajCQKgXECESHD7+D1JxNz8bQYzr6EiiGiTdDMAWgQm+kKybJnM1qVTERkibiSdASCIPpEkWyYLcjVJYaiI1IAMACEeEsDHHwypVJIUhopIDcgAEESiEQ9DJQGMlh4YzD2wWHuQm5mWsttNCX5CNgCdnZ2QyWTQarn3kxJhECAgiUhQYvnMPK5tYyRQShHb90EC1F/oQqPeiLf31NF2UxET1ACcPn0aP//5z3H8+HFIJBKMHz8ezz33HAYOHBgP+VIPyuGefMTymXFc+9H5Y3H10CzAGQ3h/TF021F/wYBtn5yi7aYiJ6gB+MUvfoH58+fjjjvuAMMw2LRpE375y1/i1VdfjYd8fgSKAygpzMTqH0yMs0ThEa+kYylLKCPxaI3We69jtDoCP7MI2uN6H17YchRr7p2Ewlz15esEayMMGTqM
NjgZhnO76aUuq7BrDuHcSyFm0ik2ew9qAMxmM2pqatjPS5YswebNm2MqVF+51GkRWoSgxCvpWEoSykg8WqN1j+tUTSvmf2ZqRUTt8b0Px8+2IVOtYA1MNNNYZGWoIJVIOLebnjrfCavVIcyMNBw9hJhJp+DsPWgg2JVXXokvv/yS/VxXV4fBgwfHVKi+cuPoAqFFCEqyBCQlIqFUwIpWlSzf6/A9s0jb43sfnE6wwV/B2ghXBl26HMUDdagpK2HbVilkWDxrBD784pxgVcXC0UOIamipWIEtqAG4cOEClixZgurqaixYsADV1dU4ceIEKisrUVlZGQ8ZQ+bd/WeFFiEo7oAkzz88dp83EZBAs6dwjgm3rb2HGrDQp7N0P7NI29Oly/Ho/LFe115YVoJ9RxrZQUGwNsKWgQGKB2Zg4og8/PKHk/Dj+eOwYOZw7PjsNC51WASLPA5Hj2g951jJlywEdQE9/vjj8ZAjZJI+EjhJApISkVBSNkcrrbPndS51WLCz9gzmTR+GYYMzkatTsc8s4vYY4OqhWVhz7yQcP9sGpxPYc/Ac7rp1VMht9EkGBtCq5HAywNOvfpEQkcfh6BGv9N1CtxlreGcA9fX1AACNRsP533XXXYfrrrsuboKmFL37vItyNWwmSiI4ocyeojXD8r1Ol8mOwnwtrhyg9XpmUWnPCRTmqnHDVf0xbkQuVt010cuvHKyNSGQQbEYqAb5vMaJBb4LB0gNIwpNFCLlTcfbOWw/gwQcfxJ///GdcddVVKCgogOdhZrMZBw4ciJuQnlA9gMuIRU/AQ9cAufRZQjkmFKRAq8GGVoMFObo05OiU3Fszo9UeAjzTYG1EIkMU5Q+1Pd7FVIQhS7zl7mObQv+dBqoHwGsAOjo6ALh2/bz22mtgGAYSiQR2ux133XUXPvjgg5gJHAgyAJcRi56AALoKtOND8Gcah22OBrMdqzbW+rlSUnUrtNDPtE8FYX72s59h//79kEgkKC0tZb+XyWSYNWtW9KUMEaoHQMQDUcZrxMno0VboxIHXALzyyisAXIFgzzzzTNwEIohEQIydVLyMXioupiYrQbeBUudPiJGEjdeQuFwonoun0SJe2xxTcTE1WUm6bKBJvw2USAoSsoBMjF00cRuZ926Ffv6x6bjYaqSt0AKSdAaAIOJCAsZrxNpFE1ejxwCD8rVQShj2MxF/ks4A0CJwgiJ0kqxot++x3U+TpoDRbAckEkGTf8V8XSIBjR4RW5LOAJALKAEROklWtNvnuN7CshI2Qleo5F9xcdEkQdU0InoEXQQmiGDELEmWx4Ln9y1G3gXPaLfPdb1Ne+pw07jBsUn+FaKetHhKRJukmwEEcgGlKWXY+Ni0OEtExMQ1EcaoPtrt810PkhhsBQ1n9kIuGiLKJJ0BIBdQ4hGWayJEX33ABU+1wusa2ZlpUXWN8OkDJvoul7AXdslFQ0QRQQ3As88+i/b2dqxbty7kc5K9IlgqEvLukSiM6i8ZrPi+tRvnm4348Itz6DLZsXzBODy+eAJ+9+aXUdm9wqWPew0g2rtixBhwRiQOghmAAwcOYNu2bZg+fXpY54l6BuAzes6J9X0IdWeNh2vCaLFDpZSj02hDU7sFGWoFtCrX6NlztJublYabJxbhUmc3WgxpsFh7kKW93AbXKLwgRw2b3YlzF7sglUhQPX0Ytn5yih0xB3SNhLNLyMfVkqFWwmp3oKh/BnIz07jP8Uwcl5mGnAyexHE+pERUrNA7wIg+I4gB6OjowPr167Fs2TKcOHEirHNFOwPgGD2vWDQBJYMyYvPHFu7OGgbQqRX4/lI3Nmz+gj2npqwEg/O0KB6YwY52c7PSMHvyFdhz8BzKrh+CJ//nP35t+I7CC3LUuGPGcDy/6Suva1dMvgJ/23kclwxWyKUS3nq5Ye8ScrtaQin5KAWO1rfhpa1fs8csqx6NscXZQY1AQgachYPQO8CIiBBkF9CvfvUrrFixAjqdLqrXHVnUL6rX
SyS4fMXr3/oyZuXo+rKzhuuct/fUof6CAYZuOzvavXliEburZtOeOu42PEbhv156PZYvHI+/bP/G79r52WpXLdvGTvz6lYNY9eJ+HG/o9NpJE8kuoVDObTXY2M7ffcxLW79GqyGEFAo+ej7/2PSk6jxTsUyimIj7DGDLli0oKChAaWkptm7dGtVrv7v/LH5UPRaAKwVrKnHxlJ7TV9xtd6C4KDsh2uM7x8kw6LY7cPUVuVixaALONnV67aoJ1EZe7/eH/nuR81irzYGashL8q/YM+92GzUfw/GPTMShf22ddwrkPJy9e4Dym3WjFyCtyAl7fTV7wQxKSSN/LVPs75SNR9Yy7Adi5cyf0ej2qqqrQ2dmJ7u5u/OY3v8GaNWsivvacKUOh13cJnn87FqhVck5fsVohi4mufWmP7xypRAK1QobWViNKBmUgR6fCp182YuiAjJDbGJCj4Ty2IFeDN3efwKUOC/u91e7AxVYjm2YgbF08fNoatQIFOWo0tXbznpvN48fvp1WF/WyS7d2N5L1MNl37itB69qkgTDzYunUrPv/887B2AYm2IEyirwFIAKOlB2cudmHjP45xrgGw58mAIyfbsOWjOpRdP4R1AwVqIydHi8+OfO8nT2F/LX7+x88CFxcJRxeOY5dVj8bmD+vQ1Nod9TUAX5Lu3Y1gDSDpdO0jQutJBiBV8ClHd8XgfmhtNcatPd6FSY9OIEOjwOzJV2BgnhbadAV06XJo07wXZVuNNjzx5wNeu4GkUmBcSR7ydSrONvLyMqC/1OUvD0LsgELUha9a1doHS2Gy2PnPDbV8ZBDi8u7GMG9SOMFpKft36oPQevapIlg8qK6uRnV1dVjnBNoFlK1T4XcPT4mGaImJTxCQVBrFZPAhtMf3R+25EGjtcODvO4+zo3Ctyr9zaTVY2A72UocFmz+qAwAMLdAhP0MVtjwhRcfy6eLTGbZ2WTl92iaLHUW5Gv774ARytErkaJXs54QkFrt2KDgtaUm6SOBA3Di6QGgRREm4wUw5PJG7OTqePfbB6GsHxNEZPrZ4QvLvyw+AKEtdEryklAEg+kiELgG+oC1NmgIX2rqRnqaA2eaAxdqD3Mw05GQqserua1HX0A4nA+z7qhELZpZcdptwycMh7yWDFekqOXRaBRx2p/fxkuCBWVyBaY0tXVi5ZCJe3v6Nl88/afblB6HDJPLIYwpa8yKlDMCJhnahRUg+ouAS4AraWjCzBL9/60tU3nQlrDbXnn3P3zwXTB+ZPxbXXJEFOPjlycnW8sr7o6pr8M+PT3p12JAw2LDpaMBFWd/ANM/F6PvnXA2TxY7hhf0wsF9aynQSmjQF5wxHkyaOzp+C1rxJqXTQqRwI1idCqB8blUAej2CmtQ/egJ/UjEdzWzfuum0UDCY72/kDwE3jBvsFTb245SgMRntAeZoumXh//8v2b3DTuMFex59vNgUNzPINTPM8/pV3v4XV5mTTWPTl3iYiVlsPFpaVeKWUXlhWAqu9R2DJYg8FrfmTUjOAd/efpYpgbkIc7UQtGRlHKoiashI4GZ9gL57gL3d7fPK0dZkxQJfG68Lw7IDdwWe+x7QaLK5F2l43gNFsx6Pzx+J7vZHzmoX9tdyunyQeSWrVSuw5eA5VU4td94wB9hw8hwnDUzSFigeUeM+fpDMAok4GFwahLvZFMxmZb5tOBpBKJJzX52uPT57sjHRAwkDCcz3PjtcdfOYJu8js03kX5Kjx6Pxx2PZJvd81B+Wow09VneAdiS5djrtuHZW8uYciICUS70WZpDMAot4GGgahjnZCSkYW4sKZ0Wy/PLIE8NV3zZgxsRA1ZSWsG2jfV4340dxr8Jd3vmHbe3T+WOg0CsDJL0//bDVOnW/H2aZO3D/napeLpvf3B+aNxj96t5J6rgG4/9jdawAqpQxGS49X593U2o0XthzBo/PH4oUtR0PqFJN6JCniojJJn3gvBiSdAQgEb6peERLyaCdYhxDI3YHLhiE7
Mw3tJhu2/7uePW5hWQn2fdWI++ZcgycKr4O+3Yz2Lgv+8/UFNieQ0wm8+cEJr1q7vqmYbT0O1H59ga0BoJBJsWLRBDS2dMFmd6J/dhpW3TXRW34JsHZZKZrbzGhsMeKND06gy2THQ3eMQYZGAWvH5fvS1NqNfhnKkDtFwUeSke5kEeu+fREbPz5SygDQIvBlwhrtBOgQ+Nwdv310Cs63mNjfaspGYNsnp7yO27SnDmvunYQcrRINvccCwIJbSrD+rS+9OlAvF0rveoK+w4Jjpy6xswe3UdlZewbr3/oSVVOLsf3f9Zh8TX9/+RlAIZXgjx6yA8Cf/nkM86YPw9t7vmO/Uylk0KYpQu4Uue7tsuoxcACubRV9DQKTAN+3GHHxkom/Yw/RINMWRx7Eavx4SCkD8K8D52gR2E1fRzs+o0uj2c7p7jCYe3z8/QzncRab61h32cYMjQL9s9VBXSiGbjvqLxg4jUrV1GJs/qgOUin65Kop7K/1cg+F7QZw39tHJqNRb0JjiwlvfHAcXSZ7n3MAhbqwHKpBTqaFaUI4UsoAKOQptas1csId7fR2Qq/vOo6bxg2GVAqMHJKNUUOyoO+0oGLyFcjPVoNhGNh7HFgwcziydWnI1KqgkEmguf0q7PjsNJuZU6WQocfhxNFTegwb3A+PLR4PQIKLrSYvF0puVhpmThqCHgcDg6UHOrUc3TYHBuVpUDWtGHsPNQAAbp5YBEiAoQUZGDUkCxNG5MNg8q885obPVTMoRx25G4AB7D0M/rj5qNf1X9r6NdY+WOq128g3oI1rlB7qwjKfUfM1yHFdmPbVUy2HwRTiTIQCswQlpQwAERmGbjte33XcL0Pnw3eOgcPhRKfRhtffP47Km66ExebA5g9PssfUlJUgTSnDHdOH4Z+fnEKXyY6lVdfgvX31mHR1AZ565SB77OJZI3BPxSj87V/HkaFRoGLyFV5uHt/sm/dUjIK9x4k3P/iOPebB6tFY/9aX7DFcWUd1ajmWVY/2y9J5ecE5MjeAZ04jN+x20wwl54heKZf61S4eVZQZ8sIyn1GzWHuEWZjm2FXlG+jHNxNxOpmk3U6bKqTUkFmdRvYsEjqMNs4qXRv/cQxpSgXe7q3iZTDZ2V087mPe3lOHTpMdnb1F2pcvHIetH5/E6GH5ftd784PvYLb2oGpqMX54+9VegWLuoC3PwC6Dyc52/u7v/uxzjLvymNHSwwZotRps2P2fs6iaWowFM0tcrqMP62AweQT+RBDQ5c5p5Il7uynfiL7+goEzEMndsftey3dh2b3+4BnItXzBOOTyyBLrhWlfPbkC/fiCrZoumTjvkZgDs+JNSvWYbQar0CIkNVkZKkil3IFaZlsPG3DF5+93B1/Zepw439zlKqLCE/hltTux+aM61JSVBA3sksskIQV/qZRSnG7qwp/+ebkegXvR2LNgDDsqjjCgKydDyTnDyNEp0eARicx1jzy/6zDZUJSnCW3Rnm9tBxBki6PfzCVIoJ8nbQZz8m6nTRGSzgBQIFjs0KXLMWpoNqeLIV0pZ0eYfMFd7uCr3AwVFL3Xcf/GFbilUshwxcDMgIFduVlpKOzPXTnMN/irfz8NWzQe8F80dh/nHhVHHNDlBMYWZ2Ptg6V+dQD4XDVcAWpZGiXbsT//2HRcbDUGXpuIJC12lOHTM5Qtstm6dArMEpikcwGt/FMt7lu3l/O/dW8cFlq85EMCGK09uNBhwemLRmRqlXhs0Xg/F4Na5fKz7/uqETqNAotnjfQ6pqasBJkaBYoH6qBTK1CYp8aj88fi3IUOVxCWx7FLq65BepoM86YPg9Fs87vWsurR2HekEQAwc9IQ/PW9b/3y1zx05xj2GHf7Ep7Rp7T3LfcaFSNwQFfI9NYBKBmocy389u7+4XPVFA/U+X3nlgcMMChfi6JcDbsdNix6DUOfz+8Dvnru+6rR73l76ehBQa6G8x5xHUvEBkErgvWF+9fu5o0EnjNlKObedKXgFXji
RcR6SoD6C11o1Bu9FmGXLxiHwnwN2rqsXi4Gl3+9Bw6HA9p0Jax2B0yWHqQpZZDLpdAoZd7Vv2TA0VNt2PxhHburqKSoH/Kz03Cp3QJNmgLdVjuUSjnkUglM5t6KWxoFDCZXumeHk8Ezf/uCTdfszl9TUpSF3EwVLnVakaaSQ5cuh5MBVr24329EyVfNy2Dp4Tw+ajtnuCplIXD1rKR8d3317H1+wWYivFXekqpHCo7QzzRhS0L2BVGXhPQhJ0eLM43tfd5CZzDb8Z//tnjttQei1wl6ln70vPb/feAG3tKPfjLydNLzpg/DDVfle8vYhzrGibYLRSzvLiAeXYXWM2FLQvaFQLmAZFIJ/rJyRpwlEggJcODrJjaiti+dV4fRxrvAGo2FOL5tkhf0JrR2WEKSVZcux0N3jOFc2B05JMtbxj4EvxXma/B/7r8eFlsPcjNUsRmB0l53IkFJOgNAi8AuDN12r3QKXguYagV/hyMF2o12mCx22BxODC/K4lyIS1PJcbrZiHSVnDPIyg+fKl0ZagXy+nEv8qmUMmzYdMTlmjHbA3eKDHDlQB3mTR/m2kHDADtrz6DLZOdeLAw1+I1n9K/rTakQNRJwliEKyOiGRNIZAMoG6oJvAdNosffm5OfocCTA8YYOtLSZWZ//qCFZeGDeaPzPtstbGZdWXYNzTZ3409ZveIOsvODo5NznLF841qsy19Kqa/DOpy6X0+HvWvD2nrqgnaJWJcPQAh3Wv/UlMjQKzJw0BIX9tYBEwq4JhEu8Ujonc+ropIWMbsgk3S6gQIgpGyhf4JBKIecNrmk12HC+2eQVeDV6WD7+8VGdV7DU1o9PQqtWsee7g6z4AnS4Ojn3Of2z1Vhz7yQ2OGz3wbM4eb4TKoUMzt4dM0EDgBigdHQBfvvoFNx16yhs++QUfvv6Yax6cT+ON3T2qRpXVHYAJVA7xGWo8lfopJQBqDvfKbQIcUOXLseKRRP8ttCZLNzJ2zpMrgLpfkFcElc65M0f1WHzh3XY/JErBYO7BKP7fCfD8HZafJ2ck2HQ2mnFH7ccQYfRig2bjrCd//1zrsbeww1+MvIhlUrgdDLsWoD7nL7+YYcaeRsp8WqHuAwZ3dBJOhdQIEoKo+y/TWR6R8UD+nkveBrMPazf3b11Uip1FQPXpCtwtqkrpMAdm93p9VkqkfB2WoGCntRpcnSZ7NhZe4YtFiOVSGC29uBSh8VPxkAunWgWYolXcRAqQhJ/BK/XkETQNtAkhlNPj4yevkndHl88AXanE82t3dh98BxuGjcYmnQZMjPS8Kd/XN5l45uMjXcNoHehzWzrgb7Tihc9KmrVlJUgNzMNQwfq0HDR6NUBLp41Agq5FDv2nfaTkc9Xm5eXgfrz7Xj2tUOuHEC9bp99XzVi1V0T++ZP59qnH4u/hjDaEcu7C8RQ1wRbAxD6maZUHAAFgl2GV08J0NrlvQffnXJ5eGEmMjRKtHaa8Ye3LmdwXFY9BgzD+AViuYOsAFeuJc+0xp6GJk0pg8XmQKZGhTSVDC1tLiOz6q6J0KkVuNDuKu4CBqzr54e3X40Nm474jdS4Fkjz8jKgb+3C0fo2v9w7AfPvJ9luELG8u0CMdY2XcQ8BoZ9pSsUBBOKzr5uoIAwAMIDJo5BLblYaZk++wmukXVNWwpZGbGrtxm/++sXljtfpsZWSZzRVmK/Bhs1HUDW12JVvZ5or06YvbveMNk2O7Z96F14/39wVlkvHYLL7ZZp8aevX/DtqEmwkSMQRqvwVEim1CNxt6RFahITBc/Hx5olFfimZ395T50qt0AvfIhnfjgqjxYEMjcIr+2OgxU6u3DijPBLGcZ3jS7iLewm1GySCtNMEESsEmQG88MILeP/99wEA06ZNw8qVK6Ny3aSrBxDIPRHMdSFxjaC/b+mCSimDWiWHxdYDbboSOrUcYBgsXzgO55uNIaVTdgd/ndEboU1TwGSxQ5OmgM3egzX3TkKbwQqVUoptn5xCe5cV
HV0W/GDWKGTrlFi7rBQ2mwNX/3ASWjssSFPJoG83Y2CO5vLUuzdK97ePToHB3AOLtQeZGiUeXzzBr0CKV86cXv3teiMUCllYi3tGs51deAaAvYcacKnD4pUO2u0mcOclkstkoQW+hfksQ5qJuGsCt5qgSVPAauuBVq1MeLcVkbzEvcesra3FZ599hm3btkEikWDp0qXYs2cPysrKQjo/ZSKBgxT3Dthh8AReqZQy7Nh3GvNnlmCLxyLu6nuuDZia2H3+me872WpeC8tK8MW3Tbh50hC8vP0btp1H7hwDg8mG5zd5LuqORJpSiv/d8V8vebjgqlv720en+CWe49Kv9usLqCkr8Utcx+nflQDtRhu2/7veK4XEnoPnXAaD4x66fy+/fkjgwLdwnmWo5R4DyHPXraPIbUXEhLgvAp88eRImkwnjxo0DADz11FMYMmQI7rnnnpDOD7QIXFKYidU/mCj4oksoGMx2rNpYy7kAKpVKUPtNM1s8ZO+hBnSZ7GyHwXfuvOnD4HQy2P7veq8c+K4yfSPw0tbLO31+VHUNTBY7TJYedmG2y2Rnz1MpZFi+cJzfIm1N2QjO5HHzpg/D23u+8/vOM2FbIJ09ffh8x1VNLcbeww2YOWkIhg3ORK6OP3cP3zXW3DsJhblqGLr529j+73ruZHN9eJa6dAUa9Cb8+pWDfuf9eun1KMrVBNV5+7/rUzJyOBn+TqOB0Hom1CLw8OHD2X+fPXsW77//Pt56662oXLvufCfy8jIAgP1/onLxlJ7TLWN1OnGxpZvtZD2Tn3XbHSguyuY918kwl33yHu6dptZuKGQSVE0tRv8cNZpbu9FltuFv/zruL5jk8vUsNodfO8Gqgfl+55Y5kM6exwQ6DhLgUocFb+/5Dr95eLLXOb7wXUMulyIvNyNgG1yyByKYXjaGu4DOgBwt8vK0QXXmukepQqL/nUaLRNVTMKf5yZMn8eCDD2LlypUYOnRoVK6ZrVNBr+8S3OKGglol53bLMBJs/Id3tOumPXWYN30Y1AoZ9Pou/nMlrmhZlcK/WpZUKnHNDKYVs//nuob7PJVChjSlv889WDUw3+/cMgfS2fOYQMd5yuZ7Tqj3V6MMfA/dlcp8ZQ9EML2UUu5yjUopE/TeuOUJVZZkIhn+TqOB0HoGmgEIsgvo8OHDuPfee/Gzn/0M8+bNi9p1bxxdELVrxRq+ilF8qRwK+2td7g4JIJUAD90xxuvcmrIS6DQK7DvSiKVV1/hVy9r26SksnjUC+75qxMLeyl6+VbZqykqw93ADO+t4b189llZd43XM4HwNHpjr/d3iWSPRL0Ppdy13dTBWZ7Uca+6dhJqyEiyYWYKCHDVnBSiue+MpWyhVo/jur/s8rt8XlpVg35FGTtkjacszTfWvl16PZx+e7OfTDyQPVckiYkXc1wCampowb948rF+/HqWlpWGfn1KRwBzBKgZzgCpVagW7UJihUWD25CtQkKtBmlIGhVwKm92JuoYOHD5xERNHDkD/bDW6La5dKRs2He0twO6ESilFfj81urqtGJibgW6LHTqtCjIZ0GWyQ5uuQLelB+o0ORjGCYlEik6jDdp0BV5//7/Qd1rYjJyD8tSw9zDoNFqhVSthMNmQrpIjU63wrg7Gscj56PyxuHpoFncQl8+9SU9XoulSkFq5Idxfrkhmz11AMpkMunS5t+x9fJZhL9pKAJtTgoutRtcuIHsPtGmKlE0bkTR/pxEitJ4JFQm8du1a/POf/0RR0eU96DU1NVi0aFFI56fKIjAvAXaUBFq43PxRHX5+10TkZ6Xj8HctcDovR9zOnnwFCvO1rjz8m4/gUoeFPT83Kw133TrKq+AKb7BUbydntNihUshdZRa1oUXXhroAzEdSP9MwEIuegHh0FVrPhFoEfuKJJ/DEE0/E5NqXOi3BD0p0AlS14guESk+ToaZsBKw2Byx2Bz78ooFNtMYVAfyv2jOsEZg5aQg27fnOa7/867uOc+fXYQCd
WtFbb+CLsKJro5nIjSCI6JBkkVMigSeMnSvLYUGOGtp0Jd7e/Y1fJ88XAezesqlSyFA8SAelwjsh28KyEhgtdu50DH0scEIZGgki8SADkGhwlFZ0R6Xq1HI8Mn+sV9bNpXOuwXOvH0aGRoGqicW92wadWDZ3NCw2B2ckbPHgTKy++1rk6FwFdH73hndpyU176rD2Qe71mUsGq1eOoZsnFgESwGjpCeirDpoWOZmStiWTrAQRgJQyAEmXC6i343SnRsjNTIO+3eyVGoFNxTwoA8fPdaLdYL7cqTPA+ZYudkHYcxT/o7nXQCmXckbC1jd2sjOAn9aM53TNmCx25Gj9R+fpvdsVfdvc/mkQV1Cggu3JlLQtmWQliCAkXTrolNkFJAHqL3ShUW/0Sm2weNZI7PisnvXRuyNqJ47IwxN/PuDax++RVXPlXRPR0GwMOTp3xaIJePndr9nr80X28rl0jDYHDp9ogdXuDOu8QPehtcvmWrhmLs9SfK+VKM800sXsYCSKnvFALLoKrWdCLQJHSqCi8ADwv6tvjqM0fcfQbUf9BYNXJ2q1O/DmBye80ji4o1JbDRZY7Q589V0z7p9zNV5591tY7Q7oO7rRP1sdcnRuY0uX1y6gD784h4fuGOO3C4jPnaNVyTA4T4suM3/pyZA7Qp78Nzt7F6kTcYGYFrOJVCLpDEAgkqkkZIfRxptWwS9Lp1KGHF0aCnLUKB09ENs+OYWashL0z1ZDJpcCjGsxuKm12+s8rujcHod3r95lsuPKggxu1wwXDFA8MAOtXbaIF3W5FpQ37alj898k4gIxLWYTqURK1QMYWdRPaBFCJitDxaZV8IQrS2f/7HTkZCqxrHoMNu2pg93hBAPg+U1H8Mxfv8Dv3/wSd8wYjoIcNXve4lkjkKlR+ETs+n+3fME4aNPk0KUrUJSrcY1igzkFGSAnQxk4+jUE+EbT0t7UCYkY/Ro06pcgkoiUmgEkE7p0OYoH6vzSGz+6YCzaOi1YMLMEYIB/1Z5hM4EyvTOGqonFfts7/7L9G6y5dxLau6wYkKNGT48DOrWSHdm7I0s9v4uoVF6gRd0Q4RtNTxyRj5wMZWIuqkZB75hBu5OIMEkpA5BUJSF7XSn9s9MxvKifaxeQTgWj2Y7/98aXfod3mGzI0vZW+fKowuXGanfg2zNt2PxhnVeaYQAevunLbgqdWgFDtx0NLaa+dxYRlt3j2xqasJ2/m0QsN0i7k4g+kHQGIGUKwgAAA2hVcmhVHo+BJ9tmlkbJdpjnW4wBM0cG9UcnSmeRyKPpJKOvAXqEuEk6AxBoF9CcKUOjMwOIdCodQjlHvt+DBUyNKspEYb4GA3LUXjt33Hv8A+3gcWO09OB8ixFV04oBuLZeRr2zCPUeJuJoOgmh3UlEX0g6AxBzIh0dBzs/2O/BRsW9s4YxV/TD849N98ocOWH4xOAjaAlwuqmLs+BM1DqLRJlhiAjanUT0BQoE8yHSQJ9g50czkKgvegYqJxlqCcS+thHJDEPoYJp40Wc9k9Do0jOND6IJBJNJJfjLyhkRXT/SqXSw84WeqvO1zxaciUJnIbSOooTWU4g+kHQGINaLwJFOpYOdL/RUna/9QTnqqHUWQusoWmg9hQiTlAoEiwaRBvr0pRRhPAOJ4tG+0DoSBBEatAbARaTl/cIoRRjJVD0Sf3E02o9nG0L7UeOFWPQExKOr0Hqm1BpAXIh0Kh3sfKGn6vFoX2gdCYIICrmACIIgRAoZAIIgCJFCBoAgCEKkkAEgCIIQKWQACIIgRAoZAIIgCJFCBoAgCEKkkAEgCIIQKWQACIIgRIogBmDHjh2YPXs2ysvL8cYbbwghAkEQhOiJeyqI5uZmrF+/Hlu3boVSqURNTQ2uv/56DBs2LN6iEARBiJq4zwBqa2txww03ICsrC2q1GrNmzcKuXbviLQZBEIToibsBaGlpQV5eHvs5Pz8fzc3N8RaDIAhC9MTdBeR0OiGRSNjPDMN4fQ4GX1pT
X/LyMsKWLRkRi56AeHQVi56AeHRNVD3jbgAGDBiAQ4cOsZ/1ej3y8/NDPj8u9QCSBLHoCYhHV7HoCYhHV6H1DFQPIO4uoMmTJ+PAgQNoa2uD2WzG7t27MXXq1HiLQRAEIXriPgPo378/VqxYgbvvvht2ux133nknxowZE28xCIIgRI8gFcEqKytRWVkpRNMEQRBELxQJTBAEIVLIABAEQYgUMgAEQRAihQwAQRCESCEDQBAEIVLIABAEQYgUQbaBJgvv7DuNuTddGbPz171xGKt/MBHr3jiMkUX9vI59Z99pfPZ1E24cXeB3jXVvHAYATBw1AB/85yzaDVZUThnKHu/ms6+b8LuHp+Dxjftx4+gCnGhox8iifgCA3V+cBwAU9dfiUqcFuZlpqDvfiTlThmLuTVfi8Y37kZuZBgAYWdQPJxra2X+7r/vw7z9F+aRC9lrlkwrxrwPnAADFg3TssTeOLsBnXzcBAPvvbksPyicVYsf+s36yu/V16+nW9fDxi6ysDc1GFPXXsjLNvelKr/v4zr7TAMDq7JbZ9xj3eb56rv7BRL9ruHHL5/7ds213O77vgSee8nk+23VvHGb1dL8Xq38w0e9aXOd7fn+iod3vPK5ruGX2lMn3euHgK6/vNbiu6Sl3sGOD6RPOb9H42w52n4OdH0n70YIMQADe3X82oocU7Py6853s/+vOd3od++7+s7zX8DyP63hf2gxW9nvPczw/txmsXu21Gazsd57neP7bYnN4tef5b7dOvt9z/dv3/259+dr1lct97zzvo68svnK5j3Gfx3Vv+K7hls9TXk99ffF9Jp7yeT5b32twXc8ts+/5vt8HwlNvdxue98PzmHDgun++77TvNT3lDnZsMH3C+S0af9uREGn70YJcQARBECKFDABBEIRISToXkFQaWuroUI8LRH6/9IiuE+x89+/5/dIBeMvs/s73e9/fguF5/Wie05frhtM+0Hc9+WTjOiaUY4PJ53tssOcV6jPnen/43plA75IvXHr7tteXd9/3nGCffWUJdmyo7YbyWzT+toG+90eRth8OgdqRMAwTOLcyQRAEkZKQC4ggCEKkkAEgCIIQKWQACIIgRAoZAIIgCJFCBoAgCEKkkAEgCIIQKWQACIIgRAoZAIIgCJFCBoAgCEKkpJQB2LFjB2bPno3y8nK88cYbQosTdYxGI26//XY0NjYCAGpra1FZWYny8nKsX79eYOmixwsvvICKigpUVFTgueeeA5C6uj7//POYPXs2Kioq8OqrrwJIXV0B4Nlnn8Xq1asBpKaeS5YsQUVFBaqqqlBVVYWjR48mtp5MinDx4kVmxowZTHt7O2MymZjKykrm5MmTQosVNY4cOcLcfvvtzNVXX82cP3+eMZvNzLRp05iGhgbGbrcz9913H/PJJ58ILWbE7N+/n1m4cCFjtVoZm83G3H333cyOHTtSUteDBw8yNTU1jN1uZ8xmMzNjxgzm+PHjKakrwzBMbW0tc/311zOrVq1KyffX6XQyN954I2O329nvEl3PlJkB1NbW4oYbbkBWVhbUajVmzZqFXbt2CS1W1Ni8eTOefPJJ5OfnAwCOHTuGIUOGoLCwEHK5HJWVlSmhb15eHlavXg2lUgmFQoHi4mKcPXs2JXW97rrr8Pe//x1yuRytra1wOBwwGAwpqWtHRwfWr1+PZcuWAUjN9/f0aVeRnvvuuw9z5szB66+/nvB6powBaGlpQV5eHvs5Pz8fzc3NAkoUXZ5++mlce+217OdU1Xf48OEYN24cAODs2bN4//33IZFIUlJXAFAoFNiwYQMqKipQWlqass/1V7/6FVasWAGdTgcgNd9fg8GA0tJSvPjii/jrX/+Kt99+GxcuXEhoPVPGADidTkgkl9OeMgzj9TnVSHV9T548ifvuuw8rV65EYWFhSuu6fPlyHDhwAE1NTTh79mzK6bplyxYUFBSgtLSU/S4V39/x48fjueeeQ0ZGBrKzs3HnnXdiw4YNCa1n
0tUD4GPAgAE4dOgQ+1mv17PuklRkwIAB0Ov17OdU0vfw4cNYvnw51qxZg4qKCnz++ecpqWt9fT1sNhtGjRqF9PR0lJeXY9euXZDJZOwxqaDrzp07odfrUVVVhc7OTnR3d+P7779POT0PHToEu93OGjqGYTBo0KCEfndTZgYwefJkHDhwAG1tbTCbzdi9ezemTp0qtFgxY+zYsThz5gzOnTsHh8OB9957LyX0bWpqwiOPPILf/e53qKioAJC6ujY2NuKJJ56AzWaDzWbDRx99hJqampTT9dVXX8V7772H7du3Y/ny5bj55pvx8ssvp5yeXV1deO6552C1WmE0GrFt2zY89thjCa1nyswA+vfvjxUrVuDuu++G3W7HnXfeiTFjxggtVsxQqVRYt24dfvzjH8NqtWLatGm49dZbhRYrYl555RVYrVasW7eO/a6mpiYldZ02bRqOHTuGuXPnQiaToby8HBUVFcjOzk45XX1Jxfd3xowZOHr0KObOnQun04nFixdj/PjxCa0nVQQjCIIQKSnjAiIIgiDCgwwAQRCESCEDQBAEIVLIABAEQYgUMgAEQRAihQwAQRCESCEDQIiS++67D21tbREfc/DgQdx+++1B2xsxYgTntT766COsXbsWgCuV8K5du9DY2Ijx48cHvSZBRErKBIIRRDjs378/KsdEyi233IJbbrkl5u0QBBc0AyBExy9+8QsAwD333IPPP/8cS5YsQWVlJebMmYN33nnH75impiZ8/PHHqKmpQXV1NaZPn44//OEPYbf7hz/8AfPmzUNVVRU+/vhjAMDWrVvx4IMPRkUvgggXmgEQouOZZ57B1q1b8be//Q0LFizAypUrUV5ejubmZsyfPx9DhgzxOqZfv35YuXIl1q1bh6FDh6K5uRkzZszA3XffHVa7gwcPxlNPPYW6ujosWbIE77//fow0JIjQIANAiJb6+npYrVaUl5cDcOWTKi8vx759+7x88BKJBC+99BI++eQTvPfee6ivrwfDMDCbzWG1t2jRIgBASUkJiouL8dVXX0VPGYLoA+QCIkSLRCLxy83OMAx6enq8vuvu7sa8efPw7bff4qqrrsLKlSshl8sRbhotqfTyn5vT6YRcTuMvQljIABCiRCaTYdCgQZDL5di9ezcAoLm5GR988AEmT57MHtPT04Nz587BaDTipz/9KW6++WYcPHgQNpsNTqczrDa3bdsGAPj222/R0NCAsWPHRlcpgggTGoIQouTWW2/Fvffei40bN2Lt2rX44x//CIfDgUceeQQ33HADe8ySJUvw/PPPY/r06bjtttugVCpRUlKCYcOG4dy5c1AqlSG3ef78ecydOxcSiQS///3vkZWVFSPtCCI0KB00QRCESKEZAEFEgZdffhk7duzg/O3+++/HnDlz4iwRQQSHZgAEQRAihRaBCYIgRAoZAIIgCJFCBoAgCEKkkAEgCIIQKWQACIIgRMr/B0x5tiafPYALAAAAAElFTkSuQmCC",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Create a visualization with pandas df\n",
    "sns.scatterplot(data=pandas_tips, x=\"total_bill\", y=\"tip\")\n",
    "sns.rugplot(data=pandas_tips, x=\"total_bill\", y=\"tip\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<AxesSubplot:xlabel='total_bill', ylabel='tip'>"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAEJCAYAAACdePCvAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAABOIElEQVR4nO29eZhcZZ33/T1Lbd3VW7qrk86+kQQSSAIINAJh8Q1LEwIIDvoYx0FH8eIRRX0Q0Uvf10s0cDmDMuroCKLiwgMj+yDCsA0hkQAhCUmArJ210/tS1bWe5f3j1H3qnFPn1Na1ddfv8093V5865757ub/3b705VVVVEARBEDUHX+kBEARBEJWBBIAgCKJGIQEgCIKoUUgACIIgahQSAIIgiBqFBIAgCKJGIQEgCIKoUcRKDyBfhofHoShTv3ShtdWPwcFQpYdRMWj+NP9anX+x587zHFpa6m2/N+kEQFHUmhAAADUzTydo/jT/WqVccycXEEEQRI1CAkAQBFGjkAAQBEHUKCUVgFAohKuvvhrHjh0DAGzevBnr1q3D2rVrcd9995Xy0QRBEEQWSiYAO3bswCc/+Ul0d3cDAKLRKO666y784he/wHPPPYddu3bhtddeK9XjCYIgiCyUTAAeffRRfO9730N7ezsAYOfOnZg3bx7mzJkDURSxbt06PP/886V6PEEQxJShVF37SyYAd999N84++2z9676+PgQCAf3r9vZ29Pb2lurxBEEQU4Luk2P45i+3oGdwvOj3LlsdgKIo4DhO/1pVVdPXudLa6i/msKqaQKCh0kOoKDR/mn+twuYeiUl44IE3oQJYOK8Vfp+rqM8pmwDMmDED/f39+tf9/f26eygfBgdDNVEgEgg0oL8/WOlhVAyaP82/VudvnPtvnnsfPQPj+D+fXI1IKIpIKJr3/Xiec9w4ly0NdOXKlTh06BAOHz4MWZbx7LPP4qKLLirX4wmCICYVW9/vxaadPbiqcx6WzWspyTPKZgF4PB5s3LgRX/7ylxGLxbBmzRpcccUV5Xo8QRDEpGFgNILfPf8hFs5sxPoLFpTsOSUXgJdffln/vLOzE08//XSpH0kQBDFpkWUF//HMHqiqii9csxyiUDpHDVUCEwRBVBGPvrQP+4+NYsPapWhv9pX0WSQABEEQVcK+YyN45IUPcN7y6ehcMaPkzyMBIAiCqALC0QT+4+k9CLTUYcPapWV5JgkAQRBEhVFVFb//24cYDsbwjU+fBZ+nPPk5JAAEQRAVZvOuk9j6fh/WX7gAy+ZNK9tzSQAIgiAqSO9wGH94YS+WzmlG13nzyvpsEgCCIIgKIckKfvXUbogCh39edxp4Pv/2OBOBBIAgCKJCPPH6QXSfDOIfr1iGaY3esj+fBIAgCKIC7OkewvN/P4KLVs7E2cvy74tWDEgACIIgykwwHMcDz+7BjNY6fPKyUyo2DhIAgiCIMqKqKh567gOEIgl8Yd1yeNxCxcZCAkAQBFFGXn33OLbvH8ANaxZh3ozKnnlAAkAQBFEmjveH8MjL+7Fi4TR87CNzKj0cEgCCIIhykJBk/Orp3fC5BXyu6zTwBZyIWGxIAAiCIMrAo68cwLH+cdzcdRqa6t2VHg4AEgCCIIiSs2P/AF565xj+n7Pn4IxFrZUejg4JAEEQRIn5/d8+xOyAHzdcvLDSQzFBAkAQBFFiRkIxrD6lDS6xcimfdpAAEARBlBhVBaog5psGCQBBEEQJUVUVAMBVoQKQABAEQZSQ5PpPFgBBEEStoZAFQBAEUZswC6DMrf5zggSAIAiihFAMgCAIokahGABBEESNoscAUH0KQAJAEARRQigGQBAEUaOooBgAQRBETUIxAIIgiBqF6gAIgiBqFIoBEARB1ChUB0AQBFGjUAyAIAiiRiELgCAIokZJBYErPBAbKiIATz31FLq6utDV1YV77rmnEkMgCIIoD3oQuPoUoOwCEIlEcPfdd+Phhx/GU089
hbfffhubN28u9zAIgiDKgpL8WIXrf/kFQJZlKIqCSCQCSZIgSRI8Hk+5h0EQBFEWqjkGIJb7gX6/H1/5yldw5ZVXwufz4SMf+QjOPPPMcg+DIAiiLFRzFlDZBeCDDz7AX/7yF7zyyitoaGjAN77xDTz44IP4/Oc/n9P7W1v9JR5h9RAINFR6CBWF5k/znwpEkz6g5sa6nOdUrrmXXQA2bdqEzs5OtLa2AgCuv/56/OlPf8pZAAYHQ1AUtZRDrAoCgQb09wcrPYyKQfOn+U+V+Q8OjQMAgqFoTnMq9tx5nnPcOJc9BrBs2TJs3rwZ4XAYqqri5Zdfxumnn17uYRAEQZQFPQZQ4XHYUXYL4IILLsCePXtw/fXXw+Vy4fTTT8cXvvCFcg+DIAiiLFAMwMIXvvAFWvQJgqgJqjkLiCqBCYIgSkg1WwAkAARBECWEzgMgCIKoUeg8AIIgCAee2dyN/377aKWHUTIoBkAQBOHAW+/34a0P+io9jJJRzTGAimQBEQRBMOIJGbKiZL9wklLNMQASAIIgKkosIUOJT93qfuYCqkZ3CwkAQRAVJZqQkUgoUFS1KnvmT5SUC6j65laNokQQRI2gqiricRmKqiISkyo9nJKg0olgBEEQ6SQkhR2YhVAkUdGxlIrUgTDVpwAkAARBVIxYQtY/D4WnpgDoMQASAIIgiBQmAZiiFkA1p4GSABAEUTFi8VoQgOpNAyUBIAiiYsQSqfz/4BR1ASlkARAEQaRjdAGNR6emAFAMgCAIwgajAExVC4BiAARBEDbEkwIg8BzGKQZQdkgACIKoGNFkELilwYPglBUA7WMVrv8kAARBVA7mAmpt9E5ZC6Cam8GRABAEUTGYC6i1yTv1LYDKDsMWEgCCICpGLCGD5zg0+z0YjyR0f/lUQgX1AiIIgkgjFlfgcfPw+1yQFRWRmJz9TZOM1JGQ1acAJAAEUcMEw3Fs2XWyYs+PJSS4XQIa6lwAgFAkXrGxlApFIQuAIIgqZNPOHvz62T0Va8MQSyjwuATU+5gATL2W0EwA+Co8FZ4EgCBqmIGxKIBUMLbcxOIyvC4BDb6pawHISR+QwFffclt9IyIIomwMj8UAAAm5MmfyxhIy3G4Bfl0Apl4mELMABLIACKL8qKqKhFS54KKiqpAqtMBmYyhpAUhSZcYXT8jwuAT4WQxgCraDkGVyARFExXh523F88cevYTgYq8jzX9t+Anf+aktFnp2NoWBlLYBoUgB8HhEcB4SmYEM4mSwAgqgc2/b2AwB6Bscr8vzhYBRDYzG9IrRaiCVk3eWSqJAFEIvL8Lh48BwHv881JS0ARSUBIIiK4XEJAMyHj5QTtu4zV0C1YLSIKiUA8YQMj1sEAE0ApmAMQE5aV+QCIogK4HFrAhCtUKYLQ1aqKw7A/P8AKhaj0NJAtWVoygoAuYAIonLoFkDFBaC6LIChscpaAIqq6kFgQBOAqdgPSFFV8BxHzeAIohJ43ZV1ATGqzQU0FExZAJUQgERCgQqYBGBKWgCyWpXuH4AEgKgBKh0DYJAFYIZZZG4mAHVaEHiqNYSTFbUq3T8ACQBRA1Q6BpAKAldZDCAYRUuDB0BlYgBMAJiFxhrCRSss1MVGIQEw8/LLL+P666/HlVdeiR/84AeVGAJRQ7AujGQBmBkei2F6iw9AZS0AowsImHrVwLJKLiCdo0eP4nvf+x5+8Ytf4Omnn8aePXvw2muvlXsYRA3BXAqV3llKVSYAQ8Eo2lvqAFSmECzNBTRVBUCuXgtALPcDX3zxRVx11VWYMWMGAOC+++6Dx+Mp9zCIGoIV4lQqC4gdCFIOF5CiaG0n2KLqRCQmIRKTK2sBxJkFoO1DG3xuAFNPABRFhSBUpwCU3QI4fPgwZFnGLbfcgvXr1+NPf/oTmpqayj0MooZgPvhYvLKthsvhAnrhraP4zgNvZr2O1QBMa/RCFPjKuoDcqSAwMPUEQFbUqjwMBqiABSDLMt5++208/PDDqKurw5e+
9CU88cQTuP7663N6f2urv8QjrB4CgYZKD6GiFGv+dXXazlJWK/MzrUvubBsafKbnv39oCPM6GlDnddm+r5CxjkYSGBiNoqm5LqMVcGQwDABYOLcFHhcPl1ss+8/Gc3QUANAxvRGBQAM8dUlPQLJt8lT5+3e5BLhdQl7zKdfcyy4AbW1t6OzsxLRp0wAAH/vYx7Bz586cBWBwMKS3V53KBAIN6O8PVnoYFaOY8w+GtHTHUDhekZ9pOKz1uB8cCqHfry32kqzgW7/YhPUXLMDV589Pe0+h8x8ejQAADh0ZwrRGr+N1h44OAwB4WYHAcxgLRsv+sxkY0nozjQej6Oc0VwnHAT39IQCYMn//4eQZB7nOp9j/+zzPOW6cy+4CuuSSS7Bp0yaMjY1BlmW8/vrrWL58ebmHQdQQuQaBt+3tx+//9mHxn5/8aAwCK4oKWVHROxQu6rPYmbrBLE3VhsZi4DigucENl1ghF1Dc7ALieQ71XhfGyQVUNsouACtXrsTnP/95fOpTn8JVV12FmTNn4uMf/3i5h0HUEGzdzSYAu7uHSno+rrESmMUl+kejDlcXRiSmxTmCWU7WGgpG0ez3QOB5iKJQkSygqCUNFJia7SCquRCs7C4gALjhhhtwww03VOLRRA3CLIBITIKqqo49WRRF602T6ZqJYGwGxzKT+kciRX1GJBnozsUCmJYsAnNVKAgcT8gQeA6ikNqH+n1TzwKgLCCCqCB6Ja6S+WQuWdESNou+GGZoBz0SjBX1eboFkE0AgjG0JGMELpGvTB1AXE4LVPt9rqxjn2zIyhQoBBsdHUUoFCrlWAiiJBh7yzAfuR0suSBeot2wMQ2UjUmFuS3zREnFAJxdQKqqYngsarAAuIocCRlLyHobCIa/zoXxKXYqmKyoECZrDODgwYP4+Mc/js7OTpx77rn49Kc/jRMnTpRjbARRFIy9xSIZagF0AShywZheCGZwARltgf5RezeQoqrYe3Qk5+coqopoDhbAeFRCXFL0LKGKBYETzhbAVGoIN6ldQN/61rdw4403YseOHXj33Xdx+eWX49vf/nY5xkYQRcF4FGOmfkBsh16qimG7IDAA9I/YWwAfHB7Gxj9uw/H+3CzvWFzWhSWTBaAXgTELQBQqlgXEqoAZDT4XJFmpeN+mYjKpXUCRSAQ33XQTXC4X3G43NmzYgIGBgXKMjSCKgnGxjSecF7qUBVDcxdAYg0i9lvp8wCEQHI5qu/lYjuNh/n8gczUtawPNLACxUjEAw2EwjPpkP6Cx8cxZTJMJWVEmrwto4cKF2LZtm/713r17MXv27JIOiiCKiXGxzbS7L7UFYAxAmywAh1RQtitXkZs7JJLcNfMcl9EFxA6CmdZY2Swg7ThIswA0MAHIYMFMNjQXUHXm22RNAz1x4gQ2bNiApUuXQhRF7NmzB4FAAOvWrQMAPPPMMyUfJEFMBCVHAWDXxaUSuYCMFoDhdScLgO3Kc3WHMwugtcmTxQUUg8BzaKzXWlS4RK4iFkA8IesixDBaAE0eX9nHVAqq2QWUVQC+8Y1vlGMcBFEyjAtoLhZAsV1A1vtrY0odFO5UC6DvynMUABYAbm/2YXf3sOZ64NN3nuwgGFad6hKEimUBpVkAdQYX0LSpIQDVfCCMowAcOHAAixYtQn19ve33qX0DMVnI1QWklDwInO4Cam30om8kgkhMgs9j/nfM1wUUTgpAoKUO6B5GKCKhKbnLN2IsAgMqVwcQjacLADsTIDilYgDV2wrCUQDuvfde/OpXv8KNN96Ijo4Ocy51JIItW7aUZYAEMVEUFXC7eMQTmbNL5FKlgWYIAgdafOgbiaB/JIK5080dIPN1AbFWF+3N2s45GI47CEAUi2elWrCLyTTQUlVAOxG3CwJ7XeAw1YLA1ZsG6igA99xzD0ZGRrBo0SI8/PDD+h9HIpHApz/96XKOkSAmhKqq8LpFxBPxnCyAcriAGO3NPuwGMDAaTRcAZgHkqAAsa6g9echL
yCYQrKgqhoMx/SxgQLMAVFUbn1imhUpRVcQlBW5LGijPc6jzihljGJONSekC+vrXv4433ngDHMehs7NTf10QBFx++eVlGRxBFANVBUSBgyhwucUAihwETp0IZugGyiyA5G7dLg6Q70Ht0bgEDkBbk5beaddULTgeh6yoplbRrmSGSjwh48W3j+LiVbPS3FHFJq4fCJ/+HL/PNeUsgEkXBH7wwQcBaIVgP/rRj8o2IIIoNqqqggMHj0tAPJ69DsBOJFRVxeP/cxDnr5iBjlb7uFg2JEMlMHPr1/tE+DwCBpLFYGPjcQgChwCMFkBu9w/HJHg9gu72sdtFDwWTNQAWCwAAdh0awmOvHMD7h4fxtU+symNm+cNqG6yFYIDWDmKqxQCq1QLImpxKi3/+DCf/yYjqQFEBjtMOHy80C2g8KuG/thzGtr39BY/D6AJiT+A5DoEmn94O4vZ/24Qv/+R1AMYgcG5EYzK8blFPpbSrBTAeBclgAsDYdXAo5+rjQmHHc9qdWub3Ti0LoJpdQNVZnTDJGR0nAagmVGhZGJ4sAqDXAdhcw4LHuVblWgYAwNINNPksjgPamn0YSBaDsSuMXUJzjQFEYhLqPCJEgUedx96PzqqAWwz598wFZKwk/uVTu4seDDeSsgBsBKDONaUKwWRFmXwuIIKYbKiqivGopKcSpl7XFtpsApCqBNYWp0hMgkvkIQq8/j6nRTEhyZAV1danrd/fJg2U4zi0NXmx6+CgaaE/OTSuxwCsy38sIQNq6iQtRiSuuYAALZ/e1gIIRuESeb3iFkhZACyN9JqPzsfTb3TjkZf34zOXL3Wcz0SwHghvJFMMQFFUHB8Yz6tZnMctYHpLXWEDLQKaC6g699okAMSUYceBQfz7k7vwL7d+1CQCLIPN4xYy7mqVpI+eXXPrff+D0xe24vZPrNQXLCcBefSVAzjSG8S3Pn1W2vfYUmVyATELAFogOC4ppkVPUVTHQrBfP7MH8YSMr/3DKtPrkZiku3/8dS7bfkBDY1oGkDHdU7RYAGcuCSAuKXj+zSM499R2LJ3bYjvniaALgJ0F4HMhnpBtC8VeeOsoHn1lf97P+95nP4J5M8p/yPzQWBSqCrhFEgCCmBDW9EUrIyHNbRIMx00CwA4b97gEhDIclagHgQ1ZQO8dHNRei2e2AIaDMYyGMrstJJs0UI7jEGjW/PHGnkCKoqbqACwK0DsUxsmhsNZN07CDjsRktDVpWUUiz9umnQ4Fo6YAMJCyACJRWf/62gsWYMvuk3hq0yHc8aniC0A87iwADXVaEHs8kkj7fu9wGPVeEf901ak5PScYjuN3z3+Io32higjAE68fhChwOG/59LI/OxdIAIhJw+h4ZgFgC7i1sZmqIhkD4DEwmlsQ2OpiiCYyxwAkWTH1HLK9v8EFpOguIOiLtjEVVFHVVHsGy22DkQRkRcW+4yNYsaBVf91YTczz9v19hsZiOHWeeUFPuYA0i8El8HC7BFx13jz8+b/34YPDw1g2r7giEM3gAqr3poLYxmA1oGVJtTR4ceaSQE7PkWQFD/9tL/qKfPRmLhztC2Hzeydx+Tlz9d9xtVGddglBFEAqj9+88CnMBeTK5gJKBYEly/GN2SyAhJRBAGwqgVNBYE7P2zc2hTNaAOa3qXqB1weHR0yPicQl+JIxAI4DVIsFICsKRkKxtAZsugAkC8nY12tWzkST342nNh2yn9cEyOQCYv2AQjYng42Ox9FU70p73QlR4NHa5Cn62cu58Nir++HziLiqc17Zn50rJADElMHpRC/NAgDcbiFjFo+xHbS1CCtbDECWFVuXi9392ZgALQbgdglo8rstLiCjJWM80lLSheaDI8P665KsIJ5QUhYAx6UJ0mgoDlUFpjWYd9UsC4gFgcWkALhdArrOm4cPj47g/cPDKCYpF1D6EsTiGHaVzKOhOBrrna1AO9qbfegbDhcwysLZ0z2EXQeHcPX589OSEqoJEgBiyuB0pq9qsABy
SwNV0twnKQvAXkASspq249afr1cCpx8JyWKxgSaf2QJQUwfYG9dxVt3b2uhFd09QD9yyPkA+d8oFZB1O6iAY8wIqWrKAXIbe9WtWpayAYh7TyH4PdnUALEPJGsRWVVWzAPzp/Y0yEWipQ99w+SwARVXx2CsH0NrowWVnzSrbcwuBBICYMrAFPC0GgFQaaEJSdKGwYmwFYW2PnM0C0GIAmcdnfyKYpgBtzV7T0ZDGLCDjbdmu+CPL2k1nBjMhMFoAVkHSD4JxsAAiFgsA0I6L7DpvHvYeHcEHRbQCYgkl2Z7DzgLQ5mAVgEhMgiQrtg3uMtHe7MN4VCrbYfNb9/TicG8Q11+0CC4xXeCqCRIAYsrg1M3TGAMAnBdxYzO4NAuA1QE49AmSZGdhsUsDZes/qw9qa/LpC7R2rWJbCMYsgJWLWyEKnO6aSQlAKgZgdQE5WQB6FlBMgihwaa2L16yaiWa/G89s7radXyHEbFpBMwSeh88j6jEJxmgyTbYxXwFINscrhxWQkBQ8/j8HMbfdj3OrNPPHCAkAMWVwdgFpCy3zNzsFco0CYrQAZCXVRtqpnXRuWUDGE8EMQQAAgWavydVjigEYXmcWwLRGLxbPatLjAEwAvJ5MLqAoPG4hrdFbSgDktLYQ2vcFXHbWbHxwZAS9Q8XxpccSsq37h1Hvc+lZSQxWJ1GIBQDYN9wrNq9sO4aB0ShuvGRx1Z4BYIQEgJgysAU8kRYE1iwAdwYLQFFVqGqyNTJS/nAAON4/nkMaqOpoAejXKPaVwIAWAzCNR1FtK4GZW8Tvc2HZ3BYc7Q0hFEno5wHXGYPAlvEMB7WDYKw9/42Lvp1LBgDOX9EBjgM2vdeTcY65YlfkZcTvczlaAPkKAOu42ltiC2A8msAzm7uxfME0LF8wraTPKhYkAMSUIZMFwHGA180EIH0RZ+/1Ja8Zj6QWn4M9Y7rV4OTqySkNVE53AbGluK3Z7JeXVdW2G2gwEococPC6BSyb1wIVwIdHRlIWQHL8mgWQHgOw5tUDMDUqs7MAAKClwYMVC1qxedfJrEKXC9kEoN5OAJKFdk3+/LKAPO5kllWJBeC5LYcRjkq48eJFJX1OMSEBIKYMTv38VWsMwMaNw/zszIViDBgeOjGmZ9kA9haErChQnDNM9Wv05yFVBwBogVnjQpxIyPrO3xgDCIUT8Ptc4DgOC2c2QhQ4HDwxqp8HnLIAkLZQW4+CZHAcpy/8LgcLAAAuOKMDw8EY9nQPZZ5oDsQTsm0RGMPvc5msMAAYC8chJA+MyZf2Zl9Ji8EGR6N48e1jOG/5jLSDfaoZEgBiyuB0opeiqFodQAYXEBMP5h9nu886j2iyAIB0CwMAEpKadCOl744zBYGZN4bnObQadudRh1hDKJKA36e5QERBC5ZG47LuAvIas4AMY5FkrdeQnQUApBZ+JwsAAFYtbkO9VyyKG8juPGAjThZAY727IN96e0tpawGefP0gAOC6ixaU7BmlgASAmDI4pYEqQNYsoDQXUNICWDKnGSf6x3X/s937VYecfSvmdtDaB+NaZnQDReOS9VIAWhYQq5QFkge6SwoiMQkCz+lNxzhLEHg4GIMK2FoA7D7Gj07XnHfaDGzbOzDhlErNBeT8LL9NEHh0PJ53BhCjvdmHkVC8JC2uj/QGsXnXSXzsrNlV2/LBCRIAYsrgXAmsWQAed+4WAIsBLJnTDBVAz2BYX6yt97fr8mkZgON1HFIKYFw8jBaAnQuI4RJ4JGRNALxuQXcpWYPAdgfBGMnFBQRobiBJVvDmnt6M12XD7kB4I/U+FyIx2TSH0fFY3gFgRnuyHXQpMoH+89UDqPOK6Dq/els+OEECQOSNoqi4/z93Yv/x0UoPBS+8dRQvvXMMgHMvIC0InJsFwPr5jyd3nwtnNurXsApV6/uNbSMyBUhlmyCB0QIIGCwAp3TTUCQBv4MFYEzv5C11APpRkI32FgDL/hGztC2eO92P2QE/Nu2cmBsollDg
zhIDALT+RgytD1ChAlCaWoDd3UPYdWgIXZ3z9SZ2kwkSACJvhoMxbN8/gH9/clelh4JHXtqHP764F0CmbqAsCJysA7BZXFMWgLYojSQXzOnT6vTFsSG5+FhjDJLNYe92mLOAzEFgIJWuCJhdQOyeiqJiPJJIO8xFEwDZJAAcz5ncUboF0DAxC4DjOFxwRge6TwZxbALHRsYSMrxZLABAO4oT0H4GwfFEwS4g9rMtZiBYa/mwH62N3qpv+eAECQCRN+x4u2yFT4XQMziOgdHC/kll1stHslYCp84EBrLEAJKL6MmhMESBR2OdC63Jbp2NyT711vcbBccuE8guCKy3gzZcZ3QBmSyA5LWhaAIqUv3yAW3nnpBkGwvA4gIKxlDvFR0zb3KJATDOWz4dAs8VbAWwNhfZXEAAEEkKQCiSgKKqBVsAfp8L9V6xqBbAm3t6caQ3hOsvWlj1LR+cIAEg8oZlKzo1P5sI3/71m7jj37fkdK0prVJVHbOAtBiA1ndG4DnbOgAmHmwRHQnF9aIpFjhlwde0GICpz38mF5AhS8hwJjBjVqAeCzo0l5NRZNgdWRWw32IBSLKqtYI2LO7WbqDDYzG0OOz+gdyygBiNdW6sXNyGv+8+mdY1NRcyNYJjMAEIJ4PNYwXWABgJFDEVNCEpePy1ydPywYmKCsA999yDO++8s5JDIAqA0y2Ayo7D2C44lpAz1AGkFlqv274jqDULCEj5y9nC72gBZIkBGDWBjZG9w+gC8rgE3PHJ1QCg5/VrN9A+6FXAdZYgMIsBGPLjed4SAxiLOvr/gfwsAAC44PQOjIUTeO/AYE7XG8l0HjDDb3EBFVoFbKS9xVe0YrCXtx3D4FgUN146OVo+OFExAdiyZQueeOKJSj2emADsD74YFaHZGBqLYsvukwCA//vyfvzLI+/q3zOmZo6FE1ljAIC267QLsLKF2Wtwo7AdM3O5sMWx0BgAYIgD2KSBAtrCDZizgNg92SHvaTEAORkDcFtdQKn7DgVjjhlAgCEInCUGwDh90TQ01rsLqglIHQbj/CzdAogxAdBiMoXGAABNAAZGowVZLUbGowk8y1o+zJ8cLR+cqIgAjIyM4L777sMtt9xSicdXLaFIoiR5yrk8N1OffCdkRcVIKJb1/cPBGBKSrO9gw9GEY5aLlQee3YNfP7MHQ2NRfHhkBLu7Uy2JjYeoB8fjGQ+EYQut05kA7L1uUdCvtVoAbOG33t+4oHT3BG2KwVJfM7eVapMGCqQsAmMQmMHOM7a6gOIJmxgAnyoEiyW0n71TDQC7j/FjNgSex/nLZ2DngUHT78GKoqgYTgbUGbEM5wEz/LoLSPs5jI1rfzsTsQACzT4oqqoHxAtlMrZ8cKIiAvDd734Xt99+OxobG7NfXEPc9tPX8aM/bKvIc7//27fyfl8sIeNrP3sDt/30dfzw4Xccrxsdj+Ff/u8O3PbT1wEA//snr+P//PvmvJ51YnDc5r5GCyBuCAKbd3iyoupWi5MAMAtAEFLpomzHfMqsJgDA3Bl+AJnTQO//y05s3zfgOA92MDyThHQLQHshZqoD0D4aG8ExXCKPSEyCrKh6BpN231QQeDhLCii7j/FjLnz0jA7IiqpbaHZs29uPb/5ys0kEcnEB+TwiOC51TvHoeAxukdd7HRVCexEygVjLh84Vk6vlgxNlPxT+scceQ0dHBzo7O/H444/n/f7WVn8JRlVcRqMyAoHC/jgO9wb19xZ6j0LoGQzn/DxvOH3Hd7Qv5Pj+0aisH1zCrglFElmfFwg0YFqzDzgyApfHZXodAGSkFh6VFyAK2uIgyYrp3pKiorHBg0CgAf56N1Rwac8eZG2WW+rgTbZXWDC7GYFAAwKBBiyYOw2z2/3444v7ILhE0/tPjJh3lPtPBrH2owv1r73e1K61ubkOrU0+NA5obQlaWurSxsJxZhdQQ3LsMrQmcLNmNqe+5/fqfvJAq1+/V4PfAxVAW5sfJ4a18S2cM83xZ16f
dHM1N/py/jsIBBqwZG4z/r6nF//rqtPSuowCgLJ3AJKsYiicwJKFbQCAo0PaAjw9+bN1wu9zQeV4BAINiEkqWhq9aG8vfNPIJ11k4YRa8P/WH/57HzgO+Ny1pyOQLC4rBeX63y+7ADz33HPo7+/H+vXrMTo6inA4jB/+8Ie46667cnr/4GCoLL7niTA8Mo5+b+E7lf5+TQT6+4NFHFVuz80F60lN2d4/PDJue0225/X3B6EmfeZ9hpxz9r6TA6nXTvSOIZrcLcbiCvr6xvQFKRJNALKK/v4gOKgIheNpzx5MWhjBYBRichcuqKp+nZcHBgZCcIs8RkYjpvcPWKyT7R/2mb4fiaQEs68/CCUuYWREE4BRy70AzX8fMxZAjUXR3x9E3+A46r0u0/VSInWdHJf077Fn9vaN4eBRzW3GK7LjzzySzLaJx6S8/u7OPXU6Hv7bh3jrvRN6BpOR0WRK73t7+7Bouj/5M9B+b5HxmOOzAoEGeN0CBkfC6O8PondwHH6fOKH/CVVV4RZ5HDw6jP7+trzff6Q3iFfePorLz50LTnL+WU6UYv/v8zznuHEuuwA89NBD+uePP/44tm7dmvPiT9QezE0QsYkZRGISGuvdiCdkzQXEMmxUFbKiQhSS7pSEDLdbc214XIJ+MpYRtqkQOKMLKN1l4nHxGWMAgGZNjYZitimLbIyZYsUcx9m6qaxVwIDZZWOtA9DmlToKsiVDDID5pPJxAQHAuae245GX9mHTez22AsDme6Q3JdbxRPYYAADUeVMN4cbCcd2FUygcxyHQ4iu4FkBv+dA5+Vo+OEF1AERVw5qbRWPpQdFITMt9b6xzIxhOmDJwWMBWkhVIsqovNh6nLKDke3leOzjG4xL01sqm8djEEOyySj5MurwAczM3lgXE2kHbpRDyvDmzSDVkARkzgABz5a4xBmAs1hsai6GhzpVTsRITzVyp87pw5pIA3tzdi4TNcZnsd3KkL7WjzaUOANA6sTIBGA3FJ1QDwGhv9hXUD2j3ocnd8sGJigrA9ddfj40bN1ZyCEQB2LU8LhVsfRywydyIxmV4PSIa6l0YM2QBAdAXI7bb9BoFIEMWkMBrLSOmNaafnAUwAbCmgaa+ZgfPOB2gnosFYBUFvRAsErexAFKLqKkVBCvWU1XtIJgMRWBG7OacjQtO70A4JuFdm+A3K5IbGovprkMW38gW0K3ziggnD4IPRRITygBiBJICkE8Vu6KqeOzVyd3ywQmyAIi8sab1lRKWy25sO8AWa7MFEDe1WoglM4HYYs0aj2XLAuJ5Dh89vQMfO2u27Xg8LsHGBZR6LgcOS+Y04/0jI7bvT6WBJq+3WW+NB8NoF2sftLMAnF1AXgcX0PBYLGMGkJFCSppOndeCaY0e29YQxt/J4V7NCsjVBVTvFTEeTej1D8UQgOktPsQlRT9dLBf0lg9rJm/LBydIAIi8eW37ibTXMv0zTyRob7dTY5W3rAFaQ53bVAgGpM4FZvn0ugXgFrTjGy1jMloAHz29A5ecaS8Abhef0QWkqiqWzW1B71A4JZTGSmDmArJpBsewvsbOG4jE5HQXkEEA6uwEIE8LoJDfFM9zOH9FB3YfGkrLsTcKwJGkAMQSMkSB191UTtR5XIhEpaIUgTECelfQ3A6HSUiy1vJhuh/nnjZ5Wz44QQJA5I3dIpEpeDiRw0NsBSC5u4/GJXjdIhrrXQiFE5BkNVWtK7GiLe2jMQYAaItQz+A4jvVpwUnFYAFkwuMS0uoMJMPXKoBl85oBAB8eSXcDWeMFdhaAdQwqjG0gzIug0WdvdKmwe4SjCURiclYLgP2c06yPHLng9BlQAWzeZa4JUBTtdzKt0aMHgrMdBsPweUXEJQWDo5oAFMMCyLcW4KV3jmstHy6Z3C0fnCABIIqC0y7/rv/4O/704r6C72vnqmUCEIlJqEtaAIqqIhiO64ugUSSAVDaR3hI6IePbv34T3/3NVm38GXbkRtxiehA5YVnU57Y3
wOcR8UFSAFRTJXAqUwmwd7mwNZgFwFU11fco3QIQ9GuNbRzYPQZHkxlA2QQgRwF0or2lDkvmNGPTez2mGJGsqOB5DnPbG0wWQKYiMEa9l3Vm1dJsiyEArU3a2cu5ZAKNRxP4ry3dWDEFWj44QQJA5I/NqhyOSYglZISjCQyMRvRd/8mhMHpz+Gc7cHwUvTZmuZ2wJCQZqqoiEpPh9Qh6k7bxqKQLwGDSFRGzWABOLaHZgpxt/fO4hbRmc8YYAKAtokvnNOMDmziAbJmPneCwRdhoVQXD6W0ggFQWkNeSscQa9g2MZj4HgHHGolYAwNz2wgstLzi9A33DEew7ljooSFZUCByHudP9ODkYRiwuI5blPGAGc2n1DGp/F8VwAQk8j9ZGb06ZQP+VbPlwwxRo+eBE2esAiMmPk5/43j9tw6GeVLrfb+68NOd73u3QSsLJBRSJSVBUFXVeEY2GzBjWEO3Xz+xB5/IZhsZjVheQedf+wtajAOzTMo14ssQAGMvmNmP7/gHNJ24TA0hZHOnPYGNwuwSMR7V5Bm06gQIpkfBZBIDdQxeALBbAectnYOXitrT75MPZywL444t7sem9HiyZ0wxAE3BB4DBvegNUAEf7Q4glMp8FwKjzpgTA5xGzpo3mSnuLL+umJBqX8NI7x3De8qnR8sEJsgBqjFJm8BgX/2Jhd+bA4FhM9yfPDvj1k7oAYFZA28EKlp46ugvI4Vzg7pPa2LO5ed2igHhCgaKqiMYlDI3Zd5dcOrcFAHQ3ECM9C8jGAuAsFoCaigE4BYF9FpeKUQA4AM055NBPZPEHtOM0P7KsHW990Kf/3GVF0VxAyUX0SG8wGQPIRQC0uZ4cGi/K7p8RyKEt9O5DQ0hICi48o6Noz61GSAAmMZKs4MePvIuDJ8Zyfg/LqGCUM6e/EOxCCyOhqJ5SOG96g+4CArQd+jmntqMtGexztgDsu5FmiwF4DDGGZ97oxv/327dsD5iZM92Peq+Y5gbSXUAZ0kCZC0iPASAVA6h3EgCrBZD8zx4cjaDJ7865zfNEueCMDsTiMt7+sA+ANl+R5zCt0YN6r5gSgBxiAMwFFInJRfH/M9qbfQjHJMeWJgCwfd8A6r0iFs9uKtpzqxESgEnMycEw9nQP46Hn3i/4HoUc62gVjXxK4/N9np1ASbKKwyeDaGnwoLHeDb/PpQdTeZ5LHpKiLfDWzpNMAOzOBQay58GzRTmWkBEMaznqdtk+PKfVA3xweNi2EjhjEFiPASQXyaQLqM4jpi3kLAbg6AIai2Y8B6DYnDK7Ce0tPr0mgAWBOU6zAg73hhBPyDm5c+oMB9wUVQCyHBCvKCp2HBjE6YtayyaclWJqz26KIyRTAKUJ5Nkb19dcrQHjZU31btNh5kZsWy7I+Y3VTjAkWcHh3iDmz9DcCjzP6b5xnufgcgl6+mcsLoNDauFmQhB1EoCsMYCkgCRk3fXDgpRWls1twcBoVM/EAQBJsaaB2rmAtI8mC8CmDxAAiLoFYHEBJW8yNBbLeA5AseE4rY7iw6Mj6BuJaDGApDkyb3oDjveHtGB9DgJQXyoB0FNB7X9v+4+PIhRJYNXi/BvGTTZIACYxzM8tT+CEI2OWjayoGBiJ5JW3r8J51/ylf30t7TXrOb5AZqtAUdIPAg9HJZwcDGOeITjH3EACz8Et8ohLMv74wl4c6hmD2y3oCy27zniWgHFMWWMAhiCyNf3TyrJ5WhzgwImx1O8qpzRQcwxASwONp/n/AYMF4LZkARkmUk4LAAA+umIGOABv7OyBLKv63OdO90OSVYyNx3OKAbhEQd+BFzUG0JzZAti+fwACz2HFgtaiPbNaIQGoEgrxxRsbfhX+3NTnsqzijl9uwbd+9ffM7zF8PjYez6t/jGwRHO25zgupqqYfU7jv2ChUAPNmpASgwWgBiDziCQUvbTuGXYeGTIuNzyPA7eIxEkrFQozWQF4WgJRZOGYF6uH3uSDJij6H9CMhbSqBWQwg+SwV
mgvImgIKZMoCSn1eTgsA0ATntAXTsHlXDyRZMQhA6vfFurNmg7mBimkBuF0CWho8joHg7fsGsGxus8kFNVUhAagSCirB55gFUJxWC8w9YRccGwnF8MundmluHcvj8imQNI6VCUBCSh//D3+7FcPBGKIJOa1DJTtcxigAbIfILAAjxqpTjuPQ7PeYBMDoqspaB2AoJJNkRXdT2Pm0eU6rBwBSFbtM7JxOBGPvA1IuIJYFZOcC8rgFuEQ+rdUzX0ELANBqAgbHYth7bFTfqMyYVge3K9WWOxdYILjJXzwBADQrwK4auGdwHCeHwlh1SqCoz6tWSACqhQkk41iLi/J6rFEAMgjJX147gK3v92Hr+71p38unRN5kAcipls1WtrzXg217+xFPyLqf23iPJr/blNrIDm7nOS6tYZfHZd7Jtfg9GDGkw0bysADMLiAVswJ+tDV54XFohcHcQAKzAPRuoM6VxyyDx2XJAmrwpS+CHpeA73/uHFxgSVfkDEqWrQq4FJy5pA11HhGRmKTHqniew5xkoVmuAlCvWwDFnUN7s/25ADv2DwIAVi6e+u4fgASg6Dzw7B5s29uf9/sm4saRlfxjAJKs4Lm/Hza5PyTJ+T7MhSEpqqm1AZAKRueC0d0j6RaA+bkXrdQWs3hCq/gV+dSfKSv6mmcpzmGvCzyn7zIZHou7obnBgxFDN0jTAexZLQBzENglcPjM5Uux/sKFttcvm9sMIGUB6GcCZ/h1C5YYQDQuIy4pthYAAExvqUtzkxlF2c51VGpcoqA3TxMMY2FuoFwFwJcUgGLGAAAtE2h0PJ6WqLB9Xz/mtPvR1jSxw2cmCyQARSQSk7Bl18m88vInAltECrEAduwfwH++egB/finVp8eYodKfzOAYHI3i5FBYX1AkWUmzVs5copnLs9rqsz53yLDzZu4gqwXAdtlxSYGimgVmfvLUqfkzzALAisFYDMCINeOkxe/BsEMMIJs1w8QllowBiAKPFQtbsXKR/Y5xZls9GupcaQF79iO0czkxlwlb1EMObSAyYdDMijUxY1aJYBAnJty51AEAKRdQg4P4FQpLBTW2hAiG49h3fLQmsn8YUz/KUUa6TwahQhOCfJlIQVYhMQDmJtnTPaS/ZnQBffvXf8dNl52CP7ywN+1Z1iIqtlAdHzCfjWukPXkU38Y/bjM8L9mx02IB8Jzmx39q0yFwnNZvnrFkTjN2HhjEKbObTe9h2T08z8FtcQFZ/fPNfrfJ6ojGjC4gxykAMFsACVnR3VNOriOO43D6wlacGBjHIBfTxZpZW3bN1ziLBRB0aASXCeOiXykBmD+jAXOn+03Fa4tmNuZcmQwAbU0+BJq9Rc/HZwLQOxzB7KRbaueBQagqsOqU2hEAsgCKyKEebefvlGOeiULWf+aKKcQCSJ2YZXDJGBZFSVZtF3RJVtIW7GyctTSAG9akN9RiP6ewJe2U41KLdmujF5+/+jT9ews6GnHPLZ1YvsDcnVFPA+XSg8DWk6eaLQFTowuIy+IDMsYANBeQ9qxMXTQ/vXYJbv/ESggCp/+uBseicIu87a6ebQbYoqc3gstjF2xc9CvVxZjjOHzjptX47BXL9NdmBfy450udumssG+s+Oh/f3nB20cfGagGMFsD2/QNo9rtNyQVTHRKAIsIEoDALoIAHTiBwzBZxYx2AtUjJbh52AdtsTGuw38Gx+48nz31lu2uO43QXQWO9OdjrFnnbwrOGemMhmDULyGoBmAXAaNHkYwFIsqrPK1P2kNettawWeE631gZGo2ht8tpaDuxvgcUNxpIWQH4uIM7283Lj97nS0inbmnw5pw57XELR/f+A1meo3ivqB8MkJBm7Dg5h1eK2Kdn33wkSgCKSEoACLIACVvOJdPFhx/LZtSlgRKLpAlCItcFx9oeNh5kAJNNOm5OpfhxSC631gBKng2daG704Y1ErFs1qyu4CslgARqHL9s/P8xxEQesImpBSLqBcDlIReE4P2A+MRBwr
qJ0sgIa63BfCanABVTvtLXV6KugHR0YQS8g15f4BSACKxkgohqGxGOq9ohYgzHOnXJgLqHDs3DjWPvdhGwtAltW84xUcZ79AWi0AtjM3WgDWxctJAESBx1dvXIkFHY3pQWC3NQhsXkhN58PmsFZ6XFqhmVbgxeljzoYg8LqADoxG0dZkn5/PfjNMAMajEjjOfORjNjjDj4DWf3tYXArQir/cLt4Ub6oFSACKBNv9M9+03eKZiVI25YzGJfzq6d2mVtDW1EvAHA8AiucC4sCZMkGs9x+PJsBznJ7Jw3EpC8Dqvsh09CQjvRDMLAAuUTD1mTFmJuXiLXEnD5bPNQbAYC6gcDSBcExyTDVkAmvMfqr3uvJy5ZgsgAq6gKqZQLMPg8l23tv3D2DFgtYpd+h7NkgAisShniB4jsNpyaPjxjO0mrWnEBMgt/ds3zeAN/f04us/f0P3d8dt2iFbX7MTMa0OID9ysQDqfaKpmIq5bay791z+Qa0iYZdyaKycNR5knstOPiUAxhhA7i6g/hHteY4WgJK6nt013zRIcgFlZ3qLD6oKbNvbj+FgrKbSPxkkAEXiUM8YZgXq9YVl3MZ/nolCinlzfYuxj0o4OS5bC0DK0QLI8OBrL1yQ/iJnXyxmjAHUe11w2QSBvZYmZ64c0gHTK4HTBcAYCDZaRrmslR4Xr4+dxQD4HP6TBIGHJKsYGNXcDjnFAJLjybeYyzgPWv/tYT//F946Cg7AGTVS/WuEBKAIqKqK7p4xLOhoRH3yFKP8LYB0Rsfj2HVwMMODc7uP0QWw84B2P9sYgMUCsAtmZ6s56Fw+I+01DpzeEtju/uPRBOp9ou66MQWBhQJcQFmygACzAIwZOoPmagGw368rDwtA5LU0UN0CaHawAJgLiOf0++YrAKYsIFIAW1gtwMETY1g0u8l0sFCtQAJQBPpGIhiPSlg4sxH1Pm3Hmk9LZcC+FcQ9f9yGf310x4RP7TK+/bXtJwCk6gCM5JLfLytKRt0xLjbnnNoOQNuB2s1BdwFFJM0C0IuqUm4bq+vILpvISloMwMYFZMwEMo4sl6XS4xL0329+QWAOsqxgYDQCn0fUNwtWUmmgqXnkkwEEUAwgF5rq3fpmYXWNZf8wSACKwKFk64f5MxoMFkCetQA2q+rJIS1H2S71MhiO6xWiVvYeHcGfXtyLE8lCLjtxycUCsGPr+30ZewYZFxu26+c42PbONwaB672iLgCKquoxAKsA5LLQWmsO7CwAp/bCudzf4xJ0V5q1UV0m3KKAuKRgYDSKgIP/HzAEgXlOd9/kHQMw/NzIALCH4zi9IKwW/f8AtYIoCgd7xuAWecwK1OuVpPlaAHY7ZJ7joKiqdq6qZQ37yv2bHO/1/JtHsH3/AP77nWO495ZOWwFI2Jxja80CcuLDZDtmO3gOuPdLnRAFHn959QAAzQVkt+CagsBel+5OkSQV3qQlZec6yoZ1EbcTAKuVkA9uF68LQC4xCUadV8TYeBwjIRkzptU5Xme2ADgAav4uIM74OSmAE3PatarfjtbsfaymImQBFIFDPWOYN6MBAs+D5zl43ULeFoCdW4WtfUqeEWLjgj8ajsOuWaidBRCzcQvlC8dzaGvyodnv0RdijtOqP79+0yrTtZGYBFlREIlJqPe59J17QlZSdQBF+Au1cwHlEktwvJ9L0H9f+fSoqfOICEclDI5GHQPAgDUGoL2WvwAYLQASACc2XL4Ed3zqzEoPo2KQAEwQSVZwpDeEBckulYB2OlP+FkD6a/qBL3kKgOnfXbV3AdnGAHJwAWXDtNtMfsoWoHbLoheJSfpO2ugCkmRF37UXoz7CzgKYiAAYK43zEQCfV8TgWBRxSXFMAQVSGWHGLKCJuIAIZ7xusSLtsqsFEoAJcrx/HAlJsQiAkHcaqJ0LiB3qke+Zv8Ydn6oCqo2AxCUlbRHM1QWUCTt3A3vF6nYJx2T951TvSwWBE1JKAAopPLNiLwCFF/wYzxdwibkvtHUe
URfztgwWgLEQjLkU/TaHwWSCdv1ELpAATBBWAbxgpkEA3BW2AAz/+ypU+yBwQk7b+RTFAjC2ILB8Yu3JE41JCCUD2fVegwvIIE4TOe2MYT0QBqiMBWBsipYpCMxcfgLPpeoAyAIgSgAJwAQ51DMGv89l+of2ecS86wDs6mvZ/7CUwyJoXOSNu7/BsahtkVlcUkz95X0eIe82z3bYtSG29rdn16kAhoJaTrzVBaQfoTiB844ZdoHkicYAGPnGABiZTpxiv0pB4PW/gXzOAgBya2lBECQAE+RQzxjmdzSYFt1CXEB2UWCWAmkNAtsFhSMxCXuPjuCFrUdMFsB/PL0n7XpZUZCQFJNfWRT4IlkAzi4gUeD1OfmTWT6sH7vVBaQfop6MYJ+/YgbOX5FeZFYo+WTvWDEWmuUjJHXJFOGGOlfGE7HYZkDkOQAchGRiQT6QC4jIBRKACRCNSzg+MI6FBv8/AHiTQeB8zvm123s7xQCsJ3IBQCiSwMY/bsMjL+9PcydZxzEaimsuIENxkSjw6D4ZBJBqy2zkR184L5dpWGIAqSwgBlsw2SlRA6MpC8DoAmK7duYC+vzVp5kOhpko+eTvW5moBZDtvNmUBaBFAPx1rrwXdEr9JHKhIgLws5/9DF1dXejq6sK9995biSEUhcMng1BVmALAgBYDUFXNx50zDnUAQLof3FYADEVhxuZmQLrFMByKaRaAz2gBaG0Kmv1uLJnTnHb/XA9+t1t3jIsXWzB1AUhaAHVeMVUHICv6dRN1Af3ktgtsX3flcZC9FbMA5BEETsYAAg4tIBipNFAeHJe/+wcoTvosMfUp+5/J5s2bsWnTJjzxxBN48sknsXv3brz44ovlHkZRONSj7ZjTBMCjLRChPNxA+QSB7QQgGEnorgnW45xhtQCGx2KISwp8Bp80u2TVKQHbvvO5FmRxdjEAw/fZgulPukP6R6PweUQIPA8xmVGTkBRdcCaaBeTU32UiZ8wag9l5FYLlagEkf988p/08C0lTJAuAyIWyC0AgEMCdd94Jt9sNl8uFRYsW4cSJE+UeRlE41DOG1kZv2pF1bGF1CgSrqoqewXFT6qd1/R8ai+qZRJKs4Hh/SP9ezObM4fFIQq+2tbZx/uDwiOnrXYe0hnBGXzZzxaxa3JbWgRMoLKjI3mIUBSYkbFEbHI3qvfldhkIw3QIoQhaQHXbnE+SKyQLIw5XU5HfD5xGxcGZjxuvYlJkL0F9AkzLKAiJyoeytIE455RT98+7ubvz1r3/Fn//855zf39rqL8WwCuJwXwjLFkxDIGA+RDowTRujy+NK+x4A/O3vh/Gzx7bjK/+wGh87Zy4AIGrY6AYCDVj39af0r194+xje+aAP9311DV5795itsBwfiuhdJq1s2X3S9PX/7OgBAMyari1ES+e2QBA47Dk0hAvPmoO+sVjaPaZPb8Q5p83A1j3me51/RodpjsbP3Z5k0LPBq7/Ogp9tyVYIsqKiqcGDQKAB3npP8p4zcUrSDXXDx5bY/gyzcfqiNrx3YMDxvY3NmohefOZsLF/YipfeOoIPDg/n9KxgPPXLmjG90dSora3Jm/Eef/7BVfrO3omLVs/Cc5u7MaujCS2NXiyY1ZT3z8AonIX8/KqByTruYlCuuVesF9C+ffvwxS9+EXfccQfmz5+f8/sGB0N5t0YoBWPhOPqGwrh45Uz09wdN34tEtAV0YGg87XsDIxH8+qn3AABPvrofZ8xvBsdxGBoa16+xvue9AwMAgINHhvDXLd22zdie39Kdcbz/fPVpWHVKGz44MoxmvwcCz2F2ux8/++qFcIk8EpKCWELB6EgYizs0AWtr8uqWQXA0gss/MgvrPzoPzX4PxqMJeN0ivG7BNF7j54MjWjM7yLL+Olv2BIPN4xF5/fv3f+VC1HlENPk9+M2dl9r+PHLhf1+3AglJzvje+79yIXweAQLP44z5LUhISk7PCodSQjs6HEZ0XPt9//z2iyDwXEHjNXLdBfPx
v644FcHRCL75yVVwiULe9zRalxMdTyUIBBom5biLQbHnzvOc48a5IqGid955B5/97Gfx9a9/Hdddd10lhjBhulkBWEe6UrNUR7uFeseBQcTiMj521mwc7g3qcYRMLZ9Z47ZQNIFYXC7ILTKjtQ4+j4iWBg8WdDRi7vQG8ByHOq8LLlFAndelH2Yzf0YjfnPnpaYDskWBA8dx6Gith88joq3JB7+hf48dzFIx7pCZb9/rFnU/tbEtst+X39GHTrhEXk+7dMLvc+kuKe363PZDbpMLyJj+K6YVuxWCwPO6W7HO0CY7HygNlMiFsgtAT08Pbr31Vvz4xz9GV1dXuR9fNA6eGAPHAfNm2AgAy2ax6cLGcu2v6pwHj0vAK+8eA2COAVjFgH01OGrv4jGyaJa9f7mQvHfjwlzIghLSBSB1H7bgCgKnB8vrJ1kvFo8r1ea6kG6lBFEtlP2v98EHH0QsFsPGjRuxfv16rF+/Pq8YQLVwqCeImW31tgFTUbcA0nfqLIOnsd6NzuXTsfX9PoQiCVMWkKoC0xo9ae/NRQCWzmmxfd3lyv9XbZcNlA92AsCygASO04Pl9TnuvKsFtsufSDEZQVQDZf/P+853voPvfOc75X5sUVFVFYd6xhwPkciUwsiasPEch4tXz8Kr20/gjfd6cOq81MLdNxKBz+MCYA7GDjgIgChwkJL58k6LaSGLVa4uESdYrKbB0MiMucd43igAk8sCYBXNE0klJYhqgP6CC2BgNIpQJGFqAGeEuQVsBSAh62mEc6c3YPGsJrz67nGTBXDXf/zdNtDdNxJJew0w94p3ajFQiB95ogLw9ZtW47oLF5jGxBZNgZ+8FgCgWQETqSYmiGqA/oILgHUAtbaAYKQsALsunIop//6SM2ehdziCPd1DpuuGg9G0itHhYMz03saka8XYKtipZ0xBAjBBF9Ccdj/WfXSB6TWjBcDuP9liAIBWQzGRamKCqAZIAArgUM8YREE7AtIOFgOwOwc3lpBN7YTPXtoOv8+FV949brouGpcxq60eM1vNRwcumtmkfz6tURMI4+EiHpeDC6gAAWhvcT62sFCYBcBxhiDwJLQAPC6BXEDEpIf+ggvg0IkxzJvud1wA+AwHucQTclo3yQvP6LD176sq0OQ3B4NPmZ0SgNbkwn/W0oAeaHWyAArJVmFpocVEMHT59E1mC0AkFxAx+Zl8W68KIysKunuDuOiMmY7XcBwHUeBMFoCiqjh8MoihYCzNT79m9Sw8/+aRtHYQdV4RHksswNh3qDVpAYyNxxFo9iEYTmRsMwwATfX5Leq3XrfCNtOpUPQun7I6aYPAgHbIjJ2LjyAmEyQAedIzEEY8oTgGgBmiwEOSVBw4Poqt7/fh7Q/7MBzUsnqWzzenarY3+7BiYSveO6j16PnEJYtx8UfmovvYELZ9OGC6dkayhcKyuc1YubgNL7x1FLMC9ZBkBQdPjCHQ5DVV8FrJd1d/1tJ2/fN8xcOOlAWgItDsQ51ncp7J2tbkK8r5CQRRSUgA8uSgXgGcWQDcIo8X3z6KF98+ClHgsGJBK/w+F472hWwbkV121mxdAOq8IuZMb8DJvjE0GBrNnXNqO9qavfj57RfBJfIQBR73f+VC+H0urFjYikvPmo16rwt3//O5+OKPXyvirDWK4RISDS2fL141C2cvDUzodK5K8U9XLrM7w4cgJhUkAHlyqGcMdR4R7S2ZW/r+4xXLcLg3iECzD6tPaUOd14UnXz+Io30h29jAGYtaMWNaHU4OhU3ZN8y3v3x+C25ZvwIA4POkFky2e+Y5TnelTOTA81Kjt8mQVS0TaBK6f4D0840JYjJCApAnh06MYUFHQ9Z+66uXBLB6ScD02px2rSHTicGw7XtaGjw4ORSGz5AVw/rZl6otcrlZPKsJL71zDDPbip9hRBBEfpAA5EE8IeNY/ziu6pxb0PvnTNf6BrFYgJUzlwTw/uFhzGxNpZeyFMmpEnA897TpWNDRUJIUU4Ig
8oMEIA+O9IagqCoWzMjs/3fCmK9vx6VnzsJFKztMLhy978wk9JM7QYs/QVQHJAB5oAeAs2QAOcFzHD575TIEHISA4zjT4t9U70GT342uznm4eNWsgp5JEAThBAlAHnT3jKGlwYNmf+HZMBetdK4fsMKybj6+ZlHBzyMIgnCCBCAPDvaMZU3/JIhq4fs3n1NQG3CidiAByJFQJIG+4QguPKOj0kPJi7nT/ViSPF+XqC1mt1fP+dlEdVITAhBLyNh1cBATyaQ8MaCd2evUAbRa+X//6ZxKD4EgiCqlJgRgy+6T+P3zH074Pm6Rx/wcBKAYLROKQTlbLKxa3IZ6X038ORHElIFTM51GXoUMDoZsD0vJhKqqODkUzvt9Vvw+V1p3zlIRCDSgvz9Y8PsTkgyAm7TpoxOd/2SH5l+78y/23HmeQ2urvTuwJrZsHMeho9W+d/9UpZrbQRAEUR1Mzu0hQRAEMWFIAAiCIGoUEgCCIIgahQSAIAiiRiEBIAiCqFFIAAiCIGqUSZcGyvOZD2KZStTSXO2g+dP8a5Vizj3TvSZdIRhBEARRHMgFRBAEUaOQABAEQdQoJAAEQRA1CgkAQRBEjUICQBAEUaOQABAEQdQoJAAEQRA1CgkAQRBEjUICQBAEUaOQAFQJoVAIV199NY4dOwYA2Lx5M9atW4e1a9fivvvuq/DoSsvPfvYzdHV1oaurC/feey+A2pr/T3/6U1x11VXo6urCQw89BKC25s+45557cOeddwKorflv2LABXV1dWL9+PdavX48dO3aUb/4qUXG2b9+uXn311ery5cvVo0ePqpFIRF2zZo165MgRNZFIqDfffLP66quvVnqYJeGNN95Q/+Ef/kGNxWJqPB5XP/OZz6jPPPNMzcz/zTffVG+66SY1kUiokUhEveSSS9T333+/ZubP2Lx5s3ruueeq3/zmN2vq719RFPWCCy5QE4mE/lo5508WQBXw6KOP4nvf+x7a29sBADt37sS8efMwZ84ciKKIdevW4fnnn6/wKEtDIBDAnXfeCbfbDZfLhUWLFqG7u7tm5n/OOefg97//PURRxODgIGRZxtjYWM3MHwBGRkZw33334ZZbbgFQW3//Bw8eBADcfPPNuOaaa/CHP/yhrPMnAagC7r77bpx99tn61319fQgEAvrX7e3t6O3trcTQSs4pp5yCVatWAQC6u7vx17/+FRzH1cz8AcDlcuH+++9HV1cXOjs7a+r3DwDf/e53cfvtt6OxsRFAbf39j42NobOzEz//+c/x29/+Fo888ghOnDhRtvmTAFQhiqKA41ItXFVVNX09Fdm3bx9uvvlm3HHHHZgzZ07Nzf+2227Dli1b0NPTg+7u7pqZ/2OPPYaOjg50dnbqr9XS3//q1atx7733oqGhAdOmTcMNN9yA+++/v2zzn3TnAdQCM2bMQH9/v/51f3+/7h6airzzzju47bbbcNddd6Grqwtbt26tmfkfOHAA8Xgcp556Knw+H9auXYvnn38egiDo10zl+T/33HPo7+/H+vXrMTo6inA4jOPHj9fM/N9++20kEgldAFVVxaxZs8r2908WQBWycuVKHDp0CIcPH4Ysy3j22Wdx0UUXVXpYJaGnpwe33norfvzjH6OrqwtAbc3/2LFj+M53voN4PI54PI6XXnoJN910U83M/6GHHsKzzz6Lp556CrfddhsuvfRSPPDAAzUz/2AwiHvvvRexWAyhUAhPPPEEvva1r5Vt/mQBVCEejwcbN27El7/8ZcRiMaxZswZXXHFFpYdVEh588EHEYjFs3LhRf+2mm26qmfmvWbMGO3fuxLXXXgtBELB27Vp0dXVh2rRpNTF/O2rp7/+SSy7Bjh07cO2110JRFHzqU5/C6tWryzZ/OhGMIAiiRiEXEEEQRI1CAkAQBFGjkAAQBEHUKCQABEEQNQoJAEEQRI1CAkAQBFGjkAAQNcnNN9+MoaGhCV/z5ptv4uqrr876vKVLl9re66WXXsIPfvAD
AFpb4Oeffx7Hjh3D6tWrs96TICYKFYIRNckbb7xRlGsmymWXXYbLLrus5M8hCDvIAiBqjm9961sAgH/8x3/E1q1bsWHDBqxbtw7XXHMNnnzyybRrenp68Morr+Cmm27C9ddfj4svvhg/+clP8n7uT37yE1x33XVYv349XnnlFQDA448/ji9+8YtFmRdB5AtZAETN8aMf/QiPP/44fve73+ETn/gE7rjjDqxduxa9vb248cYbMW/ePNM1LS0tuOOOO7Bx40bMnz8fvb29uOSSS/CZz3wmr+fOnj0b3//+97F3715s2LABf/3rX0s0Q4LIDRIAomY5cOAAYrEY1q5dCwCYPn061q5di9dff93kg+c4Dr/85S/x6quv4tlnn8WBAwegqioikUhez/vkJz8JAFiyZAkWLVqEd999t3iTIYgCIBcQUbNwHJfWZ11VVUiSZHotHA7juuuuw+7du3HaaafhjjvugCiKyLeNFs+n/t0URYEo0v6LqCwkAERNIggCZs2aBVEU8cILLwAAent78be//Q3nn3++fo0kSTh8+DBCoRC++tWv4tJLL8Wbb76JeDwORVHyeuYTTzwBANi9ezeOHDmClStXFndSBJEntAUhapIrrrgCn/3sZ/GLX/wCP/jBD/Bv//ZvkGUZt956K8477zz9mg0bNuCnP/0pLr74Ylx55ZVwu91YsmQJFi9ejMOHD8Ptduf8zKNHj+Laa68Fx3H413/9VzQ3N5dodgSRG9QOmiAIokYhC4AgisADDzyAZ555xvZ7n/vc53DNNdeUeUQEkR2yAAiCIGoUCgITBEHUKCQABEEQNQoJAEEQRI1CAkAQBFGjkAAQBEHUKP8/4tJAb6qNkeYAAAAASUVORK5CYII=",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Create a visualization with Modin df\n",
    "sns.lineplot(data=modin_tips, x=\"total_bill\", y=\"tip\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<AxesSubplot:xlabel='total_bill', ylabel='tip'>"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAEJCAYAAACdePCvAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAABOIElEQVR4nO29eZhcZZ33/T1Lbd3VW7qrk86+kQQSSAIINAJh8Q1LEwIIDvoYx0FH8eIRRX0Q0Uvf10s0cDmDMuroCKLiwgMj+yDCsA0hkQAhCUmArJ210/tS1bWe5f3j1H3qnFPn1Na1ddfv8093V5865757ub/3b705VVVVEARBEDUHX+kBEARBEJWBBIAgCKJGIQEgCIKoUUgACIIgahQSAIIgiBqFBIAgCKJGIQEgCIKoUcRKDyBfhofHoShTv3ShtdWPwcFQpYdRMWj+NP9anX+x587zHFpa6m2/N+kEQFHUmhAAADUzTydo/jT/WqVccycXEEEQRI1CAkAQBFGjkAAQBEHUKCUVgFAohKuvvhrHjh0DAGzevBnr1q3D2rVrcd9995Xy0QRBEEQWSiYAO3bswCc/+Ul0d3cDAKLRKO666y784he/wHPPPYddu3bhtddeK9XjCYIgiCyUTAAeffRRfO9730N7ezsAYOfOnZg3bx7mzJkDURSxbt06PP/886V6PEEQxJShVF37SyYAd999N84++2z9676+PgQCAf3r9vZ29Pb2lurxBEEQU4Luk2P45i+3oGdwvOj3LlsdgKIo4DhO/1pVVdPXudLa6i/msKqaQKCh0kOoKDR/mn+twuYeiUl44IE3oQJYOK8Vfp+rqM8pmwDMmDED/f39+tf9/f26eygfBgdDNVEgEgg0oL8/WOlhVAyaP82/VudvnPtvnnsfPQPj+D+fXI1IKIpIKJr3/Xiec9w4ly0NdOXKlTh06BAOHz4MWZbx7LPP4qKLLirX4wmCICYVW9/vxaadPbiqcx6WzWspyTPKZgF4PB5s3LgRX/7ylxGLxbBmzRpcccUV5Xo8QRDEpGFgNILfPf8hFs5sxPoLFpTsOSUXgJdffln/vLOzE08//XSpH0kQBDFpkWUF//HMHqiqii9csxyiUDpHDVUCEwRBVBGPvrQP+4+NYsPapWhv9pX0WSQABEEQVcK+YyN45IUPcN7y6ehcMaPkzyMBIAiCqALC0QT+4+k9CLTUYcPapWV5JgkAQRBEhVFVFb//24cYDsbwjU+fBZ+nPPk5JAAEQRAVZvOuk9j6fh/WX7gAy+ZNK9tzSQAIgiAqSO9wGH94YS+WzmlG13nzyvpsEgCCIIgKIckKfvXUbogCh39edxp4Pv/2OBOBBIAgCKJCPPH6QXSfDOIfr1iGaY3esj+fBIAgCKIC7OkewvN/P4KLVs7E2cvy74tWDEgACIIgykwwHMcDz+7BjNY6fPKyUyo2DhIAgiCIMqKqKh567gOEIgl8Yd1yeNxCxcZCAkAQBFFGXn33OLbvH8ANaxZh3ozKnnlAAkAQBFEmjveH8MjL+7Fi4TR87CNzKj0cEgCCIIhykJBk/Orp3fC5BXyu6zTwBZyIWGxIAAiCIMrAo68cwLH+cdzcdRqa6t2VHg4AEgCCIIiSs2P/AF565xj+n7Pn4IxFrZUejg4JAEEQRIn5/d8+xOyAHzdcvLDSQzFBAkAQBFFiRkIxrD6lDS6xcimfdpAAEARBlBhVBaog5psGCQBBEEQJUVUVAMBVoQKQABAEQZSQ5PpPFgBBEEStoZAFQBAEUZswC6DMrf5zggSAIAiihFAMgCAIokahGABBEESNoscAUH0KQAJAEARRQigGQBAEUaOooBgAQRBETUIxAIIgiBqF6gAIgiBqFIoBEARB1ChUB0AQBFGjUAyAIAiiRiELgCAIokZJBYErPBAbKiIATz31FLq6utDV1YV77rmnEkMgCIIoD3oQuPoUoOwCEIlEcPfdd+Phhx/GU089
hbfffhubN28u9zAIgiDKgpL8WIXrf/kFQJZlKIqCSCQCSZIgSRI8Hk+5h0EQBFEWqjkGIJb7gX6/H1/5yldw5ZVXwufz4SMf+QjOPPPMcg+DIAiiLFRzFlDZBeCDDz7AX/7yF7zyyitoaGjAN77xDTz44IP4/Oc/n9P7W1v9JR5h9RAINFR6CBWF5k/znwpEkz6g5sa6nOdUrrmXXQA2bdqEzs5OtLa2AgCuv/56/OlPf8pZAAYHQ1AUtZRDrAoCgQb09wcrPYyKQfOn+U+V+Q8OjQMAgqFoTnMq9tx5nnPcOJc9BrBs2TJs3rwZ4XAYqqri5Zdfxumnn17uYRAEQZQFPQZQ4XHYUXYL4IILLsCePXtw/fXXw+Vy4fTTT8cXvvCFcg+DIAiiLFAMwMIXvvAFWvQJgqgJqjkLiCqBCYIgSkg1WwAkAARBECWEzgMgCIKoUeg8AIIgCAee2dyN/377aKWHUTIoBkAQBOHAW+/34a0P+io9jJJRzTGAimQBEQRBMOIJGbKiZL9wklLNMQASAIIgKkosIUOJT93qfuYCqkZ3CwkAQRAVJZqQkUgoUFS1KnvmT5SUC6j65laNokQQRI2gqiricRmKqiISkyo9nJKg0olgBEEQ6SQkhR2YhVAkUdGxlIrUgTDVpwAkAARBVIxYQtY/D4WnpgDoMQASAIIgiBQmAZiiFkA1p4GSABAEUTFi8VoQgOpNAyUBIAiiYsQSqfz/4BR1ASlkARAEQaRjdAGNR6emAFAMgCAIwgajAExVC4BiAARBEDbEkwIg8BzGKQZQdkgACIKoGNFkELilwYPglBUA7WMVrv8kAARBVA7mAmpt9E5ZC6Cam8GRABAEUTGYC6i1yTv1LYDKDsMWEgCCICpGLCGD5zg0+z0YjyR0f/lUQgX1AiIIgkgjFlfgcfPw+1yQFRWRmJz9TZOM1JGQ1acAJAAEUcMEw3Fs2XWyYs+PJSS4XQIa6lwAgFAkXrGxlApFIQuAIIgqZNPOHvz62T0Va8MQSyjwuATU+5gATL2W0EwA+Co8FZ4EgCBqmIGxKIBUMLbcxOIyvC4BDb6pawHISR+QwFffclt9IyIIomwMj8UAAAm5MmfyxhIy3G4Bfl0Apl4mELMABLIACKL8qKqKhFS54KKiqpAqtMBmYyhpAUhSZcYXT8jwuAT4WQxgCraDkGVyARFExXh523F88cevYTgYq8jzX9t+Anf+aktFnp2NoWBlLYBoUgB8HhEcB4SmYEM4mSwAgqgc2/b2AwB6Bscr8vzhYBRDYzG9IrRaiCVk3eWSqJAFEIvL8Lh48BwHv881JS0ARSUBIIiK4XEJAMyHj5QTtu4zV0C1YLSIKiUA8YQMj1sEAE0ApmAMQE5aV+QCIogK4HFrAhCtUKYLQ1aqKw7A/P8AKhaj0NJAtWVoygoAuYAIonLoFkDFBaC6LIChscpaAIqq6kFgQBOAqdgPSFFV8BxHzeAIohJ43ZV1ATGqzQU0FExZAJUQgERCgQqYBGBKWgCyWpXuH4AEgKgBKh0DYJAFYIZZZG4mAHVaEHiqNYSTFbUq3T8ACQBRA1Q6BpAKAldZDCAYRUuDB0BlYgBMAJiFxhrCRSss1MVGIQEw8/LLL+P666/HlVdeiR/84AeVGAJRQ7AujGQBmBkei2F6iw9AZS0AowsImHrVwLJKLiCdo0eP4nvf+x5+8Ytf4Omnn8aePXvw2muvlXsYRA3BXAqV3llKVSYAQ8Eo2lvqAFSmECzNBTRVBUCuXgtALPcDX3zxRVx11VWYMWMGAOC+++6Dx+Mp9zCIGoIV4lQqC4gdCFIOF5CiaG0n2KLqRCQmIRKTK2sBxJkFoO1DG3xuAFNPABRFhSBUpwCU3QI4fPgwZFnGLbfcgvXr1+NPf/oTmpqayj0MooZgPvhYvLKthsvhAnrhraP4zgNvZr2O1QBMa/RCFPjKuoDcqSAwMPUEQFbUqjwMBqiABSDLMt5++208/PDDqKurw5e+
9CU88cQTuP7663N6f2urv8QjrB4CgYZKD6GiFGv+dXXazlJWK/MzrUvubBsafKbnv39oCPM6GlDnddm+r5CxjkYSGBiNoqm5LqMVcGQwDABYOLcFHhcPl1ss+8/Gc3QUANAxvRGBQAM8dUlPQLJt8lT5+3e5BLhdQl7zKdfcyy4AbW1t6OzsxLRp0wAAH/vYx7Bz586cBWBwMKS3V53KBAIN6O8PVnoYFaOY8w+GtHTHUDhekZ9pOKz1uB8cCqHfry32kqzgW7/YhPUXLMDV589Pe0+h8x8ejQAADh0ZwrRGr+N1h44OAwB4WYHAcxgLRsv+sxkY0nozjQej6Oc0VwnHAT39IQCYMn//4eQZB7nOp9j/+zzPOW6cy+4CuuSSS7Bp0yaMjY1BlmW8/vrrWL58ebmHQdQQuQaBt+3tx+//9mHxn5/8aAwCK4oKWVHROxQu6rPYmbrBLE3VhsZi4DigucENl1ghF1Dc7ALieQ71XhfGyQVUNsouACtXrsTnP/95fOpTn8JVV12FmTNn4uMf/3i5h0HUEGzdzSYAu7uHSno+rrESmMUl+kejDlcXRiSmxTmCWU7WGgpG0ez3QOB5iKJQkSygqCUNFJia7SCquRCs7C4gALjhhhtwww03VOLRRA3CLIBITIKqqo49WRRF602T6ZqJYGwGxzKT+kciRX1GJBnozsUCmJYsAnNVKAgcT8gQeA6ikNqH+n1TzwKgLCCCqCB6Ja6S+WQuWdESNou+GGZoBz0SjBX1eboFkE0AgjG0JGMELpGvTB1AXE4LVPt9rqxjn2zIyhQoBBsdHUUoFCrlWAiiJBh7yzAfuR0suSBeot2wMQ2UjUmFuS3zREnFAJxdQKqqYngsarAAuIocCRlLyHobCIa/zoXxKXYqmKyoECZrDODgwYP4+Mc/js7OTpx77rn49Kc/jRMnTpRjbARRFIy9xSIZagF0AShywZheCGZwARltgf5RezeQoqrYe3Qk5+coqopoDhbAeFRCXFL0LKGKBYETzhbAVGoIN6ldQN/61rdw4403YseOHXj33Xdx+eWX49vf/nY5xkYQRcF4FGOmfkBsh16qimG7IDAA9I/YWwAfHB7Gxj9uw/H+3CzvWFzWhSWTBaAXgTELQBQqlgXEqoAZDT4XJFmpeN+mYjKpXUCRSAQ33XQTXC4X3G43NmzYgIGBgXKMjSCKgnGxjSecF7qUBVDcxdAYg0i9lvp8wCEQHI5qu/lYjuNh/n8gczUtawPNLACxUjEAw2EwjPpkP6Cx8cxZTJMJWVEmrwto4cKF2LZtm/713r17MXv27JIOiiCKiXGxzbS7L7UFYAxAmywAh1RQtitXkZs7JJLcNfMcl9EFxA6CmdZY2Swg7ThIswA0MAHIYMFMNjQXUHXm22RNAz1x4gQ2bNiApUuXQhRF7NmzB4FAAOvWrQMAPPPMMyUfJEFMBCVHAWDXxaUSuYCMFoDhdScLgO3Kc3WHMwugtcmTxQUUg8BzaKzXWlS4RK4iFkA8IesixDBaAE0eX9nHVAqq2QWUVQC+8Y1vlGMcBFEyjAtoLhZAsV1A1vtrY0odFO5UC6DvynMUABYAbm/2YXf3sOZ64NN3nuwgGFad6hKEimUBpVkAdQYX0LSpIQDVfCCMowAcOHAAixYtQn19ve33qX0DMVnI1QWklDwInO4Cam30om8kgkhMgs9j/nfM1wUUTgpAoKUO6B5GKCKhKbnLN2IsAgMqVwcQjacLADsTIDilYgDV2wrCUQDuvfde/OpXv8KNN96Ijo4Ocy51JIItW7aUZYAEMVEUFXC7eMQTmbNL5FKlgWYIAgdafOgbiaB/JIK5080dIPN1AbFWF+3N2s45GI47CEAUi2elWrCLyTTQUlVAOxG3CwJ7XeAw1YLA1ZsG6igA99xzD0ZGRrBo0SI8/PDD+h9HIpHApz/96XKOkSAmhKqq8LpFxBPxnCyAcriAGO3NPuwGMDAaTRcAZgHkqAAsa6g9echL
yCYQrKgqhoMx/SxgQLMAVFUbn1imhUpRVcQlBW5LGijPc6jzihljGJONSekC+vrXv4433ngDHMehs7NTf10QBFx++eVlGRxBFANVBUSBgyhwucUAihwETp0IZugGyiyA5G7dLg6Q70Ht0bgEDkBbk5beaddULTgeh6yoplbRrmSGSjwh48W3j+LiVbPS3FHFJq4fCJ/+HL/PNeUsgEkXBH7wwQcBaIVgP/rRj8o2IIIoNqqqggMHj0tAPJ69DsBOJFRVxeP/cxDnr5iBjlb7uFg2JEMlMHPr1/tE+DwCBpLFYGPjcQgChwCMFkBu9w/HJHg9gu72sdtFDwWTNQAWCwAAdh0awmOvHMD7h4fxtU+symNm+cNqG6yFYIDWDmKqxQCq1QLImpxKi3/+DCf/yYjqQFEBjtMOHy80C2g8KuG/thzGtr39BY/D6AJiT+A5DoEmn94O4vZ/24Qv/+R1AMYgcG5EYzK8blFPpbSrBTAeBclgAsDYdXAo5+rjQmHHc9qdWub3Ti0LoJpdQNVZnTDJGR0nAagmVGhZGJ4sAqDXAdhcw4LHuVblWgYAwNINNPksjgPamn0YSBaDsSuMXUJzjQFEYhLqPCJEgUedx96PzqqAWwz598wFZKwk/uVTu4seDDeSsgBsBKDONaUKwWRFmXwuIIKYbKiqivGopKcSpl7XFtpsApCqBNYWp0hMgkvkIQq8/j6nRTEhyZAV1danrd/fJg2U4zi0NXmx6+CgaaE/OTSuxwCsy38sIQNq6iQtRiSuuYAALZ/e1gIIRuESeb3iFkhZACyN9JqPzsfTb3TjkZf34zOXL3Wcz0SwHghvJFMMQFFUHB8Yz6tZnMctYHpLXWEDLQKaC6g699okAMSUYceBQfz7k7vwL7d+1CQCLIPN4xYy7mqVpI+eXXPrff+D0xe24vZPrNQXLCcBefSVAzjSG8S3Pn1W2vfYUmVyATELAFogOC4ppkVPUVTHQrBfP7MH8YSMr/3DKtPrkZiku3/8dS7bfkBDY1oGkDHdU7RYAGcuCSAuKXj+zSM499R2LJ3bYjvniaALgJ0F4HMhnpBtC8VeeOsoHn1lf97P+95nP4J5M8p/yPzQWBSqCrhFEgCCmBDW9EUrIyHNbRIMx00CwA4b97gEhDIclagHgQ1ZQO8dHNRei2e2AIaDMYyGMrstJJs0UI7jEGjW/PHGnkCKoqbqACwK0DsUxsmhsNZN07CDjsRktDVpWUUiz9umnQ4Fo6YAMJCyACJRWf/62gsWYMvuk3hq0yHc8aniC0A87iwADXVaEHs8kkj7fu9wGPVeEf901ak5PScYjuN3z3+Io32higjAE68fhChwOG/59LI/OxdIAIhJw+h4ZgFgC7i1sZmqIhkD4DEwmlsQ2OpiiCYyxwAkWTH1HLK9v8EFpOguIOiLtjEVVFHVVHsGy22DkQRkRcW+4yNYsaBVf91YTczz9v19hsZiOHWeeUFPuYA0i8El8HC7BFx13jz8+b/34YPDw1g2r7giEM3gAqr3poLYxmA1oGVJtTR4ceaSQE7PkWQFD/9tL/qKfPRmLhztC2Hzeydx+Tlz9d9xtVGddglBFEAqj9+88CnMBeTK5gJKBYEly/GN2SyAhJRBAGwqgVNBYE7P2zc2hTNaAOa3qXqB1weHR0yPicQl+JIxAI4DVIsFICsKRkKxtAZsugAkC8nY12tWzkST342nNh2yn9cEyOQCYv2AQjYng42Ox9FU70p73QlR4NHa5Cn62cu58Nir++HziLiqc17Zn50rJADElMHpRC/NAgDcbiFjFo+xHbS1CCtbDECWFVuXi9392ZgALQbgdglo8rstLiCjJWM80lLSheaDI8P665KsIJ5QUhYAx6UJ0mgoDlUFpjWYd9UsC4gFgcWkALhdArrOm4cPj47g/cPDKCYpF1D6EsTiGHaVzKOhOBrrna1AO9qbfegbDhcwysLZ0z2EXQeHcPX589OSEqoJEgBiyuB0pq9qsABy
SwNV0twnKQvAXkASspq249afr1cCpx8JyWKxgSaf2QJQUwfYG9dxVt3b2uhFd09QD9yyPkA+d8oFZB1O6iAY8wIqWrKAXIbe9WtWpayAYh7TyH4PdnUALEPJGsRWVVWzAPzp/Y0yEWipQ99w+SwARVXx2CsH0NrowWVnzSrbcwuBBICYMrAFPC0GgFQaaEJSdKGwYmwFYW2PnM0C0GIAmcdnfyKYpgBtzV7T0ZDGLCDjbdmu+CPL2k1nBjMhMFoAVkHSD4JxsAAiFgsA0I6L7DpvHvYeHcEHRbQCYgkl2Z7DzgLQ5mAVgEhMgiQrtg3uMtHe7MN4VCrbYfNb9/TicG8Q11+0CC4xXeCqCRIAYsrg1M3TGAMAnBdxYzO4NAuA1QE49AmSZGdhsUsDZes/qw9qa/LpC7R2rWJbCMYsgJWLWyEKnO6aSQlAKgZgdQE5WQB6FlBMgihwaa2L16yaiWa/G89s7radXyHEbFpBMwSeh88j6jEJxmgyTbYxXwFINscrhxWQkBQ8/j8HMbfdj3OrNPPHCAkAMWVwdgFpCy3zNzsFco0CYrQAZCXVRtqpnXRuWUDGE8EMQQAAgWavydVjigEYXmcWwLRGLxbPatLjAEwAvJ5MLqAoPG4hrdFbSgDktLYQ2vcFXHbWbHxwZAS9Q8XxpccSsq37h1Hvc+lZSQxWJ1GIBQDYN9wrNq9sO4aB0ShuvGRx1Z4BYIQEgJgysAU8kRYE1iwAdwYLQFFVqGqyNTJS/nAAON4/nkMaqOpoAejXKPaVwIAWAzCNR1FtK4GZW8Tvc2HZ3BYc7Q0hFEno5wHXGYPAlvEMB7WDYKw9/42Lvp1LBgDOX9EBjgM2vdeTcY65YlfkZcTvczlaAPkKAOu42ltiC2A8msAzm7uxfME0LF8wraTPKhYkAMSUIZMFwHGA180EIH0RZ+/1Ja8Zj6QWn4M9Y7rV4OTqySkNVE53AbGluK3Z7JeXVdW2G2gwEococPC6BSyb1wIVwIdHRlIWQHL8mgWQHgOw5tUDMDUqs7MAAKClwYMVC1qxedfJrEKXC9kEoN5OAJKFdk3+/LKAPO5kllWJBeC5LYcRjkq48eJFJX1OMSEBIKYMTv38VWsMwMaNw/zszIViDBgeOjGmZ9kA9haErChQnDNM9Wv05yFVBwBogVnjQpxIyPrO3xgDCIUT8Ptc4DgOC2c2QhQ4HDwxqp8HnLIAkLZQW4+CZHAcpy/8LgcLAAAuOKMDw8EY9nQPZZ5oDsQTsm0RGMPvc5msMAAYC8chJA+MyZf2Zl9Ji8EGR6N48e1jOG/5jLSDfaoZEgBiyuB0opeiqFodQAYXEBMP5h9nu886j2iyAIB0CwMAEpKadCOl744zBYGZN4bnObQadudRh1hDKJKA36e5QERBC5ZG47LuAvIas4AMY5FkrdeQnQUApBZ+JwsAAFYtbkO9VyyKG8juPGAjThZAY727IN96e0tpawGefP0gAOC6ixaU7BmlgASAmDI4pYEqQNYsoDQXUNICWDKnGSf6x3X/s937VYecfSvmdtDaB+NaZnQDReOS9VIAWhYQq5QFkge6SwoiMQkCz+lNxzhLEHg4GIMK2FoA7D7Gj07XnHfaDGzbOzDhlErNBeT8LL9NEHh0PJ53BhCjvdmHkVC8JC2uj/QGsXnXSXzsrNlV2/LBCRIAYsrgXAmsWQAed+4WAIsBLJnTDBVAz2BYX6yt97fr8mkZgON1HFIKYFw8jBaAnQuI4RJ4JGRNALxuQXcpWYPAdgfBGMnFBQRobiBJVvDmnt6M12XD7kB4I/U+FyIx2TSH0fFY3gFgRnuyHXQpMoH+89UDqPOK6Dq/els+OEECQOSNoqi4/z93Yv/x0UoPBS+8dRQvvXMMgHMvIC0InJsFwPr5jyd3nwtnNurXsApV6/uNbSMyBUhlmyCB0QIIGCwAp3TTUCQBv4MFYEzv5C11APpRkI32FgDL/hGztC2eO92P2QE/Nu2cmBsollDg
zhIDALT+RgytD1ChAlCaWoDd3UPYdWgIXZ3z9SZ2kwkSACJvhoMxbN8/gH9/clelh4JHXtqHP764F0CmbqAsCJysA7BZXFMWgLYojSQXzOnT6vTFsSG5+FhjDJLNYe92mLOAzEFgIJWuCJhdQOyeiqJiPJJIO8xFEwDZJAAcz5ncUboF0DAxC4DjOFxwRge6TwZxbALHRsYSMrxZLABAO4oT0H4GwfFEwS4g9rMtZiBYa/mwH62N3qpv+eAECQCRN+x4u2yFT4XQMziOgdHC/kll1stHslYCp84EBrLEAJKL6MmhMESBR2OdC63Jbp2NyT711vcbBccuE8guCKy3gzZcZ3QBmSyA5LWhaAIqUv3yAW3nnpBkGwvA4gIKxlDvFR0zb3KJATDOWz4dAs8VbAWwNhfZXEAAEEkKQCiSgKKqBVsAfp8L9V6xqBbAm3t6caQ3hOsvWlj1LR+cIAEg8oZlKzo1P5sI3/71m7jj37fkdK0prVJVHbOAtBiA1ndG4DnbOgAmHmwRHQnF9aIpFjhlwde0GICpz38mF5AhS8hwJjBjVqAeCzo0l5NRZNgdWRWw32IBSLKqtYI2LO7WbqDDYzG0OOz+gdyygBiNdW6sXNyGv+8+mdY1NRcyNYJjMAEIJ4PNYwXWABgJFDEVNCEpePy1ydPywYmKCsA999yDO++8s5JDIAqA0y2Ayo7D2C44lpAz1AGkFlqv274jqDULCEj5y9nC72gBZIkBGDWBjZG9w+gC8rgE3PHJ1QCg5/VrN9A+6FXAdZYgMIsBGPLjed4SAxiLOvr/gfwsAAC44PQOjIUTeO/AYE7XG8l0HjDDb3EBFVoFbKS9xVe0YrCXtx3D4FgUN146OVo+OFExAdiyZQueeOKJSj2emADsD74YFaHZGBqLYsvukwCA//vyfvzLI+/q3zOmZo6FE1ljAIC267QLsLKF2Wtwo7AdM3O5sMWx0BgAYIgD2KSBAtrCDZizgNg92SHvaTEAORkDcFtdQKn7DgVjjhlAgCEInCUGwDh90TQ01rsLqglIHQbj/CzdAogxAdBiMoXGAABNAAZGowVZLUbGowk8y1o+zJ8cLR+cqIgAjIyM4L777sMtt9xSicdXLaFIoiR5yrk8N1OffCdkRcVIKJb1/cPBGBKSrO9gw9GEY5aLlQee3YNfP7MHQ2NRfHhkBLu7Uy2JjYeoB8fjGQ+EYQut05kA7L1uUdCvtVoAbOG33t+4oHT3BG2KwVJfM7eVapMGCqQsAmMQmMHOM7a6gOIJmxgAnyoEiyW0n71TDQC7j/FjNgSex/nLZ2DngUHT78GKoqgYTgbUGbEM5wEz/LoLSPs5jI1rfzsTsQACzT4oqqoHxAtlMrZ8cKIiAvDd734Xt99+OxobG7NfXEPc9tPX8aM/bKvIc7//27fyfl8sIeNrP3sDt/30dfzw4Xccrxsdj+Ff/u8O3PbT1wEA//snr+P//PvmvJ51YnDc5r5GCyBuCAKbd3iyoupWi5MAMAtAEFLpomzHfMqsJgDA3Bl+AJnTQO//y05s3zfgOA92MDyThHQLQHshZqoD0D4aG8ExXCKPSEyCrKh6BpN231QQeDhLCii7j/FjLnz0jA7IiqpbaHZs29uPb/5ys0kEcnEB+TwiOC51TvHoeAxukdd7HRVCexEygVjLh84Vk6vlgxNlPxT+scceQ0dHBzo7O/H444/n/f7WVn8JRlVcRqMyAoHC/jgO9wb19xZ6j0LoGQzn/DxvOH3Hd7Qv5Pj+0aisH1zCrglFElmfFwg0YFqzDzgyApfHZXodAGSkFh6VFyAK2uIgyYrp3pKiorHBg0CgAf56N1Rwac8eZG2WW+rgTbZXWDC7GYFAAwKBBiyYOw2z2/3444v7ILhE0/tPjJh3lPtPBrH2owv1r73e1K61ubkOrU0+NA5obQlaWurSxsJxZhdQQ3LsMrQmcLNmNqe+5/fqfvJAq1+/V4PfAxVAW5sfJ4a18S2cM83xZ16f
dHM1N/py/jsIBBqwZG4z/r6nF//rqtPSuowCgLJ3AJKsYiicwJKFbQCAo0PaAjw9+bN1wu9zQeV4BAINiEkqWhq9aG8vfNPIJ11k4YRa8P/WH/57HzgO+Ny1pyOQLC4rBeX63y+7ADz33HPo7+/H+vXrMTo6inA4jB/+8Ie46667cnr/4GCoLL7niTA8Mo5+b+E7lf5+TQT6+4NFHFVuz80F60lN2d4/PDJue0225/X3B6EmfeZ9hpxz9r6TA6nXTvSOIZrcLcbiCvr6xvQFKRJNALKK/v4gOKgIheNpzx5MWhjBYBRichcuqKp+nZcHBgZCcIs8RkYjpvcPWKyT7R/2mb4fiaQEs68/CCUuYWREE4BRy70AzX8fMxZAjUXR3x9E3+A46r0u0/VSInWdHJf077Fn9vaN4eBRzW3GK7LjzzySzLaJx6S8/u7OPXU6Hv7bh3jrvRN6BpOR0WRK73t7+7Bouj/5M9B+b5HxmOOzAoEGeN0CBkfC6O8PondwHH6fOKH/CVVV4RZ5HDw6jP7+trzff6Q3iFfePorLz50LTnL+WU6UYv/v8zznuHEuuwA89NBD+uePP/44tm7dmvPiT9QezE0QsYkZRGISGuvdiCdkzQXEMmxUFbKiQhSS7pSEDLdbc214XIJ+MpYRtqkQOKMLKN1l4nHxGWMAgGZNjYZitimLbIyZYsUcx9m6qaxVwIDZZWOtA9DmlToKsiVDDID5pPJxAQHAuae245GX9mHTez22AsDme6Q3JdbxRPYYAADUeVMN4cbCcd2FUygcxyHQ4iu4FkBv+dA5+Vo+OEF1AERVw5qbRWPpQdFITMt9b6xzIxhOmDJwWMBWkhVIsqovNh6nLKDke3leOzjG4xL01sqm8djEEOyySj5MurwAczM3lgXE2kHbpRDyvDmzSDVkARkzgABz5a4xBmAs1hsai6GhzpVTsRITzVyp87pw5pIA3tzdi4TNcZnsd3KkL7WjzaUOANA6sTIBGA3FJ1QDwGhv9hXUD2j3ocnd8sGJigrA9ddfj40bN1ZyCEQB2LU8LhVsfRywydyIxmV4PSIa6l0YM2QBAdAXI7bb9BoFIEMWkMBrLSOmNaafnAUwAbCmgaa+ZgfPOB2gnosFYBUFvRAsErexAFKLqKkVBCvWU1XtIJgMRWBG7OacjQtO70A4JuFdm+A3K5IbGovprkMW38gW0K3ziggnD4IPRRITygBiBJICkE8Vu6KqeOzVyd3ywQmyAIi8sab1lRKWy25sO8AWa7MFEDe1WoglM4HYYs0aj2XLAuJ5Dh89vQMfO2u27Xg8LsHGBZR6LgcOS+Y04/0jI7bvT6WBJq+3WW+NB8NoF2sftLMAnF1AXgcX0PBYLGMGkJFCSppOndeCaY0e29YQxt/J4V7NCsjVBVTvFTEeTej1D8UQgOktPsQlRT9dLBf0lg9rJm/LBydIAIi8eW37ibTXMv0zTyRob7dTY5W3rAFaQ53bVAgGpM4FZvn0ugXgFrTjGy1jMloAHz29A5ecaS8Abhef0QWkqiqWzW1B71A4JZTGSmDmArJpBsewvsbOG4jE5HQXkEEA6uwEIE8LoJDfFM9zOH9FB3YfGkrLsTcKwJGkAMQSMkSB191UTtR5XIhEpaIUgTECelfQ3A6HSUiy1vJhuh/nnjZ5Wz44QQJA5I3dIpEpeDiRw0NsBSC5u4/GJXjdIhrrXQiFE5BkNVWtK7GiLe2jMQYAaItQz+A4jvVpwUnFYAFkwuMS0uoMJMPXKoBl85oBAB8eSXcDWeMFdhaAdQwqjG0gzIug0WdvdKmwe4SjCURiclYLgP2c06yPHLng9BlQAWzeZa4JUBTtdzKt0aMHgrMdBsPweUXEJQWDo5oAFMMCyLcW4KV3jmstHy6Z3C0fnCABIIqC0y7/rv/4O/704r6C72vnqmUCEIlJqEtaAIqqIhiO64ugUSSAVDaR3hI6IePbv34T3/3NVm38GXbkRtxiehA5YVnU57Y3
wOcR8UFSAFRTJXAqUwmwd7mwNZgFwFU11fco3QIQ9GuNbRzYPQZHkxlA2QQgRwF0or2lDkvmNGPTez2mGJGsqOB5DnPbG0wWQKYiMEa9l3Vm1dJsiyEArU3a2cu5ZAKNRxP4ry3dWDEFWj44QQJA5I/NqhyOSYglZISjCQyMRvRd/8mhMHpz+Gc7cHwUvTZmuZ2wJCQZqqoiEpPh9Qh6k7bxqKQLwGDSFRGzWABOLaHZgpxt/fO4hbRmc8YYAKAtokvnNOMDmziAbJmPneCwRdhoVQXD6W0ggFQWkNeSscQa9g2MZj4HgHHGolYAwNz2wgstLzi9A33DEew7ljooSFZUCByHudP9ODkYRiwuI5blPGAGc2n1DGp/F8VwAQk8j9ZGb06ZQP+VbPlwwxRo+eBE2esAiMmPk5/43j9tw6GeVLrfb+68NOd73u3QSsLJBRSJSVBUFXVeEY2GzBjWEO3Xz+xB5/IZhsZjVheQedf+wtajAOzTMo14ssQAGMvmNmP7/gHNJ24TA0hZHOnPYGNwuwSMR7V5Bm06gQIpkfBZBIDdQxeALBbAectnYOXitrT75MPZywL444t7sem9HiyZ0wxAE3BB4DBvegNUAEf7Q4glMp8FwKjzpgTA5xGzpo3mSnuLL+umJBqX8NI7x3De8qnR8sEJsgBqjFJm8BgX/2Jhd+bA4FhM9yfPDvj1k7oAYFZA28EKlp46ugvI4Vzg7pPa2LO5ed2igHhCgaKqiMYlDI3Zd5dcOrcFAHQ3ECM9C8jGAuAsFoCaigE4BYF9FpeKUQA4AM055NBPZPEHtOM0P7KsHW990Kf/3GVF0VxAyUX0SG8wGQPIRQC0uZ4cGi/K7p8RyKEt9O5DQ0hICi48o6Noz61GSAAmMZKs4MePvIuDJ8Zyfg/LqGCUM6e/EOxCCyOhqJ5SOG96g+4CArQd+jmntqMtGexztgDsu5FmiwF4DDGGZ97oxv/327dsD5iZM92Peq+Y5gbSXUAZ0kCZC0iPASAVA6h3EgCrBZD8zx4cjaDJ7865zfNEueCMDsTiMt7+sA+ANl+R5zCt0YN6r5gSgBxiAMwFFInJRfH/M9qbfQjHJMeWJgCwfd8A6r0iFs9uKtpzqxESgEnMycEw9nQP46Hn3i/4HoUc62gVjXxK4/N9np1ASbKKwyeDaGnwoLHeDb/PpQdTeZ5LHpKiLfDWzpNMAOzOBQay58GzRTmWkBEMaznqdtk+PKfVA3xweNi2EjhjEFiPASQXyaQLqM4jpi3kLAbg6AIai2Y8B6DYnDK7Ce0tPr0mgAWBOU6zAg73hhBPyDm5c+oMB9wUVQCyHBCvKCp2HBjE6YtayyaclWJqz26KIyRTAKUJ5Nkb19dcrQHjZU31btNh5kZsWy7I+Y3VTjAkWcHh3iDmz9DcCjzP6b5xnufgcgl6+mcsLoNDauFmQhB1EoCsMYCkgCRk3fXDgpRWls1twcBoVM/EAQBJsaaB2rmAtI8mC8CmDxAAiLoFYHEBJW8yNBbLeA5AseE4rY7iw6Mj6BuJaDGApDkyb3oDjveHtGB9DgJQXyoB0FNB7X9v+4+PIhRJYNXi/BvGTTZIACYxzM8tT+CEI2OWjayoGBiJ5JW3r8J51/ylf30t7TXrOb5AZqtAUdIPAg9HJZwcDGOeITjH3EACz8Et8ohLMv74wl4c6hmD2y3oCy27zniWgHFMWWMAhiCyNf3TyrJ5WhzgwImx1O8qpzRQcwxASwONp/n/AYMF4LZkARkmUk4LAAA+umIGOABv7OyBLKv63OdO90OSVYyNx3OKAbhEQd+BFzUG0JzZAti+fwACz2HFgtaiPbNaIQGoEgrxxRsbfhX+3NTnsqzijl9uwbd+9ffM7zF8PjYez6t/jGwRHO25zgupqqYfU7jv2ChUAPNmpASgwWgBiDziCQUvbTuGXYeGTIuNzyPA7eIxEkrFQozWQF4WgJRZOGYF6uH3uSDJij6H9CMhbSqBWQwg+SwV
mgvImgIKZMoCSn1eTgsA0ATntAXTsHlXDyRZMQhA6vfFurNmg7mBimkBuF0CWho8joHg7fsGsGxus8kFNVUhAagSCirB55gFUJxWC8w9YRccGwnF8MundmluHcvj8imQNI6VCUBCSh//D3+7FcPBGKIJOa1DJTtcxigAbIfILAAjxqpTjuPQ7PeYBMDoqspaB2AoJJNkRXdT2Pm0eU6rBwBSFbtM7JxOBGPvA1IuIJYFZOcC8rgFuEQ+rdUzX0ELANBqAgbHYth7bFTfqMyYVge3K9WWOxdYILjJXzwBADQrwK4auGdwHCeHwlh1SqCoz6tWSACqhQkk41iLi/J6rFEAMgjJX147gK3v92Hr+71p38unRN5kAcipls1WtrzXg217+xFPyLqf23iPJr/blNrIDm7nOS6tYZfHZd7Jtfg9GDGkw0bysADMLiAVswJ+tDV54XFohcHcQAKzAPRuoM6VxyyDx2XJAmrwpS+CHpeA73/uHFxgSVfkDEqWrQq4FJy5pA11HhGRmKTHqniew5xkoVmuAlCvWwDFnUN7s/25ADv2DwIAVi6e+u4fgASg6Dzw7B5s29uf9/sm4saRlfxjAJKs4Lm/Hza5PyTJ+T7MhSEpqqm1AZAKRueC0d0j6RaA+bkXrdQWs3hCq/gV+dSfKSv6mmcpzmGvCzyn7zIZHou7obnBgxFDN0jTAexZLQBzENglcPjM5Uux/sKFttcvm9sMIGUB6GcCZ/h1C5YYQDQuIy4pthYAAExvqUtzkxlF2c51VGpcoqA3TxMMY2FuoFwFwJcUgGLGAAAtE2h0PJ6WqLB9Xz/mtPvR1jSxw2cmCyQARSQSk7Bl18m88vInAltECrEAduwfwH++egB/finVp8eYodKfzOAYHI3i5FBYX1AkWUmzVs5copnLs9rqsz53yLDzZu4gqwXAdtlxSYGimgVmfvLUqfkzzALAisFYDMCINeOkxe/BsEMMIJs1w8QllowBiAKPFQtbsXKR/Y5xZls9GupcaQF79iO0czkxlwlb1EMObSAyYdDMijUxY1aJYBAnJty51AEAKRdQg4P4FQpLBTW2hAiG49h3fLQmsn8YUz/KUUa6TwahQhOCfJlIQVYhMQDmJtnTPaS/ZnQBffvXf8dNl52CP7ywN+1Z1iIqtlAdHzCfjWukPXkU38Y/bjM8L9mx02IB8Jzmx39q0yFwnNZvnrFkTjN2HhjEKbObTe9h2T08z8FtcQFZ/fPNfrfJ6ojGjC4gxykAMFsACVnR3VNOriOO43D6wlacGBjHIBfTxZpZW3bN1ziLBRB0aASXCeOiXykBmD+jAXOn+03Fa4tmNuZcmQwAbU0+BJq9Rc/HZwLQOxzB7KRbaueBQagqsOqU2hEAsgCKyKEebefvlGOeiULWf+aKKcQCSJ2YZXDJGBZFSVZtF3RJVtIW7GyctTSAG9akN9RiP6ewJe2U41KLdmujF5+/+jT9ews6GnHPLZ1YvsDcnVFPA+XSg8DWk6eaLQFTowuIy+IDMsYANBeQ9qxMXTQ/vXYJbv/ESggCp/+uBseicIu87a6ebQbYoqc3gstjF2xc9CvVxZjjOHzjptX47BXL9NdmBfy450udumssG+s+Oh/f3nB20cfGagGMFsD2/QNo9rtNyQVTHRKAIsIEoDALoIAHTiBwzBZxYx2AtUjJbh52AdtsTGuw38Gx+48nz31lu2uO43QXQWO9OdjrFnnbwrOGemMhmDULyGoBmAXAaNHkYwFIsqrPK1P2kNettawWeE631gZGo2ht8tpaDuxvgcUNxpIWQH4uIM7283Lj97nS0inbmnw5pw57XELR/f+A1meo3ivqB8MkJBm7Dg5h1eK2Kdn33wkSgCKSEoACLIACVvOJdPFhx/LZtSlgRKLpAlCItcFx9oeNh5kAJNNOm5OpfhxSC631gBKng2daG704Y1ErFs1qyu4CslgARqHL9s/P8xxEQesImpBSLqBcDlIReE4P2A+MRBwr
qJ0sgIa63BfCanABVTvtLXV6KugHR0YQS8g15f4BSACKxkgohqGxGOq9ohYgzHOnXJgLqHDs3DjWPvdhGwtAltW84xUcZ79AWi0AtjM3WgDWxctJAESBx1dvXIkFHY3pQWC3NQhsXkhN58PmsFZ6XFqhmVbgxeljzoYg8LqADoxG0dZkn5/PfjNMAMajEjjOfORjNjjDj4DWf3tYXArQir/cLt4Ub6oFSACKBNv9M9+03eKZiVI25YzGJfzq6d2mVtDW1EvAHA8AiucC4sCZMkGs9x+PJsBznJ7Jw3EpC8Dqvsh09CQjvRDMLAAuUTD1mTFmJuXiLXEnD5bPNQbAYC6gcDSBcExyTDVkAmvMfqr3uvJy5ZgsgAq6gKqZQLMPg8l23tv3D2DFgtYpd+h7NkgAisShniB4jsNpyaPjxjO0mrWnEBMgt/ds3zeAN/f04us/f0P3d8dt2iFbX7MTMa0OID9ysQDqfaKpmIq5bay791z+Qa0iYZdyaKycNR5knstOPiUAxhhA7i6g/hHteY4WgJK6nt013zRIcgFlZ3qLD6oKbNvbj+FgrKbSPxkkAEXiUM8YZgXq9YVl3MZ/nolCinlzfYuxj0o4OS5bC0DK0QLI8OBrL1yQ/iJnXyxmjAHUe11w2QSBvZYmZ64c0gHTK4HTBcAYCDZaRrmslR4Xr4+dxQD4HP6TBIGHJKsYGNXcDjnFAJLjybeYyzgPWv/tYT//F946Cg7AGTVS/WuEBKAIqKqK7p4xLOhoRH3yFKP8LYB0Rsfj2HVwMMODc7uP0QWw84B2P9sYgMUCsAtmZ6s56Fw+I+01DpzeEtju/uPRBOp9ou66MQWBhQJcQFmygACzAIwZOoPmagGw368rDwtA5LU0UN0CaHawAJgLiOf0++YrAKYsIFIAW1gtwMETY1g0u8l0sFCtQAJQBPpGIhiPSlg4sxH1Pm3Hmk9LZcC+FcQ9f9yGf310x4RP7TK+/bXtJwCk6gCM5JLfLytKRt0xLjbnnNoOQNuB2s1BdwFFJM0C0IuqUm4bq+vILpvISloMwMYFZMwEMo4sl6XS4xL0329+QWAOsqxgYDQCn0fUNwtWUmmgqXnkkwEEUAwgF5rq3fpmYXWNZf8wSACKwKFk64f5MxoMFkCetQA2q+rJIS1H2S71MhiO6xWiVvYeHcGfXtyLE8lCLjtxycUCsGPr+30ZewYZFxu26+c42PbONwaB672iLgCKquoxAKsA5LLQWmsO7CwAp/bCudzf4xJ0V5q1UV0m3KKAuKRgYDSKgIP/HzAEgXlOd9/kHQMw/NzIALCH4zi9IKwW/f8AtYIoCgd7xuAWecwK1OuVpPlaAHY7ZJ7joKiqdq6qZQ37yv2bHO/1/JtHsH3/AP77nWO495ZOWwFI2Jxja80CcuLDZDtmO3gOuPdLnRAFHn959QAAzQVkt+CagsBel+5OkSQV3qQlZec6yoZ1EbcTAKuVkA9uF68LQC4xCUadV8TYeBwjIRkzptU5Xme2ADgAav4uIM74OSmAE3PatarfjtbsfaymImQBFIFDPWOYN6MBAs+D5zl43ULeFoCdW4WtfUqeEWLjgj8ajsOuWaidBRCzcQvlC8dzaGvyodnv0RdijtOqP79+0yrTtZGYBFlREIlJqPe59J17QlZSdQBF+Au1cwHlEktwvJ9L0H9f+fSoqfOICEclDI5GHQPAgDUGoL2WvwAYLQASACc2XL4Ed3zqzEoPo2KQAEwQSVZwpDeEBckulYB2OlP+FkD6a/qBL3kKgOnfXbV3AdnGAHJwAWXDtNtMfsoWoHbLoheJSfpO2ugCkmRF37UXoz7CzgKYiAAYK43zEQCfV8TgWBRxSXFMAQVSGWHGLKCJuIAIZ7xusSLtsqsFEoAJcrx/HAlJsQiAkHcaqJ0LiB3qke+Zv8Ydn6oCqo2AxCUlbRHM1QWUCTt3A3vF6nYJx2T951TvSwWBE1JKAAopPLNiLwCFF/wYzxdwibkvtHUe
URfztgwWgLEQjLkU/TaHwWSCdv1ELpAATBBWAbxgpkEA3BW2AAz/+ypU+yBwQk7b+RTFAjC2ILB8Yu3JE41JCCUD2fVegwvIIE4TOe2MYT0QBqiMBWBsipYpCMxcfgLPpeoAyAIgSgAJwAQ51DMGv89l+of2ecS86wDs6mvZ/7CUwyJoXOSNu7/BsahtkVlcUkz95X0eIe82z3bYtSG29rdn16kAhoJaTrzVBaQfoTiB844ZdoHkicYAGPnGABiZTpxiv0pB4PW/gXzOAgBya2lBECQAE+RQzxjmdzSYFt1CXEB2UWCWAmkNAtsFhSMxCXuPjuCFrUdMFsB/PL0n7XpZUZCQFJNfWRT4IlkAzi4gUeD1OfmTWT6sH7vVBaQfop6MYJ+/YgbOX5FeZFYo+WTvWDEWmuUjJHXJFOGGOlfGE7HYZkDkOQAchGRiQT6QC4jIBRKACRCNSzg+MI6FBv8/AHiTQeB8zvm123s7xQCsJ3IBQCiSwMY/bsMjL+9PcydZxzEaimsuIENxkSjw6D4ZBJBqy2zkR184L5dpWGIAqSwgBlsw2SlRA6MpC8DoAmK7duYC+vzVp5kOhpko+eTvW5moBZDtvNmUBaBFAPx1rrwXdEr9JHKhIgLws5/9DF1dXejq6sK9995biSEUhcMng1BVmALAgBYDUFXNx50zDnUAQLof3FYADEVhxuZmQLrFMByKaRaAz2gBaG0Kmv1uLJnTnHb/XA9+t1t3jIsXWzB1AUhaAHVeMVUHICv6dRN1Af3ktgtsX3flcZC9FbMA5BEETsYAAg4tIBipNFAeHJe/+wcoTvosMfUp+5/J5s2bsWnTJjzxxBN48sknsXv3brz44ovlHkZRONSj7ZjTBMCjLRChPNxA+QSB7QQgGEnorgnW45xhtQCGx2KISwp8Bp80u2TVKQHbvvO5FmRxdjEAw/fZgulPukP6R6PweUQIPA8xmVGTkBRdcCaaBeTU32UiZ8wag9l5FYLlagEkf988p/08C0lTJAuAyIWyC0AgEMCdd94Jt9sNl8uFRYsW4cSJE+UeRlE41DOG1kZv2pF1bGF1CgSrqoqewXFT6qd1/R8ai+qZRJKs4Hh/SP9ezObM4fFIQq+2tbZx/uDwiOnrXYe0hnBGXzZzxaxa3JbWgRMoLKjI3mIUBSYkbFEbHI3qvfldhkIw3QIoQhaQHXbnE+SKyQLIw5XU5HfD5xGxcGZjxuvYlJkL0F9AkzLKAiJyoeytIE455RT98+7ubvz1r3/Fn//855zf39rqL8WwCuJwXwjLFkxDIGA+RDowTRujy+NK+x4A/O3vh/Gzx7bjK/+wGh87Zy4AIGrY6AYCDVj39af0r194+xje+aAP9311DV5795itsBwfiuhdJq1s2X3S9PX/7OgBAMyari1ES+e2QBA47Dk0hAvPmoO+sVjaPaZPb8Q5p83A1j3me51/RodpjsbP3Z5k0LPBq7/Ogp9tyVYIsqKiqcGDQKAB3npP8p4zcUrSDXXDx5bY/gyzcfqiNrx3YMDxvY3NmohefOZsLF/YipfeOoIPDg/n9KxgPPXLmjG90dSora3Jm/Eef/7BVfrO3omLVs/Cc5u7MaujCS2NXiyY1ZT3z8AonIX8/KqByTruYlCuuVesF9C+ffvwxS9+EXfccQfmz5+f8/sGB0N5t0YoBWPhOPqGwrh45Uz09wdN34tEtAV0YGg87XsDIxH8+qn3AABPvrofZ8xvBsdxGBoa16+xvue9AwMAgINHhvDXLd22zdie39Kdcbz/fPVpWHVKGz44MoxmvwcCz2F2ux8/++qFcIk8EpKCWELB6EgYizs0AWtr8uqWQXA0gss/MgvrPzoPzX4PxqMJeN0ivG7BNF7j54MjWjM7yLL+Olv2BIPN4xF5/fv3f+VC1HlENPk9+M2dl9r+PHLhf1+3AglJzvje+79yIXweAQLP44z5LUhISk7PCodSQjs6HEZ0XPt9//z2iyDwXEHjNXLdBfPx
v644FcHRCL75yVVwiULe9zRalxMdTyUIBBom5biLQbHnzvOc48a5IqGid955B5/97Gfx9a9/Hdddd10lhjBhulkBWEe6UrNUR7uFeseBQcTiMj521mwc7g3qcYRMLZ9Z47ZQNIFYXC7ILTKjtQ4+j4iWBg8WdDRi7vQG8ByHOq8LLlFAndelH2Yzf0YjfnPnpaYDskWBA8dx6Gith88joq3JB7+hf48dzFIx7pCZb9/rFnU/tbEtst+X39GHTrhEXk+7dMLvc+kuKe363PZDbpMLyJj+K6YVuxWCwPO6W7HO0CY7HygNlMiFsgtAT08Pbr31Vvz4xz9GV1dXuR9fNA6eGAPHAfNm2AgAy2ax6cLGcu2v6pwHj0vAK+8eA2COAVjFgH01OGrv4jGyaJa9f7mQvHfjwlzIghLSBSB1H7bgCgKnB8vrJ1kvFo8r1ea6kG6lBFEtlP2v98EHH0QsFsPGjRuxfv16rF+/Pq8YQLVwqCeImW31tgFTUbcA0nfqLIOnsd6NzuXTsfX9PoQiCVMWkKoC0xo9ae/NRQCWzmmxfd3lyv9XbZcNlA92AsCygASO04Pl9TnuvKsFtsufSDEZQVQDZf/P+853voPvfOc75X5sUVFVFYd6xhwPkciUwsiasPEch4tXz8Kr20/gjfd6cOq81MLdNxKBz+MCYA7GDjgIgChwkJL58k6LaSGLVa4uESdYrKbB0MiMucd43igAk8sCYBXNE0klJYhqgP6CC2BgNIpQJGFqAGeEuQVsBSAh62mEc6c3YPGsJrz67nGTBXDXf/zdNtDdNxJJew0w94p3ajFQiB95ogLw9ZtW47oLF5jGxBZNgZ+8FgCgWQETqSYmiGqA/oILgHUAtbaAYKQsALsunIop//6SM2ehdziCPd1DpuuGg9G0itHhYMz03saka8XYKtipZ0xBAjBBF9Ccdj/WfXSB6TWjBcDuP9liAIBWQzGRamKCqAZIAArgUM8YREE7AtIOFgOwOwc3lpBN7YTPXtoOv8+FV949brouGpcxq60eM1vNRwcumtmkfz6tURMI4+EiHpeDC6gAAWhvcT62sFCYBcBxhiDwJLQAPC6BXEDEpIf+ggvg0IkxzJvud1wA+AwHucQTclo3yQvP6LD176sq0OQ3B4NPmZ0SgNbkwn/W0oAeaHWyAArJVmFpocVEMHT59E1mC0AkFxAx+Zl8W68KIysKunuDuOiMmY7XcBwHUeBMFoCiqjh8MoihYCzNT79m9Sw8/+aRtHYQdV4RHksswNh3qDVpAYyNxxFo9iEYTmRsMwwATfX5Leq3XrfCNtOpUPQun7I6aYPAgHbIjJ2LjyAmEyQAedIzEEY8oTgGgBmiwEOSVBw4Poqt7/fh7Q/7MBzUsnqWzzenarY3+7BiYSveO6j16PnEJYtx8UfmovvYELZ9OGC6dkayhcKyuc1YubgNL7x1FLMC9ZBkBQdPjCHQ5DVV8FrJd1d/1tJ2/fN8xcOOlAWgItDsQ51ncp7J2tbkK8r5CQRRSUgA8uSgXgGcWQDcIo8X3z6KF98+ClHgsGJBK/w+F472hWwbkV121mxdAOq8IuZMb8DJvjE0GBrNnXNqO9qavfj57RfBJfIQBR73f+VC+H0urFjYikvPmo16rwt3//O5+OKPXyvirDWK4RISDS2fL141C2cvDUzodK5K8U9XLrM7w4cgJhUkAHlyqGcMdR4R7S2ZW/r+4xXLcLg3iECzD6tPaUOd14UnXz+Io30h29jAGYtaMWNaHU4OhU3ZN8y3v3x+C25ZvwIA4POkFky2e+Y5TnelTOTA81Kjt8mQVS0TaBK6f4D0840JYjJCApAnh06MYUFHQ9Z+66uXBLB6ScD02px2rSHTicGw7XtaGjw4ORSGz5AVw/rZl6otcrlZPKsJL71zDDPbip9hRBBEfpAA5EE8IeNY/ziu6pxb0PvnTNf6BrFYgJUzlwTw/uFhzGxNpZeyFMmpEnA897TpWNDRUJIUU4Ig
8oMEIA+O9IagqCoWzMjs/3fCmK9vx6VnzsJFKztMLhy978wk9JM7QYs/QVQHJAB5oAeAs2QAOcFzHD575TIEHISA4zjT4t9U70GT342uznm4eNWsgp5JEAThBAlAHnT3jKGlwYNmf+HZMBetdK4fsMKybj6+ZlHBzyMIgnCCBCAPDvaMZU3/JIhq4fs3n1NQG3CidiAByJFQJIG+4QguPKOj0kPJi7nT/ViSPF+XqC1mt1fP+dlEdVITAhBLyNh1cBATyaQ8MaCd2evUAbRa+X//6ZxKD4EgiCqlJgRgy+6T+P3zH074Pm6Rx/wcBKAYLROKQTlbLKxa3IZ6X038ORHElIFTM51GXoUMDoZsD0vJhKqqODkUzvt9Vvw+V1p3zlIRCDSgvz9Y8PsTkgyAm7TpoxOd/2SH5l+78y/23HmeQ2urvTuwJrZsHMeho9W+d/9UpZrbQRAEUR1Mzu0hQRAEMWFIAAiCIGoUEgCCIIgahQSAIAiiRiEBIAiCqFFIAAiCIGqUSZcGyvOZD2KZStTSXO2g+dP8a5Vizj3TvSZdIRhBEARRHMgFRBAEUaOQABAEQdQoJAAEQRA1CgkAQRBEjUICQBAEUaOQABAEQdQoJAAEQRA1CgkAQRBEjUICQBAEUaOQAFQJoVAIV199NY4dOwYA2Lx5M9atW4e1a9fivvvuq/DoSsvPfvYzdHV1oaurC/feey+A2pr/T3/6U1x11VXo6urCQw89BKC25s+45557cOeddwKorflv2LABXV1dWL9+PdavX48dO3aUb/4qUXG2b9+uXn311ery5cvVo0ePqpFIRF2zZo165MgRNZFIqDfffLP66quvVnqYJeGNN95Q/+Ef/kGNxWJqPB5XP/OZz6jPPPNMzcz/zTffVG+66SY1kUiokUhEveSSS9T333+/ZubP2Lx5s3ruueeq3/zmN2vq719RFPWCCy5QE4mE/lo5508WQBXw6KOP4nvf+x7a29sBADt37sS8efMwZ84ciKKIdevW4fnnn6/wKEtDIBDAnXfeCbfbDZfLhUWLFqG7u7tm5n/OOefg97//PURRxODgIGRZxtjYWM3MHwBGRkZw33334ZZbbgFQW3//Bw8eBADcfPPNuOaaa/CHP/yhrPMnAagC7r77bpx99tn61319fQgEAvrX7e3t6O3trcTQSs4pp5yCVatWAQC6u7vx17/+FRzH1cz8AcDlcuH+++9HV1cXOjs7a+r3DwDf/e53cfvtt6OxsRFAbf39j42NobOzEz//+c/x29/+Fo888ghOnDhRtvmTAFQhiqKA41ItXFVVNX09Fdm3bx9uvvlm3HHHHZgzZ07Nzf+2227Dli1b0NPTg+7u7pqZ/2OPPYaOjg50dnbqr9XS3//q1atx7733oqGhAdOmTcMNN9yA+++/v2zzn3TnAdQCM2bMQH9/v/51f3+/7h6airzzzju47bbbcNddd6Grqwtbt26tmfkfOHAA8Xgcp556Knw+H9auXYvnn38egiDo10zl+T/33HPo7+/H+vXrMTo6inA4jOPHj9fM/N9++20kEgldAFVVxaxZs8r2908WQBWycuVKHDp0CIcPH4Ysy3j22Wdx0UUXVXpYJaGnpwe33norfvzjH6OrqwtAbc3/2LFj+M53voN4PI54PI6XXnoJN910U83M/6GHHsKzzz6Lp556CrfddhsuvfRSPPDAAzUz/2AwiHvvvRexWAyhUAhPPPEEvva1r5Vt/mQBVCEejwcbN27El7/8ZcRiMaxZswZXXHFFpYdVEh588EHEYjFs3LhRf+2mm26qmfmvWbMGO3fuxLXXXgtBELB27Vp0dXVh2rRpNTF/O2rp7/+SSy7Bjh07cO2110JRFHzqU5/C6tWryzZ/OhGMIAiiRiEXEEEQRI1CAkAQBFGjkAAQBEHUKCQABEEQNQoJAEEQRI1CAkAQBFGjkAAQNcnNN9+MoaGhCV/z5ptv4uqrr876vKVLl9re66WXXsIPfvAD
AFpb4Oeffx7Hjh3D6tWrs96TICYKFYIRNckbb7xRlGsmymWXXYbLLrus5M8hCDvIAiBqjm9961sAgH/8x3/E1q1bsWHDBqxbtw7XXHMNnnzyybRrenp68Morr+Cmm27C9ddfj4svvhg/+clP8n7uT37yE1x33XVYv349XnnlFQDA448/ji9+8YtFmRdB5AtZAETN8aMf/QiPP/44fve73+ETn/gE7rjjDqxduxa9vb248cYbMW/ePNM1LS0tuOOOO7Bx40bMnz8fvb29uOSSS/CZz3wmr+fOnj0b3//+97F3715s2LABf/3rX0s0Q4LIDRIAomY5cOAAYrEY1q5dCwCYPn061q5di9dff93kg+c4Dr/85S/x6quv4tlnn8WBAwegqioikUhez/vkJz8JAFiyZAkWLVqEd999t3iTIYgCIBcQUbNwHJfWZ11VVUiSZHotHA7juuuuw+7du3HaaafhjjvugCiKyLeNFs+n/t0URYEo0v6LqCwkAERNIggCZs2aBVEU8cILLwAAent78be//Q3nn3++fo0kSTh8+DBCoRC++tWv4tJLL8Wbb76JeDwORVHyeuYTTzwBANi9ezeOHDmClStXFndSBJEntAUhapIrrrgCn/3sZ/GLX/wCP/jBD/Bv//ZvkGUZt956K8477zz9mg0bNuCnP/0pLr74Ylx55ZVwu91YsmQJFi9ejMOHD8Ptduf8zKNHj+Laa68Fx3H413/9VzQ3N5dodgSRG9QOmiAIokYhC4AgisADDzyAZ555xvZ7n/vc53DNNdeUeUQEkR2yAAiCIGoUCgITBEHUKCQABEEQNQoJAEEQRI1CAkAQBFGjkAAQBEHUKP8/4tJAb6qNkeYAAAAASUVORK5CYII=",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Create a visualization with pandas df\n",
    "sns.lineplot(data=pandas_tips, x=\"total_bill\", y=\"tip\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<seaborn.axisgrid.FacetGrid at 0x7fc3bbd7f7c0>"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAVwAAAFcCAYAAACEFgYsAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAA09klEQVR4nO3deXyU9b0v8M/s+2SZzGQjCVvYIYlVBLVE0RPRGEWKFnsvevRcX3qvhVt6XrUeDkfvrV0or/ZQl3Laq61tbe/tS1tQjEg9RcUlLIpCIGwBspNMJsskM5lk1uf+ERIBWTLJPPPMPPN5v159lSST3/P9meSTJ7/ntygEQRBARESiU0pdABFRqmDgEhHFCQOXiChOGLhERHHCwCUiihMGLhFRnKjFbHz16tXo6emBWj18mR/84AcYGBjAT37yE/j9ftxxxx1Yt26dmCUQESUM0QJXEAQ0Njbi/fffHw3coaEhLFu2DK+++ipyc3Px2GOPYffu3SgvLx9zu93dXkQi8pk6nJFhRG+vT+oyRCX3Psq9fwD7GA273XLZj4kWuGfOnAEAPPLII3C73bj//vsxY8YMFBUVoaCgAABQVVWFnTt3RhW4cqNWq6QuQXRy76Pc+wewj7Ei2hhuf38/Fi9ejF/+8pf43e9+hz//+c84e/Ys7Hb76GscDgecTqdYJRARJRTR7nDLyspQVlY2+vbKlSvx/PPP42tf+9ro+wRBgEKhiKpdm80csxoTxZX+BJELufdR7v0D2MdYEC1wP/vsMwSDQSxevBjAcLjm5+fD5XKNvsblcsHhcETVrtzGcO12C1wuj9RliErufZR7/wD2Mdp2Lke0IQWPx4NNmzbB7/fD6/Vi27Zt+O53v4uGhgY0NTUhHA6juroaS5YsEasEIqKEItod7i233IJDhw5h+fLliEQi+Na3voWysjJs3LgRa9asgd/vR3l5OZYtWyZWCURECUWRbNszckgh+ci9j3LvH8A+RtvO5XClGRFRnDBwiYjihIFLRBQnDFwiojhh4BIRxQkDl4goTkTdnpHiKy3dCK0mNhtwBIJh9LnlvTsUUbwxcGVEq1Hhpa2HYtLWoytKYtIOEX2JQwpERHHCwCUiihMGLhFRnDBwiYjihIFLRBQnDFwiojhh4BIRxQkDl4goThi4RERxwsAlIooTBi4RUZwwcImI4oSBS0QUJwxcIqI4YeASEcUJA5eIKE64AbmE0tKNAAC73SJxJUQUDwxcCWk1Kvzfvx3HwIA/Ju3xlAaixMYhBSKiOOEdboqKRAT4/CGolArotSooFAqpSyKSPQZuinF7/DjZ6kZn7yAiwvD79FoVChxmTM9Pg0bNP3qIxMLATRGCIOB4sxv1rX3QqJWYnGOF1aRBOCKgs3cQ9a19aHZ6cO1MB2xpeqnLJZIlBm4KiAgCPj/pwtkuHwodZsybkgn1eXeyU3Kt6PX48UW9CzV1Hbim2C5htUTyxb8fZU4QBBw61Y2zXT7MKcpAaXHWBWE7IsOiw00LcpFh0eHzehc+P94pQbVE8sbAlbnGDg9aOr2YMSkN0yelXfG1WrUK18/OhsWowcY/7IezxxenKolSAwNXxvoGAjjS0ANHhgEzC9PH9DkatRLXz86GSqnEr96sQzAUEbdIohTCwJWp4aGELmjUSlxTnBXVtC+DTo3vrCpDk9ODt/c0ilckUYph4MpUQ7sHbm8A86fYoNWoov786+flYtHcbLy9pwlnuwZEqJAo9TBwZSgQCuNEixv2dD3ysozjbmfV0mLotSr86T9PQhCEGFZIlJoYuDJ0qrUPwVAEc4oyJrSCzGrS4u6bpuBYUy+ONPTEsEKi1MTAlZmhQBhn2j2YZDchzaybcHu3lOXDnq7H6++fQoR3uUQTwsCVmTNn+xGJCJhRkB6T9tQqJe5dMhWtrgF8fsIVkzaJUhUDV0a8g0E0dvQjz2aE2aCJWbsLZ2UjJ9OIt2oaOZZLNAEM
XBl5d28jQmEBxVdZ4BAtpVKBysVFaOn04tCp7pi2TZRKGLgyIQgC3q5phM2qi8nY7cUWzc1GhkWHvx9oiXnbRKmCgSsTzp5BdPb4MCXXKkr7KqUSt5Tl42hjL+flEo0TA1cmGjs8sKXpkWMb/7zbq1lSmge1Soldn7eKdg0iOWPgysCgP4RO9yBuW1gIpYgnN1iNWlw/24Gawx3wDYVEuw6RXDFwZaCl0wsAuO26QtGvtfRrk+APhvHJkXbRr0UkNwzcJCcIApo7vchK0yPHZhL9elNyrZiWZ8V7B1q5EIIoSgzcJNfr8cM3FEKBwxy3ay792iQ4ewdxotkdt2sSyQEDN8m1dQ1AqVQgJ1O8h2UXu2aGHXqtCjUcViCKCgM3iUUEAW1dA8jOMMT1tF2dRoXrZjnw2QkX/IFw3K5LlOxE/yn96U9/iqeeegoAUFNTg6qqKlRUVGDz5s1iX1r2uvqGEAhGkG8Xf+z2YjfOz4U/EMaBkzz7jGisRA3cPXv2YNu2bQCAoaEhrF+/Hlu2bMGOHTtw5MgR7N69W8zLy15Htw8qpQLZ6Ya4X7t4Uhqy0vSoOdIR92sTJSvRAtftdmPz5s14/PHHAQC1tbUoKipCQUEB1Go1qqqqsHPnTrEuL3uCIKCjxwdHhgEqVfxHhhQKBW6Yl4Njjb3o6R+K+/WJkpFarIaffvpprFu3Du3tww9WOjs7YbfbRz/ucDjgdDqjbtdmi9/T+HgxmaLf+6C7bxBDgTCKcq0XfP542rocu91yxY9XlU/H9k8aUdvYi/tunTGhtpKd3PsHsI+xIErgvv7668jNzcXixYuxdetWAEAkErng9AFBEMZ1GkF3txeRiDzmf458cQcG/FF/bkNbHxQA0o2aCz5/PG1djsvlueLHVQBmTErDf+5rQvn8nMt+Pe12y1XbSmZy7x/APkbbzuWIErg7duyAy+XCPffcg76+Pvh8PrS1tUGl+vIwQ5fLBYfDIcblU0JHjw+ZVt24DoiMpevnZOPVd0+irWsAk+zy++uDKJZEGfx75ZVXUF1djTfffBNr167F0qVL8fLLL6OhoQFNTU0Ih8Oorq7GkiVLxLi87A0MBeHxBeM69/ZyrpnpgEIBfHqMsxWIriZuT1t0Oh02btyINWvW4M4778TUqVOxbNmyeF1eVjq6fQAg6s5gY5Vm0mJmQTo+O9HJ0yCIrkK0h2YjVqxYgRUrVgAAFi9ejO3bt4t9Sdnr6PHBYtTApI/dMToTcd3sbLz6txNocw1gUhyXGBMlG9EDl2IrEAyju9+PGTE+Rudi4XBkzE9sKxZPwZ/ePYG6FjfK5uZesi0iYuAmna6+4TmvjgxxFzuoVEq8tPXQmF+fadXj7Y/PwDfg/8pshUdXlMS6PKKkxL0UkozLPQi1SoF0S+zPLZuIvCwTvIMh9PuCUpdClLAYuEnG5R5CVppe1JMdxiP33IyJdp53RnRZDNwkMjAYhM8fgl2CvROuRqdVwWbVo73HJ3UpRAmLgZtEXO5BAEjIwAWAHJsBHl8QA4McViC6FAZuEnH1DcGgU8GkT8xnnSMLMTp4l0t0SQzcJCEIArrcQ7CnGca1B0U8mPQaWIwaBi7RZTBwk4TbG0AwHIE9XS91KVeUm2lEd78f/iBPgiC6GAM3SYyM32Yl6PjtiJHlxs7eQYkrIUo8DNwk4XIPIs2khU7i3cGuJs2khV6rGt3vgYi+xMBNAuFIBL0eP2xpiT2cAAyfBJGTaYTLPcglvUQXYeAmAbcngIgA2KyJH7jA8GyFcESAq49H7xCdj4GbBLrPnRlmsybWct7LyUrTQ61ScFiB6CIM3CTQ3T8Ei1Ej+ekOY6VUKpCdYURHr4975BKdh4Gb4CKCMDx+myTDCSNyMg0IBCPo8cTujDWiZMfATXD9AwGEwkLSDCeMcGQYoVBw1RnR+Ri4Ca773IOnzCS7w9WolbBZ9XAycIlGMXATXHe/H0a9GgZdYu6fcCU5mUZ4B0M46/JK
XQpRQmDgJjBBENDTP5R047cjsjOHV8XtP+qUuBKixMDATWDewSACoUjSjd+OGNnM5tOjHVKXQpQQGLgJrLt/+Al/so3fni87w4i6M93wDXGPXCIGbgLr6R+CTqNM2P1vxyIn04BwRMDhMz1Sl0IkOQZuAuv1+JFh0Sfs/rdjkWHRwWrS4tCpLqlLIZIcAzdBBYJhDAyFkJFgp/NGS6FQ4NrZ2Th8phvhCDezodTGwE1QvedWaCV74ALAwjk5GBgK4VRrn9SlEEmKgZugRgI33ayVuJKJK5tph0qpwKFT3VKXQiQpBm6C6vH4YTVpoVYl/5fIqNdgVmE6DnIcl1Jc8v80y5AgCHB7/ciUwXDCiJLpWejo8XGpL6U0Bm4C8gwGEQoLshi/HVEyPQsAeJdLKY2Bm4Dk9MBshD3dgPwsE6eHUUpj4Cag3n4/NOrkXvBwKSXTs3CypY+rzihlMXATUK/XjwyLLqkXPFxK6fQsRASuOqPUxcBNMMFQBB5fEBlm+QwnjJiaZ4XZoOGwAqUsBm6CcXvlN347QqlUoGSajavOKGUxcBPMSOCmW5J/wcOllEzP4qozSlkM3ATj9gZg1KuhVSfHCb3Rmjslk6vOKGUxcBOM2+uXxXLeyzHo1Fx1RimLgZtA/IEwBv1hpMvwgdn5uOqMUhUDN4G4B0Y2rJF34JZy1RmlKAZuAnF7AwCANJN8hxQAICvdgHw7V51R6mHgJhC3xw+zQQONWv5fltJzq84GuOqMUoj8f7KTiNsbkPUDs/OVnFt1doSrziiFMHATxJA/BH9Q/g/MRkzNtcJi5KozSi0M3AQxMn6bKne4SqUCC6Zy1RmlFgZughhZYWaV+QOz83HVGaUaBm6CcHsDsBg1sjhSZ6zmTsmEWqXg9DBKGanz053ARo7UkeMOYVdi0KkxszADB7nMl1IEAzcBDAXCCIQiSEuR8dvzlU7PgrPHhw6uOqMUwMBNAH0Dww/MUmn8dkTJNBsAcLYCpQRRA/e5557DnXfeicrKSrzyyisAgJqaGlRVVaGiogKbN28W8/JJo/9c4KYZUy9wueqMUologbt//37s3bsX27dvx1//+le8+uqrOH78ONavX48tW7Zgx44dOHLkCHbv3i1WCUmjbyAAk14NdQqsMLsUrjqjVCHaT/jChQvxhz/8AWq1Gt3d3QiHw+jv70dRUREKCgqgVqtRVVWFnTt3ilVC0ugfCKTkcMKIktGzzvjwjORN1GNhNRoNnn/+efz2t7/FsmXL0NnZCbvdPvpxh8MBp9MZVZs2mznWZUoqGIpgYCiEqZPSYTJNfJZCLNqIdVvhcAR2u+WyH8+0mZFmPozjLX2oKi8eU3uqBJs+d6X+yQX7OHGin8O9du1aPProo3j88cfR2Nh4wUm0giBEfTJtd7cXkYgQ6zIlYbdb4PYMAQAMGiUGzm3POBGxaCPWbalUSry09dAVX2M1arGnth2//utBKK/yPfHoihK4XJ6Y1BYLdrsloeoRA/sYXTuXI9ptwunTp3Hs2DEAgMFgQEVFBfbt2weXyzX6GpfLBYfDIVYJSaHXMxxqct+S8WqyMw0IhiPo6Y/dLwyiRCNa4La2tmLDhg0IBAIIBALYtWsXVq1ahYaGBjQ1NSEcDqO6uhpLliwRq4Sk4Pb4oVUrodfK8wyzsXKkGaBUgKdAkKyJNqRQXl6O2tpaLF++HCqVChUVFaisrERmZibWrFkDv9+P8vJyLFu2TKwSkkKvxw+rSRv10IrcqNVK2NL06OjxYc7kjJT/70HyJOoY7po1a7BmzZoL3rd48WJs375dzMsmjXA4ArfXjyk58n8YMRY5mUYcPtMDz2AQ1hSck0zyl1iPelNMm2v4AWAqTwk7X67NCABo7+KwAskTA1dCZ872A+ADsxF6rRo2qw5nuwekLoVIFAxcCTW09UGpUMBs0EhdSsLItZng8QXh9XHVGckPA1dCZ872Ic2shVLJB0QjRoYVeJdLcsTAlYggCGg4
24cMq17qUhKKQadGhkWH9m6O45L8MHAl4vYG0OcNIMOSWpuOj0WuzYi+gQA3syHZYeBKpKVzeAlhOgP3K/I4W4FkioErkZZOLwCkzLHo0TDqNUgzaTmOS7LDwJVIs9OLHJsRWk1qL+m9nLwsI9zeAHz+kNSlEMXMmAJ3/fr1X3nf2rVrY15MKmnu9GJKXprUZSSsXJsJANDOu1ySkSsu7X3mmWfgdDpx4MAB9PT0jL4/FAqhpaVF9OLkaigQQmePD7deVwhBkMdWk7FmNmhgNWrQ3uXDNP5iIpm4YuCuXLkS9fX1OHHiBG6//fbR96tUKpSWlopdm2y1ugYgAJiaZ8Xptj6py0lYuVkmnGh2Y9AfgkEn+tbNRKK74nfx/PnzMX/+fNxwww3IycmJV02y1+IcnqEwJT+NgXsF+ecCt61rANPzeZdLyW9Mtw3t7e343ve+h76+vgv+BH7rrbdEK0zOmju9MOnVsKcbpC4loZkNGqSbtWhzMXBJHsYUuE8//TRWrFiBOXPmcJ/SGGh2elHgMPO/5RhMsptxpKEHHl8AFm7ZSEluTIGrVqvx8MMPi11LSohEBLS5vLi5LF/qUpJCXpYRRxp60OYawKwiBi4ltzFNCysuLsaJEyfEriUlOHt9CIQiKHDI6/Rhsei1amSl6dHaNcAZHZT0xnSH29LSgm984xvIy8uDTvflyiiO4Uav2Tm8woyBO3aT7CYcPNUNN/eeoCQ3psBdt26d2HWkjOZOD1RKBfKyTFKXkjRybSbUnu5Gq8vLwKWkNqbAnTFjhth1pIwWpxf5WSaoVVxVPVYatRLZmUa0dQ1g7pRMqcshGrcxBe6iRYugUCggCMLok3W73Y4PP/xQ1OLkqLnTi/lTGRrRyreb0N7tQ5d7SOpSiMZtTIF7/Pjx0X8HAgFUV1ejoaFBtKLkqs/rR/9AAIUOntIbrewMA9QqBdq6vFKXQjRuUf9dq9VqsWLFCnzyySdi1CNrI1syFmbzgVm0VEolcm3Dd7n+YFjqcojGZUx3uG63e/TfgiDgyJEj6O/vF6sm2Wru5AyFiSiwm9DS6cWew+2YW8CVZ5R8oh7DBQCbzYZ//dd/FbUwOWp2emCz6mHU85Te8bCl6WHUqfH3/U2YW7BA6nKIohb1GC6NX0unl8MJE6BQKFCQbcah+i50uQeRxb0oKMmMaQw3EongpZdewurVq/HAAw/gxRdfRCjEnfij4Q+E0dHt43DCBA3vQQF8fLhd6lKIojamwP35z3+OvXv34qGHHsLDDz+ML774Aps2bRK7Nllp7fJCAFCYzRkKE2HUqVFabMcnh9sR4VJfSjJjCtyPPvoIv/rVr3DbbbehoqIC//Ef/8E5uFFqObekt5B3uBP2DwuL0N3vx7GmXqlLIYrKmAJXEARoNF8+6NFqtRe8TVfX0umFQaeGLU0vdSlJ7/p5OTDp1fi4lsMKlFzGFLizZs3Cj3/8YzQ3N6OlpQU//vGPudw3Ss2dHhRyD9yY0GpUWDQnBwdOuDAwFJS6HKIxG1PgPvPMM+jv78eqVatw3333obe3F//2b/8mdm2yEYkIaO0cQAFnKMTMTQtyEQpHsO+oU+pSiMbsioEbCATw/e9/H3v27MHGjRtRU1ODBQsWQKVSwWxmeIxVp3sQ/mCYMxRiqCjHgkKHGR9xWIGSyBUD9/nnn4fX68U111wz+r5nn30W/f39eOGFF0QvTi6azx0ayT0UYuvrJXlo6vCM/vclSnRXDNwPPvgAP//5z2Gz2Ubfl52djU2bNuHvf/+76MXJRUunl3vgimDR3Gxo1Uq893mb1KUQjckVA1ej0UCv/+pTdbPZDK2W50uNVbPTi1ybCRo198CNJZNeg0Vzs7H3aAd8fHhGSeCKCaBUKuH1fnU7PK/Xy5VmUWjp9HBJr0iWXjMJgWAEHx/ukLoUoqu6YuDedddd2LBhA3w+3+j7fD4fNmzYgIqKCtGLk4P+gQDc3gAXPIikMNuCaflWvP95
K1eeUcK7YuA+9NBDsFgsuPHGG3H//fdj5cqVuPHGG2G1WvHEE0/Eq8akNrIHbgGX9Ipm6TWT4OwdxLFGrjyjxHbF3cKUSiWeffZZPP7446irq4NSqcSCBQvgcDjiVV/Sa+4cfoLOKWHiuXamA3/eVY/3Pm/lmWeU0Ma0PWN+fj7y8/PFrkWWWpxeZFp1MBu4FFosGrUSS0rysGNvE7r7hrh8mhIWH5uLrLnTy/m3cXBz6fANwQcHOUWMEhcDV0SBYBjt3QMcTogDW5oepdOz8OGhswiGIlKXQ3RJDFwRtXUNQBB4aGS8LL1mEjy+IPYf4/4KlJgYuCLiDIX4mjM5A/l2E/62v2X0/D2iRMLAFVGz0wODToUsPsSJC4VCgduvK0Sry4ujnCJGCYiBK6LmTi8K7GYouQdu3Fw/JxtpZi127m+WuhSir2DgiiQiCGjp9KKAMxTiSqNW4ravTUJdQ8/okA5RohjTPFyKnss9CH8gzE3HRRAOR2C3X/4X2crbZuLtPU3YXduOdQ9cc9nXAcMzSfrcviu+hihWGLgiGT00koEbcyqVEi9tPXTF1+TZjHj/QAtUEGDQXf7b/NEVJbEuj+iyRB1SePHFF1FZWYnKysrRY9VrampQVVWFiooKbN68WczLS6q50wulQoF87oErial5VggC0NDeL3UpRKNEC9yamhp8/PHH2LZtG9544w3U1dWhuroa69evx5YtW7Bjxw4cOXIEu3fvFqsESbU4PcjNMkKjVkldSkoy6jXIyzKiscPDhRCUMEQLXLvdjqeeemr0SPVp06ahsbERRUVFKCgogFqtRlVVFXbu3ClWCZIaXtLL4QQpTc9LQygsoKmDR/BQYhAtcIuLi1FaWgoAaGxsxDvvvAOFQgG73T76GofDAadTfquCPL4Aej1+zlCQWLpFB3u6HqfO9iEU5l0uSU/0h2b19fV47LHH8OSTT0KlUqGxsXH0Y4IgQBHlHFWbLfHvGs+edAEA5s+wX/Fp+giTSReza6dCW9G0V1Jsx98/bUFH7xBmFmVc8jVj+RpdTSzaSHTs48SJGrgHDhzA2rVrsX79elRWVmL//v1wuVyjH3e5XFHvrdvd7UUkktjLNg+dHL5rT9Or4XJd/s/ZkS/uwIA/ZtdOhbaiac+oVcFm1aPuTDdyM/VQKb/6R92VvkZjYbdbJtxGomMfo2vnckQbUmhvb8cTTzyBn/3sZ6isrAQAlJSUoKGhAU1NTQiHw6iursaSJUvEKkEyzU4vbNwDN2HMLEiDPxhGs5MLIUhaot3h/uY3v4Hf78fGjRtH37dq1Sps3LgRa9asgd/vR3l5OZYtWyZWCZJp6vCgkBvWJAxbmh6ZFh3qW/tQmG2BSsml1iQN0QJ3w4YN2LBhwyU/tn37drEuK7mhQAjOHh8WzcmWuhQ6R6FQYEZBOvYedaKl04vJOfxlSNLgXgox1tLphQDwDjfB2NP1SDdrcaq1L+GfAZB8MXBjbGScsIh3UQlFoVBgZkE6fP4QWlwcyyVpMHBjrKnDA4tRg3SzVupS6CKODAPSzVqcbHYjHOG8XIo/Bm6MNTs9KMq2RD2/mMSnUCgwuygDg4EwGtvlPcWJEhMDN4aCoQjaugY4fpvA7OkG2NP1qG/t4x4LFHcM3Bg62zWAcETg+G2Cm12YgUAogtNn+6QuhVIMAzeGmpzDf6ZyD9zElm7RIddmxOm2frg9sV0BR3QlDNwYajp3aKQ93SB1KXQVswszEIkIeH3XSalLoRTCwI2h5g4PChwWHhqZBMxGDQqyzdhR04gu96DU5VCKYODGSCQyfGhkER+YJY2ZBelQKICtH52RuhRKEQzcGGnv8SEQinD8NokYdGosL5+GvXVOnG7jAzQSHwM3RprPPTDjHW5yue/WGUgzafH/dtUjInDJL4mLgRsjTR0eaNRK5GYZpS6FomDQqfGN8mk4c7Yf+47K7/QRSiwM3BhpdnowyW665AbXlNhumJ+DyTkW/OWD0/AH
wlKXQzLGdIgBQRDQ5OQDs2SlVCjwwG3F6PX48c6+JqnLIRlj4MaAq28Ig/4Ql/QmseJJ6Vg424F39jWju29I6nJIphi4MdDcMbLCjIGbzO67eToA4LX3T0lcCckVAzcGGjr6oVIqUOAwSV0KTYAtTY/KxUX49HgnjpzplrockiEGbgw0tnswyW6GRq2SuhSaoDuuL0J2phF/fPckAkE+QKPYYuBOUEQQ0NjRjym5HE6QA41aiQcrZqDTPYi39/ABGsUWA3eCnD0+DPrDmJxrlboUipHZkzOxeG42duxtQnv3gNTlkIwwcCdo5OSAKQxcWbl/aTF0GhVe/dsJCFyBRjHCwJ2gho5+aNVK5HGFmaykmbRYefM0HG92Y09dh9TlkEwwcCeosd2DwhwLV5jJ0JLSPEzLs+LPu06hfyAgdTkkA2qpC0hm4UgEzU4PykvzpS6FxikcjsBuv/wDz3/+r9fif/77B3h99xk89dB1l32d3W5BIBhGn9snRpkkEwzcCWhzDSAQinCGQhJTqZR4aeuhK75men4aPqk9i//9f2qQl/XVudYmkw4DA348uqJErDJJJvh38AQ0nlthxhkK8jYt34p0sxa1p7vh59xcmgAG7gQ0tPfDoFPDkcEzzORMqVCgtDgLoXAEh09zBRqNHwN3Ahra+zE5h2eYpQKrUYuZhek42+3D2S7OzaXxYeCOUzAURptrgPNvU8i0/DSkmYaHFoYCIanLoSTEh2ZRSks3QqtR4WhDN8IRAaWzsq/4lJvkQ6lQ4JoZWfjwUDu+qO/CojnZUPCvG4oCAzdKWo0KL209hFPnDh38rK4dR+o7x9UWn2onH4tRi7mTM1B7pgcN7R5MzeNfODR2HFIYp16PH0adGnotdwhLNUU5FmRnGHC0sYcLIigqDNxxEAQBPf1+ZFh0UpdCElAoFCidngWNWokDJ10IhyNSl0RJgoE7DoP+MPzBMDKtDNxUpdOqUFacBY8viIP1LqnLoSTBMdxx6PUMn3nFO9zU5sgwYkquBSeb3UgzaqUuh5IAA3ccejx+qJQKWPlDlvLmTM5E30AQX9S70NE9AI7o05VwSGEcej1+pJu1UCo5JSjVqZQK3LggFwDw01c/QzDE8Vy6PAZulPzBMPoGAsiw6KUuhRKE2ahFWXEWTrW48dp7PPGXLo+BG6VTLW4IApDJ8Vs6T67NhOXl07Dr81bsP+aUuhxKUAzcKJ1o6gHAB2b0VQ9VzsG0PCteeec42rjfAl0CAzdKx5t6YdSroeOCB7qIWqXEf18+Dzq1Ei/+tRa+oaDUJVGCYeBGQRAEHG3o5nACXVamVY//ce98dPUN4dfbjyIS4QGU9CUGbhTau33o8wZgS+MDM7q8GQXp+NZtxTh8phvbPjojdTmUQDgPNwonWtwAAJuVgUtXdnNZPpqcHry9pwmF2RZcN8shdUmUAHiHG4WTLW5kWnUw6fl7iq5MoVDgv/zDTEzLt+I3bx9FS6dX6pIoATBwx0gQBJxo7sW8qVncA5XGRKNW4ol758OgU+OFv9bCO8iHaKmOgTtGLvcg3N4A5k6zSV0KJZF0sw7fvnc+3F4/frn1MELcWSylMXDH6ESzGwAwbyoDl6IzLT8ND985Gyda3PjdO8chCJy5kKo4GDlGJ1rcMBs0KMjmcToUvcVzc9DZO4g3P25AdoYBVTdOkbokkgADd4xOtrgxsyCd47c0bnffOBmdvT5s+6gB9gwDFs3JkbokijNRhxS8Xi/uuusutLa2AgBqampQVVWFiooKbN68WcxLx1R33xC6+oYwozBd6lIoiSkUCvzjHbMxoyAdv337OOpb3VKXRHEmWuAeOnQIDzzwABobGwEAQ0NDWL9+PbZs2YIdO3bgyJEj2L17t1iXj6kTLb0AgJkF6dIWQklPo1bi2yvmw2bV4YW/HkZnr0/qkiiORAvc1157Dc888wwcjuEJ37W1tSgqKkJBQQHUajWqqqqwc+dOsS4fU8eb3DDp1Zhk
N0tdCsmA2aDBd+4rgSAI2Px6Lfp9PIgyVYg2hvujH/3ogrc7Oztht9tH33Y4HHA6o9/GzmaLb+gJgoBjzb0onelAdvbwkdgmU2z3Uohle6nQVqzbi0VbI23Y7WN7qGq3W/D0f1uEf/tVDba8cQQ/fPxGGHSJ/UhlrH1LZmL3MW5f4UgkcsEDJ0EQxvUAqrvbG9cNQdq6BtDdN4TpuRa4XB7Y7RYMDPhjeo1YtpcKbcW6vYm2ZTLpRttwuTxj/jy7WYvH7pmLF7cexg9e2oO1KxdArUrMmZp2uyWqviWjWPXxSqEdt69uTk4OXK4vTzd1uVyjww2JrK5heP/buZMzJa6E5Kis2I6Hls3CkYYe/HbHMUQ4R1fW4ha4JSUlaGhoQFNTE8LhMKqrq7FkyZJ4XX7c6hp6kJ1pRFa6QepSSKaWlOThG+VTsbfOidfeO8WFETIWtyEFnU6HjRs3Ys2aNfD7/SgvL8eyZcvidflxCYYiONHci68vyJO6FJK5OxcVoc8bwLuftiDNrMUd1xdJXRKJQPTAfe+990b/vXjxYmzfvl3sS8bMqbY+BEIRzJ3C4QQSl0KhwKrbiuEZDOL190/DpNdgSQl/0ctNYj8WlVhdQw9USgVmcsEDjUE4HJnwU+7vP7QQP3plH36/8zisVj1K+cteVhi4V1DX0INp+WkJP12HEoNKpcRLWw9NuJ2cdD1sVj1efO0gHr9nHq7l5uWykZhzUBJAvy+AJqeHwwkUdyqVEgtnOzCzKBO/3l6Hg/VdUpdEMcLAvYzDp7sBAPMYuCQBtUqJ//XoIhRmm7HljcOj0xMpuTFwL+NgfRfSzVoU5ch/dQ0lJqNeg3X3lyLXZsILf63FieZeqUuiCWLgXkIgGMbhhm6UFduh5HaMJCGzQYN/XlWKrHQDNr9+CMcaeaebzBi4l3C0qReBYARlxVlSl0IEq1GL7z1QBke6Ab/4Sy0On+mWuiQaJwbuJRysd8GgU2FWUYbUpRABANJMWjz5rWuQZzPh+b/U4ouTrqt/EiUcBu5FIhEBB+u7MH+qLWE3EqHUZDZo8L0HSlGUY8GWN45g/7Hod9sjaTFRLnLmbD/6fUGUFduv/mKiODPqNfjnb5ZiWp4Vv36zDrsOtEpdEkWBgXuRz+tdUCkVmM/TeSlBGXRqrPtmKUqmZ+FP/3kSf/ngNDe8SRIM3PMIgoAv6rswqygDRj1Xl1Hi0mlUeGLFPNxcmocde5vwm7ePIRSOSF0WXQVT5TzNTi+cPT7cfl2B1KUQjWlvhu/+12sxKeck/rjzOHyBML6/+lqYjdqvvC4QDKPPzfPTpMbAPc+eug6olAquXaeEEM3eDKXTbThU78J/+9F/YuFsBywXhe6jK0rEKJGixCGFcyIRAfuOOrFgmg1mg0bqcoiiUphtwQ3zchAKR/DhoXa0d/NuNhExcM851tSLvoEAFs/NkboUonGxWfVYUpIHi1GDT4934nhzLx+mJRgG7jl76jpg0KlRMp2zEyh5GXRq3Dg/B5PsJpxs6UPNkQ4M+kNSl0XnMHAB+ANhHDjpwnWz7NCoVVKXQzQhKqUSZcVZKJ1ug9sbwAcHz+KT2rNSl0XgQzMAwBenXPAHwhxOINlQKBQozLYg06rH5ydd2Pj7T/EPCwvxT3fPg2mczyjOnzHBWQ/jw8AFUHO4A5lWHYoL0qUuhSimzAYNbpqfC4NRi9d31eOjg22YNzUTuZlGKKLYCc9k0mFgwD/6Nmc9jE/KDyk4e3w40tCDJQvyuBUjyZJSqcCDd87B1xfkQqtR4rPjLnx6vJNjuxJI+cB97/M2qJQKlJfyhFSStwyLDktK8jBncgZc7iG893kbTra4EeYKtbhJ6SGFoUAIHx9ux7WzHEgz66Quh0h0SoUC0/PTkGszoq6hB8eb3Wjq8GBWUQYm2U1RDTNQ9FL6DndvnROD/hBuvWaS1KUQxZVJr8HC2dm4YV4OtBoVvqjvwoeH2tHZO8i5uyJK2cAVBAG7
Pm9FYbYZ0/KtUpdDJImsND2WlOSirDgLgVAYe4868fHhDnT2+hi8IkjZIYUTzW60uQbw8B2z+GcUpTSFQoEChxn5WSY0d3pR3+rG3qOdyDBrMbMwHfZ0g9QlykbKBu47+5phNmhw/ZxsqUshSghKpQKTcywodJjR0unFyXPBm27WYs4UGzItWs7kmaCUDNymDg8On+nGiiVTodVwZRnR+ZRKBYpyLCg4F7yn2vpQc7gdBp0KU3OtKMy+8paRdHkpGbjVNY0w6NRYyodlRJc1EryF2Wa4fSEcPdOFusZenGhxQ2/Q4sY5DmRa9VKXmVRSLnAb2vtx4KQLd984mac6EI2BQqHAJIcZGSYNej1+nD7bhzc/PI3tH57G12bacUtZPmYUpPNZyBikXOJs3X0aZoMGty8slLoUoqSTYdHh2pkO3H1zMV579zg+rm3H/mOdyMsy4ebSPNwwL1fSG5m0dOOEhgkvPmEj1ntGpFTgHmnoRl1jL765dDoMupTqOlFMZWcaserWYty7ZCr2H3Pigy/a8H//Xo+/7D6N62dn48b5uSielBb3u16tRjXmUzIudvF+EUDs94xImdQJhSP4f3+vhyPdwLFbohjRaVT4+oI8fH1BHho7+vH+523Yd8yJj2rbkZWmx+K5ObhhXg6yM41Sl5oQUiZw3zvQivZuH9Z8Yz406pRd70Ekmsk5Vjx8pxUP3FaMz0+6sOdIB6prGvFWTSMm2U0oK7bjmhl2FGabU3a8NyUCt8s9iG0fNWDBNBtKp2dJXQ6RrOm1atwwLxc3zMtFr8ePT4858UV9F6r3DIdvplWH2UUZmFWYgdlFGSk100H2gSsIAv7wtxMAgNUVM1P2NyuRFDIsOlQsLETFwkJ4fAEcOtWNQ6e6cLC+C58c7gAA2NP1mJJrRVG2BYXZw9PQLj51WC5kH7jvfd6GIw09+C//MAO2tNT5TUqUaCxGLW5akIubFuQiIgho7fTieLMbJ1vcON3Wj/3HOkdfazZo4MgwDP8v3YDsDCPs6QbY0vRIMyfvijdZB67HF8Br75/C/Kk2LL0mX+pyiOgc5bkjgAqzLai4rgAA4B0MotnpQbPTC2evD529g6hv6cO+OifO30ZHpVTAZtXDlqYf/f+sc/+OqFQQBCFh/5KVdeBq1EosW1iIW782KWG/AEQ0zGzQYM7kTMyZnHnB+4OhCLr6BuFyD6K7bwhd/UPo7hv+3+GGbvR5Axe8XqVUwGLUwGrUwmLSIMOsQ7pZB6VS+gyQdeDqtWrcu2Sq1GUQ0QRo1Erk2kzItZku+fFgKIyefj+6+ofgDwvY8fEZ9PuC6Oj1oblz+DQLpVKBDLMWNqse9nQDMqw6SYYlZB24RCR/GrUK2ZlGZGcaYbdb0Hy2b/RjQ4EQevr96On3o7t/CCdb+3CytQ8atRKODANyMoxwZBjiNlU0JQJ3osv9iOhC4XDkK8tgxysUjkCtEifw9Fo18rLUyMsavjsOhiJwuQfR0TM8RtzmGoBSqUBOhgHTCtKRZtCIOvSQEoE7keV+F+Px0ESASqWM6c9UvH4+NWol8rJMyMsyQRAE9Hj8ONs1gLauAZw9eHb040UOM9LMsZ+alhKBS0R0MYXi3GwHqx5zJ2fCMxTCqVY3Wju9aOrwwGrU4M6vT4Muhje8DFwiSnlKpQJ5djPSjBoEQxG0ubzo6B3EwGAQOqMmdteJWUtERDKgUSsxOdeKRXOyMbMo8+qfEAUGLhFRnDBwiYjihIFLRBQnDFwiojiRJHDfeust3HnnnaioqMCf/vQnKUogIoq7uE8Lczqd2Lx5M7Zu3QqtVotVq1bh+uuvx/Tp0+NdChFRXMU9cGtqarBo0SKkp6cDAG6//Xbs3LkT3/72t8f0+eNddmeO4Vy6WLZl0mugECIxay9R+xnLtmLd3kTbMp73NZRrP42X+D5NlNpi1dal+giMP3MuRSEIgnD1l8XOr3/9a/h8Pqxbtw4A8Prrr6O2thbPPvtsPMsg
Ioq7uI/hRiKRC/amTeTNgomIYinugZuTkwOXyzX6tsvlgsPhiHcZRERxF/fAveGGG7Bnzx709PRgcHAQ7777LpYsWRLvMoiI4i7uD82ys7Oxbt06PPjggwgGg1i5ciUWLFgQ7zKIiOIu7g/NiIhSFVeaERHFCQOXiChOGLhERHHCwCUiihMGbpx5vV7cddddaG1tBTC81LmqqgoVFRXYvHmzxNVN3IsvvojKykpUVlZi06ZNAOTXx+eeew533nknKisr8corrwCQXx8B4Kc//SmeeuopAPLr3+rVq1FZWYl77rkH99xzDw4dOhSfPgoUNwcPHhTuuusuYe7cuUJLS4swODgolJeXC83NzUIwGBQeeeQR4YMPPpC6zHH75JNPhG9+85uC3+8XAoGA8OCDDwpvvfWWrPq4b98+YdWqVUIwGBQGBweFW265RTh27Jis+igIglBTUyNcf/31wve//33ZfZ9GIhHhpptuEoLB4Oj74tVH3uHG0WuvvYZnnnlmdGVdbW0tioqKUFBQALVajaqqKuzcuVPiKsfPbrfjqaeeglarhUajwbRp09DY2CirPi5cuBB/+MMfoFar0d3djXA4jP7+fln10e12Y/PmzXj88ccByO/79MyZMwCARx55BHfffTf++Mc/xq2PDNw4+tGPfoRrr7129O3Ozk7Y7fbRtx0OB5xOpxSlxURxcTFKS0sBAI2NjXjnnXegUChk1UcA0Gg0eP7551FZWYnFixfL7uv49NNPY926dbBarQDk933a39+PxYsX45e//CV+97vf4c9//jPOnj0blz4ycCUk14186uvr8cgjj+DJJ59EQUGBLPu4du1a7NmzB+3t7WhsbJRNH19//XXk5uZi8eLFo++T2/dpWVkZNm3aBIvFgszMTKxcuRLPP/98XPoY96W99CU5buRz4MABrF27FuvXr0dlZSX2798vqz6ePn0agUAAs2fPhsFgQEVFBXbu3AmVSjX6mmTu444dO+ByuXDPPfegr68PPp8PbW1tsukfAHz22WcIBoOjv1QEQUB+fn5cvk95hyuhkpISNDQ0oKmpCeFwGNXV1Um9kU97ezueeOIJ/OxnP0NlZSUA+fWxtbUVGzZsQCAQQCAQwK5du7Bq1SrZ9PGVV15BdXU13nzzTaxduxZLly7Fyy+/LJv+AYDH48GmTZvg9/vh9Xqxbds2fPe7341LH3mHKyGdToeNGzdizZo18Pv9KC8vx7Jly6Qua9x+85vfwO/3Y+PGjaPvW7Vqlaz6WF5ejtraWixfvhwqlQoVFRWorKxEZmambPp4Mbl9n95yyy04dOgQli9fjkgkgm9961soKyuLSx+5eQ0RUZxwSIGIKE4YuEREccLAJSKKEwYuEVGcMHCJiOKEgUtEFCcMXEoajzzyCHp6eib8mn379uGuu+666vVmzpx5ybZ27dqFH/7whwCGt/nbuXMnWltbUVZWdtU2KbVx4QMljU8++SQmr5moW2+9Fbfeeqvo1yH54R0uJYV/+Zd/AQA89NBD2L9/P1avXo2qqircfffdeOONN77ymvb2drz//vtYtWoVVqxYgZtvvhm/+MUvor7uL37xC9x7772455578P777wMAtm7disceeywm/aLUwjtcSgo/+clPsHXrVvz+97/H/fffjyeffBIVFRVwOp247777UFRUdMFrMjIy8OSTT2Ljxo2YPHkynE4nbrnlFjz44INRXXfSpEn4wQ9+gJMnT2L16tV45513ROohpQIGLiWV06dPw+/3o6KiAgCQnZ2NiooKfPTRRxeMoSoUCvzqV7/CBx98gOrqapw+fRqCIGBwcDCq6z3wwAMAgBkzZmDatGn44osvYtcZSjkcUqCkolAovrJPqSAICIVCF7zP5/Ph3nvvRV1dHebMmYMnn3wSarUa0W4dolR++SMSiUSgVvMehcaPgUtJQ6VSIT8/H2q1Gu+++y4AwOl04m9/+xtuuOGG0deEQiE0NTXB6/XiO9/5DpYu
XYp9+/YhEAggEolEdc1t27YBAOrq6tDc3IySkpLYdopSCn9dU9JYtmwZ/vEf/xFbtmzBD3/4Q7zwwgsIh8N44oknsGjRotHXrF69Gs899xxuvvlm3HHHHdBqtZgxYwamT5+OpqYmaLXaMV+zpaUFy5cvh0KhwL//+78jPT1dpN5RKuD2jEREccI7XEpZL7/8Mt56661Lfuyf/umfcPfdd8e5IpI73uESEcUJH5oREcUJA5eIKE4YuEREccLAJSKKEwYuEVGc/H/38wAJadXY2gAAAABJRU5ErkJggg==",
      "text/plain": [
       "<Figure size 360x360 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Create a visualization with Modin df\n",
    "sns.displot(data=modin_tips, x=\"total_bill\", kde=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<seaborn.axisgrid.FacetGrid at 0x7fc3bd078340>"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAVwAAAFcCAYAAACEFgYsAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAA09klEQVR4nO3deXyU9b0v8M/s+2SZzGQjCVvYIYlVBLVE0RPRGEWKFnsvevRcX3qvhVt6XrUeDkfvrV0or/ZQl3Laq61tbe/tS1tQjEg9RcUlLIpCIGwBspNMJsskM5lk1uf+ERIBWTLJPPPMPPN5v159lSST3/P9meSTJ7/ntygEQRBARESiU0pdABFRqmDgEhHFCQOXiChOGLhERHHCwCUiihMGLhFRnKjFbHz16tXo6emBWj18mR/84AcYGBjAT37yE/j9ftxxxx1Yt26dmCUQESUM0QJXEAQ0Njbi/fffHw3coaEhLFu2DK+++ipyc3Px2GOPYffu3SgvLx9zu93dXkQi8pk6nJFhRG+vT+oyRCX3Psq9fwD7GA273XLZj4kWuGfOnAEAPPLII3C73bj//vsxY8YMFBUVoaCgAABQVVWFnTt3RhW4cqNWq6QuQXRy76Pc+wewj7Ei2hhuf38/Fi9ejF/+8pf43e9+hz//+c84e/Ys7Hb76GscDgecTqdYJRARJRTR7nDLyspQVlY2+vbKlSvx/PPP42tf+9ro+wRBgEKhiKpdm80csxoTxZX+BJELufdR7v0D2MdYEC1wP/vsMwSDQSxevBjAcLjm5+fD5XKNvsblcsHhcETVrtzGcO12C1wuj9RliErufZR7/wD2Mdp2Lke0IQWPx4NNmzbB7/fD6/Vi27Zt+O53v4uGhgY0NTUhHA6juroaS5YsEasEIqKEItod7i233IJDhw5h+fLliEQi+Na3voWysjJs3LgRa9asgd/vR3l5OZYtWyZWCURECUWRbNszckgh+ci9j3LvH8A+RtvO5XClGRFRnDBwiYjihIFLRBQnDFwiojhh4BIRxQkDl4goTkTdnpHiKy3dCK0mNhtwBIJh9LnlvTsUUbwxcGVEq1Hhpa2HYtLWoytKYtIOEX2JQwpERHHCwCUiihMGLhFRnDBwiYjihIFLRBQnDFwiojhh4BIRxQkDl4goThi4RERxwsAlIooTBi4RUZwwcImI4oSBS0QUJwxcIqI4YeASEcUJA5eIKE64AbmE0tKNAAC73SJxJUQUDwxcCWk1Kvzfvx3HwIA/Ju3xlAaixMYhBSKiOOEdboqKRAT4/CGolArotSooFAqpSyKSPQZuinF7/DjZ6kZn7yAiwvD79FoVChxmTM9Pg0bNP3qIxMLATRGCIOB4sxv1rX3QqJWYnGOF1aRBOCKgs3cQ9a19aHZ6cO1MB2xpeqnLJZIlBm4KiAgCPj/pwtkuHwodZsybkgn1eXeyU3Kt6PX48UW9CzV1Hbim2C5htUTyxb8fZU4QBBw61Y2zXT7MKcpAaXHWBWE7IsOiw00LcpFh0eHzehc+P94pQbVE8sbAlbnGDg9aOr2YMSkN0yelXfG1WrUK18/OhsWowcY/7IezxxenKolSAwNXxvoGAjjS0ANHhgEzC9PH9DkatRLXz86GSqnEr96sQzAUEbdIohTCwJWp4aGELmjUSlxTnBXVtC+DTo3vrCpDk9ODt/c0ilckUYph4MpUQ7sHbm8A86fYoNWoov786+flYtHcbLy9pwlnuwZEqJAo9TBwZSgQCuNEixv2dD3ysozjbmfV0mLotSr86T9PQhCEGFZIlJoYuDJ0qrUPwVAEc4oyJrSCzGrS4u6bpuBYUy+ONPTEsEKi1MTAlZmhQBhn2j2YZDchzaybcHu3lOXDnq7H6++fQoR3uUQTwsCVmTNn+xGJCJhRkB6T9tQqJe5dMhWtrgF8fsIVkzaJUhUDV0a8g0E0dvQjz2aE2aCJWbsLZ2UjJ9OIt2oaOZZLNAEM
XBl5d28jQmEBxVdZ4BAtpVKBysVFaOn04tCp7pi2TZRKGLgyIQgC3q5phM2qi8nY7cUWzc1GhkWHvx9oiXnbRKmCgSsTzp5BdPb4MCXXKkr7KqUSt5Tl42hjL+flEo0TA1cmGjs8sKXpkWMb/7zbq1lSmge1Soldn7eKdg0iOWPgysCgP4RO9yBuW1gIpYgnN1iNWlw/24Gawx3wDYVEuw6RXDFwZaCl0wsAuO26QtGvtfRrk+APhvHJkXbRr0UkNwzcJCcIApo7vchK0yPHZhL9elNyrZiWZ8V7B1q5EIIoSgzcJNfr8cM3FEKBwxy3ay792iQ4ewdxotkdt2sSyQEDN8m1dQ1AqVQgJ1O8h2UXu2aGHXqtCjUcViCKCgM3iUUEAW1dA8jOMMT1tF2dRoXrZjnw2QkX/IFw3K5LlOxE/yn96U9/iqeeegoAUFNTg6qqKlRUVGDz5s1iX1r2uvqGEAhGkG8Xf+z2YjfOz4U/EMaBkzz7jGisRA3cPXv2YNu2bQCAoaEhrF+/Hlu2bMGOHTtw5MgR7N69W8zLy15Htw8qpQLZ6Ya4X7t4Uhqy0vSoOdIR92sTJSvRAtftdmPz5s14/PHHAQC1tbUoKipCQUEB1Go1qqqqsHPnTrEuL3uCIKCjxwdHhgEqVfxHhhQKBW6Yl4Njjb3o6R+K+/WJkpFarIaffvpprFu3Du3tww9WOjs7YbfbRz/ucDjgdDqjbtdmi9/T+HgxmaLf+6C7bxBDgTCKcq0XfP542rocu91yxY9XlU/H9k8aUdvYi/tunTGhtpKd3PsHsI+xIErgvv7668jNzcXixYuxdetWAEAkErng9AFBEMZ1GkF3txeRiDzmf458cQcG/FF/bkNbHxQA0o2aCz5/PG1djsvlueLHVQBmTErDf+5rQvn8nMt+Pe12y1XbSmZy7x/APkbbzuWIErg7duyAy+XCPffcg76+Pvh8PrS1tUGl+vIwQ5fLBYfDIcblU0JHjw+ZVt24DoiMpevnZOPVd0+irWsAk+zy++uDKJZEGfx75ZVXUF1djTfffBNr167F0qVL8fLLL6OhoQFNTU0Ih8Oorq7GkiVLxLi87A0MBeHxBeM69/ZyrpnpgEIBfHqMsxWIriZuT1t0Oh02btyINWvW4M4778TUqVOxbNmyeF1eVjq6fQAg6s5gY5Vm0mJmQTo+O9HJ0yCIrkK0h2YjVqxYgRUrVgAAFi9ejO3bt4t9Sdnr6PHBYtTApI/dMToTcd3sbLz6txNocw1gUhyXGBMlG9EDl2IrEAyju9+PGTE+Rudi4XBkzE9sKxZPwZ/ePYG6FjfK5uZesi0iYuAmna6+4TmvjgxxFzuoVEq8tPXQmF+fadXj7Y/PwDfg/8pshUdXlMS6PKKkxL0UkozLPQi1SoF0S+zPLZuIvCwTvIMh9PuCUpdClLAYuEnG5R5CVppe1JMdxiP33IyJdp53RnRZDNwkMjAYhM8fgl2CvROuRqdVwWbVo73HJ3UpRAmLgZtEXO5BAEjIwAWAHJsBHl8QA4McViC6FAZuEnH1DcGgU8GkT8xnnSMLMTp4l0t0SQzcJCEIArrcQ7CnGca1B0U8mPQaWIwaBi7RZTBwk4TbG0AwHIE9XS91KVeUm2lEd78f/iBPgiC6GAM3SYyM32Yl6PjtiJHlxs7eQYkrIUo8DNwk4XIPIs2khU7i3cGuJs2khV6rGt3vgYi+xMBNAuFIBL0eP2xpiT2cAAyfBJGTaYTLPcglvUQXYeAmAbcngIgA2KyJH7jA8GyFcESAq49H7xCdj4GbBLrPnRlmsybWct7LyUrTQ61ScFiB6CIM3CTQ3T8Ei1Ej+ekOY6VUKpCdYURHr4975BKdh4Gb4CKCMDx+myTDCSNyMg0IBCPo8cTujDWiZMfATXD9AwGEwkLSDCeMcGQYoVBw1RnR+Ri4Ca773IOnzCS7w9WolbBZ9XAycIlGMXATXHe/H0a9GgZdYu6fcCU5mUZ4B0M46/JK
XQpRQmDgJjBBENDTP5R047cjsjOHV8XtP+qUuBKixMDATWDewSACoUjSjd+OGNnM5tOjHVKXQpQQGLgJrLt/+Al/so3fni87w4i6M93wDXGPXCIGbgLr6R+CTqNM2P1vxyIn04BwRMDhMz1Sl0IkOQZuAuv1+JFh0Sfs/rdjkWHRwWrS4tCpLqlLIZIcAzdBBYJhDAyFkJFgp/NGS6FQ4NrZ2Th8phvhCDezodTGwE1QvedWaCV74ALAwjk5GBgK4VRrn9SlEEmKgZugRgI33ayVuJKJK5tph0qpwKFT3VKXQiQpBm6C6vH4YTVpoVYl/5fIqNdgVmE6DnIcl1Jc8v80y5AgCHB7/ciUwXDCiJLpWejo8XGpL6U0Bm4C8gwGEQoLshi/HVEyPQsAeJdLKY2Bm4Dk9MBshD3dgPwsE6eHUUpj4Cag3n4/NOrkXvBwKSXTs3CypY+rzihlMXATUK/XjwyLLqkXPFxK6fQsRASuOqPUxcBNMMFQBB5fEBlm+QwnjJiaZ4XZoOGwAqUsBm6CcXvlN347QqlUoGSajavOKGUxcBPMSOCmW5J/wcOllEzP4qozSlkM3ATj9gZg1KuhVSfHCb3Rmjslk6vOKGUxcBOM2+uXxXLeyzHo1Fx1RimLgZtA/IEwBv1hpMvwgdn5uOqMUhUDN4G4B0Y2rJF34JZy1RmlKAZuAnF7AwCANJN8hxQAICvdgHw7V51R6mHgJhC3xw+zQQONWv5fltJzq84GuOqMUoj8f7KTiNsbkPUDs/OVnFt1doSrziiFMHATxJA/BH9Q/g/MRkzNtcJi5KozSi0M3AQxMn6bKne4SqUCC6Zy1RmlFgZughhZYWaV+QOz83HVGaUaBm6CcHsDsBg1sjhSZ6zmTsmEWqXg9DBKGanz053ARo7UkeMOYVdi0KkxszADB7nMl1IEAzcBDAXCCIQiSEuR8dvzlU7PgrPHhw6uOqMUwMBNAH0Dww/MUmn8dkTJNBsAcLYCpQRRA/e5557DnXfeicrKSrzyyisAgJqaGlRVVaGiogKbN28W8/JJo/9c4KYZUy9wueqMUologbt//37s3bsX27dvx1//+le8+uqrOH78ONavX48tW7Zgx44dOHLkCHbv3i1WCUmjbyAAk14NdQqsMLsUrjqjVCHaT/jChQvxhz/8AWq1Gt3d3QiHw+jv70dRUREKCgqgVqtRVVWFnTt3ilVC0ugfCKTkcMKIktGzzvjwjORN1GNhNRoNnn/+efz2t7/FsmXL0NnZCbvdPvpxh8MBp9MZVZs2mznWZUoqGIpgYCiEqZPSYTJNfJZCLNqIdVvhcAR2u+WyH8+0mZFmPozjLX2oKi8eU3uqBJs+d6X+yQX7OHGin8O9du1aPProo3j88cfR2Nh4wUm0giBEfTJtd7cXkYgQ6zIlYbdb4PYMAQAMGiUGzm3POBGxaCPWbalUSry09dAVX2M1arGnth2//utBKK/yPfHoihK4XJ6Y1BYLdrsloeoRA/sYXTuXI9ptwunTp3Hs2DEAgMFgQEVFBfbt2weXyzX6GpfLBYfDIVYJSaHXMxxqct+S8WqyMw0IhiPo6Y/dLwyiRCNa4La2tmLDhg0IBAIIBALYtWsXVq1ahYaGBjQ1NSEcDqO6uhpLliwRq4Sk4Pb4oVUrodfK8wyzsXKkGaBUgKdAkKyJNqRQXl6O2tpaLF++HCqVChUVFaisrERmZibWrFkDv9+P8vJyLFu2TKwSkkKvxw+rSRv10IrcqNVK2NL06OjxYc7kjJT/70HyJOoY7po1a7BmzZoL3rd48WJs375dzMsmjXA4ArfXjyk58n8YMRY5mUYcPtMDz2AQ1hSck0zyl1iPelNMm2v4AWAqTwk7X67NCABo7+KwAskTA1dCZ872A+ADsxF6rRo2qw5nuwekLoVIFAxcCTW09UGpUMBs0EhdSsLItZng8QXh9XHVGckPA1dCZ872Ic2shVLJB0QjRoYVeJdLcsTAlYggCGg4
24cMq17qUhKKQadGhkWH9m6O45L8MHAl4vYG0OcNIMOSWpuOj0WuzYi+gQA3syHZYeBKpKVzeAlhOgP3K/I4W4FkioErkZZOLwCkzLHo0TDqNUgzaTmOS7LDwJVIs9OLHJsRWk1qL+m9nLwsI9zeAHz+kNSlEMXMmAJ3/fr1X3nf2rVrY15MKmnu9GJKXprUZSSsXJsJANDOu1ySkSsu7X3mmWfgdDpx4MAB9PT0jL4/FAqhpaVF9OLkaigQQmePD7deVwhBkMdWk7FmNmhgNWrQ3uXDNP5iIpm4YuCuXLkS9fX1OHHiBG6//fbR96tUKpSWlopdm2y1ugYgAJiaZ8Xptj6py0lYuVkmnGh2Y9AfgkEn+tbNRKK74nfx/PnzMX/+fNxwww3IycmJV02y1+IcnqEwJT+NgXsF+ecCt61rANPzeZdLyW9Mtw3t7e343ve+h76+vgv+BH7rrbdEK0zOmju9MOnVsKcbpC4loZkNGqSbtWhzMXBJHsYUuE8//TRWrFiBOXPmcJ/SGGh2elHgMPO/5RhMsptxpKEHHl8AFm7ZSEluTIGrVqvx8MMPi11LSohEBLS5vLi5LF/qUpJCXpYRRxp60OYawKwiBi4ltzFNCysuLsaJEyfEriUlOHt9CIQiKHDI6/Rhsei1amSl6dHaNcAZHZT0xnSH29LSgm984xvIy8uDTvflyiiO4Uav2Tm8woyBO3aT7CYcPNUNN/eeoCQ3psBdt26d2HWkjOZOD1RKBfKyTFKXkjRybSbUnu5Gq8vLwKWkNqbAnTFjhth1pIwWpxf5WSaoVVxVPVYatRLZmUa0dQ1g7pRMqcshGrcxBe6iRYugUCggCMLok3W73Y4PP/xQ1OLkqLnTi/lTGRrRyreb0N7tQ5d7SOpSiMZtTIF7/Pjx0X8HAgFUV1ejoaFBtKLkqs/rR/9AAIUOntIbrewMA9QqBdq6vFKXQjRuUf9dq9VqsWLFCnzyySdi1CNrI1syFmbzgVm0VEolcm3Dd7n+YFjqcojGZUx3uG63e/TfgiDgyJEj6O/vF6sm2Wru5AyFiSiwm9DS6cWew+2YW8CVZ5R8oh7DBQCbzYZ//dd/FbUwOWp2emCz6mHU85Te8bCl6WHUqfH3/U2YW7BA6nKIohb1GC6NX0unl8MJE6BQKFCQbcah+i50uQeRxb0oKMmMaQw3EongpZdewurVq/HAAw/gxRdfRCjEnfij4Q+E0dHt43DCBA3vQQF8fLhd6lKIojamwP35z3+OvXv34qGHHsLDDz+ML774Aps2bRK7Nllp7fJCAFCYzRkKE2HUqVFabMcnh9sR4VJfSjJjCtyPPvoIv/rVr3DbbbehoqIC//Ef/8E5uFFqObekt5B3uBP2DwuL0N3vx7GmXqlLIYrKmAJXEARoNF8+6NFqtRe8TVfX0umFQaeGLU0vdSlJ7/p5OTDp1fi4lsMKlFzGFLizZs3Cj3/8YzQ3N6OlpQU//vGPudw3Ss2dHhRyD9yY0GpUWDQnBwdOuDAwFJS6HKIxG1PgPvPMM+jv78eqVatw3333obe3F//2b/8mdm2yEYkIaO0cQAFnKMTMTQtyEQpHsO+oU+pSiMbsioEbCATw/e9/H3v27MHGjRtRU1ODBQsWQKVSwWxmeIxVp3sQ/mCYMxRiqCjHgkKHGR9xWIGSyBUD9/nnn4fX68U111wz+r5nn30W/f39eOGFF0QvTi6azx0ayT0UYuvrJXlo6vCM/vclSnRXDNwPPvgAP//5z2Gz2Ubfl52djU2bNuHvf/+76MXJRUunl3vgimDR3Gxo1Uq893mb1KUQjckVA1ej0UCv/+pTdbPZDK2W50uNVbPTi1ybCRo198CNJZNeg0Vzs7H3aAd8fHhGSeCKCaBUKuH1fnU7PK/Xy5VmUWjp9HBJr0iWXjMJgWAEHx/ukLoUoqu6YuDedddd2LBhA3w+3+j7fD4fNmzYgIqKCtGLk4P+gQDc3gAXPIikMNuCaflWvP95
K1eeUcK7YuA+9NBDsFgsuPHGG3H//fdj5cqVuPHGG2G1WvHEE0/Eq8akNrIHbgGX9Ipm6TWT4OwdxLFGrjyjxHbF3cKUSiWeffZZPP7446irq4NSqcSCBQvgcDjiVV/Sa+4cfoLOKWHiuXamA3/eVY/3Pm/lmWeU0Ma0PWN+fj7y8/PFrkWWWpxeZFp1MBu4FFosGrUSS0rysGNvE7r7hrh8mhIWH5uLrLnTy/m3cXBz6fANwQcHOUWMEhcDV0SBYBjt3QMcTogDW5oepdOz8OGhswiGIlKXQ3RJDFwRtXUNQBB4aGS8LL1mEjy+IPYf4/4KlJgYuCLiDIX4mjM5A/l2E/62v2X0/D2iRMLAFVGz0wODToUsPsSJC4VCgduvK0Sry4ujnCJGCYiBK6LmTi8K7GYouQdu3Fw/JxtpZi127m+WuhSir2DgiiQiCGjp9KKAMxTiSqNW4ravTUJdQ8/okA5RohjTPFyKnss9CH8gzE3HRRAOR2C3X/4X2crbZuLtPU3YXduOdQ9cc9nXAcMzSfrcviu+hihWGLgiGT00koEbcyqVEi9tPXTF1+TZjHj/QAtUEGDQXf7b/NEVJbEuj+iyRB1SePHFF1FZWYnKysrRY9VrampQVVWFiooKbN68WczLS6q50wulQoF87oErial5VggC0NDeL3UpRKNEC9yamhp8/PHH2LZtG9544w3U1dWhuroa69evx5YtW7Bjxw4cOXIEu3fvFqsESbU4PcjNMkKjVkldSkoy6jXIyzKiscPDhRCUMEQLXLvdjqeeemr0SPVp06ahsbERRUVFKCgogFqtRlVVFXbu3ClWCZIaXtLL4QQpTc9LQygsoKmDR/BQYhAtcIuLi1FaWgoAaGxsxDvvvAOFQgG73T76GofDAadTfquCPL4Aej1+zlCQWLpFB3u6HqfO9iEU5l0uSU/0h2b19fV47LHH8OSTT0KlUqGxsXH0Y4IgQBHlHFWbLfHvGs+edAEA5s+wX/Fp+giTSReza6dCW9G0V1Jsx98/bUFH7xBmFmVc8jVj+RpdTSzaSHTs48SJGrgHDhzA2rVrsX79elRWVmL//v1wuVyjH3e5XFHvrdvd7UUkktjLNg+dHL5rT9Or4XJd/s/ZkS/uwIA/ZtdOhbaiac+oVcFm1aPuTDdyM/VQKb/6R92VvkZjYbdbJtxGomMfo2vnckQbUmhvb8cTTzyBn/3sZ6isrAQAlJSUoKGhAU1NTQiHw6iursaSJUvEKkEyzU4vbNwDN2HMLEiDPxhGs5MLIUhaot3h/uY3v4Hf78fGjRtH37dq1Sps3LgRa9asgd/vR3l5OZYtWyZWCZJp6vCgkBvWJAxbmh6ZFh3qW/tQmG2BSsml1iQN0QJ3w4YN2LBhwyU/tn37drEuK7mhQAjOHh8WzcmWuhQ6R6FQYEZBOvYedaKl04vJOfxlSNLgXgox1tLphQDwDjfB2NP1SDdrcaq1L+GfAZB8MXBjbGScsIh3UQlFoVBgZkE6fP4QWlwcyyVpMHBjrKnDA4tRg3SzVupS6CKODAPSzVqcbHYjHOG8XIo/Bm6MNTs9KMq2RD2/mMSnUCgwuygDg4EwGtvlPcWJEhMDN4aCoQjaugY4fpvA7OkG2NP1qG/t4x4LFHcM3Bg62zWAcETg+G2Cm12YgUAogtNn+6QuhVIMAzeGmpzDf6ZyD9zElm7RIddmxOm2frg9sV0BR3QlDNwYajp3aKQ93SB1KXQVswszEIkIeH3XSalLoRTCwI2h5g4PChwWHhqZBMxGDQqyzdhR04gu96DU5VCKYODGSCQyfGhkER+YJY2ZBelQKICtH52RuhRKEQzcGGnv8SEQinD8NokYdGosL5+GvXVOnG7jAzQSHwM3RprPPTDjHW5yue/WGUgzafH/dtUjInDJL4mLgRsjTR0eaNRK5GYZpS6FomDQqfGN8mk4c7Yf+47K7/QRSiwM3BhpdnowyW665AbXlNhumJ+DyTkW/OWD0/AH
wlKXQzLGdIgBQRDQ5OQDs2SlVCjwwG3F6PX48c6+JqnLIRlj4MaAq28Ig/4Ql/QmseJJ6Vg424F39jWju29I6nJIphi4MdDcMbLCjIGbzO67eToA4LX3T0lcCckVAzcGGjr6oVIqUOAwSV0KTYAtTY/KxUX49HgnjpzplrockiEGbgw0tnswyW6GRq2SuhSaoDuuL0J2phF/fPckAkE+QKPYYuBOUEQQ0NjRjym5HE6QA41aiQcrZqDTPYi39/ABGsUWA3eCnD0+DPrDmJxrlboUipHZkzOxeG42duxtQnv3gNTlkIwwcCdo5OSAKQxcWbl/aTF0GhVe/dsJCFyBRjHCwJ2gho5+aNVK5HGFmaykmbRYefM0HG92Y09dh9TlkEwwcCeosd2DwhwLV5jJ0JLSPEzLs+LPu06hfyAgdTkkA2qpC0hm4UgEzU4PykvzpS6FxikcjsBuv/wDz3/+r9fif/77B3h99xk89dB1l32d3W5BIBhGn9snRpkkEwzcCWhzDSAQinCGQhJTqZR4aeuhK75men4aPqk9i//9f2qQl/XVudYmkw4DA348uqJErDJJJvh38AQ0nlthxhkK8jYt34p0sxa1p7vh59xcmgAG7gQ0tPfDoFPDkcEzzORMqVCgtDgLoXAEh09zBRqNHwN3Ahra+zE5h2eYpQKrUYuZhek42+3D2S7OzaXxYeCOUzAURptrgPNvU8i0/DSkmYaHFoYCIanLoSTEh2ZRSks3QqtR4WhDN8IRAaWzsq/4lJvkQ6lQ4JoZWfjwUDu+qO/CojnZUPCvG4oCAzdKWo0KL209hFPnDh38rK4dR+o7x9UWn2onH4tRi7mTM1B7pgcN7R5MzeNfODR2HFIYp16PH0adGnotdwhLNUU5FmRnGHC0sYcLIigqDNxxEAQBPf1+ZFh0UpdCElAoFCidngWNWokDJ10IhyNSl0RJgoE7DoP+MPzBMDKtDNxUpdOqUFacBY8viIP1LqnLoSTBMdxx6PUMn3nFO9zU5sgwYkquBSeb3UgzaqUuh5IAA3ccejx+qJQKWPlDlvLmTM5E30AQX9S70NE9AI7o05VwSGEcej1+pJu1UCo5JSjVqZQK3LggFwDw01c/QzDE8Vy6PAZulPzBMPoGAsiw6KUuhRKE2ahFWXEWTrW48dp7PPGXLo+BG6VTLW4IApDJ8Vs6T67NhOXl07Dr81bsP+aUuhxKUAzcKJ1o6gHAB2b0VQ9VzsG0PCteeec42rjfAl0CAzdKx5t6YdSroeOCB7qIWqXEf18+Dzq1Ei/+tRa+oaDUJVGCYeBGQRAEHG3o5nACXVamVY//ce98dPUN4dfbjyIS4QGU9CUGbhTau33o8wZgS+MDM7q8GQXp+NZtxTh8phvbPjojdTmUQDgPNwonWtwAAJuVgUtXdnNZPpqcHry9pwmF2RZcN8shdUmUAHiHG4WTLW5kWnUw6fl7iq5MoVDgv/zDTEzLt+I3bx9FS6dX6pIoATBwx0gQBJxo7sW8qVncA5XGRKNW4ol758OgU+OFv9bCO8iHaKmOgTtGLvcg3N4A5k6zSV0KJZF0sw7fvnc+3F4/frn1MELcWSylMXDH6ESzGwAwbyoDl6IzLT8ND985Gyda3PjdO8chCJy5kKo4GDlGJ1rcMBs0KMjmcToUvcVzc9DZO4g3P25AdoYBVTdOkbokkgADd4xOtrgxsyCd47c0bnffOBmdvT5s+6gB9gwDFs3JkbokijNRhxS8Xi/uuusutLa2AgBqampQVVWFiooKbN68WcxLx1R33xC6+oYwozBd6lIoiSkUCvzjHbMxoyAdv337OOpb3VKXRHEmWuAeOnQIDzzwABobGwEAQ0NDWL9+PbZs2YIdO3bgyJEj2L17t1iXj6kTLb0AgJkF6dIWQklPo1bi2yvmw2bV4YW/HkZnr0/qkiiORAvc1157Dc888wwcjuEJ37W1tSgqKkJBQQHUajWqqqqwc+dOsS4fU8eb3DDp1Zhk
N0tdCsmA2aDBd+4rgSAI2Px6Lfp9PIgyVYg2hvujH/3ogrc7Oztht9tH33Y4HHA6o9/GzmaLb+gJgoBjzb0onelAdvbwkdgmU2z3Uohle6nQVqzbi0VbI23Y7WN7qGq3W/D0f1uEf/tVDba8cQQ/fPxGGHSJ/UhlrH1LZmL3MW5f4UgkcsEDJ0EQxvUAqrvbG9cNQdq6BtDdN4TpuRa4XB7Y7RYMDPhjeo1YtpcKbcW6vYm2ZTLpRttwuTxj/jy7WYvH7pmLF7cexg9e2oO1KxdArUrMmZp2uyWqviWjWPXxSqEdt69uTk4OXK4vTzd1uVyjww2JrK5heP/buZMzJa6E5Kis2I6Hls3CkYYe/HbHMUQ4R1fW4ha4JSUlaGhoQFNTE8LhMKqrq7FkyZJ4XX7c6hp6kJ1pRFa6QepSSKaWlOThG+VTsbfOidfeO8WFETIWtyEFnU6HjRs3Ys2aNfD7/SgvL8eyZcvidflxCYYiONHci68vyJO6FJK5OxcVoc8bwLuftiDNrMUd1xdJXRKJQPTAfe+990b/vXjxYmzfvl3sS8bMqbY+BEIRzJ3C4QQSl0KhwKrbiuEZDOL190/DpNdgSQl/0ctNYj8WlVhdQw9USgVmcsEDjUE4HJnwU+7vP7QQP3plH36/8zisVj1K+cteVhi4V1DX0INp+WkJP12HEoNKpcRLWw9NuJ2cdD1sVj1efO0gHr9nHq7l5uWykZhzUBJAvy+AJqeHwwkUdyqVEgtnOzCzKBO/3l6Hg/VdUpdEMcLAvYzDp7sBAPMYuCQBtUqJ//XoIhRmm7HljcOj0xMpuTFwL+NgfRfSzVoU5ch/dQ0lJqNeg3X3lyLXZsILf63FieZeqUuiCWLgXkIgGMbhhm6UFduh5HaMJCGzQYN/XlWKrHQDNr9+CMcaeaebzBi4l3C0qReBYARlxVlSl0IEq1GL7z1QBke6Ab/4Sy0On+mWuiQaJwbuJRysd8GgU2FWUYbUpRABANJMWjz5rWuQZzPh+b/U4ouTrqt/EiUcBu5FIhEBB+u7MH+qLWE3EqHUZDZo8L0HSlGUY8GWN45g/7Hod9sjaTFRLnLmbD/6fUGUFduv/mKiODPqNfjnb5ZiWp4Vv36zDrsOtEpdEkWBgXuRz+tdUCkVmM/TeSlBGXRqrPtmKUqmZ+FP/3kSf/ngNDe8SRIM3PMIgoAv6rswqygDRj1Xl1Hi0mlUeGLFPNxcmocde5vwm7ePIRSOSF0WXQVT5TzNTi+cPT7cfl2B1KUQjWlvhu/+12sxKeck/rjzOHyBML6/+lqYjdqvvC4QDKPPzfPTpMbAPc+eug6olAquXaeEEM3eDKXTbThU78J/+9F/YuFsBywXhe6jK0rEKJGixCGFcyIRAfuOOrFgmg1mg0bqcoiiUphtwQ3zchAKR/DhoXa0d/NuNhExcM851tSLvoEAFs/NkboUonGxWfVYUpIHi1GDT4934nhzLx+mJRgG7jl76jpg0KlRMp2zEyh5GXRq3Dg/B5PsJpxs6UPNkQ4M+kNSl0XnMHAB+ANhHDjpwnWz7NCoVVKXQzQhKqUSZcVZKJ1ug9sbwAcHz+KT2rNSl0XgQzMAwBenXPAHwhxOINlQKBQozLYg06rH5ydd2Pj7T/EPCwvxT3fPg2mczyjOnzHBWQ/jw8AFUHO4A5lWHYoL0qUuhSimzAYNbpqfC4NRi9d31eOjg22YNzUTuZlGKKLYCc9k0mFgwD/6Nmc9jE/KDyk4e3w40tCDJQvyuBUjyZJSqcCDd87B1xfkQqtR4rPjLnx6vJNjuxJI+cB97/M2qJQKlJfyhFSStwyLDktK8jBncgZc7iG893kbTra4EeYKtbhJ6SGFoUAIHx9ux7WzHEgz66Quh0h0SoUC0/PTkGszoq6hB8eb3Wjq8GBWUQYm2U1RDTNQ9FL6DndvnROD/hBuvWaS1KUQxZVJr8HC2dm4YV4OtBoVvqjvwoeH2tHZO8i5uyJK2cAVBAG7
Pm9FYbYZ0/KtUpdDJImsND2WlOSirDgLgVAYe4868fHhDnT2+hi8IkjZIYUTzW60uQbw8B2z+GcUpTSFQoEChxn5WSY0d3pR3+rG3qOdyDBrMbMwHfZ0g9QlykbKBu47+5phNmhw/ZxsqUshSghKpQKTcywodJjR0unFyXPBm27WYs4UGzItWs7kmaCUDNymDg8On+nGiiVTodVwZRnR+ZRKBYpyLCg4F7yn2vpQc7gdBp0KU3OtKMy+8paRdHkpGbjVNY0w6NRYyodlRJc1EryF2Wa4fSEcPdOFusZenGhxQ2/Q4sY5DmRa9VKXmVRSLnAb2vtx4KQLd984mac6EI2BQqHAJIcZGSYNej1+nD7bhzc/PI3tH57G12bacUtZPmYUpPNZyBikXOJs3X0aZoMGty8slLoUoqSTYdHh2pkO3H1zMV579zg+rm3H/mOdyMsy4ebSPNwwL1fSG5m0dOOEhgkvPmEj1ntGpFTgHmnoRl1jL765dDoMupTqOlFMZWcaserWYty7ZCr2H3Pigy/a8H//Xo+/7D6N62dn48b5uSielBb3u16tRjXmUzIudvF+EUDs94xImdQJhSP4f3+vhyPdwLFbohjRaVT4+oI8fH1BHho7+vH+523Yd8yJj2rbkZWmx+K5ObhhXg6yM41Sl5oQUiZw3zvQivZuH9Z8Yz406pRd70Ekmsk5Vjx8pxUP3FaMz0+6sOdIB6prGvFWTSMm2U0oK7bjmhl2FGabU3a8NyUCt8s9iG0fNWDBNBtKp2dJXQ6RrOm1atwwLxc3zMtFr8ePT4858UV9F6r3DIdvplWH2UUZmFWYgdlFGSk100H2gSsIAv7wtxMAgNUVM1P2NyuRFDIsOlQsLETFwkJ4fAEcOtWNQ6e6cLC+C58c7gAA2NP1mJJrRVG2BYXZw9PQLj51WC5kH7jvfd6GIw09+C//MAO2tNT5TUqUaCxGLW5akIubFuQiIgho7fTieLMbJ1vcON3Wj/3HOkdfazZo4MgwDP8v3YDsDCPs6QbY0vRIMyfvijdZB67HF8Br75/C/Kk2LL0mX+pyiOgc5bkjgAqzLai4rgAA4B0MotnpQbPTC2evD529g6hv6cO+OifO30ZHpVTAZtXDlqYf/f+sc/+OqFQQBCFh/5KVdeBq1EosW1iIW782KWG/AEQ0zGzQYM7kTMyZnHnB+4OhCLr6BuFyD6K7bwhd/UPo7hv+3+GGbvR5Axe8XqVUwGLUwGrUwmLSIMOsQ7pZB6VS+gyQdeDqtWrcu2Sq1GUQ0QRo1Erk2kzItZku+fFgKIyefj+6+ofgDwvY8fEZ9PuC6Oj1oblz+DQLpVKBDLMWNqse9nQDMqw6SYYlZB24RCR/GrUK2ZlGZGcaYbdb0Hy2b/RjQ4EQevr96On3o7t/CCdb+3CytQ8atRKODANyMoxwZBjiNlU0JQJ3osv9iOhC4XDkK8tgxysUjkCtEifw9Fo18rLUyMsavjsOhiJwuQfR0TM8RtzmGoBSqUBOhgHTCtKRZtCIOvSQEoE7keV+F+Px0ESASqWM6c9UvH4+NWol8rJMyMsyQRAE9Hj8ONs1gLauAZw9eHb040UOM9LMsZ+alhKBS0R0MYXi3GwHqx5zJ2fCMxTCqVY3Wju9aOrwwGrU4M6vT4Muhje8DFwiSnlKpQJ5djPSjBoEQxG0ubzo6B3EwGAQOqMmdteJWUtERDKgUSsxOdeKRXOyMbMo8+qfEAUGLhFRnDBwiYjihIFLRBQnDFwiojiRJHDfeust3HnnnaioqMCf/vQnKUogIoq7uE8Lczqd2Lx5M7Zu3QqtVotVq1bh+uuvx/Tp0+NdChFRXMU9cGtqarBo0SKkp6cDAG6//Xbs3LkT3/72t8f0+eNddmeO4Vy6WLZl0mugECIxay9R+xnLtmLd3kTbMp73NZRrP42X+D5NlNpi1dal+giMP3MuRSEIgnD1l8XOr3/9a/h8Pqxbtw4A8Prrr6O2thbPPvtsPMsg
Ioq7uI/hRiKRC/amTeTNgomIYinugZuTkwOXyzX6tsvlgsPhiHcZRERxF/fAveGGG7Bnzx709PRgcHAQ7777LpYsWRLvMoiI4i7uD82ys7Oxbt06PPjggwgGg1i5ciUWLFgQ7zKIiOIu7g/NiIhSFVeaERHFCQOXiChOGLhERHHCwCUiihMGbpx5vV7cddddaG1tBTC81LmqqgoVFRXYvHmzxNVN3IsvvojKykpUVlZi06ZNAOTXx+eeew533nknKisr8corrwCQXx8B4Kc//SmeeuopAPLr3+rVq1FZWYl77rkH99xzDw4dOhSfPgoUNwcPHhTuuusuYe7cuUJLS4swODgolJeXC83NzUIwGBQeeeQR4YMPPpC6zHH75JNPhG9+85uC3+8XAoGA8OCDDwpvvfWWrPq4b98+YdWqVUIwGBQGBweFW265RTh27Jis+igIglBTUyNcf/31wve//33ZfZ9GIhHhpptuEoLB4Oj74tVH3uHG0WuvvYZnnnlmdGVdbW0tioqKUFBQALVajaqqKuzcuVPiKsfPbrfjqaeeglarhUajwbRp09DY2CirPi5cuBB/+MMfoFar0d3djXA4jP7+fln10e12Y/PmzXj88ccByO/79MyZMwCARx55BHfffTf++Mc/xq2PDNw4+tGPfoRrr7129O3Ozk7Y7fbRtx0OB5xOpxSlxURxcTFKS0sBAI2NjXjnnXegUChk1UcA0Gg0eP7551FZWYnFixfL7uv49NNPY926dbBarQDk933a39+PxYsX45e//CV+97vf4c9//jPOnj0blz4ycCUk14186uvr8cgjj+DJJ59EQUGBLPu4du1a7NmzB+3t7WhsbJRNH19//XXk5uZi8eLFo++T2/dpWVkZNm3aBIvFgszMTKxcuRLPP/98XPoY96W99CU5buRz4MABrF27FuvXr0dlZSX2798vqz6ePn0agUAAs2fPhsFgQEVFBXbu3AmVSjX6mmTu444dO+ByuXDPPfegr68PPp8PbW1tsukfAHz22WcIBoOjv1QEQUB+fn5cvk95hyuhkpISNDQ0oKmpCeFwGNXV1Um9kU97ezueeOIJ/OxnP0NlZSUA+fWxtbUVGzZsQCAQQCAQwK5du7Bq1SrZ9PGVV15BdXU13nzzTaxduxZLly7Fyy+/LJv+AYDH48GmTZvg9/vh9Xqxbds2fPe7341LH3mHKyGdToeNGzdizZo18Pv9KC8vx7Jly6Qua9x+85vfwO/3Y+PGjaPvW7Vqlaz6WF5ejtraWixfvhwqlQoVFRWorKxEZmambPp4Mbl9n95yyy04dOgQli9fjkgkgm9961soKyuLSx+5eQ0RUZxwSIGIKE4YuEREccLAJSKKEwYuEVGcMHCJiOKEgUtEFCcMXEoajzzyCHp6eib8mn379uGuu+666vVmzpx5ybZ27dqFH/7whwCGt/nbuXMnWltbUVZWdtU2KbVx4QMljU8++SQmr5moW2+9Fbfeeqvo1yH54R0uJYV/+Zd/AQA89NBD2L9/P1avXo2qqircfffdeOONN77ymvb2drz//vtYtWoVVqxYgZtvvhm/+MUvor7uL37xC9x7772455578P777wMAtm7disceeywm/aLUwjtcSgo/+clPsHXrVvz+97/H/fffjyeffBIVFRVwOp247777UFRUdMFrMjIy8OSTT2Ljxo2YPHkynE4nbrnlFjz44INRXXfSpEn4wQ9+gJMnT2L16tV45513ROohpQIGLiWV06dPw+/3o6KiAgCQnZ2NiooKfPTRRxeMoSoUCvzqV7/CBx98gOrqapw+fRqCIGBwcDCq6z3wwAMAgBkzZmDatGn44osvYtcZSjkcUqCkolAovrJPqSAICIVCF7zP5/Ph3nvvRV1dHebMmYMnn3wSarUa0W4dolR++SMSiUSgVvMehcaPgUtJQ6VSIT8/H2q1Gu+++y4AwOl04m9/+xtuuOGG0deEQiE0NTXB6/XiO9/5DpYu
XYp9+/YhEAggEolEdc1t27YBAOrq6tDc3IySkpLYdopSCn9dU9JYtmwZ/vEf/xFbtmzBD3/4Q7zwwgsIh8N44oknsGjRotHXrF69Gs899xxuvvlm3HHHHdBqtZgxYwamT5+OpqYmaLXaMV+zpaUFy5cvh0KhwL//+78jPT1dpN5RKuD2jEREccI7XEpZL7/8Mt56661Lfuyf/umfcPfdd8e5IpI73uESEcUJH5oREcUJA5eIKE4YuEREccLAJSKKEwYuEVGc/H/38wAJadXY2gAAAABJRU5ErkJggg==",
      "text/plain": [
       "<Figure size 360x360 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Create a visualization with pandas df\n",
    "sns.displot(data=pandas_tips, x=\"total_bill\", kde=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<AxesSubplot:xlabel='total_bill', ylabel='Frequency'>"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAEJCAYAAACdePCvAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAAZRUlEQVR4nO3de3BU9fnH8c8uu8lgE0TSBSla7aBo6SXSiyGWEsAakCQEA8WEmUQKjFKtXDoaICKMchVsIxaZ1sogCiihgkAkqZbbKFCoaEEdFEoJEMnEGMolJiS72fP7wx+pWIFNOOck6/f9+iu7yT7f5yFhP3vO7jnHY1mWJQCAcbyt3QAAoHUQAABgKAIAAAxFAACAoQgAADAUAQAAhiIAAMBQvtZuoLn+85/PFA5//Q9dSEiIU3V1TWu30WqYn/lNnd/u2b1ej6666htf+b2oC4Bw2DIiACQZM+eFMD/zm8qt2dkFBACGIgAAwFAEAAAYigAAAEMRAABgKEcDoKamRunp6SovLz/v/uXLlys3N9fJpQEAl+BYAOzdu1c5OTkqKys77/5//etfevbZZ51aFgAQIceOAygqKtKMGTOUn5/fdF9DQ4OmT5+u8ePHa926dU4tjQvocGV7xcbY/yuvbwjp9Kk62+sCcJZjATB79uz/ue93v/udhg0bpmuuuabFdRMS4i6nragSCMTbXrNg8Xbba865/2eO9OpEzWjC/ObO79bsrh0JvH37dlVUVGjq1KnatWtXi+tUV9cYcYRgIBCvqqozttcMBkO21jzHiV7trhlNmN/c+e2e3ev1XPCFs2sBUFxcrIMHDyozM1O1tbX69NNPNXHiRD311FNutQAA+ALXAmDu3LlNX+/atUuLFi3iyR8AWhHHAQCAoRzfAti8efP/3JeUlKSkpCSnlwYAXARbAABgKAIAAAxFAACAoQgAADAUAQAAhiIAAMBQUXdReFMEQ2Gjz4UCwHkEQBvl93n18MJtttZcMCHF1noAohu7gADAUAQAABiKAAAAQxEAAGAoAgAADEUAAIChCAAAMBQBAACGIgAAwFAEAAAYigAAAEMRAABgKEcDoKamRunp6SovL5ckrVq1Sunp6crIyNDUqVPV0NDg5PIAgItwLAD27t2rnJwclZWVSZIOHz6sJUuW6OWXX9b69esVDoe1cuVKp5YHAFyCYwFQVFSkGTNmqHPnzpKkmJgYzZgxQ3FxcfJ4POrRo4eOHz/u1PIAgEtw7HoAs2fPPu92t27d1K1bN0nSiRMntGLFCs2dO9ep5QEAl+D6BWEqKys1duxYDRs2TElJSc1+fEJCnANdtU1+v/2/HidqSnLk6mWmXxGN+c2d363ZXQ2AQ4cOaezYscrNzdXo0aNbVKO6ukbhsGVzZ21PIBCvYDBke10nakpSVdUZW+sFAvG214wmzG/u/HbP7vV6LvjC2bUAqKmp0ZgxYzRx4kQNHTrUrWUBABfg2nEAf/nLX/Tpp59q6dKlyszMVGZmphYuXOjW8gCAL3F8C2Dz5s2SpFGjRmnUqFFOLwcAiBBHAgOAoQgAADAUAQAAhnL9OAB8/QRDYUc+t9zhyvY6farO9roAPkcA4LL5fV49vHCbvTX9Ps25/2e21gRwPnYBAYChCAAAMBQBAACGIgAAwFAEAAAYigAAAEMRAABgKAIAAAxFAACAoQgAADAUAQAAhiIAAMBQBAAAGIoAAABDEQAAYCgCAAAM5WgA1NTUKD09XeXl5ZKkHTt2KCMjQ6mpqSosLHRyaQDAJTgWAHv37lVOTo7KysokSWfPnlVBQYEWL16sjRs36v3339e2bfZeRQoAEDnHAqCoqEgzZsxQ586dJUn79u3Tddddp2uvvVY+n08ZGRkqLS11ankAwCU4dk3g2bNnn3f7k08+USAQaLrduXNnVVZWNrtuQkLcZfcWLfx++389TtR0sq4TF5uPFibPLpk9v1uzu3ZR+HA4LI/H03Tb
sqzzbkequrpG4bBlZ2ttUiAQr2AwZHtdJ2o6UfdcoFRVnbG1brQIBOKNnV0ye367Z/d6PRd84ezap4CuvvpqVVVVNd2uqqpq2j0EAHCfawGQmJiow4cP68iRI2psbFRxcbH69u3r1vIAgC9xbRdQbGys5s2bpwcffFD19fVKSUnRoEGD3FoeAPAljgfA5s2bm75OTk7W+vXrnV4SABABjgQGAEMRAABgKAIAAAxFAACAoQgAADAUAQAAhoooAF588UXV1NQ43QsAwEURBcBHH32kgQMH6pFHHtF7773ndE8AABdEdCDYrFmzVFNTow0bNuixxx6TZVnKyclRRkaGYmNjne4RAOCAiN8DiIuL06BBg5Senq6TJ09q5cqVGjRo0HlH+gIAokdEWwA7d+7UqlWrtHPnTg0cOFDPPPOMbr75Zh09elQjR47UgAEDnO4TAGCziALgscce08iRIzVz5kzFx//3QgXf/va3NWLECMeaAwA4J6IAWL9+vUpLSxUfH6+qqiq99tprysvLk9fr1fjx453uEYYKhsKOXBmpviGk06fqbK8LRJuIAmDmzJn67LPPNGTIEHm9Xu3Zs0fl5eWaNm2a0/3BYH6fVw8v3GZ73QUTUmyvCUSjiALg3XffVXFxsSQpISFBCxcuVGZmpqONAQCcFdGngILBoBoaGppuh0LOXFcWAOCeiLYA+vXrpzFjxigzM1Mej0fFxcVKSWEzGgCiWUQBkJ+frxUrVmjTpk3y+Xy64447lJ2d7XRvAAAHRRQA7dq1U15envLy8pzuBwDgkogC4G9/+5vmzJmjU6dOybKspvvfeecdxxoDADgrogBYsGCBpkyZop49e8rj8Vz2ouvWrdOzzz4rSerbt68mT5582TUBAM0TUQB06NBBqamptixYV1en2bNnq7S0VB06dFBOTo527Nih2267zZb6AIDIRPQx0MTERG3bZs8BOY2NjQqHw6qrq1MoFFIoFOKMogDQCiLaAti2bZuWL18uv98vv98vy7Lk8Xha9B5AXFycJkyYoDvvvFPt27fXT3/6U/3oRz9qdh0AwOWJKACef/552xb88MMP9corr2jLli2Kj4/XQw89pCVLlmjs2LERPT4hIc62Xto6vz+iX0+r14zGuk6cY8hu0dCjk0ye363ZI/rf1a1bN5WWlmr//v0aN26cNm3apPT09BYt+NZbbyk5OVkJCQmSpKysLK1cuTLiAKiurlE4bF36B6NcIBCvYND+I66dqOlE3XNP/E71W1V1xpG6dgkE4tt8j04yeX67Z/d6PRd84RzRewDPPvusXnrpJZWWlurs2bNatGiRnnnmmRY1c/PNN2vHjh2qra2VZVnavHmzfvCDH7SoFgCg5SIKgNdee01//vOf1b59e1111VUqKipqOjlcc/Xp00dpaWnKysrSkCFDFAqFdO+997aoFgCg5SLaBeTz+RQTE9N0u0OHDvL5Wr5v9t577+VJHwBaWUTP4l27dtXWrVvl8XjU0NCgJUuWqFu3bk73BgBwUEQB8Oijjyo/P18fffSRbrnlFiUmJurJJ590ujcAgIMiCoAuXbpo2bJlqqurU2Njo+LizPkoJgB8XUUUAEuXLv3K+3/1q1/Z2gwAwD0RBcCBAweavm5oaNA//vEPJScnO9YUAMB5EQXA3Llzz7tdWVmpRx55xJGGAADuiOg4gC/r0qWLPv74Y7t7AQC4qNnvAViWpffff7/pVA4AgOjU7PcApM+PC8jPz3ekIQCAO1r0HgAAIPpFFAC5ubkXvRTkCy+8YFtDAAB3RBQA3//+93Xo0CGNGDFCfr9f69atUygUUlpamtP9AQAcElEAvPPOO1q5cqXatWsnSfr5z3+uESNGaODAgY42BwBwTkQfAz1x4oTq6+ubbn/22Wc6e/asY00BAJwX0RZAenq67r77bt1xxx2yLEslJSXKy8tzujcAgIMiCoAJEyaoZ8+e+vvf/67Y2Fg9/vjjuvXWW53uDQDgoIiP
BO7SpYtuvPFGTZw4UX6/38meAAAuiCgAXnnlFU2dOlXPPfeczpw5o/vvv19FRUVO9wYAcFBEAbB8+XKtWrVKcXFxSkhI0Jo1a7Rs2TKnewMAOCiiAPB6veddBKZr165NHwkFAESniAKgY8eO2r9/f9PRwOvXr9eVV17Z4kU3b96srKws3XnnnZo1a1aL6wAAWi6iTwEVFBRowoQJOnr0qPr06aPY2FgtXry4RQseO3ZMM2bM0OrVq5WQkKB77rlH27ZtU0pKSovqAQBaJqIAOHv2rNatW6eysjI1NjbqO9/5Tos/CfTGG29o8ODBuvrqqyVJhYWFio2NbVEtoCWCobACgXhba9Y3hHT6VJ2tNQGnRRQADz30kEpKStS9e/fLXvDIkSPy+/0aN26cKioq1K9fP02cOPGy6wKR8vu8enjhNltrLpjAFiyiT0QBcNNNN2nDhg368Y9/rCuuuKLp/o4dOzZ7wcbGRr399tt68cUXdcUVV+jXv/611q5dq6ysrIgen5AQd+kf+prw+yP69bR6Tep+zu6tCrvrRRuT53dr9oj+F2zatEmlpaXn3efxeLR///5mL/jNb35TycnJ6tSpkyTpF7/4hfbt2xdxAFRX1ygctpq9brQJBOIVDIZsr+tETSfqnnuCjpZ+Jamq6oxttQKBeFvrRRuT57d7dq/Xc8EXzhEFwHvvvWdbM/3799fkyZN1+vRpfeMb39Cbb76p22+/3bb6AIDIXPRjoI8++mjT1ydOnLBlwcTERI0dO1YjR47U4MGD9a1vfUvDhg2zpTYAIHIX3QJ4//33m74eM2aM1q5da8uiw4cP1/Dhw22pBQBomYtuAViW9ZVfAwCiX8RnA73YNYEBANHnoruAwuGwTp06Jcuy1NjY2PT1OS35GCgAoG24aAAcOHBAvXv3bnrST0pKavpeSz8GCgBoGy4aAB9++KFbfQAAXBbxewAAgK8XAgAADEUAAIChCAAAMBQBAACGIgAAwFAEAAAYigAAAEMRAABgKAIAAAxFAACAoQgAADAUAQAAhiIAAMBQFz0dNIDIBENhBQLxttYMBOJV3xDS6VN1ttYFziEAABv4fV49vHCbffX8PgWDIS2YkGJbTeDLWnUX0BNPPKEpU6a0ZgsAYKxWC4CdO3dq7dq1rbU8ABivVXYBnTx5UoWFhRo3blzUX3ayw5XtFRvDnjQA0adVnrmmT5+uSZMmqaKiotmPTUiIc6Cjy1OweLvtNefc/zP5/fb/epyoSV1nap6rZ/eby9HC1Lkl92Z3PQBWr16trl27Kjk5WWvWrGn246uraxQOWw501jKBQLyCwZAjtZ2oGy29nnvyi5Z+7a557k1gSaqqOmNb3WgRCMQbObdk/+xer+eCL5xdD4CNGzeqqqpKmZmZOnXqlGprazVnzhwVFBS43QoAGM31AFi6dGnT12vWrNHu3bt58geAVsCRwABgqFb9+EpWVpaysrJaswUAMBZbAABgKAIAAAxFAACAoQgAADAUAQAAhiIAAMBQBAAAGIoAAABDEQAAYCgCAAAMRQAAgKEIAAAwFAEAAIYiAADAUFzNHGjDgqGwI9eHrW8I6fSpOtvrIroQAEAb5vd59fDCbbbXXTAhxfaaiD7sAgIAQxEAAGAoAgAADEUAAIChWuVN4EWLFqmkpESSlJKSovz8/NZoAwCM5voWwI4dO/TWW29p7dq1evXVV/XBBx/ojTfecLsNADCe61sAgUBAU6ZMUUxMjCSpe/fuOn78uNttAIDxXA+AG2+8senrsrIylZSU6KWXXnK7DcBoHGAGqRUPBDt48KDuu+8+5efn6/rrr4/4cQkJcc411UJ+vzP/jE7UjaZeo62u3TXP1XOkV59XBYu32153zv0/sy1YnAioaOHW7K0SAHv27NH48eNVUFCgtLS0Zj22urpG4bDlUGfNFwjEKxgMOVLbibrR0uu5J71o6dfumn6/r6leNP0bSFJV1ZnLrhEIxNtSJxrZPbvX67ng
C2fXA6CiokIPPPCACgsLlZyc7PbyAID/53oALFmyRPX19Zo3b17TfdnZ2crJyXG7FQA2s/O9hXN1eF/BOa4HwLRp0zRt2jS3lwXgArtOXvfFXWCcuM45HAkMAIYiAADAUAQAABiKAAAAQxEAAGAoAgAADEUAAIChuCg8ANiow5XtFRtzeU+tX3UwnRMHxBEAAGCj2BjfZR0M98WD4L7IiQPi2AUEAIYiAADAUAQAABiKAAAAQxEAAGAoAgAADEUAAIChjDkOwI6DMwC4z86rjJ1ft1F+Xzvb60YTY54RL/fgjAvhakWAs+y6ytiXLZiQYvxzAruAAMBQBAAAGIoAAABDtUoAbNiwQYMHD1ZqaqpWrFjRGi0AgPFcfxO4srJShYWFWrNmjWJiYpSdna2kpCTdcMMNbrcCAEZzPQB27Nih3r17q2PHjpKkgQMHqrS0VL/5zW8ierzX62nx2lfFx7b4sV+XutHSq8/vc6TuOW3939bn9ykUbGd73S9qy3W/OL9dNb9KW6z75dm/qCXPfxd7jMeyLKvZFS/Dn/70J9XW1mrSpEmSpNWrV2vfvn2aOXOmm20AgPFcfw8gHA7L4/lvIlmWdd5tAIA7XA+Aq6++WlVVVU23q6qq1LlzZ7fbAADjuR4At912m3bu3KkTJ06orq5Or7/+uvr27et2GwBgPNffBO7SpYsmTZqkvLw8BYNBDR8+XD/84Q/dbgMAjOf6m8AAgLaBI4EBwFAEAAAYigAAAEMRAABgKAKgjaipqVF6errKy8slfX7KjIyMDKWmpqqwsLCVu3PWokWLlJaWprS0NM2fP1+SWfMvXLhQgwcPVlpampYuXSrJrPnPeeKJJzRlyhRJZs2fm5urtLQ0ZWZmKjMzU3v37nVvfgut7p///KeVnp5ufe9737OOHTtm1dXVWSkpKdbRo0etYDBojR492tq6dWtrt+mI7du3W3fffbdVX19vNTQ0WHl5edaGDRuMmX/Xrl1Wdna2FQwGrbq6Oqt///7W/v37jZn/nB07dlhJSUnW5MmTjfr7D4fDVp8+faxgMNh0n5vzswXQBhQVFWnGjBlNR0Tv27dP1113na699lr5fD5lZGSotLS0lbt0RiAQ0JQpUxQTEyO/36/u3burrKzMmPlvvfVWvfDCC/L5fKqurlZjY6NOnz5tzPySdPLkSRUWFmrcuHGSzPr7//e//y1JGj16tIYMGaLly5e7Oj8B0AbMnj1bP/nJT5puf/LJJwoEAk23O3furMrKytZozXE33nijbrnlFklSWVmZSkpK5PF4jJlfkvx+v55++mmlpaUpOTnZqN+/JE2fPl2TJk1Shw4dJJn193/69GklJyfrmWee0fPPP6+XX35Zx48fd21+AqANMvGEeQcPHtTo0aOVn5+va6+91rj5x48fr507d6qiokJlZWXGzL969Wp17dpVycnJTfeZ9Pffq1cvzZ8/X/Hx8erUqZOGDx+up59+2rX5XT8VBC7NtBPm7dmzR+PHj1dBQYHS0tK0e/duY+Y/dOiQGhoa9N3vflft27dXamqqSktL1a7df88H/3Wef+PGjaqqqlJmZqZOnTql2tpaffzxx8bM//bbbysYDDYFoGVZ6tatm2t//2wBtEGJiYk6fPiwjhw5osbGRhUXF39tT5hXUVGhBx54QE8++aTS0tIkmTV/eXm5pk2bpoaGBjU0NGjTpk3Kzs42Zv6lS5equLhY69at0/jx4zVgwAA999xzxsx/5swZzZ8/X/X19aqpqdHatWv129/+1rX52QJog2JjYzVv3jw9+OCDqq+vV0pKigYNGtTabTliyZIlqq+v17x585ruy87ONmb+lJQU7du3T0OHDlW7du2UmpqqtLQ0derUyYj5v4pJf//9+/fX3r17NXToUIXDYY0cOVK9evVybX5OBgcAhmIXEAAYigAAAEMRAABgKAIAAAxFAACAoQgAADAUAQAjjR49WidOnLjsn9m1a5fS09Mvud5NN930
lbU2bdqkWbNmSfr8tMClpaUqLy9Xr169LlkTuFwcCAYjbd++3ZafuVy33367br/9dsfXAb4KWwAwztSpUyVJ99xzj3bv3q3c3FxlZGRoyJAhevXVV//nZyoqKrRlyxZlZ2crKytL/fr101NPPdXsdZ966indddddyszM1JYtWyRJa9as0X333WfLXEBzsQUA48ydO1dr1qzRsmXLNGLECOXn5ys1NVWVlZX65S9/qeuuu+68n7nqqquUn5+vefPm6frrr1dlZaX69++vvLy8Zq17zTXX6PHHH9eBAweUm5urkpIShyYEIkMAwFiHDh1SfX29UlNTJUldunRRamqq3nzzzfP2wXs8Hv3xj3/U1q1bVVxcrEOHDsmyLNXV1TVrvZycHElSjx491L17d7377rv2DQO0ALuAYCyPx/M/51m3LEuhUOi8+2pra3XXXXfpgw8+UM+ePZWfny+fz6fmnkbL6/3vf7dwOCyfj9dfaF0EAIzUrl07devWTT6fT6+//rokqbKyUn/961912223Nf1MKBTSkSNHVFNTo4kTJ2rAgAHatWuXGhoaFA6Hm7Xm2rVrJUkffPCBjh49qsTERHuHApqJlyAw0qBBgzRq1CgtXrxYs2bN0h/+8Ac1NjbqgQceUO/evZt+Jjc3VwsXLlS/fv105513KiYmRj169NANN9ygI0eOKCYmJuI1jx07pqFDh8rj8ej3v/+9Onbs6NB0QGQ4HTQAGIotAMAGzz33nDZs2PCV3xszZoyGDBnickfApbEFAACG4k1gADAUAQAAhiIAAMBQBAAAGIoAAABD/R+g2LBfFQqybgAAAABJRU5ErkJggg==",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Create a visualization with Modin df\n",
    "sns.histplot(data=modin_tips, x=\"total_bill\", stat='frequency')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<AxesSubplot:xlabel='total_bill', ylabel='Frequency'>"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAEJCAYAAACdePCvAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAAZRUlEQVR4nO3de3BU9fnH8c8uu8lgE0TSBSla7aBo6SXSiyGWEsAakCQEA8WEmUQKjFKtXDoaICKMchVsIxaZ1sogCiihgkAkqZbbKFCoaEEdFEoJEMnEGMolJiS72fP7wx+pWIFNOOck6/f9+iu7yT7f5yFhP3vO7jnHY1mWJQCAcbyt3QAAoHUQAABgKAIAAAxFAACAoQgAADAUAQAAhiIAAMBQvtZuoLn+85/PFA5//Q9dSEiIU3V1TWu30WqYn/lNnd/u2b1ej6666htf+b2oC4Bw2DIiACQZM+eFMD/zm8qt2dkFBACGIgAAwFAEAAAYigAAAEMRAABgKEcDoKamRunp6SovLz/v/uXLlys3N9fJpQEAl+BYAOzdu1c5OTkqKys77/5//etfevbZZ51aFgAQIceOAygqKtKMGTOUn5/fdF9DQ4OmT5+u8ePHa926dU4tjQvocGV7xcbY/yuvbwjp9Kk62+sCcJZjATB79uz/ue93v/udhg0bpmuuuabFdRMS4i6nragSCMTbXrNg8Xbba865/2eO9OpEzWjC/ObO79bsrh0JvH37dlVUVGjq1KnatWtXi+tUV9cYcYRgIBCvqqozttcMBkO21jzHiV7trhlNmN/c+e2e3ev1XPCFs2sBUFxcrIMHDyozM1O1tbX69NNPNXHiRD311FNutQAA+ALXAmDu3LlNX+/atUuLFi3iyR8AWhHHAQCAoRzfAti8efP/3JeUlKSkpCSnlwYAXARbAABgKAIAAAxFAACAoQgAADAUAQAAhiIAAMBQUXdReFMEQ2Gjz4UCwHkEQBvl93n18MJtttZcMCHF1noAohu7gADAUAQAABiKAAAAQxEAAGAoAgAADEUAAIChCAAAMBQBAACGIgAAwFAEAAAYigAAAEMRAABgKEcDoKamRunp6SovL5ckrVq1Sunp6crIyNDUqVPV0NDg5PIAgItwLAD27t2rnJwclZWVSZIOHz6sJUuW6OWXX9b69esVDoe1cuVKp5YHAFyCYwFQVFSkGTNmqHPnzpKkmJgYzZgxQ3FxcfJ4POrRo4eOHz/u1PIAgEtw7HoAs2fPPu92t27d1K1bN0nSiRMntGLFCs2dO9ep5QEAl+D6BWEqKys1duxYDRs2TElJSc1+fEJCnANdtU1+v/2/HidqSnLk6mWmXxGN+c2d363ZXQ2AQ4cOaezYscrNzdXo0aNbVKO6ukbhsGVzZ21PIBCvYDBke10nakpSVdUZW+sFAvG214wmzG/u/HbP7vV6LvjC2bUAqKmp0ZgxYzRx4kQNHTrUrWUBABfg2nEAf/nLX/Tpp59q6dKlyszMVGZmphYuXOjW8gCAL3F8C2Dz5s2SpFGjRmnUqFFOLwcAiBBHAgOAoQgAADAUAQAAhnL9OAB8/QRDYUc+t9zhyvY6farO9roAPkcA4LL5fV49vHCbvTX9Ps25/2e21gRwPnYBAYChCAAAMBQBAACGIgAAwFAEAAAYigAAAEMRAABgKAIAAAxFAACAoQgAADAUAQAAhiIAAMBQBAAAGIoAAABDEQAAYCgCAAAM5WgA1NTUKD09XeXl5ZKkHTt2KCMjQ6mpqSosLHRyaQDAJTgWAHv37lVOTo7KysokSWfPnlVBQYEWL16sjRs36v3339e2bfZeRQoAEDnHAqCoqEgzZsxQ586dJUn79u3Tddddp2uvvVY+n08ZGRkqLS11ankAwCU4dk3g2bNnn3f7k08+USAQaLrduXNnVVZWNrtuQkLcZfcWLfx++389TtR0sq4TF5uPFibPLpk9v1uzu3ZR+HA4LI/H03Tb
sqzzbkequrpG4bBlZ2ttUiAQr2AwZHtdJ2o6UfdcoFRVnbG1brQIBOKNnV0ye367Z/d6PRd84ezap4CuvvpqVVVVNd2uqqpq2j0EAHCfawGQmJiow4cP68iRI2psbFRxcbH69u3r1vIAgC9xbRdQbGys5s2bpwcffFD19fVKSUnRoEGD3FoeAPAljgfA5s2bm75OTk7W+vXrnV4SABABjgQGAEMRAABgKAIAAAxFAACAoQgAADAUAQAAhoooAF588UXV1NQ43QsAwEURBcBHH32kgQMH6pFHHtF7773ndE8AABdEdCDYrFmzVFNTow0bNuixxx6TZVnKyclRRkaGYmNjne4RAOCAiN8DiIuL06BBg5Senq6TJ09q5cqVGjRo0HlH+gIAokdEWwA7d+7UqlWrtHPnTg0cOFDPPPOMbr75Zh09elQjR47UgAEDnO4TAGCziALgscce08iRIzVz5kzFx//3QgXf/va3NWLECMeaAwA4J6IAWL9+vUpLSxUfH6+qqiq99tprysvLk9fr1fjx453uEYYKhsKOXBmpviGk06fqbK8LRJuIAmDmzJn67LPPNGTIEHm9Xu3Zs0fl5eWaNm2a0/3BYH6fVw8v3GZ73QUTUmyvCUSjiALg3XffVXFxsSQpISFBCxcuVGZmpqONAQCcFdGngILBoBoaGppuh0LOXFcWAOCeiLYA+vXrpzFjxigzM1Mej0fFxcVKSWEzGgCiWUQBkJ+frxUrVmjTpk3y+Xy64447lJ2d7XRvAAAHRRQA7dq1U15envLy8pzuBwDgkogC4G9/+5vmzJmjU6dOybKspvvfeecdxxoDADgrogBYsGCBpkyZop49e8rj8Vz2ouvWrdOzzz4rSerbt68mT5582TUBAM0TUQB06NBBqamptixYV1en2bNnq7S0VB06dFBOTo527Nih2267zZb6AIDIRPQx0MTERG3bZs8BOY2NjQqHw6qrq1MoFFIoFOKMogDQCiLaAti2bZuWL18uv98vv98vy7Lk8Xha9B5AXFycJkyYoDvvvFPt27fXT3/6U/3oRz9qdh0AwOWJKACef/552xb88MMP9corr2jLli2Kj4/XQw89pCVLlmjs2LERPT4hIc62Xto6vz+iX0+r14zGuk6cY8hu0dCjk0ye363ZI/rf1a1bN5WWlmr//v0aN26cNm3apPT09BYt+NZbbyk5OVkJCQmSpKysLK1cuTLiAKiurlE4bF36B6NcIBCvYND+I66dqOlE3XNP/E71W1V1xpG6dgkE4tt8j04yeX67Z/d6PRd84RzRewDPPvusXnrpJZWWlurs2bNatGiRnnnmmRY1c/PNN2vHjh2qra2VZVnavHmzfvCDH7SoFgCg5SIKgNdee01//vOf1b59e1111VUqKipqOjlcc/Xp00dpaWnKysrSkCFDFAqFdO+997aoFgCg5SLaBeTz+RQTE9N0u0OHDvL5Wr5v9t577+VJHwBaWUTP4l27dtXWrVvl8XjU0NCgJUuWqFu3bk73BgBwUEQB8Oijjyo/P18fffSRbrnlFiUmJurJJ590ujcAgIMiCoAuXbpo2bJlqqurU2Njo+LizPkoJgB8XUUUAEuXLv3K+3/1q1/Z2gwAwD0RBcCBAweavm5oaNA//vEPJScnO9YUAMB5EQXA3Llzz7tdWVmpRx55xJGGAADuiOg4gC/r0qWLPv74Y7t7AQC4qNnvAViWpffff7/pVA4AgOjU7PcApM+PC8jPz3ekIQCAO1r0HgAAIPpFFAC5ubkXvRTkCy+8YFtDAAB3RBQA3//+93Xo0CGNGDFCfr9f69atUygUUlpamtP9AQAcElEAvPPOO1q5cqXatWsnSfr5z3+uESNGaODAgY42BwBwTkQfAz1x4oTq6+ubbn/22Wc6e/asY00BAJwX0RZAenq67r77bt1xxx2yLEslJSXKy8tzujcAgIMiCoAJEyaoZ8+e+vvf/67Y2Fg9/vjjuvXWW53uDQDgoIiP
BO7SpYtuvPFGTZw4UX6/38meAAAuiCgAXnnlFU2dOlXPPfeczpw5o/vvv19FRUVO9wYAcFBEAbB8+XKtWrVKcXFxSkhI0Jo1a7Rs2TKnewMAOCiiAPB6veddBKZr165NHwkFAESniAKgY8eO2r9/f9PRwOvXr9eVV17Z4kU3b96srKws3XnnnZo1a1aL6wAAWi6iTwEVFBRowoQJOnr0qPr06aPY2FgtXry4RQseO3ZMM2bM0OrVq5WQkKB77rlH27ZtU0pKSovqAQBaJqIAOHv2rNatW6eysjI1NjbqO9/5Tos/CfTGG29o8ODBuvrqqyVJhYWFio2NbVEtoCWCobACgXhba9Y3hHT6VJ2tNQGnRRQADz30kEpKStS9e/fLXvDIkSPy+/0aN26cKioq1K9fP02cOPGy6wKR8vu8enjhNltrLpjAFiyiT0QBcNNNN2nDhg368Y9/rCuuuKLp/o4dOzZ7wcbGRr399tt68cUXdcUVV+jXv/611q5dq6ysrIgen5AQd+kf+prw+yP69bR6Tep+zu6tCrvrRRuT53dr9oj+F2zatEmlpaXn3efxeLR///5mL/jNb35TycnJ6tSpkyTpF7/4hfbt2xdxAFRX1ygctpq9brQJBOIVDIZsr+tETSfqnnuCjpZ+Jamq6oxttQKBeFvrRRuT57d7dq/Xc8EXzhEFwHvvvWdbM/3799fkyZN1+vRpfeMb39Cbb76p22+/3bb6AIDIXPRjoI8++mjT1ydOnLBlwcTERI0dO1YjR47U4MGD9a1vfUvDhg2zpTYAIHIX3QJ4//33m74eM2aM1q5da8uiw4cP1/Dhw22pBQBomYtuAViW9ZVfAwCiX8RnA73YNYEBANHnoruAwuGwTp06Jcuy1NjY2PT1OS35GCgAoG24aAAcOHBAvXv3bnrST0pKavpeSz8GCgBoGy4aAB9++KFbfQAAXBbxewAAgK8XAgAADEUAAIChCAAAMBQBAACGIgAAwFAEAAAYigAAAEMRAABgKAIAAAxFAACAoQgAADAUAQAAhiIAAMBQFz0dNIDIBENhBQLxttYMBOJV3xDS6VN1ttYFziEAABv4fV49vHCbffX8PgWDIS2YkGJbTeDLWnUX0BNPPKEpU6a0ZgsAYKxWC4CdO3dq7dq1rbU8ABivVXYBnTx5UoWFhRo3blzUX3ayw5XtFRvDnjQA0adVnrmmT5+uSZMmqaKiotmPTUiIc6Cjy1OweLvtNefc/zP5/fb/epyoSV1nap6rZ/eby9HC1Lkl92Z3PQBWr16trl27Kjk5WWvWrGn246uraxQOWw501jKBQLyCwZAjtZ2oGy29nnvyi5Z+7a557k1gSaqqOmNb3WgRCMQbObdk/+xer+eCL5xdD4CNGzeqqqpKmZmZOnXqlGprazVnzhwVFBS43QoAGM31AFi6dGnT12vWrNHu3bt58geAVsCRwABgqFb9+EpWVpaysrJaswUAMBZbAABgKAIAAAxFAACAoQgAADAUAQAAhiIAAMBQBAAAGIoAAABDEQAAYCgCAAAMRQAAgKEIAAAwFAEAAIYiAADAUFzNHGjDgqGwI9eHrW8I6fSpOtvrIroQAEAb5vd59fDCbbbXXTAhxfaaiD7sAgIAQxEAAGAoAgAADEUAAIChWuVN4EWLFqmkpESSlJKSovz8/NZoAwCM5voWwI4dO/TWW29p7dq1evXVV/XBBx/ojTfecLsNADCe61sAgUBAU6ZMUUxMjCSpe/fuOn78uNttAIDxXA+AG2+8senrsrIylZSU6KWXXnK7DcBoHGAGqRUPBDt48KDuu+8+5efn6/rrr4/4cQkJcc411UJ+vzP/jE7UjaZeo62u3TXP1XOkV59XBYu32153zv0/sy1YnAioaOHW7K0SAHv27NH48eNVUFCgtLS0Zj22urpG4bDlUGfNFwjEKxgMOVLbibrR0uu5J71o6dfumn6/r6leNP0bSFJV1ZnLrhEIxNtSJxrZPbvX67ng
C2fXA6CiokIPPPCACgsLlZyc7PbyAID/53oALFmyRPX19Zo3b17TfdnZ2crJyXG7FQA2s/O9hXN1eF/BOa4HwLRp0zRt2jS3lwXgArtOXvfFXWCcuM45HAkMAIYiAADAUAQAABiKAAAAQxEAAGAoAgAADEUAAIChuCg8ANiow5XtFRtzeU+tX3UwnRMHxBEAAGCj2BjfZR0M98WD4L7IiQPi2AUEAIYiAADAUAQAABiKAAAAQxEAAGAoAgAADEUAAIChjDkOwI6DMwC4z86rjJ1ft1F+Xzvb60YTY54RL/fgjAvhakWAs+y6ytiXLZiQYvxzAruAAMBQBAAAGIoAAABDtUoAbNiwQYMHD1ZqaqpWrFjRGi0AgPFcfxO4srJShYWFWrNmjWJiYpSdna2kpCTdcMMNbrcCAEZzPQB27Nih3r17q2PHjpKkgQMHqrS0VL/5zW8ierzX62nx2lfFx7b4sV+XutHSq8/vc6TuOW3939bn9ykUbGd73S9qy3W/OL9dNb9KW6z75dm/qCXPfxd7jMeyLKvZFS/Dn/70J9XW1mrSpEmSpNWrV2vfvn2aOXOmm20AgPFcfw8gHA7L4/lvIlmWdd5tAIA7XA+Aq6++WlVVVU23q6qq1LlzZ7fbAADjuR4At912m3bu3KkTJ06orq5Or7/+uvr27et2GwBgPNffBO7SpYsmTZqkvLw8BYNBDR8+XD/84Q/dbgMAjOf6m8AAgLaBI4EBwFAEAAAYigAAAEMRAABgKAKgjaipqVF6errKy8slfX7KjIyMDKWmpqqwsLCVu3PWokWLlJaWprS0NM2fP1+SWfMvXLhQgwcPVlpampYuXSrJrPnPeeKJJzRlyhRJZs2fm5urtLQ0ZWZmKjMzU3v37nVvfgut7p///KeVnp5ufe9737OOHTtm1dXVWSkpKdbRo0etYDBojR492tq6dWtrt+mI7du3W3fffbdVX19vNTQ0WHl5edaGDRuMmX/Xrl1Wdna2FQwGrbq6Oqt///7W/v37jZn/nB07dlhJSUnW5MmTjfr7D4fDVp8+faxgMNh0n5vzswXQBhQVFWnGjBlNR0Tv27dP1113na699lr5fD5lZGSotLS0lbt0RiAQ0JQpUxQTEyO/36/u3burrKzMmPlvvfVWvfDCC/L5fKqurlZjY6NOnz5tzPySdPLkSRUWFmrcuHGSzPr7//e//y1JGj16tIYMGaLly5e7Oj8B0AbMnj1bP/nJT5puf/LJJwoEAk23O3furMrKytZozXE33nijbrnlFklSWVmZSkpK5PF4jJlfkvx+v55++mmlpaUpOTnZqN+/JE2fPl2TJk1Shw4dJJn193/69GklJyfrmWee0fPPP6+XX35Zx48fd21+AqANMvGEeQcPHtTo0aOVn5+va6+91rj5x48fr507d6qiokJlZWXGzL969Wp17dpVycnJTfeZ9Pffq1cvzZ8/X/Hx8erUqZOGDx+up59+2rX5XT8VBC7NtBPm7dmzR+PHj1dBQYHS0tK0e/duY+Y/dOiQGhoa9N3vflft27dXamqqSktL1a7df88H/3Wef+PGjaqqqlJmZqZOnTql2tpaffzxx8bM//bbbysYDDYFoGVZ6tatm2t//2wBtEGJiYk6fPiwjhw5osbGRhUXF39tT5hXUVGhBx54QE8++aTS0tIkmTV/eXm5pk2bpoaGBjU0NGjTpk3Kzs42Zv6lS5equLhY69at0/jx4zVgwAA999xzxsx/5swZzZ8/X/X19aqpqdHatWv129/+1rX52QJog2JjYzVv3jw9+OCDqq+vV0pKigYNGtTabTliyZIlqq+v17x585ruy87ONmb+lJQU7du3T0OHDlW7du2UmpqqtLQ0derUyYj5v4pJf//9+/fX3r17NXToUIXDYY0cOVK9evVybX5OBgcAhmIXEAAYigAAAEMRAABgKAIAAAxFAACAoQgAADAUAQAjjR49WidOnLjsn9m1a5fS09Mvud5NN930
lbU2bdqkWbNmSfr8tMClpaUqLy9Xr169LlkTuFwcCAYjbd++3ZafuVy33367br/9dsfXAb4KWwAwztSpUyVJ99xzj3bv3q3c3FxlZGRoyJAhevXVV//nZyoqKrRlyxZlZ2crKytL/fr101NPPdXsdZ966indddddyszM1JYtWyRJa9as0X333WfLXEBzsQUA48ydO1dr1qzRsmXLNGLECOXn5ys1NVWVlZX65S9/qeuuu+68n7nqqquUn5+vefPm6frrr1dlZaX69++vvLy8Zq17zTXX6PHHH9eBAweUm5urkpIShyYEIkMAwFiHDh1SfX29UlNTJUldunRRamqq3nzzzfP2wXs8Hv3xj3/U1q1bVVxcrEOHDsmyLNXV1TVrvZycHElSjx491L17d7377rv2DQO0ALuAYCyPx/M/51m3LEuhUOi8+2pra3XXXXfpgw8+UM+ePZWfny+fz6fmnkbL6/3vf7dwOCyfj9dfaF0EAIzUrl07devWTT6fT6+//rokqbKyUn/961912223Nf1MKBTSkSNHVFNTo4kTJ2rAgAHatWuXGhoaFA6Hm7Xm2rVrJUkffPCBjh49qsTERHuHApqJlyAw0qBBgzRq1CgtXrxYs2bN0h/+8Ac1NjbqgQceUO/evZt+Jjc3VwsXLlS/fv105513KiYmRj169NANN9ygI0eOKCYmJuI1jx07pqFDh8rj8ej3v/+9Onbs6NB0QGQ4HTQAGIotAMAGzz33nDZs2PCV3xszZoyGDBnickfApbEFAACG4k1gADAUAQAAhiIAAMBQBAAAGIoAAABD/R+g2LBfFQqybgAAAABJRU5ErkJggg==",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Create a visualization with pandas df\n",
    "sns.histplot(data=pandas_tips, x=\"total_bill\", stat='frequency')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<AxesSubplot:xlabel='total_bill', ylabel='tip'>"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYEAAAEKCAYAAAD0Luk/AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAABA7ElEQVR4nO29eZhU5Zn//T1bbb1vNItBQjOIWUg0TBAnV1wYGbAjKMEJ0ZBMzO991Z+jwcmko8YxE18TtN+8cUkMZoxijBI1pCVuEAhKflGwDaJ2UBBpAw3SNL1X13q25/3j1Dld1V3VXdVdVaeqz/25Li851VXnPM+pU/d9P/dzLxxjjIEgCIJwJLzdAyAIgiDsg5QAQRCEgyElQBAE4WBICRAEQTgYUgIEQRAOhpQAQRCEg7FVCdxzzz245ZZb7BwCQRCEo7FNCezduxfPPvusXZcnCIIgAIh2XHRgYAD33nsvrrvuOhw6dCijz/b3B6Hrzshvq6kpRW9vwO5h2IKT5w7Q/Gn+2Zs/z3OoqipJ+XdblMAdd9yBm2++GZ2dnRl/dqzJTEVqakrtHoJtOHnuAM2f5p+f+eddCfzud7/DjBkzsGTJErS0tGT8+d7egGNWAnV1ZejuHrJ7GLbg5LkDNH+af/bmz/PcmAol70rgpZdeQnd3N1atWoXBwUGEQiH8+Mc/xm233ZbvoRAEQTievCuBTZs2Wf9uaWnBG2+8QQqAIAjCJihPgCAIwsHYsjFssnr1aqxevdrOIRAEQRQ0be092N7agZ7BCGorPFi+eDYWNtRm7fy2KgGCIAgiNW3tPXhy52EIAg+fR8RAUMaTOw8DQNYUAbmDCIIgCpTtrR0QBB5uSQDHcXBLAgSBx/bWjqxdg5QAQRBEgdIzGIFLTBTTLpFHz2Aka9cgJUAQBFGg1FZ4IKt6wmuyqqO2wpO1a5ASIAiCKFCWL54NTdMRVTQwxhBVNGiajuWLZ2ftGrQxTBAEUaCYm78UHUQQBOFQFjbUZlXoj4SUQBbJdTwvQRBEtiElkCXyEc9LEASRbWhjOEvkI56XIAgi25ASyBL5iOclCILINqQEskQ+4nkJgiCyDSmBLJGPeF6CIIhsQxvDWSIf8bwEQRDZhpRAFsl1PC9BEES2IXcQQRCEg6GVADHloSQ+gkgNKQFiSkNJfAQxNuQOIqY0lMRHEGNDSoCY0lASH0GMjS3uoPvvvx9//OMfwXEc1qxZg29+85t2DINwALUVHgwEZbglwXqNkviIfFLoe1J5Xwm88cYbeP311/Hcc8/h97//PX7zm9/gww8/zPcwCIdASXyEnZh7UgNBOWFPqq29x+6hWeRdCXz+85/H448/DlEU0dvbC03T4PP58j0MwiEsbKjF1ZfMR2WJC6GIisoSF66+ZH5BWWLE1KUY9qRscQdJkoQHHngAjz76KJYvX476+vq0P1tTU5rDkRUedXVldg/BNrI196V1ZVh63sezcq584uTvHpga8+8LyCjziuA4znpNFDj0B+Rx55ev+XOMMZaXKyUhHA7juuuuw6WXXoqvfOUraX2mtzcAXbdtyHmlrq4M3d1Ddg/DFpw8d4DmP1Xm37x5/6g9qaiiobLEhaarzk35uWzOn+e5MY3nvLuD2tvbcfDgQQCA1+vFsmXL8P777+d7GARBEDmnGPak8q4ETpw4gdtvvx2yLEOWZezatQuf+9zn8j0MgiCInFMMe1J53xO44IIL0NbWhssvvxyCIGDZsmVobGzM9zAIgiDyQqEXlrRlY/jGG2/EjTfeaMelCYIgiDgoY5ggCMLBkBIgCIJwMFRFlCBySKGXDCAIUgIEkSOojDVRDJA7iCByRDGUDCAIWgkQRI7oGYzA50n8iVEZ60TIXWY/tBIgiBxRW+GBrOoJr1EZ62GKocKmEyAlQBA5ohhKBtgJucsKA3IHEUSOMN0a5O5IDrnLCgNSAgSRQwq9ZICdUNe3woCUAEGANijt
YPni2Xhy52FEYawAZFUnd5kNkBIgHMFYQp7i+e2B3GWFASkBYsoznpCP36AEALckIApDOJFAyi3kLrMfig4ipjzjRaH0DEbgEhN/CrRBSTgFWgkQU57xolCcvEFJeyEErQSIKc94SVtOjeenZC0CICVAOIDxhHwxtADMBZSsRQDkDioYki3Ll9aV2T2sKUE6UShO3KCkZC0CICVQEKSKXqmo8OHMWp/dw5sSOFHIj4eT90KIYWxxB/385z9HY2MjGhsb0dzcbMcQCopUy/KW3UfsHhoxhXHqXgiRSN6VwJ49e/Dqq6/i2WefxdatW/Huu+9i586d+R5GQZEqRPF0X8imERFOwKl7IUQieXcH1dXV4ZZbboHL5QIANDQ04OTJk/keRkGRalk+rZpcQVOVQgnNJDcZwTHGmF0XP3r0KL761a/it7/9LebMmWPXMGxn38Eu/LKlDaJouIKiigZVZbh29UIsOrve7uHZzr6DXWjZfQRdfSHUV/uw+sJ5RX1f6PsmCgnblMAHH3yAa6+9FjfeeCOuuOKKtD/X2xuArtumt3JG0uig8z6O7u4hu4dmC3V1ZejuHkrYNI8vMlbMbovmzftHrfyiiobKEhearjoXwPD8nQrNP3vz53kONTWlKf9uS3TQm2++iZtuugm33XYbGhsb7RhCwUHL8uRMxbo+FJpJFBJ5VwKdnZ244YYbcO+992LJkiX5vjxRZExFgUmhmUQhkffooEceeQTRaBR33303Vq1ahVWrVuG3v/1tvodBFAlTsU8vhWYShUTeVwK33347br/99nxflihSMmk8UigRN+NBdfSJQoIyhomCJl2BWWyNYWgPiCgUSAkQGZNvizsdgTkVN5AzpVhWQkRhQUqAyIhCtbin4gZyJhTq90IUPqQEHE6m1mOhWtxOj7gp1O+FKHyon4CDmUhTkUJtxej0iJtC/V6IwodWAg5mItZjoVrcuY64KXR/e6F+L0ThQ0rAwUzEj55JyGa+yVXEjV3+9kwUTyF/L0RhQ0rAwUzEenRijLsd/vZ9B7syUjxO/F6I7EBKwMFM1Hp0Qox7vBU+GIiisswNxCnLXPvbW3YfyVjxOOF7IbIPKQEHQ9Zjcka6f/xBGX3+CDiOg9dt/GRy7W/v6gvBI9FGL5F7SAk4nMlYj4W+WTpRRrp/Kkpd6PNHMTAUhccl5MXfXl/tQ3d/iDZ6iZxDIaLEhDCt5a7+MIZCMj44MYAHWw7guVc/tHtok2ZkuKXPI6GqzAXGkLc2jKsvnOfokFcif9BKgJgQ21s7oGoMQyEZAAeB56HpDC++3oE5M8qxsKG2aFcKyTbMRVFAwyyv1fQl1yw6ux5XXzK/KO8fUVyQEihi7BSyPYMRhCIKAA48Z7zGc4CmM2xv7QCAoi1jkOmGea6+B9roJfIBKYEixe5aMbUVHvQPRSDww24TBkASjM3LiYRVxgvTGXWlWHrOTNuarwPpbZjb/T0QxGQhJVCk2F0rZvni2Wj/yA9NZ+A5QwEwAF63gNoKT0IiWiiiwB9SoKo6egciaGvvGbcUdL8/bKswTdcKt/t7IIjJQkqgSMk02zfbLouFDbVoXDIbL77eAU1nkAQeXrcASeSxfPFsbG/twEBQhqbp6BuKIuYxAschqXAfKUw9kgBVY7YJ03Tvl9OrlxLFDymBIiWTbN9cuSxWfmEu5swoTyksn9x5GIMBOaYAOHAcg9ctYDAg48GWv6FhVoX1/okI01z54jO5X9ms2VMo7jDCWZASKFIy2bzMtssiHeFrHj/Y8jcwxiCJxvUDERUcAMZYgnDNVJjm0hefyf3KVs2eQnOHEc6B8gSKlIUNtbj6kvmoLHGNG7uezTLDmZSfXthQi4ZZFair8qG+2oeIolurAkkU4JYECAKP7a0do0pBR2R1TGEaL6g5jks412TJ5H5l8j2Mxcj5eFxi1uZDEGNh20ogEAhg7dq1eOihh3DGGWfYNYyiJt3Ny2y6LDJdVcRbyqqqAwA4
jqG8xA1gWLiOjMgx3SEA0Lx5/6hVRy598Zner2yEcprzCUdV+IMyVI1B4IFQWJnUeQliPGxRAu+88w5uv/12HD161I7LO45slhnOVPjGC/fegQg4DqgscyetwRMvTOvqyrDr9b9bLhKOAz486ccDW9ows8YHj2TMIxdlFewoy1xb4bGyr43kO0DVGHSmJY2mIohsYYs76JlnnsEPfvADTJs2zY7LO45suSwAQ1jJMYveZDzhe7TTj46uABRNh6LqCEWUtEohbG/tgKLq6B2MoHsgAiV23dMDEfjDKkJhJSdlFbJ5v9Jl+eLZCIQUMAZwYNB1ABxQ4hHJJUTkFI4xxuy6+MUXX4zHH3+c3EFFxL6DXfhlSxtE0fDDRxUNqspw7eqFWHR2/aj3/3bHITy98zDAAQIHKBoDY0CJV8LcWRVYfeG8pJ8DgK/9YBuCYQWqlviI8hwws64EoiCgrMSF030hTKv2jXmuYuBrP9iGqKxB1XRIIo/KUjd8HhGBsIpfff8Su4dHTFGKLjqotzcAXbdNb+WVuroydHcP2T2MBM6s9WHt0nmjooPOrPUlHevW3e0AAIEztoQlgYOmMzCd4eY1CwEg6efq6sqgKDpME8XMMzCT0niOQyAk445vLEr4XKHdr0yYUe2z9iIkkYei6ghGVFSVuop6XhOhEJ/9fJLN+fM8h5qa0pR/LzolQNhPJhuhEVmFYBYXisFzxuvjIQocogozQkrjXucwNcsqx+9FGHOnyqFE7iElQEyasfIGPC4RUUWDEKcHdGa8Ph4za0vQ1R9GIKxAUY3wUp6HUbF0CgrH+E30/oCMqlLXqByMYq3MShQupAQIi4kImPgkJ4DhgxMDONQxAIHnMKPGh880VKP10GlourEC0BnAwLDsH8ffBzIt45oKDzRNx2BAhqrpmFbpwaIF07C9tQNP7DicU2GYb6FrrrKSuQOoWF1xUuiK21Yl8PLLL9t5eSKOiQiYtvYe/M9z7yGiaOA5JGzgajpDZ28Q/pCCxQum4Z32PkRkFR6XiGX/eAZWfmHuuGMamTswd2a5Zf3nWhi2tfdgy+52nOwJQhR4lJdItgtdKlZXfBSD4qaVgEMZaZ0EQnJGAsZ8uKOKCoHjEhSA6cPnOB6RqIrjpwOYXV9qXWvOjPK0x5ls/6F58/6cCkNzboMBGRzHQWcM/QEZ1WVuK4vXjh8wFasrPopBcZMScCDJrJPTfSFUl7sTkq/GEjDmwy2JRrVPFrd1aygAw/2jqDpO9oYwjSFrllAmwnAiS3Fzbjpj1qa2zhj8IQX1VV7bhG66mcyF7n5wEsWguEkJFCht7T3YtaUNnd2BrP+QzSQss8a/KBoZuf6gghKvy3rfWBE45sNdXuJCnz8yKoJH4DnoDAAHiFm2hDIRhhNZiptzEwUeqmb0S+BglL2wMyopnUzmybgfnnv1Q+z464mM3XZEarJZsiVXkBIoQMwfstsl5MSP+FFPEKGoUc2T5wBN08FgPJxRRUsqYEZal0zXcao3ZDSV4Y0y0WZMv8Aj9m8jw7ei1JVw/claQumWdZjoUtz84ZoKTmdcbF6crVFJ6XQ8m+icn3v1Qzy35yg4cBB4Izz1uT1HAYAUwSSwowRJppASKEDMH7LHJUKJ1cfJph9R0xjAjCQSIObDZ0Yilz8gJ1iCZsP4eOvyVF8Ig0EZgJG0xRgDx3HwShx8Hgn+oAKAob66BACgjkjuG2kJJXNfLK0rSzn+dNs/TnQpbv5wBYFHVZk7FpXEML3GizUXzbPVtTJejsZE57zjrycsBQAY2d2abrxOSmDiZNKq1C5ICRQgufYjmklYOuOssE2dMeiMoabUA58qwB9U8NxrR7Hv/W4ASLAuw7IGnuPA84bQUDUdAs+hpsKLO7+1OOFa1gYykltCqdwXFRU+nFnrSzmHdBLWJroUTxWVVEg/3FRMdM6TSeojxiYbVWZzCSmBAsT8Ibty5Ec0k7DCURWqpkMUeDBwEDjD3dEf6wbG
mOE6YsxQHNXlHnjdIlRVN/oKs2FrnzGGUGRYYMRb9x6JBzgOoYg6yhJK5b5o2X3EKisxUSazFC/0H24qJjrnyST1EcUNfcMFiPlDjsgqeI6btB9xpLtlwexK9PojqCxzW4LidF8I5eUu+EOK1fmLAdZur6oxnO4Pg+cNF5DOACmu8Uq8khpp3cuqDk3V8LVloytxplr1nO4LTWiu8RTDUjzbTHTOy/7xDDy35+iEkvqI4oaUQAFi/mB3vXUyaXRQJiGApkBWVB3hqIa+oSjaT/qxaH4t+gOydQ6RA1QGqKpsbBanqNGn64Ae0wwe3tgPGKmkRlr3ZrbvyL7CQGr3xbTqRFfQRMMei9WinwwTmbPp96foIOdhaynpieD0KqLxVnb8cj9Vvfvmzftxqi+EobBh4XMwrDye43DD6k8lKBYzQUpnbFT55pEIPAee4+BxC6gocQGMIaIYq4GPeoKoKnOD4ziEIgr6hqLW6qKuypcw3lTz+d9rPmvtCZjvUTWGUESBqungOR6NS2bnRUjZEXdvdxVNu3MN7J6/3eSziij1GC4yMu2t2zMYQTiqxcJBOXAcF4vh1xM+YzZSmVbpga4zcFzS0yVQU+lBRYnL6CkQlwwWiWrwx6KHTPdSsr7C8dcd2cAlvi/A9tYOqBrDUEiGpscKyOk6/vDaUax/4C9o3rw/aY/jbJBJT+WpghPn7GTSdgcNDg5CEASUlqbWKIRBLq2o8SKHRl7b4xJiljiDorNY5ypAELhR0UamG6GtvQdbXjmCEz3J/fIcAFE0LPfOniBqKr3QNB1dQ1Goqm4knoUMJRCVtdhnGEpjiWgjI53SCXsMRRQAZjQTg7kYjCpaTuuxFEPaf7Zx4pydzLhK4MMPP8R3v/tdHDx4EBzH4ZxzzkFzczNmzpyZj/EVHbkuGDVWCGC822QoGE0ZUspgFHjzuISkfzeF8nOvfojn9xyDNtL9xgHlPinWZpKDqmpWRBHPGefWGTAQkK2P8DyHQFiBSxLA81xGkU4el4CeQbNJPazNai52rVwKqUJO+8+VsVHIcyayz7hK4NZbb8WVV16JL3/5y2CM4emnn8b3v/99bNq0KR/jKzpybUWNFQJouk0GA1GMtW0i8ADAAeNsB638wlzMmVGOLbvb0RnLDhZ4oCpWSE3TdNRXeXB6IGK5mzRdT7i2wBtJR8bWE4eBoSgqYnXy06GtvcdyLQGJQ+ZglKQAhoVUtgVjJiUqtrxyBF39ERiJcj6subAhZ5ZzLo2NYih1QGSPcZVAOBzG2rVrreN169bhmWeeyemgiplcW1FjhQA+seNwrIm78d74lozx8DyPihIJEUXHeMS7auIFbGXJsCB/YEsbOM4osKOPOCUXM911BkuCp9rEjj//jLpSLD1nJra3dsDnlazaRvFz4XkO5SWGi0lWdXhcAh598SAisgZNZ/AHZTz64kFc03h22oIxWTjtawdOjVuv59EXDyIYUcHFdGtnbxCPvnQI11y6ICeKIJfGRjGUOiCyx7hKYO7cudi/fz/OPfdcAMDhw4epMfwYpGNFTdZajffdDzdW6YDHJaB/SE8QlCMVgChwVvTP9GpvRnNL5bufWVuC0/1h6LHcAoE3lAEDRkUZaTrDb7YfwrrlicJxpGV7snsIDz57AKqmwyUKKC+RUFclwh+UEZU1MABlPhc8LsFqw6jIuiWIzb2DYETFlleOjBp3su8AGN2n4LUDp/BPn5qOQx0DY9bricha7LoxZcg4RKJqRkI5mRJM9dlcGhtOzK9wMuMqgZMnT2LdunU466yzIIoi3nvvPdTV1eGyyy4DADz//PM5H2QxMZ4Vla1lfLLzhMLG5qkp+kcqgGGXjSEcF8yuRPPm/ZP+oa+5sMEaS78/ClXTwfEAS+GT6h2S8bPf/w0lHhEza0ssV5Zp2fb7IxgKDVv9sqKhb0hHdZkb9dU+RBUNIgeU+lwJY3+w5cCwII7NVweLuWjGvnePvnQIUdnIoJZEAeUl
LnjdIqIADnUMoOmqc1POv2cwYhTSi4uoMvZG9LSF8sgx9fvDYz4XuXbZODG/wqmMqwT+8z//Mx/jmDKMZ0VteeUIBgMyNJ1BFHmU+6QJNSpJ5g4AjCze/kB0lFuG52DVyBcFI0z0tQOnrNj7/qEI2j/yTyj2Pn7OwbACXWco9UkYCERTbjtoOkuI7InIGqrK3AhFFPhDSsJ7GQBdY/AHFWsvYm1Sl1KskmmcMGYM4LjEQYy8d7rOEAwb4aeSYDTI6fNHUF3uiW1Kjy3Iays88AeN/Arz0jozQlnTFcojx+SRjD4NqZ4LctkQ2SKlEmhvb0dDQwNKSkqS/v2Tn/xkzgZV7KSyotrae3CyNwSe56wSzn1DUVSVujJexqdyB6iqjpu+vNDapFQ1HbzAoSZW9wcwwir9scqYQyEZABeLvWd48fUOzJlRnrEVmGzvIBCSocTcQcn2J8zIHr+sIRhWEYyoMHMXh7uTDZcxUFTN2otINr76ah86e4MJhfHiq5mmunfGxrNxRQbzehz8QRk87x5XkC9fPNvaE9BNRQQGjyf9DfBM3TvksiGyRUol0NzcjF/+8pe48sorMWPGDMQnFofDYezdu3fCF33++eexceNGqKqKb3zjG7j66qsnfK5iYntrB0SBh84M4WZk7xoW7tyZwy0XTSF6qi8MWdEgCBxmxdwmqcothCIKBgMyGDOsR7Pkselm4EeUeBAELiH2HhgO75zo5uJIP3vjkjPx4t4OKJqe1DUlCjzCURX+YBQMAItbvZjvF3jOUgjzz6gY0y2z5sIGPPrSIUSiKjRdh8Dz8HhcWHNhQ8L7Rt47VTNyG0SBM1YdMYteSdO6XthQi2saz7YUL8cZiieT6KCJuHfIZUNkg5RK4J577sHAwAAaGhrwm9/8xqoZrygKvva1r034gl1dXbj33nvR0tICl8uFtWvXYvHixZg3b96Ez1ks9AxGUFHqQv9Q1LJWGQNUffSegaox+IPDoZ6HOgZwqGMAZ9QZwiXeHaCqGvqHZAAM1eUey8dd7hURUfSkVTy3t3bggxMDEPjhpHEGQBImtrnY1t6TIID9QRkne0NYdFYt3jjUPSrXgItF9viDsmE5s1jTlrj38dzwHobAc+kJ40sXjGsdj3SlMMaswmmCEDtmgEcSU0YyJbv2ZATyyDFFZLVg3Dt2l5AgckvKshHf+c53cN555+HIkSNYsmQJzj//fCxZsgQXXXQRPvWpT034gnv27MF5552HyspK+Hw+/Mu//Au2b9+e9udfffXPAABVVdHUtB4vv7wTABCJRNDUtB5//vPLAIBgMICmpvV47bX/A8DIeG5qWo/XX98DAOjr60NT03rs2/cGAKC7+zSamtbjrbfeBAB0dp5EU9N6tLW9DQA4caIDTU3r8d57BwAAR4/+HU1N6/H++4cAAO3tR9DUtB7t7UcAAO+/fwhNTetx9OjfAQDvvXcAx1sfhRLqRXW5B8rgMZx68zHo0QHMrC2B5j+Gpqb12PryOxAEHt0nDqL7rcehyQFjfj2H0fv24zjR2Y0ndx7GgbdbEXj3SZRKGgYDCiLd76K/7Qm4RQZdZzj9931o2/UQPC4OKgM62/+K4HtPoumqc7GwoRZV6mF0v/0ENJ2BMYbB42/g9NtPwusWUFvhwdatW/Df//19675v2fI07rrrDuv4mWc2Y8OGO63jB37xPzj+5tOWL7y//RUc378Fx7uDuPHLnwY79Rf4P3jRELQ8h/CxXej82/NQVA06AwaP/BGDR/4IMVbLePCDbRg4stNYuegMQ0dexLY/PGld76c/vQePP/6oddzc/CNs3vw4FjbUoumqc8GdeBHHDuzCEzsOo3nzfnz3lluwZcvTAAyBrbS3YKjjdfQPRQFw6PvbZgRP7jOqpzKgv+0JfKbmpCXsmprWY+fO7Tl79p745V34/MeM0NuB3tM48pdfYckcBQsbarPy7DU1rceJE0apjra2t9HUtB6dnScBAG+99Saamtaj
u/s0AGDfvjfQ1LQefX19aGvvwcbf/AF/3fZzSFwUA0EZv/j1s7jhxhsQDBrP5p///DKamtYjEjGMh5df3ommpvVQVaO0+M6d29HUtN76rrZtewG33vod6/iFF7biv/7re9bx1q1bcPPNN6f97G3e/Diam39kHT/++KP46U/vsY43bXoY99//E+v44Yc34sEH77OOH3ro53jooZ9bxw8+eB8efnijdXz//T/Bpk0PW8epnj2TDRvuxDPPbLaO77rrDuvZA4D//u/vY+vWLdbxf/3X9/DCC1ut41tv/Q6effZZ63iyz178vUpGypXAI488EhvQrdiwYcOYJ8mE06dPo66uzjqeNm0a2tra0v58SYkbdXVlUFUVkiSgrMyDuroyRCISJElAebkXdXVl8Hq5hGNJ0iBJAioqjGOOiyYca1ow4TgaLYEkCais9KGurgzBYOLx4KAPkiSgqso47utLPD59OvG4stKHuiofwACXxKOu0oOgxKOy1I1vrfo0NP9RSJKAgaCCmhoRmpY8hl9nQJ8/ip3HTqBMFPD/XP8FfPv+vfBGPTh1moMk8ugdUmLx+YBLEuDhBQyIHPoDMupiHbvOnluLfdU+cPywpS2JRjezryxbgA/ePgmXS7DeX1rqhsslWsclJW643cPHgZhryWxMYtYe6uoPY+l5H8fBfXMwODiI739/FfYd7MKGe/6CYDAMj1tCKKxYXc4EngeL8wu5JQFV5W6c6OBx8Fg/jvWEsOjseng8kvUsAEg43newCx1dAZRWl2NmqQuBiILj3QHMD8o41hNCy+4j+KgnhBln8qiu8KKiVINfEmKrIg6iYNyLs+fWWuePf9Zy9eydc/Z0XLd4MU6cOIE773wF535iRtaePeO4JOG4uto4rqjwJj2uqSnBr19+H5LIQ+A5uEQBkkeCX+TRF5BRW1uG0tJSlJcb76+rK4PH40FZmcc6FkUx4RgAyso8Cc9SaenoYwBpP3slJW54PFLCcSAwfOzzuaAoroRjQWAJx/HX83pdcLtdCcc+nyvps5bs2O0WE45dLhGlpfHHAkpLPSP+nngcP55sPHtjkfcqohs3bkQ0GsX69esBAM888wwOHDiAO+8cW1uZFHsV0fGW1s2b92MgKKOrNzTKj25iZuHynBGjDxgtHE1/8kfdhoUmCDymx0oym01fmq8/P6PxJCNZQ/IX93aAgcV6DbDYCsPw/X/7yoVjlrp+sOUAdGaEWMY8Q1Y464za4U1df1BGKKLEvn8O9VWepO0ezXsY71+PKhrEWO/c+Iqlp/tCqKnwwOeRrPemulf5pFCqaDZt3AOfR7SMCiA/96dQ5m8X+awimvd+AtOnT8e+ffus4+7ubkybNi3fw7CN8XzHyxfPxqMvHUqpAABDAXAwsnFP94eNGkCm5S/ylh+93Dcs2FJtMmbqyzYbkoMZq5JQVMXWV4/CLfFQNUBjOuIXMTyPMePdFzbUonHJbLz4egc0nUESeHjdAvwhBVVlww3qw1HVCjkVYo3tO/vCSTOCU0XamMXu4sNqRYHHYEBOUAJUImEYKiEx9cl7Kenzzz8fe/fuRV9fH8LhMHbs2IEvfvGL+R5GYROL5R8LnkesJDSDzytBEjj0+yPo6ApAVnXoupEVzBizMmqzscm4468nLAUADId+RhUdLonHSC9WqVcas9Q1YNQouuGKT2H+GRUo80mYXu3D7PoyCMLw42luIHMY7mXAcUBE1kadu7bCEytuN4xZ7M4lJj7y5SUSVE1HVNGyfq+mAssXz4ZG92dKk/eVQH19PW6++WZ8/etfh6IoWLNmDRYunFwv2bEopsiGtvYe/M9z7yGiaJBEHmVeASFZg6xoCUlXxuaqEWoqxpKnev1RCLzhz2bMCPXsH4piIBCFN+ayyca8I7I6SgGY+NwiwtFY6ehYCGwwosIljZ9wNXJF8qe3PsLv/vQBdGb0QDaFelwwEzgY8xx57lSJVPVVhnKIt2pFUcDMGt+o7ONCfUby
TSHlIxTTb7mYsKW95GWXXWaVncgluS7rPNZ1M31YzbFGFdVq+D4U1lFV5obX7cXAUBRul2B1/jITocpL3BgYihpC1wypjNtYZXEum22tHVixOHVGcDrj9rhEhKLqKAVgFHiT4RL5mH/feIfOgMGAnJAHkc79e/mvx1HiNZSKohmFiAwXWGJIq5CkLHUqwQUgqXJInn1MmBRCPoJdv2UnMKV7DNvRHCN5T98Do3r6JitCJgg8JFGIFV0z3Dk9AxFIIo9pVV6subABW145gpO9IYgCh4pSN3ieM5KdMGyZjywZYRJVdGx99Si6+kL4v1Ymhvmm+yNb9o9nYOurR0ftWXhdAhSVobxEQn8gsYSCqrGM3AfbW40EM6MaqA6XyEMUOISjGhjToTHOqtbpcQtpnzubVi1ZpfmFGt3kjimtBOxojrG9tQOKqls9fYVYeYi9751GiUeEqulJ6/SYYy0vMdwSZgQUg+HPPtEdROu7p3Dn/zrPis7pHYzA4xJRVebGUEixBO94sVN73zuNDz7ag68tm58gGFP9yMy/W+WVP1aBQ8cHrfOZ2b/VZRJUBlSXueEPKVBVHQLPYXqNUa003WJ1J3uCsWqghu9f1RgUVYNbElBd7kFXXwgAh+nVyaODxlNokxUaZJXmH2p0kzumtBKwI7JhZE9fwKxIA4QiKgSBT1qnxxyr1y0aG75JwmD3vncawAEcOelHeakLtaLh4w6FFQgCB01hGF2gIfU47/tdG1wij0vPm53yR3ayJ2gJPIDhw5N+yKoOnjOyfsGYEV/PcVi0YBpeO3AKgsCjvspruVsWLZiWkdBUNcP3k1DOghlrnTu/tTjpfOIt81BEhUvirYifbFuNZJXmH4pSyh1TutG8HZENtRUeKDH3jIm5qWsWJwMS6/TEj9UflKGoKfw5MBTByEbzPq+EmjI3ZlR7x2sWNgpZNVxEA0MRq4NXOKqiqy+Ej7oDCIRVq4aO6eYBDF8/B6CmwosZtSXweUQc6hhI2jT+UMfAqDGPFTEkCJxVV4kxZlyTM15PxsjG6FFFxVBYidVGMsim1dgzGBkVZURWafZoa+/BbRtfQ9PGPWjevB9t7T0UpZRDprQSWNhQm1Qo5dJaW754dix00zg2hSUwnEULjK7Ts7ChFv/0qekIhBPLKCdDVbWEY5fII6LoWHPRPFSXu1FV5h61cTvuOXWjJ/CxU0M43R82FBFnFJ0bCivoH4omrG5iM7IUhykEzbINX1s2HwDwxI7DaP9ocFQG9FhCc1ZtCSpKXbHS10bSW5lXwqza5BVt4y1zjuMgiQLAkFCSOptWY6oQVLJKJ4+p0Pv94VGrxon8ltvae9C8eX+CQiESmdLuICD/kQ0LG2rReN5svLi3A5puhDd6XCICYRVgRralmRVr1ukBjId1x19PQNd1SCI/5mrAH1RQ4h1OpDIFkCkMfR7B6CswFB3zPGPBGENNuQdDIQWKqkPVGSRhuFmLuapRY8I9XgiO9Jn7gzL6/BFwHGeVsx5LaC5fPBtP7TqCqjIhrVr5I11Z5SUu9A4aiiy+cmq2rEaq5Z87zGfY4xKhxMJ5TVebWfcqXWjvJj2mvBLIFplEg5gN2uPfX1Xqwr7DPQlZsZLIY/ni2dbDGlE0CBwXa4SSvA+8zy0gImuIKtooAfTEjsOWMPS6RXjdIkIRBT0DkbR2CuI3lRkAn0eCrGiIyMbKQ9UYOI5Z4aiabiS1jVyaj/SZV5S60OePYmAoCo9LGFdoLmyoRUWFD0/vOJTW/R7pL/a6RZSXuBGVtYTKqSPbWU40uqeQYuenGtncAKa9m/QgJZAGE7Eokq1A6uNq7gg8hws/a/SQbd68PxYeaiR+8RwA3vh3QmN1zhDE1eUeVJYMJzctmF2J7a0dGAxE4Q/KqCh1WZuigsDjrNmVWL54thVemqr0UkJvYgYc7xqy3Fnm6oUxoNwngec5BCMq3JIwqtHLyB+yzyMZlUoDSkqhPJJFZ9fjzFpfyr/H
k8wyFwUO31j5iZTNfSZrIRZC7PxUxFTorixsAFNEUXqQEkiDbFgUbe09eO3AqYSontcOnMKcGeXD4aE+CX1D0YQYe8sFzwwBLKtG39pgWMaZ08uxYHalFZFTWeZGnz+CPn8UjDGIomBZ3PFC67lXP8S21g5EldGuouGOXIkb2pWlLkSiGiKKZvnaF3ysAk1Xf27UOZJFcoiigIZZXquXwRM7DqO2oiMrFnSmlnk2LUTKF8gupkKPyCp4jpuUq40iitKDlEAaZMOiGEvwmA8rYFjdaqwlo9sloNQjYiAQhTbCeg/LOjq6hvD+8QFwMMrNlvsk1FQY2cWDAcUSuiOF0sovzMXKL8xFW3sPfvPH99HrjwIYrk4KJCoDgediPXQTx3Do+CCu//9ewfWXfzrhGql85gtmV+bMR5uJZZ4tCzFZI51HXzqEay5dQIpggpj3bddbJ9HZHZiUYqW9m/QgJZAG2bAoxhI8X1s23+pRy8WargAMPrexOSYKAjhOTyjPzACEYnV6GIb7FVeXuTG9xodQRE1oxZjKYv1//3dtQmloDkCpT0R1uRen+kIx9xQHWU3uQ4oqDA/8vg0rz59jJb6lsswnaoFn29rOloW4ZXc7gmEZHMdbtZyCYRlbdreTEpgECxtqsfS8j0+6lDLt3aQHKYE0SNeiGEtYjSV4FjbUorzEhYisQWcMksijvMQFl8SjbzAKRdMhxDaKR2YEx28gayPKTMSPaywL3FwZAMO1+AFY7qmRrSFHouvAi3sTG9Qns8zjN65NxrPAcxHhkS0L0cxcHpnUZrxOFAK0dzM+UzpPIFukk28wMmHJFFZmXPJ4yS4RWcP0Gh9m1ZWizCdhMCjjVG8IUUUDwKzm9AkKAMNx+6YLKb7MxB2PtFqKKd1krfhxet0iyryS1fVrLHSmj1kuGphYfH0mY0+X7OWPcAm5H4C5h5NplgZB2AetBNJkPItiPFfHeEtTc6VgunUsMcIB0AEtLl403l9vuo90nVkKgueGG848ufMworIGl2TkDaiqDlHkUeYVk1rgI8c5vdqH5Ytno/XdU7GyFaMxaweN51OfiAWeqwiPbFiI9VUedPaFoWO4ZhNjwPRq2ngkigdSAlkiHWE1luAxBeRgQIaZkmUmlnEcB4k3/q9oRjIZxwGaBpT6JPiDUQiCUWiN5wyBDBhlFwSBR0SWrf0GPlbQrn9Ixoxqb9KxJBvnwoZa9A+9mVA4zoTjjDDQkRZ9MvfY1ZfMz8hHW8gRHmsumodHXzwYq3Zq9Gn2uAWsuWie3UMjiLQhJZAlJiusTEH4YMvfwGL7Ajoz/O1czB10Rl0JooqGyhIXmq461xKygZAMjuPAgVmN3s2GMy6Rj/n0GQA+bg9Bxyhfxjg0Xf05tLX34Ikdhy3lJvJAWYkbosAlWPSpfPlXXzI/YcN6PAo5wmNhQy2uaTybNh6JooaUQJbIhrBa2FCLhlkVljL5qDtouX7EWMGy+NWFabGbAtdsOMPiGs6YFT8ry4wSEKpmlLIo83msTOBMWNhQi+bra8eN2MlWLP7Chloc7fSPamxfKIJ2PLdSseQRFMs4iexDSiBLZCscLV6ZiAIXK+QGq2l8stWFeY1kDWc0TUd9tQ+qzlBfPZyBa64oxmIswTCe8MtmLH6qJLvJCKl8CL1iqV2Ty3GScil8SAlkkWxsNsYrk0hUg6YrKPEYdYDGKp8bvyowf3RmOQcgeVvFsVYpkxUM2fLl56L+S76Ec7HUrsnVOItFCTodUgIFiCnQ6+rKsOv1v2dkSY2liDI5z/bWDqgaw1AoarmQvG4xbcGQLV9+LqKD8iWcxxp7IVnIuYrAKhYlmEsK6XtOhW1K4L777oMgCLjxxhvtGkJRkK1kl0zPY7R4VMBxvNXi0R+MjuoLMNb1gMwUT/wPZkZdKZaeMzMn0UH5KiyWauwelzCmhdzW3oNdW9omXTYhk3F29YcRjqoJCr++Knn0WLo4vYBbsayE8p4sNjQ0hNtu
uw2bNm3K96WJDDCSzxKzYQHOSkrLNiOT7U52B/BgywEcPeVH74DR9SxbHaXy1RQmVYIgYqG7yRLgUjVVyWUzlAWzKzEYNHpPcAAUVcdgMIoFsysndV6nN9/JRaJjLsi7Eti1axfmzJmDb37zm/m+NJEBgsABnJFroGq60WJSNwRZOgJpvAzqkcT/YCKyFhfpxKHEKyIQVjAwFM1Kd7h8tSpMlZkcUfSU7Snjm6qkIziy0TnrUMcAyn0uiLGwZFHkUe5z4VDHwESmbeH0lpDF0oY07+6gyy+/HADws5/9bEKfr6kpzeJoCp+6ujJbrjtnZgVOdg8lVA/leSP34KldR1BR4cOis+tTfn7Xlja4XQI8LuMRc0kCIrKKXW+dxNLzPj7q/X0BGWVeMZbprMQS2zhouo7aylKUyiqqyr348fX/NOm5La0rQ0WFDy27j+B0XwjTqn1YfeG8MeczmWuNnO+ut06i3x+GJ85NFJFVzKgrRVdfCGVe455JMQEiChz6A/KoZ2HfwS48tesIRJFDRakLgYiS1nczkr6AjJoKD7i4vBHGWNJrZsJk77Ndz362mFFXmvJ7Tmdu+Zp/zpTAtm3bsGHDhoTX5s6di8cee2xS5+3tDUAfp6DZVKGurmzSlRQnytJzZuLJnYfBgYMoAGbpuopSF8ABT+84NGbTl87uAHweMaG9Jc9x6OwOJJ1TdanL8p8rqgaB54wsXMFotTnWZ5Mx3obcmbU+3LxmYcJn8nWvzXuraixh03zpOTOxvbUDA0EZpV7JundRRUNVqWvU+J7ecQjgAIHnoWrM+D+njfvdjCT+3pukumamTPQ+2/nsZ4uxvufx5pbN+fM8N6bxnDMlsGLFCqxYsSJXpyfSIBstFOMzmMtL3PC6RTDGxl3SZrqhGx9NJPAc1JiiHys/IhWFviE33qZ5uk1VsrXxWshZ2cVMsZSyphDRKUq2WijGZzCbpCOQMxUs8T+YUFgBkzX40siPSEYxhCamitbKpKlKtiKnikVYFSPFUMqalMAUpK29B//z3HuIKJphwfsk+DzShAThRK3EiQiW+B/MsZ5Q2o3mR1LsoYnpNlUZ77vJZCVYDMKKyA22KQHKD8gN5gogqqgQOM4qTQ0AXnfy8tFjMRkrcTKCJZNG8yPxSDxO9Yag6SwW6SJBEPgpF5o41ndT6C4xonCglcAUw3SFSKJglZbWGYM/pExYEBaTldjW3gN/2Oj5C8SUoD+KEo+ItRdPvRLPqb6bYnCJEYUBKYEphukKKS9xoc8fgc44KwHICZt921s74POI8LgE+IMyVE2HwHMoL3HZKvzyXT6g2F1iRP4gJTDFMDcLvW4R1eUe+IMyFFWHRxInnWRVDJjCj+M4eN3G480YQyii2jYmO1wzhdyMhygsSAlMMeI3Cz0uATzvhqbpBakAcmEdpyv8zGuf7AkaMfYCh1m1JTmx0O1wzVDYJ5EupASmGMUS7pcr6zgd4WdeW9UYghEFAAeowKm+UE4sdDtcM8XyHBD2Q0pgClIMG7m5so7TEX7mtYdCUXAcD4BB1RgGAzIkkceW3e1ZvX92uWaK4Tkg7IeUAJFXTDfM4eMDkEQB5SVGDgOQPes43a5naqwstlkZlQHQdIaTPUG0tfdkTYCSa4YoZPJeRZRwLvGVRSWRhxrLYQhFFAD527g0SxyLAg8trjQ2xxn/iVku95uqmihZ6UQhQCsBIm/Eu4AqSt3o80fAGOAPGjkM+bKOTcvc6xYRkTXrdaNEHlBeImXdX0+uGaJQISVA5I34DVIzhHUwEIWialY/5HwIyvh9A6NUNgMYIEmClV1cWeLK+TgIohAgJUDkjZEbpF63CJ7nUFniQtNV5+Z1LKZlHh+lRP56womQEiAmTbrx/oW4QUqhlITTISVATIpM4v0LVeCSv55wMqQEiEmRabz/VBW4+a4NRBDZgpQAMSmoUFnhdzIjiLGgPAFiUpgx9/E4rVBZ/GqI4zi4JQFClnMNCCJX0EqA
mBSFuNmbb9JdDZHLiChESAkQk6JQN3vzSTq1gZzoMiKlVxyQEiAmzVTd7E2XdFZDTuv05USlV6zkXQm8+eab2LBhAxRFQWVlJX784x9j1qxZ+R4GQWSNdFZDTttAT6X0trxyhFYHBUbelcB3v/td/OIXv8CCBQuwZcsW3HXXXdi4cWO+h0EQWWW81ZDTOn0lU3qqquG0P4ppDLQ6KCDyGh0kyzK+/e1vY8GCBQCAs846C52dnfkcAkHYwvLFs6FpOqKKBsYYooqWcgO9rb0Ht218DU0b96B58360tffYMOLJkSxqzB9UIFIUVcHBMcbY+G/LPrqu4/rrr8enP/1p/Pu//7sdQyCIvLLvYBdadh/B6b4QplX7sPrCeVh0dv2o9/yypQ2iaAjJqKJBVRmuXb1w1HsLmWTzONkTwrQqL0q9kvU+xhgCYRW/+v4lNo7W2eRMCWzbtg0bNmxIeG3u3Ll47LHHIMsybrnlFgwODuKhhx6CJEkpzjKa3t4AdN0WvZV36urK0N09ZPcwbMGpc2/evB8DQRmlXgn+oAx/UIaianBLIv7vlZ8oKrfJyOigQFiBqrMEl1hU0ZIWEHTq92+SzfnzPIeamtKUf8/ZnsCKFSuwYsWKUa8Hg0Fcf/31qKysxMaNGzNSAAQx1TF96cGIij5/BAAHnuMQUbSi85+P3CcxI4bSzSmhENP8YMvG8Jlnnokf/vCH4HlKWCaKj1wKJ3MDeTAgw1AAgM4ASeQt/3mxCsJMckr2HeyyNcTUSQoor0rgvffew65duzBv3jxcccUVAIBp06bh4YcfzucwCGLC5Dr+3cw5kBXNUgAMQLlPmhIhpenmlLTsPmJbXoXTchzyqgQ+8YlP4P3338/nJQkiq+Q66cs8x8MvHEQ4qkISeZT7JPg8EqKKNmVDSkfS1ReCR0r0FORLCTotsY/8MQSRAT2DEbjE3AqnhQ21+M+rP4eacjeqytzwusUxQ0qnIvXVPtsKE+bjOy4kSAkQRAbkq2rqorPrcfUl81FZ4kIooqKyxIWrL5k/JS3RZKy+cF7aeRXZxmmVcal2EEFkQD6rpjq5JpOpBO3YnHVaZVxSAgSRAVQ1NX/YpQSd9h2TEiCIDHGyhe4UnPQd054AQRCEgyElQBAE4WBICRAEQTgYUgIEQRAOhpQAQRCEgyElQBAE4WBICRAEQTgYUgIEQRAOhpQAQRCEgyElQBAE4WBICRAEQTgYUgIEQRAOhpQAQRCEgyElQBAE4WBICRAEQTiYvCuBffv2YfXq1bjssstw3XXXYXBwMN9DIAiCIGLkXQnceuutaG5uxvPPP4958+bhkUceyfcQCIIgiBh57yz20ksvQZIkKIqCrq4unHXWWfkeAkEQBNraexzTQnIsOMYYy/dF33//fXzzm9+EKIp4+umnMWPGjHwPgSAIB7PvYBd+2dIGUeTglgREFQ2qynDt6oVYdHa93cPLKzlTAtu2bcOGDRsSXps7dy4ee+wx6/ipp57C1q1b8dRTT6V93t7eAHQ973rLFurqytDdPWT3MGzByXMHaP65nn/z5v0YCMpwS4L1WlTRUFniQtNV5+bsuumSzfnzPIeamtKUf8+ZO2jFihVYsWJFwmvRaBR/+tOf8M///M8AgJUrV+Kee+7J1RAIwrGQq2NsegYj8HkSxZ9L5NEzGLFpRPaR141hURTxwx/+EAcOHABgrBbOPdd+rUsQU4m29h48ufMwBoIyfB4RA0EZT+48jLb2HruHVjDUVnggq3rCa7Kqo7bCY9OI7COvG8OCIODee+/FHXfcAU3TUF9fjx/96Ef5HAJBTHm2t3ZAEHjL1eGWBERjr9NqwGD54tl4cudhRGGsAGRVh6bpWL54tt1Dyzt5jw5atGgRWlpa8n1ZgnAM5OoYH1MZksvMBiVAEERuqa3wjNr0dKqrYywWNtQ6UuiPhMpGEMQUY/ni2dA0HVFFA2MMUUVzrKuDGB9aCRDEFINcHUQmkBIgiCkIuTqI
dCF3EEEQhIMhJUAQBOFgSAkQBEE4GFICBEEQDqboNoZ5nrN7CHnFafONx8lzB2j+NP/szH+889hSSpogCIIoDMgdRBAE4WBICRAEQTgYUgIEQRAOhpQAQRCEgyElQBAE4WBICRAEQTgYUgIEQRAOhpQAQRCEgyElQBAE4WBICRAEQTgYUgIFRCAQwJe+9CWcOHECALBnzx5cdtllWLZsGe69916bR5dbfv7zn6OxsRGNjY1obm4G4Jz533///bj00kvR2NiITZs2AXDO3OO55557cMsttwBw1vzXrVuHxsZGrFq1CqtWrcI777yT3/kzoiB4++232Ze+9CX2yU9+kh0/fpyFw2F2wQUXsI6ODqYoCrvmmmvY7t277R5mTnjttdfYV77yFRaNRpksy+zrX/86e/755x0x/9bWVrZ27VqmKAoLh8PsoosuYgcPHnTE3OPZs2cPW7x4Mfve977nqGdf13X2hS98gSmKYr2W7/nTSqBAeOaZZ/CDH/wA06ZNAwC0tbXhzDPPxMc+9jGIoojLLrsM27dvt3mUuaGurg633HILXC4XJElCQ0MDjh496oj5f/7zn8fjjz8OURTR29sLTdPg9/sdMXeTgYEB3HvvvbjuuusAOOvZ//DDDwEA11xzDVauXIknnngi7/MnJVAg/OhHP8KiRYus49OnT6Ours46njZtGrq6uuwYWs75h3/4B3z2s58FABw9ehTbtm0Dx3GOmb8kSXjggQfQ2NiIJUuWOOq7B4A77rgDN998M8rLywE469n3+/1YsmQJHnzwQTz22GN46qmncPLkybzOn5RAgaLrOjhuuA44YyzheCrywQcf4JprrkFTUxM+9rGPOWr+N910E/bu3YvOzk4cPXrUMXP/3e9+hxkzZmDJkiXWa0569s855xw0NzejrKwM1dXVWLNmDR544IG8zr/omso4henTp6O7u9s67u7utlxFU5E333wTN910E2677TY0NjbijTfecMT829vbIcsyzj77bHi9Xixbtgzbt2+HIAjWe6bq3AHgpZdeQnd3N1atWoXBwUGEQiF89NFHjpn/vn37oCiKpQQZY5g1a1Zen31aCRQon/nMZ/D3v/8dx44dg6ZpeOGFF/DFL37R7mHlhM7OTtxwww34yU9+gsbGRgDOmf+JEydw++23Q5ZlyLKMXbt2Ye3atY6YOwBs2rQJL7zwAv7whz/gpptuwsUXX4xf/epXjpn/0NAQmpubEY1GEQgE8Oyzz+I//uM/8jp/WgkUKG63G3fffTduvPFGRKNRXHDBBVi+fLndw8oJjzzyCKLRKO6++27rtbVr1zpi/hdccAHa2tpw+eWXQxAELFu2DI2Njaiurp7yc0+Fk579iy66CO+88w4uv/xy6LqOq666Cuecc05e50/tJQmCIBwMuYMIgiAcDCkBgiAIB0NKgCAIwsGQEiAIgnAwpAQIgiAcDCkBwpFcc8016Ovrm/R7Wltb8aUvfWnc65111llJz7Vr1y7cddddAIxqktu3b8eJEydwzjnnjHtOgsgGlCdAOJLXXnstK++ZLEuXLsXSpUtzfh2CSAWtBAjHceuttwIAvvGNb+CNN97AunXrcNlll2HlypXYunXrqPd0dnbilVdewdq1a7F69WpceOGFuO+++zK+7n333YcrrrgCq1atwiuvvAIAaGlpwbXXXpuVeRHERKCVAOE4NmzYgJaWFvz617/Gv/7rv6KpqQnLli1DV1cXrrzySpx55pkJ76mqqkJTUxPuvvtuzJkzB11dXbjooovw9a9/PaPrnnHGGbjzzjtx+PBhrFu3Dtu2bcvRDAkifUgJEI6lvb0d0WgUy5YtAwDU19dj2bJl+Mtf/pLgk+c4Dg899BB2796NF154Ae3t7WCMIRwOZ3S9r371qwCA+fPno6GhAW+99Vb2JkMQE4TcQYRj4ThuVIlexhhUVU14LRQK4YorrsC7776LT3ziE2hqaoIoisi04grPD//cdF2HKJINRtgP
KQHCkQiCgFmzZkEURezYsQMA0NXVhT/+8Y84//zzrfeoqopjx44hEAhg/fr1uPjii9Ha2gpZlqHrekbXfPbZZwEA7777Ljo6OvCZz3wmu5MiiAlApgjhSJYvX45/+7d/wy9+8Qvcdddd+NnPfgZN03DDDTfgvPPOs96zbt063H///bjwwguxYsUKuFwuzJ8/H/PmzcOxY8fgcrnSvubx48dx+eWXg+M4/PSnP0VlZWWOZkcQ6UNVRAmCIBwMrQQIIgv86le/wvPPP5/0b9/61rewcuXKPI+IINKDVgIEQRAOhjaGCYIgHAwpAYIgCAdDSoAgCMLBkBIgCIJwMKQECIIgHMz/D7RrkTIqFAnbAAAAAElFTkSuQmCC",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Create a visualization with Modin df\n",
    "sns.residplot(data=modin_tips, x=\"total_bill\", y=\"tip\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<AxesSubplot:xlabel='total_bill', ylabel='tip'>"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYEAAAEKCAYAAAD0Luk/AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAABA7ElEQVR4nO29eZhU5Zn//T1bbb1vNItBQjOIWUg0TBAnV1wYGbAjKMEJ0ZBMzO991Z+jwcmko8YxE18TtN+8cUkMZoxijBI1pCVuEAhKflGwDaJ2UBBpAw3SNL1X13q25/3j1Dld1V3VXdVdVaeqz/25Li851VXnPM+pU/d9P/dzLxxjjIEgCIJwJLzdAyAIgiDsg5QAQRCEgyElQBAE4WBICRAEQTgYUgIEQRAOhpQAQRCEg7FVCdxzzz245ZZb7BwCQRCEo7FNCezduxfPPvusXZcnCIIgAIh2XHRgYAD33nsvrrvuOhw6dCijz/b3B6Hrzshvq6kpRW9vwO5h2IKT5w7Q/Gn+2Zs/z3OoqipJ+XdblMAdd9yBm2++GZ2dnRl/dqzJTEVqakrtHoJtOHnuAM2f5p+f+eddCfzud7/DjBkzsGTJErS0tGT8+d7egGNWAnV1ZejuHrJ7GLbg5LkDNH+af/bmz/PcmAol70rgpZdeQnd3N1atWoXBwUGEQiH8+Mc/xm233ZbvoRAEQTievCuBTZs2Wf9uaWnBG2+8QQqAIAjCJihPgCAIwsHYsjFssnr1aqxevdrOIRAEQRQ0be092N7agZ7BCGorPFi+eDYWNtRm7fy2KgGCIAgiNW3tPXhy52EIAg+fR8RAUMaTOw8DQNYUAbmDCIIgCpTtrR0QBB5uSQDHcXBLAgSBx/bWjqxdg5QAQRBEgdIzGIFLTBTTLpFHz2Aka9cgJUAQBFGg1FZ4IKt6wmuyqqO2wpO1a5ASIAiCKFCWL54NTdMRVTQwxhBVNGiajuWLZ2ftGrQxTBAEUaCYm78UHUQQBOFQFjbUZlXoj4SUQBbJdTwvQRBEtiElkCXyEc9LEASRbWhjOEvkI56XIAgi25ASyBL5iOclCILINqQEskQ+4nkJgiCyDSmBLJGPeF6CIIhsQxvDWSIf8bwEQRDZhpRAFsl1PC9BEES2IXcQQRCEg6GVADHloSQ+gkgNKQFiSkNJfAQxNuQOIqY0lMRHEGNDSoCY0lASH0GMjS3uoPvvvx9//OMfwXEc1qxZg29+85t2DINwALUVHgwEZbglwXqNkviIfFLoe1J5Xwm88cYbeP311/Hcc8/h97//PX7zm9/gww8/zPcwCIdASXyEnZh7UgNBOWFPqq29x+6hWeRdCXz+85/H448/DlEU0dvbC03T4PP58j0MwiEsbKjF1ZfMR2WJC6GIisoSF66+ZH5BWWLE1KUY9qRscQdJkoQHHngAjz76KJYvX476+vq0P1tTU5rDkRUedXVldg/BNrI196V1ZVh63sezcq584uTvHpga8+8LyCjziuA4znpNFDj0B+Rx55ev+XOMMZaXKyUhHA7juuuuw6WXXoqvfOUraX2mtzcAXbdtyHmlrq4M3d1Ddg/DFpw8d4DmP1Xm37x5/6g9qaiiobLEhaarzk35uWzOn+e5MY3nvLuD2tvbcfDgQQCA1+vFsmXL8P777+d7GARBEDmnGPak8q4ETpw4gdtvvx2yLEOWZezatQuf+9zn8j0MgiCInFMMe1J53xO44IIL0NbWhssvvxyCIGDZsmVobGzM9zAIgiDyQqEXlrRlY/jGG2/EjTfeaMelCYIgiDgoY5ggCMLBkBIgCIJwMFRFlCBySKGXDCAIUgIEkSOojDVRDJA7iCByRDGUDCAIWgkQRI7oGYzA50n8iVEZ60TIXWY/tBIgiBxRW+GBrOoJr1EZ62GKocKmEyAlQBA5ohhKBtgJucsKA3IHEUSOMN0a5O5IDrnLCgNSAgSRQwq9ZICdUNe3woCUAEGANijt
YPni2Xhy52FEYawAZFUnd5kNkBIgHMFYQp7i+e2B3GWFASkBYsoznpCP36AEALckIApDOJFAyi3kLrMfig4ipjzjRaH0DEbgEhN/CrRBSTgFWgkQU57xolCcvEFJeyEErQSIKc94SVtOjeenZC0CICVAOIDxhHwxtADMBZSsRQDkDioYki3Ll9aV2T2sKUE6UShO3KCkZC0CICVQEKSKXqmo8OHMWp/dw5sSOFHIj4eT90KIYWxxB/385z9HY2MjGhsb0dzcbMcQCopUy/KW3UfsHhoxhXHqXgiRSN6VwJ49e/Dqq6/i2WefxdatW/Huu+9i586d+R5GQZEqRPF0X8imERFOwKl7IUQieXcH1dXV4ZZbboHL5QIANDQ04OTJk/keRkGRalk+rZpcQVOVQgnNJDcZwTHGmF0XP3r0KL761a/it7/9LebMmWPXMGxn38Eu/LKlDaJouIKiigZVZbh29UIsOrve7uHZzr6DXWjZfQRdfSHUV/uw+sJ5RX1f6PsmCgnblMAHH3yAa6+9FjfeeCOuuOKKtD/X2xuArtumt3JG0uig8z6O7u4hu4dmC3V1ZejuHkrYNI8vMlbMbovmzftHrfyiiobKEhearjoXwPD8nQrNP3vz53kONTWlKf9uS3TQm2++iZtuugm33XYbGhsb7RhCwUHL8uRMxbo+FJpJFBJ5VwKdnZ244YYbcO+992LJkiX5vjxRZExFgUmhmUQhkffooEceeQTRaBR33303Vq1ahVWrVuG3v/1tvodBFAlTsU8vhWYShUTeVwK33347br/99nxflihSMmk8UigRN+NBdfSJQoIyhomCJl2BWWyNYWgPiCgUSAkQGZNvizsdgTkVN5AzpVhWQkRhQUqAyIhCtbin4gZyJhTq90IUPqQEHE6m1mOhWtxOj7gp1O+FKHyon4CDmUhTkUJtxej0iJtC/V6IwodWAg5mItZjoVrcuY64KXR/e6F+L0ThQ0rAwUzEj55JyGa+yVXEjV3+9kwUTyF/L0RhQ0rAwUzEenRijLsd/vZ9B7syUjxO/F6I7EBKwMFM1Hp0Qox7vBU+GIiisswNxCnLXPvbW3YfyVjxOOF7IbIPKQEHQ9Zjcka6f/xBGX3+CDiOg9dt/GRy7W/v6gvBI9FGL5F7SAk4nMlYj4W+WTpRRrp/Kkpd6PNHMTAUhccl5MXfXl/tQ3d/iDZ6iZxDIaLEhDCt5a7+MIZCMj44MYAHWw7guVc/tHtok2ZkuKXPI6GqzAXGkLc2jKsvnOfokFcif9BKgJgQ21s7oGoMQyEZAAeB56HpDC++3oE5M8qxsKG2aFcKyTbMRVFAwyyv1fQl1yw6ux5XXzK/KO8fUVyQEihi7BSyPYMRhCIKAA48Z7zGc4CmM2xv7QCAoi1jkOmGea6+B9roJfIBKYEixe5aMbUVHvQPRSDww24TBkASjM3LiYRVxgvTGXWlWHrOTNuarwPpbZjb/T0QxGQhJVCk2F0rZvni2Wj/yA9NZ+A5QwEwAF63gNoKT0IiWiiiwB9SoKo6egciaGvvGbcUdL8/bKswTdcKt/t7IIjJQkqgSMk02zfbLouFDbVoXDIbL77eAU1nkAQeXrcASeSxfPFsbG/twEBQhqbp6BuKIuYxAschqXAfKUw9kgBVY7YJ03Tvl9OrlxLFDymBIiWTbN9cuSxWfmEu5swoTyksn9x5GIMBOaYAOHAcg9ctYDAg48GWv6FhVoX1/okI01z54jO5X9ms2VMo7jDCWZASKFIy2bzMtssiHeFrHj/Y8jcwxiCJxvUDERUcAMZYgnDNVJjm0hefyf3KVs2eQnOHEc6B8gSKlIUNtbj6kvmoLHGNG7uezTLDmZSfXthQi4ZZFair8qG+2oeIolurAkkU4JYECAKP7a0do0pBR2R1TGEaL6g5jks412TJ5H5l8j2Mxcj5eFxi1uZDEGNh20ogEAhg7dq1eOihh3DGGWfYNYyiJt3Ny2y6LDJdVcRbyqqqAwA4
jqG8xA1gWLiOjMgx3SEA0Lx5/6hVRy598Zner2yEcprzCUdV+IMyVI1B4IFQWJnUeQliPGxRAu+88w5uv/12HD161I7LO45slhnOVPjGC/fegQg4DqgscyetwRMvTOvqyrDr9b9bLhKOAz486ccDW9ows8YHj2TMIxdlFewoy1xb4bGyr43kO0DVGHSmJY2mIohsYYs76JlnnsEPfvADTJs2zY7LO45suSwAQ1jJMYveZDzhe7TTj46uABRNh6LqCEWUtEohbG/tgKLq6B2MoHsgAiV23dMDEfjDKkJhJSdlFbJ5v9Jl+eLZCIQUMAZwYNB1ABxQ4hHJJUTkFI4xxuy6+MUXX4zHH3+c3EFFxL6DXfhlSxtE0fDDRxUNqspw7eqFWHR2/aj3/3bHITy98zDAAQIHKBoDY0CJV8LcWRVYfeG8pJ8DgK/9YBuCYQWqlviI8hwws64EoiCgrMSF030hTKv2jXmuYuBrP9iGqKxB1XRIIo/KUjd8HhGBsIpfff8Su4dHTFGKLjqotzcAXbdNb+WVuroydHcP2T2MBM6s9WHt0nmjooPOrPUlHevW3e0AAIEztoQlgYOmMzCd4eY1CwEg6efq6sqgKDpME8XMMzCT0niOQyAk445vLEr4XKHdr0yYUe2z9iIkkYei6ghGVFSVuop6XhOhEJ/9fJLN+fM8h5qa0pR/LzolQNhPJhuhEVmFYBYXisFzxuvjIQocogozQkrjXucwNcsqx+9FGHOnyqFE7iElQEyasfIGPC4RUUWDEKcHdGa8Ph4za0vQ1R9GIKxAUY3wUp6HUbF0CgrH+E30/oCMqlLXqByMYq3MShQupAQIi4kImPgkJ4DhgxMDONQxAIHnMKPGh880VKP10GlourEC0BnAwLDsH8ffBzIt45oKDzRNx2BAhqrpmFbpwaIF07C9tQNP7DicU2GYb6FrrrKSuQOoWF1xUuiK21Yl8PLLL9t5eSKOiQiYtvYe/M9z7yGiaOA5JGzgajpDZ28Q/pCCxQum4Z32PkRkFR6XiGX/eAZWfmHuuGMamTswd2a5Zf3nWhi2tfdgy+52nOwJQhR4lJdItgtdKlZXfBSD4qaVgEMZaZ0EQnJGAsZ8uKOKCoHjEhSA6cPnOB6RqIrjpwOYXV9qXWvOjPK0x5ls/6F58/6cCkNzboMBGRzHQWcM/QEZ1WVuK4vXjh8wFasrPopBcZMScCDJrJPTfSFUl7sTkq/GEjDmwy2JRrVPFrd1aygAw/2jqDpO9oYwjSFrllAmwnAiS3Fzbjpj1qa2zhj8IQX1VV7bhG66mcyF7n5wEsWguEkJFCht7T3YtaUNnd2BrP+QzSQss8a/KBoZuf6gghKvy3rfWBE45sNdXuJCnz8yKoJH4DnoDAAHiFm2hDIRhhNZiptzEwUeqmb0S+BglL2wMyopnUzmybgfnnv1Q+z464mM3XZEarJZsiVXkBIoQMwfstsl5MSP+FFPEKGoUc2T5wBN08FgPJxRRUsqYEZal0zXcao3ZDSV4Y0y0WZMv8Aj9m8jw7ei1JVw/claQumWdZjoUtz84ZoKTmdcbF6crVFJ6XQ8m+icn3v1Qzy35yg4cBB4Izz1uT1HAYAUwSSwowRJppASKEDMH7LHJUKJ1cfJph9R0xjAjCQSIObDZ0Yilz8gJ1iCZsP4eOvyVF8Ig0EZgJG0xRgDx3HwShx8Hgn+oAKAob66BACgjkjuG2kJJXNfLK0rSzn+dNs/TnQpbv5wBYFHVZk7FpXEML3GizUXzbPVtTJejsZE57zjrycsBQAY2d2abrxOSmDiZNKq1C5ICRQgufYjmklYOuOssE2dMeiMoabUA58qwB9U8NxrR7Hv/W4ASLAuw7IGnuPA84bQUDUdAs+hpsKLO7+1OOFa1gYykltCqdwXFRU+nFnrSzmHdBLWJroUTxWVVEg/3FRMdM6TSeojxiYbVWZzCSmBAsT8Ibty5Ec0k7DCURWqpkMUeDBwEDjD3dEf6wbG
mOE6YsxQHNXlHnjdIlRVN/oKs2FrnzGGUGRYYMRb9x6JBzgOoYg6yhJK5b5o2X3EKisxUSazFC/0H24qJjrnyST1EcUNfcMFiPlDjsgqeI6btB9xpLtlwexK9PojqCxzW4LidF8I5eUu+EOK1fmLAdZur6oxnO4Pg+cNF5DOACmu8Uq8khpp3cuqDk3V8LVloytxplr1nO4LTWiu8RTDUjzbTHTOy/7xDDy35+iEkvqI4oaUQAFi/mB3vXUyaXRQJiGApkBWVB3hqIa+oSjaT/qxaH4t+gOydQ6RA1QGqKpsbBanqNGn64Ae0wwe3tgPGKmkRlr3ZrbvyL7CQGr3xbTqRFfQRMMei9WinwwTmbPp96foIOdhaynpieD0KqLxVnb8cj9Vvfvmzftxqi+EobBh4XMwrDye43DD6k8lKBYzQUpnbFT55pEIPAee4+BxC6gocQGMIaIYq4GPeoKoKnOD4ziEIgr6hqLW6qKuypcw3lTz+d9rPmvtCZjvUTWGUESBqungOR6NS2bnRUjZEXdvdxVNu3MN7J6/3eSziij1GC4yMu2t2zMYQTiqxcJBOXAcF4vh1xM+YzZSmVbpga4zcFzS0yVQU+lBRYnL6CkQlwwWiWrwx6KHTPdSsr7C8dcd2cAlvi/A9tYOqBrDUEiGpscKyOk6/vDaUax/4C9o3rw/aY/jbJBJT+WpghPn7GTSdgcNDg5CEASUlqbWKIRBLq2o8SKHRl7b4xJiljiDorNY5ypAELhR0UamG6GtvQdbXjmCEz3J/fIcAFE0LPfOniBqKr3QNB1dQ1Goqm4knoUMJRCVtdhnGEpjiWgjI53SCXsMRRQAZjQTg7kYjCpaTuuxFEPaf7Zx4pydzLhK4MMPP8R3v/tdHDx4EBzH4ZxzzkFzczNmzpyZj/EVHbkuGDVWCGC822QoGE0ZUspgFHjzuISkfzeF8nOvfojn9xyDNtL9xgHlPinWZpKDqmpWRBHPGefWGTAQkK2P8DyHQFiBSxLA81xGkU4el4CeQbNJPazNai52rVwKqUJO+8+VsVHIcyayz7hK4NZbb8WVV16JL3/5y2CM4emnn8b3v/99bNq0KR/jKzpybUWNFQJouk0GA1GMtW0i8ADAAeNsB638wlzMmVGOLbvb0RnLDhZ4oCpWSE3TdNRXeXB6IGK5mzRdT7i2wBtJR8bWE4eBoSgqYnXy06GtvcdyLQGJQ+ZglKQAhoVUtgVjJiUqtrxyBF39ERiJcj6subAhZ5ZzLo2NYih1QGSPcZVAOBzG2rVrreN169bhmWeeyemgiplcW1FjhQA+seNwrIm78d74lozx8DyPihIJEUXHeMS7auIFbGXJsCB/YEsbOM4osKOPOCUXM911BkuCp9rEjj//jLpSLD1nJra3dsDnlazaRvFz4XkO5SWGi0lWdXhcAh598SAisgZNZ/AHZTz64kFc03h22oIxWTjtawdOjVuv59EXDyIYUcHFdGtnbxCPvnQI11y6ICeKIJfGRjGUOiCyx7hKYO7cudi/fz/OPfdcAMDhw4epMfwYpGNFTdZajffdDzdW6YDHJaB/SE8QlCMVgChwVvTP9GpvRnNL5bufWVuC0/1h6LHcAoE3lAEDRkUZaTrDb7YfwrrlicJxpGV7snsIDz57AKqmwyUKKC+RUFclwh+UEZU1MABlPhc8LsFqw6jIuiWIzb2DYETFlleOjBp3su8AGN2n4LUDp/BPn5qOQx0DY9bricha7LoxZcg4RKJqRkI5mRJM9dlcGhtOzK9wMuMqgZMnT2LdunU466yzIIoi3nvvPdTV1eGyyy4DADz//PM5H2QxMZ4Vla1lfLLzhMLG5qkp+kcqgGGXjSEcF8yuRPPm/ZP+oa+5sMEaS78/ClXTwfEAS+GT6h2S8bPf/w0lHhEza0ssV5Zp2fb7IxgKDVv9sqKhb0hHdZkb9dU+RBUNIgeU+lwJY3+w5cCwII7NVweLuWjGvnePvnQIUdnIoJZEAeUl
LnjdIqIADnUMoOmqc1POv2cwYhTSi4uoMvZG9LSF8sgx9fvDYz4XuXbZODG/wqmMqwT+8z//Mx/jmDKMZ0VteeUIBgMyNJ1BFHmU+6QJNSpJ5g4AjCze/kB0lFuG52DVyBcFI0z0tQOnrNj7/qEI2j/yTyj2Pn7OwbACXWco9UkYCERTbjtoOkuI7InIGqrK3AhFFPhDSsJ7GQBdY/AHFWsvYm1Sl1KskmmcMGYM4LjEQYy8d7rOEAwb4aeSYDTI6fNHUF3uiW1Kjy3Iays88AeN/Arz0jozQlnTFcojx+SRjD4NqZ4LctkQ2SKlEmhvb0dDQwNKSkqS/v2Tn/xkzgZV7KSyotrae3CyNwSe56wSzn1DUVSVujJexqdyB6iqjpu+vNDapFQ1HbzAoSZW9wcwwir9scqYQyEZABeLvWd48fUOzJlRnrEVmGzvIBCSocTcQcn2J8zIHr+sIRhWEYyoMHMXh7uTDZcxUFTN2otINr76ah86e4MJhfHiq5mmunfGxrNxRQbzehz8QRk87x5XkC9fPNvaE9BNRQQGjyf9DfBM3TvksiGyRUol0NzcjF/+8pe48sorMWPGDMQnFofDYezdu3fCF33++eexceNGqKqKb3zjG7j66qsnfK5iYntrB0SBh84M4WZk7xoW7tyZwy0XTSF6qi8MWdEgCBxmxdwmqcothCIKBgMyGDOsR7Pkselm4EeUeBAELiH2HhgO75zo5uJIP3vjkjPx4t4OKJqe1DUlCjzCURX+YBQMAItbvZjvF3jOUgjzz6gY0y2z5sIGPPrSIUSiKjRdh8Dz8HhcWHNhQ8L7Rt47VTNyG0SBM1YdMYteSdO6XthQi2saz7YUL8cZiieT6KCJuHfIZUNkg5RK4J577sHAwAAaGhrwm9/8xqoZrygKvva1r034gl1dXbj33nvR0tICl8uFtWvXYvHixZg3b96Ez1ks9AxGUFHqQv9Q1LJWGQNUffSegaox+IPDoZ6HOgZwqGMAZ9QZwiXeHaCqGvqHZAAM1eUey8dd7hURUfSkVTy3t3bggxMDEPjhpHEGQBImtrnY1t6TIID9QRkne0NYdFYt3jjUPSrXgItF9viDsmE5s1jTlrj38dzwHobAc+kJ40sXjGsdj3SlMMaswmmCEDtmgEcSU0YyJbv2ZATyyDFFZLVg3Dt2l5AgckvKshHf+c53cN555+HIkSNYsmQJzj//fCxZsgQXXXQRPvWpT034gnv27MF5552HyspK+Hw+/Mu//Au2b9+e9udfffXPAABVVdHUtB4vv7wTABCJRNDUtB5//vPLAIBgMICmpvV47bX/A8DIeG5qWo/XX98DAOjr60NT03rs2/cGAKC7+zSamtbjrbfeBAB0dp5EU9N6tLW9DQA4caIDTU3r8d57BwAAR4/+HU1N6/H++4cAAO3tR9DUtB7t7UcAAO+/fwhNTetx9OjfAQDvvXcAx1sfhRLqRXW5B8rgMZx68zHo0QHMrC2B5j+Gpqb12PryOxAEHt0nDqL7rcehyQFjfj2H0fv24zjR2Y0ndx7GgbdbEXj3SZRKGgYDCiLd76K/7Qm4RQZdZzj9931o2/UQPC4OKgM62/+K4HtPoumqc7GwoRZV6mF0v/0ENJ2BMYbB42/g9NtPwusWUFvhwdatW/Df//19675v2fI07rrrDuv4mWc2Y8OGO63jB37xPzj+5tOWL7y//RUc378Fx7uDuPHLnwY79Rf4P3jRELQ8h/CxXej82/NQVA06AwaP/BGDR/4IMVbLePCDbRg4stNYuegMQ0dexLY/PGld76c/vQePP/6oddzc/CNs3vw4FjbUoumqc8GdeBHHDuzCEzsOo3nzfnz3lluwZcvTAAyBrbS3YKjjdfQPRQFw6PvbZgRP7jOqpzKgv+0JfKbmpCXsmprWY+fO7Tl79p745V34/MeM0NuB3tM48pdfYckcBQsbarPy7DU1rceJE0apjra2t9HUtB6dnScBAG+99Saamtaj
u/s0AGDfvjfQ1LQefX19aGvvwcbf/AF/3fZzSFwUA0EZv/j1s7jhxhsQDBrP5p///DKamtYjEjGMh5df3ommpvVQVaO0+M6d29HUtN76rrZtewG33vod6/iFF7biv/7re9bx1q1bcPPNN6f97G3e/Diam39kHT/++KP46U/vsY43bXoY99//E+v44Yc34sEH77OOH3ro53jooZ9bxw8+eB8efnijdXz//T/Bpk0PW8epnj2TDRvuxDPPbLaO77rrDuvZA4D//u/vY+vWLdbxf/3X9/DCC1ut41tv/Q6effZZ63iyz178vUpGypXAI488EhvQrdiwYcOYJ8mE06dPo66uzjqeNm0a2tra0v58SYkbdXVlUFUVkiSgrMyDuroyRCISJElAebkXdXVl8Hq5hGNJ0iBJAioqjGOOiyYca1ow4TgaLYEkCais9KGurgzBYOLx4KAPkiSgqso47utLPD59OvG4stKHuiofwACXxKOu0oOgxKOy1I1vrfo0NP9RSJKAgaCCmhoRmpY8hl9nQJ8/ip3HTqBMFPD/XP8FfPv+vfBGPTh1moMk8ugdUmLx+YBLEuDhBQyIHPoDMupiHbvOnluLfdU+cPywpS2JRjezryxbgA/ePgmXS7DeX1rqhsslWsclJW643cPHgZhryWxMYtYe6uoPY+l5H8fBfXMwODiI739/FfYd7MKGe/6CYDAMj1tCKKxYXc4EngeL8wu5JQFV5W6c6OBx8Fg/jvWEsOjseng8kvUsAEg43newCx1dAZRWl2NmqQuBiILj3QHMD8o41hNCy+4j+KgnhBln8qiu8KKiVINfEmKrIg6iYNyLs+fWWuePf9Zy9eydc/Z0XLd4MU6cOIE773wF535iRtaePeO4JOG4uto4rqjwJj2uqSnBr19+H5LIQ+A5uEQBkkeCX+TRF5BRW1uG0tJSlJcb76+rK4PH40FZmcc6FkUx4RgAyso8Cc9SaenoYwBpP3slJW54PFLCcSAwfOzzuaAoroRjQWAJx/HX83pdcLtdCcc+nyvps5bs2O0WE45dLhGlpfHHAkpLPSP+nngcP55sPHtjkfcqohs3bkQ0GsX69esBAM888wwOHDiAO+8cW1uZFHsV0fGW1s2b92MgKKOrNzTKj25iZuHynBGjDxgtHE1/8kfdhoUmCDymx0oym01fmq8/P6PxJCNZQ/IX93aAgcV6DbDYCsPw/X/7yoVjlrp+sOUAdGaEWMY8Q1Y464za4U1df1BGKKLEvn8O9VWepO0ezXsY71+PKhrEWO/c+Iqlp/tCqKnwwOeRrPemulf5pFCqaDZt3AOfR7SMCiA/96dQ5m8X+awimvd+AtOnT8e+ffus4+7ubkybNi3fw7CN8XzHyxfPxqMvHUqpAABDAXAwsnFP94eNGkCm5S/ylh+93Dcs2FJtMmbqyzYbkoMZq5JQVMXWV4/CLfFQNUBjOuIXMTyPMePdFzbUonHJbLz4egc0nUESeHjdAvwhBVVlww3qw1HVCjkVYo3tO/vCSTOCU0XamMXu4sNqRYHHYEBOUAJUImEYKiEx9cl7Kenzzz8fe/fuRV9fH8LhMHbs2IEvfvGL+R5GYROL5R8LnkesJDSDzytBEjj0+yPo6ApAVnXoupEVzBizMmqzscm4468nLAUADId+RhUdLonHSC9WqVcas9Q1YNQouuGKT2H+GRUo80mYXu3D7PoyCMLw42luIHMY7mXAcUBE1kadu7bCEytuN4xZ7M4lJj7y5SUSVE1HVNGyfq+mAssXz4ZG92dKk/eVQH19PW6++WZ8/etfh6IoWLNmDRYunFwv2bEopsiGtvYe/M9z7yGiaJBEHmVeASFZg6xoCUlXxuaqEWoqxpKnev1RCLzhz2bMCPXsH4piIBCFN+ayyca8I7I6SgGY+NwiwtFY6ehYCGwwosIljZ9wNXJF8qe3PsLv/vQBdGb0QDaFelwwEzgY8xx57lSJVPVVhnKIt2pFUcDMGt+o7ONCfUby
TSHlIxTTb7mYsKW95GWXXWaVncgluS7rPNZ1M31YzbFGFdVq+D4U1lFV5obX7cXAUBRul2B1/jITocpL3BgYihpC1wypjNtYZXEum22tHVixOHVGcDrj9rhEhKLqKAVgFHiT4RL5mH/feIfOgMGAnJAHkc79e/mvx1HiNZSKohmFiAwXWGJIq5CkLHUqwQUgqXJInn1MmBRCPoJdv2UnMKV7DNvRHCN5T98Do3r6JitCJgg8JFGIFV0z3Dk9AxFIIo9pVV6subABW145gpO9IYgCh4pSN3ieM5KdMGyZjywZYRJVdGx99Si6+kL4v1Ymhvmm+yNb9o9nYOurR0ftWXhdAhSVobxEQn8gsYSCqrGM3AfbW40EM6MaqA6XyEMUOISjGhjToTHOqtbpcQtpnzubVi1ZpfmFGt3kjimtBOxojrG9tQOKqls9fYVYeYi9751GiUeEqulJ6/SYYy0vMdwSZgQUg+HPPtEdROu7p3Dn/zrPis7pHYzA4xJRVebGUEixBO94sVN73zuNDz7ag68tm58gGFP9yMy/W+WVP1aBQ8cHrfOZ2b/VZRJUBlSXueEPKVBVHQLPYXqNUa003WJ1J3uCsWqghu9f1RgUVYNbElBd7kFXXwgAh+nVyaODxlNokxUaZJXmH2p0kzumtBKwI7JhZE9fwKxIA4QiKgSBT1qnxxyr1y0aG75JwmD3vncawAEcOelHeakLtaLh4w6FFQgCB01hGF2gIfU47/tdG1wij0vPm53yR3ayJ2gJPIDhw5N+yKoOnjOyfsGYEV/PcVi0YBpeO3AKgsCjvspruVsWLZiWkdBUNcP3k1DOghlrnTu/tTjpfOIt81BEhUvirYifbFuNZJXmH4pSyh1TutG8HZENtRUeKDH3jIm5qWsWJwMS6/TEj9UflKGoKfw5MBTByEbzPq+EmjI3ZlR7x2sWNgpZNVxEA0MRq4NXOKqiqy+Ej7oDCIRVq4aO6eYBDF8/B6CmwosZtSXweUQc6hhI2jT+UMfAqDGPFTEkCJxVV4kxZlyTM15PxsjG6FFFxVBYidVGMsim1dgzGBkVZURWafZoa+/BbRtfQ9PGPWjevB9t7T0UpZRDprQSWNhQm1Qo5dJaW754dix00zg2hSUwnEULjK7Ts7ChFv/0qekIhBPLKCdDVbWEY5fII6LoWHPRPFSXu1FV5h61cTvuOXWjJ/CxU0M43R82FBFnFJ0bCivoH4omrG5iM7IUhykEzbINX1s2HwDwxI7DaP9ocFQG9FhCc1ZtCSpKXbHS10bSW5lXwqza5BVt4y1zjuMgiQLAkFCSOptWY6oQVLJKJ4+p0Pv94VGrxon8ltvae9C8eX+CQiESmdLuICD/kQ0LG2rReN5svLi3A5puhDd6XCICYRVgRralmRVr1ukBjId1x19PQNd1SCI/5mrAH1RQ4h1OpDIFkCkMfR7B6CswFB3zPGPBGENNuQdDIQWKqkPVGSRhuFmLuapRY8I9XgiO9Jn7gzL6/BFwHGeVsx5LaC5fPBtP7TqCqjIhrVr5I11Z5SUu9A4aiiy+cmq2rEaq5Z87zGfY4xKhxMJ5TVebWfcqXWjvJj2mvBLIFplEg5gN2uPfX1Xqwr7DPQlZsZLIY/ni2dbDGlE0CBwXa4SSvA+8zy0gImuIKtooAfTEjsOWMPS6RXjdIkIRBT0DkbR2CuI3lRkAn0eCrGiIyMbKQ9UYOI5Z4aiabiS1jVyaj/SZV5S60OePYmAoCo9LGFdoLmyoRUWFD0/vOJTW/R7pL/a6RZSXuBGVtYTKqSPbWU40uqeQYuenGtncAKa9m/QgJZAGE7Eokq1A6uNq7gg8hws/a/SQbd68PxYeaiR+8RwA3vh3QmN1zhDE1eUeVJYMJzctmF2J7a0dGAxE4Q/KqCh1WZuigsDjrNmVWL54thVemqr0UkJvYgYc7xqy3Fnm6oUxoNwngec5BCMq3JIwqtHLyB+yzyMZlUoDSkqhPJJFZ9fjzFpfyr/H
k8wyFwUO31j5iZTNfSZrIRZC7PxUxFTorixsAFNEUXqQEkiDbFgUbe09eO3AqYSontcOnMKcGeXD4aE+CX1D0YQYe8sFzwwBLKtG39pgWMaZ08uxYHalFZFTWeZGnz+CPn8UjDGIomBZ3PFC67lXP8S21g5EldGuouGOXIkb2pWlLkSiGiKKZvnaF3ysAk1Xf27UOZJFcoiigIZZXquXwRM7DqO2oiMrFnSmlnk2LUTKF8gupkKPyCp4jpuUq40iitKDlEAaZMOiGEvwmA8rYFjdaqwlo9sloNQjYiAQhTbCeg/LOjq6hvD+8QFwMMrNlvsk1FQY2cWDAcUSuiOF0sovzMXKL8xFW3sPfvPH99HrjwIYrk4KJCoDgediPXQTx3Do+CCu//9ewfWXfzrhGql85gtmV+bMR5uJZZ4tCzFZI51HXzqEay5dQIpggpj3bddbJ9HZHZiUYqW9m/QgJZAG2bAoxhI8X1s23+pRy8WargAMPrexOSYKAjhOTyjPzACEYnV6GIb7FVeXuTG9xodQRE1oxZjKYv1//3dtQmloDkCpT0R1uRen+kIx9xQHWU3uQ4oqDA/8vg0rz59jJb6lsswnaoFn29rOloW4ZXc7gmEZHMdbtZyCYRlbdreTEpgECxtqsfS8j0+6lDLt3aQHKYE0SNeiGEtYjSV4FjbUorzEhYisQWcMksijvMQFl8SjbzAKRdMhxDaKR2YEx28gayPKTMSPaywL3FwZAMO1+AFY7qmRrSFHouvAi3sTG9Qns8zjN65NxrPAcxHhkS0L0cxcHpnUZrxOFAK0dzM+UzpPIFukk28wMmHJFFZmXPJ4yS4RWcP0Gh9m1ZWizCdhMCjjVG8IUUUDwKzm9AkKAMNx+6YLKb7MxB2PtFqKKd1krfhxet0iyryS1fVrLHSmj1kuGphYfH0mY0+X7OWPcAm5H4C5h5NplgZB2AetBNJkPItiPFfHeEtTc6VgunUsMcIB0AEtLl403l9vuo90nVkKgueGG848ufMworIGl2TkDaiqDlHkUeYVk1rgI8c5vdqH5Ytno/XdU7GyFaMxaweN51OfiAWeqwiPbFiI9VUedPaFoWO4ZhNjwPRq2ngkigdSAlkiHWE1luAxBeRgQIaZkmUmlnEcB4k3/q9oRjIZxwGaBpT6JPiDUQiCUWiN5wyBDBhlFwSBR0SWrf0GPlbQrn9Ixoxqb9KxJBvnwoZa9A+9mVA4zoTjjDDQkRZ9MvfY1ZfMz8hHW8gRHmsumodHXzwYq3Zq9Gn2uAWsuWie3UMjiLQhJZAlJiusTEH4YMvfwGL7Ajoz/O1czB10Rl0JooqGyhIXmq461xKygZAMjuPAgVmN3s2GMy6Rj/n0GQA+bg9Bxyhfxjg0Xf05tLX34Ikdhy3lJvJAWYkbosAlWPSpfPlXXzI/YcN6PAo5wmNhQy2uaTybNh6JooaUQJbIhrBa2FCLhlkVljL5qDtouX7EWMGy+NWFabGbAtdsOMPiGs6YFT8ry4wSEKpmlLIo83msTOBMWNhQi+bra8eN2MlWLP7Chloc7fSPamxfKIJ2PLdSseQRFMs4iexDSiBLZCscLV6ZiAIXK+QGq2l8stWFeY1kDWc0TUd9tQ+qzlBfPZyBa64oxmIswTCe8MtmLH6qJLvJCKl8CL1iqV2Ty3GScil8SAlkkWxsNsYrk0hUg6YrKPEYdYDGKp8bvyowf3RmOQcgeVvFsVYpkxUM2fLl56L+S76Ec7HUrsnVOItFCTodUgIFiCnQ6+rKsOv1v2dkSY2liDI5z/bWDqgaw1AoarmQvG4xbcGQLV9+LqKD8iWcxxp7IVnIuYrAKhYlmEsK6XtOhW1K4L777oMgCLjxxhvtGkJRkK1kl0zPY7R4VMBxvNXi0R+MjuoLMNb1gMwUT/wPZkZdKZaeMzMn0UH5KiyWauwelzCmhdzW3oNdW9omXTYhk3F29YcRjqoJCr++Knn0WLo4vYBbsayE8p4sNjQ0hNtu
uw2bNm3K96WJDDCSzxKzYQHOSkrLNiOT7U52B/BgywEcPeVH74DR9SxbHaXy1RQmVYIgYqG7yRLgUjVVyWUzlAWzKzEYNHpPcAAUVcdgMIoFsysndV6nN9/JRaJjLsi7Eti1axfmzJmDb37zm/m+NJEBgsABnJFroGq60WJSNwRZOgJpvAzqkcT/YCKyFhfpxKHEKyIQVjAwFM1Kd7h8tSpMlZkcUfSU7Snjm6qkIziy0TnrUMcAyn0uiLGwZFHkUe5z4VDHwESmbeH0lpDF0oY07+6gyy+/HADws5/9bEKfr6kpzeJoCp+6ujJbrjtnZgVOdg8lVA/leSP34KldR1BR4cOis+tTfn7Xlja4XQI8LuMRc0kCIrKKXW+dxNLzPj7q/X0BGWVeMZbprMQS2zhouo7aylKUyiqqyr348fX/NOm5La0rQ0WFDy27j+B0XwjTqn1YfeG8MeczmWuNnO+ut06i3x+GJ85NFJFVzKgrRVdfCGVe455JMQEiChz6A/KoZ2HfwS48tesIRJFDRakLgYiS1nczkr6AjJoKD7i4vBHGWNJrZsJk77Ndz362mFFXmvJ7Tmdu+Zp/zpTAtm3bsGHDhoTX5s6di8cee2xS5+3tDUAfp6DZVKGurmzSlRQnytJzZuLJnYfBgYMoAGbpuopSF8ABT+84NGbTl87uAHweMaG9Jc9x6OwOJJ1TdanL8p8rqgaB54wsXMFotTnWZ5Mx3obcmbU+3LxmYcJn8nWvzXuraixh03zpOTOxvbUDA0EZpV7JundRRUNVqWvU+J7ecQjgAIHnoWrM+D+njfvdjCT+3pukumamTPQ+2/nsZ4uxvufx5pbN+fM8N6bxnDMlsGLFCqxYsSJXpyfSIBstFOMzmMtL3PC6RTDGxl3SZrqhGx9NJPAc1JiiHys/IhWFviE33qZ5uk1VsrXxWshZ2cVMsZSyphDRKUq2WijGZzCbpCOQMxUs8T+YUFgBkzX40siPSEYxhCamitbKpKlKtiKnikVYFSPFUMqalMAUpK29B//z3HuIKJphwfsk+DzShAThRK3EiQiW+B/MsZ5Q2o3mR1LsoYnpNlUZ77vJZCVYDMKKyA22KQHKD8gN5gogqqgQOM4qTQ0AXnfy8tFjMRkrcTKCJZNG8yPxSDxO9Yag6SwW6SJBEPgpF5o41ndT6C4xonCglcAUw3SFSKJglZbWGYM/pExYEBaTldjW3gN/2Oj5C8SUoD+KEo+ItRdPvRLPqb6bYnCJEYUBKYEphukKKS9xoc8fgc44KwHICZt921s74POI8LgE+IMyVE2HwHMoL3HZKvzyXT6g2F1iRP4gJTDFMDcLvW4R1eUe+IMyFFWHRxInnWRVDJjCj+M4eN3G480YQyii2jYmO1wzhdyMhygsSAlMMeI3Cz0uATzvhqbpBakAcmEdpyv8zGuf7AkaMfYCh1m1JTmx0O1wzVDYJ5EupASmGMUS7pcr6zgd4WdeW9UYghEFAAeowKm+UE4sdDtcM8XyHBD2Q0pgClIMG7m5so7TEX7mtYdCUXAcD4BB1RgGAzIkkceW3e1ZvX92uWaK4Tkg7IeUAJFXTDfM4eMDkEQB5SVGDgOQPes43a5naqwstlkZlQHQdIaTPUG0tfdkTYCSa4YoZPJeRZRwLvGVRSWRhxrLYQhFFAD527g0SxyLAg8trjQ2xxn/iVku95uqmihZ6UQhQCsBIm/Eu4AqSt3o80fAGOAPGjkM+bKOTcvc6xYRkTXrdaNEHlBeImXdX0+uGaJQISVA5I34DVIzhHUwEIWialY/5HwIyvh9A6NUNgMYIEmClV1cWeLK+TgIohAgJUDkjZEbpF63CJ7nUFniQtNV5+Z1LKZlHh+lRP56womQEiAmTbrx/oW4QUqhlITTISVATIpM4v0LVeCSv55wMqQEiEmRabz/VBW4+a4NRBDZgpQAMSmoUFnhdzIjiLGgPAFiUpgx9/E4rVBZ/GqI4zi4JQFClnMNCCJX0EqA
mBSFuNmbb9JdDZHLiChESAkQk6JQN3vzSTq1gZzoMiKlVxyQEiAmzVTd7E2XdFZDTuv05USlV6zkXQm8+eab2LBhAxRFQWVlJX784x9j1qxZ+R4GQWSNdFZDTttAT6X0trxyhFYHBUbelcB3v/td/OIXv8CCBQuwZcsW3HXXXdi4cWO+h0EQWWW81ZDTOn0lU3qqquG0P4ppDLQ6KCDyGh0kyzK+/e1vY8GCBQCAs846C52dnfkcAkHYwvLFs6FpOqKKBsYYooqWcgO9rb0Ht218DU0b96B58360tffYMOLJkSxqzB9UIFIUVcHBMcbY+G/LPrqu4/rrr8enP/1p/Pu//7sdQyCIvLLvYBdadh/B6b4QplX7sPrCeVh0dv2o9/yypQ2iaAjJqKJBVRmuXb1w1HsLmWTzONkTwrQqL0q9kvU+xhgCYRW/+v4lNo7W2eRMCWzbtg0bNmxIeG3u3Ll47LHHIMsybrnlFgwODuKhhx6CJEkpzjKa3t4AdN0WvZV36urK0N09ZPcwbMGpc2/evB8DQRmlXgn+oAx/UIaianBLIv7vlZ8oKrfJyOigQFiBqrMEl1hU0ZIWEHTq92+SzfnzPIeamtKUf8/ZnsCKFSuwYsWKUa8Hg0Fcf/31qKysxMaNGzNSAAQx1TF96cGIij5/BAAHnuMQUbSi85+P3CcxI4bSzSmhENP8YMvG8Jlnnokf/vCH4HlKWCaKj1wKJ3MDeTAgw1AAgM4ASeQt/3mxCsJMckr2HeyyNcTUSQoor0rgvffew65duzBv3jxcccUVAIBp06bh4YcfzucwCGLC5Dr+3cw5kBXNUgAMQLlPmhIhpenmlLTsPmJbXoXTchzyqgQ+8YlP4P3338/nJQkiq+Q66cs8x8MvHEQ4qkISeZT7JPg8EqKKNmVDSkfS1ReCR0r0FORLCTotsY/8MQSRAT2DEbjE3AqnhQ21+M+rP4eacjeqytzwusUxQ0qnIvXVPtsKE+bjOy4kSAkQRAbkq2rqorPrcfUl81FZ4kIooqKyxIWrL5k/JS3RZKy+cF7aeRXZxmmVcal2EEFkQD6rpjq5JpOpBO3YnHVaZVxSAgSRAVQ1NX/YpQSd9h2TEiCIDHGyhe4UnPQd054AQRCEgyElQBAE4WBICRAEQTgYUgIEQRAOhpQAQRCEgyElQBAE4WBICRAEQTgYUgIEQRAOhpQAQRCEgyElQBAE4WBICRAEQTgYUgIEQRAOhpQAQRCEgyElQBAE4WBICRAEQTiYvCuBffv2YfXq1bjssstw3XXXYXBwMN9DIAiCIGLkXQnceuutaG5uxvPPP4958+bhkUceyfcQCIIgiBh57yz20ksvQZIkKIqCrq4unHXWWfkeAkEQBNraexzTQnIsOMYYy/dF33//fXzzm9+EKIp4+umnMWPGjHwPgSAIB7PvYBd+2dIGUeTglgREFQ2qynDt6oVYdHa93cPLKzlTAtu2bcOGDRsSXps7dy4ee+wx6/ipp57C1q1b8dRTT6V93t7eAHQ973rLFurqytDdPWT3MGzByXMHaP65nn/z5v0YCMpwS4L1WlTRUFniQtNV5+bsuumSzfnzPIeamtKUf8+ZO2jFihVYsWJFwmvRaBR/+tOf8M///M8AgJUrV+Kee+7J1RAIwrGQq2NsegYj8HkSxZ9L5NEzGLFpRPaR141hURTxwx/+EAcOHABgrBbOPdd+rUsQU4m29h48ufMwBoIyfB4RA0EZT+48jLb2HruHVjDUVnggq3rCa7Kqo7bCY9OI7COvG8OCIODee+/FHXfcAU3TUF9fjx/96Ef5HAJBTHm2t3ZAEHjL1eGWBERjr9NqwGD54tl4cudhRGGsAGRVh6bpWL54tt1Dyzt5jw5atGgRWlpa8n1ZgnAM5OoYH1MZksvMBiVAEERuqa3wjNr0dKqrYywWNtQ6UuiPhMpGEMQUY/ni2dA0HVFFA2MMUUVzrKuDGB9aCRDEFINcHUQmkBIgiCkIuTqI
dCF3EEEQhIMhJUAQBOFgSAkQBEE4GFICBEEQDqboNoZ5nrN7CHnFafONx8lzB2j+NP/szH+889hSSpogCIIoDMgdRBAE4WBICRAEQTgYUgIEQRAOhpQAQRCEgyElQBAE4WBICRAEQTgYUgIEQRAOhpQAQRCEgyElQBAE4WBICRAEQTgYUgIFRCAQwJe+9CWcOHECALBnzx5cdtllWLZsGe69916bR5dbfv7zn6OxsRGNjY1obm4G4Jz533///bj00kvR2NiITZs2AXDO3OO55557cMsttwBw1vzXrVuHxsZGrFq1CqtWrcI777yT3/kzoiB4++232Ze+9CX2yU9+kh0/fpyFw2F2wQUXsI6ODqYoCrvmmmvY7t277R5mTnjttdfYV77yFRaNRpksy+zrX/86e/755x0x/9bWVrZ27VqmKAoLh8PsoosuYgcPHnTE3OPZs2cPW7x4Mfve977nqGdf13X2hS98gSmKYr2W7/nTSqBAeOaZZ/CDH/wA06ZNAwC0tbXhzDPPxMc+9jGIoojLLrsM27dvt3mUuaGurg633HILXC4XJElCQ0MDjh496oj5f/7zn8fjjz8OURTR29sLTdPg9/sdMXeTgYEB3HvvvbjuuusAOOvZ//DDDwEA11xzDVauXIknnngi7/MnJVAg/OhHP8KiRYus49OnT6Ours46njZtGrq6uuwYWs75h3/4B3z2s58FABw9ehTbtm0Dx3GOmb8kSXjggQfQ2NiIJUuWOOq7B4A77rgDN998M8rLywE469n3+/1YsmQJHnzwQTz22GN46qmncPLkybzOn5RAgaLrOjhuuA44YyzheCrywQcf4JprrkFTUxM+9rGPOWr+N910E/bu3YvOzk4cPXrUMXP/3e9+hxkzZmDJkiXWa0569s855xw0NzejrKwM1dXVWLNmDR544IG8zr/omso4henTp6O7u9s67u7utlxFU5E333wTN910E2677TY0NjbijTfecMT829vbIcsyzj77bHi9Xixbtgzbt2+HIAjWe6bq3AHgpZdeQnd3N1atWoXBwUGEQiF89NFHjpn/vn37oCiKpQQZY5g1a1Zen31aCRQon/nMZ/D3v/8dx44dg6ZpeOGFF/DFL37R7mHlhM7OTtxwww34yU9+gsbGRgDOmf+JEydw++23Q5ZlyLKMXbt2Ye3atY6YOwBs2rQJL7zwAv7whz/gpptuwsUXX4xf/epXjpn/0NAQmpubEY1GEQgE8Oyzz+I//uM/8jp/WgkUKG63G3fffTduvPFGRKNRXHDBBVi+fLndw8oJjzzyCKLRKO6++27rtbVr1zpi/hdccAHa2tpw+eWXQxAELFu2DI2Njaiurp7yc0+Fk579iy66CO+88w4uv/xy6LqOq666Cuecc05e50/tJQmCIBwMuYMIgiAcDCkBgiAIB0NKgCAIwsGQEiAIgnAwpAQIgiAcDCkBwpFcc8016Ovrm/R7Wltb8aUvfWnc65111llJz7Vr1y7cddddAIxqktu3b8eJEydwzjnnjHtOgsgGlCdAOJLXXnstK++ZLEuXLsXSpUtzfh2CSAWtBAjHceuttwIAvvGNb+CNN97AunXrcNlll2HlypXYunXrqPd0dnbilVdewdq1a7F69WpceOGFuO+++zK+7n333YcrrrgCq1atwiuvvAIAaGlpwbXXXpuVeRHERKCVAOE4NmzYgJaWFvz617/Gv/7rv6KpqQnLli1DV1cXrrzySpx55pkJ76mqqkJTUxPuvvtuzJkzB11dXbjooovw9a9/PaPrnnHGGbjzzjtx+PBhrFu3Dtu2bcvRDAkifUgJEI6lvb0d0WgUy5YtAwDU19dj2bJl+Mtf/pLgk+c4Dg899BB2796NF154Ae3t7WCMIRwOZ3S9r371qwCA+fPno6GhAW+99Vb2JkMQE4TcQYRj4ThuVIlexhhUVU14LRQK4YorrsC7776LT3ziE2hqaoIoisi04grPD//cdF2HKJINRtgP
KQHCkQiCgFmzZkEURezYsQMA0NXVhT/+8Y84//zzrfeoqopjx44hEAhg/fr1uPjii9Ha2gpZlqHrekbXfPbZZwEA7777Ljo6OvCZz3wmu5MiiAlApgjhSJYvX45/+7d/wy9+8Qvcdddd+NnPfgZN03DDDTfgvPPOs96zbt063H///bjwwguxYsUKuFwuzJ8/H/PmzcOxY8fgcrnSvubx48dx+eWXg+M4/PSnP0VlZWWOZkcQ6UNVRAmCIBwMrQQIIgv86le/wvPPP5/0b9/61rewcuXKPI+IINKDVgIEQRAOhjaGCYIgHAwpAYIgCAdDSoAgCMLBkBIgCIJwMKQECIIgHMz/D7RrkTIqFAnbAAAAAElFTkSuQmCC",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Create a visualization with pandas df\n",
    "sns.residplot(data=pandas_tips, x=\"total_bill\", y=\"tip\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<seaborn.axisgrid.JointGrid at 0x7fc3bd50aa90>"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAaEAAAGkCAYAAACYZZpxAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAACHGElEQVR4nOzddXxcx7nw8d8cWl6tmM3sGGKmxGFmTpqUb/EWb2+5TUopv7ntLd5yUwo3zA6ZYrZjZpQsptXigfePtWUrK9uSLGkF8/180lqj3XOelVb7nJkz84xwHMdBkiRJkjJAyXQAkiRJ0tAlk5AkSZKUMTIJSZIkSRkjk5AkSZKUMTIJSZIkSRkjk5AkSZKUMVqmA+iKmpoWsrO9NDREMh3KKfX3+EDG2FNkjD1jqMSYnx/ooWgGlwHXE9I0NdMhnFZ/jw9kjD1FxtgzZIxD24DqCUn9n5OIYh3diVWzH7vpKE60GWwTVB3hyULJLkHNH4laOBqhuTIdriRJGSaTkHTWHNvE3LeW5PY3sap2o2SXoGQVIrzZKNmloKhgmzixMHbtAczdK7Gbq1FLJqCPPx9t+DSEIt+KkjQUyb98qdscK0li6+skNz6D8Gajlp2DPuUyhGac+bmJGFbVThLr/k182YPoU6/EmHRhp54rSdLgIZOQ1C3mgQ3Elv0N4QthzLgBJVTUpecLw41WPhWtfCp2YyXm7pW0bnoe15xb0cYu6KWoJUnqb2QSkrrEiYWJLf0LVvUe9MmXoOaPPOtjKqFijFk3YjUcIbHhWRLbXiNxw6eArLMPWJKkfm3AzY6TMses3EHro9/AAVznvb9HEtDJ1OxSjIXvQc0bScVfvkZ843M4jt2j55AkqX+RPSHpjBzHIbH5JRLrnsaYdgVqweheO5cQCtrIGYTGTKJu6RNYBzfhvvhjKN5Qr51TkqTMkT0h6bQc2yL21p9JbnkV18L39GoCOpkWyMGYezsikEfksW9iVmzrk/NKktS3ZE9IOiXHTBB95Zc4sTCu+Xch9L5d1yMUBX3cQpTsUmKv/Ap92pUYU69ECNGncUiS1HtkT0jqkJOIEnnuJ2BZGLNu7PMEdDI1fwTGwrtJ7niL2JLf4JiJjMUiSVLPkklISuMkokSe/THC5UOffhVCyXzJEsUTxDX/TpxYmMhT92NHGjMdkiRJPUAmIakdJxkn8txPEN4Q+jmX9quhL6Hq6NOvRsktJ/LEt7DqDmY6JEmSzpJMQlIbx0oSffF/Uj2gcy7pVwnoOCEE+tgFaOPPI/LMjzAPbsp0SJIknQWZhCQAHNsm+upvwHHQp17eLxPQybSSiRgzryf2+u9IbF2S6XAkSeommYQkHMchvvzvOOFa9OlXI8TAeFuoOWUY8+8kseFZYisfkgtbJWkAGhifNlKvSmx6AfPwOxgzb0SoA2vWvuLLxrXgPViHNxN75Vdy5pwkDTAyCQ1xyf1rSW56HmP2zRmdhn02hOHBmHsbTiJC5JkfYEebMx2SJEmdNLAue6UeZdUeIPbGH3HNvgXFE+zUc0zbobLR5GiTSXPMJmk5GKog6FEoDGqUhDRUpe/vJwlVQ59+DeaOt4g88W08V30eNVTS53FIktQ1MgkNUXakieiL/4Mx+ZIzbsOQtBzWHYjx9r4Iu6qSBNwKuX4Vn6GgKqnE1Bp3qG+1aI7ZjC3QmTncw4zhbrxG33W2hRDoE85H+LKJPHk/nos/jlY2uc/OL0lS18kkNAQ5ZpLoSz9DLZ2EWjLhlI+LJmxe2dbKku2tFAQ0JhQZnD/Wi+c0iSWatDlQm2Tl3igPr2lm1gg3V0z2UxDsu7eaVj4F4c0ituTXGDNvxJh8cZ+dW5KkrpFJaAiqfeF3CEVHG7uww+/bjsOyXRGeWB9meK7GbbOC5Pg6VzXBoytMKHYxodhFa9xm4+EY9z9Xy7QyNzecGyC7k8c5W2ruMMT8u0iseQKr7gDuhe8dcJMuJGkokBMT
hpjE1iVED2xGn95xIdDasMlPXqzj1e0RbjjXzxXn+DudgN7N51JYMNrLBxZm4QD3PV3DMxtbSFrOWb6KzlF82bgWvgen8SiRp76H3drQJ+eVJKnzZBIaQqyq3cRXP0b2+bcjtPSZcGv2R/neM7WUhjRunx2gsIeG0Ny6wsIxHt4zN8jWyjjfeqqG3dV9M5VaaC70mTeg5JQTefxezCNb++S8kiR1jhyfGCLsSCPRl3+BMfUKtGAuNEbavmfZDo+uaWbdwRg3zui55PNuWR6V66cH2FmV4NevNTBnpJsbZwQxtN6dTZcq9TMfJbuY2Ku/RpuwGNesG/tFYVZJGupkT2gIcGyT6Mu/QC2bjFo4pt33ogmbn79az57aJHfNDfZaAjrZuEKDe+YHOdJo8p1najhYl+z1cwKoeSNwLXov1pGtRJ78HnZzdZ+cV5KkU5NJaAiIr/gXQNpEhMaIxQ9fqMOlCW6c7set993bwWMoXDXFx8zhbh54uY4XNoexnd6/VyTcfow5t6Dmj6T18fuIb35ZlvuRpAySw3GDXHL3Csz963AtvKfdRISqZpMHXq5jcomL2SPcGSlYKoRgYrGLkpDGC5tb2VIR50OLQoS8vTtMJoRAGzULpWAkyU0vYu5eifv8D6LmlPbqeSVJSid7QoOYVXeQ2LK/Ycy8HmG429oP1Mb58Qt1zBzuZs5IT8YrZmd5VG6dGSDXp/Ltp2vZcCjWJ+dV/LkY8+9ELRhN5On7iS3/B068tU/OLUlSiuwJDVJOLEz0xZ+jT7oIJVjQ1r6/NsH/Lmng/HEeJhT1n1pxiiKYP9rDsByNf7zdxI5qkxumenH18hChEAJtxLmoxeNI7lhK60NfRj/3WoxJFyJUvVfPLUmS7AkNSo5tEX3ll6gFo9BKJ7W1761J8PNX67lqela/SkAnK83WuXtukLqwybefrmVPX03ldvkwpl6OMecWzL2raP3Xl0hsex3H6ptJE5I0VMkkNAjF334IJxlDm7C4rW1PdYL/XVLPpZN8TCjxZDC6M3PpCtfPzGb+aA+/fK2Bh1Y3ETf7aIFrsADX7JvRp11JcsdbtP7zC8Q3PCeH6SSpl8gkNMgkdizF3Lsa49xrEUrq17u7OsEvXqvn8sk+RuUbGY6w88Yem8pd0Why35M1bDkS77NzqzlluObcgjHzRqyKLYT/+QVib/0Fq/5wn8UgSUOBvCc0iFhVu4mv/CeuebcjjFRvZ1dVgl+9Xs8Vk/2MyBt49zi8hsJVU/zsq0nw1xWNDMvVuXVWkIJA37x1laxCjOnX4MRaMA9uIvrMDxHBfIyJF2JnXdgnMUjSYCaT0CBht9QSfennGFOvRAnkA7DjaJzfvN7AlVP8DM8deAnoZCPzDcpzdNYejHH/s7XMGenh6ql+sjx9U/VAuAPo4xaijZmPXb2H5I63OLDyn6jDpqOPPw+1ePyA2RZdkvoTmYQGASfeSvS5n6CNmoNaOBqArRVxfvdmA1dN9TMsZ2AnoOM0VTB3pIcpJS7e3hfjm/+uYd4oD5dN9pHr75u3slAU1KKxqEVjCbhs6retJfbWn8FMoI1ZgD5uAWq2XG8kSZ0lk9AA55gJoi/+DJFThjZyJgAbDsb4y/JGrpnmpyx7cCSgk3ldChdO8DJ7hJt1B2N8++laxhUaXDDex8QSA6WP1j2pHj/6qNnoo2ZjN1djHdmaGq7zZKGNW4g+Zj6KN6tPYpGkgUomoQHMsW2iS34Lioo+KXV/YvnuCI+ubeaGcwMUZQ3uX6/frXD+OC/zRnnYVhnnoTVNRBMOc0a4mT3Sw/Bcvc8W4irBApRgAdqE87FrD2Id2UJi7b9RC8agj1+ENuLcDiuXS9JQN7g/pQYxx3GIvflHnNZ6jFk3AYIXNod5ZVsrt8wMkusfOhWiDU0wrdzNtHI3tS0mO6oS/PaNRkzbYUqpiyllbsYXGX2y1bgQCmr+CNT8ETiTE1hHd5HY/DKxt/6CNuJc9HGLUIsntM1c
lKShTiahAchxHOLLHsSu2Y8x91ZsofLQqma2VMS5fXaAoHvoJKB3ywto5AU0Fo6BulaLfTUJXtwc5g9LTYqDGhOKDcYXGYwpMHq9YKvQDLSyyWhlk3FiYcyKbcSWPQiJCNro+antJXKHZbxskiRlkkxCA4zj2MSXPoh1dCfGnFuJ2hr/91o9kaTDbbMCfVoJu7/L9ank+jzMGgGm5VDRZHKoPsm/17dwtMmkIKgxvtBgfJGLsYUGPlfv/eyE+6T7Ry01WEe2E33xf0DV0UbPQR81ByWnXCYkaciRSWgAcWyT2Bt/xK4/jDHnVo6EFX79Wi3lOTqXT/agKPID7FQ0VTAsR2+bKWjaDkebTA43mDx/rKeU61eZUGQwqcTFuMLe6ykpgXyUCflo4xfhNB3FqtxB9IUHQChow89FG56qZSdr10lDgUxCA4STiBJ95Zc4yRj6rJt4c6/JE+tbWDzOy6QSecO7qzRFUJatt80etGyHqmaTg/Umz2wMU9FkUhrSmFziYnKJixF5OmoPJ3khBCJUjBIqRpuwGKelpm3Bsd1cg1o4BrV0ElrxeJS8EQhV/rlKg498Vw8AdksN0Rf+BxHIJzLxWh58M0xti8lts4bWBITepCqCkpBOSSiVlJKWw5FGk4N1SVbvj9IctRlbaDCl1MXEEhf5PfxzF0Igjs2wY+wCnEQUu+4gVs1ekjvewgnXo+SUohaMQs0fiZI7DCVULHtL0oAnk1A/Zx7cSOz136OMmsvS+HiefqaeqeVuLpkY7PErc+kEXRWMyNUZcazSRGvc5kBdkg2HYjy1IYymwrThPkblKowtcPX4xYAwPKjF41GLxwPgmHHsxqPYTUdJ7l6BvfZJnEgDwp+Lkl2Kkl2Gml2CEipGCRXJ6eDSgCGTUD/lJOPE336Y5N417Cq9jsfecaGpUW6ZFSCvj6oDSCf4XAqTSlxMKnHhOA51rRbVrYJlu6I8tKoZXRWMyjcYXaAzPDc1zNeTU8KF5kLNG46aN7ytzbFMnNZ67JZanHAtyapd2OE6nNZ6hDuIklVEbekIEu4ClOwS1FAJwu3vsZgkqSfIT7N+yDy4kchbf2WbOoGX4jfRsg0WjHYzpqDvFl9KpyaEIM+vMaLIzaRCFcdxaIjYVDaa7K5OsHx3lJoWE59LoSSkUZylUZilke/XyA+oZHtVNPXsf49C1U4M4Z3EcWycSBNOuA7HbMY8sB5nyyvYLTUI1Uj1lLJLUbPLUEJFqWE9X7asfSdlhExC/YhVu5+apU/wdqXBssSVaLrOzBFuxhX2XSkaqeuEEOT4VHJ8KpNJDYPZjkNTxKau1aKu1WLT4TjNkQhNUZuWmI3PpZDtVcn2KW3PTX2d+neWR+n2cKsQCsKXDb5s/CEvZmMESK0vI9aS6i211GEe2YKzc2kqWSWjqaG9QD4ikIfiz0XxhhCeLITbn+pBGZ7UMJ8qL4akniOTUIY5jk319k2sX72BdbUeDltTGFPo5pJxLkpCmvxjH6AUIcj2pZLKmHd9z7YdwnGblngqIYVjNvtqkryTiBOO2TRHbSIJm6AnlaDy/Cr5AZW8QKo3ledXyfIqXb4wEUKAJ4jqCUL+yHbfc8xEqvcUacSJNmPXH8Gq3IGTiOAkYpCM4SRjYMbBtkFRQdVAKCCU1LEVBRCpNkUBRUMoGmg6QjNAdyMM77GkFkB4gijeLOJmKXbSnWqT7/chRyahPmbbDpV1Yfbs3M/O3YfYUW0RdXRGZBUycUI2V+W70HtgqEbqvxRFEPSoBE+zDYVlO7QcS0hNMZvasMW+2iRNUZvGiE0saZPlSfWacv0qOb5UzyroUQm6FfxuBa8h8Fud25FWaAYimA/B/DM+1nFssK3Uf46dSko44Dip3pbjpNodCywLxzbBSoKZxDFjOIkYTmMlTvUenHgr1asjJMONYCUQvhyUQB5KsBAlq/DYcGMhSjA/lcikQUcmoV5g2w5NrQnqm2PUNsWorm2i4mgdFbURqsIO
XmIUGq0UBTWuOCebgvwQiqwlJp1EVQQhr0rI23GiSlrHklTMpjlq0dBqcaTBJJKwiSQdogmbaMIhblajiNRsP00VaErq2KoCiqCt5yEAIU60KYK2x+pq6j9DE7h1gUcXeAwFjyHw6gpeQ8fnEnhdCr5j7V3ppYVCXhobI6neWLQp1SNrbcSs2o2zby1OpAGntRHh8qWGCoMFiGABaiAvNYToy0nd09LljMCBaEAloeMVAbpaGSAcTZJMWhy/JnRIjY87zvF/g2M72I6DZaeu5izbwbJsTBvMSDPxRAIzaZMwLZJJi1jSIhY3U3/scYvWhE0k4dCaFCQsUIVDQE3gI4JfRMlz2YwpVMkZ58UI5CGMYT36s+kK23b6/U1oGePpGRrk+lVyzzDZze3RCUcSWFaqSoR1rKNiO6m/AfvYH4XT9j/HvwcWDo4Nlk3q78BOlT9KWA6tUUi2OCQsk4TpEDchZjrEkw4OqWTm1gQe46TEpQtcxxKZoQlcaqr4bKDWxkya6IpAVUJoWjZKNqg5AlUAAhRAmBGIhxHxME5zK6JuJ068FScRwW82ogsLXH4Utw9cPjB8CMOTGgLU3QjdANVI/b9iINTjQ4oqQlFTQ4xCtA0xpv6d+jrZFINI5FgwSupemRw67BHCcZzO9dclSZIkqYf170tNSZIkaVCTSUiSJEnKGJmEJEmSpIyRSUiSJEnKGJmEJEmSpIyRSUiSJEnKGJmEJEmSpIyRSUiSJEnKGJmEJEmSpIyRSUiSJEnKmAFVOw7ANC0aGiKZDuOUsrO9/To+kDH2FBljzxgqMebnB7r0+Lq6MLY9OKqqne61D7iekKaduvx9f9Df4wMZY0+RMfYMGePQ1qtJKBwOc80113D48GEAHnroIa655hquvfZavvKVr5BIJHrz9JIkSVI/12tJaOPGjdx5553s378fgH379vGHP/yBf/3rXzz11FPYts0//vGP3jq9JEmSNAD0WhJ6+OGHuffeeykoKADAMAzuvfde/H4/QgjGjRtHRUVFb51ekiRJGgB6fT+hiy66iL/+9a+UlZW1tdXX13PLLbfw/e9/n7lz557V8W3bpra2lvr6BizLOttwBz2Px015eTm6rmc6FEmSpL6fHVdVVcWHP/xhbr755m4noJqalrZ/19dXI4QgKysfVdUyvtuhpimYpp3RGE7FcRxaW5s5dOgQgUB+psM5rfz8QLvfc38kY+wZQyVGOTuuY306O27Pnj3ccccd3HjjjXzyk5/skWMmEjFCoVw0Tc94AurvhBD4fEGi0VimQ5EkSQL6sCcUDof50Ic+xGc/+1luuOGGHjyygxADbqZ5xshELUlSf9Jnn96PPvootbW1/OlPf+L666/n+uuv52c/+1lfnV6SJEnqh3q9J7RkyRIA3v/+9/P+97+/t08nSZIkDSByHEuSJEnKmAFXO66/iEQi3H//tzh8+BCKIhg/fiL//d9f5a233uKPf/w9ppnE7XbzyU9+lnPOmcr993+LaDTKd77zA/bu3cOnP/0xfvGL/2PEiJGZfimSJEkZI5NQN7355mtEIhH+/Od/YFkWP/nJ9zly5DC/+c0v+fnPf0NWVoi9e/fwuc99gn/969987nNf5IMffA/PP/8M//jHX/n0pz8vE5AkSUOeTELdNHXqdP7v/37Ff/7nR5g9ey633nonq1e/TW1tLZ/5zCfaHieEwuHDhxg7dhzf+tb9fOQj7+fyy6/issuuzGD0kiRJ/YNMQt1UUlLKv/71BOvXr2Xt2tV87nOf4J57PsCsWbP51re+3/a4qqqj5OWlFoYePHiArKwsdu3aQTKZlFULJEka8uTEhG564olHuf/+bzFnzjw+8YlPM2fOfJqbm1m1aiUHDuwHYMWKpbzvfXcSj8eprKzgZz/7KQ888EuGDRvBr3/988y+AEmSpH5A9oS66Yorrmb9+rXcffetuFxuCguLuOWWOxg9ejT33vtVHMdBVVV++MP/h2EY3Hff17jz
znsYNWoMn//8l3jf++5g1qy5LFiwKNMvRZIkKWN6vYBpbzi5htPRowcoKhqewWja68+1446rrj5EQUF5psM4raFST6y3yRh7hqwdd3b6Te04SZIkSTqZTEKSJElSxsgkJEmSJGWMTEKSJElSxsgkJEmSJGWMTEKSJElSxsh1Qr2gsrKCO++8iREjRrVr/+EP/x+FhUU9fq5PfeqjPPro0z16XEmSpL4gk1AvycvL589//kemw5AkSerXhmwSWrHlKI+/sYe65ji5QRc3LR7N/Mk920t5t/r6On784/upqqpCURQ++tFPMnv2XP7wh99SVXWUQ4cO0tjYwHvf+0HWrl3N1q2bGTMmVfjUsix++tPUNhD19fWMGTOG++77XqeOL0mS1F8NySS0YstR/vL8dhLHKhvUNcf5y/PbAXosEdXW1vD+99/V9vVll13Bjh3buPrq61i0aDG1tbV84hMfaust7d27h9/85o+8885GPvOZj/OXv/yL8vJh3H33rezevYvW1jCapvPb3/4J27b59Kc/xooVyxg/fmLbOX72s590eHyv19cjr0mSJKmnDckk9Pgbe9oS0HEJ0+bxN/b0WBLqaDju6qsv5sCBA/z+978FwDRNjhw5DMDs2XPRNI2iomJyc/MYOXJU23FaWpqZMWMWwWAWjz32MAcP7ufw4UNEo9F2x1+zZlWHxx87dnyPvCZJkqSeNiSTUF1zvEvtPcWybH7+818TDGYBUFtbS3Z2Nm+++TqaduJXoapq2nOXLn2D3//+t9x66x1cddV1NDY28u6yf6c6viRJUn81JKdo5wZdXWrvKTNnzuLxxx8BYN++vbz3vbcTj8c69dw1a1Zx0UWXcPXV1+H3+1m/fi22bfXY8SVJkjJhSPaEblo8ut09IQBDU7hp8ehePe/nPvdFfvSj7/G+992B4zh84xvf7vT9mmuvvZFvfetrvPLKi2iazpQpU6moqGDmzJ45viRJUiYM2a0cemt2nNzKoWcMlfL+vU3G2DPkVg5n53SvfUj2hCA1C663p2RLkiR1l2PbgMh0GL1uSN4TkiRJ6vfsZKYj6BMyCUmSJPVDjimTkCRJkpQplkxCkiRJUqaYiUxH0CdkEpIkSeqPLJmEJEmSpAxxkjIJSd1UWVnBokWz+NGP2le53rVrB4sWzeK55069988tt1xLZWVFb4coSVJ/J2fHSWcjKyuLt99egWWdKK3z6qsvEwrJWm6SJJ3ZUOkJDdnFqoldy0msfgwnXIfw52LMvhlj7IIeO77H42Xs2HFs3LieGTNmAbBq1UpmzZoDwGOPPcQLLzxHLBZF13Xuu+97DBs2ou35lmXxq1/9jPXr12JZNldddQ233/6eHotPkqR+Ts6OG7wSu5YTf+vPOOE6AJxwHfG3/kxi1/IePc+FF17Ka6+9CsC2bVsYM2Ysuq7T2trKm2++wS9+8VsefPBhFiw4j8cee7jdc59++gkA/vjHv/O73/2Ft956g40b1/dofJIk9V/OEElCQ7InlFj9WPr0RzNBYvVjPdobWrTofH73u19j2zavvvoyF110Ka+++hI+n4/77vsur7zyEocOHeTtt5en7fmzZs0qdu3aydq1awCIRiPs2bObadPO7bH4JEnqx4bI7LghmYSO94A6295dXq+XMWPGsmnTBtatW83HPvafvPrqS1RXV/HRj36Am2++jXnzFpCTk8uuXTvaPdeybD7xiU+zePFFADQ2NuLxeHo0PkmS+i/HNDMdQp/o1eG4cDjMNddcw+HDqd1Dly9fzrXXXstll13GAw880JunPi3hz+1S+9m46KJL+M1vfsH48ZPaNq5zu92UlZVz++3vYeLESbz55msd7g301FP/xjRNIpEIn/jEh9iy5Z0ej0+S+gudJIYTR4jBX7SzMxx7aPSEei0Jbdy4kTvvvJP9+/cDEIvF+OpXv8qvfvUrnnvuOTZv3swbb7zRW6c/LWP2zaAZ7Rs1I9XewxYuPJ9du3Zw8cWX
trXpuo5t29x996188IN3M3z4CCoq2k/LvuGGWygvL+cDH7iLD3/4Hq666tq2CQ6SNJg4VhJX/U7CT/+Qpke+ibLnDXSrNdNhZd4Q6Qn12nDcww8/zL333ssXv/hFADZt2sTw4cMpL0/tY3PttdfywgsvsHjx4t4K4ZSO3/fprdlxxcUlPPpoai2Q1+vl1VeXtX3va1+7D4Cbb769w+cefx7AZz/73z0SjyT1Z/GKPdQ++n0gtXdO4yt/JHQpiFGL07awH1JsmYTOyve+136hZnV1Nfn5+W1fFxQUUFVV1VunPyNj7IIenYQgSVLXCQHRQ1s5noCOC695jqyRc0ngykxg/YCcHdfDbNtuN9brOE63x35P3qWvulpB0/rXTPP+Fk9HurrLYybIGHtGf4+x+UD6FvSqN4g/y49quDMQUcf6+ufo0hXy+vnvrif0WRIqKiqipqam7euamhoKCgq6dayTt9m1bbtfbac9ELb3BobEdsq9TcbYM7KGTULxBrEjzcdaBMEFt1LflAT6R28gE9t7x6Pxfv+766x+sb33tGnT2LdvHwcOHKCsrIxnnnmGm2/uiYkAAsexEaL/9z76gyE9xi71S0Z+Obm3fhPz6A7seASjZDyJQNm7R+iGHsc682MGgT5LQi6Xix/84Ad86lOfIh6Ps3jxYq644oqzPq5huGlsrCUQyEZVNTm98zQcx6G1tRmPp/8McUgSQMyVB8PzEAJiDjIBAQyRC8ZeT0JLlixp+/f8+fN56qmnevT42dn5hMNN1NdXpa21yQRFUbDt/jscp2kGY8aMpLExlulQJCnNEPnc7RTH6b+fIz1pwFdMEEIQCIQIBEKZDgUYGGPwuq4DMglJUr/Wjy9me5K8kSJJktQvDY1uoUxCkiRJ/dEQGZuUSUiSJEnKGJmEJEmS+qOh0RGSSUiSJEnKHJmEJEmSpIyRSUiSJEnKGJmEJEmSpIyRSUiSJKk/GiIVyGQSkiRJkjJGJiFJkiQpY2QSkiRJ6o+GyI4AMglJkiRJGSOTkCRJvUZRBC47jMsOoyhD48q+xwyRjToH/FYOkiT1T5odw9n7NvXLHgUcggtuQR85l6TiyXRoA8JQ2S16aLxKSZL6nFK9g8ZX/oQdbcGOhml89c+Iqu2ZDmvgkElIkiSpezRNIbL1rbT2yOY30DT5sdMpQ2T0Ur4bJEnqcbbtoOWWpLVruaXY9hApD322hJrpCPqETEKSJPU423Zwj1uA4va1tSkuL+6Ji2QS6ixlaCQhOTFBkqReEfcWkXP7t7DrDgKg5g4j7s4bMvvknDVlaPQRZBKSJKlXOA7EXXlQkgdAEmQC6gIxRHpCQyPVSpIkDTTynpAkSZKUMapMQpIkSVKmKEPjbolMQpIkSf2QkElIkiRJyhg5MUGSJEnKFDFEpmgPjVcpSZI00MiekCRJkpQxsoCpJEmSlDEyCUmSJEkZI7f3liRJkjJFDJG9HGQSkiRJ6o9kT0iSJEnKGGdoVHuVSUiSJKkfcoZIyXGZhCRJkvoj2850BH0iI0noySef5Oqrr+bqq6/mhz/8YSZCkCRJ6udkEuoV0WiU733vezz44IM8+eSTrFmzhuXLl/d1GJIkSf2bZWU6gj7R50nIsixs2yYajWKaJqZp4nK5+joMSZKkfs2xh0YS6vNa4X6/n8985jNceeWVeDweZs+ezYwZM/o6DEmSpP7NTmY6gj4hHKdv5wFu376dL3/5y/zhD38gEAjwhS98galTp/LhD3+4L8OQJEnq1xqWP0b2gpszHUav6/Oe0NKlS5k/fz65ubkA3HTTTfzjH//oUhKqqWnprfDOWn5+oF/HBzLGniJj7BlDJcb8/ECXHh9tacXs5z+Xzjrda+/ze0ITJkxg+fLlRCIRHMdhyZIlTJkypa/DkCRJ6tccM5HpEPpEn/eEFi1axNatW7npppvQdZ0pU6bwkY98pK/DkCRJ6t+S8UxH0Ccyson5Rz7yEZl4JEmSTsMxY5kOoU/I
igmSJEn9UUL2hCRJGkBUTLRwJXZTNYovhJVVCnTtZrjUfwyVnpBMQpI0CAjhIPa/Te2Lv2tr8597OdZFd2cwKumsJKOZjqBPyOE4SRoEjEQDja/+pV1beP2LJGsPZSgi6Ww5iaHRE5JJSJIGg0Sswym9ViycgWCkHuHYONbgr5ogk5AkDQKONxs9r7Rdm9Bd6KGiDEUknS2he3CGwEWETEKSNAgkhIfsqz6Nq3wiAHpuCXk3fxnjXYlJGkB015BIQnJigiQNElF3Ib4rP08wGcbW3MSER86NG8BSPaHBUbbndGQSkqRBJIlOUs/OdBhSTzA8ONHmTEfR6+RwnCRJUn9kuGUSkiRJkjJD6B7sSGOmw+h1MglJkiT1Q8Lw4sgkJEmSJGWCcPlwWhsyHUavk0lIkiSpH3IMD3ZrY6bD6HUyCUmSJPVDlu7HiciekCRJkpQBCXSwLZzE4C5kKpOQJElSP5SwQHizsMN1mQ6lV8kkJEmS1A8lLAfhycKRSUiSJEnqa4mkg+IJyp6QJEmS1PfipgOeAHZTdaZD6VUyCUmS1GsUxcFltWAwNDZo60lx004Nx7XUZDqUXiULmEpSBgghMBJ1OC21CHcA01uAhZrpsHqUy2wktv55mt95DTWQS+ii95HMG4ftyGvfzkiYoHizSB7cmOlQepVMQpKUAa6mPdQ+/qNjWzgLshbeijbpEkxhZDq0HqEqDrH1zxNe/yIAZkMltY//iPy7vkPMJ/c46oyE6SC8IeyW2kyH0qvkJYkk9TGXE6Hh+d8cS0AADk3LHkYNV2Y0rp6kJcO0vvNa+0bHxqo/kpmABqCE5YDhBdvEibdmOpxeI5OQJPW1RASzg5vN9iCqE+YoOqo/J61dGJ4MRDMwJUwbIQTCl409iO8LySQkDQqKIlDVnn07q6qCEKJHjwnguPzoHWy7rQTyevxcmZIQbkIXvx848fPTi0YickdkKqQBJ2Gl/l/xhrCbB28SkveEpAFNCHBFKontWoXVUodn/ALs3FFndW8llrTYfrCJNTuqGFceYvrYfILunvtTSeAmdPknqH/qp1gt9QhVJ3Tx+zB9RT12jv4gmTeO/Lu+g9VwBGF4EbnDiWtyw/HOSloOAMKThd1cleFoeo9MQtKA5opWUfvQt9vqa0W2vEnOtZ9BlJyL43TjgAKeXr6f51ccAGD5pkqGFR7my/fMxK31XE8r5isl+/ZvQ2s9wuUj4crF7k68/ZjtKMT8ZeAvy3QoA5J17A0hfCHsxsGbhORwnDSgmVV70go8tix7GN2Jd+t4ja1JXlx5oF3bwaoWKusj3Y7xVOKKn3hgGDFj8CUg6eyZx3tC3mzspqMZjqb3yCQkDWy2ndbk2DbQvU915xTPdGSWkPqYeeytLXzZOM2Dt2qCTELSgKYVjUFo7e//BOfdSFK4u3W8kFdn8bnth48KcjwU53q7HaMkdcfx6x7hCeIkWnGSg7PqhLwnJA1ocW8Rebd/k8jGl7Fa6vBNuwS7cEL37geRmst1ywWjGV2axbJ3Kpg0IodF00rw6IOrmoHU/1nHe0JCIHy52E1VqHnDMxtUL5BJSBrQHAdivjL08z6IwbEFfmfJa6gsOqeI86cW4Thgy6E4KQPsk66kFH8OdmOlTEKS1F9ZPZB8TuY4DpbVo4eUpC45+f0nfNlYDRXomQun18h7QpIkSf2QdVJPSPhzsesPZzCa3iOTkCRJUj9kntQTUgJ52I2Ds+6eTEKSJEn9UNI6sfxA+HJwwnU4ZiKDEfWOjCShJUuWcNNNN3HllVfy3e9+NxMhSFKvM+wIRvMBXOHDaN1cPCsNXXHzpOE4VUvNkGuoyGBEvaPPJyYcOnSIe++9l0ceeYTc3Fze97738cYbb7B48eK+DkWSeo07UUvjsz8nWXMQAM/4eXgXvYeEKmunSZ1zchICULIKsGr3o+aPyExAvaTPe0Ivv/wyV111FUVFRei6zgMPPMC0adP6OgxJ6jSXFcao3Y5etQl3
oo4zFdZWFIi9s6QtAQFEd6zEqdrZy5FKg0nCdNpK9wAowQKsmn0ZjKh39HlP6MCBA+i6zsc+9jEqKyu54IIL+OxnP9ulY+Tn9++ryf4eHwydGB3HwUnGUYzuVVBINhzl6L9/TLI6VU9OGB6K33Mv7pKxp4zRirVSsT99S2azai/50y7oVhxnY6j8rntbX8foc6tgGIQCqYnZibKRtKx/eUD8rLqiz5OQZVmsWbOGBx98EK/Xy8c//nGeeOIJbrrppk4fo6ampRcjPDv5+YF+HR8MnRjdiTpiW98gvn8j7jGzcI1fSExP32jtdLSDm9sSEICTiNKw9HHcF32M7LysDmNUFHCPOpdkXfvZTFrh6D7/uQ+V33Vv64kYu5o8fIZgb0UYrdAFgKNkkairoLqiFqG7ziqWvna6197nw3F5eXnMnz+fnJwc3G43l1xyCZs2berrMKRBzmVHaHzmZ7SseopE9QGalz9G88v/16Xq2ooiMBvTqxcnaw6gOMlTPs+2wT35QozCkW1t3knnIQrGde1FSENa0KNS1XxinrZQNZSsIqyavRmMquf1eU/owgsv5Etf+hLNzc34fD7eeustLr744r4OQxrknJYqkrUH27XFD28n0FrT6f1tbNvBVTqed1//+iafj6mcfngvZuQSuO6LiHANKBq2L5/EoFzvLvWWkEdwuKH9lGwluwSrYjtaycQMRdXz+rwnNG3aND784Q9z1113cdVVV1FSUsLNN9/c12FIg5xQTlFw9FTtp2DljCR08fsRhhuEgm/KhejjF3WqnlxCeIgHhhH3lZCUCUjqoly/xoHa9j1uJaccq2JrhiLqHRmpHXfLLbdwyy23ZOLU0iDlsiMQrgbNheXNx/YX4B45ndi+DW2P8U5chOXN69JxTeFCjLmA3OHnIhyLpBEi7sg13lLvyw+oHG5IYloOmpqakqnklpFY/xSOGUdoA+u+0KnIAqbSgOeJV1P/9AOY9ZUA+M+9DGPGDfgv/ACe8dtJVOzAVT4JUTS+W0NijgNxLevYFyfarXgEV7wGNBdJPUtW25Z6lEsV5PpV9tUmGVuY2jNLaK7UfaHKnWjlUzIcYc+QSUga0FRhE179VFsCAgivf4nckdOJ502CYXPRR84j2cNVtt2JWqoe/hOxg1tQ3H5CF38AUTodC7nvkNRzynN0Nh+JtSUhACV3OOahTYMmCXV6XKGpqYlwONybsUhSl2lWjNj+9NmVZt2RtkWlPb3NgyZMwkv/SezgFgDsWJj6Z/8XPVx5hmdKUteMzNPZcKj9jE61YBTmwfR1aAPVGZPQ3r17ufnmm5k/fz5z587l7rvvpqJi8NUvkgYmU3XjHp5+RajllHR7d9Uz0ZJhorvXprVbjTIJST2rJKTRErOobjbb2kRWIU4iit2UvnxgIDpjEvrKV77CrbfeysaNG1m/fj2XX345X/va1/oiNkk6I8tR8M25Hi2rsK3NN+1inNyRp3nW2bFVF1p2cVq78Gb12jmloUkRgjEFBqv3R9vahBCohaNJ7l+fwch6zhmTUDQa5Y477kDXdQzD4J577qG2trYvYpOkTom5Csi69V5yb7+XvPfcjzH3DpKKt9fOlxAesi/9ECgnbql6x8+DUOfWH0lSV4wvMnh7bxTnpK69WjAGc396b3wgOuPEhFGjRrFu3TpmzJgBwM6dOykrk39sUv+SULyQ1Xu9n3eLZ4+m7EM/IlJ9GMXlww6WkFA8fXZ+aegoDWlEkw6H6k2G5aZmdyp5w7E3PIMdaUIZ4D3wMyahiooK7rnnHsaPH4+maWzdupX8/HyuvfZaAJ5++uleD1KS+hvHERgFw2kSXatFJ0ldJYRgYrHBij0RhuWmEo5QtdQEhQPrMSZekNkAz9IZk9AXvvCFvohDkiRJOoWJxS4eWdPCLbOCqEpq2qdaOBZzz9uDNwnt2bOH0aNH4/P5Ovz+5MmTey0oSRpKFAWiSRtdVTKz1bHU7+X4VIIehe2VCSaXpiol
KAWjSLzzIk4sjHD7Mxxh950yCf3oRz/it7/9LbfeeivFxcXtbopFo1FWrFjRJwFKZ6aqAgHtNsCSOiYE6HYUYSWw9ABmhkvwtCYs9lY0U9MYxePSGFUapDDL3a4ygyQBjCs0eHtftC0JCc1AzRuBeWA9+vjzMhxd950yCf3whz+ksbGR0aNH8+CDD+I4DkIIkskkd999d1/GKJ2CwMHVcpDW9S9ix8L4zr0MO388piyW2SEhHIy6nTS+/EfM5hq8E+bjm3dzl/YYips2h2rCHK2LUF4UoTjkxtC6l8iEAodqwvzr5Z1U1rUCsGh6CTecN4ocn3GGZ0tDzbhCg7+tbMK0HbRjQ3JK0ViSu1cOziT0X//1XyxbtgwhBPPnz29rV1WVyy+/vE+Ck07P1XqEmoe+DXZqz5HY/k3kXvc5KJLbpXfE1XqUmsd+CI4NQGTbMhzbxH3BRzCdM5fbsRyHx9/cy8urTmwRcd15o7h+4QjOsON3h6JJm9fXHWlLQABLN1Qwa0KhTEJSmoBbIeRV2Xk0waSSVG9ILRxNcvPLOPFWhKvjWyf93Skv4f7whz+wfft2brjhBrZt29b23+bNm/npT3/alzFKHRBCkDjwTlsCOq5l9VPowjzFs4Y2q7GyLQEdF92xCi3RuR0za5vj7RIQwNNL99IQ7vxGeSezbdh5oCGtvao+0lZySJJONiJX553DsbavheZCzRuOuX9dBqM6O2ccR/j+97/fF3FI3aGk//qEokG3rsv7H8NuxdWwG6N+Jy7rRKLQSOIKH8Ko2YorXoMQp7+BIgS0xEwSHWxEp/pDJIXG/upW9lWFiSbtDo6QkkhaaW2OA4loBM2J42o5gFG7FXeiDnEsi5zqNQD4XAqTR6UPBZYV+DF7uCK3EOAyGzFqt+Nq2ofuRNO+3xhJsrOimcqGWI+cX3MSuMKHO/17ks5sRK7O1sp3bXRXNI7knrczFNHZk1W0ByjHcXANm4LQHsMxT7wpA3NvIN6JoaX+LlFfSfNTPyFZvR8ALVRA9vVfxDQCmBuepmH1MwAIVSf3pv8mkTPulLXiqprifOePb3Pp1BAXlU7GObLl2HcEWRd/gO89sovtx3okRblevnj3TEKe9PtqhUGNvJCb2sYTV6IleR7yRCOJ1c/RuuHl1FF1F3k3fwXHk0XTsz9Lew0xV2pPI8eGmy4Yw54jTRytiwBwwYwy3t58lIraVi6cVozSQ10iV2sldY99DzuaKkLsHj0D3wUfJKH6EQL2VbXy/b+uJmmmkvC1i0ZyzYIR6Er3zq85CcxNz9Lw9pOpBlUj74YvkMib0Gs1/YaCgqBKXatFa9zG50pdhA70ITk5I3QAi3lLyLv9XgKzr8E35QLybvs6Zt7YTId11oSAyK41bR/eAGZjNbHtb6FFamk5loAAHCtJwwu/QbdaOzgS2MDfX9xOa8zk36tqec1zOeGFn8R/2cfJf893Wd1S2JaAAI7WRVi2sRKlgw9fnwFfvCKXOeNC+Dw68ydm85nzXHjjNbRueOVETMk4zW/+Hevghg5fw8nHzvbqfOW9s7j7igncdfl46ppivLnhCH9/YTu1Le2veLtLExYtKx5pS0AAsT3roHZf6t+mza8e39iWgACeXrqPqoZo2rE6S22ppOV4AgKwTBpe/A2GKSvxnw1VERQFVfbXndhxdaAPyckkNMDFfKUoM29FX/RB4qExWIOgc6soCrEju9LaE4e3gZV+/8VqqUckO/7ATJg2e440tX39xKpavvZ0CyuiIzGzylmzoz7tOVv213V4TyaJi9IgvNf7Bt9bUM979Jfxb3oY1Rfi3XOqHTNBoqKD13BoK8q7hqWicZO/vbCdf7y4g3f2nKjL2NTNe03vplgxEpW709rNpiqEgGjcate7O67xLM5vR5vS2qxwIyQj3T6mlJIf0DhU/65tv4vGkdy9MkMRnR2ZhAYB23awrFPfyxhoLMvGN2ZGWrtn/Hww0guTGgUjsV2BDo/l1lVm
TyxMay/N82GaNjMmFKR9b8GUEuxT/DiTuWPJOe9WcvLzyJ11BaEbvoStpd9rUvy5uEed2+FrsOz2GS7oNcgPta87p6mCvFDP1KKzNC/esXPS2rW8chwH/B6N0aXp9ccKsrtfBFYJ5PPue5N6XjmOO9jtY0opOT6Fisb2k4/UwjFY1Xuwo80Ziqr7ZBKS+iXPyKn4z70stZgG8E5ciDZiJgl3PjlX/yfCSH1A67klhC7/CElcHR/Icbjx/NFMGJ6derym8N4rJ1Cam/qAnTgsm8vnDuf4CNn500uYMTav3eLsk9mOQsxXhjliIf7JC4nr2Zj+ErIv/w+EnopBzx9G8Lw7oGhi+msYOTPt2G5N4XN3ntuWiPwenf+6aybZ3p5Z72XZAs+5V+Eqn5RqUDWyzrsDOzQCAE0IPnbjFErzU/cTPC6NT982jfxg96eJJ70F5F77KYQr9XPWsosJXflxEqQnbKlrsr0qR5vaJyGhGagFo0nuWZWhqLpPOKf6a+vHamo6N6U2E/LzA/06Phg4MdbXNqHF6sBxsNw5mMeGGhVFoMcbIBnB8WSTEGfuMZgONLUmMDSFoEdPSwSNkSS245DtMzo9t/Dkn6MQYCQaIBk7FlPqw1YVNlq0Dmj/GjoSt2yaWhP43Dp+l3bKRNhZLieCYbaQwCBhZKM5CZRoPULVSbhysJ32rzRpOTS2JvC4VIIeHfssZ8gpikBPNEDi9L+ngfJ+PNsY8/M77q2fyso3VhGLpQ+JNkctHlrdwk9ua9/Dt6r3YO5dg++m+84iyt5xutc+8G8gSIOW5ShYrvy0dtt2iOsh0EOdOo5GElfDPpQdy1F82bjGziHuLW43Syt0lr0Ox4G4ns27i1VYjoLlTn8NHXGpCgXBVPJSFIED2N0sxeSOVNDwzM8wG6sQupvsSz+EVTaDpKfoWMDpz9FVQX4w1Zs72wR0/BhxLQRa6KyPJZ3gdymE4zaW7bQVMwVQ8kbibHoRq+EIanZpBiPsGjkcJw16ytGt1D56P63vvE7LyieofehbuKJVmQ6rQ5btsKuymQce3shvn9rK4fpIl5d9GU6Mxhd/g9mYeo1OMkb9c7/CiFT3QsRSX1MUgddQaI62v3EpFAW17ByS217PTGDdJJOQNKjpxGle/mi7NicRw6xMn7nWH+yubOH7f1nDhl01rHinkvt+t5LKhvSZa6cj4i0kaw6+q9XBapZJaLAIuAWN0fTF0+qwaSR3LcdJ9szMyr4gk5A0qAnAsdLLGDl2+h9wximCp97a267JdmDt9uoO1y2dimN4UQPplRgUX+hsI5T6Cb9LoTGSPoVT8Wah5pSR3Lk0A1F1j0xC0qCWFC6C825s36hq6MW9t6hXF0l04l2u/yYc0LX0J2lq1w6UVH1kX/FxhHZidltg3o1Y/uKuBST1Wz6XQkOk4wspdeQsEhuf658XWh2QExOkQc1xwCmdRu51n6V1w0so/hx80y8n7ivp8T17VEy02p00L3sEJxknMOd6nLJpmB3UrOs4VofrzxvNpt11bW2aqjBjfEGXJgo4DiRyxpJ79/dRI3VYug/LVyC3+BhEAm6FupaOCxWrOWWYbj/m7hXo4xb1cWRdJ5OQNOiZiguKpuO5ejoOgpjl9MqmcVrjAWof/1Hb1/Uv/Jqcq/8TSmd1+hjDC3zc9+G5LNtUgcelMf+cYgqzXF2ut+Y4EDdyyS8d0e+nP0tdF/K0L93zbtrYhcRXP442ei5C7d8XHzIJSUOGaUFvbVmqKIL4nrVp7eF1L+Avn0HS7tzItyIEw/J8jLx0PI7jYNuOLPgppcnxK6zce+otW9TccqxALol3XsI1/eo+jKzr5D0hacjQieMKH8LVegSdU19FdofjgOJNL0mj+rJwurG1hmXZPbJWRxqccrwqjVGL2Gm2HtEmXEBi47PYrel7VvUnMglJPS5pO1Q0RNlXFSbSwR483aU7MVzNBzDqd6XtzXMmXrMe9q0kvuU16h76FtE3/oBh
9lydLcdxMEZMbytTA4Ci4p91LabdM9sxSNJxiiIoDGrsrz31xZTiz0EbNp3Y0r/2YWRdJ4fjpB4VTVr84+VdLNtUAUAo4OJr75tNrv/stqs2rDCRpX8juiNVKVj1h8i96ctE3UWnfZ4Q4G45RN3zv8JsqEQLFZJz4d00vPUw7pHbYNjcs4rrZHFvEXm330eycgckE+il44n7Bs7KdWlgKQlpbD8aZ0LxKeomAtqY+cSX/pXE7pUYY+b1YXSdJ3tCUo86WN3aloAAGlviPLJkF51d9q8oAkXt4G1Zt68tAUFqW4CWlU+gidP3tAyzmdp//xizoRIAs7GKhrceJjD9YuKHtqB2dK7TEAISlkNrwiJhtj+340DMXYA18jzs8RcT85V1ayhOkjpjeI7O5iOnX5QqVA1j2pXEl/8NO1x32sdmiuwJST3qaF365nLbDzQQMy3c2uk/8JujJsveqWDT7jrmTylmxrh8/C4VIcBsOJr2+MSRHfisOKZy6i0HnHAddqT9sJsdCyNUDVfZRKxT7dnQ0bGAXRUt/P7JzdQ2RVk0rZRbLhxNwJX+ZyQnE0i9rTRbo2aTRX2rRY7v1LspK6FitJGziL7yK7zXfQWh9K+PfdkTknpUeUF6tdyZEwpw66ffcjxu2vz0X+t4ZMludhxs4M/PbuXPz2/DIvWBruWWpz3HPXoGpnr6CtrC7QflXecWCmogB7V0UpeSRW1znB/+dTU1jVEcB97acIRHX9vd5dpuktQTVEUwpsBg1b4z74CrjZoDQhBf+VAfRNY1MglJPaos38ctF45pKzMzpiyL6xaNBMdBCHCZzRgNu3FFK1E5McW0qiHKoar2Wz+v215NfXOMysYoB5Ry3Fd+rm1vHqN4NN4ZV2M5p88ASXcuoQvubtcWWnQbYthM4lr6Rm6nU1HbyrsnrC3bWEFrfGCsTJcGn0klBkt3Rc647YcQAmPaVZj71pLYuayPouuc/tUvkwY8QxVcOXcYC6YUE09a5AbdHK9E4wofpu7xH2JHWwBBYN4NaFOuwBSuU9dGq9qJO1rHy5XZHGqCT7zn54ScJhxfHrFTbWR3EttRUEcvIq9oDHZLHUogB9NfTLwb1QN8nvQ/l9wsD/oZhhklqbeUhjQcB7YfTTDxNBMUAIThwZh1A/EV/0ANFqAW9V7pqq7IaBL64Q9/SENDAz/4wQ8yGcbgJ1JDSUfrIvg8Orr77FZQt8RMDteEURRBWZ4fn6v9cJcAClwJlOgB7MMtqIFs8GbRvOwR7GgLgXMvRcsqQM8uwj66GUVzMzJUxuSROWzZVw/A4skhrp+skWtWIdwO141o5puvaazb18qic0rbhtGEACNeh11/GKHqiJwy4mr79TqW0LH85di+MqqaYtTuD5MdcFGU7UFTBIqw8UQqSdYdQagqSt4Ionpu2usuy/MzZUwu7xwrq6MI+PD1k3Gpos/vAbnMJpyGwzi2hZpdRtyVI+9DDUFCCKYPc/HC5vAZkxCktl03pl1J9KX/xXv911CyCs/4nN6WsSS0YsUKnnjiCS644IJMhTAkCHFse4C/rmlb/Dh3chHvv3ICrm5cwde3Jvj2H1fR3JoAIC/LzdfeP5ssz4nEZlhhwq/8hviBzakGRSP34nvwjZuFkV9G7NBW9Oxial/4LXY0NQSnZuXzxeu+yNL9JVjRMHNjS4m9soTGY8fMXnwH188ezTt7alk8rRjTTE0ocLVWUPvwd3ASqXFxPbeMrOv+K7XBXLufg+DtbdX87snNbW03XTCGq+YNw9u4h+qn/7dtAoMayCH3pi8RdbX/A3XrCh+/fgqHa8O0Rk2GFQcJebQ+//B3J+po+PePMRtTkzUUt4/cW79BzHP66erS4DSpxMXbe2Psr00wIu/MSyHUgtE4Y+cTee7HeG/4JoonfZF1X8rIOEJjYyMPPPAAH/vYxzJx+iElbjr83783t1t9//aWoxypi3T5WKoqeG3dkbYEBFDbFGPN9hrEySWjGw6e
SEAAtknzupcwm2txFY/CMU3iR/e2JSAAq6kG59BGFk4u5NIRcWKbl7Q7d+PKpxiTrzF9hA/l0FpcZiOq4hBe83RbAgJI1h3GqtieFntjJMmfn93aru3x13fT0NxK67Zl7WbQWS31xHev7nCI0K0rjCkOMm1UDuOGZff5nAQhIHnonbYEBGDHWolseJEuzjaXBglNEcwa4eaJ9Z1fwK0Nn45aPJ7ocz9u9/eTCRnpCX3zm9/kc5/7HJWVld16flf3au9r/Sm+ytpWahrT32TRuNnlOC3LZu+RprT2g1XN5OWNb/u6pSJ9mnaysQoUDYSKFsxt9yHadvy6QxTk+glXpa8Cd+IRHDPB+MRu6p5+BKNwJAU3/zetaZu3gd10NO211R+oJ2mmT8dujZkEG9J3WU3WHKQo15/W/m6Z+F3XrDqS1pas2U9+0EDR04dk+tP78VRkjOm8PhdaB1t7dGTBBBe/fqWaQy0wpfzUSxZO5sy+lObVz2G++nOK7vxGh++dvtDnSeiRRx6huLiY+fPn8/jjj3frGP25KnB+fqBfxacAsycWsnrbiQ9aISA/y93lOIWAC2aUsm1/fbv2OROL2h3LFUwfFvKOngGqhuLLJV6xi8D0S4hX7G73GNfImdTUtOAOliBUHcc6kYz0gmEUmocJr3sEgETVPmK1lXgnn0/Tm/9sdxy9dGLaa/O5VPKzPdQ0nEjIHpdGTpYH94ipxA5ta/d4z9jZZ/z5ZOp3bQyfButfbtfmnbSY+qYkjpNo197f3o8dGSoxdjWJRVrjxGKd3yF1wWgPv19SwzevzUPt5CaIztjFJDc8y6F//gDP5Z/utTVEp3vtfd6Bf+6551i2bBnXX389P//5z1myZAn3339/X4cxZCjAey4bz4zxBUCqjM6X3zubgqzO7XFzMseBKaNyuXHxaDRVwaWr3HXZeMaWth9TNv0l5F776WMFPQWeUdPxjp2FMWYecX8R2Zd8EC1URODcy0DREJpB1vl34hSkZuvE3XkU3vYVtGA+AK7S8QSmXkT4XcnGjrWij5mPf/plIBSE4SZ08fuxckamxe41NL58z0xGl6amZZfk+/jq+2YTcLtwjZqRikU9FsuCm6B4cpd/Pn3FzhtD6IL3IHQXKBqBWVejjph5xmm60uA2tkDHpQmWbE8fiTgVIRT0aVdBMkrs1d/idGHxdk8RTgbfuY8//jirVq3q8uy4/nzV1F+v6hxSs9oMTaG8OOusYhQCWmIWQoDfreF0UO1Z0wR6sgWRjIDqIqH5MR0VjTjKoXU0LX8cNbuYrFlXoPjziLoLOPn9n58foLm6CpGMgctHZNk/ad38+okYDDe5d32XuJGHKhy0RCMIlaSR1e7+V8y02XmoiTXbjzKyJItzx+WjAB5DQz9px1JDmGixOlA04u5crE4s/cnk71oIMJJN4DiYriysUxRJ7a/vx5MNlRi72hNa+caqLvWEAOpbLR5a3cw3rskn13/6BeIncyyTxOrHULJLcV/wIYTo2f7J6V67XCc0RAgg6O6ZX7fjgP/YtOx3JyAFG6NpP+F1zxNNxvHPuBIzbwyWk3q8Vr2Dmud/C4DZVEP1/k3kXvYhGFmQdp648ILhBQc8c25ADeQQ2fIWWv4wAvNvJO7KAwcsR2Adnw13UjxCgVfWHOLx1/cAsHRjJS+vOsTX3z+rXQICSDgaieOz4fpw7akiQE80gG1hubMxnc59cDgOJxbb9v3Fq9RP5fhUzh3m5q8rGvnsJTntJwydhlA1jFk3klj1KPFlf8O18J5OP/dsZTQJ3XTTTdx0002ZDEHqYUbzQWoe+g7HN4+L7d9E3k1fwsqbiKoqRLavSHtO6463CY6aTdQ59RBhXAshpl5H1jmXYasGMVs54/50zRGTp97a267taF0rFbURxhRn/ka4bkcxty2hduUTYFl4Jy3CO/cW4nrXKjlI0slmj3Dzz1XNLN0V4bxxvk4/T2gGxuybib/9EKx6FPfcW3sxyhPk
pE6pxyiKILrrbd6dHcJrn0VTU3vuqIH0RaCpjd/O/FZ0HEgIN+YZdil1WS24mvZCMr3MDoDdT+6diNo9NC97BCwTcIhsfYvk7hWnrh4hSZ2gKoIrJvt4fF0LtS2n3n21I0J34ZpzC+beVcQ3PNtLEbYnk5DUozraz16oqQV0tu3gGT8XYZwoOip0F/5zLiDunN1+Q8d5YlU0PHwftQ99G/W1/+WyGe1n6uUEXJTmdf7qsLcoiiB+aEtae2TbMjSnZ3d9lYaevIDGrBFufv9WI1YXd+gVhhfXnFtJbn6ZxLbXeyfAk8h7QlKPsW0H99jZtKx5DuzUFZiaXQoL7iFqQjxhkXSXkXv3d2luimJYYXxeD/FAOSoWWrQGx0ri+POB1HCZEBBPJNGw8LoUSMZJaj4sR0EngWrFMTVvatKDsGhZ8ShWS6qsjnV0N5dN3UbJVbNZvrWWEUVBLp0zDJ/R+Ru2vcW2HfTc9A3vjKJRWEI741BjpigCdDOMo6gkFY8sFdSPzRzu5kBdmOc2hbl2eteGn4UngDHnVuIr/4XiyUIbcW4vRSmTkNQJ0aTFoepWmlrjlOT5Kc52o5zipmXcX0b+nfcR27mS1pxxrG7MpbBBYfOavby+7jB3XDqeHQfqeXtLFeUFfj5w7TBG2nESa56gYf3LgINRNArP9Z8hZvp5c/1hnlx6ALehcvf5RYw9/DQevw/3rOtYt7eFRNJkWLZKeUkutuoifmy9j1E0Ct+E+TjJGBcEjhCcUsDOWsgPutpK/mSanpWPnl9OsuYQAIo3iG/iAmL99IPdsJpJbH6FunUvongChC58L1bhZCwyn9SldEIILp/s4+9vNzGpxMXogq6NNij+HFyzbiT6xu/xej+PWjC6d+LM5BTt7urP0zkH23TTuGnz639vZtPu2ra2T906jZlj89pNhX43TVN4cfUhsoNuDlWFefLNPcycUEA8YbF574kdHnVN4UcfPAfl0S+0e35w7vUs1+fxu6faLyL92nWFFCz9MVr5Ofw9eQnLt9ajqQr33jqMUSNLaX3zL8QPbCZr9lU0vPGvtueJ4glYi/6DrOz296R0EgjHwlS9p309HTmb37WmKcRe/BlaVi6aPxvHscEyiR/dh+fK/yKZ7Jkpej31fhQC2Pxs6h7WSfLu+Bbx4PCzOvZg+5s53TG6ojtTtDuyuzrBmzsj3HtdPl6j63dgrKO7SG55JVVnLpDXrRj61WJVqf8RAlyJWvSjGzCq3sFlNrZ9r6Iu0i4BAfzxma2E46e/4elEm5hp7CHL5bDmWLWGMWWhdgkIIGnaVNSm17GLxhK8uOpwWvs7R23UYB7moc3MLk915E3L5t9rm7DjEQJzbyQ48wqaVrW/qepUbifPrmn7WhE2rrrttPz7ezT+62uI7S9jWJ1f5He2LMvBKBlHy7qXaHjzIRrfeoTG5U+gF43BsvpHT+1khh2hdeMrae1m1Z4MRCN1xZgCgxF5Bn9d3tStBc1q0VjUETOJvvA/OMmzT4rvJpOQhKu1grp/fpP6p/6Huid/SsOj38WVSCWeaAfJpjWaJGmephckLJJrH0dd9kc0M0JOMDX1Op608HWwVsnrS5+a7Q5mU5STvmtqnl/BjkcQLi8tiRNDgtXNJpZiEDXycY2Z0644apuTCjUazYeofeyHJKsPYLXU0/j637D2vt1nayMcx8EYPQstp6StTQsV4Bo7r8s9sr5gCx01mH4VrHjldPKB4PyxHg41JFm2u+uFiwG0UbMRvhCxt/7c45U5ZBIa4lQFIhtexImfeHNazbWYBzchBBTletM2bZszqZBABxu8tR0zUkvrptcACNa+w3nTS9A1hTfXH+Ha89uPK8+YUEBZfhDP+HltbVqoEP/wCVw7UcE46dy5QRcTvA048QjWjFt5ct2JYqqXzSjAJSy0/ctIHNmOd/yc9kEpGmp26gNfCEhU7iRtKvm659GdvqsoHDNyCd34VXJv+Sq5N3+F0M3fIObq3nBH
b0uik3Xene22StdySlAKRmUwKqmzNFVw5Tk+HlvbQk0Xp21D6v6Sfs6lWNV7SO54s2dj69GjSQOOgk2yNr0StVlXgTZekOs3+MYH5vCX57ZxuCbMwqklXL9oZNvVS2r9j2g/DdSxEboLvXQC2uG1jDXcfOWu6VQ1W+QEdO77wAyONibwew2GFwZQDRVt8Qfxzbwax0ygBgtoXPIHsqv2862r7qIiEUB3exhZ4CLUehDljvvYFQmiKDvwe3SuWTiCKWPzaXz+pyQqdgEQWnQLiuGhdftK9Oxisi68h7inEJzUeiPVlV4hW/GFcPp4Zlpc9UPOuL474VmIh0aSf9d3sOqPIDQXIm8YcS2U6bCkTso/Nm37D0sb+eIVuaecXHQqQjMwpl9D/O2H0EomogTTq5x0h0xCQ5zpKHjPuZDE0X3t2l2jziVhpT6Ny3K9fOnumSQtG6+u4jgOmpNAqd5O6/oXEJ4A/hlXEg8Mx0EQceWza/aXeGVzEwUBlUvdCsXrfsvI0jE0NCg8Wj2a5dsaABheFOCzt59LlsfA9JcD4FFieEZNx1U8mqBSR3DHo5hN1fgX3Iw55Tocx2F4CD5581TW7ahhydrDPPHmXj5zxS0Ma/o1VmsjjUsfxTX8HArf+30Sqje1FfhJyUUrHofqD2GFG1MNQiFr0W3EnJ75k1AU0S+H1c6G4whi3hLwlpz5wVK/NHO4m701Lby6rZVLJ515q5J3U4L5aKPmEHvjD3iu+XKPDF/LJDTEOQ6ow2cQmFdPePWzoOlkLboNO7f9sJkmUjO6jo8Hi6NbqHv6Z23fj+5cTeE99+MoGq/uTPD7Z/YDsA1YuV3l+x/8GIknvsyBGZ9h+UnbShw42sLLqw9y6+LR6MlmRONBrNYGrOY6EnWHie5/h5zz76Bp1TMYZZOJHzt/fTjJd/+0CtM68UH/i5eq+PYF16Cv+hsAiSM7sVBJkr5PSszIJeeWb2BV78GJR9CLxhD3l5x1L0i3o4i6PcQPbEHPK0MtnZS2y6skZYoQgksm+XhodTMzhrnJ9Xc9BWgjZxFf/jeSu5ZjjFt41jHJJCSRUP0o024gd/KFgEJSD552lbUuTMKrnmrX5p98Hq3rnqdFzeLRNe23xY4nLfYdjTDel8We2vSZXxt31XLLecOILPs7wSmLad65itjhHRiFI8m96B4aVz5J3rWfbjcVuLEl3i4BAURiJmER4PhHvn/WlSS04CkTS8zIpT7kZ822SvZuaeT8aR7GlOVgqN27ulMEWDtep+mth9ra9PxhZN34JTCTgCChZ8kFnlJGHS9y+s9VzfznRTldfr5QFPTJF5NY9TD6yFmpLUXOgkxCEpAqPh1Xs058cQqqsNHsOHpeOYmqY0N4ioaWXUDjW48gplyF3tE+07obxZfNuGEhWFPT7lvnjs/HFasDt5f61/5Gsi61c2j88HbMhkr855yPpbnaLYrMDrjQVNEuEXndGtkBN3puKb6pF6GOmkPiNB/4LdEE3/rjKloiqTI5q7ZW88GrxnP+9LLTDqWZtsOh2lZe21hBbtDD6NIgPkNFTzRSt7z9Ro3ukjEk1j9H87oXEYpG1sKbUcacR1Lp+n5OktRTZg1389cVTWyvjDOhuOtJRM0uxQqVkNjyCq7pV59VLHJ2nNRp7lgViTf/QN2/volQVXIuugemX4+68B7MxmpAoOx6i9vntb+68nt0mqNJ3HNvobx+NedPPvH9USVBLp5ZhpOMogZy2xLQcVZrM4rbj+MOtWsP+TQ+ddv0ttlzHpfGZ28/l5yxUwncfC/2uEtIqKdfHHjoaFNbAjruoSV7iMdPvRZCCHh7ezXf+eMqHnx+O//z0Hr+99GNxEwb4Vg41omZR4rbh+IJ0Lz6WbBMnGSMxtf/jqjZddq4JKm3aapgwRgPj6xt7vaUa23sAhKbnscxE2d+8OmOc1bPloYMl9VCy9J/4hk2GSOnCKHq2Ik49aULSSgeppSUYhSNAqEyL6Qy7EPn
cLARKutjjBuWjd8l2HAkjO2ayNXnCK4cF8L2F9Jguth9uJF4bjYloVKyz7sN5/jNTsdGMTzouWWYhzbg9gYhq4S4KxfHEcwY7uUn/zGVxpYo2QE3oaBB0hKAQWfGvBw7faqq7YDhRDEaDmA1VaMG81B9WSSrDyA0g+bQeP7+wvZ2z9l5sJGK2lb8xdn4zlnctvmeq2QssQOb084R27MWvWRatxelChxckUqs2oMo3iwUlwez4SjC5UXkDj+xzxCQbKjCOLoLJxFFzS0j4S/BduS1pwTjCw1W74vxzpE4U8u63jNXAnmooWKSO5diTLqo23HIJCR1TmsdnvIJNLz+97YmV/FoChedSyJSRe3jP25bTa14/BRdcA+rjgbJysnhSE2Yx1/bTTia6nW4DZX73jOB/3tuHweqUuuThIAvXldK6cqnyD7vVhpX/Bs7miqTIjSDnAvuouaJnxJadAvusfNJGlkk1z2Ns/oZskjt65acfQ3quTdgoaGqAsfhtMNqwwt8+Dw6rdETvaGPXl4OO16ndsUTbW2+SQuxWpuIHdhM8ryPE++gpE4iaWM6Cp45N6LlFBPZuhS9YCR2pIl4ZfuqAlre6Yf7zsTVuIeaR+5HaAbZ591G7ZIHOX7jS88fRta1/0Vcy8JlNnH08R+TrD1WeUIo5N38JeI547t9bmnwEEIwa4SbFzaHu5WEIDWpKbnlVfSJF3Z7ppy8JJI6RdWNtFI48co9uJ0Inr2vtyvnYUfDmFV7mVpgkzRtqhuibQkIIJaw2FtPWwKCVMflwaUNKFMux2yqaUtAAI6ZIHpoK0bRSJpWP4fTcATDihA7uLVdPC2rn0VEGzlY28rfX9nNU8v3U9McP+UfR1bQx7fuGsc1swuYNiaHz1xVyuxhGs0rn2z3uNaty/AMnwyA+8AyFkxuP/HC49IozvOmfiZaFky6guDN96JMvx7PtMtQ3CemwqrBPIwR53Z7CEQnSdNb/wLHxjdhLs3rX+LkmRfJmoM4dal1X07t/hMJCMCxaXrjbxj0fOkVaWAaV2hwtMmisrF724coecNxElHs2gPdjkH2hKROcTRXx6VwLAu7uTqt2W5twpVtY5o2TeH0D71IIr03UdcSx/blYdVsTfue1dKA6gmS0INsihbywhP7Cbhu4NILVLLX/gm7pRYUlV0NCt/569ttz3tm6T6++9H55AXSb75ajkpeQQF3LXah2Aks1QORBnDSh8kcOxWvdXATd1x7A/k5o3hzQwUjioPcdtFYQl69bQTQth0SpBa92t4icu/4NlbDYYRQETllxE4aLusqYSfbtqpQPUGscEN6rIlUcrfj6bXwrJZ6hJ0E5exmNEmDg6oIJhUbLN8T5eaZ6XuBnYkQArV0EsmdS1HzR3QrBtkTkjol6QrhnTCvfaOi0eJ4cMaen/Z4o3g0BxoVDF1hTFko7fvD8728ewPRS6bmoOxYgj5qRtrjPSOnEq/ez8GRN/CDf25hw65a3tpcy7f+XU3T9LsB0Ceex6Nvtq/+kDBtNu2pO2VvyHYEMS1ExCggrgbAl4cWar8RnuoPYcdPlPPJ8Thcv2gkP/v8BXzypnMoyHKd8haU40DMyCFZOJVEweR292u6I6l68U+/FIDI3g343v07EUpqDydAzS0H2r9u37RLSGpdX6QoDV7jigzWHoh1+/lqyQTMvatTleC7QSYh6YwURWCj4VtwO/5plyB0N0bBSHJu+SoHIj7qguMInP8eFG8QNZBDaPGdtGSNIa98OGPKsqioDXPnZePJCbrJ8ht84JIyCo8u5au3j6eswI/HpXHtgjIuya/BHLWQh7Z7sRZ8EDWQg+IN4ll0F1YsgmvmdTyxqn0Vbst22FKn4520iMCcG0h0cL8m2YX9g+KKl+zrP49nzEyEZuAeMZXcyz9C67YVaFn55F73WcysYTi2QyjgQvTxmh/HAW3cIoLzb8JqqUMLFRGceSXC8KDnlZJ385dIBFIVDZL+Uopu+wp6TinC
5SUw9zpcky/G7n9FuqUMKgioxJMO1c1drykHoPhzwXBjdbOiutxPqIcNpr1RhIDq5jgHD1WTo0bIzc3Cm5OPjwi24iKJgRCCmGmjCPA6YXRhERU+Eo6GrgiEACNShV13gKg7H+HPw+d1oyQjoAgijoeopeJ3a2hmC1srk3zvr+sIeHUunZqNrgre2hHmG3efg9+j852/bWbnocZ2cb73ivFcOqOIpK2y5WAjP/3HurbvKYrgux+ZT1GoazdeNWGhmhFszYOJhmGFcUT73UQz+btWlOM7nGpYihstGcZRdRLvqg6Rnx+gqboWYSdJ6v5+mYAG09/MmY7RFT21n1BnPP9OmFkjPCwa6+3W85M7liJcXtwL3tPh90/32uU9IemUGlqTNBw+wMS9j2FV7EAYblj4Huxx80g6qfFjx3FwHaswkCTA8dub+rFRIHdrBTUPfxsnkeruC83Af9vXifiHpR4gwKOAZdlYwkfcagSgJZLk8ZWpe02KgDguXI7GzReO4ft/XdMWo6EpTB6VR9JOLWQdX5bFF++eyXPL9+P36lw1fwTF2e4uVykwHRVTDbTd848rx4aw+sklm22fFJMNlnrqIbaEcIHqSk0hlKQOFGZp7K1JdDsJqcXjSKx9Emf+XV2eJSeTkHRK4XCE0sMvYVXsAMBJxIi+9gc8hcOgE7tpKoogtmNZWwKC1Ey3yKZXMRZ/uMPdQ0N+F163RiR2Ymhg/pSStjsbo4sD3Puhuby54QhBr8H8KcUUZLnbZptpimBCWRaT7pyOQGBZtiyTI0lnkO9XWbWv+/eFRCAfALvuAGreiC49VyahIcbBYc/hJiprW8gOuMkLuNImCByXYySx9q9La3caj3Y6CcWba9LazZY6/NEqTD0vLUHYtsNdl01gw64aKmtbmTYuH1UIzGPrahQhGJ7v4/1XTMBxHGzb6XC6s2059JtuiyT1c9k+lZpw9+4JwbFZcsXjSO5Z3eUkJCcmDCFCwLrd9Xzuf17nhw+u5cu/WsbyLUdxTvFh7fb50PLL09qTRudmV5mmjXfsnLR2z+gZtKx9Dr2D7bTzs9wsWXuIlkiCkSVB3t5ciWlZhLztp49alt3hgk9FOLjitbia9+OymulupXlFpKoS6BXrMOp3YNjd25FSkgYCnyGIJR0Sp9kx+UzU4vGYe1Z2eQ2c7AkNIY2RJP/373fa9T7+/OxWpo7JBQSqIvC7VOKmTVVjhCCt5C++m/onftxWH0qMnMVr+1QuzmnGpavEFX/6m05AU9TEshxyCyaTfeF7aNmwBBwb/znno/lDNLz+D3znXIDhU0lwYtKAS1P4zK3T2Hqgge3767nnigmMK8uiM7lExULsX0ntK38Cy0TxBsm94QvEjt9/6iQhQK/ZTu0TP25bM+QePQPfBR8iofq6dKyBzrQdwnETr0vDOFWXWRrwhBD4XQotMatb2zsAiGAhCIFdvQe1cEynnyeT0BASjibTpitfe95oHn9jL0s3HMHQVT5+01Sam8Kco+1DW/sw1Qs/TNN5/40ercXWPCT8RaxZXsls/XUCTgvu0TNJ5ozBOvZWipk2z688wPMr9gOCS+cO44JpCyk7Lw+ScRwzTuOyx9AC2US3vEns8HZCl3+MuL+sLTkG3BrXLBrFwsmFxBIWjpP6IznTFZYWqaL2xd+1fW1Hmml49peEbruXuOj8DVfDitDw8u/bLVqN7VmH79zLYQiVvKluivOrxzdxsKqF4lwvH795GuW5HnmPbZBy64JIwiG3m89PDclNJLlzmUxCUsey/S6CPoPm1lSvpiDbQzxh8ub6VOXqWMKiuiHKWFcdytI/IkbM4O/rbdbuPnTsCFGEqOe/7piGuuIftDTXoPpCaIYPy1+OELBxTx1PLz2+S6vD88v3kx1w4R1Zhnj1fqzWhlQP5bIPUvfKX7DDDdT/+ydk3/Gd1GLRYyKxJOt31/Gvl3di2TY3XziG6aNzO94m4hi7pTatzWyqglgLeDqfhISd
wGqpT2t3Yh1UjBikokmbH/19LfXNqZvVlXURfvDX1Xz/4wsIuOTHxmCkq4L4WQzHAahlk4kvexDX/DsRmtGp58h7QkOIz6Xy3++ZSX7IA8CsCYWs39l+4oBl2wTN1AdwvHgqa3c3tvu+40C8NYx1bMJBvGIXIp76cFZUhbc2tN+KAWDHgQYakjqB6RcRWngz/snnYdYfxY6k1l1YrY3Q2n4R6ta9dTzwr/VU1rVS3RDl14+/w47DTad9fYo/fYMuNZCL4+pahQBTD+AZO6t9o1BQQ8VdOs5A1tASa0tAx0ViJrVN3Z9BJfVvqhBpG0V2leLNQgkWYB7Y0PnnnNUZpQHFcaAs18NPPnM+P/rkQq5dNILhRSd6H7lZbjRVwTo28UBrrSE/25N2nIB1IhlooUIcNXVl7NgOozso0VOY48WxHRqXPUbjsseI7FqDnYyBY2MUDEfLKQHXiXstqqrw1sb0ZPbSqoOop9n11PQWErrwHhCpt7UwPORc9QmSXbyPYzoq/kV34BkzMxWPP5u86z9Pwld0hmcOHl63jvaun7UQEPB0vb6YNECInplPqpZOJrn9jU4/XvarhxjHgZygGyueWlZ60wVj2HmwkY9ekE1x63ZcTVvQxlxPvHQy9vZX+Y/Fn+OHT1W0bfc9b0I2BQ2bAFADORiFI3ACqR6CbTucP72Ut9YfofFY0dK8kJvJo3Lxe1SyL7wbIQSKN4uWDa+Sc8FdxI7sRPVno1iJtvs+juOQm5We/PI6aDuZJXTUsReQX3YOdqwF4c8lbuR0q2J1TM/FffHH8Z/Xgq0aJFTfkLoXEvLpvP/qSfz+qS1tbbdfMo4cvyx8Olg5jnPK5RpdoRaPI7nlVezWBhRf9hkfL5PQEFeY5eJ/PjiOlse/gx1pJgEkdiwn77avYwM+xcOPPjaHisYkPrfGsJDA0yBgzATUrAJMTx4JJXW/RQjI82t87s5zqaqPgBAEvTq7DzUyNuSl4bW/AeAZMY3A1MXUPvebtjjCm14j785vEfMUY9sOC6eVsHRjBVec4yfLZbOrDhbNHoZ1huECCxXLUwieY9stnEXmMB0NUz/2RzSEEhAADsybWMCo0ixqG6PkBNwUZrt75ENK6p+SFhja2f+ChaqjFo0luXslrmlXnvHxMgkNcY4DWtNh7EgzkNqozjNmBslomB2xfP748kFGlgS5afEY8oNG6p5Q4XQgNROuoSmOx50k4NKor6tFj9bxg8eqicbbL3ybVj6Z4LF/C5eL5jXPt4/DTJA8sg0xthjHgVGFPn5wlU7zK7/CjrUyrmAE2e6PEyW9BpymKdi2c1YbxUnpFCEoynJTlNW9Dc+kgSWWtPEaPXOHRi0ej7l3lUxCUicd6y14x81B9fhpXPoYODalecN438K7+NHTFWzeU8f9H5uP/9jMqJrmOD/+x1pqG2NoqsLdV05giqeKRGtLWgICaDWVE0lIKB2WfXdOqq6ZqDlI4zP/y/EuSLJ6P40v/x7/Nf9NktSsG92OIqp2ENn6JlpeOe5x84l5hs59G0nqSZGEg9/VM0lIyRuOve4pnFgY4T79xCA5MUFCyR2G4vLiKhlDy8YlbetjrNqDlFe/xfjyIOFoksq6VNUA04ZfP/EOtY2pmVKmZfPnZ7YSVXz46ncwoTzY7vguXaUoP4ucaz6F4vYT2buBwJQL2gehahilk9pGz5INR3n3GFiiYhdqPDWjTlEE9p4V1D39P0T3rKPl7Sepe/S7uBPtZ9lJknRmsWSqxqLX6JnxVqGoKLnlmEd3nPGxsickkXDnkXv7vSR3r0r/5pF3mDpsPjsONaNrqWuW1rjJ/srmtIfWWz6CB9byobnTeMQTYvWuRoYV+vnwdeeQ5XVhemeRc9dYsBJgeMkNFtK68WUUXwjf1EuI+0va8o7qTd/8TQ3kYOupoSE92UL98kfbfd+OhrHrDkFxd5fbpVNVBSEEpplebFWSBovGiE1eQO1yBezTUQIF2LUHYcTM0z5O
JqFBpKOqAqeqNKAoJ4bEHAfinkKMwlHpBy0Yy86jCSaOyKYkz4eigNetMmFYiEjCpKE5TjiaZO74bCZmRQje/EWSjVV88jwfrfMCePOLsXVvWwxxLYhiiNT9m4Jz8Fw5BccRxCy7LQEJIdDyyvBPv5TwhpePBayRfflHU2VzHNqmkwrDjVB17OjxvV4chBC4zQaspmoUt5+kNx+LM08tVhTRdl9JCIErfJjo5iVYLXV4p1yMXTAOU7SfHXb8b3YozZyTBp/qFpOy7J5NB8ITwG5NX/T9bhlJQr/4xS94/vnUjenFixfzxS9+MRNhDBrueC3Jim04kRZc5ZNIBMvRky1YR3diNR7FVToBK3s4cQwO14R5/u195PsVxuarOAhiejZ7K5ooz8qheOJ5RLe9BaTWxzjTrmNWnYeSPA/Jyp34YkcI5Jbz1Ys04kcP4QQL0PNGElvxMJEnNhHz+PEvfi+rqnwI4TBBayAv2ErcU4hmRRF1e0kc3Y2eU4ZSOJY4QY5nH1eyAatyB3bjUSJZeWihfPJv+HyqbE9WEXF3/onhOi1A/jWfJH5kJ04yjpaVT3jbStS8csTR9UQPbEb1h0jGWkHR8Ey+gKjecQ8pbtrsrwqz53Aj5YUBRpcEybGqqX3oW20186J7N5Bz9X8iymZhJBqxju6k7p0KjNxSHARKdilxb0m3poNLUqYdbbKYUtrD0+9VDY79/ZxOnyeh5cuXs3TpUp544gmEEHz4wx/m5Zdf5tJLL+3rUAYFV6KWuke+3Ta7DSD/pi/Qsv4VhGOTqDtC84rHyb7io6yLjeXXj29qe1xJrocvzEuwP15Awgjx19cquHTGZYy6bCGKlaBOy+GXjx+kKRzHtBwmjwjxH+VVGBtfxTNiKpE1z+Mun0hyv5fEgdRx7WiY5hd+Rd7Cz/Odp2tT55gToXiSRmzbUppXPnEi9uHn4L/skySEB5fZROOTP8asr2j7fmjRLdS//EeCC2/BKprW7gPeiFZR++yvsdtK6QgKbvkS8Z1v07TskRPnKBmLnluCuX8D7uHTiRntE5EDPLfywEmlhmDmhAI+Ot9oS0DHNS9/jJybxtLw/M9IHj3x+Kw519K6/Amyr/wEMV9pF357kpR5juNwsD7JLTO7tvPrGSWiCNeZF4r3+cSE/Px8vvzlL2MYBrquM3r0aCoqKs78RKlDVtWedgkIwKyvxFU4HDsZwztmBqHzbqW+oZkHn9/W7nEVdVEqnVxGuRupbYyyaFoJ//fsbtbVB1jVkMP9D++irinWVspjy/5G6oITSNYeQfWmJh+4yycQ3bM+La4su77tHBWiEJqraH77yXaPiR/YjGiqBMCpP9QuAQE0r3kB36SFtKx6Gt2JtrULAckj209KQAAOTSufIFG1r90x4hW70LOLsBMRrOq9aXE2tCZ4dln756zdXk2VmT6jRwiB01jZLgEBNK9/Cd+4WcR2rkCRC2mkAaa6xUJToCirZ/skdksNSk76VjDv1uc9obFjx7b9e//+/Tz//PP885//7NIxurpXe1/ry/ia9rXfg947bg7RveuJHdwKQPzITvS8ctQ57yGWqEx7ftIWqIpFNG6iH1tvI0htqd1RHamEnfqQPT6d2myuR88uIvmuBBJXfEBL2zkcK9muKvVxmmIRzA/QUp1+499OxFB0A8Xw4A94Ud0nrqoatkfTHx8NowXSh9wcx0boLoSVSPvdNMUa6Gh5ka15ELobJ3miVlro/NsQooOp5WYSoek44UZyc7tWp64v9Pe/F5AxdsTrc6H1wOLRM1m+t4nzJgTJzu65bUocy6S6dj8F134MPfv0P7eMTUzYtWsXH/3oR/niF7/IiBEjuvTcmpqWMz8oQ/LzA30an7tgVKpW2rEPeKNwOI1vPdLuMcnaQ+ToCS6fN5xnl+1va3cZKsVGmGorm+HFQTbvrWfSyBw27a7l3HH5TB2Tx6bdJypTZ/kNCp1ahO5um0XTun0FORfeTd2rfwU7tT5I
lE9n1VEX0IJLVynWW1CCw3GPmEps/4nhQNWXhektoKamBXewGKG1HwLzT15IZPc6gotup77FhpYTP1d3yURAcPI07sCsq4gePFFmBkDxBjFyy2lc+W+yLv5Q2u/GZ6hMHJHDtv0nbqDmZbnJy8ki+/ZvEtu+DCtcj2fS+Zi5o9GSLQiXFyd+YpM779hZRPdvJjDvpn733uzr92N3DJUYu5rEIq1xYrH4mR94FpKWw8aDEb56dR6NjT23caN56B1EqIRG0wc1Lad97RlJQmvXruXTn/40X/3qV7n66qszEcKgkfCXkn/b12he8Th2uAE9f0SHj9PcPkaVhrjjkrG8sb6C0lwXN8wM4dJMNHLZtbGS6voI77t8NKt31tMUjnPD4lGUFfhZu72asWVBrpkgCBxYgv+6TxGrOoAWKkTNH4GZPYzsi+5B0XQcd5BdkSCrXq5k1thsbpyZxfBsh5i7kMCFH8BzeDNWUxWOZeOevJiYlpqKHfcUknf7N2hZ+QRmfQX+iQtQc0pwT1qMmZW+KV0iUEb+rV+lecVj2NEW/LOuhvJz8ZVMQgvmE92xEqNoFP5zzie8822yFt9N3FeS/nNRBB+94RxeXXOIVVurmDwqh6vnj8SjK8T0EpRZt6EpgsSxfZgsVy55t36d8Kp/k6w+gGf0uWiBXJRQMWb2yJ77xUpSH9h8JM7ofIOCQM+lAicZx9yxFM8Vn+nU44XTx9N5KisrufHGG3nggQeYP39+t47Rn6+aMnVVpwsLHBOESvTNPxLdtrzte67yibgv/hh7d+4jqCdxsssxhUFlU5KSXC/FNW9jBQpwkkka/CN54IndjCnLoqo+QlGuh4tnlTOywINqx1EAE5WYbeByooh4C+Gl/0CoOv4ZV2Bmj8IgTtTR0LFBqCRs9diU50O0rHgMs/Yw3nPOw5iwmLjWfj2QptgodpKs/Dzq6sJnLMWTet0WpjDaZs4pCmhWDFt1IewECI2ko572OEIIEpaNoSqdmuGmCZugVyEcNXEcSDr9c7XDUOll9LZM9IRWvrGqV3tCScvhz8ua+M+LshmR17m9f87EcRyS655EyS7Bveh9be39qif0hz/8gXg8zg9+8IO2tjvuuIM777yzr0MZVFIfsio44J1/B+7hU4nv34irbCJa+TlEt71O7kkz0/S5d/HY6iB3z/HgWfm3tvaDc7/MkZowR2pSN/23H2jgSE2ED1wziWjcIj/oJuDRETgkhAfcHjyXfzY11dtywAbzWH23BLSNlrliNdQ+/B2cZOqPqnn54/haGtAX3IPlnJgfY9oKkJoq2placCe/7uNsGxLCDTapY3XiMstxHHTlzLu3tsXpKKjeAInW/v3hKUmnsvZAjNEFeo8mIHP7GzhmAtf8uzr9vD5PQl//+tf5+te/3tenHVISWhAxfB7GyAVYto3aWkH4pAQEoKx9lA9e9FXyWk/MGFPKziEnP5c7LvWzcVct2/bXc87oXIYVBvjab5anynq4Nb783tmU5ZzYVkGYMdTmStR4BBEqJOHKT/swtxqOtCWg41o3v0H+rOuw9DOXe5ckqec0RizWH4zx9WvyeuR4juNg7ngLu/4Q3mu/ilA7v+9U/xxDkM6a44BlHauI8O5tqYfPpKL0YtbujVPgKWTqoo9htFbzZM1wXnvwHQAWTi3hsrnDKcj28PArO7ni3HzKczRW7I7wq0c3ct+H5mCoCrodIbbs70S2LQNAaAZ5t3yVWHBEu1N2tNWvMNw4yumHySRJ6lmO4/DqtgiXT/aR5z/7FOBYJsl3XsKJteC55stnLFj6bjIJDQEikI8wPDiJKIo3yOb8y/jtMyd2Ln3W7+KTNyxgyasnZq4t3VjBh6+bTH7IxXevz8a78V9YByqZMmo2+yZfSCRuYXgVRMOhtgQEqS0ZGl/9I8Ebv0aCEyuwRXYZev5wkjUH2tpC599JUs+iwznSkiT1ivWH4jjAZZPPfjmBHWkiuf4pRFYR3uu+gtC6XnVB
JqFBKGbaNLUm8Ht0Am6NhJFDwY3/Rd1Lv8ccMZ9/La9p9/im1jiKcPjCFXkkHZVn32lld0WY9Ttq+MxluTQ8979Yx6Zf23veZrQZw3vOp3AAJ9IIgNDd+KcsRvX4cSwT3YmTOKnOWlwNkHXN57CrdmM1V6MXj8UKDcdxwGU2QTwM7iAJLdAnddiEELTETFpjSbJ8Bh5dkfXfpEGvusVk1d4oX74qD/UsF1abFdtIblmCMf1qjKlXdLv4qUxCg4SiCDQVGsIJvvfXNRyti+J1a3zi+vFMGZFFvL4Sd/lEEuWTSC6rZN64LK4Yr+CzmsnKy0WpWELruudBUfnUlCv5d85oirMFHN3etv7nOPPARlzJJmJGLpo/G6G7yLngLhpXPonVUodwedHzh6GUnIt90qSDuB6CslkIIYg7DkI4GDVbqX/ul9ixMKovRM41nyYe6qCQag/bdqiRXz66kdaYSU7QzefvPJfSHI9MRNKgFU/aPLspzB1zghQGu//R78QjJLcuwQ7X4r3y86gFZ/f3KvcTGgTciTqMyvVEX/pfjNd/xrcu05k9JkgkZvL/HtlKbVUtdrie8KbXsJc/yPsvH80dpfvJXvYzjJV/IvrMT1CwMIpGgW3Bxme4aqzNucF6SKZPEVU8fhw1dY/HcfvJu/JjNL79FFZLai8fJx6h9plfYkRr054LtE1acMXrqXvqgbbyO1ZrI3VPPYBhpm8T0ZMaIkn+3z/X0RpLJdf65hj/75/riSbTqyFI0mDgOA4vbmllcqmLuaO83T6GeegdYm/+CSWnFN/N3znrBAQyCQ14LqsZ68gWap7+XyK71hA7sJnYi7/gvdMsXIaKbTscrarDXTIGANXlZlwogdj4TLvjtKx/Bd/YWW1f58YO4t7wCMm6I7jLJ7V7bOiiD5DQUrXjLE8udiKK1fyuhOPY2C3th/3ezYpHsKZdhzJ2IRyboGBHW6C1oVs/i86qbYqmlSSqb47R1Hrmir+SNBCt2BvFtOHO2en7dHWG3VxNYuW/sA6/g/eqL+Cef1eHk426Qw7HDXTN1STrjqR6MCdRt77I7LHXs3RLHSHdxIq0knv5f9Cy6TWURBjn3YtnHLvdltua4cKOhQlveYvAjMsJzLoKKxZBCRVhBk5sWZDEhbdoDIrHjx1tPwtPeEOnDLslZvL0qijLNgUpzy/k7sXnkbPq19iJGLh7t0ZXyOdCiPZ7APncGn5356eVStJAseNonO2VCb52dR6a2rX7Nk4iSnLXMqyKHbhm3Yg+8UKE0rN9F9kTGugU0eGbQqgapuVw13mFFKhNWJEwDUsfxSgYTjAvH9UXavd41Z+NHU8VBXWPnIrZUp+qR6douIZPIV4whWT5HOKBYVjvunaJugrJufLjoJxozzr/TkxvQYchO8CDL+7gldWHicZNdh5u4XtPVdE67TayL/8ICVfO2f1MziA34OK9V05s+1pVBJ+4eRoBj7wmkwaXyiaT13ZE+M+Lcgh6Or8cwrEtzL1riL3xBxRPEN/t38eYfHGPJyCQPaGBz1+InlOSVvzTM+s6LrFLyPGAT+jUPHw/en45enYhomIzOVf8B7GDW2lZ/Sx6yViyFt+DlYhTMHJ6av2OmcQ1agYE8km48k5bScBxHBJ5k8i/5wfYLTUIb6ow6buT1XHNkSRrtlW1a4snLeoCkykszen1yQGKgPOmFDNpRA6NrXHysjxke7VOVWiQpIGiOWrx9IYw71sQojync718x3Gwj+4kueNNlKxivNd9FTW7d/fIkklogIurfjzlU8i/7tNE923CTsZxRs7hB6+E2X54A6oi+NptYygMFeKftIiG1//R9lzfxAXkXvlRFE+QRNYwbBvM05zrdBwEMVceuM68AlvTFLxujUis/dncbne72XS9SRGQH3SRH+zh3SRP4VTbrEtSb4gnbf69Pszl5/iYXu7u1HOs+sOY298Ex8Z9/gfRys7p5ShT5HDcIBBVQ8Tyz0FfcA9VE+/gP/5S
xfbDrQBYtsPvXzmCb/4tNL39VLvntW5bjh1povapn6HHzrwXfE8JuDXed1X7yQ5Tx+RRnNu9WTv9mebEcdXvRGx+Bv3ImtSaKEnqRZbt8MymViaWuLh00pn3CLLD9STWPklyw7PoUy7De/N3+iwBgewJDRqOA8mkRVV9+p4gR+siCFcRdqw1/XmWmRrGS0TAaH8vRgiIWw6m5eA1lE4VAu0M23aYMTaXb//HPI7UtBIKGAwr8OPSBtc1kaKAs2MZta/9ta3NKB5D4OrPklD63+Z30sDnOA5LtkfwuRTunBM87QJSJx4huXs5VsU2jKlX4rny8z02460rZBIaZIrz0q98Fk4twcgrxSgcRaLqpC2uFQ2hqGihQvC2T0C247DzcDN/fGYLjS0JLpszjCvnDcfn6plab6oQlOV6KRuEvZ/j9HgjdUsfateWqNwNDRWQOy5DUUmD2ZoDMerCFl+6MveUFREcy8Tcvw5z7yr00XNx3/YDFE+wjyM9YXBdeg5AqipQOzlt8lSPVRSBqqZ+lQVZLv7rrhlk+VNXNHMmFXLzhWOIKgGyrvgYrvLUrDA1mEfORXcTPbyTnGs/S1xpnwyONsb4+4s70DUV07J5bsV+Xll7CNHFUh+pfXocTNuhm1U9BixhWzjJ9LVHJ08gkaSesqsqwcZDcT59cQ5uPf2j3XEcrModxN/8I05rPb7rv4F70XszmoBA9oQyqrIxyrJNldi2w8KpJZTkeOjoc9oBKuqjLNtUgSIEC6cWU5yd2kqhujnOis2VtESSnDethPI8H+cMz+b+j84naTn43VrblUbMVYDvys+TL6IkLQfbsvCOmk+U9jfnFWwKkkf48rjtOEKhKX86v3yjmVdXH+LyOcNwd3LYLG7arNlRwxNv7MGlq9x1+XgmlGehDpFsZLpDeCfMJ7L9xAaDwvCgZheTzGBc0uBT1WzyyrZWPntJDtm+9NEKu7mG5NZXwUrivuA/0EondXCUzJBJKEMqGqJ883cr26YFv/j2Ae778DzKOxieOlIX4d7fr2ybuvzC2wf49n/MQ9cUvvl/K9q2nl6y5hBffd9sxhQH8Ogqng5mZSbRMfJyaKppOWU/2GjaT9O/v8fxm0BBsYRPLv4cf1qVQFc7l4CEgM376vnD01va2n76j3Xc+8G5DC84883SwcB0VLzzb0MNFRDdugy9YDj+eTcRd+X22P01SWqN2zy1Mczd87LSNqhzkvHUYtMj244tNr0A0c+2T5HDcRmgqgpvbqhoty7FcVKJSHtXL0PTFF54+0C7tTO27bBqaxXv7KlrS0DHPf76bpyz6GioqiC84SXafUo6NqHq9Xz4usl0dsG1jeCFlQfS2tfurEY5y+q9A0lcD6FMv4Gs27+N+5KPE/OWyCKpUo9JzYQLc94YD7NGeNp/r3IH8Tf+iNBcJy027V8JCGRPKGNMK71YpmnaaR9QjpNqfzfLdrA6WFxpWvZZXmULsNIHi9yaQzDH27ZR3pkoAvKyPew50n5Kck7APeQ+hFPbjXuObTcuST3n9R0Rsn0q104/UerKiYVJbnkFu7UR92X/iVbUvyfByJ5QBliWzeJzS9Nu1F8+b3jah7xl2Vwxb3jaMeZMLGTq6PQZMDcsHt3hfaWuxOabfvm7WgW+yed3OgEBOLbDdYtGoZ00fBf0GUwbc/rqC5Ikdc7WijhHGk0+tCiEcuzDxKrcQXzpX1AKxuC75Tv9PgEBCGcAfiLU1LRkOoRTys8PdCo+BzhU28pzK/ZjWQ5XLRjB8Hw/HY1U2Q4cqAnz/Ir9KEJw1YIRlOf5EKTuLb2wcj8tkSRXzhvBqOIA2hmGu84Uo4qJ1rD32P5CGv4ZV5AIjexyNQMhoKY5zt6KZnRNYWRxkJC3c+VDOvtzzCQZY88YKjHm53etMO/KN1YRi6VvpQJQF7Z4eE0z/315LqXZOo6VJLnlVeyGCjwXfRS1cMxZxdrTTvfaZRLqYV19sx6fWt2ZXsapHpuati06
3VPpbIyaKnAAy+r7t8hQ+WDqbTLGntGfklDScvjnqmaumOzjvHG+VMWDdU+i5o/Cff77EXrnyvT0pdO9dnlPKMO6MsR1qsemkkTPJ4p377kjSVLmLd0VpTxbZ9FYL1b1XhIbn8M1+5bUzLcBuPxBJiFJkqQB4lB9kj01Ce67Lh/r0DuYO5fiuezTaMXjMx1at8kkJEmSNAAkLYeXt7Zy97wsjCNrMQ9uwHvd11BCRZkO7azIJCRJkjQArN4XY0SeweTkZsxDm/Be93UUf+9uANkX5BRtSZKkfq4pYrHxcIxby49i7l+H99qvDIoEBDIJSZIk9XvL9kS5aKSDb++reK76PIo/N9Mh9RiZhCRJkvqx2haTQ/VJFrc8i/v8D6DmlGc6pB4lk5AkSVI/tvpAjAtzjuAdcQ76yFmZDqfHySQkSZLUT7XEbPZVx1kkNuCed0emw+kVMglJkiT1U5uPxDnXc4TQ/JsQhufMTxiAZBKSJEnqhxwcth2JMD9wGG3MvEyH02tkEpIkSeqHasM2ihVn7OyFCDF4P6oH7yuTJEkawA7WRJmiH0AfxL0gkElIkiSpX6qoj3FOmQ+huzIdSq+SSUiSJKkfqo0qjDtn4BYm7ayMJKGnn36aq666issuu4y///3vmQhBkiSpX8tWwviGTc50GL2uzwuYVlVV8cADD/D4449jGAZ33HEHc+fOZcyY/rUToCRJUiYVuaIItz/TYfS6Pu8JLV++nHnz5hEKhfB6vVx++eW88MILfR2GJElSv1YUHBqbHPR5EqquriY/P7/t64KCAqqqqvo6DEmSpH4tNzT4e0GQgeE427bbbUHrOE6Xt6Tt6l7tfa2/xwcyxp4iY+wZMsZ0paV5A+Lncrb6PAkVFRWxZs2atq9ramooKCjo0jFqalp6Oqwek58f6NfxgYyxp8gYe8ZQibGrCUVzefr9z6WzTvfa+3w4bsGCBaxYsYL6+nqi0SgvvfQS559/fl+HIUmS1K/5s0KZDqFP9HlPqLCwkM997nO8973vJZlMcssttzB16tS+DkOSJKlfM/wBnEwH0QcyMv3i2muv5dprr83EqSVJkgYG1WAoZCFZMUGSJKkf6uqErYFKJiFJkiQpY2QSkiRJkjJGJiFJkiQpY2QSkiRJkjJGJiFJkiQpY2QSkiRJkjJGJiFJkiQpY2QSkiRJkjJGJiFJkiQpY2QSkiRJkjJGJiFJkiQpY2QSkiRJkjJGJiFJkiQpY2QSkiRJkjJGJiFJkiQpY2QSkiRJkjJGJiFJkiQpY2QSkiRJkjJGJiFJkiQpY2QSkiRJkjJGJiFJkiQpY7RMB9AdiiIyHcJp9ff4QMbYU2SMPUPGOHQJx3GcTAchSZIkDU1yOE6SJEnKGJmEJEmSpIz5/+3dfUxT1x/H8XdHwYc4o2YiCxqIj9ElMjIzrZph0VTKpRSND+BSVOYDxojTPxAXo5nBiMYI4rbMROOzMZqABiLowiRTMRAWlcQsmcEBoozh3FZB0pb2/P4w9if57UEz/d1b/L7+u6cHzufeE/j2XMq5UoSEEELoRoqQEEII3UgREkIIoRspQkIIIXQjRUgIIYRupAgJIYTQjRQhIYQQugmpIlRUVMT+/fuDx263m1WrVmG32/n444/p6OjQMR2UlZWRnJyMzWbj5MmTumZ5XmdnJykpKbS2tgJQU1ODw+HAZrNRWFioczr44osv0DQNTdPYvXs3YLyM+/btIzk5GU3TOHz4MGC8jM/s2rWLvLw8wHgZXS4XmqbhdDpxOp3cunXLcBm//fZb5s+fj91uJz8/HzDedexTVAhwu91q8+bNavLkyaq4uDjY/vnnn6sDBw4opZQqLS1V69ev1ymhUj///LOyWq3qt99+U11dXcrhcKg7d+7olueZmzdvqpSUFPXee++pe/fuqe7ubpWQkKBaWlqUz+dTWVlZqrq6Wrd8165dU4sXL1Yej0d5vV6VmZmpysrKDJWxtrZWpaenK5/Pp7q7u5XV
alU//PCDoTI+U1NTo6ZOnao2bdpkuLkOBAJq5syZyufzBduMlrGlpUXNnDlTtbW1Ka/XqzIyMlR1dbWhMvY1IbESqqqqIjY2luXLl/dqr66uxuFwAJCSksJ3332Hz+fTIyI1NTVMmzaNIUOGMHDgQObOnUtlZaUuWZ535swZtm3bRmRkJAANDQ3ExMQwatQozGYzDodD15zDhw8nLy+PiIgIwsPDGTNmDE1NTYbK+OGHH3Ls2DHMZjO//vorfr8ft9ttqIwAv//+O4WFhWRnZwPGm+u7d+8CkJWVRWpqKidOnDBcxm+++Ybk5GSioqIIDw+nsLCQAQMGGCpjXxMSRSgtLY1Vq1YRFhbWq/2XX35h+PDhAJjNZgYNGsSjR4/0iNgrC0BkZCTt7e26ZHnejh07mDJlSvDYaDnHjRvH+++/D0BTUxMVFRWYTCZDZQQIDw+nuLgYTdOwWCyGu44AW7duZcOGDQwePBgw3ly73W4sFgtffvklR44c4fTp0zx48MBQGZubm/H7/WRnZ+N0Ojl16pThrmNfY6hHOVRUVLBz585ebaNHj+bIkSMv9PVKKd56S5+6GggEMJn+u9W7UqrXsVEYNeedO3dYvXo1ubm5hIWF0dTUFHzNKBlzcnJYuXIl2dnZNDU1Geo6nj17lnfffReLxUJJSQlgvLmOj48nPj4+eLxgwQKKi4v54IMPgm16Z/T7/dTX13P8+HEGDhzImjVr6N+/v6GuY19jqCJkt9ux2+0v3D8yMpKHDx8SFRVFT08PXV1dDBky5PUF/BtRUVHU19cHjzs6OoK3wIwkKiqq1wc4jJDz+++/Jycnh88++wxN06irqzNUxsbGRrxeLxMnTmTAgAHYbDYqKyt7rcz1znjhwgU6OjpwOp388ccfPHnyhPv37xsqY319PT6fD4vFAjz9ZR4dHW2ouX7nnXewWCwMGzYMgDlz5hhurvuakLgd91cSEhI4d+4c8PSHcMqUKYSHh+uSZfr06Vy/fp1Hjx7R3d3NpUuX+Oijj3TJ8nfi4uL46aefgrcdysvLdc3Z1tbG2rVr2bNnD5qmGTJja2srW7Zswev14vV6qaqqIj093VAZDx8+THl5OefPnycnJ4fExEQOHjxoqIyPHz9m9+7deDweOjs7KS0tZePGjYbKaLVauXr1Km63G7/fz5UrV0hKSjJUxr7GUCuhl7V+/Xry8vLQNI23336bPXv26JZlxIgRbNiwgczMTHw+HwsWLGDy5Mm65fkr/fr1o6CggHXr1uHxeEhISCApKUm3PIcOHcLj8VBQUBBsS09PN1TGhIQEGhoaSEtLIywsDJvNhqZpDBs2zDAZ/4zR5tpqtXLr1i3S0tIIBAIsWbKE+Ph4Q2WMi4tjxYoVLFmyBJ/Px4wZM8jIyGD06NGGydjXyJNVhRBC6Cakb8cJIYQIbVKEhBBC6EaKkBBCCN1IERJCCKEbKUJCCCF0I0VICCGEbqQIiZCUlZX1j/sEvkif2tpaUlJS/nG8CRMm/On3qqqqCm7373K5qKyspLW1tdf2NEKIvxbS/6wq3lzXrl17JX3+rdmzZzN79uzXPo4QfZWshETI2bx5MwBLly6lrq4Ol8uFw+EgNTU1uI3T833a2tq4fPky6enpzJ8/n1mzZlFUVPTS4xYVFTFv3jycTieXL18GoKSkhNWrV7+S8xLiTSQrIRFydu7cSUlJCUePHmXRokXk5uZis9lob29n4cKFxMTE9OozdOhQcnNzKSgoIDY2lvb2dqxWK5mZmS817siRI9m+fTs//vgjLpeLioqK13SGQrw5pAiJkNXY2IjH48FmswFP9++z2WxcuXKl199kTCYTX3/9NdXV1ZSXl9PY2IhSiu7u7pcaLyMjA4Dx48czZswYbty48epORog3lNyOEyHLZDL9z3NdlFL09PT0anvy5Anz5s3j9u3bTJo0idzcXMxmMy+7beLzz6oKBAKYzfIeToh/S4qQCElhYWFER0djNpu5dOkS
AO3t7Vy8eJHp06cH+/T09NDc3ExnZyeffvopiYmJ1NbW4vV6CQQCLzVmaWkpALdv36alpYW4uLhXe1JCvIHkrZwISUlJSSxbtoyvvvqK/Px89u/fj9/vZ+3atUybNi3Yx+VysW/fPmbNmoXdbiciIoLx48czduxYmpubiYiIeOEx7927R1paGiaTib179+r2AEUh+hJ5lIMQQgjdyEpICODgwYOUlZX96WuffPIJqamp/+dEQrwZZCUkhBBCN/LBBCGEELqRIiSEEEI3UoSEEELoRoqQEEII3UgREkIIoZv/AP/kVwligiBHAAAAAElFTkSuQmCC",
      "text/plain": [
       "<Figure size 432x432 with 3 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Create a visualization with Modin df \n",
    "sns.jointplot(data=modin_tips, x=\"total_bill\", y=\"tip\", hue=\"sex\", hue_order=[\"Female\", \"Male\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<seaborn.axisgrid.JointGrid at 0x7fc3bda21520>"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAaEAAAGkCAYAAACYZZpxAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAACHGElEQVR4nOzddXxcx7nw8d8cWl6tmM3sGGKmxGFmTpqUb/EWb2+5TUopv7ntLd5yUwo3zA6ZYrZjZpQsptXigfePtWUrK9uSLGkF8/180lqj3XOelVb7nJkz84xwHMdBkiRJkjJAyXQAkiRJ0tAlk5AkSZKUMTIJSZIkSRkjk5AkSZKUMTIJSZIkSRkjk5AkSZKUMVqmA+iKmpoWsrO9NDREMh3KKfX3+EDG2FNkjD1jqMSYnx/ooWgGlwHXE9I0NdMhnFZ/jw9kjD1FxtgzZIxD24DqCUn9n5OIYh3diVWzH7vpKE60GWwTVB3hyULJLkHNH4laOBqhuTIdriRJGSaTkHTWHNvE3LeW5PY3sap2o2SXoGQVIrzZKNmloKhgmzixMHbtAczdK7Gbq1FLJqCPPx9t+DSEIt+KkjQUyb98qdscK0li6+skNz6D8Gajlp2DPuUyhGac+bmJGFbVThLr/k182YPoU6/EmHRhp54rSdLgIZOQ1C3mgQ3Elv0N4QthzLgBJVTUpecLw41WPhWtfCp2YyXm7pW0bnoe15xb0cYu6KWoJUnqb2QSkrrEiYWJLf0LVvUe9MmXoOaPPOtjKqFijFk3YjUcIbHhWRLbXiNxw6eArLMPWJKkfm3AzY6TMses3EHro9/AAVznvb9HEtDJ1OxSjIXvQc0bScVfvkZ843M4jt2j55AkqX+RPSHpjBzHIbH5JRLrnsaYdgVqweheO5cQCtrIGYTGTKJu6RNYBzfhvvhjKN5Qr51TkqTMkT0h6bQc2yL21p9JbnkV18L39GoCOpkWyMGYezsikEfksW9iVmzrk/NKktS3ZE9IOiXHTBB95Zc4sTCu+Xch9L5d1yMUBX3cQpTsUmKv/Ap92pUYU69ECNGncUiS1HtkT0jqkJOIEnnuJ2BZGLNu7PMEdDI1fwTGwrtJ7niL2JLf4JiJjMUiSVLPkklISuMkokSe/THC5UOffhVCyXzJEsUTxDX/TpxYmMhT92NHGjMdkiRJPUAmIakdJxkn8txPEN4Q+jmX9quhL6Hq6NOvRsktJ/LEt7DqDmY6JEmSzpJMQlIbx0oSffF/Uj2gcy7pVwnoOCEE+tgFaOPPI/LMjzAPbsp0SJIknQWZhCQAHNsm+upvwHHQp17eLxPQybSSiRgzryf2+u9IbF2S6XAkSeommYQkHMchvvzvOOFa9OlXI8TAeFuoOWUY8+8kseFZYisfkgtbJWkAGhifNlKvSmx6AfPwOxgzb0SoA2vWvuLLxrXgPViHNxN75Vdy5pwkDTAyCQ1xyf1rSW56HmP2zRmdhn02hOHBmHsbTiJC5JkfYEebMx2SJEmdNLAue6UeZdUeIPbGH3HNvgXFE+zUc0zbobLR5GiTSXPMJmk5GKog6FEoDGqUhDRUpe/vJwlVQ59+DeaOt4g88W08V30eNVTS53FIktQ1MgkNUXakieiL/4Mx+ZIzbsOQtBzWHYjx9r4Iu6qSBNwKuX4Vn6GgKqnE1Bp3qG+1aI7ZjC3QmTncw4zhbrxG33W2hRDoE85H+LKJPHk/nos/jlY2uc/OL0lS18kkNAQ5ZpLoSz9DLZ2EWjLhlI+LJmxe2dbKku2tFAQ0JhQZnD/Wi+c0iSWatDlQm2Tl3igPr2lm1gg3V0z2UxDsu7eaVj4F4c0ituTXGDNvxJh8cZ+dW5KkrpFJaAiqfeF3CEVHG7uww+/bjsOyXRGeWB9meK7GbbOC5Pg6VzXBoytMKHYxodhFa9xm4+EY9z9Xy7QyNzecGyC7k8c5W2ruMMT8u0iseQKr7gDuhe8dcJMuJGkokBMT
hpjE1iVED2xGn95xIdDasMlPXqzj1e0RbjjXzxXn+DudgN7N51JYMNrLBxZm4QD3PV3DMxtbSFrOWb6KzlF82bgWvgen8SiRp76H3drQJ+eVJKnzZBIaQqyq3cRXP0b2+bcjtPSZcGv2R/neM7WUhjRunx2gsIeG0Ny6wsIxHt4zN8jWyjjfeqqG3dV9M5VaaC70mTeg5JQTefxezCNb++S8kiR1jhyfGCLsSCPRl3+BMfUKtGAuNEbavmfZDo+uaWbdwRg3zui55PNuWR6V66cH2FmV4NevNTBnpJsbZwQxtN6dTZcq9TMfJbuY2Ku/RpuwGNesG/tFYVZJGupkT2gIcGyT6Mu/QC2bjFo4pt33ogmbn79az57aJHfNDfZaAjrZuEKDe+YHOdJo8p1najhYl+z1cwKoeSNwLXov1pGtRJ78HnZzdZ+cV5KkU5NJaAiIr/gXQNpEhMaIxQ9fqMOlCW6c7set993bwWMoXDXFx8zhbh54uY4XNoexnd6/VyTcfow5t6Dmj6T18fuIb35ZlvuRpAySw3GDXHL3Csz963AtvKfdRISqZpMHXq5jcomL2SPcGSlYKoRgYrGLkpDGC5tb2VIR50OLQoS8vTtMJoRAGzULpWAkyU0vYu5eifv8D6LmlPbqeSVJSid7QoOYVXeQ2LK/Ycy8HmG429oP1Mb58Qt1zBzuZs5IT8YrZmd5VG6dGSDXp/Ltp2vZcCjWJ+dV/LkY8+9ELRhN5On7iS3/B068tU/OLUlSiuwJDVJOLEz0xZ+jT7oIJVjQ1r6/NsH/Lmng/HEeJhT1n1pxiiKYP9rDsByNf7zdxI5qkxumenH18hChEAJtxLmoxeNI7lhK60NfRj/3WoxJFyJUvVfPLUmS7AkNSo5tEX3ll6gFo9BKJ7W1761J8PNX67lqela/SkAnK83WuXtukLqwybefrmVPX03ldvkwpl6OMecWzL2raP3Xl0hsex3H6ptJE5I0VMkkNAjF334IJxlDm7C4rW1PdYL/XVLPpZN8TCjxZDC6M3PpCtfPzGb+aA+/fK2Bh1Y3ETf7aIFrsADX7JvRp11JcsdbtP7zC8Q3PCeH6SSpl8gkNMgkdizF3Lsa49xrEUrq17u7OsEvXqvn8sk+RuUbGY6w88Yem8pd0Why35M1bDkS77NzqzlluObcgjHzRqyKLYT/+QVib/0Fq/5wn8UgSUOBvCc0iFhVu4mv/CeuebcjjFRvZ1dVgl+9Xs8Vk/2MyBt49zi8hsJVU/zsq0nw1xWNDMvVuXVWkIJA37x1laxCjOnX4MRaMA9uIvrMDxHBfIyJF2JnXdgnMUjSYCaT0CBht9QSfennGFOvRAnkA7DjaJzfvN7AlVP8DM8deAnoZCPzDcpzdNYejHH/s7XMGenh6ql+sjx9U/VAuAPo4xaijZmPXb2H5I63OLDyn6jDpqOPPw+1ePyA2RZdkvoTmYQGASfeSvS5n6CNmoNaOBqArRVxfvdmA1dN9TMsZ2AnoOM0VTB3pIcpJS7e3hfjm/+uYd4oD5dN9pHr75u3slAU1KKxqEVjCbhs6retJfbWn8FMoI1ZgD5uAWq2XG8kSZ0lk9AA55gJoi/+DJFThjZyJgAbDsb4y/JGrpnmpyx7cCSgk3ldChdO8DJ7hJt1B2N8++laxhUaXDDex8QSA6WP1j2pHj/6qNnoo2ZjN1djHdmaGq7zZKGNW4g+Zj6KN6tPYpGkgUomoQHMsW2iS34Lioo+KXV/YvnuCI+ubeaGcwMUZQ3uX6/frXD+OC/zRnnYVhnnoTVNRBMOc0a4mT3Sw/Bcvc8W4irBApRgAdqE87FrD2Id2UJi7b9RC8agj1+ENuLcDiuXS9JQN7g/pQYxx3GIvflHnNZ6jFk3AYIXNod5ZVsrt8wMkusfOhWiDU0wrdzNtHI3tS0mO6oS/PaNRkzbYUqpiyllbsYXGX2y1bgQCmr+CNT8ETiTE1hHd5HY/DKxt/6CNuJc9HGLUIsntM1c
lKShTiahAchxHOLLHsSu2Y8x91ZsofLQqma2VMS5fXaAoHvoJKB3ywto5AU0Fo6BulaLfTUJXtwc5g9LTYqDGhOKDcYXGYwpMHq9YKvQDLSyyWhlk3FiYcyKbcSWPQiJCNro+antJXKHZbxskiRlkkxCA4zj2MSXPoh1dCfGnFuJ2hr/91o9kaTDbbMCfVoJu7/L9ank+jzMGgGm5VDRZHKoPsm/17dwtMmkIKgxvtBgfJGLsYUGPlfv/eyE+6T7Ry01WEe2E33xf0DV0UbPQR81ByWnXCYkaciRSWgAcWyT2Bt/xK4/jDHnVo6EFX79Wi3lOTqXT/agKPID7FQ0VTAsR2+bKWjaDkebTA43mDx/rKeU61eZUGQwqcTFuMLe6ykpgXyUCflo4xfhNB3FqtxB9IUHQChow89FG56qZSdr10lDgUxCA4STiBJ95Zc4yRj6rJt4c6/JE+tbWDzOy6QSecO7qzRFUJatt80etGyHqmaTg/Umz2wMU9FkUhrSmFziYnKJixF5OmoPJ3khBCJUjBIqRpuwGKelpm3Bsd1cg1o4BrV0ElrxeJS8EQhV/rlKg498Vw8AdksN0Rf+BxHIJzLxWh58M0xti8lts4bWBITepCqCkpBOSSiVlJKWw5FGk4N1SVbvj9IctRlbaDCl1MXEEhf5PfxzF0Igjs2wY+wCnEQUu+4gVs1ekjvewgnXo+SUohaMQs0fiZI7DCVULHtL0oAnk1A/Zx7cSOz136OMmsvS+HiefqaeqeVuLpkY7PErc+kEXRWMyNUZcazSRGvc5kBdkg2HYjy1IYymwrThPkblKowtcPX4xYAwPKjF41GLxwPgmHHsxqPYTUdJ7l6BvfZJnEgDwp+Lkl2Kkl2Gml2CEipGCRXJ6eDSgCGTUD/lJOPE336Y5N417Cq9jsfecaGpUW6ZFSCvj6oDSCf4XAqTSlxMKnHhOA51rRbVrYJlu6I8tKoZXRWMyjcYXaAzPDc1zNeTU8KF5kLNG46aN7ytzbFMnNZ67JZanHAtyapd2OE6nNZ6hDuIklVEbekIEu4ClOwS1FAJwu3vsZgkqSfIT7N+yDy4kchbf2WbOoGX4jfRsg0WjHYzpqDvFl9KpyaEIM+vMaLIzaRCFcdxaIjYVDaa7K5OsHx3lJoWE59LoSSkUZylUZilke/XyA+oZHtVNPXsf49C1U4M4Z3EcWycSBNOuA7HbMY8sB5nyyvYLTUI1Uj1lLJLUbPLUEJFqWE9X7asfSdlhExC/YhVu5+apU/wdqXBssSVaLrOzBFuxhX2XSkaqeuEEOT4VHJ8KpNJDYPZjkNTxKau1aKu1WLT4TjNkQhNUZuWmI3PpZDtVcn2KW3PTX2d+neWR+n2cKsQCsKXDb5s/CEvZmMESK0vI9aS6i211GEe2YKzc2kqWSWjqaG9QD4ikIfiz0XxhhCeLITbn+pBGZ7UMJ8qL4akniOTUIY5jk319k2sX72BdbUeDltTGFPo5pJxLkpCmvxjH6AUIcj2pZLKmHd9z7YdwnGblngqIYVjNvtqkryTiBOO2TRHbSIJm6AnlaDy/Cr5AZW8QKo3ledXyfIqXb4wEUKAJ4jqCUL+yHbfc8xEqvcUacSJNmPXH8Gq3IGTiOAkYpCM4SRjYMbBtkFRQdVAKCCU1LEVBRCpNkUBRUMoGmg6QjNAdyMM77GkFkB4gijeLOJmKXbSnWqT7/chRyahPmbbDpV1Yfbs3M/O3YfYUW0RdXRGZBUycUI2V+W70HtgqEbqvxRFEPSoBE+zDYVlO7QcS0hNMZvasMW+2iRNUZvGiE0saZPlSfWacv0qOb5UzyroUQm6FfxuBa8h8Fud25FWaAYimA/B/DM+1nFssK3Uf46dSko44Dip3pbjpNodCywLxzbBSoKZxDFjOIkYTmMlTvUenHgr1asjJMONYCUQvhyUQB5KsBAlq/DYcGMhSjA/lcikQUcmoV5g2w5NrQnqm2PUNsWorm2i4mgdFbURqsIO
XmIUGq0UBTWuOCebgvwQiqwlJp1EVQQhr0rI23GiSlrHklTMpjlq0dBqcaTBJJKwiSQdogmbaMIhblajiNRsP00VaErq2KoCiqCt5yEAIU60KYK2x+pq6j9DE7h1gUcXeAwFjyHw6gpeQ8fnEnhdCr5j7V3ppYVCXhobI6neWLQp1SNrbcSs2o2zby1OpAGntRHh8qWGCoMFiGABaiAvNYToy0nd09LljMCBaEAloeMVAbpaGSAcTZJMWhy/JnRIjY87zvF/g2M72I6DZaeu5izbwbJsTBvMSDPxRAIzaZMwLZJJi1jSIhY3U3/scYvWhE0k4dCaFCQsUIVDQE3gI4JfRMlz2YwpVMkZ58UI5CGMYT36s+kK23b6/U1oGePpGRrk+lVyzzDZze3RCUcSWFaqSoR1rKNiO6m/AfvYH4XT9j/HvwcWDo4Nlk3q78BOlT9KWA6tUUi2OCQsk4TpEDchZjrEkw4OqWTm1gQe46TEpQtcxxKZoQlcaqr4bKDWxkya6IpAVUJoWjZKNqg5AlUAAhRAmBGIhxHxME5zK6JuJ068FScRwW82ogsLXH4Utw9cPjB8CMOTGgLU3QjdANVI/b9iINTjQ4oqQlFTQ4xCtA0xpv6d+jrZFINI5FgwSupemRw67BHCcZzO9dclSZIkqYf170tNSZIkaVCTSUiSJEnKGJmEJEmSpIyRSUiSJEnKGJmEJEmSpIyRSUiSJEnKGJmEJEmSpIyRSUiSJEnKGJmEJEmSpIyRSUiSJEnKmAFVOw7ANC0aGiKZDuOUsrO9/To+kDH2FBljzxgqMebnB7r0+Lq6MLY9OKqqne61D7iekKaduvx9f9Df4wMZY0+RMfYMGePQ1qtJKBwOc80113D48GEAHnroIa655hquvfZavvKVr5BIJHrz9JIkSVI/12tJaOPGjdx5553s378fgH379vGHP/yBf/3rXzz11FPYts0//vGP3jq9JEmSNAD0WhJ6+OGHuffeeykoKADAMAzuvfde/H4/QgjGjRtHRUVFb51ekiRJGgB6fT+hiy66iL/+9a+UlZW1tdXX13PLLbfw/e9/n7lz557V8W3bpra2lvr6BizLOttwBz2Px015eTm6rmc6FEmSpL6fHVdVVcWHP/xhbr755m4noJqalrZ/19dXI4QgKysfVdUyvtuhpimYpp3RGE7FcRxaW5s5dOgQgUB+psM5rfz8QLvfc38kY+wZQyVGOTuuY306O27Pnj3ccccd3HjjjXzyk5/skWMmEjFCoVw0Tc94AurvhBD4fEGi0VimQ5EkSQL6sCcUDof50Ic+xGc/+1luuOGGHjyygxADbqZ5xshELUlSf9Jnn96PPvootbW1/OlPf+L666/n+uuv52c/+1lfnV6SJEnqh3q9J7RkyRIA3v/+9/P+97+/t08nSZIkDSByHEuSJEnKmAFXO66/iEQi3H//tzh8+BCKIhg/fiL//d9f5a233uKPf/w9ppnE7XbzyU9+lnPOmcr993+LaDTKd77zA/bu3cOnP/0xfvGL/2PEiJGZfimSJEkZI5NQN7355mtEIhH+/Od/YFkWP/nJ9zly5DC/+c0v+fnPf0NWVoi9e/fwuc99gn/969987nNf5IMffA/PP/8M//jHX/n0pz8vE5AkSUOeTELdNHXqdP7v/37Ff/7nR5g9ey633nonq1e/TW1tLZ/5zCfaHieEwuHDhxg7dhzf+tb9fOQj7+fyy6/issuuzGD0kiRJ/YNMQt1UUlLKv/71BOvXr2Xt2tV87nOf4J57PsCsWbP51re+3/a4qqqj5OWlFoYePHiArKwsdu3aQTKZlFULJEka8uTEhG564olHuf/+bzFnzjw+8YlPM2fOfJqbm1m1aiUHDuwHYMWKpbzvfXcSj8eprKzgZz/7KQ888EuGDRvBr3/988y+AEmSpH5A9oS66Yorrmb9+rXcffetuFxuCguLuOWWOxg9ejT33vtVHMdBVVV++MP/h2EY3Hff17jz
znsYNWoMn//8l3jf++5g1qy5LFiwKNMvRZIkKWN6vYBpbzi5htPRowcoKhqewWja68+1446rrj5EQUF5psM4raFST6y3yRh7hqwdd3b6Te04SZIkSTqZTEKSJElSxsgkJEmSJGWMTEKSJElSxsgkJEmSJGWMTEKSJElSxsh1Qr2gsrKCO++8iREjRrVr/+EP/x+FhUU9fq5PfeqjPPro0z16XEmSpL4gk1AvycvL589//kemw5AkSerXhmwSWrHlKI+/sYe65ji5QRc3LR7N/Mk920t5t/r6On784/upqqpCURQ++tFPMnv2XP7wh99SVXWUQ4cO0tjYwHvf+0HWrl3N1q2bGTMmVfjUsix++tPUNhD19fWMGTOG++77XqeOL0mS1F8NySS0YstR/vL8dhLHKhvUNcf5y/PbAXosEdXW1vD+99/V9vVll13Bjh3buPrq61i0aDG1tbV84hMfaust7d27h9/85o+8885GPvOZj/OXv/yL8vJh3H33rezevYvW1jCapvPb3/4J27b59Kc/xooVyxg/fmLbOX72s590eHyv19cjr0mSJKmnDckk9Pgbe9oS0HEJ0+bxN/b0WBLqaDju6qsv5sCBA/z+978FwDRNjhw5DMDs2XPRNI2iomJyc/MYOXJU23FaWpqZMWMWwWAWjz32MAcP7ufw4UNEo9F2x1+zZlWHxx87dnyPvCZJkqSeNiSTUF1zvEvtPcWybH7+818TDGYBUFtbS3Z2Nm+++TqaduJXoapq2nOXLn2D3//+t9x66x1cddV1NDY28u6yf6c6viRJUn81JKdo5wZdXWrvKTNnzuLxxx8BYN++vbz3vbcTj8c69dw1a1Zx0UWXcPXV1+H3+1m/fi22bfXY8SVJkjJhSPaEblo8ut09IQBDU7hp8ehePe/nPvdFfvSj7/G+992B4zh84xvf7vT9mmuvvZFvfetrvPLKi2iazpQpU6moqGDmzJ45viRJUiYM2a0cemt2nNzKoWcMlfL+vU3G2DPkVg5n53SvfUj2hCA1C663p2RLkiR1l2PbgMh0GL1uSN4TkiRJ6vfsZKYj6BMyCUmSJPVDjimTkCRJkpQplkxCkiRJUqaYiUxH0CdkEpIkSeqPLJmEJEmSpAxxkjIJSd1UWVnBokWz+NGP2le53rVrB4sWzeK55069988tt1xLZWVFb4coSVJ/J2fHSWcjKyuLt99egWWdKK3z6qsvEwrJWm6SJJ3ZUOkJDdnFqoldy0msfgwnXIfw52LMvhlj7IIeO77H42Xs2HFs3LieGTNmAbBq1UpmzZoDwGOPPcQLLzxHLBZF13Xuu+97DBs2ou35lmXxq1/9jPXr12JZNldddQ233/6eHotPkqR+Ts6OG7wSu5YTf+vPOOE6AJxwHfG3/kxi1/IePc+FF17Ka6+9CsC2bVsYM2Ysuq7T2trKm2++wS9+8VsefPBhFiw4j8cee7jdc59++gkA/vjHv/O73/2Ft956g40b1/dofJIk9V/OEElCQ7InlFj9WPr0RzNBYvVjPdobWrTofH73u19j2zavvvoyF110Ka+++hI+n4/77vsur7zyEocOHeTtt5en7fmzZs0qdu3aydq1awCIRiPs2bObadPO7bH4JEnqx4bI7LghmYSO94A6295dXq+XMWPGsmnTBtatW83HPvafvPrqS1RXV/HRj36Am2++jXnzFpCTk8uuXTvaPdeybD7xiU+zePFFADQ2NuLxeHo0PkmS+i/HNDMdQp/o1eG4cDjMNddcw+HDqd1Dly9fzrXXXstll13GAw880JunPi3hz+1S+9m46KJL+M1vfsH48ZPaNq5zu92UlZVz++3vYeLESbz55msd7g301FP/xjRNIpEIn/jEh9iy5Z0ej0+S+gudJIYTR4jBX7SzMxx7aPSEei0Jbdy4kTvvvJP9+/cDEIvF+OpXv8qvfvUrnnvuOTZv3swbb7zRW6c/LWP2zaAZ7Rs1I9XewxYuPJ9du3Zw8cWX
trXpuo5t29x996188IN3M3z4CCoq2k/LvuGGWygvL+cDH7iLD3/4Hq666tq2CQ6SNJg4VhJX/U7CT/+Qpke+ibLnDXSrNdNhZd4Q6Qn12nDcww8/zL333ssXv/hFADZt2sTw4cMpL0/tY3PttdfywgsvsHjx4t4K4ZSO3/fprdlxxcUlPPpoai2Q1+vl1VeXtX3va1+7D4Cbb769w+cefx7AZz/73z0SjyT1Z/GKPdQ++n0gtXdO4yt/JHQpiFGL07awH1JsmYTOyve+136hZnV1Nfn5+W1fFxQUUFVV1VunPyNj7IIenYQgSVLXCQHRQ1s5noCOC695jqyRc0ngykxg/YCcHdfDbNtuN9brOE63x35P3qWvulpB0/rXTPP+Fk9HurrLYybIGHtGf4+x+UD6FvSqN4g/y49quDMQUcf6+ufo0hXy+vnvrif0WRIqKiqipqam7euamhoKCgq6dayTt9m1bbtfbac9ELb3BobEdsq9TcbYM7KGTULxBrEjzcdaBMEFt1LflAT6R28gE9t7x6Pxfv+766x+sb33tGnT2LdvHwcOHKCsrIxnnnmGm2/uiYkAAsexEaL/9z76gyE9xi71S0Z+Obm3fhPz6A7seASjZDyJQNm7R+iGHsc682MGgT5LQi6Xix/84Ad86lOfIh6Ps3jxYq644oqzPq5huGlsrCUQyEZVNTm98zQcx6G1tRmPp/8McUgSQMyVB8PzEAJiDjIBAQyRC8ZeT0JLlixp+/f8+fN56qmnevT42dn5hMNN1NdXpa21yQRFUbDt/jscp2kGY8aMpLExlulQJCnNEPnc7RTH6b+fIz1pwFdMEEIQCIQIBEKZDgUYGGPwuq4DMglJUr/Wjy9me5K8kSJJktQvDY1uoUxCkiRJ/dEQGZuUSUiSJEnKGJmEJEmS+qOh0RGSSUiSJEnKHJmEJEmSpIyRSUiSJEnKGJmEJEmSpIyRSUiSJKk/GiIVyGQSkiRJkjJGJiFJkiQpY2QSkiRJ6o+GyI4AMglJkiRJGSOTkCRJvUZRBC47jMsOoyhD48q+xwyRjToH/FYOkiT1T5odw9n7NvXLHgUcggtuQR85l6TiyXRoA8JQ2S16aLxKSZL6nFK9g8ZX/oQdbcGOhml89c+Iqu2ZDmvgkElIkiSpezRNIbL1rbT2yOY30DT5sdMpQ2T0Ur4bJEnqcbbtoOWWpLVruaXY9hApD322hJrpCPqETEKSJPU423Zwj1uA4va1tSkuL+6Ji2QS6ixlaCQhOTFBkqReEfcWkXP7t7DrDgKg5g4j7s4bMvvknDVlaPQRZBKSJKlXOA7EXXlQkgdAEmQC6gIxRHpCQyPVSpIkDTTynpAkSZKUMapMQpIkSVKmKEPjbolMQpIkSf2QkElIkiRJyhg5MUGSJEnKFDFEpmgPjVcpSZI00MiekCRJkpQxsoCpJEmSlDEyCUmSJEkZI7f3liRJkjJFDJG9HGQSkiRJ6o9kT0iSJEnKGGdoVHuVSUiSJKkfcoZIyXGZhCRJkvoj2850BH0iI0noySef5Oqrr+bqq6/mhz/8YSZCkCRJ6udkEuoV0WiU733vezz44IM8+eSTrFmzhuXLl/d1GJIkSf2bZWU6gj7R50nIsixs2yYajWKaJqZp4nK5+joMSZKkfs2xh0YS6vNa4X6/n8985jNceeWVeDweZs+ezYwZM/o6DEmSpP7NTmY6gj4hHKdv5wFu376dL3/5y/zhD38gEAjwhS98galTp/LhD3+4L8OQJEnq1xqWP0b2gpszHUav6/Oe0NKlS5k/fz65ubkA3HTTTfzjH//oUhKqqWnprfDOWn5+oF/HBzLGniJj7BlDJcb8/ECXHh9tacXs5z+Xzjrda+/ze0ITJkxg+fLlRCIRHMdhyZIlTJkypa/DkCRJ6tccM5HpEPpEn/eEFi1axNatW7npppvQdZ0pU6bwkY98pK/DkCRJ6t+S8UxH0Ccyson5Rz7yEZl4JEmSTsMxY5kOoU/I
igmSJEn9UUL2hCRJGkBUTLRwJXZTNYovhJVVCnTtZrjUfwyVnpBMQpI0CAjhIPa/Te2Lv2tr8597OdZFd2cwKumsJKOZjqBPyOE4SRoEjEQDja/+pV1beP2LJGsPZSgi6Ww5iaHRE5JJSJIGg0Sswym9ViycgWCkHuHYONbgr5ogk5AkDQKONxs9r7Rdm9Bd6KGiDEUknS2he3CGwEWETEKSNAgkhIfsqz6Nq3wiAHpuCXk3fxnjXYlJGkB015BIQnJigiQNElF3Ib4rP08wGcbW3MSER86NG8BSPaHBUbbndGQSkqRBJIlOUs/OdBhSTzA8ONHmTEfR6+RwnCRJUn9kuGUSkiRJkjJD6B7sSGOmw+h1MglJkiT1Q8Lw4sgkJEmSJGWCcPlwWhsyHUavk0lIkiSpH3IMD3ZrY6bD6HUyCUmSJPVDlu7HiciekCRJkpQBCXSwLZzE4C5kKpOQJElSP5SwQHizsMN1mQ6lV8kkJEmS1A8lLAfhycKRSUiSJEnqa4mkg+IJyp6QJEmS1PfipgOeAHZTdaZD6VUyCUmS1GsUxcFltWAwNDZo60lx004Nx7XUZDqUXiULmEpSBgghMBJ1OC21CHcA01uAhZrpsHqUy2wktv55mt95DTWQS+ii95HMG4ftyGvfzkiYoHizSB7cmOlQepVMQpKUAa6mPdQ+/qNjWzgLshbeijbpEkxhZDq0HqEqDrH1zxNe/yIAZkMltY//iPy7vkPMJ/c46oyE6SC8IeyW2kyH0qvkJYkk9TGXE6Hh+d8cS0AADk3LHkYNV2Y0rp6kJcO0vvNa+0bHxqo/kpmABqCE5YDhBdvEibdmOpxeI5OQJPW1RASzg5vN9iCqE+YoOqo/J61dGJ4MRDMwJUwbIQTCl409iO8LySQkDQqKIlDVnn07q6qCEKJHjwnguPzoHWy7rQTyevxcmZIQbkIXvx848fPTi0YickdkKqQBJ2Gl/l/xhrCbB28SkveEpAFNCHBFKontWoXVUodn/ALs3FFndW8llrTYfrCJNTuqGFceYvrYfILunvtTSeAmdPknqH/qp1gt9QhVJ3Tx+zB9RT12jv4gmTeO/Lu+g9VwBGF4EbnDiWtyw/HOSloOAMKThd1cleFoeo9MQtKA5opWUfvQt9vqa0W2vEnOtZ9BlJyL43TjgAKeXr6f51ccAGD5pkqGFR7my/fMxK31XE8r5isl+/ZvQ2s9wuUj4crF7k68/ZjtKMT8ZeAvy3QoA5J17A0hfCHsxsGbhORwnDSgmVV70go8tix7GN2Jd+t4ja1JXlx5oF3bwaoWKusj3Y7xVOKKn3hgGDFj8CUg6eyZx3tC3mzspqMZjqb3yCQkDWy2ndbk2DbQvU915xTPdGSWkPqYeeytLXzZOM2Dt2qCTELSgKYVjUFo7e//BOfdSFK4u3W8kFdn8bnth48KcjwU53q7HaMkdcfx6x7hCeIkWnGSg7PqhLwnJA1ocW8Rebd/k8jGl7Fa6vBNuwS7cEL37geRmst1ywWjGV2axbJ3Kpg0IodF00rw6IOrmoHU/1nHe0JCIHy52E1VqHnDMxtUL5BJSBrQHAdivjL08z6IwbEFfmfJa6gsOqeI86cW4Thgy6E4KQPsk66kFH8OdmOlTEKS1F9ZPZB8TuY4DpbVo4eUpC45+f0nfNlYDRXomQun18h7QpIkSf2QdVJPSPhzsesPZzCa3iOTkCRJUj9kntQTUgJ52I2Ds+6eTEKSJEn9UNI6sfxA+HJwwnU4ZiKDEfWOjCShJUuWcNNNN3HllVfy3e9+NxMhSFKvM+wIRvMBXOHDaN1cPCsNXXHzpOE4VUvNkGuoyGBEvaPPJyYcOnSIe++9l0ceeYTc3Fze97738cYbb7B48eK+DkWSeo07UUvjsz8nWXMQAM/4eXgXvYeEKmunSZ1zchICULIKsGr3o+aPyExAvaTPe0Ivv/wyV111FUVFRei6zgMPPMC0adP6OgxJ6jSXFcao3Y5etQl3
oo4zFdZWFIi9s6QtAQFEd6zEqdrZy5FKg0nCdNpK9wAowQKsmn0ZjKh39HlP6MCBA+i6zsc+9jEqKyu54IIL+OxnP9ulY+Tn9++ryf4eHwydGB3HwUnGUYzuVVBINhzl6L9/TLI6VU9OGB6K33Mv7pKxp4zRirVSsT99S2azai/50y7oVhxnY6j8rntbX8foc6tgGIQCqYnZibKRtKx/eUD8rLqiz5OQZVmsWbOGBx98EK/Xy8c//nGeeOIJbrrppk4fo6ampRcjPDv5+YF+HR8MnRjdiTpiW98gvn8j7jGzcI1fSExP32jtdLSDm9sSEICTiNKw9HHcF32M7LysDmNUFHCPOpdkXfvZTFrh6D7/uQ+V33Vv64kYu5o8fIZgb0UYrdAFgKNkkairoLqiFqG7ziqWvna6197nw3F5eXnMnz+fnJwc3G43l1xyCZs2berrMKRBzmVHaHzmZ7SseopE9QGalz9G88v/16Xq2ooiMBvTqxcnaw6gOMlTPs+2wT35QozCkW1t3knnIQrGde1FSENa0KNS1XxinrZQNZSsIqyavRmMquf1eU/owgsv5Etf+hLNzc34fD7eeustLr744r4OQxrknJYqkrUH27XFD28n0FrT6f1tbNvBVTqed1//+iafj6mcfngvZuQSuO6LiHANKBq2L5/EoFzvLvWWkEdwuKH9lGwluwSrYjtaycQMRdXz+rwnNG3aND784Q9z1113cdVVV1FSUsLNN9/c12FIg5xQTlFw9FTtp2DljCR08fsRhhuEgm/KhejjF3WqnlxCeIgHhhH3lZCUCUjqoly/xoHa9j1uJaccq2JrhiLqHRmpHXfLLbdwyy23ZOLU0iDlsiMQrgbNheXNx/YX4B45ndi+DW2P8U5chOXN69JxTeFCjLmA3OHnIhyLpBEi7sg13lLvyw+oHG5IYloOmpqakqnklpFY/xSOGUdoA+u+0KnIAqbSgOeJV1P/9AOY9ZUA+M+9DGPGDfgv/ACe8dtJVOzAVT4JUTS+W0NijgNxLevYFyfarXgEV7wGNBdJPUtW25Z6lEsV5PpV9tUmGVuY2jNLaK7UfaHKnWjlUzIcYc+QSUga0FRhE179VFsCAgivf4nckdOJ502CYXPRR84j2cNVtt2JWqoe/hOxg1tQ3H5CF38AUTodC7nvkNRzynN0Nh+JtSUhACV3OOahTYMmCXV6XKGpqYlwONybsUhSl2lWjNj+9NmVZt2RtkWlPb3NgyZMwkv/SezgFgDsWJj6Z/8XPVx5hmdKUteMzNPZcKj9jE61YBTmwfR1aAPVGZPQ3r17ufnmm5k/fz5z587l7rvvpqJi8NUvkgYmU3XjHp5+RajllHR7d9Uz0ZJhorvXprVbjTIJST2rJKTRErOobjbb2kRWIU4iit2UvnxgIDpjEvrKV77CrbfeysaNG1m/fj2XX345X/va1/oiNkk6I8tR8M25Hi2rsK3NN+1inNyRp3nW2bFVF1p2cVq78Gb12jmloUkRgjEFBqv3R9vahBCohaNJ7l+fwch6zhmTUDQa5Y477kDXdQzD4J577qG2trYvYpOkTom5Csi69V5yb7+XvPfcjzH3DpKKt9fOlxAesi/9ECgnbql6x8+DUOfWH0lSV4wvMnh7bxTnpK69WjAGc396b3wgOuPEhFGjRrFu3TpmzJgBwM6dOykrk39sUv+SULyQ1Xu9n3eLZ4+m7EM/IlJ9GMXlww6WkFA8fXZ+aegoDWlEkw6H6k2G5aZmdyp5w7E3PIMdaUIZ4D3wMyahiooK7rnnHsaPH4+maWzdupX8/HyuvfZaAJ5++uleD1KS+hvHERgFw2kSXatFJ0ldJYRgYrHBij0RhuWmEo5QtdQEhQPrMSZekNkAz9IZk9AXvvCFvohDkiRJOoWJxS4eWdPCLbOCqEpq2qdaOBZzz9uDNwnt2bOH0aNH4/P5Ovz+5MmTey0oSRpKFAWiSRtdVTKz1bHU7+X4VIIehe2VCSaXpiol
KAWjSLzzIk4sjHD7Mxxh950yCf3oRz/it7/9LbfeeivFxcXtbopFo1FWrFjRJwFKZ6aqAgHtNsCSOiYE6HYUYSWw9ABmhkvwtCYs9lY0U9MYxePSGFUapDDL3a4ygyQBjCs0eHtftC0JCc1AzRuBeWA9+vjzMhxd950yCf3whz+ksbGR0aNH8+CDD+I4DkIIkskkd999d1/GKJ2CwMHVcpDW9S9ix8L4zr0MO388piyW2SEhHIy6nTS+/EfM5hq8E+bjm3dzl/YYips2h2rCHK2LUF4UoTjkxtC6l8iEAodqwvzr5Z1U1rUCsGh6CTecN4ocn3GGZ0tDzbhCg7+tbMK0HbRjQ3JK0ViSu1cOziT0X//1XyxbtgwhBPPnz29rV1WVyy+/vE+Ck07P1XqEmoe+DXZqz5HY/k3kXvc5KJLbpXfE1XqUmsd+CI4NQGTbMhzbxH3BRzCdM5fbsRyHx9/cy8urTmwRcd15o7h+4QjOsON3h6JJm9fXHWlLQABLN1Qwa0KhTEJSmoBbIeRV2Xk0waSSVG9ILRxNcvPLOPFWhKvjWyf93Skv4f7whz+wfft2brjhBrZt29b23+bNm/npT3/alzFKHRBCkDjwTlsCOq5l9VPowjzFs4Y2q7GyLQEdF92xCi3RuR0za5vj7RIQwNNL99IQ7vxGeSezbdh5oCGtvao+0lZySJJONiJX553DsbavheZCzRuOuX9dBqM6O2ccR/j+97/fF3FI3aGk//qEokG3rsv7H8NuxdWwG6N+Jy7rRKLQSOIKH8Ko2YorXoMQp7+BIgS0xEwSHWxEp/pDJIXG/upW9lWFiSbtDo6QkkhaaW2OA4loBM2J42o5gFG7FXeiDnEsi5zqNQD4XAqTR6UPBZYV+DF7uCK3EOAyGzFqt+Nq2ofuRNO+3xhJsrOimcqGWI+cX3MSuMKHO/17ks5sRK7O1sp3bXRXNI7knrczFNHZk1W0ByjHcXANm4LQHsMxT7wpA3NvIN6JoaX+LlFfSfNTPyFZvR8ALVRA9vVfxDQCmBuepmH1MwAIVSf3pv8mkTPulLXiqprifOePb3Pp1BAXlU7GObLl2HcEWRd/gO89sovtx3okRblevnj3TEKe9PtqhUGNvJCb2sYTV6IleR7yRCOJ1c/RuuHl1FF1F3k3fwXHk0XTsz9Lew0xV2pPI8eGmy4Yw54jTRytiwBwwYwy3t58lIraVi6cVozSQ10iV2sldY99DzuaKkLsHj0D3wUfJKH6EQL2VbXy/b+uJmmmkvC1i0ZyzYIR6Er3zq85CcxNz9Lw9pOpBlUj74YvkMib0Gs1/YaCgqBKXatFa9zG50pdhA70ITk5I3QAi3lLyLv9XgKzr8E35QLybvs6Zt7YTId11oSAyK41bR/eAGZjNbHtb6FFamk5loAAHCtJwwu/QbdaOzgS2MDfX9xOa8zk36tqec1zOeGFn8R/2cfJf893Wd1S2JaAAI7WRVi2sRKlgw9fnwFfvCKXOeNC+Dw68ydm85nzXHjjNbRueOVETMk4zW/+Hevghg5fw8nHzvbqfOW9s7j7igncdfl46ppivLnhCH9/YTu1Le2veLtLExYtKx5pS0AAsT3roHZf6t+mza8e39iWgACeXrqPqoZo2rE6S22ppOV4AgKwTBpe/A2GKSvxnw1VERQFVfbXndhxdaAPyckkNMDFfKUoM29FX/RB4qExWIOgc6soCrEju9LaE4e3gZV+/8VqqUckO/7ATJg2e440tX39xKpavvZ0CyuiIzGzylmzoz7tOVv213V4TyaJi9IgvNf7Bt9bUM979Jfxb3oY1Rfi3XOqHTNBoqKD13BoK8q7hqWicZO/vbCdf7y4g3f2nKjL2NTNe03vplgxEpW709rNpiqEgGjcate7O67xLM5vR5vS2qxwIyQj3T6mlJIf0DhU/65tv4vGkdy9MkMRnR2ZhAYB23awrFPfyxhoLMvGN2ZGWrtn/Hww0guTGgUjsV2BDo/l1lVm
TyxMay/N82GaNjMmFKR9b8GUEuxT/DiTuWPJOe9WcvLzyJ11BaEbvoStpd9rUvy5uEed2+FrsOz2GS7oNcgPta87p6mCvFDP1KKzNC/esXPS2rW8chwH/B6N0aXp9ccKsrtfBFYJ5PPue5N6XjmOO9jtY0opOT6Fisb2k4/UwjFY1Xuwo80Ziqr7ZBKS+iXPyKn4z70stZgG8E5ciDZiJgl3PjlX/yfCSH1A67klhC7/CElcHR/Icbjx/NFMGJ6derym8N4rJ1Cam/qAnTgsm8vnDuf4CNn500uYMTav3eLsk9mOQsxXhjliIf7JC4nr2Zj+ErIv/w+EnopBzx9G8Lw7oGhi+msYOTPt2G5N4XN3ntuWiPwenf+6aybZ3p5Z72XZAs+5V+Eqn5RqUDWyzrsDOzQCAE0IPnbjFErzU/cTPC6NT982jfxg96eJJ70F5F77KYQr9XPWsosJXflxEqQnbKlrsr0qR5vaJyGhGagFo0nuWZWhqLpPOKf6a+vHamo6N6U2E/LzA/06Phg4MdbXNqHF6sBxsNw5mMeGGhVFoMcbIBnB8WSTEGfuMZgONLUmMDSFoEdPSwSNkSS245DtMzo9t/Dkn6MQYCQaIBk7FlPqw1YVNlq0Dmj/GjoSt2yaWhP43Dp+l3bKRNhZLieCYbaQwCBhZKM5CZRoPULVSbhysJ32rzRpOTS2JvC4VIIeHfssZ8gpikBPNEDi9L+ngfJ+PNsY8/M77q2fyso3VhGLpQ+JNkctHlrdwk9ua9/Dt6r3YO5dg++m+84iyt5xutc+8G8gSIOW5ShYrvy0dtt2iOsh0EOdOo5GElfDPpQdy1F82bjGziHuLW43Syt0lr0Ox4G4ns27i1VYjoLlTn8NHXGpCgXBVPJSFIED2N0sxeSOVNDwzM8wG6sQupvsSz+EVTaDpKfoWMDpz9FVQX4w1Zs72wR0/BhxLQRa6KyPJZ3gdymE4zaW7bQVMwVQ8kbibHoRq+EIanZpBiPsGjkcJw16ytGt1D56P63vvE7LyieofehbuKJVmQ6rQ5btsKuymQce3shvn9rK4fpIl5d9GU6Mxhd/g9mYeo1OMkb9c7/CiFT3QsRSX1MUgddQaI62v3EpFAW17ByS217PTGDdJJOQNKjpxGle/mi7NicRw6xMn7nWH+yubOH7f1nDhl01rHinkvt+t5LKhvSZa6cj4i0kaw6+q9XBapZJaLAIuAWN0fTF0+qwaSR3LcdJ9szMyr4gk5A0qAnAsdLLGDl2+h9wximCp97a267JdmDt9uoO1y2dimN4UQPplRgUX+hsI5T6Cb9LoTGSPoVT8Wah5pSR3Lk0A1F1j0xC0qCWFC6C825s36hq6MW9t6hXF0l04l2u/yYc0LX0J2lq1w6UVH1kX/FxhHZidltg3o1Y/uKuBST1Wz6XQkOk4wspdeQsEhuf658XWh2QExOkQc1xwCmdRu51n6V1w0so/hx80y8n7ivp8T17VEy02p00L3sEJxknMOd6nLJpmB3UrOs4VofrzxvNpt11bW2aqjBjfEGXJgo4DiRyxpJ79/dRI3VYug/LVyC3+BhEAm6FupaOCxWrOWWYbj/m7hXo4xb1cWRdJ5OQNOiZiguKpuO5ejoOgpjl9MqmcVrjAWof/1Hb1/Uv/Jqcq/8TSmd1+hjDC3zc9+G5LNtUgcelMf+cYgqzXF2ut+Y4EDdyyS8d0e+nP0tdF/K0L93zbtrYhcRXP442ei5C7d8XHzIJSUOGaUFvbVmqKIL4nrVp7eF1L+Avn0HS7tzItyIEw/J8jLx0PI7jYNuOLPgppcnxK6zce+otW9TccqxALol3XsI1/eo+jKzr5D0hacjQieMKH8LVegSdU19FdofjgOJNL0mj+rJwurG1hmXZPbJWRxqccrwqjVGL2Gm2HtEmXEBi47PYrel7VvUnMglJPS5pO1Q0RNlXFSbSwR483aU7MVzNBzDqd6XtzXMmXrMe9q0kvuU16h76FtE3/oBh
9lydLcdxMEZMbytTA4Ci4p91LabdM9sxSNJxiiIoDGrsrz31xZTiz0EbNp3Y0r/2YWRdJ4fjpB4VTVr84+VdLNtUAUAo4OJr75tNrv/stqs2rDCRpX8juiNVKVj1h8i96ctE3UWnfZ4Q4G45RN3zv8JsqEQLFZJz4d00vPUw7pHbYNjcs4rrZHFvEXm330eycgckE+il44n7Bs7KdWlgKQlpbD8aZ0LxKeomAtqY+cSX/pXE7pUYY+b1YXSdJ3tCUo86WN3aloAAGlviPLJkF51d9q8oAkXt4G1Zt68tAUFqW4CWlU+gidP3tAyzmdp//xizoRIAs7GKhrceJjD9YuKHtqB2dK7TEAISlkNrwiJhtj+340DMXYA18jzs8RcT85V1ayhOkjpjeI7O5iOnX5QqVA1j2pXEl/8NO1x32sdmiuwJST3qaF365nLbDzQQMy3c2uk/8JujJsveqWDT7jrmTylmxrh8/C4VIcBsOJr2+MSRHfisOKZy6i0HnHAddqT9sJsdCyNUDVfZRKxT7dnQ0bGAXRUt/P7JzdQ2RVk0rZRbLhxNwJX+ZyQnE0i9rTRbo2aTRX2rRY7v1LspK6FitJGziL7yK7zXfQWh9K+PfdkTknpUeUF6tdyZEwpw66ffcjxu2vz0X+t4ZMludhxs4M/PbuXPz2/DIvWBruWWpz3HPXoGpnr6CtrC7QflXecWCmogB7V0UpeSRW1znB/+dTU1jVEcB97acIRHX9vd5dpuktQTVEUwpsBg1b4z74CrjZoDQhBf+VAfRNY1MglJPaos38ctF45pKzMzpiyL6xaNBMdBCHCZzRgNu3FFK1E5McW0qiHKoar2Wz+v215NfXOMysYoB5Ry3Fd+rm1vHqN4NN4ZV2M5p88ASXcuoQvubtcWWnQbYthM4lr6Rm6nU1HbyrsnrC3bWEFrfGCsTJcGn0klBkt3Rc647YcQAmPaVZj71pLYuayPouuc/tUvkwY8QxVcOXcYC6YUE09a5AbdHK9E4wofpu7xH2JHWwBBYN4NaFOuwBSuU9dGq9qJO1rHy5XZHGqCT7zn54ScJhxfHrFTbWR3EttRUEcvIq9oDHZLHUogB9NfTLwb1QN8nvQ/l9wsD/oZhhklqbeUhjQcB7YfTTDxNBMUAIThwZh1A/EV/0ANFqAW9V7pqq7IaBL64Q9/SENDAz/4wQ8yGcbgJ1JDSUfrIvg8Orr77FZQt8RMDteEURRBWZ4fn6v9cJcAClwJlOgB7MMtqIFs8GbRvOwR7GgLgXMvRcsqQM8uwj66GUVzMzJUxuSROWzZVw/A4skhrp+skWtWIdwO141o5puvaazb18qic0rbhtGEACNeh11/GKHqiJwy4mr79TqW0LH85di+MqqaYtTuD5MdcFGU7UFTBIqw8UQqSdYdQagqSt4Ionpu2usuy/MzZUwu7xwrq6MI+PD1k3Gpos/vAbnMJpyGwzi2hZpdRtyVI+9DDUFCCKYPc/HC5vAZkxCktl03pl1J9KX/xXv911CyCs/4nN6WsSS0YsUKnnjiCS644IJMhTAkCHFse4C/rmlb/Dh3chHvv3ICrm5cwde3Jvj2H1fR3JoAIC/LzdfeP5ssz4nEZlhhwq/8hviBzakGRSP34nvwjZuFkV9G7NBW9Oxial/4LXY0NQSnZuXzxeu+yNL9JVjRMHNjS4m9soTGY8fMXnwH188ezTt7alk8rRjTTE0ocLVWUPvwd3ASqXFxPbeMrOv+K7XBXLufg+DtbdX87snNbW03XTCGq+YNw9u4h+qn/7dtAoMayCH3pi8RdbX/A3XrCh+/fgqHa8O0Rk2GFQcJebQ+//B3J+po+PePMRtTkzUUt4/cW79BzHP66erS4DSpxMXbe2Psr00wIu/MSyHUgtE4Y+cTee7HeG/4JoonfZF1X8rIOEJjYyMPPPAAH/vYxzJx+iElbjr83783t1t9//aWoxypi3T5WKoqeG3dkbYEBFDbFGPN9hrEySWjGw6e
SEAAtknzupcwm2txFY/CMU3iR/e2JSAAq6kG59BGFk4u5NIRcWKbl7Q7d+PKpxiTrzF9hA/l0FpcZiOq4hBe83RbAgJI1h3GqtieFntjJMmfn93aru3x13fT0NxK67Zl7WbQWS31xHev7nCI0K0rjCkOMm1UDuOGZff5nAQhIHnonbYEBGDHWolseJEuzjaXBglNEcwa4eaJ9Z1fwK0Nn45aPJ7ocz9u9/eTCRnpCX3zm9/kc5/7HJWVld16flf3au9r/Sm+ytpWahrT32TRuNnlOC3LZu+RprT2g1XN5OWNb/u6pSJ9mnaysQoUDYSKFsxt9yHadvy6QxTk+glXpa8Cd+IRHDPB+MRu6p5+BKNwJAU3/zetaZu3gd10NO211R+oJ2mmT8dujZkEG9J3WU3WHKQo15/W/m6Z+F3XrDqS1pas2U9+0EDR04dk+tP78VRkjOm8PhdaB1t7dGTBBBe/fqWaQy0wpfzUSxZO5sy+lObVz2G++nOK7vxGh++dvtDnSeiRRx6huLiY+fPn8/jjj3frGP25KnB+fqBfxacAsycWsnrbiQ9aISA/y93lOIWAC2aUsm1/fbv2OROL2h3LFUwfFvKOngGqhuLLJV6xi8D0S4hX7G73GNfImdTUtOAOliBUHcc6kYz0gmEUmocJr3sEgETVPmK1lXgnn0/Tm/9sdxy9dGLaa/O5VPKzPdQ0nEjIHpdGTpYH94ipxA5ta/d4z9jZZ/z5ZOp3bQyfButfbtfmnbSY+qYkjpNo197f3o8dGSoxdjWJRVrjxGKd3yF1wWgPv19SwzevzUPt5CaIztjFJDc8y6F//gDP5Z/utTVEp3vtfd6Bf+6551i2bBnXX389P//5z1myZAn3339/X4cxZCjAey4bz4zxBUCqjM6X3zubgqzO7XFzMseBKaNyuXHxaDRVwaWr3HXZeMaWth9TNv0l5F776WMFPQWeUdPxjp2FMWYecX8R2Zd8EC1URODcy0DREJpB1vl34hSkZuvE3XkU3vYVtGA+AK7S8QSmXkT4XcnGjrWij5mPf/plIBSE4SZ08fuxckamxe41NL58z0xGl6amZZfk+/jq+2YTcLtwjZqRikU9FsuCm6B4cpd/Pn3FzhtD6IL3IHQXKBqBWVejjph5xmm60uA2tkDHpQmWbE8fiTgVIRT0aVdBMkrs1d/idGHxdk8RTgbfuY8//jirVq3q8uy4/nzV1F+v6hxSs9oMTaG8OOusYhQCWmIWQoDfreF0UO1Z0wR6sgWRjIDqIqH5MR0VjTjKoXU0LX8cNbuYrFlXoPjziLoLOPn9n58foLm6CpGMgctHZNk/ad38+okYDDe5d32XuJGHKhy0RCMIlaSR1e7+V8y02XmoiTXbjzKyJItzx+WjAB5DQz9px1JDmGixOlA04u5crE4s/cnk71oIMJJN4DiYriysUxRJ7a/vx5MNlRi72hNa+caqLvWEAOpbLR5a3cw3rskn13/6BeIncyyTxOrHULJLcV/wIYTo2f7J6V67XCc0RAgg6O6ZX7fjgP/YtOx3JyAFG6NpP+F1zxNNxvHPuBIzbwyWk3q8Vr2Dmud/C4DZVEP1/k3kXvYhGFmQdp648ILhBQc8c25ADeQQ2fIWWv4wAvNvJO7KAwcsR2Adnw13UjxCgVfWHOLx1/cAsHRjJS+vOsTX3z+rXQICSDgaieOz4fpw7akiQE80gG1hubMxnc59cDgOJxbb9v3Fq9RP5fhUzh3m5q8rGvnsJTntJwydhlA1jFk3klj1KPFlf8O18J5OP/dsZTQJ3XTTTdx0002ZDEHqYUbzQWoe+g7HN4+L7d9E3k1fwsqbiKoqRLavSHtO6463CY6aTdQ59RBhXAshpl5H1jmXYasGMVs54/50zRGTp97a267taF0rFbURxhRn/ka4bkcxty2hduUTYFl4Jy3CO/cW4nrXKjlI0slmj3Dzz1XNLN0V4bxxvk4/T2gGxuybib/9EKx6FPfcW3sxyhPk
pE6pxyiKILrrbd6dHcJrn0VTU3vuqIH0RaCpjd/O/FZ0HEgIN+YZdil1WS24mvZCMr3MDoDdT+6diNo9NC97BCwTcIhsfYvk7hWnrh4hSZ2gKoIrJvt4fF0LtS2n3n21I0J34ZpzC+beVcQ3PNtLEbYnk5DUozraz16oqQV0tu3gGT8XYZwoOip0F/5zLiDunN1+Q8d5YlU0PHwftQ99G/W1/+WyGe1n6uUEXJTmdf7qsLcoiiB+aEtae2TbMjSnZ3d9lYaevIDGrBFufv9WI1YXd+gVhhfXnFtJbn6ZxLbXeyfAk8h7QlKPsW0H99jZtKx5DuzUFZiaXQoL7iFqQjxhkXSXkXv3d2luimJYYXxeD/FAOSoWWrQGx0ri+POB1HCZEBBPJNGw8LoUSMZJaj4sR0EngWrFMTVvatKDsGhZ8ShWS6qsjnV0N5dN3UbJVbNZvrWWEUVBLp0zDJ/R+Ru2vcW2HfTc9A3vjKJRWEI741BjpigCdDOMo6gkFY8sFdSPzRzu5kBdmOc2hbl2eteGn4UngDHnVuIr/4XiyUIbcW4vRSmTkNQJ0aTFoepWmlrjlOT5Kc52o5zipmXcX0b+nfcR27mS1pxxrG7MpbBBYfOavby+7jB3XDqeHQfqeXtLFeUFfj5w7TBG2nESa56gYf3LgINRNArP9Z8hZvp5c/1hnlx6ALehcvf5RYw9/DQevw/3rOtYt7eFRNJkWLZKeUkutuoifmy9j1E0Ct+E+TjJGBcEjhCcUsDOWsgPutpK/mSanpWPnl9OsuYQAIo3iG/iAmL99IPdsJpJbH6FunUvongChC58L1bhZCwyn9SldEIILp/s4+9vNzGpxMXogq6NNij+HFyzbiT6xu/xej+PWjC6d+LM5BTt7urP0zkH23TTuGnz639vZtPu2ra2T906jZlj89pNhX43TVN4cfUhsoNuDlWFefLNPcycUEA8YbF574kdHnVN4UcfPAfl0S+0e35w7vUs1+fxu6faLyL92nWFFCz9MVr5Ofw9eQnLt9ajqQr33jqMUSNLaX3zL8QPbCZr9lU0vPGvtueJ4glYi/6DrOz296R0EgjHwlS9p309HTmb37WmKcRe/BlaVi6aPxvHscEyiR/dh+fK/yKZ7Jkpej31fhQC2Pxs6h7WSfLu+Bbx4PCzOvZg+5s53TG6ojtTtDuyuzrBmzsj3HtdPl6j63dgrKO7SG55JVVnLpDXrRj61WJVqf8RAlyJWvSjGzCq3sFlNrZ9r6Iu0i4BAfzxma2E46e/4elEm5hp7CHL5bDmWLWGMWWhdgkIIGnaVNSm17GLxhK8uOpwWvs7R23UYB7moc3MLk915E3L5t9rm7DjEQJzbyQ48wqaVrW/qepUbifPrmn7WhE2rrrttPz7ezT+62uI7S9jWJ1f5He2LMvBKBlHy7qXaHjzIRrfeoTG5U+gF43BsvpHT+1khh2hdeMrae1m1Z4MRCN1xZgCgxF5Bn9d3tStBc1q0VjUETOJvvA/OMmzT4rvJpOQhKu1grp/fpP6p/6Huid/SsOj38WVSCWeaAfJpjWaJGmephckLJJrH0dd9kc0M0JOMDX1Op608HWwVsnrS5+a7Q5mU5STvmtqnl/BjkcQLi8tiRNDgtXNJpZiEDXycY2Z0644apuTCjUazYeofeyHJKsPYLXU0/j637D2vt1nayMcx8EYPQstp6StTQsV4Bo7r8s9sr5gCx01mH4VrHjldPKB4PyxHg41JFm2u+uFiwG0UbMRvhCxt/7c45U5ZBIa4lQFIhtexImfeHNazbWYBzchBBTletM2bZszqZBABxu8tR0zUkvrptcACNa+w3nTS9A1hTfXH+Ha89uPK8+YUEBZfhDP+HltbVqoEP/wCVw7UcE46dy5QRcTvA048QjWjFt5ct2JYqqXzSjAJSy0/ctIHNmOd/yc9kEpGmp26gNfCEhU7iRtKvm659GdvqsoHDNyCd34VXJv+Sq5N3+F0M3fIObq3nBH
b0uik3Xene22StdySlAKRmUwKqmzNFVw5Tk+HlvbQk0Xp21D6v6Sfs6lWNV7SO54s2dj69GjSQOOgk2yNr0StVlXgTZekOs3+MYH5vCX57ZxuCbMwqklXL9oZNvVS2r9j2g/DdSxEboLvXQC2uG1jDXcfOWu6VQ1W+QEdO77wAyONibwew2GFwZQDRVt8Qfxzbwax0ygBgtoXPIHsqv2862r7qIiEUB3exhZ4CLUehDljvvYFQmiKDvwe3SuWTiCKWPzaXz+pyQqdgEQWnQLiuGhdftK9Oxisi68h7inEJzUeiPVlV4hW/GFcPp4Zlpc9UPOuL474VmIh0aSf9d3sOqPIDQXIm8YcS2U6bCkTso/Nm37D0sb+eIVuaecXHQqQjMwpl9D/O2H0EomogTTq5x0h0xCQ5zpKHjPuZDE0X3t2l2jziVhpT6Ny3K9fOnumSQtG6+u4jgOmpNAqd5O6/oXEJ4A/hlXEg8Mx0EQceWza/aXeGVzEwUBlUvdCsXrfsvI0jE0NCg8Wj2a5dsaABheFOCzt59LlsfA9JcD4FFieEZNx1U8mqBSR3DHo5hN1fgX3Iw55Tocx2F4CD5581TW7ahhydrDPPHmXj5zxS0Ma/o1VmsjjUsfxTX8HArf+30Sqje1FfhJyUUrHofqD2GFG1MNQiFr0W3EnJ75k1AU0S+H1c6G4whi3hLwlpz5wVK/NHO4m701Lby6rZVLJ515q5J3U4L5aKPmEHvjD3iu+XKPDF/LJDTEOQ6ow2cQmFdPePWzoOlkLboNO7f9sJkmUjO6jo8Hi6NbqHv6Z23fj+5cTeE99+MoGq/uTPD7Z/YDsA1YuV3l+x/8GIknvsyBGZ9h+UnbShw42sLLqw9y6+LR6MlmRONBrNYGrOY6EnWHie5/h5zz76Bp1TMYZZOJHzt/fTjJd/+0CtM68UH/i5eq+PYF16Cv+hsAiSM7sVBJkr5PSszIJeeWb2BV78GJR9CLxhD3l5x1L0i3o4i6PcQPbEHPK0MtnZS2y6skZYoQgksm+XhodTMzhrnJ9Xc9BWgjZxFf/jeSu5ZjjFt41jHJJCSRUP0o024gd/KFgEJSD552lbUuTMKrnmrX5p98Hq3rnqdFzeLRNe23xY4nLfYdjTDel8We2vSZXxt31XLLecOILPs7wSmLad65itjhHRiFI8m96B4aVz5J3rWfbjcVuLEl3i4BAURiJmER4PhHvn/WlSS04CkTS8zIpT7kZ822SvZuaeT8aR7GlOVgqN27ulMEWDtep+mth9ra9PxhZN34JTCTgCChZ8kFnlJGHS9y+s9VzfznRTldfr5QFPTJF5NY9TD6yFmpLUXOgkxCEpAqPh1Xs058cQqqsNHsOHpeOYmqY0N4ioaWXUDjW48gplyF3tE+07obxZfNuGEhWFPT7lvnjs/HFasDt5f61/5Gsi61c2j88HbMhkr855yPpbnaLYrMDrjQVNEuEXndGtkBN3puKb6pF6GOmkPiNB/4LdEE3/rjKloiqTI5q7ZW88GrxnP+9LLTDqWZtsOh2lZe21hBbtDD6NIgPkNFTzRSt7z9Ro3ukjEk1j9H87oXEYpG1sKbUcacR1Lp+n5OktRTZg1389cVTWyvjDOhuOtJRM0uxQqVkNjyCq7pV59VLHJ2nNRp7lgViTf/QN2/volQVXIuugemX4+68B7MxmpAoOx6i9vntb+68nt0mqNJ3HNvobx+NedPPvH9USVBLp5ZhpOMogZy2xLQcVZrM4rbj+MOtWsP+TQ+ddv0ttlzHpfGZ28/l5yxUwncfC/2uEtIqKdfHHjoaFNbAjruoSV7iMdPvRZCCHh7ezXf+eMqHnx+O//z0Hr+99GNxEwb4Vg41omZR4rbh+IJ0Lz6WbBMnGSMxtf/jqjZddq4JKm3aapgwRgPj6xt7vaUa23sAhKbnscxE2d+8OmOc1bPloYMl9VCy9J/4hk2GSOnCKHq2Ik49aULSSgeppSUYhSNAqEyL6Qy7EPn
cLARKutjjBuWjd8l2HAkjO2ayNXnCK4cF8L2F9Jguth9uJF4bjYloVKyz7sN5/jNTsdGMTzouWWYhzbg9gYhq4S4KxfHEcwY7uUn/zGVxpYo2QE3oaBB0hKAQWfGvBw7faqq7YDhRDEaDmA1VaMG81B9WSSrDyA0g+bQeP7+wvZ2z9l5sJGK2lb8xdn4zlnctvmeq2QssQOb084R27MWvWRatxelChxckUqs2oMo3iwUlwez4SjC5UXkDj+xzxCQbKjCOLoLJxFFzS0j4S/BduS1pwTjCw1W74vxzpE4U8u63jNXAnmooWKSO5diTLqo23HIJCR1TmsdnvIJNLz+97YmV/FoChedSyJSRe3jP25bTa14/BRdcA+rjgbJysnhSE2Yx1/bTTia6nW4DZX73jOB/3tuHweqUuuThIAvXldK6cqnyD7vVhpX/Bs7miqTIjSDnAvuouaJnxJadAvusfNJGlkk1z2Ns/oZskjt65acfQ3quTdgoaGqAsfhtMNqwwt8+Dw6rdETvaGPXl4OO16ndsUTbW2+SQuxWpuIHdhM8ryPE++gpE4iaWM6Cp45N6LlFBPZuhS9YCR2pIl4ZfuqAlre6Yf7zsTVuIeaR+5HaAbZ591G7ZIHOX7jS88fRta1/0Vcy8JlNnH08R+TrD1WeUIo5N38JeI547t9bmnwEEIwa4SbFzaHu5WEIDWpKbnlVfSJF3Z7ppy8JJI6RdWNtFI48co9uJ0Inr2vtyvnYUfDmFV7mVpgkzRtqhuibQkIIJaw2FtPWwKCVMflwaUNKFMux2yqaUtAAI6ZIHpoK0bRSJpWP4fTcATDihA7uLVdPC2rn0VEGzlY28rfX9nNU8v3U9McP+UfR1bQx7fuGsc1swuYNiaHz1xVyuxhGs0rn2z3uNaty/AMnwyA+8AyFkxuP/HC49IozvOmfiZaFky6guDN96JMvx7PtMtQ3CemwqrBPIwR53Z7CEQnSdNb/wLHxjdhLs3rX+LkmRfJmoM4dal1X07t/hMJCMCxaXrjbxj0fOkVaWAaV2hwtMmisrF724coecNxElHs2gPdjkH2hKROcTRXx6VwLAu7uTqt2W5twpVtY5o2TeH0D71IIr03UdcSx/blYdVsTfue1dKA6gmS0INsihbywhP7Cbhu4NILVLLX/gm7pRYUlV0NCt/569ttz3tm6T6++9H55AXSb75ajkpeQQF3LXah2Aks1QORBnDSh8kcOxWvdXATd1x7A/k5o3hzQwUjioPcdtFYQl69bQTQth0SpBa92t4icu/4NlbDYYRQETllxE4aLusqYSfbtqpQPUGscEN6rIlUcrfj6bXwrJZ6hJ0E5exmNEmDg6oIJhUbLN8T5eaZ6XuBnYkQArV0EsmdS1HzR3QrBtkTkjol6QrhnTCvfaOi0eJ4cMaen/Z4o3g0BxoVDF1hTFko7fvD8728ewPRS6bmoOxYgj5qRtrjPSOnEq/ez8GRN/CDf25hw65a3tpcy7f+XU3T9LsB0Ceex6Nvtq/+kDBtNu2pO2VvyHYEMS1ExCggrgbAl4cWar8RnuoPYcdPlPPJ8Thcv2gkP/v8BXzypnMoyHKd8haU40DMyCFZOJVEweR292u6I6l68U+/FIDI3g343v07EUpqDydAzS0H2r9u37RLSGpdX6QoDV7jigzWHoh1+/lqyQTMvatTleC7QSYh6YwURWCj4VtwO/5plyB0N0bBSHJu+SoHIj7qguMInP8eFG8QNZBDaPGdtGSNIa98OGPKsqioDXPnZePJCbrJ8ht84JIyCo8u5au3j6eswI/HpXHtgjIuya/BHLWQh7Z7sRZ8EDWQg+IN4ll0F1YsgmvmdTyxqn0Vbst22FKn4520iMCcG0h0cL8m2YX9g+KKl+zrP49nzEyEZuAeMZXcyz9C67YVaFn55F73WcysYTi2QyjgQvTxmh/HAW3cIoLzb8JqqUMLFRGceSXC8KDnlZJ385dIBFIVDZL+Uopu+wp6TinC
5SUw9zpcky/G7n9FuqUMKgioxJMO1c1drykHoPhzwXBjdbOiutxPqIcNpr1RhIDq5jgHD1WTo0bIzc3Cm5OPjwi24iKJgRCCmGmjCPA6YXRhERU+Eo6GrgiEACNShV13gKg7H+HPw+d1oyQjoAgijoeopeJ3a2hmC1srk3zvr+sIeHUunZqNrgre2hHmG3efg9+j852/bWbnocZ2cb73ivFcOqOIpK2y5WAjP/3HurbvKYrgux+ZT1GoazdeNWGhmhFszYOJhmGFcUT73UQz+btWlOM7nGpYihstGcZRdRLvqg6Rnx+gqboWYSdJ6v5+mYAG09/MmY7RFT21n1BnPP9OmFkjPCwa6+3W85M7liJcXtwL3tPh90/32uU9IemUGlqTNBw+wMS9j2FV7EAYblj4Huxx80g6qfFjx3FwHaswkCTA8dub+rFRIHdrBTUPfxsnkeruC83Af9vXifiHpR4gwKOAZdlYwkfcagSgJZLk8ZWpe02KgDguXI7GzReO4ft/XdMWo6EpTB6VR9JOLWQdX5bFF++eyXPL9+P36lw1fwTF2e4uVykwHRVTDbTd848rx4aw+sklm22fFJMNlnrqIbaEcIHqSk0hlKQOFGZp7K1JdDsJqcXjSKx9Emf+XV2eJSeTkHRK4XCE0sMvYVXsAMBJxIi+9gc8hcOgE7tpKoogtmNZWwKC1Ey3yKZXMRZ/uMPdQ0N+F163RiR2Ymhg/pSStjsbo4sD3Puhuby54QhBr8H8KcUUZLnbZptpimBCWRaT7pyOQGBZtiyTI0lnkO9XWbWv+/eFRCAfALvuAGreiC49VyahIcbBYc/hJiprW8gOuMkLuNImCByXYySx9q9La3caj3Y6CcWba9LazZY6/NEqTD0vLUHYtsNdl01gw64aKmtbmTYuH1UIzGPrahQhGJ7v4/1XTMBxHGzb6XC6s2059JtuiyT1c9k+lZpw9+4JwbFZcsXjSO5Z3eUkJCcmDCFCwLrd9Xzuf17nhw+u5cu/WsbyLUdxTvFh7fb50PLL09qTRudmV5mmjXfsnLR2z+gZtKx9Dr2D7bTzs9wsWXuIlkiCkSVB3t5ciWlZhLztp49alt3hgk9FOLjitbia9+OymulupXlFpKoS6BXrMOp3YNjd25FSkgYCnyGIJR0Sp9kx+UzU4vGYe1Z2eQ2c7AkNIY2RJP/373fa9T7+/OxWpo7JBQSqIvC7VOKmTVVjhCCt5C++m/onftxWH0qMnMVr+1QuzmnGpavEFX/6m05AU9TEshxyCyaTfeF7aNmwBBwb/znno/lDNLz+D3znXIDhU0lwYtKAS1P4zK3T2Hqgge3767nnigmMK8uiM7lExULsX0ntK38Cy0TxBsm94QvEjt9/6iQhQK/ZTu0TP25bM+QePQPfBR8iofq6dKyBzrQdwnETr0vDOFWXWRrwhBD4XQotMatb2zsAiGAhCIFdvQe1cEynnyeT0BASjibTpitfe95oHn9jL0s3HMHQVT5+01Sam8Kco+1DW/sw1Qs/TNN5/40ercXWPCT8RaxZXsls/XUCTgvu0TNJ5ozBOvZWipk2z688wPMr9gOCS+cO44JpCyk7Lw+ScRwzTuOyx9AC2US3vEns8HZCl3+MuL+sLTkG3BrXLBrFwsmFxBIWjpP6IznTFZYWqaL2xd+1fW1Hmml49peEbruXuOj8DVfDitDw8u/bLVqN7VmH79zLYQiVvKluivOrxzdxsKqF4lwvH795GuW5HnmPbZBy64JIwiG3m89PDclNJLlzmUxCUsey/S6CPoPm1lSvpiDbQzxh8ub6VOXqWMKiuiHKWFcdytI/IkbM4O/rbdbuPnTsCFGEqOe/7piGuuIftDTXoPpCaIYPy1+OELBxTx1PLz2+S6vD88v3kx1w4R1Zhnj1fqzWhlQP5bIPUvfKX7DDDdT/+ydk3/Gd1GLRYyKxJOt31/Gvl3di2TY3XziG6aNzO94m4hi7pTatzWyqglgLeDqfhISd
wGqpT2t3Yh1UjBikokmbH/19LfXNqZvVlXURfvDX1Xz/4wsIuOTHxmCkq4L4WQzHAahlk4kvexDX/DsRmtGp58h7QkOIz6Xy3++ZSX7IA8CsCYWs39l+4oBl2wTN1AdwvHgqa3c3tvu+40C8NYx1bMJBvGIXIp76cFZUhbc2tN+KAWDHgQYakjqB6RcRWngz/snnYdYfxY6k1l1YrY3Q2n4R6ta9dTzwr/VU1rVS3RDl14+/w47DTad9fYo/fYMuNZCL4+pahQBTD+AZO6t9o1BQQ8VdOs5A1tASa0tAx0ViJrVN3Z9BJfVvqhBpG0V2leLNQgkWYB7Y0PnnnNUZpQHFcaAs18NPPnM+P/rkQq5dNILhRSd6H7lZbjRVwTo28UBrrSE/25N2nIB1IhlooUIcNXVl7NgOozso0VOY48WxHRqXPUbjsseI7FqDnYyBY2MUDEfLKQHXiXstqqrw1sb0ZPbSqoOop9n11PQWErrwHhCpt7UwPORc9QmSXbyPYzoq/kV34BkzMxWPP5u86z9Pwld0hmcOHl63jvaun7UQEPB0vb6YNECInplPqpZOJrn9jU4/XvarhxjHgZygGyueWlZ60wVj2HmwkY9ekE1x63ZcTVvQxlxPvHQy9vZX+Y/Fn+OHT1W0bfc9b0I2BQ2bAFADORiFI3ACqR6CbTucP72Ut9YfofFY0dK8kJvJo3Lxe1SyL7wbIQSKN4uWDa+Sc8FdxI7sRPVno1iJtvs+juOQm5We/PI6aDuZJXTUsReQX3YOdqwF4c8lbuR0q2J1TM/FffHH8Z/Xgq0aJFTfkLoXEvLpvP/qSfz+qS1tbbdfMo4cvyx8Olg5jnPK5RpdoRaPI7nlVezWBhRf9hkfL5PQEFeY5eJ/PjiOlse/gx1pJgEkdiwn77avYwM+xcOPPjaHisYkPrfGsJDA0yBgzATUrAJMTx4JJXW/RQjI82t87s5zqaqPgBAEvTq7DzUyNuSl4bW/AeAZMY3A1MXUPvebtjjCm14j785vEfMUY9sOC6eVsHRjBVec4yfLZbOrDhbNHoZ1huECCxXLUwieY9stnEXmMB0NUz/2RzSEEhAADsybWMCo0ixqG6PkBNwUZrt75ENK6p+SFhja2f+ChaqjFo0luXslrmlXnvHxMgkNcY4DWtNh7EgzkNqozjNmBslomB2xfP748kFGlgS5afEY8oNG6p5Q4XQgNROuoSmOx50k4NKor6tFj9bxg8eqicbbL3ybVj6Z4LF/C5eL5jXPt4/DTJA8sg0xthjHgVGFPn5wlU7zK7/CjrUyrmAE2e6PEyW9BpymKdi2c1YbxUnpFCEoynJTlNW9Dc+kgSWWtPEaPXOHRi0ej7l3lUxCUicd6y14x81B9fhpXPoYODalecN438K7+NHTFWzeU8f9H5uP/9jMqJrmOD/+x1pqG2NoqsLdV05giqeKRGtLWgICaDWVE0lIKB2WfXdOqq6ZqDlI4zP/y/EuSLJ6P40v/x7/Nf9NktSsG92OIqp2ENn6JlpeOe5x84l5hs59G0nqSZGEg9/VM0lIyRuOve4pnFgY4T79xCA5MUFCyR2G4vLiKhlDy8YlbetjrNqDlFe/xfjyIOFoksq6VNUA04ZfP/EOtY2pmVKmZfPnZ7YSVXz46ncwoTzY7vguXaUoP4ucaz6F4vYT2buBwJQL2gehahilk9pGz5INR3n3GFiiYhdqPDWjTlEE9p4V1D39P0T3rKPl7Sepe/S7uBPtZ9lJknRmsWSqxqLX6JnxVqGoKLnlmEd3nPGxsickkXDnkXv7vSR3r0r/5pF3mDpsPjsONaNrqWuW1rjJ/srmtIfWWz6CB9byobnTeMQTYvWuRoYV+vnwdeeQ5XVhemeRc9dYsBJgeMkNFtK68WUUXwjf1EuI+0va8o7qTd/8TQ3kYOupoSE92UL98kfbfd+OhrHrDkFxd5fbpVNVBSEEpplebFWSBovGiE1eQO1yBezTUQIF2LUHYcTM0z5O
JqFBpKOqAqeqNKAoJ4bEHAfinkKMwlHpBy0Yy86jCSaOyKYkz4eigNetMmFYiEjCpKE5TjiaZO74bCZmRQje/EWSjVV88jwfrfMCePOLsXVvWwxxLYhiiNT9m4Jz8Fw5BccRxCy7LQEJIdDyyvBPv5TwhpePBayRfflHU2VzHNqmkwrDjVB17OjxvV4chBC4zQaspmoUt5+kNx+LM08tVhTRdl9JCIErfJjo5iVYLXV4p1yMXTAOU7SfHXb8b3YozZyTBp/qFpOy7J5NB8ITwG5NX/T9bhlJQr/4xS94/vnUjenFixfzxS9+MRNhDBrueC3Jim04kRZc5ZNIBMvRky1YR3diNR7FVToBK3s4cQwO14R5/u195PsVxuarOAhiejZ7K5ooz8qheOJ5RLe9BaTWxzjTrmNWnYeSPA/Jyp34YkcI5Jbz1Ys04kcP4QQL0PNGElvxMJEnNhHz+PEvfi+rqnwI4TBBayAv2ErcU4hmRRF1e0kc3Y2eU4ZSOJY4QY5nH1eyAatyB3bjUSJZeWihfPJv+HyqbE9WEXF3/onhOi1A/jWfJH5kJ04yjpaVT3jbStS8csTR9UQPbEb1h0jGWkHR8Ey+gKjecQ8pbtrsrwqz53Aj5YUBRpcEybGqqX3oW20186J7N5Bz9X8iymZhJBqxju6k7p0KjNxSHARKdilxb0m3poNLUqYdbbKYUtrD0+9VDY79/ZxOnyeh5cuXs3TpUp544gmEEHz4wx/m5Zdf5tJLL+3rUAYFV6KWuke+3Ta7DSD/pi/Qsv4VhGOTqDtC84rHyb7io6yLjeXXj29qe1xJrocvzEuwP15Awgjx19cquHTGZYy6bCGKlaBOy+GXjx+kKRzHtBwmjwjxH+VVGBtfxTNiKpE1z+Mun0hyv5fEgdRx7WiY5hd+Rd7Cz/Odp2tT55gToXiSRmzbUppXPnEi9uHn4L/skySEB5fZROOTP8asr2j7fmjRLdS//EeCC2/BKprW7gPeiFZR++yvsdtK6QgKbvkS8Z1v07TskRPnKBmLnluCuX8D7uHTiRntE5EDPLfywEmlhmDmhAI+Ot9oS0DHNS9/jJybxtLw/M9IHj3x+Kw519K6/Amyr/wEMV9pF357kpR5juNwsD7JLTO7tvPrGSWiCNeZF4r3+cSE/Px8vvzlL2MYBrquM3r0aCoqKs78RKlDVtWedgkIwKyvxFU4HDsZwztmBqHzbqW+oZkHn9/W7nEVdVEqnVxGuRupbYyyaFoJ//fsbtbVB1jVkMP9D++irinWVspjy/5G6oITSNYeQfWmJh+4yycQ3bM+La4su77tHBWiEJqraH77yXaPiR/YjGiqBMCpP9QuAQE0r3kB36SFtKx6Gt2JtrULAckj209KQAAOTSufIFG1r90x4hW70LOLsBMRrOq9aXE2tCZ4dln756zdXk2VmT6jRwiB01jZLgEBNK9/Cd+4WcR2rkCRC2mkAaa6xUJToCirZ/skdksNSk76VjDv1uc9obFjx7b9e//+/Tz//PP885//7NIxurpXe1/ry/ia9rXfg947bg7RveuJHdwKQPzITvS8ctQ57yGWqEx7ftIWqIpFNG6iH1tvI0htqd1RHamEnfqQPT6d2myuR88uIvmuBBJXfEBL2zkcK9muKvVxmmIRzA/QUp1+499OxFB0A8Xw4A94Ud0nrqoatkfTHx8NowXSh9wcx0boLoSVSPvdNMUa6Gh5ka15ELobJ3miVlro/NsQooOp5WYSoek44UZyc7tWp64v9Pe/F5AxdsTrc6H1wOLRM1m+t4nzJgTJzu65bUocy6S6dj8F134MPfv0P7eMTUzYtWsXH/3oR/niF7/IiBEjuvTcmpqWMz8oQ/LzA30an7tgVKpW2rEPeKNwOI1vPdLuMcnaQ+ToCS6fN5xnl+1va3cZKsVGmGorm+HFQTbvrWfSyBw27a7l3HH5TB2Tx6bdJypTZ/kNCp1ahO5um0XTun0FORfeTd2rfwU7tT5I
lE9n1VEX0IJLVynWW1CCw3GPmEps/4nhQNWXhektoKamBXewGKG1HwLzT15IZPc6gotup77FhpYTP1d3yURAcPI07sCsq4gePFFmBkDxBjFyy2lc+W+yLv5Q2u/GZ6hMHJHDtv0nbqDmZbnJy8ki+/ZvEtu+DCtcj2fS+Zi5o9GSLQiXFyd+YpM779hZRPdvJjDvpn733uzr92N3DJUYu5rEIq1xYrH4mR94FpKWw8aDEb56dR6NjT23caN56B1EqIRG0wc1Lad97RlJQmvXruXTn/40X/3qV7n66qszEcKgkfCXkn/b12he8Th2uAE9f0SHj9PcPkaVhrjjkrG8sb6C0lwXN8wM4dJMNHLZtbGS6voI77t8NKt31tMUjnPD4lGUFfhZu72asWVBrpkgCBxYgv+6TxGrOoAWKkTNH4GZPYzsi+5B0XQcd5BdkSCrXq5k1thsbpyZxfBsh5i7kMCFH8BzeDNWUxWOZeOevJiYlpqKHfcUknf7N2hZ+QRmfQX+iQtQc0pwT1qMmZW+KV0iUEb+rV+lecVj2NEW/LOuhvJz8ZVMQgvmE92xEqNoFP5zzie8822yFt9N3FeS/nNRBB+94RxeXXOIVVurmDwqh6vnj8SjK8T0EpRZt6EpgsSxfZgsVy55t36d8Kp/k6w+gGf0uWiBXJRQMWb2yJ77xUpSH9h8JM7ofIOCQM+lAicZx9yxFM8Vn+nU44XTx9N5KisrufHGG3nggQeYP39+t47Rn6+aMnVVpwsLHBOESvTNPxLdtrzte67yibgv/hh7d+4jqCdxsssxhUFlU5KSXC/FNW9jBQpwkkka/CN54IndjCnLoqo+QlGuh4tnlTOywINqx1EAE5WYbeByooh4C+Gl/0CoOv4ZV2Bmj8IgTtTR0LFBqCRs9diU50O0rHgMs/Yw3nPOw5iwmLjWfj2QptgodpKs/Dzq6sJnLMWTet0WpjDaZs4pCmhWDFt1IewECI2ko572OEIIEpaNoSqdmuGmCZugVyEcNXEcSDr9c7XDUOll9LZM9IRWvrGqV3tCScvhz8ua+M+LshmR17m9f87EcRyS655EyS7Bveh9be39qif0hz/8gXg8zg9+8IO2tjvuuIM777yzr0MZVFIfsio44J1/B+7hU4nv34irbCJa+TlEt71O7kkz0/S5d/HY6iB3z/HgWfm3tvaDc7/MkZowR2pSN/23H2jgSE2ED1wziWjcIj/oJuDRETgkhAfcHjyXfzY11dtywAbzWH23BLSNlrliNdQ+/B2cZOqPqnn54/haGtAX3IPlnJgfY9oKkJoq2placCe/7uNsGxLCDTapY3XiMstxHHTlzLu3tsXpKKjeAInW/v3hKUmnsvZAjNEFeo8mIHP7GzhmAtf8uzr9vD5PQl//+tf5+te/3tenHVISWhAxfB7GyAVYto3aWkH4pAQEoKx9lA9e9FXyWk/MGFPKziEnP5c7LvWzcVct2/bXc87oXIYVBvjab5anynq4Nb783tmU5ZzYVkGYMdTmStR4BBEqJOHKT/swtxqOtCWg41o3v0H+rOuw9DOXe5ckqec0RizWH4zx9WvyeuR4juNg7ngLu/4Q3mu/ilA7v+9U/xxDkM6a44BlHauI8O5tqYfPpKL0YtbujVPgKWTqoo9htFbzZM1wXnvwHQAWTi3hsrnDKcj28PArO7ni3HzKczRW7I7wq0c3ct+H5mCoCrodIbbs70S2LQNAaAZ5t3yVWHBEu1N2tNWvMNw4yumHySRJ6lmO4/DqtgiXT/aR5z/7FOBYJsl3XsKJteC55stnLFj6bjIJDQEikI8wPDiJKIo3yOb8y/jtMyd2Ln3W7+KTNyxgyasnZq4t3VjBh6+bTH7IxXevz8a78V9YByqZMmo2+yZfSCRuYXgVRMOhtgQEqS0ZGl/9I8Ebv0aCEyuwRXYZev5wkjUH2tpC599JUs+iwznSkiT1ivWH4jjAZZPPfjmBHWkiuf4pRFYR3uu+gtC6XnVB
JqFBKGbaNLUm8Ht0Am6NhJFDwY3/Rd1Lv8ccMZ9/La9p9/im1jiKcPjCFXkkHZVn32lld0WY9Ttq+MxluTQ8979Yx6Zf23veZrQZw3vOp3AAJ9IIgNDd+KcsRvX4cSwT3YmTOKnOWlwNkHXN57CrdmM1V6MXj8UKDcdxwGU2QTwM7iAJLdAnddiEELTETFpjSbJ8Bh5dkfXfpEGvusVk1d4oX74qD/UsF1abFdtIblmCMf1qjKlXdLv4qUxCg4SiCDQVGsIJvvfXNRyti+J1a3zi+vFMGZFFvL4Sd/lEEuWTSC6rZN64LK4Yr+CzmsnKy0WpWELruudBUfnUlCv5d85oirMFHN3etv7nOPPARlzJJmJGLpo/G6G7yLngLhpXPonVUodwedHzh6GUnIt90qSDuB6CslkIIYg7DkI4GDVbqX/ul9ixMKovRM41nyYe6qCQag/bdqiRXz66kdaYSU7QzefvPJfSHI9MRNKgFU/aPLspzB1zghQGu//R78QjJLcuwQ7X4r3y86gFZ/f3KvcTGgTciTqMyvVEX/pfjNd/xrcu05k9JkgkZvL/HtlKbVUtdrie8KbXsJc/yPsvH80dpfvJXvYzjJV/IvrMT1CwMIpGgW3Bxme4aqzNucF6SKZPEVU8fhw1dY/HcfvJu/JjNL79FFZLai8fJx6h9plfYkRr054LtE1acMXrqXvqgbbyO1ZrI3VPPYBhpm8T0ZMaIkn+3z/X0RpLJdf65hj/75/riSbTqyFI0mDgOA4vbmllcqmLuaO83T6GeegdYm/+CSWnFN/N3znrBAQyCQ14LqsZ68gWap7+XyK71hA7sJnYi7/gvdMsXIaKbTscrarDXTIGANXlZlwogdj4TLvjtKx/Bd/YWW1f58YO4t7wCMm6I7jLJ7V7bOiiD5DQUrXjLE8udiKK1fyuhOPY2C3th/3ezYpHsKZdhzJ2IRyboGBHW6C1oVs/i86qbYqmlSSqb47R1Hrmir+SNBCt2BvFtOHO2en7dHWG3VxNYuW/sA6/g/eqL+Cef1eHk426Qw7HDXTN1STrjqR6MCdRt77I7LHXs3RLHSHdxIq0knv5f9Cy6TWURBjn3YtnHLvdltua4cKOhQlveYvAjMsJzLoKKxZBCRVhBk5sWZDEhbdoDIrHjx1tPwtPeEOnDLslZvL0qijLNgUpzy/k7sXnkbPq19iJGLh7t0ZXyOdCiPZ7APncGn5356eVStJAseNonO2VCb52dR6a2rX7Nk4iSnLXMqyKHbhm3Yg+8UKE0rN9F9kTGugU0eGbQqgapuVw13mFFKhNWJEwDUsfxSgYTjAvH9UXavd41Z+NHU8VBXWPnIrZUp+qR6douIZPIV4whWT5HOKBYVjvunaJugrJufLjoJxozzr/TkxvQYchO8CDL+7gldWHicZNdh5u4XtPVdE67TayL/8ICVfO2f1MziA34OK9V05s+1pVBJ+4eRoBj7wmkwaXyiaT13ZE+M+Lcgh6Or8cwrEtzL1riL3xBxRPEN/t38eYfHGPJyCQPaGBz1+InlOSVvzTM+s6LrFLyPGAT+jUPHw/en45enYhomIzOVf8B7GDW2lZ/Sx6yViyFt+DlYhTMHJ6av2OmcQ1agYE8km48k5bScBxHBJ5k8i/5wfYLTUIb6ow6buT1XHNkSRrtlW1a4snLeoCkykszen1yQGKgPOmFDNpRA6NrXHysjxke7VOVWiQpIGiOWrx9IYw71sQojync718x3Gwj+4kueNNlKxivNd9FTW7d/fIkklogIurfjzlU8i/7tNE923CTsZxRs7hB6+E2X54A6oi+NptYygMFeKftIiG1//R9lzfxAXkXvlRFE+QRNYwbBvM05zrdBwEMVceuM68AlvTFLxujUis/dncbne72XS9SRGQH3SRH+zh3SRP4VTbrEtSb4gnbf69Pszl5/iYXu7u1HOs+sOY298Ex8Z9/gfRys7p5ShT5HDcIBBVQ8Tyz0FfcA9VE+/gP/5S
xfbDrQBYtsPvXzmCb/4tNL39VLvntW5bjh1povapn6HHzrwXfE8JuDXed1X7yQ5Tx+RRnNu9WTv9mebEcdXvRGx+Bv3ImtSaKEnqRZbt8MymViaWuLh00pn3CLLD9STWPklyw7PoUy7De/N3+iwBgewJDRqOA8mkRVV9+p4gR+siCFcRdqw1/XmWmRrGS0TAaH8vRgiIWw6m5eA1lE4VAu0M23aYMTaXb//HPI7UtBIKGAwr8OPSBtc1kaKAs2MZta/9ta3NKB5D4OrPklD63+Z30sDnOA5LtkfwuRTunBM87QJSJx4huXs5VsU2jKlX4rny8z02460rZBIaZIrz0q98Fk4twcgrxSgcRaLqpC2uFQ2hqGihQvC2T0C247DzcDN/fGYLjS0JLpszjCvnDcfn6plab6oQlOV6KRuEvZ/j9HgjdUsfateWqNwNDRWQOy5DUUmD2ZoDMerCFl+6MveUFREcy8Tcvw5z7yr00XNx3/YDFE+wjyM9YXBdeg5AqipQOzlt8lSPVRSBqqZ+lQVZLv7rrhlk+VNXNHMmFXLzhWOIKgGyrvgYrvLUrDA1mEfORXcTPbyTnGs/S1xpnwyONsb4+4s70DUV07J5bsV+Xll7CNHFUh+pfXocTNuhm1U9BixhWzjJ9LVHJ08gkaSesqsqwcZDcT59cQ5uPf2j3XEcrModxN/8I05rPb7rv4F70XszmoBA9oQyqrIxyrJNldi2w8KpJZTkeOjoc9oBKuqjLNtUgSIEC6cWU5yd2kqhujnOis2VtESSnDethPI8H+cMz+b+j84naTn43VrblUbMVYDvys+TL6IkLQfbsvCOmk+U9jfnFWwKkkf48rjtOEKhKX86v3yjmVdXH+LyOcNwd3LYLG7arNlRwxNv7MGlq9x1+XgmlGehDpFsZLpDeCfMJ7L9xAaDwvCgZheTzGBc0uBT1WzyyrZWPntJDtm+9NEKu7mG5NZXwUrivuA/0EondXCUzJBJKEMqGqJ883cr26YFv/j2Ae778DzKOxieOlIX4d7fr2ybuvzC2wf49n/MQ9cUvvl/K9q2nl6y5hBffd9sxhQH8Ogqng5mZSbRMfJyaKppOWU/2GjaT9O/v8fxm0BBsYRPLv4cf1qVQFc7l4CEgM376vnD01va2n76j3Xc+8G5DC84883SwcB0VLzzb0MNFRDdugy9YDj+eTcRd+X22P01SWqN2zy1Mczd87LSNqhzkvHUYtMj244tNr0A0c+2T5HDcRmgqgpvbqhoty7FcVKJSHtXL0PTFF54+0C7tTO27bBqaxXv7KlrS0DHPf76bpyz6GioqiC84SXafUo6NqHq9Xz4usl0dsG1jeCFlQfS2tfurEY5y+q9A0lcD6FMv4Gs27+N+5KPE/OWyCKpUo9JzYQLc94YD7NGeNp/r3IH8Tf+iNBcJy027V8JCGRPKGNMK71YpmnaaR9QjpNqfzfLdrA6WFxpWvZZXmULsNIHi9yaQzDH27ZR3pkoAvKyPew50n5Kck7APeQ+hFPbjXuObTcuST3n9R0Rsn0q104/UerKiYVJbnkFu7UR92X/iVbUvyfByJ5QBliWzeJzS9Nu1F8+b3jah7xl2Vwxb3jaMeZMLGTq6PQZMDcsHt3hfaWuxOabfvm7WgW+yed3OgEBOLbDdYtGoZ00fBf0GUwbc/rqC5Ikdc7WijhHGk0+tCiEcuzDxKrcQXzpX1AKxuC75Tv9PgEBCGcAfiLU1LRkOoRTys8PdCo+BzhU28pzK/ZjWQ5XLRjB8Hw/HY1U2Q4cqAnz/Ir9KEJw1YIRlOf5EKTuLb2wcj8tkSRXzhvBqOIA2hmGu84Uo4qJ1rD32P5CGv4ZV5AIjexyNQMhoKY5zt6KZnRNYWRxkJC3c+VDOvtzzCQZY88YKjHm53etMO/KN1YRi6VvpQJQF7Z4eE0z/315LqXZOo6VJLnlVeyGCjwXfRS1cMxZxdrTTvfaZRLqYV19sx6fWt2ZXsapHpuati06
3VPpbIyaKnAAy+r7t8hQ+WDqbTLGntGfklDScvjnqmaumOzjvHG+VMWDdU+i5o/Cff77EXrnyvT0pdO9dnlPKMO6MsR1qsemkkTPJ4p377kjSVLmLd0VpTxbZ9FYL1b1XhIbn8M1+5bUzLcBuPxBJiFJkqQB4lB9kj01Ce67Lh/r0DuYO5fiuezTaMXjMx1at8kkJEmSNAAkLYeXt7Zy97wsjCNrMQ9uwHvd11BCRZkO7azIJCRJkjQArN4XY0SeweTkZsxDm/Be93UUf+9uANkX5BRtSZKkfq4pYrHxcIxby49i7l+H99qvDIoEBDIJSZIk9XvL9kS5aKSDb++reK76PIo/N9Mh9RiZhCRJkvqx2haTQ/VJFrc8i/v8D6DmlGc6pB4lk5AkSVI/tvpAjAtzjuAdcQ76yFmZDqfHySQkSZLUT7XEbPZVx1kkNuCed0emw+kVMglJkiT1U5uPxDnXc4TQ/JsQhufMTxiAZBKSJEnqhxwcth2JMD9wGG3MvEyH02tkEpIkSeqHasM2ihVn7OyFCDF4P6oH7yuTJEkawA7WRJmiH0AfxL0gkElIkiSpX6qoj3FOmQ+huzIdSq+SSUiSJKkfqo0qjDtn4BYm7ayMJKGnn36aq666issuu4y///3vmQhBkiSpX8tWwviGTc50GL2uzwuYVlVV8cADD/D4449jGAZ33HEHc+fOZcyY/rUToCRJUiYVuaIItz/TYfS6Pu8JLV++nHnz5hEKhfB6vVx++eW88MILfR2GJElSv1YUHBqbHPR5EqquriY/P7/t64KCAqqqqvo6DEmSpH4tNzT4e0GQgeE427bbbUHrOE6Xt6Tt6l7tfa2/xwcyxp4iY+wZMsZ0paV5A+Lncrb6PAkVFRWxZs2atq9ramooKCjo0jFqalp6Oqwek58f6NfxgYyxp8gYe8ZQibGrCUVzefr9z6WzTvfa+3w4bsGCBaxYsYL6+nqi0SgvvfQS559/fl+HIUmS1K/5s0KZDqFP9HlPqLCwkM997nO8973vJZlMcssttzB16tS+DkOSJKlfM/wBnEwH0QcyMv3i2muv5dprr83EqSVJkgYG1WAoZCFZMUGSJKkf6uqErYFKJiFJkiQpY2QSkiRJkjJGJiFJkiQpY2QSkiRJkjJGJiFJkiQpY2QSkiRJkjJGJiFJkiQpY2QSkiRJkjJGJiFJkiQpY2QSkiRJkjJGJiFJkiQpY2QSkiRJkjJGJiFJkiQpY2QSkiRJkjJGJiFJkiQpY2QSkiRJkjJGJiFJkiQpY2QSkiRJkjJGJiFJkiQpY2QSkiRJkjJGJiFJkiQpY7RMB9AdiiIyHcJp9ff4QMbYU2SMPUPGOHQJx3GcTAchSZIkDU1yOE6SJEnKGJmEJEmSpIz5/+3dfUxT1x/H8XdHwYc4o2YiCxqIj9ElMjIzrZph0VTKpRSND+BSVOYDxojTPxAXo5nBiMYI4rbMROOzMZqABiLowiRTMRAWlcQsmcEBoozh3FZB0pb2/P4w9if57UEz/d1b/L7+u6cHzufeE/j2XMq5UoSEEELoRoqQEEII3UgREkIIoRspQkIIIXQjRUgIIYRupAgJIYTQjRQhIYQQugmpIlRUVMT+/fuDx263m1WrVmG32/n444/p6OjQMR2UlZWRnJyMzWbj5MmTumZ5XmdnJykpKbS2tgJQU1ODw+HAZrNRWFioczr44osv0DQNTdPYvXs3YLyM+/btIzk5GU3TOHz4MGC8jM/s2rWLvLw8wHgZXS4XmqbhdDpxOp3cunXLcBm//fZb5s+fj91uJz8/HzDedexTVAhwu91q8+bNavLkyaq4uDjY/vnnn6sDBw4opZQqLS1V69ev1ymhUj///LOyWq3qt99+U11dXcrhcKg7d+7olueZmzdvqpSUFPXee++pe/fuqe7ubpWQkKBaWlqUz+dTWVlZqrq6Wrd8165dU4sXL1Yej0d5vV6VmZmpysrKDJWxtrZWpaenK5/Pp7q7u5XV
alU//PCDoTI+U1NTo6ZOnao2bdpkuLkOBAJq5syZyufzBduMlrGlpUXNnDlTtbW1Ka/XqzIyMlR1dbWhMvY1IbESqqqqIjY2luXLl/dqr66uxuFwAJCSksJ3332Hz+fTIyI1NTVMmzaNIUOGMHDgQObOnUtlZaUuWZ535swZtm3bRmRkJAANDQ3ExMQwatQozGYzDodD15zDhw8nLy+PiIgIwsPDGTNmDE1NTYbK+OGHH3Ls2DHMZjO//vorfr8ft9ttqIwAv//+O4WFhWRnZwPGm+u7d+8CkJWVRWpqKidOnDBcxm+++Ybk5GSioqIIDw+nsLCQAQMGGCpjXxMSRSgtLY1Vq1YRFhbWq/2XX35h+PDhAJjNZgYNGsSjR4/0iNgrC0BkZCTt7e26ZHnejh07mDJlSvDYaDnHjRvH+++/D0BTUxMVFRWYTCZDZQQIDw+nuLgYTdOwWCyGu44AW7duZcOGDQwePBgw3ly73W4sFgtffvklR44c4fTp0zx48MBQGZubm/H7/WRnZ+N0Ojl16pThrmNfY6hHOVRUVLBz585ebaNHj+bIkSMv9PVKKd56S5+6GggEMJn+u9W7UqrXsVEYNeedO3dYvXo1ubm5hIWF0dTUFHzNKBlzcnJYuXIl2dnZNDU1Geo6nj17lnfffReLxUJJSQlgvLmOj48nPj4+eLxgwQKKi4v54IMPgm16Z/T7/dTX13P8+HEGDhzImjVr6N+/v6GuY19jqCJkt9ux2+0v3D8yMpKHDx8SFRVFT08PXV1dDBky5PUF/BtRUVHU19cHjzs6OoK3wIwkKiqq1wc4jJDz+++/Jycnh88++wxN06irqzNUxsbGRrxeLxMnTmTAgAHYbDYqKyt7rcz1znjhwgU6OjpwOp388ccfPHnyhPv37xsqY319PT6fD4vFAjz9ZR4dHW2ouX7nnXewWCwMGzYMgDlz5hhurvuakLgd91cSEhI4d+4c8PSHcMqUKYSHh+uSZfr06Vy/fp1Hjx7R3d3NpUuX+Oijj3TJ8nfi4uL46aefgrcdysvLdc3Z1tbG2rVr2bNnD5qmGTJja2srW7Zswev14vV6qaqqIj093VAZDx8+THl5OefPnycnJ4fExEQOHjxoqIyPHz9m9+7deDweOjs7KS0tZePGjYbKaLVauXr1Km63G7/fz5UrV0hKSjJUxr7GUCuhl7V+/Xry8vLQNI23336bPXv26JZlxIgRbNiwgczMTHw+HwsWLGDy5Mm65fkr/fr1o6CggHXr1uHxeEhISCApKUm3PIcOHcLj8VBQUBBsS09PN1TGhIQEGhoaSEtLIywsDJvNhqZpDBs2zDAZ/4zR5tpqtXLr1i3S0tIIBAIsWbKE+Ph4Q2WMi4tjxYoVLFmyBJ/Px4wZM8jIyGD06NGGydjXyJNVhRBC6Cakb8cJIYQIbVKEhBBC6EaKkBBCCN1IERJCCKEbKUJCCCF0I0VICCGEbqQIiZCUlZX1j/sEvkif2tpaUlJS/nG8CRMm/On3qqqqCm7373K5qKyspLW1tdf2NEKIvxbS/6wq3lzXrl17JX3+rdmzZzN79uzXPo4QfZWshETI2bx5MwBLly6lrq4Ol8uFw+EgNTU1uI3T833a2tq4fPky6enpzJ8/n1mzZlFUVPTS4xYVFTFv3jycTieXL18GoKSkhNWrV7+S8xLiTSQrIRFydu7cSUlJCUePHmXRokXk5uZis9lob29n4cKFxMTE9OozdOhQcnNzKSgoIDY2lvb2dqxWK5mZmS817siRI9m+fTs//vgjLpeLioqK13SGQrw5pAiJkNXY2IjH48FmswFP9++z2WxcuXKl199kTCYTX3/9NdXV1ZSXl9PY2IhSiu7u7pcaLyMjA4Dx48czZswYbty48epORog3lNyOEyHLZDL9z3NdlFL09PT0anvy5Anz5s3j9u3bTJo0idzcXMxmMy+7beLzz6oKBAKYzfIeToh/S4qQCElhYWFER0djNpu5dOkS
AO3t7Vy8eJHp06cH+/T09NDc3ExnZyeffvopiYmJ1NbW4vV6CQQCLzVmaWkpALdv36alpYW4uLhXe1JCvIHkrZwISUlJSSxbtoyvvvqK/Px89u/fj9/vZ+3atUybNi3Yx+VysW/fPmbNmoXdbiciIoLx48czduxYmpubiYiIeOEx7927R1paGiaTib179+r2AEUh+hJ5lIMQQgjdyEpICODgwYOUlZX96WuffPIJqamp/+dEQrwZZCUkhBBCN/LBBCGEELqRIiSEEEI3UoSEEELoRoqQEEII3UgREkIIoZv/AP/kVwligiBHAAAAAElFTkSuQmCC",
      "text/plain": [
       "<Figure size 432x432 with 3 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Create a visualization with pandas df\n",
    "sns.jointplot(data=pandas_tips, x=\"total_bill\", y=\"tip\", hue=\"sex\", hue_order=[\"Female\", \"Male\"])"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
  },
  "kernelspec": {
   "display_name": "Python 3.9.10 64-bit",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}


================================================
FILE: examples/jupyter/integrations/sklearn.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Demonstrating sklearn Modin Interoperability"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Logistic Regression example taken / adapted from https://www.ritchieng.com/pandas-scikit-learn/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import modin.pandas as pd\n",
    "import numpy as np\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# From https://www.ritchieng.com/pandas-scikit-learn/\n",
    "\n",
    "url = 'http://bit.ly/kaggletrain'\n",
    "train = pd.read_csv(url)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pclass: passenger class\n",
    "# Parch: parents and children\n",
    "feature_cols = ['Pclass', 'Parch']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# you want all rows, and the feature_cols' columns\n",
    "X = train.loc[:, feature_cols]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# now we want to create our response vector\n",
    "y = train.Survived"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 1. import\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "\n",
    "# 2. instantiate model\n",
    "logreg = LogisticRegression()\n",
    "\n",
    "# 3. fit \n",
    "logreg.fit(X, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "url_test = 'http://bit.ly/kaggletest'\n",
    "test = pd.read_csv(url_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# missing Survived column because we are predicting\n",
    "test.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_new = test.loc[:, feature_cols]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 4. predict\n",
    "new_pred_class = logreg.predict(X_new)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# kaggle wants 2 columns\n",
    "# new_pred_class\n",
    "# PassengerId\n",
    "\n",
    "# pandas would align them next to each other\n",
    "# to ensure the first column is PassengerId, use .set_index\n",
    "kaggle_data = pd.DataFrame({'PassengerId':test.PassengerId, 'Survived':new_pred_class}).set_index('PassengerId')\n",
    "kaggle_data.to_csv('sub.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# save train data to disk using pickle\n",
    "train.to_pickle('train.pkl')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# read data\n",
    "pd.read_pickle('train.pkl')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# From https://scikit-learn.org/stable/modules/generated/sklearn.compose.ColumnTransformer.html\n",
    "\n",
    "import numpy as np\n",
    "from sklearn.compose import ColumnTransformer\n",
    "from sklearn.preprocessing import Normalizer\n",
    "ct = ColumnTransformer(\n",
    "    [(\"norm1\", Normalizer(norm='l1'), [0, 1]),\n",
    "     (\"norm2\", Normalizer(norm='l1'), slice(2, 4))])\n",
    "X = pd.DataFrame(np.array([[0., 1., 2., 2.],\n",
    "              [1., 1., 0., 1.]]))\n",
    "# Normalizer scales each row of X to unit norm. A separate scaling\n",
    "# is applied for the two first and two last elements of each\n",
    "# row independently.\n",
    "ct.fit_transform(X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.feature_extraction import FeatureHasher\n",
    "from sklearn.preprocessing import MinMaxScaler\n",
    "X = pd.DataFrame({\n",
    "    \"documents\": [\"First item\", \"second one here\", \"Is this the last?\"],\n",
    "    \"width\": [3, 4, 5],\n",
    "})  \n",
    "ct = ColumnTransformer(\n",
    "    [(\"text_preprocess\", FeatureHasher(input_type=\"string\"), \"documents\"),\n",
    "     (\"num_preprocess\", MinMaxScaler(), [\"width\"])])\n",
    "X_trans = ct.fit_transform(X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# From https://scikit-learn.org/stable/modules/generated/sklearn.impute.SimpleImputer.html\n",
    "\n",
    "import numpy as np\n",
    "from sklearn.impute import SimpleImputer\n",
    "imp_mean = SimpleImputer(missing_values=np.nan, strategy='mean')\n",
    "imp_mean.fit(pd.DataFrame([[7, 2, 3], [4, np.nan, 6], [10, 5, 9]]))\n",
    "\n",
    "X = pd.DataFrame([[np.nan, 2, 3], [4, np.nan, 6], [10, np.nan, 9]])\n",
    "print(imp_mean.transform(X))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# From https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html\n",
    "\n",
    "import numpy as np\n",
    "from sklearn.model_selection import train_test_split\n",
    "X, y = pd.DataFrame(np.arange(10).reshape((5, 2))), pd.Series(range(5))\n",
    "X\n",
    "list(y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    X, y, test_size=0.33, random_state=42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "type(X_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_test_split(y, shuffle=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Linear Regression example taken / adapted from https://github.com/chendaniely/2021-07-13-scipy-pandas/blob/main/05-models.ipynb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import seaborn as sns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tips = sns.load_dataset(\"tips\")\n",
    "tips = pd.DataFrame(tips)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.get_dummies(tips, drop_first=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn import linear_model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 1. create the model object\n",
    "lr = linear_model.LinearRegression()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 2. fit the model object\n",
    "lr.fit(X=tips[[\"total_bill\", \"size\"]], y=tips[\"tip\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# look at the coefficients\n",
    "lr.coef_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# look at the intercept\n",
    "lr.intercept_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tips_dummy = pd.get_dummies(tips, drop_first=True)[[\"tip\", \"total_bill\", \"smoker_No\"]]\n",
    "tips_dummy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "lr2 = linear_model.LinearRegression()\n",
    "lr2.fit(X=tips_dummy.iloc[:, 1:], y=tips_dummy[\"tip\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "lr2.coef_, lr2.intercept_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "new_data = tips_dummy[[\"total_bill\", \"smoker_No\"]].tail() # not really new data\n",
    "new_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# use the model to give predicted tip values\n",
    "new_data[\"predicted_tips\"] = lr2.predict(new_data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "new_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "type(new_data)"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "9752fa87da8bf164654ccc33a595e9110c8fc9bb15d763374a7037fd32519b1f"
  },
  "kernelspec": {
   "display_name": "Python 3.9.7 ('base')",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}


================================================
FILE: examples/jupyter/integrations/statsmodels.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Demonstrating statsmodels Modin Interoperability\n",
    "### Currently, statsmodels is not completely interoperable with Modin. All the examples in this section are taken / adapted from https://www.statsmodels.org/devel/gettingstarted.html or https://www.statsmodels.org/stable/index.html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import statsmodels.api as sm\n",
    "import pandas\n",
    "import modin.pandas as pd\n",
    "from patsy import dmatrices"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Example with sm.OLS()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = sm.datasets.get_rdataset(\"Guerry\", \"HistData\").data\n",
    "modin_df = pd.DataFrame(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "vars = ['Department', 'Lottery', 'Literacy', 'Wealth', 'Region']\n",
    "\n",
    "modin_df = modin_df[vars]\n",
    "\n",
    "modin_df[-5:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "modin_df = modin_df.dropna()\n",
    "\n",
    "modin_df[-5:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "y, X = dmatrices('Lottery ~ Literacy + Wealth + Region', data=modin_df, return_type='dataframe')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "y = pd.DataFrame(y)\n",
    "X = pd.DataFrame(X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "mod = sm.OLS(y, X)    # Describe model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "res = mod.fit()       # Fit model\n",
    "\n",
    "print(res.summary())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "sm.ols() is not interoperable with Modin currently."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Example with sm.ols(formula=)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "modin_df = pd.DataFrame({\"A\": [10,20,30,40,50], \"B\": [20, 30, 10, 40, 50], \"C\": [32, 234, 23, 23, 42523]})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import statsmodels.formula.api as sm\n",
    "result = sm.ols(formula=\"A ~ B + C\", data=modin_df).fit()\n",
    "print(result.params)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(result.summary())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Replicating statsmodels workflow with pandas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import statsmodels.api as sm\n",
    "\n",
    "df = sm.datasets.get_rdataset(\"Guerry\", \"HistData\").data\n",
    "pandas_df = pandas.DataFrame(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "vars = ['Department', 'Lottery', 'Literacy', 'Wealth', 'Region']\n",
    "\n",
    "pandas_df = pandas_df[vars]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pandas_df = pandas_df.dropna()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "y, X = dmatrices('Lottery ~ Literacy + Wealth + Region', data=df, return_type='dataframe')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "y = pandas.DataFrame(y)\n",
    "X = pandas.DataFrame(X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "mod = sm.OLS(y, X)    # Describe model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "res = mod.fit()       # Fit model\n",
    "\n",
    "print(res.summary())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Example with sm.ols(formula=)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pandas_df = pd.DataFrame({\"A\": [10,20,30,40,50], \"B\": [20, 30, 10, 40, 50], \"C\": [32, 234, 23, 23, 42523]})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import statsmodels.formula.api as sm\n",
    "result = sm.ols(formula=\"A ~ B + C\", data=pandas_df).fit()\n",
    "print(result.params)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(result.summary())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "9752fa87da8bf164654ccc33a595e9110c8fc9bb15d763374a7037fd32519b1f"
  },
  "kernelspec": {
   "display_name": "Python 3.9.7 ('base')",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}


================================================
FILE: examples/jupyter/integrations/tensorflow.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Demonstrating TensorFlow Modin Interoperability\n",
    "## All the examples in this section are taken / adapted from https://www.tensorflow.org/tutorials/load_data/pandas_dataframe"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "import modin.pandas as pd\n",
    "import pandas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "SHUFFLE_BUFFER = 500\n",
    "BATCH_SIZE = 2\n",
    "\n",
    "csv_file = tf.keras.utils.get_file('heart.csv', 'https://storage.googleapis.com/download.tensorflow.org/data/heart.csv')\n",
    "\n",
    "modin_df = pd.read_csv(csv_file)\n",
    "modin_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "target = modin_df.pop('target')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "numeric_feature_names = ['age', 'thalach', 'trestbps',  'chol', 'oldpeak']\n",
    "numeric_features = modin_df[numeric_feature_names]\n",
    "numeric_features.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tf.convert_to_tensor(numeric_features)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "normalizer = tf.keras.layers.Normalization(axis=-1)\n",
    "normalizer.adapt(numeric_features)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Replicating statsmodels workflow with pandas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "SHUFFLE_BUFFER = 500\n",
    "BATCH_SIZE = 2\n",
    "\n",
    "csv_file = tf.keras.utils.get_file('heart.csv', 'https://storage.googleapis.com/download.tensorflow.org/data/heart.csv')\n",
    "\n",
    "pandas_df = pandas.read_csv(csv_file)\n",
    "pandas_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "target = pandas_df.pop('target')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "numeric_feature_names = ['age', 'thalach', 'trestbps',  'chol', 'oldpeak']\n",
    "numeric_features = pandas_df[numeric_feature_names]\n",
    "numeric_features.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tf.convert_to_tensor(numeric_features)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "normalizer = tf.keras.layers.Normalization(axis=-1)\n",
    "normalizer.adapt(numeric_features)"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "9752fa87da8bf164654ccc33a595e9110c8fc9bb15d763374a7037fd32519b1f"
  },
  "kernelspec": {
   "display_name": "Python 3.9.7 ('base')",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}


================================================
FILE: examples/jupyter/integrations/xgboost.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Demonstrating XGBoost Modin Interoperability"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## All the examples in this section are taken / adapted from https://xgboost.readthedocs.io/en/stable/python/python_intro.html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import xgboost as xgb\n",
    "import modin.pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_train = pd.DataFrame(np.arange(36).reshape((12,3)), columns=['a', 'b', 'c'])\n",
    "label_train = pd.DataFrame(np.random.randint(2, size=12))\n",
    "dtrain = xgb.DMatrix(data_train, label=label_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_test = pd.DataFrame(np.arange(12).reshape((4,3)), columns=['a', 'b', 'c'])\n",
    "label_test = pd.DataFrame(np.random.randint(2, size=4))\n",
    "dtest = xgb.DMatrix(data_test, label=label_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'}\n",
    "param['nthread'] = 4\n",
    "param['eval_metric'] = 'auc'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "evallist = [(dtrain, 'train'), (dtest, 'eval')]\n",
    "num_round = 10\n",
    "bst = xgb.train(param, dtrain, num_round, evallist)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "bst.save_model('0001.model')"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "9752fa87da8bf164654ccc33a595e9110c8fc9bb15d763374a7037fd32519b1f"
  },
  "kernelspec": {
   "display_name": "Python 3.9.7 ('base')",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}


================================================
FILE: examples/modin-scikit-learn-example.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Process STDOUT and STDERR is being redirected to /tmp/raylogs/.\n",
      "Waiting for redis server at 127.0.0.1:35043 to respond...\n",
      "Waiting for redis server at 127.0.0.1:49923 to respond...\n",
      "Starting local scheduler with the following resources: {'CPU': 4, 'GPU': 0}.\n",
      "\n",
      "======================================================================\n",
      "View the web UI at http://localhost:8889/notebooks/ray_ui93764.ipynb?token=23507892afd3d95e7604e7cd889b30382368ed888e79fc8c\n",
      "======================================================================\n",
      "\n"
     ]
    }
   ],
   "source": [
    "%matplotlib inline\n",
    "\n",
    "import numpy as np\n",
    "import modin.pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import sklearn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>CRIM</th>\n",
       "      <th>ZN</th>\n",
       "      <th>INDUS</th>\n",
       "      <th>CHAS</th>\n",
       "      <th>NOX</th>\n",
       "      <th>RM</th>\n",
       "      <th>AGE</th>\n",
       "      <th>DIS</th>\n",
       "      <th>RAD</th>\n",
       "      <th>TAX</th>\n",
       "      <th>PTRATIO</th>\n",
       "      <th>B</th>\n",
       "      <th>LSTAT</th>\n",
       "      <th>PRICE</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>0.00632</td>\n",
       "      <td>18.0</td>\n",
       "      <td>2.31</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.538</td>\n",
       "      <td>6.575</td>\n",
       "      <td>65.2</td>\n",
       "      <td>4.0900</td>\n",
       "      <td>1.0</td>\n",
       "      <td>296.0</td>\n",
       "      <td>15.3</td>\n",
       "      <td>396.90</td>\n",
       "      <td>4.98</td>\n",
       "      <td>24.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>0.02731</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7.07</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.469</td>\n",
       "      <td>6.421</td>\n",
       "      <td>78.9</td>\n",
       "      <td>4.9671</td>\n",
       "      <td>2.0</td>\n",
       "      <td>242.0</td>\n",
       "      <td>17.8</td>\n",
       "      <td>396.90</td>\n",
       "      <td>9.14</td>\n",
       "      <td>21.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>0.02729</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7.07</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.469</td>\n",
       "      <td>7.185</td>\n",
       "      <td>61.1</td>\n",
       "      <td>4.9671</td>\n",
       "      <td>2.0</td>\n",
       "      <td>242.0</td>\n",
       "      <td>17.8</td>\n",
       "      <td>392.83</td>\n",
       "      <td>4.03</td>\n",
       "      <td>34.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>0.03237</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.18</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.458</td>\n",
       "      <td>6.998</td>\n",
       "      <td>45.8</td>\n",
       "      <td>6.0622</td>\n",
       "      <td>3.0</td>\n",
       "      <td>222.0</td>\n",
       "      <td>18.7</td>\n",
       "      <td>394.63</td>\n",
       "      <td>2.94</td>\n",
       "      <td>33.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>0.06905</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.18</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.458</td>\n",
       "      <td>7.147</td>\n",
       "      <td>54.2</td>\n",
       "      <td>6.0622</td>\n",
       "      <td>3.0</td>\n",
       "      <td>222.0</td>\n",
       "      <td>18.7</td>\n",
       "      <td>396.90</td>\n",
       "      <td>5.33</td>\n",
       "      <td>36.2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Unnamed: 0     CRIM    ZN  INDUS  CHAS    NOX     RM   AGE     DIS  RAD  \\\n",
       "0           0  0.00632  18.0   2.31   0.0  0.538  6.575  65.2  4.0900  1.0   \n",
       "1           1  0.02731   0.0   7.07   0.0  0.469  6.421  78.9  4.9671  2.0   \n",
       "2           2  0.02729   0.0   7.07   0.0  0.469  7.185  61.1  4.9671  2.0   \n",
       "3           3  0.03237   0.0   2.18   0.0  0.458  6.998  45.8  6.0622  3.0   \n",
       "4           4  0.06905   0.0   2.18   0.0  0.458  7.147  54.2  6.0622  3.0   \n",
       "\n",
       "     TAX  PTRATIO       B  LSTAT  PRICE  \n",
       "0  296.0     15.3  396.90   4.98   24.0  \n",
       "1  242.0     17.8  396.90   9.14   21.6  \n",
       "2  242.0     17.8  392.83   4.03   34.7  \n",
       "3  222.0     18.7  394.63   2.94   33.4  \n",
       "4  222.0     18.7  396.90   5.33   36.2  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = pd.read_csv(\"data/boston_housing.csv\")\n",
    "\n",
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "modin.pandas.dataframe.DataFrame"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "features = data.drop(\"PRICE\", axis=1)\n",
    "labels = data[\"PRICE\"]\n",
    "\n",
    "type(features)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.linear_model import LinearRegression\n",
    "\n",
    "lm = LinearRegression()\n",
    "lm.fit(features, labels)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEWCAYAAABrDZDcAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJztnXmcXVWV77+/qlxIhakC5iEUhOAEiiGkSQs2tAIOqAimIYqIttq0tO/52vGlDU9awpNu0Girrwd9KAqKIpMWUyvYQDtgMySEgBHSrULAYgqQYkoBlar1/jjnVE7dOufec4dz77n3ru/nU5+69wx7rzPcvfZea+21ZWY4juM4vUtfuwVwHMdx2osrAsdxnB7HFYHjOE6P44rAcRynx3FF4DiO0+O4InAcx+lxXBF0EJKOkPSHBs7/uqS/baZMCXWYpJel7DtZ0nV1lrtS0oWNSecUlWY+X0nzJT0jqb8Z5fUCrghajKT7JI2FL+rDks6XtGMO9XxA0i/j28zsw2b2uWbXlRUz+56ZvbnV9XaCEgnfgxfC9+IJST+VtH+75SoCYQdoMrw3T0vaIOmDaceb2f1mtqOZTbRSzk7GFUF7ONbMdgQOAhYDp7VZHqcYfCF8L4aAEeC8NstTJB4M783OwKeBb0h6VflBkma1XLIuwBVBGzGzh4FrCRQCAJK2l/RFSfdLeiQ05wwknS9phaTfhb2k30j6s3D7K4GvA68Ne1Gj4fbzJZ0VO/9Dkn4b9kCvlLRnbJ9J+rCk/5I0KumfJSnc9zJJP5P0pKTHJF1cJtobU86bNkoJ6/iopN+H5aySVOmdnC3p4vB6b5e0KFbWnpIul7RJ0r2SPhpufwvwv4ETw3uxTtKRku6KnftTSbfFvv9C0tJK5Yb7+mLP4HFJl0jaNdy3ILy+94fP8jFJn6lwbVOY2RhwCdPfiz5Jp0vaKOlRSd+RtEts/3GS1of3/N/DdyDad5+k5ZLulPSspPMk7S7px+G9/DdJc8NjZ0u6MLyeUUm3Sdo9Sc609y/c9wFJvwzf5c3hvXtrbP++4Tv0tKSfAi/KeG/MzIaBzcCrYvf5FEn3AzfEts0K69pV0rclPRjKMhyT4+2S7giv9VeSDswiR9dhZv7Xwj/gPuCN4ee9gLuAr8b2fxm4EtgV2Am4Cjg73HcE8IfYse8E9iRQ6CcCzwJ7hPs+APyyrO7zgbPCz0cBjwF/BGwP/CPw89ixBlwNDALzgU3AW8J9FwGfCeudDRye8bxpMoXH3hhe63zgP4G/TLlvK4FxYBlQAv4XcG/4uQ9YA3wW2A54CfB74OjYuRfGyhoAniNofErAIwQ98J3CfWPAbhnK/Rhwc/gctwf+H3BRuG9BeH3fCMtcBDwPvDLl+uLPZgfgu8C62P6/AH4byrAj8EPgu+G+V4TP/k3h9fxNeOx2sXfuZmB3gtHGo8DtBKPR2cANwBnhsX9F8M7NAfqBg4GdU2Su9v6NAx8Ky/nvwIOAwv3/AfxDeN9eBzwdf0Zl9RxB+N6Hdf1ZWPZ+sfv8nfC+DcS2zQrPuQa4GJgb3p/Xh9sXh/fikFDG94f3avt2txMtb5faLUCv/YUv2jPhi2/A9cBguE/hj+mlseNfC9wbfp76QaSUfQfwjvDzB6isCM4jMEVE+3YMf1wLwu/G9Ab+EmBF+Pk7wLnAXgkyVDpvmkzhsW+Jff8fwPUp17YSuDn2vQ94CPjT8Id8f9nxpwHfjp17Ydn+XwDHA4cC14VyvgU4ErgzPKZauXcDb4jt2yO8h7NijdFesf23Au9Oub7zCZTTKDBJoOQOjO2/Hvgfse/7xer6W+CSsnszAhwRe+dOju2/HPha7PtfA8Ph578AfhWvu4Z3u/z9+21s35zwfryYQOlvBXaI7f9++TOK7TsivCejwBNhPe8O90X3+SWx46Nts8JnMgnMTSj3a8DnyrZtIFQUvfTn9rT2
sNTM/k3S6wl+AC8ieMnnEfxg1oTWFAiUQ2L0g6Q/Bz5J8OJD0JhnGmIT9ORuj76Y2TOSHifoMd4Xbn44dvyWsHwIepyfA26VtBn4kpl9K3Zs2nlJPBD7vDGUq+qxZjapIIJqT4If/Z4KTWAh/QSNfRo/I1Ss4efNwOsJeu0/C4/Zp0q5+wA/kjQZ2z9B0POOqOVefNHMTpc0H/gJQWN/Z7hvT4L7E7GRoKHbvXxfeG8eIHiWEY/EPo8lfI/k+i6wN/ADSYPAhcBnzGy8XNgM79/UtZvZlvCdjo7ZbGbPll3P3uV1xHjQzPaqsP+BlO17A0+Y2eaEffsA75f017Ft21H5HexK3EfQRszsZwQ9wS+Gmx4j+FEeYGaD4d8uFjjJpiFpHwKzw/8EdjOzQeDXBIoDgsaxEg8S/BCi8nYgMIeMZJD7YTP7kJntSWBK+BelhIxmIP7jnx/KVfXY0JewV3j8AwSjpsHY305m9rZI5ISyIkXwuvDzzwgUwevZpgiqlfsA8Nay/bPNrOo9rISZ3U9gdvqqtvmHpj0vtvWqHynfF/pk9ibDs0yoe9zMzjSzVwF/Arwd+PPy4zK8f5V4CJgbvnPx62mEtPf9AWDXUKkl7fu7suc3x8wualCWjsMVQfv5CvAmSYvMbJLgx/VlSf8NQNKQpKMTztuB4OXfFB73QeDVsf2PAHtJ2i6l3ouAD0o6SNL2wN8Dt5jZfdUElvROSVHvbHMox2SFUyqxXNJcSXsTNH7ljuc4B0s6PnQCfpyg934zgcnlaUmfljQgqV/SqyX9cXjeI8ACTXdE/4qgx/0a4FYzW0/QmB4C/Dw8plq5Xwf+LmwUkTRP0jvqvA/TMLOfEjTwp4abLgI+ETpZdyR4Xheb2VYCs9Yxkt4gqQR8Krw3v6q1XgWO9IUKYvCfIjA/JT3bau9fpWvbCKwGzpS0naTDgWNrlTVjXQ8BPyborMyVVJL0unD3N4APSzpEATtIOkbSTnnIUmRcEbQZM9tEYHP/bLjp0wSOvpslPQX8G0GDVX7eb4AvETjdHgEWAjfFDrkBWA88LOmxhPP/jcC2fDlBD+2lwLsziv3HwC2SniFwbH/MzH6f8dxyriBwyN5B4NSrFDJ5BYFTcjPwPuD4sAc7QdBzPYjAtv4Y8E0giqq5NPz/uKTbAUKzxO3AejN7Idz/H8BGM3s0PKZauV8Nr/86SU8TKKVD6rsNiawC/iZU1N8iMNv8PJTlOQLbPma2AXgvgcP/MYJG9djYddXCi4HLCJTA3QSjo++WH5Th/avGewju1RPAGQS/gbx4H4FCu4fAOfxxADNbTeDM/ieCd+q3BL6NniPy4DtOy5FkwMvN7LftlsVxehkfETiO4/Q4rggcx3F6HDcNOY7j9Dg+InAcx+lxOmJC2Yte9CJbsGBBu8VwHMfpKNasWfOYmc2rdlxHKIIFCxawevXqdovhOI7TUUjaWP0oNw05juP0PK4IHMdxehxXBI7jOD2OKwLHcZwexxWB4zhOj5Nr1JCk+wgWYJkAtprZEgVL+V1MkMP8PuBdKbnCnQIwvHaEVddu4MHRMfYcHGD50fuxdPFQ9RMLTLOvqZX3qJl1tfrZNqu+qJyR0TH6JSbMpv7PKfUxtnUSM+iXOOmQvTlr6cKG6y8/98j953HjPZua+hzi1zPU4t9arjOLQ0WwxMwei237AsFCEedIWkGwctCnK5WzZMkS8/DR1jO8doTTfngXY+MTU9sGSv2cffzCjlUGzb6mVt6jZtbV6mfbrPqSyqnGew+dz5J9dq27/ix1NvM5NFpmHElrzGxJtePaYRp6B3BB+PkCYGkbZHAysOraDTNe0LHxCVZdu6FNEjVOs6+plfeomXW1+tk2q76kcqpx0S0PNFR/ljqb+RwaLbMe8lYERpCrfY2kaIGN3cPFIiBYym73pBMlnSpptaTVmzZtyllMJ4kHR8dq2t4JNPuaWnmPmllXq59ts+qrR74Js4bqz1pnM59DI2XW
Q96K4HAz+yPgrcBHYisDAWCBXSrRNmVm55rZEjNbMm9e1RnSTg7sOThQ0/ZOoNnX1Mp71My6Wv1sm1VfPfL1Sw3Vn7XOZj6HRsqsh1wVQbR2a7ji048IlgV8RNIeAOH/R/OUwamf5Ufvx0Cpf9q2gVI/y4+esWBax9Dsa2rlPWpmXa1+ts2qL6mcapx0yN4N1Z+lzmY+h0bLrIfcoobChan7zOzp8PObgf9DsLTf+4Fzwv9X5CWD0xiRk6qbooaafU2tvEfNrKvVz7ZZ9cXLqTVqqN76k2SPooYiGeL2/FquKe16uiZqSNJLCEYBECic75vZ30najWCx7fnARoLw0ScqleVRQ47jJNHO8OZOiKrLGjWU24ggXMx8UcL2x4E35FWv4zi9QXlDPDI6xmk/vAuorVdeqfxKSqZSJFJRFEFWfGax4zgdSZ4hsJGSGRkdw9imZIbXjkwd001Rda4IHMfpSPJsiLMomW6KqnNF4DhOR5JnQ5xFyXRTVJ0rAsdxOpI8G+IsSmbp4iHOPn4hQ4MDCBgaHCiUo7gWOmKpSsdxnHLyDIFdfvR+iRFB5Upm6eKhjmz4y3FF4DhOx5JXQ9yNc2gq4YrAcZzCUYT0593S28+CKwLHcQpF3vMDnJm4s9hxnELRjenPi44rAsdxCsPw2hFGumiiVqfgisBxnEIQmYTS6MSJWp2C+wgcxykElVbrKg/dLIIzuZtwReA4TiGoZPqJT9RyZ3LzcdOQ4ziFIM30MzQ4kDnrp1MfrggcxykEWVNGtDLr5/DaEQ475wb2XXENh51zw7Tso92Em4YcxykEWWfz7jk4kBhZ1Gxnci+ZoFwROI5TGLLM5s2aB6hRumnhmWq4InAcp6NoVR6gblp4phquCBzHyZU8Qj1bkQeoVSaoIuDOYsdxciPLko/1lpu3E7ebFp6phisCx3FyI83OvvLK9XWXmZdyKaebFp6phpuGHMfJjTR7+ujYOMNrR+pqVFvpxO2VVNQ+InAcJzcq2dM/fvEddZl1esmJ2ypcETiOkxvV7On1mHXyXLS+nF6ZUOaKwHGc3Fi6eIi5c0oVj6k1PUSrnLit8kUUAVcEjuPkyhnHHjCj4S5nZHQsc8+7VU7cXspp5M5ix3FyJT4BLG3RGcHUviypHFrhxO0lX4SPCBzHyZ2li4e4acVRfOXEg2aMDgRY2fFF6Hm30hfRblwROI7TMpLMOuVKIKLdPe8kX0SpXzz7/Naucx67achxeoSirOpVbtY57JwbCpnKoTyn0eCcEs88t5XRsXGgu7KR+ojAcXqAIkfAFDmVQ2TSuvecY5iz3SzGJ6ePX4pgwmoGrggcpwcocgRMp6Ry6GbnsZuGHKcHKHoj1gmpHLo5G6mPCBynB+ilCJi8KLIJq1FcEThOD9DNjVir6BQTVj3kbhqS1A+sBkbM7O2S9gV+AOwGrAHeZ2Yv5C2H4/QyrVrVq9vpBBNWPbTCR/Ax4G5g5/D754Evm9kPJH0dOAX4WgvkcJyeptMbsaKEv3YjuZqGJO0FHAN8M/wu4CjgsvCQC4ClecrgOE7nU+Tw124gbx/BV4C/ASbD77sBo2a2Nfz+ByBRpUs6VdJqSas3bdqUs5iO4xSZIoe/dgO5KQJJbwceNbM19ZxvZuea2RIzWzJv3rwmS+c4TidR9PDXTidPH8FhwHGS3gbMJvARfBUYlDQrHBXsBfjYznGcinRzDH8RyG1EYGanmdleZrYAeDdwg5mdDNwILAsPez9wRV4yOI7THTQz/LVXVh2rhXbMI/g08ElJvyXwGZzXBhkcx+kgmhXD707nZGSWlgS2OCxZssRWr17dbjEcx+lw0jKdDg0OcNOKo9ogUb5IWmNmS6od57mGHMdJpBvj9t3pnIynmHAcZwbdakLxnEvJuCJwHGcG3Ri3P7x2hC0vbJ2x3XMuuWnIcXqOLCafbjOhRCOccuU2OFBi5XEHdLzJq1FcEThOD3H68F187+b7p9YJTltucZeB0tSS
jHF2GSi1QsymkzTCAdhh+1k9rwTATUOO0zMMrx2ZpgQikkw+UnIZaduLTreNcJqNKwLH6RFWXbthhhKIKG8QR7fMHA1U2l503ElcGVcEjtMjVOr9ljeIaQ2kAYv/z3UdFz3kC/NUxhWB49RIp6YoSGvcBTMaxOVH70epP9kOtHnLOB+/+I6OUgjdvLpYM3BnsePUQHn0SZqztYgsP3q/GZEzAk4+dH6y7FWSDmzeMj517VD81c86fWGePHFF4Dg1UCm+vuiNTC3LVa66dgPjk9XTz4yNT3DmVet5bnyyI5WjE+CmIcepgU6PPlm6eIjlR+/HnoMDPDg6xqprNySad2q5ns1bxrtu8lmv4SMCx6mBTs+Ln9W0lXadtdCocuzGXEdFxUcEjlMDRY8+qebIzpo6Iuk6a6UR5dituY6KiisCx6mBIkefZGk803rpI6Nj0xRH/DrroVHl2I25joqMm4Ycp0aKGn2SxZE9OKfE5pRJYeVmoqWLh1i98QkuvPn+qnUPlPrYdYftm2bG6XRfTKfhisBxuoQsjWe1dajKFcdFtzyQqe6tk9ZUG36n+2I6jaqmIUmvkHS9pF+H3w+UdHr+ojmOUwtZ0ig8mZBIrpy44pjIuILh+IQ11WxTdF9Mt5HFR/AN4DRgHMDM7iRYjN5xnAKRpfHM0qOOH9NfQ5a5ZpptiuyL6UaymIbmmNmtmv5CzFzdwXGctpJlwljS7OI45YrjpEP2zuQjgOabbYrqi+lGsiiCxyS9lHDCuaRlwEO5SuU4Tl1UazyjfWdetX7KaSyCH/dQguI4a+lCfnT7CM++kKw4IpLMNj4PoHPIogg+ApwL7C9pBLgXeG+uUjmOUxdZGt/VG5+Ylk7a2NaQJzXUW6oogW2lTJdj+WXrGJ8Ito+MjrH8snWAp50oIlUVgZn9HnijpB2APjN7On+xHMeplSyzhrMsTrPq2g2MjI7RLzFhNvW/EmPjkyy/dFtDf+ZV66eUQMT4hHHmVetdERSQLFFDfy9p0MyeNbOnJc2VdFYrhHMcJztZJmFVWpwmUhxR2GbU+GeOHJrcFjmUNlchbbvTXrJEDb3VzEajL2a2GXhbfiI5Tm9T73oHWeYRVIrs6ZdSncgQ+BLqlcEpNll8BP2Stjez5wEkDQDb5yuW4/Qmta53EPcJ9KWYcOLRPGkTtUT1nn+WcUFU1+BAidGEOQuDA6UMpTitJsuI4HvA9ZJOkXQK8FPggnzFcpzepJYcO+W5hdIa8mef3zo1qkiaaxAtTtNoI13q01Tk0MrjDqDUpxn7Vx53QEN1OPmQxVn8eUl3Am8IN33OzK7NVyzH6U1qybGTpDSSGB0bnzGqSIosuubO9KjwgVI/s0t9qTb+wYESK487YKr8WhbBcdpPplxDZvZj4Mc5y+I4PU8tOXZqscfHcwilzTUYreDIHRufYPtZfZT6NS0aaKDUnzjj1+cQdBapikDSL83scElPM908KMDMbOfcpXOcHuPI/efNCO9My7FT6+Ix1RRHtfJGx8Yp9Ym5c0qMbhmf0cBHjf/I6NjUJDXwpSs7gVQfgZkdHv7fycx2jv3t5ErAcZrP8NoRLl8zMqPXdcLByT34WheP2aWKDyBLeeOTxpztZnHvOcdw04qjpimBeOhppXkKTvGo6CyW1C/pnlYJ4zi9TJLN34Ab79mUeHyUmG3unGxO3mdf2FoxFLU80Vsa5SOL4bUjfOqSdVX9FR5aWlwqKgIzmwA2SJrfInkcp2fJunpYnKWLh1j72TdnUgblqaKT5issXTzETSuO4t5zjkldnSzur4hGAlkmnflaAsUli7N4LrBe0q3As9FGMzsuN6kcpwepZKNPSxcROWSzzf3dpmyyzFdIylRa7q/IGrnkawkUmyyK4G9zl8JxnKopouORP+UNeVaiXnm1+QqRgtlloMTsUl+icxgqm3sqZTV1ikVFRSBpKfAy4K5a5w5Img38nGAW8izgMjM7Q9K+wA+A3YA1wPvM7IV6
hHecbiIee19pZBAdU6sSiPfKK5mh4gpmdGycgVI/Xz7xoMSGPG0U0y/xpXct8sa/Q0j1EUj6F+ATBA325yTVOjJ4HjjKzBYBBwFvkXQo8Hngy2b2MmAzcEpdkjtOF7J08RDLj94vdWUwEZh1agkbBdhhu23x/sNrR+hLKT8p31CliJ+0VdFcCXQWlUYErwMWmdmEpDnAL4DPZS3YzAx4JvxaCv8MOAp4T7j9AmAl8LXaxHac9pHnZKlqzlcjGA1kSQ0dJ1pT4PThuxLTUEPQgKeNMtJGED6DuDuopAheCKOGMLMtUg2Ll4ZI6icw/7wM+Gfgd8ComUVLXf4BSHxjJJ0KnAowf74HLTnFoNakcLVy5lXrM4VhZlcBAQasvHI9T46NJ57bL3H28QtTzVKVIn58ScnOp5Ii2D/MMQTBiPSl4fdoZvGB1QoPFclBkgaBHwH7ZxXMzM4lWBmNJUuW1PreO04uVHKy1tMYxkcXg3NKmfL1R41ymm0+baSQlA00YsKMT1x8B4NzSpT6xPjk9DQSHvHT3VRSBK9sViVmNirpRuC1wKCkWeGoYC8gW7J1xykAtSSFq0b56CKLEij1Bxk+V298InFR+UNfMpdf/e6JmkcMEIwaNm8Zp9QvBgdKPDmWHCnkdB+pisDMNjZSsKR5wHioBAaANxE4im8ElhFEDr0fuKKRehynldSSFC6NeE6eWhmfMD51ybrUXv99j49x8qHzE/MVVcoeWl7H089tTY0UcrqPLOsR1MsewI2hOek24KdmdjXwaeCTkn5LEJF0Xo4yOE5TSYuSyWo6Kc/JUw+VnMQPjo5x1tKFnHzo/KnIo36JEw4e4oxjD8icm2jCjNN+eFfm1dGcziZTGup6MLM7gcUJ238PvCaveh0nT+qNkmlkFFALew4OMLx2hItvfWDamsMX3/oAS/bZdcohHMm+5YWtqaOERnwfTmchqyEErV0sWbLEVq9e3W4xnALRSfnua5kFXOoXO2w3q6Jjt9K5q5YtYuWV61OXibzjjDfXJJuAe885pmZZnGIgaY2ZLal2XNURgaS7mJlV9klgNXCWmT1en4iOUx95h3A2m6yzgMtTMSxYcU1tFYW/0jQlkrQ9qivN7+CJ4nqDLKahHwMTwPfD7+8G5gAPA+cDx+YiWRPopF6jk51mh3DmTbWIorRVvoZqXHhmfNLqyvkf1VstwZzTvWRxFr/RzE4zs7vCv88ArzezzwML8hWvfsoX9o56je786nyaGcLZCir1qocGBxKVAFBXIzwyOsacUvrPOu39L1+LoJJcTveRZUTQL+k1ZnYrgKQ/BqLQg63pp7WXTus1OtlpRghnoySNNiHZiZyWzjnLWr8DpT7Gxiczy9UvsX2pny0p51R6/32GcO+SRRH8JfAtSTsS+I6eAv5S0g7A2XkK1wid1mt0spMlT36eJPkoll+6DsTUwu5JfotqZsqkckv9Spzpm+ZzmDCruAi9v/9OElUVgZndBiyUtEv4/cnY7kvyEqxRitBrdPKh3YnOkkab8YY6Ij4CzdLbTix3wthhu34mxyeZMJuaE3DjPZtS/QcSpAUDStuc0IMDJVYed4CPApzqPgJJ20t6D/AR4GOSPivps/mL1hiNTvxxiku7gwBq6VVnOTZaMjKtYX/2hYlpcwIuXzPCkfvPS50clqCTEveNjo2z/NJ17jdzMjmLrwDeQeAPeDb2V2jc+dWdFCEIoJZRZbVj65lpPDY+wdXrHmJ2BacwBL3/qc8px9QbaeR0F1l8BHuZ2VtylyQH3PnVfRQhCCDJR1Hq0zQfAWQbgdaz0hhUziQ6hcF94WSwfSvMSXC/gZNlRPArSQtzl8RxMlCEIICk0eaqdy5i1bJFNY9AK8k9NDjA4ECpbjnjo5FKIxP3mzlZRgSHAx+QdC/B8pOZ1yNwnGZTlCCAtNFmraOStOsZGhzgphVH1b1IfZSuOmL50fux/LJ100YsEIxk3G/mZFEEb81dCsfJSLtDRyPK
HdZH7j+PG+/ZVLMDu9r1JEVIVUoUBzB3Tokzjp0eDRR9PvOq9VPnetSQE5GqCCTtbGZPAU+3UB7HqUi7QkfLVxJ75rmtUyGjI6Nj0xaJSct9NLx2JLEhPvv4hdO2bz+rssX2mAP34PI1I9OUhwhSDZXnK4pTyWfW7kgsp72kZh+VdLWZvT00CRnTAw/MzF7SCgHBs486+VOpIazXPNMvMWk2NWK46NYHmEiI7ZxT6mN8wqbNRYga9rllSgeCEUN8LkF0bHz/2ccHbr0sjXvS9aXNfHY6i6zZRz0NtdPTlPfSI+INYaUY/3YR+RDSZBscKPH81slMjXtaGVEdTueSVRFkmVB2WJhOAknvlfQPkuY3Q0jHaSdRTzjJ3j42PsHKK9dz+nBjq4nlRSRTWtTR6Nh4aphtOUWIxHLaS5bw0a8BWyQtAj4F/A74bq5SOU4LqBbDPzo2nrhAfBGIlqGsNVoqqXFPK8PDSnuHLFFDW83MJL0D+CczO0/SKXkL5jh50aplI/Nkwox9V1zD4JxSYlK6tIXqo6UsyyOeyp3Pno6lt8iiCJ6WdBrwXuB1kvqA+me5OE4CrYpaqdfxW0QMZjT2USQSJC80c+T+82ZkOL3w5vsZKPUxd06J0S3jHjXUg2RRBCcC7wFOMbOHQ//AqnzFcnqJVi49WW9Kh07h+a3BOgRpYbZp1x+seSC+fOJBrgB6EI8aqoLHV+dP1qiVtGdRyzPad8U1Mxbg7jYqRftUu36PFOoumrl4/dNsC1PejsAs9IyZ7dKYiMWn0xZJ71SyRK2kPYvVG5+YZt+u9ozSUjp0E5Wifapdv0cK9SZVo4bMbCcz29nMdgYGgBMIIom6nkqZLp3mkSVqJe1ZXHTLAzU9oyP3n9egtMWnUrRP0jodWc91upcsPoIpLLAjDUs6A1iRj0jFweOrW0OW/EFpvdiJFNNm9IyG146w8sr12dI2dwHVon2Scg5lPdfpXrKYho6Pfe0DlgDP5SZRgShKpstuJ0v+oH4ptdFPIgqTXH7pusRlJLuRfilTWogo55D7v5yIqs5iSd+Ofd0K3Ad8w8wezVGuabTLWew5WIrDggoLq5Qv5h49o06fK1APgrod6U730TRnsZl9sDkidR7tXiS9CBSo2fxuAAAcaklEQVSlIRmqkLc/CoscGR2jX2JsfCLR9NELxJfvrNWR7hSHVv/usowI9gL+ETgs3PQL4GNm9ofcpCrDk861hyKNiKrJUoSJYpH5qlYzVjlfOfEgPnXJutQySv0Co6rJK00ODxEtNs383TUt6RzwbeBKYM/w76pwm9PlFClqKml5yPgPowgTxaJGtxElAMG1Vipj1bJFrHrntmUxq8lTjgc7FJt2/O6yRA3NM7N4w3++pI/nJZBTHIoWNVVpYZVuadzmzgmyt1QyhUX3IPqfNiEvbUTgwQ7Fph2/uywjgsfD9NP94d97gcdzk8gpDHlnpRxeO8Jh59zAviuu4bBzbmB47UjdZWTpg8+dU6KvUhe6zZT6xRnHBnmCkuL9S/3i2ee3zrhfSccOlPo56ZC9E7d7iGixaUc22CyK4C+AdwEPAw8By4CedSD3EmkNTDMaksgOOjI6Ns3BWYsyiJdRjYFSP8+PT1DUSNJ+iVXLFk3r7cdNYXPnlMCC1Njl9yvNbHbW0oUVzWlOMcnzd5eG5xpyKpJX9EIzVsWqtHLY3DklzODJsW3ZND9+8R0NyZwXWRyBvopYb9Gs313D4aOS/hHSR9xm9tGapXI6jkp2+UaoZAfN+iNIK0PA2s++Gdj2g/pEAZSABHvuMjAV5jphNhX+CkFjn3bNRfPXOPmS1+8ujUrO4ngX/EzgjFoKlrQ38B1gdwKFcq6ZfVXSrsDFwAKCyWnvMrPNtZTtdD6Dc0qJcf6Dc0qZE/2lzfzuk1iw4hr6RKFMQWYk9t6zJDf0We5OnqT6CMzsgugP2Bz/Hm6rxlbgU2b2KuBQ4COSXkWQo+h6M3s5cD09kLPImc7w
2hGeeW5r4r5a1tpNS6AWRcoUSQlAYMYpZ3jtCJ+6ZF3Va26H3djpHbImnav5J2VmDxE4lzGzpyXdDQwB7wCOCA+7APh34NO1lu90Lquu3ZA6GSrNZZVkAimf+d3X4ESuvFmw23RFEI0EssT7+yx3J09qyj5aL5IWAIuBW4DdQyUBQSTS7innnAqcCjB//vz8hXRaRj127bgJJMmHABTWGRxx0++e4PThuzhr6UKg+iS4crNPq+3GTu+QahqS9LSkpyQ9BRwYfY62Z61A0o7A5cDHzWzaeWFa68TukJmda2ZLzGzJvHndn0O+l6jVrh03gSSFnS6/bB2fLLgSiLjolgemPlcKe3Wzj9NKKvkIphakMbNZsc87hYvUVEVSiUAJfM/MfhhufkTSHuH+PYCWZTF1ikG1xVHiZEklMT5hTDZdynyIzEDDa0dS00NkTSftOM0iN9OQJAHnAXeb2T/Edl0JvB84J/x/RV4yOMUk3qhXmwxWHmXT6eGSfao8/0HAl961yJWA01Ly9BEcBrwPuEtSNG7/3wQK4BJJpwAbCWYtOz1G3N79qr/9MVvGZ/bpo7w7cTp+zWGrbBIqrqvb6WaypJioCzP7pZnJzA40s4PCv381s8fN7A1m9nIze6OZPZGXDE5n8PfHHxikVo4Rz7sTp4hrDvdXSGBUvieLCavWVBuO0ygtiRpyWktRFpPJSi2hkTfes6nV4lVkTqkvcTTTCNEcgiI/M6e7cEXQZWSZpdoKGWpVRFlDI4vmI6imBOo19RTtOmuh0zoiTo6mIac9tHsxmWZkFa1Udp8KnEe6iXRq6og8n7+TH64Iuox2JyfLSxFVm4VbROpVWUlzCJqxdkMraHdHxKkPVwRdRjsWtYiTlyKqdynKUp+moo8aWZSmnlNPPnR+5vkSEUlrBnRSL7vdHRGnPlwRdBntTk6WlyKqpyERcOJr9uaMYw9gaHCgoSR0BgwOzAxnTWNocGBqYZj+jOYsEURFldvTO6mX3e6OiFMf7izuMtqdnGz50ftNc1ZDcxRRPfMHDLh63UNcfNsDjE+0zqQUv97ovpffkyQMuPDm+7l63UPTFtTppF52Xs/fyRdfocxpGlG0SNLCK40qovJoqKIxNDhQUfEOrx2pKyneQKmf2aW+xLUbonqLFpXjUUPFoeEVyhwnIssPu7yhnjCb6gk2oxGoJS1FPYigRz43ZcGcSiQtF5mWIbVWxsYn2H5WHwOl/kQl2I7w4Gp4ltTOw30ETkWyOipbYcdeuniIm1YcxX3nHNOU8iLb/dDgACcfOp+hwQFGqyiBLP6XtHs2UKrv5/bk2PjUIvRJFNVf4HQOrgicimRt4Jtpx84SKlmL4zaJwYESvzv7bXzlxIN49vmtXHjz/VMNd6Vzzj5+4bS6Zyc07mn3bHaNEUQRew4OTCnBNLdzEf0FTufgisCpSNYGvlnRIllGIKcP38XoWG3mmzilPrHyuAOm6spSVnQOwPNbt80m3rxlfIZ8afes2mgjifIRh0flOHngisCpSFoD0ydNa/yaFbZabQQyvHaE7918f01lxhkaHGDVO4M0z1nmJijDOeUjpEqNdZp5p7zOSNZoTkE0ShoZHZsxKvCoHKdR3FnsVCQpHBACZ3DcSdmssNU0R3C0fdW1G+rO3yOmr29QzZyS5ASuJh8E92z5petmrMv8YDjKiRzTaVhZ3eWO+HgZ/dI0RdQOJ61HCXU+rgicikQ/6E9dsm5GeofyLJnNiBbpT1mAPnLsNmIL36XMr1BpbsJAqZ8j95/HYefcMK2BqybfFAnGfIv9r6YM4teZNAqJyohkaVf0UBGSHDqN46YhJ5G4w3bVtRtSc/w020mZVk+0vRFb+OjY+DTnc9qSmXPnlDjh4CEuXzMyzVfx8YvvqChfVO6qazdUncAW9frTTEXx60y7x+U1tCN6qJNmPTvp+IjAmUFSLy+tB9sMJ2XctJDW444azDRTVVaSeqxJ8f5JI6BqROVmVY7RaKT8
3iY5iLPOnWh19FAnzXp20nFF4MwgzRSRRKMrhiVNRCsnKWXDmVetnzHxq5q5JSJu0io3ZzWS5TQqt9Z0GNVs/knKr5JibqXNPu1aPYqps3DTUM50SvrgOLU0Yo2uGFYtcicpGyfAU2NbZxwbNahZSOux1pvlNGJkdCzV5FSJNJv/8NoRli4e4oSDh6b8EP0Sf/LSXROjtI7cf15LM5W2O8mh0xxcEeRIJ6UPjhheO1JTyuVGTQCVzi9PUTG8doSDzryuoq0+sr1DggM3RlKPdXjtSNPSV1SaCZxGms1/eO0Il68ZmbrmCTNuv/9JTjh4iKHBgakQ17OPX8iN92xqqc1+6eKhqWuNy+GO4s7Ck87lSBT3XU5SWGJRSJM5jUavpVp9UflZk86Vy5N0XmRWGYr5BFZeub6hSWppMlSqPwsiiHZKki2SvzzRX1o59zYpNYfTOXjSuQLQiY60aj30etMLp9mt02Luy+XJYrIp9Ytnn9/KviuumWEbjxrLeCM8MjrG8kvXMQlMVFisoJaGOy4zJPs0Zpf62DppmVJjD1ZIgheNMCv5VyLcZu9Uwk1DOdKJ6QDSZIuG/PWYACqZyJYuHmLH2en9kUieaspTYWs9OjaeWMdNK45iaHBgRoM+PmkVlUBQeNVLTJQ5znOxRe7HxiczKYGBUj+VBuyRYzlLOW6zdyrhiiBHOtGRVknmqEG995xjuGnFUZntwNVizSvl4InuVTXlKZgxqhgbn+DMq9ZPfa9nJDY0OFCxMU6SY2R0bFpgQD0O6EjRPlnBXJUlsslt9k4WXBHkSCc60vKQuZqJLK2RHxwoTdVbLRInrVO/ecv4VINcz0jsyP3nZV5qEqabnaIRSa0KKEqFsXTxUKrMc+eUqjqjI19Fkd83pxi4s9jJnWpO8ySH6kCpn7OPXwhsm/A1OKeEWZCfv6+CY7SWerKce+T+87iwSqK7ND9C1FjX64Cvdm/Sric6xpVAb5PVWewjAid30nrzW17YOmXDTxqFANN8C5u3jPP81km+fOJBTNbQgYl65FE9tfDg6BhnLV3Iew+dPy2O/7CX7jpN3jRpHqxxXkG56bDSCC2+L5ILOmPk6RQLHxHkTCdmZsxD5uG1I4khmpV6rpVCSyuFSpZTHlJaT4hs2j2Ir9Ncqe4saxYPDpRYedwBhX8/nM7BRwQFoFMnlOUh89LFQ+yw/czooMihmzT7upJtPUkJlPpFqW+6PT/JOZ/UQy/1iVJ/si8g7R7E71US5akxqvka4gveOE4rcUWQI52YmTFPmdMa9s1bxhMVTxbnbr+0bfGYZYtY9c5FVR3dSeaWVe9cxKpli2paF7hSNFBS3dVGMEV/N5zuxSeU5Ug3TShrhsxZk7FFDeLyo/dj+WXrKsbcT5rNmDGbxbSStnbC0sVD7LvimkSbf/k9SLsn5QvgRAxluP4ivxtO9+IjghzppgllzZC5Fqfpg6NjgTlpu8p9lXrWRK6WBDDrPaj1XmW5/sE5pY5LUuh0Pq4IcqTbJpQ1SpJJZrBs1bCIqDGtNKGqVrmy+j+y3oMFuyU3+Gnby6N8yj0GpX7xzHNbO8qn5HQHbhrKkWat41sP9Ub+tFrmty/ag8vXjKTmMEozJ/VLNYVIDq8dybTcJmS/Bzf/fnNiXWnbo7Lj2VTjdTz7/NYZUVVJ8jVKJ0ayOfni4aNdSKVJSJV+8Hk3EGlynXDwEDfesymx3nqvpVq9cerNzLlgxTWp++6ro7w030QzM4c24346nUPbs49K+hbwduBRM3t1uG1X4GJgAXAf8C4zS+8+OXVRKfIn7cfeikXI0+S68Z5NqamsmzFCqZbrp17/R6W5DNFEuVpoxWpf9bwbTveTp4/gfOAtZdtWANeb2cuB68PvTpOpJ/Ina9hoIyuu1RuRVG+yuyzlC+r2f5x0yN6p++oJA22FT6kTI9mc/MlNEZjZz4Enyja/A7gg/HwBsDSv+nuZeiJ/sjQQjU42a1cUVaXyjfpHPGct
TU9XUX4/kxRo+TYg9ySFnRjJ5uRPq6OGdjezh8LPDwO7px0o6VRJqyWt3rSpsXVxe416epZZGohGJ5u1K4pq+dH7pS4pUOtyklnPj9+3JAW6/NJ1LL9s3QylCjQ0+qlGJ0ayOfnTtvBRC7zUqZ5qMzvXzJaY2ZJ58+a1ULLOp55U0lkaiEbNCu1Ky7108RAnHzp/hjJoRgOY5b4lKdDxhBXKWjGzuBNTozv50+rw0Uck7WFmD0naA3i0xfV3BM2I3kmbOVvpeKjslG2GM7NWuZrFWUsXsmSfXVOvL89w21rs762w1bfrGTjFpdWK4Erg/cA54f8rWlx/4WlF9E4a1RqI5Ufvlxh6mLdZoZ5GOu2ctAyitdzzpLLTop4ge2qN6FjHaTV5ho9eBBwBvEjSH4AzCBTAJZJOATYC78qr/k6lyOF9zZxslrVxr0cxZjknXj/MtFGm3fN65ElSoKU+gZhmHmqGUvXJYk495KYIzOyklF1vyKvObqDo4X3lveoo8qXW3nrWxrQexVjtnKwrlSXd83rkSVOgSdsaabTbOZp0OhtPMVEwWjGpqFnU2/DU0pjWoxirnZN1Mfmke97IXIi0bKfNosijSafYeNK5gtFJ4X31hpPW0pjWE/de7Zyso6uke17kOPyijyad4uKKoGAUNbwvaUJUvQ1PLY1pPYqx2jlZGu25c0qJ97yoinp47Qh9KSugFUFJOcXGTUMFpGjhfWkmoME5JTZvmZkmOqnhiTsxdxkoUepXJkdpPQ7qauckOW/jDJT6OePYA+oqu9J1x49tplM3ej5JeY+KoKSc4uPZRzuUVkaHpC32PjhQ4vmtk1UzWSY5Z0t9YsfZsxjdMs6egwMcuf+81AykeVCumCSmZGlW3ZWyrSal3q535Jf2fPolvvSuRYXqVDitpe3ZR538aHV0SJqp58mxcb584kFVFVLazNo5281i7Wff3JZol1aMutJ8KBfd8kCmdRGykvZ8Js1cCTiZcEXQgbQ6OqRSJFOWBrWeKJ5uiHZJu+601NX1OnU7KdLMKSbuLO5AWh0d0qiDtN4onjyup5E02rWSdt39TXbqFtWB7XQOrgg6kFaHMDYayVRvFE+zr6fRNNq1knbdJx2yd1Mb7qJGmjmdg5uGOpB25PxpxKZeTxRPHtfTahNUpeuulACv3rq84XfqxaOGOpRuyynTiuvJsiZwt91Xp7fxqKEup9t6gK24nmpOVc/V4/Qq7iNweoZqvopGV2BznE7FRwRO4Wjm+gNxqvkqPFeP06u4InAKRV7rD0RUMkF5PL7Tq7hpyCkU9ZhnmmXS8Xh8p1fxEYFTKPJYfyArzVyBzXE6CVcETqGoxzzTTJNOt0VjOU4W3DTkFIo81h9wHKcyPiJwCkUe6w84jlMZn1nsOI7TpWSdWeymIcdxnB7HFYHjOE6P44rAcRynx3FF4DiO0+O4InAcx+lxOiJqSNImYGO75ajCi4DH2i1EC/Dr7C565Tqhd641fp37mNm8aid0hCLoBCStzhKm1en4dXYXvXKd0DvXWs91umnIcRynx3FF4DiO0+O4Imge57ZbgBbh19ld9Mp1Qu9ca83X6T4Cx3GcHsdHBI7jOD2OKwLHcZwexxVBE5DUL2mtpKvbLUueSLpP0l2S7pDUtelgJQ1KukzSPZLulvTadsvUbCTtFz7H6O8pSR9vt1x5IOkTktZL+rWkiyTNbrdMeSDpY+E1rq/1Wfp6BM3hY8DdwM7tFqQFHGlm3T4p56vAT8xsmaTtgDntFqjZmNkG4CAIOjLACPCjtgqVA5KGgI8CrzKzMUmXAO8Gzm+rYE1G0quBDwGvAV4AfiLpajP7bZbzfUTQIJL2Ao4BvtluWZzGkbQL8DrgPAAze8HMRtsrVe68AfidmRV99n69zAIGJM0iUOoPtlmePHglcIuZbTGzrcDPgOOznuyKoHG+AvwNMNluQVqAAddJWiPp1HYLkxP7ApuAb4fmvm9K2qHdQuXMu4GL2i1E
HpjZCPBF4H7gIeBJM7uuvVLlwq+BP5W0m6Q5wNuAvbOe7IqgASS9HXjUzNa0W5YWcbiZ/RHwVuAjkl7XboFyYBbwR8DXzGwx8Cywor0i5Udo+joOuLTdsuSBpLnAOwgU/J7ADpLe216pmo+Z3Q18HrgO+AlwBzCR9XxXBI1xGHCcpPuAHwBHSbqwvSLlR9i7wsweJbAnv6a9EuXCH4A/mNkt4ffLCBRDt/JW4HYze6TdguTEG4F7zWyTmY0DPwT+pM0y5YKZnWdmB5vZ64DNwH9mPdcVQQOY2WlmtpeZLSAYXt9gZl3X2wCQtIOknaLPwJsJhqNdhZk9DDwgab9w0xuA37RRpLw5iS41C4XcDxwqaY4kETzPu9ssUy5I+m/h//kE/oHvZz3Xo4acrOwO/Cj4LTEL+L6Z/aS9IuXGXwPfC80mvwc+2GZ5ciFU6G8C/qrdsuSFmd0i6TLgdmArsJbuTTVxuaTdgHHgI7UEOXiKCcdxnB7HTUOO4zg9jisCx3GcHscVgeM4To/jisBxHKfHcUXgOI7T47gi6AIkLZVkkvZvtyztRNIzLarnIkl3SvpEK+orEpKOaDTLrqQFkn5dXp6k4yR17SzuIuPzCLqDk4Bfhv/PaLQwSbPCxFU9Q9ZrlvRi4I/N7GXNKK9ohJOuZGYtz51lZlcCV7a6XsdHBB2PpB2Bw4FTCGY3R9t/IOmY2PfzJS0L105YJem2sFf7V+H+IyT9QtKVhDNpJQ2HCebWx5PMSTpF0n9KulXSNyT9U7h9nqTLw7Jvk3RYgrwfkPRDST+R9F+SvhDb90zs8zJJ58dk/5qkmyX9PpT1W+FaAeeXlf/lUN7rJc0Lt700rG9NeI37x8r9uqRbgC+UlTNb0rcVrL+wVtKR4a7rgCEFOfz/tOycaeVJ2jW8h3eGsh8YHpe2faWkC0IZN0o6XtIXQhl+IqkUHneOpN+E538x4R6vlPRdSf8R3uMPxfYtjz37M8NtCyRtkPQdgtnie5eV9xYFazPcTiyjZSjXoAIel/Tn4fbvSHpT2ruWRvhuRO/S+ZL+r6Rfhc98Wbi9T9K/hPL8VNK/RvucBjAz/+vgP+Bk4Lzw86+Ag8PPfwZcEH7eDngAGABOBU4Pt28PrCZIyHUEQYK1fWNl7xr+HyBoIHYjSNx1H7ArUAJ+AfxTeNz3CRLTAcwH7k6Q9wMEs3V3AWYDG4G9w33PxI5bBpwffj6fIJeTCBKIPQUsJOjIrAEOCo8z4OTw82djcl0PvDz8fAhBKpCo3KuB/gQ5PwV8K/y8P0GqgtnAAuDXKc9iWnnAPwJnhJ+PAu6osn0lwciuBCwCtgBvDff9CFgaPoMNbJsMOpggx0pgXfjcXhQ++z0J0oKcG97HvlDW14XXNAkcmlDW7PD8l4fnXQJcHe77OkEK9lcDtwHfCLf/F7AD6e/a1D0keO+i8j4Qe2bnEyTC6wNeBfw29l78a7j9xQQ5dZa1+3fY6X9uGup8TiJYSAWCxvIkgsbxx8BXJW0PvAX4uQULc7wZODDWi9qF4Ef+AnCrmd0bK/ujkv4s/Lx3eNyLgZ+Z2RMAki4FXhEe80bgVQrSUADsLGlHMyu33V9vZk+G5/8G2IegsanEVWZmku4CHjGzu8Lz1xM0LHcQNGYXh8dfCPxQwYjpT4BLY3JtHyv3UjNLytJ4OEGDjZndI2ljeJ1PVZEzXt7hwAlhGTcoSBG8c4XtAD82s/HwOvsJMkkC3BVe59XAc8B5Cmzrafb6K8xsDBiTdCNBgsDDCZTB2vCYHQme6f3ARjO7OaGc/QmStv0XgIKkitHo8BcEimQj8DXgVAULwWw2s2crvGtZk6ENW2Ci+o2k3cNthxPc40ng4fDanAZxRdDBSNqVoEe5UJIRNBwmabmZPSfp34GjgRMJlAQEvbq/NrNry8o6gmBEEP/+RuC1ZrYlLKvaEn99BL3K56oc93zs8wTb3sN4vpPyuqJzJsvOnyT9PbZQplEzOyjlmGdT
ttdLo+U9D2Bmk5LGLewGE16nmW2V9BqC5GnLgP9J8A6UU547xgie/dlm9v/iOyQtqFPunwMfIRj9fYZgFLqMQEFA+ru2IGP58ees1KOchnEfQWezDPiume1jZgvMbG/gXiCyXV9MkDDtT9nWs7wW+O8xe/MrlLzwyi4EPbstoU390HD7bcDrJc1VsOLTCbFzriNI2EZYdlrjm8Yjkl4pqY+gUamVPoJ7AvAe4Jdm9hRwr6R3hjJJ0qIMZf2CwOyGpFcQNHYbapQnXsYRwGOhPGnbqxKOcHYxs38FPkFgQkriHQr8HLsRmF9uI3j2fxGWgaQhhRkrK3APsEDSS8PvJ0U7zOwBAtPTy83s9wRmrf9FoCAg+7tWCzcBJ4S+gt3Da3MaxEcEnc1JBItRxLk83P5zgob5uwRmghfC/d8kMDHcrsBWsonA9lzOT4APS7qboAG8GYI1CST9PXAr8ARBQ/FkeM5HgX+WdCfBu/Vz4MM1XM8KAlPHJgJ78o41nAtBr/Y1kk4HHiUYCUHQ6H4t3F4iGB2tq1LWv4Tn3EWQtfIDZvZ8zLyUhZXAt8L7sQV4f5XtWdgJuELBAuwCPply3J3AjQQN9efM7EHgQUmvBP4jvI5ngPdSYQGTcGR5KnCNpC0ESmyn2CG3EIxECfedTaAQIPu7VguXsy01+AMEWUWfrHiGUxXPPurUTGT3D0cEPyJwqnbdwuediqSVBI73GRFF3UDs/duNoENymAXrSDh14iMCpx5WSnojgR3/OmC4zfI4vcXVkgYJouE+50qgcXxE4DiO0+O4s9hxHKfHcUXgOI7T47gicBzH6XFcETiO4/Q4rggcx3F6nP8PE+7o+52RjEYAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.scatter(data[\"RM\"], labels)\n",
    "plt.xlabel(\"Average number of rooms per dwelling\")\n",
    "plt.ylabel(\"Housing Price\")\n",
    "plt.title(\"Relationship between Rooms and Price\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "predicted_prices = lm.predict(features)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEWCAYAAABrDZDcAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJztnXucXHV999/f3QxkF5ANECgshKDwhIpIIlFi01ZJK1G5uAXlUrDYaqm9IYjR4ItHYh8o0VRR+/RGRYuFQgLBCKJGXhK0xodLQhIwEl5V7itKkKxCdiGb3e/zx5yzOTt7zpkzs3NmZmc+79drXzvnzLl85+zs7/v7fa/m7gghhGhfOhotgBBCiMYiRSCEEG2OFIEQQrQ5UgRCCNHmSBEIIUSbI0UghBBtjhSBGMPMvm1mFzZajnbEzO41sw8Fr883s+/W4Z6zzczNbFqNrqfvzxRFiqCFMbMnzWzIzF42s1+a2X+Y2b5Jx7v7u9z9hnrKOFUws2VmNhw8ywEz+5GZvTWPe7n7Te5+SkaZbsxDhuD6+v60CVIErc/p7r4v8CZgPnBF6QFWZEp+F+os+8rgWc4EfgjcbmYWI1NNZthNQkt/f0QR/fHaBHfvB74NvAHGTBFXm9l6YBB4bdQ8ERzz52b2qJm9ZGY/MbM3BfsPM7PVZrbdzJ4ws4sj57zFzDaY2W+CWeTn4+QJrntaZHtacL3wHguCWfeAmW0xs7dHjo2T/QNm9ngg6xNmdn5w7LhZc6k5JOm8Ms9yGLgB+C3gwOAa683sWjP7FbAsuPafBZ9zh5mtNbMjI3K8w8y2mdmvzez/AhZ57wNm9sPI9nFmdreZvRg800+a2TuBTwLnBDP2LcGx+5vZ9Wb2nJn1m9lVZtYZvNdpZv9gZi+Y2ePAqeU+a+QzN9X3R9QYd9dPi/4ATwJ/GLw+AtgK/J9g+17gaeA4YBpQCPZ9KHj/fUA/8GaKg9TRwJEUJw8bgU8BewGvBR4HFgfn/T/g/cHrfYEFCbJ9Crgpsn0q8Gjwuhf4FfDu4H7vCLZnJsi+P/AbYE7w/qHAccHrZcCNkfvMBjw4b5+k82LkHbsOsDewAng62P4AsBv42+C6XcB7gJ8Cvx3suwL4UXD8QcBLwHuD535pcP6HItf7YfB6P+A54DJgerB9UtxnC/Z9Hfi34LMdDDwA/EXw3oeBbcF34QBgXfgsptr3Rz+1/dGKoPVZY2YDFE0Z3wf+PvLef7j7Vnff7cVZbpQPAZ919we9yE/d/SmK/9gz3f3v3H2Xuz8O/DtwbnDeMHC0mR3k7i+7+30Jcv0XcIaZdQfbfwzcHLy+APiWu3/L3Ufd/W5gA0XFMEF2ioPoKPAGM+ty9+fcfWvG51PJeWcHz/IZ4ETgjyLv/dzd/zF4lkMUB91r3P3RQMa/B+YGq4J3A1vd/bbguX8B+EXCPU8DfuHun3P3V9z9JXe/P+5AMzskuPYl7r7T3Z8HrmXP3+Zs4Avu/oy7vwhcU/bpNO/3R9QQKYLWp8/de9z9SHf/q2CQCnkm5bwjgJ/F7D8SOCww2QwEg8QngUOC9z8I/C9gm5k9GDX/RHH3nwKPAqcHyuAMisohvMf7Su7xuxRn7BNkd/edwDkUB9/nzOwuMzs25bNVe96q4Fke7O6L3H1jnDyRz/DFiPwvUpwZ9wKHlcjvMeeHJP0d4jiS4sz8uch9/43iyoDS+wJPZbhmU35/RG1pJaeWqJy00rPPAK9L2P+Eux8Te0H3/wHOs6Lz8EzgNjM7MBh0S7kZOI/ihOQngXII7/Gf7v7nWWV397XAWjPrAq6iOMv8PWAn0B059Lcynlcppc/yGeBqd7+p9EAzO4biQBluW3Q75jrnJrwXd89XgYOCVUgpz5XcZ1bCdbPS6O+PqBFaEYgkvgx8zMxOtCJHB2aNB4CXzOwTZtYVOCDfYGZvBjCzC8xspruPAgPBtUYT
7nELcArwl+xZDQDcSHGlsDi4/nQze7uZHR53ETM7xMzeY2b7UBwIX47cczPw+2Y2y8z2By7PeN5k+VfgcjM7LrjX/mb2vuC9u4DjzOxMKzqtL6ZEQUX4JnComV1iZnub2X5mdlLw3i+B2cGgibs/B3wX+JyZvcbMOszsdWb2tuD4VcDFZna4mc0Altbos8ZRj++PqBFSBCIWd78VuJriAP0SsAY4wN1HKNqt5wJPAC9Q/KffPzj1ncBWM3sZ+CJwbok5IXqP5yg6B38HWBnZ/wxFZ+snge0UZ5FLSP6+dgAfBX5O0QTzNorKhcC/sBJ4mKKT8ptZzpss7v514DPALWb2G+DHwLuC916g6ExdTtEJfgywPuE6L1F0lp9O0Y/wP8DJwdu3Br9/ZWYPBa//hKIT9ifADuA29pjU/h1YC2wBHgJur8FHjaUe3x9RO6xonhRCCNGuaEUghBBtjhSBEEK0OVIEQgjR5kgRCCFEmzMl8ggOOuggnz17dqPFEEKIKcXGjRtfcPeZ5Y6bEopg9uzZbNiwodFiCCHElMLMsmSPyzQkhBDtjhSBEEK0OVIEQgjR5kgRCCFEmyNFIIQQbc6UiBoSQoh2Ys2mflasfYyfDwxxWE8XSxbPoW9eb273kyIQQogmYs2mfi6//RGGhkcA6B8Y4vLbHwHITRnINCSEEE3EirWPjSmBkKHhEVasfSy3e0oRCCFEE/Hzgfj2C0n7a4EUgRBCNBGH9XRVtL8WSBEIIUQTsWTxHAqdNm5fodNYsnhObveUIhBCiCZjZMRTt2uNFIEQQjQRy+7YymjJvtFgf15IEQghRBMxMDRc0f5aIEUghBBtjhLKhBCiTmTJGJ7RXWDH4MTZ/4zuQm5yaUUghBB1IMwY7h8YwtmTMbxmU/+4405946Gx5yftrwVSBEIIUQeyZgyv27Y99vyk/bVAikAIIepA1oxhZRYLIUSLkjVjWJnFQgjRoixZPIeuQueE/YO7do/zEyxZPIdCR0lmcYcyi4UQYsrTN6+Xa848np6u8dE/OwaHJziNd4+OzyQu3a41UgRCCFEn+ub1ss/eE6P2o07jT97+MKXDvgf78yL3PAIz6wQ2AP3ufpqZHQXcAhwIbATe7+678pZDCCEmQ626hpVzBg8OlxaYIHV/LajHiuAjwKOR7c8A17r70cAO4IN1kEEIIaomaw5AFhrhDC5HrorAzA4HTgW+HGwbsAi4LTjkBqAvTxmEEGKy1LJrWJzTuKvQOeYMtriTUvbXgrxNQ18APg7sF2wfCAy4++5g+1kgv47MQghRA2oZ2x+ak5LMTF2FjlgzUFchv3l7borAzE4Dnnf3jWb29irOvwi4CGDWrFk1lk4I0U5M1r5/WE8X/TGDfrXmnL55vYn3H0rwBSTtrwV5moYWAmeY2ZMUncOLgC8CPWYWKqDDgVgjm7tf5+7z3X3+zJkzcxRTCNHK1MK+X86cU0taKqHM3S9398PdfTZwLnCPu58PrAPeGxx2IfCNvGQQQohq7ftrNvWzcPk9HLX0LlasfYyzTuylt6cLA3p7urjmzOOrihoqRz2VTkgjylB/ArjFzK4CNgHXN0AGIUSbkNW+HzUf7d9VYOeu3QwHLSL7B4ZYvbE/t8E/SjkfQh7URRG4+73AvcHrx4G31OO+QgiRxb4fmo/ClUNcN7BwFZG3IoB0H0IeKLNYCDHliJptFi6/J9Xen8XUEmc+iiPPCqCNRB3KRFNTq2xOkT/1+luVzt5D5y8Qe78sppasA3wjk77yRIpANC2V/sOLxlHPv1Wa8zfpXqWmlnBFESqG/bsKZZvD5+2wjVLvCZBMQ6JpqWU2p8iXev6tkmbv/QNDmUxFceGkO3ftji39PKO7ULMooazmrFqWs8iKVgSiaWlEpyZRHfX8WyU5f4FxA2dI6cw6TmkNjzgzugt07zUtl1l4JSumalY8k0WKQDQttc7mFPlRz7/VksVzxg2qcQwNj7Dsjq28unt0wuCbdN7A
4DCbPnVKzeWFygb3JCWXtL8WyDQkmpZGJNaI6qjn3yps8BImdyUxMDQcO/h2WvxZeU4wKlkxJcmXtL8WaEUgmpZGJNa0ArV0NGa9Vr3/VlHn78Ll91Q0Wx5xp6vQOU5J5D3BqGTFNOLx3ciS9tcCKQLR1NQ7sWYqETdIAzWL3qkmTDPpHnlGwcSZiroKnUwvdLBjcGIkUG/EV1CvCUaSjHHKpzdBafTmuGIxz1HL1Ir58+f7hg0bGi2GEE1D6SANxYFl72kdsWGQvT1drF+6qKJ7pM20eysYPJNkrWW5hixKsV73nawyvGLNI9x439MT9l+wYBZX9R1fkXxmttHd55c7TisCIaYgSc7HJEdoNY7GtIifSlYa9YiCSVuNRAffk4+dyYq1j3Hpys01MZvVasUUZd227RXtrwVSBEJMQaoJy1yzqT9xILpizSPcfP8zjLjTacZ5Jx2RGqYJ2QfzRoYBRwffWie95aXgGvG8FDUkxBQkKcKlIyWwJCm5KzRFhM7IEXduvO9pZh/YNSESqJQsg1Oz9OitddJbXgN2S/UjEEKUp5LiaVGSwjVHU1x+/QNDY/cI7zt76V2x9miA+x7fMRammUSWwalcaGm1z6BSaj1w5zVgL1k8JzbLOc+oJikCIRrEZEoJlMbS9/Z0cdaJvWUbnPcPDLHkti0suXVLWb/BiDt983pZv3QRXzhnbtV5AnGyhg7bepZTqPXAnWvuROkfMs/O9chHIETDKGeqKBdhUup8XLj8HrLEAIbNVrIQ+hWS8gTC+2bJM4jbX89yCpWEcGYhr9yJFWsfm/A3Gh5xlZgQohVJK55WjVMzD2di9L5xFTwn63ytp2M0j4E7jzyXRjiLpQiEaBBJUTmdZqmz5LgIn6v6ji8b5VMNabPzpNn8p+/cmnmwrXc9qamQoNiIGlvyEQjRIJJszEmlBPoHhsacu6URPleseST2eoVOm+h4jNmXRqUz1B2Dw5lt/pXa2evlWG4k7dK8XogpRaXlESZbn2fF2scqntnffP8zY1mncXb8pH1Z7nNYT1fsZ8q6AklbVVRirmmXRkWNqLGlEhNCpFBpeYRalFOIu0YWnlx+akXHZ7lvocPYa1oHO3eNl6Wr0MlZJ/ay8sFnMjmfDXhikvIllbyopnxGu6ASE0LUgCQ7+GWrtgDVNxWJzrD37ypgVqyHH87+rjnz+LH3s0zVwhLFkynuVjoT3b+rwM5duycogfAz3fXwc2QSjtrYt9upUZFaVQrRRCQNMiPuE2zfazb1J5pKotcpjZ0fGBpmx+DwhO5a65cu4tpz5mYKIT/vpCNiY/IvXbmZK9Y8Uvb8kDBv4Inlp7LP3tNSZ/s7BocZjslgK5W3VvbtZslQzptGtKqUIhAihbRBZmh4hEtWbmbh8nu4Ys0j49ojpl0nbtVQet0Vax9jzaZ+Llu1peykO6xKGXddB2667+mqBpFqZ9oOscljk6VdGhU1ole3TENCpJClLWL/wBA33fd04oBdOlhlGWD7B4a4ZOXm1GMMuPacuWODbNJ1HapKRkpzBte65HUW2qVRkfIIhGgywkHmslVbUjtEpc3aX9ldXDlctmpLpqqeWXHG+yjSrpt1ECn1XRQ6bYJ5qKerwLIzjgPi6/3nOUOfCnkAk6UReQRSBEKUIRx4qonkAQj1Rxjzf8zB+8QOsJXSacZRS+8amxkvWTyHS1dujlVKWQaR0qihgaFhCh3GjO7COEd2nIO8lWfo9abWpTCyoPBRITISzpZrMZvvAEYnL9IYYYjqhqdenGCmyhq+qvDM5qFWUUNZw0elCISokGrj/MvR29PFzld3x9rdszCju0D3XtPoHxii04wR99SWkqWDTZKCi8sBqHd4o6gO5RGItmPNpn6W3bF1bCCd0V3gytOPq/kAFXVapg2elU6x+geGKHRWX294x+DwWLP2Efcxc0LWLN0kmUvNSu2S4dtOaEUgWoI1m/pZcuuW2Lh22NNsHWpr067l6sBsjz+h
ViSZdZLMQKXKIM6sJBPS1EErAtFWrFj7WKISgKAhy61bwPbU46/FTLZ0dVDNSgCKpRzS5C8l630qDUUMcwDSFGU7Zfi2C7kpAjObDvwA2Du4z23ufqWZHQXcAhwIbATe7+678pJDtAdZBqG4gTatXERWwpDGpJlyOXp7uhjctXvMrFNK3Cz9rBN7E1tMRknLxk1aEZRbJTUivLFS5MOojDwzi18FFrn7CcBc4J1mtgD4DHCtux8N7AA+mKMMok2YzCAUVy6iGqqZEfd0FQASlQDA+QtmTcjUvaovvZcwpIccLlk8J7Z0RZh8lkazZ/g2okTDVCc3ReBFXg42C8GPA4uA24L9NwB9eckgWp+wPv1kQzprkcJfqTIqdBg7d+1Olb2nq8BVfceP1f9Zv3TR2Mw2aTCHYo5BWsho37zeRNNSOYWW1oO4GWhEiYapTq4+AjPrpGj+ORr4J+BnwIC77w4OeRaI/faY2UXARQCzZs3KU0wxRblizSOppR0qZbI27rhEoCRbfqcZ+06flroS6Cp0jmXwxtE3rzexDMVo0Hg+jd5JmHiaOcNXPozKybXonLuPuPtc4HDgLcCxFZx7nbvPd/f5M2fOzE1GMbUIVwBhp66kQbYaJmvjjpspn79gVqwZ5XNnn8BAihLIOstOMg/1dBfKdvJqdhNPtbRLldJaUnZFYGYfAb4KvAR8GZgHLHX372a9ibsPmNk64K1Aj5lNC1YFhwMy3IlUohm9WaJl0moCpVE6AFbicIzK2Gk2JuP8Iw9g/pEHxF4nKQ+hkjDMuFVIodN4+ZU9zuek6KhWLeLWiBINU52yeQRmtsXdTzCzxcBfAP8b+E93f1OZ82YCw4ES6AK+S9FRfCGw2t1vMbN/BR52939Ou5byCNqXvLJ4S+npKrD5ylNS72sUHbdhS8gsMubdzSy8TnQwT8pObqc4f0UNFallHkG4zn43RQWw1SzT2vtQ4IbAT9ABrHL3b5rZT4BbzOwqYBNwfYZriTalXO3+WnHaCYeWvW9Y23/+kQeMG1TSZAx7Fmx46sUJCqTcjLyS3sfR/UctvStWlmpt5FNxUG1mH0YzkkURbDSz7wJHAZeb2X5kqJfl7g9TNCOV7n+cor9AiLLUy8G3btv2TPeNq+2fRcYw5j9OGdS6UXst4/xVTqI9yOIs/iCwFHizuw8CewF/mqtUQgTUy8FXOnCm3beSY6PcfP8zmeWZTAhkLZ3ACsVsD7IoAgdeD1wcbO8DTM9NIiEiLFk8Z1KF2LJSGmmUNmjGHVs68MZRiRN7MiGQtYzzT8pxqEUpbtE8ZDEN/TNFU9Ai4O8oRg+tBt6co1yiTchifx6ZZAOXLJQO0mkx+nHHQno1UqgsrHWy5p1a2cjDctZx+0XrkGVFcJK7/zXwCoC776BoHhJiUmQpBbDsjq01beCSRKfZhFj7pBj9sDNYND6/b14v65cu4snlp3LBgvgEyPNOOiKzPM0S45+0iqk2RFc0J1kUwXAQ+eMwFhZaj/9N0eJksT9X26SlUuLqDSWZfEbcU2vYXNV3PBcsmDU2a+4044KYsNM0mqWMQ5IyLFfnSEwtspiGvgR8HTjYzK4G3gtckatUoi1otlIAoRIKB9vS8M6OGDNJ6TkhV/UdX9HAH0czhEAqOas9KKsI3P0mM9sI/AHFnII+d380d8lEy9OM5YxLlVB0MK51fP5UoFWzj5udeuduZCkxsQDY6u7/FGy/xsxOcvf7c5NKtATlvsxZZpszuguphdlqTZoSakbFVQ+aYWXSTjQidyOLj+BfgJcj2y8H+4RIJIsjOIsd/MrTk6tv1ppSJRQWuAsdwycfO7MpHLiitWlE7kamEhMeKUjk7qNmphaXU4w8l5px1077Mldy3755vVy6anPNe/mW0mnGWSfumfnGzcpWb+znrBN7Wbdte9nnWO55T8WyDaI+NMJ3lmVAf9zMLmbPKuCvgMdzk0jUnDyXmknXTqq9E/0yp8m14akXufn+Z+oW
pjjizuqN/WN1hJIU2bpt22MLt0UH9p7uAi+/snusNWbp81bZBpFGI0yQWUxDHwZ+h2K56GeBkwgaxoipQZ5LzaRrJyUcRb/MSed+8vaHufG+p+seqx59JpXMykrNYDsGhyf0Rw6vvWZTP5et2hL7uS9btUXtFEVsNn2h03I1QWaJGnoeODc3CUTu5LnUTLrGiDtdhc5UR3DSuYPDjUtTCWUqNyuLrgDiwkrjCGf+aUlaWhkIYGLTjZznRIkrAjP7ePD7H83sS6U/+YolakmeHZuSrhE6ftMcwc0YbRPKlJbZW7oCyLpy6TQrW1JbBd3EirWPTVhRDo96rt+LNNNQmCuwgWLf4dIfMUXIs1xBUvZt/8AQl6zczOCu3Vx7ztxxTddDTj525oTm642sYBN9JmkRTdX0SOgqdGZWGK2clyDK01TOYne/Mygtcby7fyw3CUTu5JkUVK7g2o7B4djGLGs29bN6Y/+4FW+WNpR5Eg70pRE9154zt+L+A4VOY5+9pvHroeGx533Zqi2ZlEEzrpRE/WiEszjVR+DuI2a2MLe7i7qRZ1JQeO2Fy+9JrL5Z2tkrqQNYI8ka0ZP0j9ppxqh7oqJNqmYaRXkJYsniOSy5bQvDkaq7DXcWA5vN7A7gVmBnuNPdb89NKjElSZsph529oHy55kbQ01UA0iOswoE9KSO6XFG43ioVSFaUm9BC1NlZnEURTAd+RbEfQYgDUgRiHEkz5ZD+gaEJM51moNBhLDujmMGcxT5bramtWgWSBeUmtA5pzuK8/pZZFMESd38hl7uLlmLJ4jksuXXLhC9xlGZTAh0G57zliLJmn1L7bDWmtjx9NbXK5BaNp6mcxWZ2OvAViv0IRoGz3f1HuUkipjzhgHP57Q8z1MBcgEoYdVj5wDNj/ou8yy7n5atptpLeonqazVl8NfB77r7NzE4CPgu8LTdJRNMRtTnv31XADAYGh1NnsuFAd9ynvsPOXZWFWDaK4VFn2R1bxw3SoR8jjP0P/RvNOrtu18qorUgjekCk5RHsdvdtAEHJ6f1yk0I0HaVJUwNDw+wYHB6rJHrpys3MLmnXGGWqKIGQaCe0cGUQjf1P6kbWLDRLa0sxeRrRnS5tRXCwmX00advdP5+bVKLhlEuaCi39reqUnGo2dzWQaS3q3QMiTRH8O+NXAaXbooWpxLYcHSBDc1IzUegw9p0+LbXBzYzuwrjtpM/fbGGvUdRARlRLWmbxp+spiGgcpSWU3SsPW/75wNCEEMZmoLdkZrxmU39ssk5pA5wkm7sF19CAK1oJNZhpQuqZGFQ6eFfbFvKwnq6qavDkSW9P14TeAVlNKEsWz+HSlZtj83qa1TwkRLVIETQZ9U4Myjp49wRRQzsGhyfUBAqdkllKKNSLQsf4lPxKlWvfvN7Ez6OQTNFqSBE0GfV2UmYZ1AzYfOUpY9txg+qGp16suWzV0l3o4O/PfGNq28ksyjWpJIRCMkWrkZZQ9tGk90BRQ3lR78SgcmUhwmOiRJ2SoVKotxPVgP27CuPCPkNm7LP3uAG+WuXaiHhuIRpB2oogjBCaA7wZuCPYPh14IE+h2pksiUFrNvXz6Tu3jtnze7oKnHbCoRU1VQ+TpUbcy5Z/3rHz1VgHaSOdw+cvmMVN9z0d+16oNMspqXLKVSGZol0oGzVkZj8A3uTuLwXby4C7yl3YzI4AvgYcQnGcuc7dv2hmBwArgdnAkxRLV+yY1KdoIcrNQuOiXgaGhrkxMigmmT5KB+4wWapchNDg8ChLbt0ytl1pi8ZaM6O7wFV9x7N647OxpSz27ypkUlJZTDxZQjJV9VNMdbL4CA4BdkW2dwX7yrEbuMzdHzKz/YCNZnY38AHge+6+3MyWAkuBT1QmdutSbha6Yu1jmQq3xZk+JhPVE5ZheHX36ARFUm9OfeOhrNnUn1jPyCzbZz352JkV37vciqpVE+xEa5NFEXwNeMDMvh5s9wE3lDvJ3Z8Dngtev2RmjwK9wHuAtweH
3QDcixTBONJmoZX4CkqPnayfIc4e3wjWbdvOum3bE98fGBxmIEMYbNo14si6omrmDGQh4iirCNz9ajP7NvB7wa4/dfdNldzEzGYD84D7gUMCJQHwCxJWF2Z2EXARwKxZsyq5XcsRNT1UYo4p9Su0Clmd2+WOq1QxVrKiUoipmEpkDR/tBn7j7l81s5lmdpS7P5HlRDPbF1gNXOLuvzHb057c3d3MYkc1d78OuA5g/vz5zVXEvo4kzULL0VXo5ORjZ6a2j2xFDMb8KbXwEUSpZHBXiKmYSpRVBGZ2JTCfYvTQV4ECcCNQtpexmRUoKoGbIq0tf2lmh7r7c2Z2KPB8tcK3A0mz0A4r1tKH+Kihk4+dyeqN/ZlmsI1uGh9H1oimKEYxmihqkklqGB9VGFnJEmoLCjEVU48sK4I/omjWeQjA3X8eOH9TseLU/3rg0ZKcgzuAC4Hlwe9vVCp0K5A10iRpFuoOTy4/NfG6NyaEVsZeK7vYdeNn17wbKH6epMG8lGvPmTvuGe5plDN+ZRCnMLIQF9EVvaYzsbaREFOBLIpgV9SEY2b7ZLz2QuD9wCNmFubqf5KiAlhlZh8EngLOrlDmKU+WTNdwQE8a/uJMD81Y9K1awryFvnm9XJqhdEVvT1dioxyoTS5AXNOaEXcN/mLKk0URrDKzfwN6zOzPgT8DvlzuJHf/IcWJUhx/kF3E1qNcpmuWAT0u9DHvom+dZpx30hGs27Y9d7/Dp+/cWraPcEg5U0wtyzOr1LNoRdI6lAHg7v8A3EbR1j8H+JS7fylvwRrJmk39LFx+D0eldOCaDOXKSGQZ0Fc+8Mw4udZs6s9tcO4qdHLBglnsN30aN973dF2cz9EqqHHdt8IZRj26NwnR6pRVBGb2GXe/292XuPvH3P1uM/tMPYRrBKUtGvNoUZgUURLuzxKdMjzqYw1gQpnzYmh4hBvve7pheQTR1n1QXJnIHi9E7SirCIB3xOx7V60FaRbSzDa1olx/2ayhh5WsIKYaPV3jO4al9RG+Ys0jua7ghGh1EhWBmf2lmT0CHGtmD0d+ngDym342mHpU/yzXnDpOUcTRYcaaTf1TPnmp9EtY6DCWnXHchOOSlPRNgbkqrxWcEK1OmrP4v4BvA9dQrAchNtkvAAAUEklEQVQU8pK7N0/x+RqTpfpnpSSFiiaZNEqjU5IYcefy2x9JLMecha5CJ2ed2DuWg9DTXeDlV3YzPFqfoNJ99urk6j86fnKhtCXbKvEgRGWkVR/9NfBrM/si8GKk+uhrzOwkd7+/XkLWk1rXoK+2KUppzf9o2ekoQ8MjTC900FXorMo8FOdorWePgZ27Rtjw1IsTWkrGkTWhC1TiQYhKyOIj+Bfg5cj2y8G+lqSc2SYka2RRLXwOffN62fSpUxJjcXcMDjO90JH4fhJpsffrly4ac87mzY33Pc3sDPb9rCYzUIkHISohSx6Bue9J63T3UTNr6RaX5WLFK5nlJ81gq5ltp82Iq2k6X64Mc737EJdbLZUmh+3fVeClV3czUmLGKu1XLIRIJ8uK4HEzu9jMCsHPR4DH8xasmalklt9p8fP0uP3lVhmVzIizEM7EX3f5t7hizR7/fyhHlozeWlNutRSuVp5Yfir77D1tghIA2Hf6NPkHhKiALDP7DwNfAq6g6Jf7HkF56HalksiipBo5pfuzrDL65vWy4akXK6ojBMUZ8m53ksr1jLiPXXP+kQfkVqai0GmZmupEn2NaTaakv0OWXgRCiD1kySx+3t3PdfeD3f0Qd/9jd2/riqHlEsKiJNnZS/dnXWVU2kzFgHPeckSmynI33vc0l6zcnIsS6O3pYsV7T2Dh6w4oe2z4HMsl91XydxBCJJOWR/Dx4Pc/mtmXSn/qJ2LzUS4hrJpjs64yKo2GceCuh59LrvpUBwzGooIeevrXqcdGn0055VjJ30EIkUyaaejR4PeGeggylaikomXWY7PmL1QSQhlSjSM5K12FjsTewSHhZ0jKgO40
Y9Q9s+kn3F/LyqJCtDNpeQR3Br/L9iduN7L2EgjJUrFyyeI5LLltyzgbeqFzYvTLksVz+OiqzdQp3wvYk3S2euOzY4N+h8EfnzSrrE8hOkNPGthH3XkiprdCFuWoaqBCTJ5ERWBmd5JiWXb3M3KRqMmppJdAqaIoq0BKn3bC0w9n0KUUOgyMTA7ZrPQG3c7WbdvOK8OjiYXews/V013AHX49NDzhM1aatV3r5L5aUulkQIhmxjwhlMTM3ha8PBP4LYrtKQHOA37p7pfmL16R+fPn+4YNzWGhSuoB3NvTxfqli2J7CeyZUSe3jkxqyRhet9z9Q3q6CphN3hzUVejkmjOPByZ2+ArfSxr40hRhra7VSKr5HEI0AjPb6O7zyx2XZhr6fnChz5Vc6E4za45RuQFU00tgaHiEm+9/JrXdYtI7lTqLB4aGKXRW7xk2GDfgLlx+T6rDtnSQBsqumGptVqs35RoLCTHVyJJHsI+ZvdbdHwcws6OArO0qW45y5o2kgTpLz92k+2W5f5RqTUOlqw9I/jzhAF864E8vdKQOks04sFdKPSrUClFPsmQWXwrca2b3mtn3gXXAJfmK1RzEZfpW20sgKcO4HKX28HJlIcpxwYJZzOguTNifVJZh/66Jx0Jx5RA34CeZpPoHhlqmX4DyF0SrkSWh7DvAMcBHgIuBOe6+Nm/BGk1SMhNQcS+BrkIn5510RMXlIboLHaxY+9i4AbTShLIovT1dXNV3PFeeftxE81GCnkrSX9WsOVqlX4DyF0SrkaVVZTewBPgbd98CzDKz03KXrMEk2YE/fefWVBt337xezjqxd2wF0GnGWSf2clXf8ePaLWZhcHh0nCJacuuWqktDR2f8K9Y+NsF8NDzisTV+8ijXUOuOb/Uma4VaIaYKWXwEXwU2Am8NtvuBW4Fv5iVUM5Bk790xODxm/kgKHV29sX/MJzDizuqN/cw/ck9pBQN6ugu8MjxSNhkrSrXNYnq6Ciw747jMiVpRknwSM7oLvDI8mhgFFYa4ZnWCTzVawdchREgWH8Hr3P2zwDCAuw/S0IIF9SGrvbd0dpu0klh2x9ZxpqaiMrEJvXlrSaHT+MI5c9l85SnjBq1KbNxJZpArTz9uLLw0jjBJLGkFJHu6EM1DFkWwy8y6CMzCZvY64NVcpWoCKin5HJ3dJlbEHBqOVRBm1LS0dEh3oYMV7z0hdta6ZPGcYvJZhCRncZoZpG9eb9mBXvZ0IZqfLKahK4HvAEeY2U3AQuADeQqVB9WUhYDxMe87X90d2xs4OruttBbQwOAw154zt+atIb3coq307WC70v7K5bJ/VQ9IiOYnMbMYwMwMOBwYBBZQHC7uc/cX6iNekclmFtcqEzTLdZKOmV7oiA2tjMbux52blHGchbi8AEjOTu4OCshF75flOTVj9q8QogaZxQDu7mb2LXc/HrirZtLVmVplgmaZ3SYdAxNLNQAM7trNmk3942bdy+7YOrby6Oku8PpD92P9z16cIM8xB+/Dk78aTEwgKzVTlWtKPxjjuM7ynOQ4FWJqk8U09JCZvdndH8xdmpyoZSZolkEv7ZjoIA9Fp3Fp5NGru0fHvf+jGCUAMLhrlBXvPYHLVm2JzVyOmqziVhtZmWoRPlqhCFEZWRTBScAFZvYksJPAWuHub8xTsFqSZLfv6S6wcPk9dRsw+ub1smLtYxP8DNFZd9zqJS0EM5S3XJXOpF4AWWjmCJ/SQf/kY2eOK+4XF+IrhBhPFkWwOHcpcibOoVnoNF5+ZXdqTkAelFudVDL7DgfoLCartOv29nQxuGt3rA/DmFjmolmIKwl+031PT1CcKggnRDpp/QimU2xcfzTwCHC9u++ul2C1JGsEUD0GjHJF65LeL3Ual874y5mskq6bVj7bgPMXzGraAbTS1ZMQIp60PIIbgPkUlcC7gM/VRaKc6JvXy/qli3hi+amsX7qIX8eEgUL+A0a5uPqk989fMGtSJQ3K3TcuX+Dac+ZyVV9y
0lijqWb1JISYSJpp6PVBtBBmdj3wQCUXNrOvAKcBz7v7G4J9BwArgdnAk8DZ7r6jcrEnT6XdskKyOiLLxeMnXSOvuPusEU9Z7tMszthqV09CiPGkdSh7yN3flLRd9sJmvw+8DHwtogg+C7zo7svNbCkww90/Ue5aeXQoq7ZbVpZzWrmDVTN9trRucOu2bW+4ohKi0dQij+AEM/tNeD2gK9gOo4Zek3Zhd/+Bmc0u2f0e4O3B6xuAe4GyiiAP0mbISTPerPkIrdzBqpk+m7KWhagNaa0qa18ABw5x9+eC178ADkk60MwuAi4CmDVrVg6ixJtC0prTZ81HqEcHq3LmmbzMN83WnUvJbEJMnixF53LBizapxOoJ7n6du8939/kzZ06uK1clpM14s1btnEwHq7iuaHHHxDXNCY8t9/5kUHcuIVqPeiuCX5rZoQDB7+frfP+ypM144yJvjOJAGx20q624mXUAT1NWae8vu2Nr6v2zoGqiQrQe9VYEdwAXBq8vBL5R5/uXJW3GGw2xhPHRKdFBu9oOVuUG+JBqk9IGhoYnvSpQdy4hWo8smcVVYWY3U3QMH2Rmz1IsZ70cWGVmHwSeAs7O6/7V2siXLJ7Dktu2jCvkVui0cfH2ffN6Yyt4Rp2m1dius9rfq01KA2ri1JVdXojWIrcVgbuf5+6HunvB3Q939+vd/Vfu/gfufoy7/6G7x1dTmySTtpGXei5iPBl5OE2TViP7l3Qxy5KUloQybIUQpTTMWZwnWU0scSy7Y+uE3sDDoxMbu+fhNI3rHAawMyhVHVLOPNM3r5cZ3fEtMOXUFUKU0pKKoNrZ+ppN/bEdyOLOPfnYmROafE3Wado3r5d9p0+01g2PTFREpSUzSk01V55+nJy6QohMtKQiqHa2nrZiKK3tv3pj/ziLkQFnnTh52/lATAVQqNykI6euECIruTmLG0m5PrpJpA225Wr7O7Bu2/bU62dxYFdbAykOOXWFEFloyRVBtbPhpMF2RnchU23/NEWS1YGtOH0hRL1pSUVQLUmD8JWnHzduX6WmpzWb+rls1ZZMDmyZdIQQ9aYlTUNp9YLKNWGH8kXMKjE9hbLE9RSG+FVENSadZikNLYSYerSkIphMhcyszenD+/QPDNFpNm52X64SaZSehDDPSqhW8QkhBLSoaagWyV7lir/1zesdMyWFs/04u3+5eyYsFCpiMnkTQgjRkopgssletSr+luWeSS0zK6HZSkMLIaYWLakIJht5U6vib0myRKlFpq9KQwshJkNLKoLJRt5UUvwtjuj+UJa4kg+1CgtVyKkQYjK0pLMYJpdMVS6pK4zQ6R8YytQoPZQlr8getWwUQkyGxOb1zUQezevTSGvQDkx4L1QGvRqAhRBNRC2a17ctaTPshcvviS0v0dvTxfqlixogrRBCTA4pggSSTEtTJUJHCWZCiKy0pLM4T6ZChE6ezeuFEK2HFEGFTIUIHSWYCSEqQYqgQkpDU2d0F9h7WgeXrtwcm4HcCKaK+UoI0RxIEVRAWHbi0pWbATh/wSxeGR5lYGi4qUwwU8F8JYRoHqQIMhJnd7/pvqeb0gQzFcxXQojmQVFDGUnqShZHo00wSjATQlSCFEFGKhncm8EEozaVQoisyDSUkaTB3Uq2ZYIRQkw1pAgykmR3P3/BLLWVFEJMaWQayojs7ntQ1rIQrYUUQQXI7q62mEK0IjINiYpQ1rIQrYdWBC1Cvcw1yloWovXQiqAFqGeROWUtC9F6SBG0APU01yhrWYjWoyGKwMzeaWaPmdlPzWxpI2RoJepprplsP2ghRPNRdx+BmXUC/wS8A3gWeNDM7nD3n9RbllahXI/lWqPoKSFai0asCN4C/NTdH3f3XcAtwHsaIEfLIHONEGIyNEIR9ALPRLafDfaNw8wuMrMNZrZh+/btdRNuKiJzjRBiMjRt+Ki7XwdcBzB//vykQp8iQOYaIUS1NGJF0A8cEdk+PNgnhBCiATRCETwIHGNmR5nZ
XsC5wB0NkEMIIQQNMA25+24z+xtgLdAJfMXdt9ZbDiGEEEUa4iNw928B32rEvYUQQoxHmcVCCNHmSBEIIUSbI0UghBBtjhSBEEK0OVIEQgjR5kgRCCFEmyNFIIQQbY4UgRBCtDlSBEII0eY0bfXRRlGvJvBCCNEsSBFECJvAh/1/wybwgJSBEKJlkWkoQj2bwAshRLMgRRChnk3ghRCiWZAiiJDU7D2vJvBCCNEMSBFEUBN4IUQ7ImdxhNAhrKghIUQ7IUVQgprACyHaDZmGhBCizZEiEEKINkeKQAgh2hwpAiGEaHOkCIQQos0xd2+0DGUxs+3AU42WY5IcBLzQaCGaCD2PPehZjEfPYw+TfRZHuvvMcgdNCUXQCpjZBnef32g5mgU9jz3oWYxHz2MP9XoWMg0JIUSbI0UghBBtjhRB/biu0QI0GXoee9CzGI+exx7q8izkIxBCiDZHKwIhhGhzpAiEEKLNkSLIATP7ipk9b2Y/juw7wMzuNrP/CX7PaKSM9cLMjjCzdWb2EzPbamYfCfa36/OYbmYPmNmW4Hl8Oth/lJndb2Y/NbOVZrZXo2WtF2bWaWabzOybwXY7P4snzewRM9tsZhuCfbn/r0gR5MN/AO8s2bcU+J67HwN8L9huB3YDl7n764EFwF+b2etp3+fxKrDI3U8A5gLvNLMFwGeAa939aGAH8MEGylhvPgI8Gtlu52cBcLK7z43kD+T+vyJFkAPu/gPgxZLd7wFuCF7fAPTVVagG4e7PuftDweuXKP7D99K+z8Pd/eVgsxD8OLAIuC3Y3zbPw8wOB04FvhxsG236LFLI/X9FiqB+HOLuzwWvfwEc0khhGoGZzQbmAffTxs8jMIVsBp4H7gZ+Bgy4++7gkGcpKst24AvAx4HRYPtA2vdZQHFS8F0z22hmFwX7cv9fUYeyBuDubmZtFbdrZvsCq4FL3P03xYlfkXZ7Hu4+Asw1sx7g68CxDRapIZjZacDz7r7RzN7eaHmahN91934zOxi428y2Rd/M639FK4L68UszOxQg+P18g+WpG2ZWoKgEbnL324Pdbfs8Qtx9AFgHvBXoMbNwYnY40N8wwerHQuAMM3sSuIWiSeiLtOezAMDd+4Pfz1OcJLyFOvyvSBHUjzuAC4PXFwLfaKAsdSOw+V4PPOrun4+81a7PY2awEsDMuoB3UPSbrAPeGxzWFs/D3S9398PdfTZwLnCPu59PGz4LADPbx8z2C18DpwA/pg7/K8oszgEzuxl4O8USsr8ErgTWAKuAWRRLap/t7qUO5ZbDzH4X+G/gEfbYgT9J0U/Qjs/jjRQdfp0UJ2Kr3P3vzOy1FGfFBwCbgAvc/dXGSVpfAtPQx9z9tHZ9FsHn/nqwOQ34L3e/2swOJOf/FSkCIYRoc2QaEkKINkeKQAgh2hwpAiGEaHOkCIQQos2RIhBCiDZHikAIwMxGgoqPPzazW82sO+G4b4V5AEK0CgofFQIws5fdfd/g9U3AxmgCXJAYZ+4+mnQNIaYqWhEIMZH/Bo42s9lm9piZfY1ihucRQb34gwDM7E/M7OGgt8B/BvtmmtlqM3sw+FkY7H9bsOLYHNTe369hn06IElR0TogIQY2bdwHfCXYdA1zo7vcF74fHHQdcAfyOu79gZgcEx3+RYi39H5rZLGAt8NvAx4C/dvf1QQG+V+r1mYQohxSBEEW6gtLQUFwRXA8cBjwVKoESFgG3uvsLAJGU/z8EXh+prvqaYOBfD3w+MDvd7u7P5vQ5hKgYKQIhigy5+9zojmAw31nhdTqABe5eOuNfbmZ3Ae8G1pvZYnffNvF0IeqPfARCVMc9wPuCgmBETEPfBf42PMjM5ga/X+fuj7j7Z4AHadMeBKI5kSIQogrcfStwNfB9M9sChBFGFwPzAyfyT4APB/svCUJTHwaGgW/XXWghElD4qBBCtDlaEQghRJsjRSCEEG2OFIEQQrQ5UgRCCNHmSBEIIUSbI0UghBBtjhSBEEK0Of8f
tB+tAZaR+TgAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.scatter(labels, predicted_prices)\n",
    "plt.xlabel(\"Prices\")\n",
    "plt.ylabel(\"Predicted Prices\")\n",
    "plt.title(\"Prices versus Predicted Prices\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "21.831934375295628"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "training_error = \\\n",
    "    (labels - predicted_prices).apply(lambda x: x ** 2).mean()\n",
    "\n",
    "training_error"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Citation: http://bigdata-madesimple.com/how-to-run-linear-regression-in-python-scikit-learn/"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:modin-dev]",
   "language": "python",
   "name": "conda-env-modin-dev-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}


================================================
FILE: examples/quickstart.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "![LOGO](https://github.com/modin-project/modin/blob/main/examples/tutorial/jupyter/img/MODIN_ver2_hrz.png?raw=True)\n",
    "\n",
    "<center><h2>Scale your pandas workflows by changing one line of code</h2></center>\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Getting Started"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To install the most recent stable release of Modin, run the following command:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip install \"modin[all]\" "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For further instructions on how to install Modin with conda or for specific platforms or engines, see our detailed [installation guide](https://modin.readthedocs.io/en/latest/getting_started/installation.html).\n",
    "\n",
    "Modin acts as a drop-in replacement for pandas, so you can speed up your pandas workflows by changing a single import line. To use Modin, replace the import of pandas with the import of Modin, as follows."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import modin.pandas as pd\n",
    "import pandas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2022-01-07 07:29:30,173\tINFO services.py:1250 -- View the Ray dashboard at \u001b[1m\u001b[32mhttp://127.0.0.1:8265\u001b[39m\u001b[22m\n"
     ]
    }
   ],
   "source": [
    "#############################################\n",
    "### For the purpose of timing comparisons ###\n",
    "#############################################\n",
    "import time\n",
    "import ray\n",
    "# See the Ray documentation to choose the Ray configuration that best suits your setup.\n",
    "ray.init()\n",
    "from IPython.display import Markdown, display\n",
    "def printmd(string):\n",
    "    display(Markdown(string))"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Dataset: NYC taxi trip data\n",
    "\n",
    "Link to raw dataset: https://modin-datasets.intel.com/testing/yellow_tripdata_2015-01.csv (**Size: ~200MB**)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "('taxi.csv', <http.client.HTTPMessage at 0x1307faf70>)"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# This may take a few minutes to download\n",
    "import urllib.request\n",
    "dataset_url = \"https://modin-datasets.intel.com/testing/yellow_tripdata_2015-01.csv\"\n",
    "urllib.request.urlretrieve(dataset_url, \"taxi.csv\")  "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Faster Data Loading with Modin's ``read_csv``"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "DtypeWarning: Columns (6) have mixed types.Specify dtype option on import or set low_memory=False.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Time to read with pandas: 2.744 seconds\n"
     ]
    }
   ],
   "source": [
    "start = time.time()\n",
    "\n",
    "pandas_df = pandas.read_csv(\"taxi.csv\", parse_dates=[\"tpep_pickup_datetime\", \"tpep_dropoff_datetime\"], quoting=3)\n",
    "\n",
    "end = time.time()\n",
    "pandas_duration = end - start\n",
    "print(\"Time to read with pandas: {} seconds\".format(round(pandas_duration, 3)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Time to read with Modin: 1.35 seconds\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "UserWarning: `read_*` implementation has mismatches with pandas:\n",
      "Data types of partitions are different! Please refer to the troubleshooting section of the Modin documentation to fix this issue.\n"
     ]
    },
    {
     "data": {
      "text/markdown": [
       "## Modin is 2.03x faster than pandas at `read_csv`!"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "start = time.time()\n",
    "\n",
    "modin_df = pd.read_csv(\"taxi.csv\", parse_dates=[\"tpep_pickup_datetime\", \"tpep_dropoff_datetime\"], quoting=3)\n",
    "\n",
    "end = time.time()\n",
    "modin_duration = end - start\n",
    "print(\"Time to read with Modin: {} seconds\".format(round(modin_duration, 3)))\n",
    "\n",
    "printmd(\"## Modin is {}x faster than pandas at `read_csv`!\".format(round(pandas_duration / modin_duration, 2)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "You can quickly check that the results from pandas and Modin are exactly the same."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>VendorID</th>\n",
       "      <th>tpep_pickup_datetime</th>\n",
       "      <th>tpep_dropoff_datetime</th>\n",
       "      <th>passenger_count</th>\n",
       "      <th>trip_distance</th>\n",
       "      <th>RatecodeID</th>\n",
       "      <th>store_and_fwd_flag</th>\n",
       "      <th>PULocationID</th>\n",
       "      <th>DOLocationID</th>\n",
       "      <th>payment_type</th>\n",
       "      <th>fare_amount</th>\n",
       "      <th>extra</th>\n",
       "      <th>mta_tax</th>\n",
       "      <th>tip_amount</th>\n",
       "      <th>tolls_amount</th>\n",
       "      <th>improvement_surcharge</th>\n",
       "      <th>total_amount</th>\n",
       "      <th>congestion_surcharge</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>2021-01-01 00:30:10</td>\n",
       "      <td>2021-01-01 00:36:12</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.10</td>\n",
       "      <td>1.0</td>\n",
       "      <td>N</td>\n",
       "      <td>142</td>\n",
       "      <td>43</td>\n",
       "      <td>2.0</td>\n",
       "      <td>8.00</td>\n",
       "      <td>3.00</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.3</td>\n",
       "      <td>11.80</td>\n",
       "      <td>2.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.0</td>\n",
       "      <td>2021-01-01 00:51:20</td>\n",
       "      <td>2021-01-01 00:52:19</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.20</td>\n",
       "      <td>1.0</td>\n",
       "      <td>N</td>\n",
       "      <td>238</td>\n",
       "      <td>151</td>\n",
       "      <td>2.0</td>\n",
       "      <td>3.00</td>\n",
       "      <td>0.50</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.3</td>\n",
       "      <td>4.30</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.0</td>\n",
       "      <td>2021-01-01 00:43:30</td>\n",
       "      <td>2021-01-01 01:11:06</td>\n",
       "      <td>1.0</td>\n",
       "      <td>14.70</td>\n",
       "      <td>1.0</td>\n",
       "      <td>N</td>\n",
       "      <td>132</td>\n",
       "      <td>165</td>\n",
       "      <td>1.0</td>\n",
       "      <td>42.00</td>\n",
       "      <td>0.50</td>\n",
       "      <td>0.5</td>\n",
       "      <td>8.65</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.3</td>\n",
       "      <td>51.95</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.0</td>\n",
       "      <td>2021-01-01 00:15:48</td>\n",
       "      <td>2021-01-01 00:31:01</td>\n",
       "      <td>0.0</td>\n",
       "      <td>10.60</td>\n",
       "      <td>1.0</td>\n",
       "      <td>N</td>\n",
       "      <td>138</td>\n",
       "      <td>132</td>\n",
       "      <td>1.0</td>\n",
       "      <td>29.00</td>\n",
       "      <td>0.50</td>\n",
       "      <td>0.5</td>\n",
       "      <td>6.05</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.3</td>\n",
       "      <td>36.35</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2.0</td>\n",
       "      <td>2021-01-01 00:31:49</td>\n",
       "      <td>2021-01-01 00:48:21</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.94</td>\n",
       "      <td>1.0</td>\n",
       "      <td>N</td>\n",
       "      <td>68</td>\n",
       "      <td>33</td>\n",
       "      <td>1.0</td>\n",
       "      <td>16.50</td>\n",
       "      <td>0.50</td>\n",
       "      <td>0.5</td>\n",
       "      <td>4.06</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.3</td>\n",
       "      <td>24.36</td>\n",
       "      <td>2.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1369760</th>\n",
       "      <td>NaN</td>\n",
       "      <td>2021-01-25 08:32:04</td>\n",
       "      <td>2021-01-25 08:49:32</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8.80</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>135</td>\n",
       "      <td>82</td>\n",
       "      <td>NaN</td>\n",
       "      <td>21.84</td>\n",
       "      <td>2.75</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.3</td>\n",
       "      <td>25.39</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1369761</th>\n",
       "      <td>NaN</td>\n",
       "      <td>2021-01-25 08:34:00</td>\n",
       "      <td>2021-01-25 09:04:00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5.86</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>42</td>\n",
       "      <td>161</td>\n",
       "      <td>NaN</td>\n",
       "      <td>26.67</td>\n",
       "      <td>2.75</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.3</td>\n",
       "      <td>30.22</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1369762</th>\n",
       "      <td>NaN</td>\n",
       "      <td>2021-01-25 08:37:00</td>\n",
       "      <td>2021-01-25 08:53:00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.45</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>14</td>\n",
       "      <td>106</td>\n",
       "      <td>NaN</td>\n",
       "      <td>25.29</td>\n",
       "      <td>2.75</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.3</td>\n",
       "      <td>28.84</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1369763</th>\n",
       "      <td>NaN</td>\n",
       "      <td>2021-01-25 08:28:00</td>\n",
       "      <td>2021-01-25 08:50:00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>10.04</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>175</td>\n",
       "      <td>216</td>\n",
       "      <td>NaN</td>\n",
       "      <td>28.24</td>\n",
       "      <td>2.75</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.3</td>\n",
       "      <td>31.79</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1369764</th>\n",
       "      <td>NaN</td>\n",
       "      <td>2021-01-25 08:38:00</td>\n",
       "      <td>2021-01-25 08:50:00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.93</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>248</td>\n",
       "      <td>168</td>\n",
       "      <td>NaN</td>\n",
       "      <td>20.76</td>\n",
       "      <td>2.75</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.3</td>\n",
       "      <td>24.31</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1369765 rows × 18 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         VendorID tpep_pickup_datetime tpep_dropoff_datetime  passenger_count  \\\n",
       "0             1.0  2021-01-01 00:30:10   2021-01-01 00:36:12              1.0   \n",
       "1             1.0  2021-01-01 00:51:20   2021-01-01 00:52:19              1.0   \n",
       "2             1.0  2021-01-01 00:43:30   2021-01-01 01:11:06              1.0   \n",
       "3             1.0  2021-01-01 00:15:48   2021-01-01 00:31:01              0.0   \n",
       "4             2.0  2021-01-01 00:31:49   2021-01-01 00:48:21              1.0   \n",
       "...           ...                  ...                   ...              ...   \n",
       "1369760       NaN  2021-01-25 08:32:04   2021-01-25 08:49:32              NaN   \n",
       "1369761       NaN  2021-01-25 08:34:00   2021-01-25 09:04:00              NaN   \n",
       "1369762       NaN  2021-01-25 08:37:00   2021-01-25 08:53:00              NaN   \n",
       "1369763       NaN  2021-01-25 08:28:00   2021-01-25 08:50:00              NaN   \n",
       "1369764       NaN  2021-01-25 08:38:00   2021-01-25 08:50:00              NaN   \n",
       "\n",
       "         trip_distance  RatecodeID store_and_fwd_flag  PULocationID  \\\n",
       "0                 2.10         1.0                  N           142   \n",
       "1                 0.20         1.0                  N           238   \n",
       "2                14.70         1.0                  N           132   \n",
       "3                10.60         1.0                  N           138   \n",
       "4                 4.94         1.0                  N            68   \n",
       "...                ...         ...                ...           ...   \n",
       "1369760           8.80         NaN                NaN           135   \n",
       "1369761           5.86         NaN                NaN            42   \n",
       "1369762           4.45         NaN                NaN            14   \n",
       "1369763          10.04         NaN                NaN           175   \n",
       "1369764           4.93         NaN                NaN           248   \n",
       "\n",
       "         DOLocationID  payment_type  fare_amount  extra  mta_tax  tip_amount  \\\n",
       "0                  43           2.0         8.00   3.00      0.5        0.00   \n",
       "1                 151           2.0         3.00   0.50      0.5        0.00   \n",
       "2                 165           1.0        42.00   0.50      0.5        8.65   \n",
       "3                 132           1.0        29.00   0.50      0.5        6.05   \n",
       "4                  33           1.0        16.50   0.50      0.5        4.06   \n",
       "...               ...           ...          ...    ...      ...         ...   \n",
       "1369760            82           NaN        21.84   2.75      0.5        0.00   \n",
       "1369761           161           NaN        26.67   2.75      0.5        0.00   \n",
       "1369762           106           NaN        25.29   2.75      0.5        0.00   \n",
       "1369763           216           NaN        28.24   2.75      0.5        0.00   \n",
       "1369764           168           NaN        20.76   2.75      0.5        0.00   \n",
       "\n",
       "         tolls_amount  improvement_surcharge  total_amount  \\\n",
       "0                 0.0                    0.3         11.80   \n",
       "1                 0.0                    0.3          4.30   \n",
       "2                 0.0                    0.3         51.95   \n",
       "3                 0.0                    0.3         36.35   \n",
       "4                 0.0                    0.3         24.36   \n",
       "...               ...                    ...           ...   \n",
       "1369760           0.0                    0.3         25.39   \n",
       "1369761           0.0                    0.3         30.22   \n",
       "1369762           0.0                    0.3         28.84   \n",
       "1369763           0.0                    0.3         31.79   \n",
       "1369764           0.0                    0.3         24.31   \n",
       "\n",
       "         congestion_surcharge  \n",
       "0                         2.5  \n",
       "1                         0.0  \n",
       "2                         0.0  \n",
       "3                         0.0  \n",
       "4                         2.5  \n",
       "...                       ...  \n",
       "1369760                   0.0  \n",
       "1369761                   0.0  \n",
       "1369762                   0.0  \n",
       "1369763                   0.0  \n",
       "1369764                   0.0  \n",
       "\n",
       "[1369765 rows x 18 columns]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pandas_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>VendorID</th>\n",
       "      <th>tpep_pickup_datetime</th>\n",
       "      <th>tpep_dropoff_datetime</th>\n",
       "      <th>passenger_count</th>\n",
       "      <th>trip_distance</th>\n",
       "      <th>RatecodeID</th>\n",
       "      <th>store_and_fwd_flag</th>\n",
       "      <th>PULocationID</th>\n",
       "      <th>DOLocationID</th>\n",
       "      <th>payment_type</th>\n",
       "      <th>fare_amount</th>\n",
       "      <th>extra</th>\n",
       "      <th>mta_tax</th>\n",
       "      <th>tip_amount</th>\n",
       "      <th>tolls_amount</th>\n",
       "      <th>improvement_surcharge</th>\n",
       "      <th>total_amount</th>\n",
       "      <th>congestion_surcharge</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>2021-01-01 00:30:10</td>\n",
       "      <td>2021-01-01 00:36:12</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.10</td>\n",
       "      <td>1.0</td>\n",
       "      <td>N</td>\n",
       "      <td>142</td>\n",
       "      <td>43</td>\n",
       "      <td>2.0</td>\n",
       "      <td>8.00</td>\n",
       "      <td>3.00</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.3</td>\n",
       "      <td>11.80</td>\n",
       "      <td>2.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.0</td>\n",
       "      <td>2021-01-01 00:51:20</td>\n",
       "      <td>2021-01-01 00:52:19</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.20</td>\n",
       "      <td>1.0</td>\n",
       "      <td>N</td>\n",
       "      <td>238</td>\n",
       "      <td>151</td>\n",
       "      <td>2.0</td>\n",
       "      <td>3.00</td>\n",
       "      <td>0.50</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.3</td>\n",
       "      <td>4.30</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.0</td>\n",
       "      <td>2021-01-01 00:43:30</td>\n",
       "      <td>2021-01-01 01:11:06</td>\n",
       "      <td>1.0</td>\n",
       "      <td>14.70</td>\n",
       "      <td>1.0</td>\n",
       "      <td>N</td>\n",
       "      <td>132</td>\n",
       "      <td>165</td>\n",
       "      <td>1.0</td>\n",
       "      <td>42.00</td>\n",
       "      <td>0.50</td>\n",
       "      <td>0.5</td>\n",
       "      <td>8.65</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.3</td>\n",
       "      <td>51.95</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.0</td>\n",
       "      <td>2021-01-01 00:15:48</td>\n",
       "      <td>2021-01-01 00:31:01</td>\n",
       "      <td>0.0</td>\n",
       "      <td>10.60</td>\n",
       "      <td>1.0</td>\n",
       "      <td>N</td>\n",
       "      <td>138</td>\n",
       "      <td>132</td>\n",
       "      <td>1.0</td>\n",
       "      <td>29.00</td>\n",
       "      <td>0.50</td>\n",
       "      <td>0.5</td>\n",
       "      <td>6.05</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.3</td>\n",
       "      <td>36.35</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2.0</td>\n",
       "      <td>2021-01-01 00:31:49</td>\n",
       "      <td>2021-01-01 00:48:21</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.94</td>\n",
       "      <td>1.0</td>\n",
       "      <td>N</td>\n",
       "      <td>68</td>\n",
       "      <td>33</td>\n",
       "      <td>1.0</td>\n",
       "      <td>16.50</td>\n",
       "      <td>0.50</td>\n",
       "      <td>0.5</td>\n",
       "      <td>4.06</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.3</td>\n",
       "      <td>24.36</td>\n",
       "      <td>2.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1369760</th>\n",
       "      <td>NaN</td>\n",
       "      <td>2021-01-25 08:32:04</td>\n",
       "      <td>2021-01-25 08:49:32</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8.80</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>135</td>\n",
       "      <td>82</td>\n",
       "      <td>NaN</td>\n",
       "      <td>21.84</td>\n",
       "      <td>2.75</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.3</td>\n",
       "      <td>25.39</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1369761</th>\n",
       "      <td>NaN</td>\n",
       "      <td>2021-01-25 08:34:00</td>\n",
       "      <td>2021-01-25 09:04:00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5.86</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>42</td>\n",
       "      <td>161</td>\n",
       "      <td>NaN</td>\n",
       "      <td>26.67</td>\n",
       "      <td>2.75</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.3</td>\n",
       "      <td>30.22</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1369762</th>\n",
       "      <td>NaN</td>\n",
       "      <td>2021-01-25 08:37:00</td>\n",
       "      <td>2021-01-25 08:53:00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.45</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>14</td>\n",
       "      <td>106</td>\n",
       "      <td>NaN</td>\n",
       "      <td>25.29</td>\n",
       "      <td>2.75</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.3</td>\n",
       "      <td>28.84</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1369763</th>\n",
       "      <td>NaN</td>\n",
       "      <td>2021-01-25 08:28:00</td>\n",
       "      <td>2021-01-25 08:50:00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>10.04</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>175</td>\n",
       "      <td>216</td>\n",
       "      <td>NaN</td>\n",
       "      <td>28.24</td>\n",
       "      <td>2.75</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.3</td>\n",
       "      <td>31.79</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1369764</th>\n",
       "      <td>NaN</td>\n",
       "      <td>2021-01-25 08:38:00</td>\n",
       "      <td>2021-01-25 08:50:00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.93</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>248</td>\n",
       "      <td>168</td>\n",
       "      <td>NaN</td>\n",
       "      <td>20.76</td>\n",
       "      <td>2.75</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.3</td>\n",
       "      <td>24.31</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1369765 rows x 18 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         VendorID tpep_pickup_datetime tpep_dropoff_datetime  passenger_count  \\\n",
       "0             1.0  2021-01-01 00:30:10   2021-01-01 00:36:12              1.0   \n",
       "1             1.0  2021-01-01 00:51:20   2021-01-01 00:52:19              1.0   \n",
       "2             1.0  2021-01-01 00:43:30   2021-01-01 01:11:06              1.0   \n",
       "3             1.0  2021-01-01 00:15:48   2021-01-01 00:31:01              0.0   \n",
       "4             2.0  2021-01-01 00:31:49   2021-01-01 00:48:21              1.0   \n",
       "...           ...                  ...                   ...              ...   \n",
       "1369760       NaN  2021-01-25 08:32:04   2021-01-25 08:49:32              NaN   \n",
       "1369761       NaN  2021-01-25 08:34:00   2021-01-25 09:04:00              NaN   \n",
       "1369762       NaN  2021-01-25 08:37:00   2021-01-25 08:53:00              NaN   \n",
       "1369763       NaN  2021-01-25 08:28:00   2021-01-25 08:50:00              NaN   \n",
       "1369764       NaN  2021-01-25 08:38:00   2021-01-25 08:50:00              NaN   \n",
       "\n",
       "         trip_distance  RatecodeID store_and_fwd_flag  PULocationID  \\\n",
       "0                 2.10         1.0                  N           142   \n",
       "1                 0.20         1.0                  N           238   \n",
       "2                14.70         1.0                  N           132   \n",
       "3                10.60         1.0                  N           138   \n",
       "4                 4.94         1.0                  N            68   \n",
       "...                ...         ...                ...           ...   \n",
       "1369760           8.80         NaN                NaN           135   \n",
       "1369761           5.86         NaN                NaN            42   \n",
       "1369762           4.45         NaN                NaN            14   \n",
       "1369763          10.04         NaN                NaN           175   \n",
       "1369764           4.93         NaN                NaN           248   \n",
       "\n",
       "         DOLocationID  payment_type  fare_amount  extra  mta_tax  tip_amount  \\\n",
       "0                  43           2.0         8.00   3.00      0.5        0.00   \n",
       "1                 151           2.0         3.00   0.50      0.5        0.00   \n",
       "2                 165           1.0        42.00   0.50      0.5        8.65   \n",
       "3                 132           1.0        29.00   0.50      0.5        6.05   \n",
       "4                  33           1.0        16.50   0.50      0.5        4.06   \n",
       "...               ...           ...          ...    ...      ...         ...   \n",
       "1369760            82           NaN        21.84   2.75      0.5        0.00   \n",
       "1369761           161           NaN        26.67   2.75      0.5        0.00   \n",
       "1369762           106           NaN        25.29   2.75      0.5        0.00   \n",
       "1369763           216           NaN        28.24   2.75      0.5        0.00   \n",
       "1369764           168           NaN        20.76   2.75      0.5        0.00   \n",
       "\n",
       "         tolls_amount  improvement_surcharge  total_amount  \\\n",
       "0                 0.0                    0.3         11.80   \n",
       "1                 0.0                    0.3          4.30   \n",
       "2                 0.0                    0.3         51.95   \n",
       "3                 0.0                    0.3         36.35   \n",
       "4                 0.0                    0.3         24.36   \n",
       "...               ...                    ...           ...   \n",
       "1369760           0.0                    0.3         25.39   \n",
       "1369761           0.0                    0.3         30.22   \n",
       "1369762           0.0                    0.3         28.84   \n",
       "1369763           0.0                    0.3         31.79   \n",
       "1369764           0.0                    0.3         24.31   \n",
       "\n",
       "         congestion_surcharge  \n",
       "0                         2.5  \n",
       "1                         0.0  \n",
       "2                         0.0  \n",
       "3                         0.0  \n",
       "4                         2.5  \n",
       "...                       ...  \n",
       "1369760                   0.0  \n",
       "1369761                   0.0  \n",
       "1369762                   0.0  \n",
       "1369763                   0.0  \n",
       "1369764                   0.0  \n",
       "\n",
       "[1369765 rows x 18 columns]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "modin_df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Faster Append with Modin's ``concat``"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Our previous ``read_csv`` example operated on a relatively small dataframe. In the following example, we duplicate the same taxi dataset 100 times and then concatenate them together.\n",
    "\n",
    "Please note that this quickstart notebook assumes a machine with enough memory to perform the operations with both pandas and Modin in a single pipeline (which at least doubles the amount of required memory). If your machine doesn't have enough resources to execute every cell of the notebook and you hit an out-of-memory (OOM) error, you most likely need to reduce ``N_copies`` in the cell below."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Time to concat with pandas: 34.144 seconds\n"
     ]
    }
   ],
   "source": [
    "N_copies= 100\n",
    "start = time.time()\n",
    "\n",
    "big_pandas_df = pandas.concat([pandas_df for _ in range(N_copies)])\n",
    "\n",
    "end = time.time()\n",
    "pandas_duration = end - start\n",
    "print(\"Time to concat with pandas: {} seconds\".format(round(pandas_duration, 3)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Time to concat with Modin: 0.564 seconds\n"
     ]
    },
    {
     "data": {
      "text/markdown": [
       "### Modin is 60.57x faster than pandas at `concat`!"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "start = time.time()\n",
    "\n",
    "big_modin_df = pd.concat([modin_df for _ in range(N_copies)])\n",
    "\n",
    "end = time.time()\n",
    "modin_duration = end - start\n",
    "print(\"Time to concat with Modin: {} seconds\".format(round(modin_duration, 3)))\n",
    "\n",
    "printmd(\"### Modin is {}x faster than pandas at `concat`!\".format(round(pandas_duration / modin_duration, 2)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The resulting dataset is around 19GB in size."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[2m\u001b[36m(apply_list_of_funcs pid=73415)\u001b[0m \n",
      "\u001b[2m\u001b[36m(apply_list_of_funcs pid=73416)\u001b[0m \n",
      "<class 'modin.pandas.dataframe.DataFrame'>\n",
      "Int64Index: 136976500 entries, 0 to 1369764\n",
      "Data columns (total 18 columns):\n",
      " #   Column                 Non-Null Count      Dtype         \n",
      "---  ---------------------  ------------------  -----         \n",
      " 0   VendorID               127141300 non-null  float64\n",
      " 1   tpep_pickup_datetime   136976500 non-null  datetime64[ns]\n",
      " 2   tpep_dropoff_datetime  136976500 non-null  datetime64[ns]\n",
      " 3   passenger_count        127141300 non-null  float64\n",
      " 4   trip_distance          136976500 non-null  float64\n",
      " 5   RatecodeID             127141300 non-null  float64\n",
      " 6   store_and_fwd_flag     127141300 non-null  object\n",
      " 7   PULocationID           136976500 non-null  int64\n",
      " 8   DOLocationID           136976500 non-null  int64\n",
      " 9   payment_type           127141300 non-null  float64\n",
      " 10  fare_amount            136976500 non-null  float64\n",
      " 11  extra                  136976500 non-null  float64\n",
      " 12  mta_tax                136976500 non-null  float64\n",
      " 13  tip_amount             136976500 non-null  float64\n",
      " 14  tolls_amount           136976500 non-null  float64\n",
      " 15  improvement_surcharge  136976500 non-null  float64\n",
      " 16  total_amount           136976500 non-null  float64\n",
      " 17  congestion_surcharge   136976500 non-null  float64\n",
      "dtypes: float64(13), datetime64[ns](2), int64(2), object(1)\n",
      "memory usage: 19.4 GB\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "UserWarning: Distributing <class 'int'> object. This may take some time.\n"
     ]
    }
   ],
   "source": [
    "big_modin_df.info()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Faster ``apply`` over a single column\n",
    "\n",
    "The performance benefits of Modin become apparent when we operate on large, gigabyte-scale datasets. For example, let's say that we want to round the values in a single column to the nearest integer via the ``apply`` operation."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Time to apply with pandas: 43.969 seconds\n"
     ]
    }
   ],
   "source": [
    "start = time.time()\n",
    "rounded_trip_distance_pandas = big_pandas_df[\"trip_distance\"].apply(round)\n",
    "\n",
    "end = time.time()\n",
    "pandas_duration = end - start\n",
    "print(\"Time to apply with pandas: {} seconds\".format(round(pandas_duration, 3)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Time to apply with Modin: 1.225 seconds\n"
     ]
    },
    {
     "data": {
      "text/markdown": [
       "### Modin is 35.88x faster than pandas at `apply` on one column!"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "start = time.time()\n",
    "\n",
    "rounded_trip_distance_modin = big_modin_df[\"trip_distance\"].apply(round)\n",
    "\n",
    "end = time.time()\n",
    "modin_duration = end - start\n",
    "print(\"Time to apply with Modin: {} seconds\".format(round(modin_duration, 3)))\n",
    "\n",
    "printmd(\"### Modin is {}x faster than pandas at `apply` on one column!\".format(round(pandas_duration / modin_duration, 2)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Summary\n",
    "\n",
    "Hopefully, this tutorial demonstrated how Modin delivers significant speedup on pandas operations without the need for any extra effort. Throughout this example, we moved from working with 100 MB of data to 20 GB of data, all without having to change anything or manually optimize our code to achieve the level of scalable performance that Modin provides.\n",
    "\n",
    "Note that in this quickstart example, we've only shown ``read_csv``, ``concat``, and ``apply``, but these are not the only pandas operations that Modin optimizes. In fact, Modin covers [more than 90% of the pandas API](https://github.com/modin-project/modin/blob/main/README.md#pandas-api-coverage), yielding considerable speedups for many common operations."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}


================================================
FILE: examples/spreadsheet/requirements.txt
================================================
ray==1.1.0
git+https://github.com/modin-project/modin
git+https://github.com/modin-project/modin-spreadsheet.git@49ffd89f683f54c311867d602c55443fb11bf2a5

================================================
FILE: examples/spreadsheet/tutorial.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "![LOGO](../tutorial/tutorial_notebooks/img/MODIN_ver2_hrz.png)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## modin.spreadsheet\n",
    "`modin.spreadsheet` is a Jupyter notebook widget that allows users to interact with Modin DataFrames in a spreadsheet-like fashion while taking advantage of the underlying capabilities of Modin. The widget makes it quick and easy to explore, sort, filter, edit data and export reproducible code. \n",
    "\n",
    "This tutorial will showcase how to use `modin.spreadsheet`. Before starting, please install the required packages using `pip install -r requirements.txt` in the current directory. Then just run the cells; no editing required!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Please install the required packages using `pip install -r requirements.txt` in the current directory\n",
    "# For all ways to install Modin see official documentation at:\n",
    "# https://modin.readthedocs.io/en/latest/installation.html\n",
    "import modin.pandas as pd\n",
    "import modin.spreadsheet as mss"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Create a Modin DataFrame\n",
    "The following cells create a DataFrame using a NYC taxi dataset."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "columns_names = [\n",
    "        \"trip_id\", \"vendor_id\", \"pickup_datetime\", \"dropoff_datetime\", \"store_and_fwd_flag\",\n",
    "        \"rate_code_id\", \"pickup_longitude\", \"pickup_latitude\", \"dropoff_longitude\", \"dropoff_latitude\",\n",
    "        \"passenger_count\", \"trip_distance\", \"fare_amount\", \"extra\", \"mta_tax\", \"tip_amount\",\n",
    "        \"tolls_amount\", \"ehail_fee\", \"improvement_surcharge\", \"total_amount\", \"payment_type\",\n",
    "        \"trip_type\", \"pickup\", \"dropoff\", \"cab_type\", \"precipitation\", \"snow_depth\", \"snowfall\",\n",
    "        \"max_temperature\", \"min_temperature\", \"average_wind_speed\", \"pickup_nyct2010_gid\",\n",
    "        \"pickup_ctlabel\", \"pickup_borocode\", \"pickup_boroname\", \"pickup_ct2010\",\n",
    "        \"pickup_boroct2010\", \"pickup_cdeligibil\", \"pickup_ntacode\", \"pickup_ntaname\", \"pickup_puma\",\n",
    "        \"dropoff_nyct2010_gid\", \"dropoff_ctlabel\", \"dropoff_borocode\", \"dropoff_boroname\",\n",
    "        \"dropoff_ct2010\", \"dropoff_boroct2010\", \"dropoff_cdeligibil\", \"dropoff_ntacode\",\n",
    "        \"dropoff_ntaname\", \"dropoff_puma\",\n",
    "    ]\n",
    "parse_dates=[\"pickup_datetime\", \"dropoff_datetime\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv('s3://modin-datasets/trips_data.csv', names=columns_names,\n",
    "                header=None, parse_dates=parse_dates)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Generate a spreadsheet widget with the DataFrame\n",
    "`mss.from_dataframe` takes in a DataFrame, optional configuration options, and returns a `SpreadsheetWidget`, which contains all the logic for displaying the spreadsheet view of the DataFrame. The object returned will not be rendered unless displayed."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "spreadsheet = mss.from_dataframe(df)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Displaying the Spreadsheet\n",
    "The widget is displayed when the widget is returned by an input cell or passed to the `display` function e.g. `display(spreadsheet)`. When displayed, the SpreadsheetWidget will generate a transformation history cell that contains a record of the transformations applied to the DataFrame unless the cell already exists or the feature is disabled.\n",
    "\n",
    "### Basic Usage\n",
    "`from_dataframe` creates a copy of the input DataFrame, so changes do not alter the original DataFrame.\n",
    "\n",
    "**Filter** - Each column can be filtered according to its datatype using the filter button to the right of the column header. Any number of columns can be filtered simultaneously.\\\n",
    "**Sort** - Each column can be sorted by clicking on the column header. Only the most recent sort is guaranteed to hold, i.e. the ordering produced by an earlier sort may not be preserved, even among rows that share the same value in the last sorted column.\\\n",
    "**Cell Edit** - Double click on a cell to edit its value.\\\n",
    "**Add Row**(toolbar) - Click on the `Add Row` button in the toolbar to duplicate the last row in the DataFrame.\\\n",
    "**Remove Row**(toolbar) - Select row(s) on the spreadsheet and click the `Remove Row` button in the toolbar to remove them.\\\n",
    "**Reset Filters**(toolbar) - Click on the `Reset Filters` button in the toolbar to remove all filters on the data.\\\n",
    "**Reset Sort**(toolbar) - Click on the `Reset Sort` button in the toolbar to remove any sorting on the data.\n",
    "\n",
    "### Transformation History and Reproducible Code\n",
    "The widget records the history of transformations, such as filtering, that occur on the spreadsheet. These transformations are updated in the `spreadsheet transformation history` cell as they happen and can be easily copied for reproducibility. The history can be cleared using the `Clear History` button in the toolbar.\n",
    "\n",
    "**Try making some changes to the spreadsheet!**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "spreadsheet"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exporting Changes\n",
    "`to_dataframe` takes in a `SpreadsheetWidget` and returns a copy of the DataFrame reflecting the current state of the UI on the widget. Specifically, any filters, edits, or sorts will be applied on the returned DataFrame.\n",
    "\n",
    "**Export a DataFrame after making some changes on the spreadsheet UI**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "changed_df = mss.to_dataframe(spreadsheet)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "changed_df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## SpreadsheetWidget API\n",
    "The API on `SpreadsheetWidget` allows users to replicate some of the functionality on the GUI, but also provides other functionality such as applying the transformation history on another DataFrame or getting the DataFrame that matches the spreadsheet state like `to_dataframe`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Duplicates the `Reset Filters` button\n",
    "spreadsheet.reset_filters()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Duplicates the `Reset Sort` button\n",
    "spreadsheet.reset_sort()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "qgrid6f69f373-ae0e-423e-8e26-429f52e1669d": true
   },
   "outputs": [],
   "source": [
    "# Duplicates the `Clear History` button\n",
    "spreadsheet.clear_history()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Gets the modified DataFrame that matches the changes to the spreadsheet\n",
    "# This is the same functionality as `mss.to_dataframe`\n",
    "spreadsheet.get_changed_df()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Retrieving and Applying Transformation History \n",
    "The transformation history can be retrieved as a list of code snippets using the `get_history` API. The `apply_history` API will apply the transformations on the input DataFrame and return the resultant DataFrame."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "spreadsheet.get_history()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "another_df = df.copy()\n",
    "spreadsheet.apply_history(another_df)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Additional Example\n",
    "Here is another example of how to use `from_dataframe` with configuration options."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "mss.from_dataframe(df, show_toolbar=False, grid_options={'forceFitColumns': False, 'editable': False, 'highlightSelectedCell': True})"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}


================================================
FILE: examples/tutorial/README.md
================================================
# Modin tutorial Jupyter Notebooks

Tutorial for how to use different features of Modin.


================================================
FILE: examples/tutorial/jupyter/README.md
================================================
# Jupyter notebook examples to run with Modin

Currently we provide tutorial notebooks for the following execution backends:

- [PandasOnRay](https://modin.readthedocs.io/en/latest/development/using_pandas_on_ray.html)
- [PandasOnDask](https://modin.readthedocs.io/en/latest/development/using_pandas_on_dask.html)
- [PandasOnMPI through unidist](https://modin.readthedocs.io/en/latest/development/using_pandas_on_mpi.html)

## Creating a development environment

To get the required dependencies for the `PandasOnRay`, `PandasOnDask` and `PandasOnUnidist` Jupyter Notebooks,
create a development environment with `pip`
using the `requirements.txt` file located in the respective directory:

```bash
pip install -r execution/pandas_on_ray/requirements.txt
```

to install dependencies needed to run notebooks with Modin on `PandasOnRay` execution or

```bash
pip install -r execution/pandas_on_dask/requirements.txt
```

to install dependencies needed to run notebooks with Modin on `PandasOnDask` execution or

```bash
pip install -r execution/pandas_on_unidist/requirements.txt
```

to install dependencies needed to run notebooks with Modin on `PandasOnUnidist` execution.

**Note:** Sometimes pip is installing every version of a package. If you encounter that issue,
please install every package listed in `requirements.txt` file individually with `pip install <package>`.

## Run Jupyter Notebooks

A Jupyter Notebook server can be run from the current directory as follows:

```bash
jupyter notebook
```

Navigate to a concrete notebook (for example, to the `execution/pandas_on_ray/local/exercise_1.ipynb`).

**Note:** Since there are some specifics regarding the run of jupyter notebooks with the `Unidist` engine,
refer to [PandasOnUnidist](https://github.com/modin-project/modin/blob/main/examples/tutorial/jupyter/execution/pandas_on_unidist/README.md) document
to get more information on the matter.

================================================
FILE: examples/tutorial/jupyter/execution/pandas_on_dask/Dockerfile
================================================
FROM continuumio/miniconda3

RUN conda install -c conda-forge psutil setproctitle
RUN pip install -r requirements-dev.txt


================================================
FILE: examples/tutorial/jupyter/execution/pandas_on_dask/cluster/exercise_5.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "![LOGO](../../../img/MODIN_ver2_hrz.png)\n",
    "\n",
    "<h2>Scale your pandas workflows by changing one line of code</h2>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Exercise 5: Setting up cluster environment\n",
    "\n",
    "**GOAL**: Learn how to set up a Dask cluster for Modin, connect Modin to a Dask cluster and run pandas queries on a cluster.\n",
    "\n",
    "**NOTE**: This exercise has extra requirements. Read instructions carefully before attempting. \n",
    "\n",
    "**This exercise instructs users on how to start a 500+ core Dask cluster, and it is not shut down until the end of exercise.**"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Often in practice we have a need to exceed the capabilities of a single machine. Modin works and performs well \n",
    "in both local mode and in a cluster environment. The key advantage of Modin is that your python code does not \n",
    "change between local development and cluster execution. Users are not required to think about how many workers \n",
    "exist or how to distribute and partition their data; Modin handles all of this seamlessly and transparently.\n",
    "\n",
    "![Cluster](../../../img/modin_cluster.png)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Extra requirements for AWS authentication\n",
    "\n",
    "First of all, install the necessary dependencies in your environment:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip install dask_cloudprovider[aws]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The next step is to set up your AWS credentials, namely, set ``AWS_ACCESS_KEY_ID``, ``AWS_SECRET_ACCESS_KEY``\n",
    "and ``AWS_SESSION_TOKEN`` (Optional) (refer to [AWS CLI environment variables](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-envvars.html) to get more insight on this):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "os.environ[\"AWS_ACCESS_KEY_ID\"] = \"<aws_access_key_id>\"\n",
    "os.environ[\"AWS_SECRET_ACCESS_KEY\"] = \"<aws_secret_access_key>\"\n",
    "os.environ[\"AWS_SESSION_TOKEN\"] = \"<aws_session_token>\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Starting and connecting to the cluster\n",
    "\n",
    "This example starts 1 scheduler node (m5.24xlarge) and 6 worker nodes (m5.24xlarge), 576 total CPUs. Keep in mind the scheduler node manages cluster operation but doesn't perform any execution.\n",
    "\n",
    "You can check the [Amazon EC2 pricing](https://aws.amazon.com/ec2/pricing/on-demand/) page.\n",
    "\n",
    "A Dask cluster can be deployed in different ways (refer to the [Dask documentation](https://docs.dask.org/en/latest/deploying.html) to get more information about it), but in this tutorial we will use the ``EC2Cluster`` from [dask_cloudprovider](https://cloudprovider.dask.org/en/latest/) to create and initialize a Dask cluster on Amazon Web Services (AWS).\n",
    "\n",
    "**Note**: EC2Cluster uses a docker container to run the scheduler and each of the workers. Probably you need to use another docker image depending on your python version and requirements. You can find more docker-images on [daskdev](https://hub.docker.com/u/daskdev) page.\n",
    "\n",
    "In the next cell you can see how the EC2Cluster is being created. <b>Set your ``key_name`` and modify AWS settings as required before running it.</b>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from dask_cloudprovider.aws import EC2Cluster\n",
    "\n",
    "n_workers = 6\n",
    "cluster = EC2Cluster(\n",
    "    # AWS parameters\n",
    "    key_name = \"\", # set your keyname\n",
    "    region = \"us-west-2\",\n",
    "    availability_zone = [\"us-west-2a\"],\n",
    "    ami = \"ami-0387d929287ab193e\",\n",
    "    instance_type = \"m5.24xlarge\",\n",
    "    vpc = \"vpc-002bd14c63f227832\",\n",
    "    subnet_id = \"subnet-09860dafd79720938\",\n",
    "    filesystem_size = 200, # in GB\n",
    "\n",
    "    # DASK parameters\n",
    "    n_workers = n_workers,\n",
    "    docker_image = \"daskdev/dask:latest\",\n",
    "    debug = True,\n",
    "    security=False,\n",
    ")\n",
    "\n",
    "scheduler_adress = cluster.scheduler_address\n",
    "print(f\"Scheduler IP address of Dask cluster: {scheduler_adress}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "After creating the cluster you need to connect to it. To do this you should put the ``EC2Cluster`` instance or the scheduler IP address in ``distributed.Client``.\n",
    "\n",
    "When you connect to the cluster, the workers may not be initialized yet, so you need to wait for them using ``client.wait_for_workers``.\n",
    "\n",
    "Then you can call ``client.ncores()`` and check which workers are available and how many threads are used for each of them."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from distributed import Client\n",
    "\n",
    "client = Client(cluster)\n",
    "# Or use an IP address connection if the cluster instance is unavailable:\n",
    "# client = Client(f\"{scheduler_adress}:8687\")\n",
    "\n",
    "client.wait_for_workers(n_workers)\n",
    "client.ncores()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "After successful initialization of the cluster, you need to configure it.\n",
    "\n",
    "You can use plugins to install any requirements into workers:\n",
    "* [InstallPlugin](https://distributed.dask.org/en/stable/plugins.html#distributed.diagnostics.plugin.InstallPlugin)\n",
    "* [PipInstall](https://distributed.dask.org/en/stable/plugins.html#distributed.diagnostics.plugin.PipInstall)\n",
    "* [CondaInstall](https://distributed.dask.org/en/stable/plugins.html#distributed.diagnostics.plugin.CondaInstall).\n",
    "\n",
    "You have to install Modin package on each worker using ``PipInstall`` plugin."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from dask.distributed import PipInstall\n",
    "\n",
    "client.register_plugin(PipInstall(packages=[\"modin\"]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "If you need additional worker configuration, you can create your own [WorkerPlugin](https://distributed.dask.org/en/stable/plugins.html#worker-plugins) or a function that will be executed on each worker upon calling ``client.run()``.\n",
    "\n",
    "**NOTE**: Dask cluster does not check if this plugin or function has been called before. Therefore, you need to take this into account when using them.\n",
    "\n",
    "In this tutorial a CSV file will be read, so you need to download it to each of the workers and local machine with the same global path."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from dask.distributed import Worker\n",
    "import os\n",
    "import urllib\n",
    "\n",
    "def dataset_upload(file_url, file_path):\n",
    "    try:\n",
    "        dir_name = os.path.dirname(file_path)\n",
    "        if not os.path.exists(dir_name):\n",
    "            os.makedirs(dir_name)\n",
    "        if os.path.exists(file_path):\n",
    "            return \"File has already existed.\"\n",
    "        else:\n",
    "            urllib.request.urlretrieve(file_url, file_path)\n",
    "        return \"OK\"\n",
    "    except Exception as ex:\n",
    "        return str(ex)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Set the directory where it should be downloaded (the local directory will be used by default):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "directory_path = \"./\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Then you need to run the `dataset_upload` function on all workers. As a result, you will get a dictionary containing the result of the function execution for each worker:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "file_path = os.path.join(os.path.abspath(directory_path), \"taxi.csv\")\n",
    "client.run(dataset_upload, \"https://modin-datasets.intel.com/testing/yellow_tripdata_2015-01.csv\", file_path)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "You have to also execute this function on the local machine:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset_upload(\"https://modin-datasets.intel.com/testing/yellow_tripdata_2015-01.csv\", file_path)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<b>Congratulations! The cluster is now fully configured and we can start running Pandas queries.</b>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Executing in a cluster environment\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As in local mode, Modin on a cluster uses Ray as the execution engine by default, so no additional action is required to start using it. Alternatively, if you need to use another engine, it should be specified either by setting the Modin config or by setting the Modin environment variable before the first operation with Modin, as shown below. Also, note that the full list of Modin configs and corresponding environment variables can be found in the [Modin Configuration Settings](https://modin.readthedocs.io/en/stable/flow/modin/config.html#modin-configs-list) section of the Modin documentation."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Modin engine can be specified either by config\n",
    "import modin.config as cfg\n",
    "cfg.Engine.put(\"dask\")\n",
    "\n",
    "# or by setting the environment variable\n",
    "# import os\n",
    "# os.environ[\"MODIN_ENGINE\"] = \"dask\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now you can use Modin on the Dask cluster.\n",
    "\n",
    "Let's read the downloaded CSV file and execute such pandas operations as count, groupby and map:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import modin.pandas as pd\n",
    "import time\n",
    "\n",
    "t0 = time.perf_counter()\n",
    "\n",
    "df = pd.read_csv(file_path, quoting=3)\n",
    "df_count = df.count()\n",
    "df_groupby_count = df.groupby(\"passenger_count\").count()\n",
    "df_map = df.map(str)\n",
    "\n",
    "t1 = time.perf_counter()\n",
    "print(f\"Full script time is {(t1 - t0):.3f}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Shutting down the cluster\n",
    "\n",
    "Now that we have finished computation, we can shut down the cluster:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cluster.close()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### This ends the cluster exercise"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}


================================================
FILE: examples/tutorial/jupyter/execution/pandas_on_dask/local/exercise_1.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "![LOGO](../../../img/MODIN_ver2_hrz.png)\n",
    "\n",
    "<center><h2>Scale your pandas workflows by changing one line of code</h2>\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Exercise 1: How to use Modin\n",
    "\n",
    "**GOAL**: Learn how to import Modin to accelerate and scale pandas workflows."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Modin is a drop-in replacement for pandas that distributes the computation \n",
    "across all of the cores in your machine or in a cluster.\n",
    "In practical terms, this means that you can continue using the same pandas scripts\n",
    "as before and expect the behavior and results to be the same. The only thing that needs\n",
    "to change is the import statement. Normally, you would change:\n",
    "\n",
    "```python\n",
    "import pandas as pd\n",
    "```\n",
    "\n",
    "to:\n",
    "\n",
    "```python\n",
    "import modin.pandas as pd\n",
    "```\n",
    "\n",
    "Changing this line of code will allow you to use all of the cores in your machine to do computation on your data. One of the major performance bottlenecks of pandas is that it only uses a single core for any given computation. Modin exposes an API that is identical to pandas, allowing you to continue interacting with your data as you would with pandas. There are no additional commands required to use Modin locally. Partitioning, scheduling, data transfer, and other related concerns are all handled by Modin under the hood."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<p style=\"text-align:left;\">\n",
    "        <h1>pandas on a multicore laptop\n",
    "    <span style=\"float:right;\">\n",
    "        Modin on a multicore laptop\n",
    "    </span>\n",
    "\n",
    "<div>\n",
    "<img align=\"left\" src=\"../../../img/pandas_multicore.png\"><img src=\"../../../img/modin_multicore.png\">\n",
    "</div>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Concept for exercise: setting Modin engine\n",
    "\n",
    "Modin uses Ray as the execution engine by default, so no additional action is required to start using it. Alternatively, if you need to use another engine, it should be specified either by setting the Modin config or by setting the Modin environment variable before the first operation with Modin, as shown below. Also, note that the full list of Modin configs and corresponding environment variables can be found in the [Modin Configuration Settings](https://modin.readthedocs.io/en/stable/flow/modin/config.html#modin-configs-list) section of the Modin documentation."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Modin engine can be specified either by config\n",
    "import modin.config as cfg\n",
    "cfg.Engine.put(\"dask\")\n",
    "\n",
    "# or by setting the environment variable\n",
    "# import os\n",
    "# os.environ[\"MODIN_ENGINE\"] = \"dask\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Concept for exercise: Dataframe constructor\n",
    "\n",
    "Often when playing around in pandas, it is useful to create a DataFrame with the constructor. That is where we will start.\n",
    "\n",
    "```python\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "frame_data = np.random.randint(0, 100, size=(2**10, 2**5))\n",
    "df = pd.DataFrame(frame_data)\n",
    "```\n",
    "\n",
    "When creating a dataframe from a non-distributed object, it will take extra time to partition the data. When this is happening, you will see this message:\n",
    "\n",
    "```\n",
    "UserWarning: Distributing <class 'numpy.ndarray'> object. This may take some time.\n",
    "```\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Note: Do not change this code!\n",
    "import numpy as np\n",
    "import pandas\n",
    "import sys\n",
    "import modin"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pandas.__version__"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "modin.__version__"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Implement your answer here. You are also free to play with the size\n",
    "# and shape of the DataFrame, but beware of exceeding your memory!\n",
    "\n",
    "import pandas as pd\n",
    "\n",
    "frame_data = np.random.randint(0, 100, size=(2**10, 2**5))\n",
    "df = pd.DataFrame(frame_data)\n",
    "\n",
    "# ***** Do not change the code below! It verifies that \n",
    "# ***** the exercise has been done correctly. *****\n",
    "\n",
    "try:\n",
    "    assert df is not None\n",
    "    assert frame_data is not None\n",
    "    assert isinstance(frame_data, np.ndarray)\n",
    "except:\n",
    "    raise AssertionError(\"Don't change too much of the original code!\")\n",
    "assert \"modin.pandas\" in sys.modules, \"Not quite correct. Remember the single line of code change (See above)\"\n",
    "\n",
    "import modin.pandas\n",
    "assert pd == modin.pandas, \"Remember the single line of code change (See above)\"\n",
    "assert hasattr(df, \"_query_compiler\"), \"Make sure that `df` is a modin.pandas DataFrame.\"\n",
    "\n",
    "print(\"Success! You only need to change one line of code!\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now that we have created a toy example for playing around with the DataFrame, let's print it out in different ways."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Concept for Exercise: Data Interaction and Printing\n",
    "\n",
    "When interacting with data, it is very important to look at different parts of the data (e.g. `df.head()`). Here we will show that you can print the modin.pandas DataFrame in the same ways you would pandas."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Print the first 10 lines.\n",
    "df.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Print the DataFrame.\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Free cell for custom interaction (Play around here!)\n",
    "df.add_prefix(\"col\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.count()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Please move on to [Exercise 2](./exercise_2.ipynb) when you are ready**"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}


================================================
FILE: examples/tutorial/jupyter/execution/pandas_on_dask/local/exercise_2.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "![LOGO](../../../img/MODIN_ver2_hrz.png)\n",
    "\n",
    "<center><h2>Scale your pandas workflows by changing one line of code</h2>\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Exercise 2: Speed improvements\n",
    "\n",
    "**GOAL**: Learn about common functionality that Modin speeds up by using all of your machine's cores."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Concept for Exercise: `read_csv` speedups\n",
    "\n",
    "Reading CSV files is the most commonly used data ingestion method in pandas (link to pandas survey). This concept is designed to give an idea of the kinds of speedups possible, even on a non-distributed filesystem. Modin also supports other file formats for parallel and distributed reads, which can be found in the documentation. We will import both Modin and pandas so that the speedups are evident.\n",
    "\n",
    "**Note: Rerunning the `read_csv` cells many times may result in degraded performance, depending on the memory of the machine**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import modin.pandas as pd\n",
    "import pandas\n",
    "import time\n",
    "from IPython.display import Markdown, display\n",
    "\n",
    "def printmd(string):\n",
    "    display(Markdown(string))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Dataset: 2015 NYC taxi trip data\n",
    "\n",
    "\n",
    "We will be using a version of this data already in S3, originally posted in this blog post: https://matthewrocklin.com/blog/work/2017/01/12/dask-dataframes\n",
    "\n",
    "**Size: ~1.8GB**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "path = \"s3://dask-data/nyc-taxi/2015/yellow_tripdata_2015-01.csv\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Modin execution engine setting:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import modin.config as cfg\n",
    "cfg.Engine.put(\"dask\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## `pandas.read_csv`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "start = time.time()\n",
    "\n",
    "pandas_df = pandas.read_csv(path, parse_dates=[\"tpep_pickup_datetime\", \"tpep_dropoff_datetime\"], quoting=3)\n",
    "\n",
    "end = time.time()\n",
    "pandas_duration = end - start\n",
    "print(\"Time to read with pandas: {} seconds\".format(round(pandas_duration, 3)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Expect pandas to take >3 minutes on EC2, longer locally\n",
    "\n",
    "This is a good time to chat with your neighbor.\n",
    "Discussion topics:\n",
    "- Do you work with a large amount of data daily?\n",
    "- How big is your data?\n",
    "- What’s the common use case of your data?\n",
    "- Do you use any big data analytics tools?\n",
    "- Do you use any interactive analytics tool?\n",
    "- What are some drawbacks of your current interactive analytics tools?"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## `modin.pandas.read_csv`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "start = time.time()\n",
    "\n",
    "modin_df = pd.read_csv(path, parse_dates=[\"tpep_pickup_datetime\", \"tpep_dropoff_datetime\"], quoting=3)\n",
    "\n",
    "end = time.time()\n",
    "modin_duration = end - start\n",
    "print(\"Time to read with Modin: {} seconds\".format(round(modin_duration, 3)))\n",
    "\n",
    "printmd(\"### Modin is {}x faster than pandas at `read_csv`!\".format(round(pandas_duration / modin_duration, 2)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Are they equal?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pandas_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "modin_df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Concept for exercise: Reduces\n",
    "\n",
    "In pandas, a reduce would be something along the lines of a `sum` or `count`. It computes some summary statistics about the rows or columns. We will be using `count`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "start = time.time()\n",
    "\n",
    "pandas_count = pandas_df.count()\n",
    "\n",
    "end = time.time()\n",
    "pandas_duration = end - start\n",
    "\n",
    "print(\"Time to count with pandas: {} seconds\".format(round(pandas_duration, 3)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "start = time.time()\n",
    "\n",
    "modin_count = modin_df.count()\n",
    "\n",
    "end = time.time()\n",
    "modin_duration = end - start\n",
    "print(\"Time to count with Modin: {} seconds\".format(round(modin_duration, 3)))\n",
    "\n",
    "printmd(\"### Modin is {}x faster than pandas at `count`!\".format(round(pandas_duration / modin_duration, 2)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Are they equal?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pandas_count"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "modin_count"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Concept for exercise: Map operations\n",
    "\n",
    "In pandas, map operations are operations that do a single pass over the data and do not change its shape. Operations like `isnull` and `applymap` are included in this. We will be using `isnull`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "start = time.time()\n",
    "\n",
    "pandas_isnull = pandas_df.isnull()\n",
    "\n",
    "end = time.time()\n",
    "pandas_duration = end - start\n",
    "\n",
    "print(\"Time to isnull with pandas: {} seconds\".format(round(pandas_duration, 3)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "start = time.time()\n",
    "\n",
    "modin_isnull = modin_df.isnull()\n",
    "\n",
    "end = time.time()\n",
    "modin_duration = end - start\n",
    "print(\"Time to isnull with Modin: {} seconds\".format(round(modin_duration, 3)))\n",
    "\n",
    "printmd(\"### Modin is {}x faster than pandas at `isnull`!\".format(round(pandas_duration / modin_duration, 2)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Are they equal?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pandas_isnull"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "modin_isnull"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Concept for exercise: Apply over a single column\n",
    "\n",
    "Sometimes we want to compute some summary statistics on a single column from our dataset."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "start = time.time()\n",
    "rounded_trip_distance_pandas = pandas_df[\"trip_distance\"].apply(round)\n",
    "\n",
    "end = time.time()\n",
    "pandas_duration = end - start\n",
    "print(\"Time to apply on one column with pandas: {} seconds\".format(round(pandas_duration, 3)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "start = time.time()\n",
    "\n",
    "rounded_trip_distance_modin = modin_df[\"trip_distance\"].apply(round)\n",
    "\n",
    "end = time.time()\n",
    "modin_duration = end - start\n",
    "print(\"Time to apply on one column with Modin: {} seconds\".format(round(modin_duration, 3)))\n",
    "\n",
    "printmd(\"### Modin is {}x faster than pandas at `apply` on one column!\".format(round(pandas_duration / modin_duration, 2)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Are they equal?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "rounded_trip_distance_pandas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "rounded_trip_distance_modin"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Concept for exercise: Add a column\n",
    "\n",
    "It is common to need to add a new column to an existing dataframe. Here we show that this is significantly faster in Modin due to metadata management and an efficient zero-copy implementation."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "start = time.time()\n",
    "pandas_df[\"rounded_trip_distance\"] = rounded_trip_distance_pandas\n",
    "\n",
    "end = time.time()\n",
    "pandas_duration = end - start\n",
    "print(\"Time to add a column with pandas: {} seconds\".format(round(pandas_duration, 3)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "start = time.time()\n",
    "\n",
    "modin_df[\"rounded_trip_distance\"] = rounded_trip_distance_modin\n",
    "\n",
    "end = time.time()\n",
    "modin_duration = end - start\n",
    "print(\"Time to add a column with Modin: {} seconds\".format(round(modin_duration, 3)))\n",
    "\n",
    "printmd(\"### Modin is {}x faster than pandas at adding a column!\".format(round(pandas_duration / modin_duration, 2)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Are they equal?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pandas_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "modin_df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Please move on to [Exercise 3](./exercise_3.ipynb) when you are ready**"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}


================================================
FILE: examples/tutorial/jupyter/execution/pandas_on_dask/local/exercise_3.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "![LOGO](../../../img/MODIN_ver2_hrz.png)\n",
    "\n",
    "<center><h2>Scale your pandas workflows by changing one line of code</h2>\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Exercise 3: Not Implemented\n",
    "\n",
    "**GOAL**: Learn what happens when a function is not yet supported in Modin as well as how to extend Modin's functionality using the DataFrame Algebra."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "When functionality has not yet been implemented, we default to pandas\n",
    "\n",
    "![](../../../img/convert_to_pandas.png)\n",
    "\n",
    "We convert a Modin dataframe to pandas to do the operation, then convert it back once it is finished. These operations will have a high overhead due to the communication involved and will take longer than pandas.\n",
    "\n",
    "When this is happening, a warning will be given to the user to inform them that this operation will take longer than usual. For example, `DataFrame.mask` is not yet implemented. In this case, when a user tries to use it, they will see this warning:\n",
    "\n",
    "```\n",
    "UserWarning: `DataFrame.mask` defaulting to pandas implementation.\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Concept for exercise: Default to pandas\n",
    "\n",
    "In this section of the exercise we will see first-hand how the runtime is affected by operations that are not implemented."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import modin.pandas as pd\n",
    "import pandas\n",
    "import numpy as np\n",
    "import time\n",
    "import modin.config as cfg\n",
    "cfg.Engine.put(\"dask\")\n",
    "\n",
    "frame_data = np.random.randint(0, 100, size=(2**18, 2**8))\n",
    "df = pd.DataFrame(frame_data).add_prefix(\"col\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pandas_df = pandas.DataFrame(frame_data).add_prefix(\"col\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "modin_start = time.time()\n",
    "\n",
    "print(df.mask(df < 50))\n",
    "\n",
    "modin_end = time.time()\n",
    "print(\"Modin mask took {} seconds.\".format(round(modin_end - modin_start, 4)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pandas_start = time.time()\n",
    "\n",
    "print(pandas_df.mask(pandas_df < 50))\n",
    "\n",
    "pandas_end = time.time()\n",
    "print(\"pandas mask took {} seconds.\".format(round(pandas_end - pandas_start, 4)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Concept for exercise: Register custom functions\n",
    "\n",
    "Modin's user-facing API is pandas, but it is possible that we do not yet support your favorite or most-needed functionalities. Your user-defined function may also be able to be executed more efficiently if you pre-define the type of function it is (e.g. map, reduce, etc.) using the DataFrame Algebra. To solve either case, it is possible to register a custom function to be applied to your data."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Registering a custom function for all query compilers\n",
    "\n",
    "To register a custom function for a query compiler, we first need to import it:\n",
    "\n",
    "```python\n",
    "from modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler\n",
    "```\n",
    "\n",
    "The `PandasQueryCompiler` is responsible for defining and compiling the queries that can be operated on by Modin, and is specific to the pandas storage format. Any queries defined here must also both be compatible with and result in a `pandas.DataFrame`. Many functionalities are very simply implemented, as you can see in the current code: [Link](https://github.com/modin-project/modin/blob/7a8158873e77cb5f1a5a3b89be4ddac89f576269/modin/core/storage_formats/pandas/query_compiler.py#L216).\n",
    "\n",
    "If we want to register a new function, we need to understand what kind of function it is. In our example, we will try to implement a `kurtosis` on the unary negation of the values in the dataframe, which is a map (unary negation of each cell) followed by a reduce. So we next want to import the function type so we can use it in our definition:\n",
    "\n",
    "```python\n",
    "from modin.core.dataframe.algebra import TreeReduce\n",
    "```\n",
    "\n",
    "Then we can just use the `TreeReduce.register` `classmethod` and assign it to the `PandasQueryCompiler`:\n",
    "\n",
    "```python\n",
    "PandasQueryCompiler.neg_kurtosis = TreeReduce.register(lambda cell_value, **kwargs: ~cell_value, pandas.DataFrame.kurtosis)\n",
    "```\n",
    "\n",
    "We include `**kwargs` to the `lambda` function since the query compiler will pass all keyword arguments to both the map and reduce functions.\n",
    "\n",
    "Finally, we want a handle to it from the `DataFrame`, so we need to create a way to do that:\n",
    "\n",
    "```python\n",
    "def neg_kurtosis_func(self, **kwargs):\n",
    "    # The constructor allows you to pass in a query compiler as a keyword argument\n",
    "    return self.__constructor__(query_compiler=self._query_compiler.neg_kurtosis(**kwargs))\n",
    "\n",
    "pd.DataFrame.neg_kurtosis_custom = neg_kurtosis_func\n",
    "```\n",
    "\n",
    "And then you can use it like you usually would:\n",
    "\n",
    "```python\n",
    "df.neg_kurtosis_custom()\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler\n",
    "from modin.core.dataframe.algebra import TreeReduce"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "PandasQueryCompiler.neg_kurtosis_custom = TreeReduce.register(lambda cell_value, **kwargs: ~cell_value,\n",
    "                                                             pandas.DataFrame.kurtosis)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pandas._libs import lib\n",
    "# The function signature came from the pandas documentation:\n",
    "# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.kurtosis.html\n",
    "def neg_kurtosis_func(self, axis=lib.no_default, skipna=True, level=None, numeric_only=None, **kwargs):\n",
    "    # We need to specify the axis for the query compiler\n",
    "    if axis in [None, lib.no_default]:\n",
    "        axis = 0\n",
    "    # The constructor allows you to pass in a query compiler as a keyword argument\n",
    "    # Reduce dimension is used for reduces\n",
    "    # We also pass all keyword arguments here to ensure correctness\n",
    "    return self._reduce_dimension(\n",
    "        self._query_compiler.neg_kurtosis_custom(\n",
    "            axis=axis, skipna=skipna, level=level, numeric_only=numeric_only, **kwargs\n",
    "        )\n",
    "    )\n",
    "\n",
    "pd.DataFrame.neg_kurtosis_custom = neg_kurtosis_func"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Speed improvements\n",
    "If we were to try and replicate this functionality using the pandas API, we would need to call `df.applymap` with our unary negation function, and subsequently `df.kurtosis` on the result of the first call. Let's see how this compares with our new, custom function!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "start = time.time()\n",
    "\n",
    "print(pandas_df.applymap(lambda cell_value: ~cell_value).kurtosis())\n",
    "\n",
    "end = time.time()\n",
    "pandas_duration = end - start\n",
    "print(\"pandas unary negation kurtosis took {} seconds.\".format(pandas_duration))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "start = time.time()\n",
    "\n",
    "print(df.applymap(lambda x: ~x).kurtosis())\n",
    "\n",
    "end = time.time()\n",
    "modin_duration = end - start\n",
    "print(\"Modin unary negation kurtosis took {} seconds.\".format(modin_duration))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "custom_start = time.time()\n",
    "\n",
    "print(df.neg_kurtosis_custom())\n",
    "\n",
    "custom_end = time.time()\n",
    "modin_custom_duration = custom_end - custom_start\n",
    "print(\"Modin neg_kurtosis_custom took {} seconds.\".format(modin_custom_duration))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.display import Markdown, display\n",
    "\n",
    "display(Markdown(\"### As expected, Modin is {}x faster than pandas when chaining the functions; however we see that our custom function is even faster than that - beating pandas by {}x, and Modin (when chaining the functions) by {}x!\".format(round(pandas_duration / modin_duration, 2), round(pandas_duration / modin_custom_duration, 2), round(modin_duration / modin_custom_duration, 2))))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Congratulations! You have just implemented new DataFrame functionality!\n",
    "\n",
    "## Consider opening a pull request: https://github.com/modin-project/modin/pulls\n",
    "\n",
    "For a complete list of what is implemented, see the [Supported APIs](https://modin.readthedocs.io/en/latest/supported_apis/index.html) section."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Test your knowledge: Add a custom function for another tree reduce: finding `DataFrame.mad` after squaring all of the values\n",
    "\n",
    "See the pandas documentation for the correct signature: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.mad.html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "modin_mad_custom_start = time.time()\n",
    "\n",
    "# Implement your function here! Put the result of your custom squared `mad` in the variable `modin_mad_custom`\n",
    "# Hint: Look at the kurtosis walkthrough above\n",
    "\n",
    "modin_mad_custom = ...\n",
    "print(modin_mad_custom)\n",
    "\n",
    "modin_mad_custom_end = time.time()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Evaluation code, do not change!\n",
    "modin_mad_start = time.time()\n",
    "modin_mad = df.applymap(lambda x: x**2).mad()\n",
    "print(modin_mad)\n",
    "modin_mad_end = time.time()\n",
    "\n",
    "assert modin_mad_end - modin_mad_start > modin_mad_custom_end - modin_mad_custom_start, \\\n",
    "    \"Your implementation was too slow, or you used the chaining functions approach. Try again\"\n",
    "assert modin_mad._to_pandas().equals(modin_mad_custom._to_pandas()), \"Your result did not match the result of chaining the functions, try again\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Now that you are able to create custom functions, you know enough to contribute to Modin!\n",
    "\n",
    "**Please move on to [Exercise 4](./exercise_4.ipynb) when you are ready**"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}


================================================
FILE: examples/tutorial/jupyter/execution/pandas_on_dask/local/exercise_4.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "99f41d2d",
   "metadata": {},
   "source": [
    "![LOGO](../../../img/MODIN_ver2_hrz.png)\n",
    "\n",
    "<center><h2>Scale your pandas workflows by changing one line of code</h2>\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fdda1c9c",
   "metadata": {},
   "source": [
    "# Exercise 4: Experimental Features\n",
    "\n",
    "**GOAL**: Explore some of the experimental features being added to Modin."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e7bf87a5",
   "metadata": {},
   "source": [
    "### Concept for exercise: Spreadsheet\n",
    "\n",
    "For those who have worked with Excel, the Spreadsheet API will definitely feel familiar! The Spreadsheet API is a Jupyter notebook widget that allows us to interact with Modin DataFrames in a spreadsheet-like fashion while taking advantage of the underlying capabilities of Modin. The widget makes it quick and easy to explore, sort, filter, and edit data as well as export the changes as reproducible code.\n",
    "\n",
    "Let's look back at a subset of the 2015 NYC Taxi Data from Exercise 2, and see how the Spreadsheet API can make it easy to play with the data!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5d5c4a3e",
   "metadata": {},
   "outputs": [],
   "source": [
    "!jupyter nbextension enable --py --sys-prefix modin_spreadsheet"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dc8d5903",
   "metadata": {},
   "outputs": [],
   "source": [
    "import modin.pandas as pd\n",
    "import modin.experimental.spreadsheet as mss\n",
    "from modin.config import Engine\n",
    "Engine.put(\"dask\")\n",
    "\n",
    "s3_path = \"s3://dask-data/nyc-taxi/2015/yellow_tripdata_2015-01.csv\"\n",
    "modin_df = pd.read_csv(s3_path, parse_dates=[\"tpep_pickup_datetime\", \"tpep_dropoff_datetime\"], quoting=3, nrows=1000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "145e7bbe",
   "metadata": {},
   "outputs": [],
   "source": [
    "spreadsheet = mss.from_dataframe(modin_df)\n",
    "spreadsheet"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3c18b7f2",
   "metadata": {},
   "source": [
    "### Thank you for participating!"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}


================================================
FILE: examples/tutorial/jupyter/execution/pandas_on_dask/requirements.txt
================================================
fsspec>=2022.11.0
jupyterlab
ipywidgets
modin[dask]
modin[spreadsheet]


================================================
FILE: examples/tutorial/jupyter/execution/pandas_on_dask/test/test_notebooks.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import os
import sys

import nbformat

# This file sits six directories below the repository root; climb back up and
# put the root on ``sys.path`` so the ``examples`` package can be imported.
MODIN_DIR = os.path.abspath(
    os.path.join(os.path.dirname(__file__), *([os.pardir] * 6))
)
sys.path.insert(0, MODIN_DIR)
from examples.tutorial.jupyter.execution.test.utils import (  # noqa: E402
    _execute_notebook,
    _replace_str,
    download_taxi_dataset,
    test_dataset_path,
)

# Directory holding the exercise notebooks. This is a relative path, so it is
# resolved against the current working directory when the tests run.
# NOTE(review): presumably the tests are meant to be launched from the
# repository root — confirm against the CI invocation.
local_notebooks_dir = "examples/tutorial/jupyter/execution/pandas_on_dask/local"


def test_exercise_1():
    """Run exercise 1 after applying the edit the user is asked to make.

    The notebook only works once ``import pandas as pd`` has been replaced
    with ``import modin.pandas as pd``, so that substitution is applied to the
    loaded notebook, which is then saved as ``exercise_1_test.ipynb`` and
    executed.
    """
    source_notebook_path = os.path.join(local_notebooks_dir, "exercise_1.ipynb")
    modified_notebook_path = os.path.join(local_notebooks_dir, "exercise_1_test.ipynb")

    nb = nbformat.read(source_notebook_path, as_version=nbformat.NO_CONVERT)
    _replace_str(nb, "import pandas as pd", "import modin.pandas as pd")
    nbformat.write(nb, modified_notebook_path)

    _execute_notebook(modified_notebook_path)


def test_exercise_2():
    """Run exercise 2 with the dataset path swapped for the test dataset.

    The notebook works "as is"; the S3 path assignment is replaced only so
    that the test can use a smaller dataset. The replacement text assigns
    ``test_dataset_path`` to ``path`` and is followed by the
    ``download_taxi_dataset`` text.
    """
    original_path_line = (
        'path = "s3://dask-data/nyc-taxi/2015/yellow_tripdata_2015-01.csv"'
    )
    replacement_cell = 'path = "{}"\n'.format(test_dataset_path) + download_taxi_dataset

    nb = nbformat.read(
        os.path.join(local_notebooks_dir, "exercise_2.ipynb"),
        as_version=nbformat.NO_CONVERT,
    )
    _replace_str(nb, original_path_line, replacement_cell)

    modified_notebook_path = os.path.join(local_notebooks_dir, "exercise_2_test.ipynb")
    nbformat.write(nb, modified_notebook_path)
    _execute_notebook(modified_notebook_path)


def test_exercise_3():
    """Prepare exercise 3 with a sample solution filled in for the user task.

    The notebook asks the user to supply a custom ``mad`` implementation in
    place of ``modin_mad_custom = ...``; a reference implementation is
    substituted there and the result is saved as ``exercise_3_test.ipynb``.
    The saved notebook is currently NOT executed (see the note at the end).
    """
    source_notebook_path = os.path.join(local_notebooks_dir, "exercise_3.ipynb")
    modified_notebook_path = os.path.join(local_notebooks_dir, "exercise_3_test.ipynb")

    # Text that stands in for what the user would type into the notebook.
    user_mad_implementation = """PandasQueryCompiler.sq_mad_custom = TreeReduce.register(lambda cell_value, **kwargs: cell_value ** 2,
                                                             pandas.DataFrame.mad)

def sq_mad_func(self, axis=None, skipna=True, level=None, **kwargs):
    if axis is None:
        axis = 0

    return self._reduce_dimension(
        self._query_compiler.sq_mad_custom(
            axis=axis, skipna=skipna, level=level, **kwargs
        )
    )

pd.DataFrame.sq_mad_custom = sq_mad_func

modin_mad_custom = df.sq_mad_custom()
    """

    nb = nbformat.read(source_notebook_path, as_version=nbformat.NO_CONVERT)
    _replace_str(nb, "modin_mad_custom = ...", user_mad_implementation)
    nbformat.write(nb, modified_notebook_path)

    # Execution is intentionally disabled: the example needs to be updated
    # because `.mad` doesn't exist. Once it is, restore the call:
    # _execute_notebook(modified_notebook_path)


def test_exercise_4():
    """Run exercise 4 with the dataset path swapped for the test dataset.

    The notebook works "as is"; the S3 path assignment is replaced only so
    that the test can use a smaller dataset. The replacement text assigns
    ``test_dataset_path`` to ``s3_path`` and is followed by the
    ``download_taxi_dataset`` text.
    """
    original_path_line = (
        's3_path = "s3://dask-data/nyc-taxi/2015/yellow_tripdata_2015-01.csv"'
    )
    s3_path_cell = 's3_path = "{}"\n'.format(test_dataset_path) + download_taxi_dataset

    nb = nbformat.read(
        os.path.join(local_notebooks_dir, "exercise_4.ipynb"),
        as_version=nbformat.NO_CONVERT,
    )
    _replace_str(nb, original_path_line, s3_path_cell)

    modified_notebook_path = os.path.join(local_notebooks_dir, "exercise_4_test.ipynb")
    nbformat.write(nb, modified_notebook_path)
    _execute_notebook(modified_notebook_path)


================================================
FILE: examples/tutorial/jupyter/execution/pandas_on_ray/Dockerfile
================================================
# Image for the pandas-on-Ray tutorial, built on top of the Miniconda 3 image.
FROM continuumio/miniconda3

# Install psutil and setproctitle from the conda-forge channel.
# NOTE(review): there is no `-y` flag here; confirm that this step does not
# stop at a confirmation prompt during a non-interactive build.
RUN conda install -c conda-forge psutil setproctitle
# Install the Python packages listed in requirements-dev.txt.
# NOTE(review): no COPY/ADD instruction is visible in this Dockerfile, so it is
# unclear how requirements-dev.txt reaches the image — verify before relying
# on this step.
RUN pip install -r requirements-dev.txt


================================================
FILE: examples/tutorial/jupyter/execution/pandas_on_ray/cluster/README.md
================================================
![LOGO](../../../img/MODIN_ver2_hrz.png)

<center>
<h1>Scale your pandas workflows on a Ray cluster</h1>
</center>

**NOTE**: Before starting the exercise, please read the full instructions in the 
[Modin documentation](https://modin.readthedocs.io/en/latest/getting_started/using_modin/using_modin_cluster.html).

The basic steps to run the script on a remote Ray cluster are:

Step 1. Install the necessary dependencies

```bash
pip install boto3
```

Step 2. Setup your AWS credentials.

```bash
aws configure
```

Step 3. Modify configuration file and start up the Ray cluster.

```bash
ray up modin-cluster.yaml
```

Step 4. Submit your script to the remote cluster.

```bash
ray submit modin-cluster.yaml exercise_5.py
```

Step 5. Shut down the Ray remote cluster.

```bash
ray down modin-cluster.yaml
```


================================================
FILE: examples/tutorial/jupyter/execution/pandas_on_ray/cluster/exercise_5.py
================================================
import time

import ray

import modin.pandas as pd

# Connect to the Ray cluster this script was submitted to.
ray.init(address="auto")

# Abort early unless the cluster reports exactly the CPU count this exercise
# was sized for (the accompanying modin-cluster.yaml launches one head node
# plus five workers, all m5.24xlarge).
cpu_count = ray.cluster_resources()["CPU"]
assert cpu_count == 576, f"Expected 576 CPUs, but found {cpu_count}"

# Dataset assembled on every node by the cluster's setup commands.
file_path = "big_yellow.csv"

started_at = time.perf_counter()

# Timed workload: one read, one reduction, one groupby and one element-wise map.
# quoting=3 has the same value as csv.QUOTE_NONE.
df = pd.read_csv(file_path, quoting=3)
df_count = df.count()
df_groupby_count = df.groupby("passenger_count").count()
df_map = df.map(str)

finished_at = time.perf_counter()
elapsed = finished_at - started_at
print(f"Full script time is {elapsed:.3f}")  # noqa: T201


================================================
FILE: examples/tutorial/jupyter/execution/pandas_on_ray/cluster/modin-cluster.yaml
================================================
# A unique identifier for the head node and workers of this cluster.
cluster_name: modin_init

# The maximum number of worker nodes to launch in addition to the head
# node.
max_workers: 5

# The autoscaler will scale up the cluster faster with higher upscaling speed.
# E.g., if the task requires adding more nodes then autoscaler will gradually
# scale up the cluster in chunks of upscaling_speed*currently_running_nodes.
# This number should be > 0.
upscaling_speed: 1.0

# This executes all commands on all nodes in the docker container,
# and opens all the necessary ports to support the Ray cluster.
# Empty string means disabled.
docker:
    # image: "rayproject/ray-ml:latest-gpu" # You can change this to latest-cpu if you don't need GPU support and want a faster startup
    image: rayproject/ray:latest-cpu   # use this one if you don't need ML dependencies, it's faster to pull
    container_name: "ray_container"
    # If true, pulls latest version of image. Otherwise, `docker run` will only pull the image
    # if no cached version is present.
    pull_before_run: True
    run_options:   # Extra options to pass into "docker run"
        - --ulimit nofile=65536:65536

    # Example of running a GPU head with CPU workers
    # head_image: "rayproject/ray-ml:latest-gpu"
    # Allow Ray to automatically detect GPUs

    # worker_image: "rayproject/ray-ml:latest-cpu"
    # worker_run_options: []

# If a node is idle for this many minutes, it will be removed.
idle_timeout_minutes: 5

# Cloud-provider specific configuration.
provider:
    type: aws
    region: us-west-2
    # Availability zone(s), comma-separated, that nodes may be launched in.
    # Nodes will be launched in the first listed availability zone and will
    # be tried in the subsequent availability zones if launching fails.
    availability_zone: us-west-2a,us-west-2b
    # Whether to allow node reuse. If set to False, nodes will be terminated
    # instead of stopped.
    cache_stopped_nodes: False # If not present, the default is True.

# How Ray will authenticate with newly launched nodes.
auth:
    ssh_user: ubuntu
# By default Ray creates a new private keypair, but you can also use your own.
# If you do so, make sure to also set "KeyName" in the head and worker node
# configurations below.
#    ssh_private_key: /path/to/your/key.pem

# Tell the autoscaler the allowed node types and the resources they provide.
# The key is the name of the node type, which is just for debugging purposes.
# The node config specifies the launch config and physical instance type.
available_node_types:
    ray.head.default:
        # The node type's CPU and GPU resources are auto-detected based on AWS instance type.
        # If desired, you can override the autodetected CPU and GPU resources advertised to the autoscaler.
        # You can also set custom resources.
        # For example, to mark a node type as having 1 CPU, 1 GPU, and 5 units of a resource called "custom", set
        # resources: {"CPU": 1, "GPU": 1, "custom": 5}
        resources: {}
        # Provider-specific config for this node type, e.g. instance type. By default
        # Ray will auto-configure unspecified fields such as SubnetId and KeyName.
        # For more documentation on available fields, see:
        # http://boto3.readthedocs.io/en/latest/reference/services/ec2.html#EC2.ServiceResource.create_instances
        node_config:
            InstanceType: m5.24xlarge
            # Default AMI for us-west-2.
            # Check https://github.com/ray-project/ray/blob/master/python/ray/autoscaler/_private/aws/config.py
            # for default images for other zones.
            ImageId: ami-0387d929287ab193e
            # You can provision additional disk space with a conf as follows
            BlockDeviceMappings:
                - DeviceName: /dev/sda1
                  Ebs:
                      VolumeSize: 500
                      VolumeType: gp3
            # Additional options in the boto docs.
    ray.worker.default:
        # The minimum number of worker nodes of this type to launch.
        # This number should be >= 0.
        min_workers: 5
        # The maximum number of worker nodes of this type to launch.
        # This takes precedence over min_workers.
        max_workers: 5
        # The node type's CPU and GPU resources are auto-detected based on AWS instance type.
        # If desired, you can override the autodetected CPU and GPU resources advertised to the autoscaler.
        # You can also set custom resources.
        # For example, to mark a node type as having 1 CPU, 1 GPU, and 5 units of a resource called "custom", set
        # resources: {"CPU": 1, "GPU": 1, "custom": 5}
        resources: {}
        # Provider-specific config for this node type, e.g. instance type. By default
        # Ray will auto-configure unspecified fields such as SubnetId and KeyName.
        # For more documentation on available fields, see:
        # http://boto3.readthedocs.io/en/latest/reference/services/ec2.html#EC2.ServiceResource.create_instances
        node_config:
            InstanceType: m5.24xlarge
            # Default AMI for us-west-2.
            # Check https://github.com/ray-project/ray/blob/master/python/ray/autoscaler/_private/aws/config.py
            # for default images for other zones.
            ImageId: ami-0387d929287ab193e
            # You can provision additional disk space with a conf as follows
            BlockDeviceMappings:
                - DeviceName: /dev/sda1
                  Ebs:
                      VolumeSize: 500
                      VolumeType: gp3
            # Run workers on spot by default. Comment this out to use on-demand.
            # NOTE: If relying on spot instances, it is best to specify multiple different instance
            # types to avoid interruption when one instance type is experiencing heightened demand.
            # Demand information can be found at https://aws.amazon.com/ec2/spot/instance-advisor/
            # InstanceMarketOptions:
                # MarketType: spot
                # Additional options can be found in the boto docs, e.g.
                #   SpotOptions:
                #       MaxPrice: MAX_HOURLY_PRICE
            # Additional options in the boto docs.

# Specify the node type of the head node (as configured above).
head_node_type: ray.head.default

# Files or directories to copy to the head and worker nodes. The format is a
# dictionary from REMOTE_PATH: LOCAL_PATH, e.g.
file_mounts: {
#    "/path1/on/remote/machine": "/path1/on/local/machine",
#    "/path2/on/remote/machine": "/path2/on/local/machine",
}

# Files or directories to copy from the head node to the worker nodes. The format is a
# list of paths. The same path on the head node will be copied to the worker node.
# This behavior is a subset of the file_mounts behavior. In the vast majority of cases
# you should just use file_mounts. Only use this if you know what you're doing!
cluster_synced_files: []

# Whether changes to directories in file_mounts or cluster_synced_files in the head node
# should sync to the worker node continuously
file_mounts_sync_continuously: False

# Patterns for files to exclude when running rsync up or rsync down
rsync_exclude:
    - "**/.git"
    - "**/.git/**"

# Pattern files to use for filtering out files when running rsync up or rsync down. The file is searched for
# in the source directory and recursively through all subdirectories. For example, if .gitignore is provided
# as a value, the behavior will match git's behavior for finding and using .gitignore files.
rsync_filter:
    - ".gitignore"

# List of commands that will be run before `setup_commands`. If docker is
# enabled, these commands will run outside the container and before docker
# is setup.
initialization_commands: []

# List of shell commands to run to set up nodes.
setup_commands:
    # Note: if you're developing Ray, you probably want to create a Docker image that
    # has your Ray repo pre-cloned. Then, you can replace the pip installs
    # below with a git checkout <your_sha> (and possibly a recompile).
    # To run the nightly version of ray (as opposed to the latest), either use a rayproject docker image
    # that has the "nightly" (e.g. "rayproject/ray-ml:nightly-gpu") or uncomment the following line:
    # - pip install -U "ray[default] @ https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp37-cp37m-manylinux2014_x86_64.whl"
    # Each requirement specifier is quoted as a whole: an unquoted `>` or `<` would be
    # parsed by the shell as an output/input redirection instead of a version constraint.
    - conda create -n "modin" -c conda-forge modin "ray-default>=2.10.0,<3" -y
    - conda activate modin && pip install -U "fsspec>=2022.11.0" boto3
    # Make every later login shell start inside the "modin" environment.
    - echo "conda activate modin" >> ~/.bashrc
    # Download one month of taxi data, then build big_yellow.csv from a header line
    # followed by repeated copies of the data rows (the brace expansion repeats the file name).
    - wget https://modin-datasets.intel.com/testing/yellow_tripdata_2015-01.csv
    - printf "VendorID,tpep_pickup_datetime,tpep_dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,RateCodeID,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,extra,mta_tax,tip_amount,tolls_amount,improvement_surcharge,total_amount,congestion_surcharge,airport_fee\n" > big_yellow.csv
    - tail -n +2 yellow_tripdata_2015-01.csv{,}{,}{,}{,}{,}{,} >> big_yellow.csv
    - echo 'export MODIN_RAY_CLUSTER=True' >> ~/.bashrc
    
# Custom commands that will be run on the head node after common setup.
head_setup_commands: 
    - echo 'export MODIN_REDIS_ADDRESS="localhost:6379"' >> ~/.bashrc

# Custom commands that will be run on worker nodes after common setup.
worker_setup_commands: []

# Command to start ray on the head node. You don't need to change this.
head_start_ray_commands:
    - ray stop
    - echo 'export MEMORY_STORE_SIZE=$(awk "/MemFree/ { printf \"%d \\n\", \$2*1024}" /proc/meminfo)' >> ~/.bashrc
    - echo 'export TMPDIR="$(dirname $(mktemp tmp.XXXXXXXXXX -ut))"' >> ~/.bashrc
    - ulimit -n 65536; ray start --head --port=6379 --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml --object-store-memory=$MEMORY_STORE_SIZE --plasma-directory=$TMPDIR --dashboard-host=0.0.0.0

# Command to start ray on worker nodes. You don't need to change this.
worker_start_ray_commands:
    - ray stop
    - echo 'export MEMORY_STORE_SIZE=$(awk "/MemFree/ { printf \"%d \\n\", \$2*1024}" /proc/meminfo)' >> ~/.bashrc
    - echo 'export TMPDIR="$(dirname $(mktemp tmp.XXXXXXXXXX -ut))"' >> ~/.bashrc
    - ulimit -n 65536; ray start --address=$RAY_HEAD_IP:6379 --object-manager-port=8076  --object-store-memory=$MEMORY_STORE_SIZE --plasma-directory=$TMPDIR


================================================
FILE: examples/tutorial/jupyter/execution/pandas_on_ray/local/exercise_1.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "![LOGO](../../../img/MODIN_ver2_hrz.png)\n",
    "\n",
    "<center><h2>Scale your pandas workflows by changing one line of code</h2>\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Exercise 1: How to use Modin\n",
    "\n",
    "**GOAL**: Learn how to import Modin to accelerate and scale pandas workflows."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Modin is a drop-in replacement for pandas that distributes the computation \n",
    "across all of the cores in your machine or in a cluster.\n",
    "In practical terms, this means that you can continue using the same pandas scripts\n",
    "as before and expect the behavior and results to be the same. The only thing that needs\n",
    "to change is the import statement. Normally, you would change:\n",
    "\n",
    "```python\n",
    "import pandas as pd\n",
    "```\n",
    "\n",
    "to:\n",
    "\n",
    "```python\n",
    "import modin.pandas as pd\n",
    "```\n",
    "\n",
    "Changing this line of code will allow you to use all of the cores in your machine to do computation on your data. One of the major performance bottlenecks of pandas is that it only uses a single core for any given computation. Modin exposes an API that is identical to pandas, allowing you to continue interacting with your data as you would with pandas. There are no additional commands required to use Modin locally. Partitioning, scheduling, data transfer, and other related concerns are all handled by Modin under the hood."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<p style=\"text-align:left;\">\n",
    "        <h1>pandas on a multicore laptop\n",
    "    <span style=\"float:right;\">\n",
    "        Modin on a multicore laptop\n",
    "    </span>\n",
    "\n",
    "<div>\n",
    "<img align=\"left\" src=\"../../../img/pandas_multicore.png\"><img src=\"../../../img/modin_multicore.png\">\n",
    "</div>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Concept for exercise: Dataframe constructor\n",
    "\n",
    "Often when playing around in pandas, it is useful to create a DataFrame with the constructor. That is where we will start.\n",
    "\n",
    "```python\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "frame_data = np.random.randint(0, 100, size=(2**10, 2**5))\n",
    "df = pd.DataFrame(frame_data)\n",
    "```\n",
    "\n",
    "When creating a dataframe from a non-distributed object, it will take extra time to partition the data. When this is happening, you will see this message:\n",
    "\n",
    "```\n",
    "UserWarning: Distributing <class 'numpy.ndarray'> object. This may take some time.\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Note: Do not change this code!\n",
    "import numpy as np\n",
    "import pandas\n",
    "import sys\n",
    "import modin"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pandas.__version__"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "modin.__version__"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Implement your answer here. You are also free to play with the size\n",
    "# and shape of the DataFrame, but beware of exceeding your memory!\n",
    "\n",
    "import pandas as pd\n",
    "\n",
    "frame_data = np.random.randint(0, 100, size=(2**10, 2**5))\n",
    "df = pd.DataFrame(frame_data)\n",
    "\n",
    "# ***** Do not change the code below! It verifies that \n",
    "# ***** the exercise has been done correctly. *****\n",
    "\n",
    "try:\n",
    "    assert df is not None\n",
    "    assert frame_data is not None\n",
    "    assert isinstance(frame_data, np.ndarray)\n",
    "except:\n",
    "    raise AssertionError(\"Don't change too much of the original code!\")\n",
    "assert \"modin.pandas\" in sys.modules, \"Not quite correct. Remember the single line of code change (See above)\"\n",
    "\n",
    "import modin.pandas\n",
    "assert pd == modin.pandas, \"Remember the single line of code change (See above)\"\n",
    "assert hasattr(df, \"_query_compiler\"), \"Make sure that `df` is a modin.pandas DataFrame.\"\n",
    "\n",
    "print(\"Success! You only need to change one line of code!\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now that we have created a toy example for playing around with the DataFrame, let's print it out in different ways."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Concept for Exercise: Data Interaction and Printing\n",
    "\n",
    "When interacting with data, it is very important to look at different parts of the data (e.g. `df.head()`). Here we will show that you can print the modin.pandas DataFrame in the same ways you would pandas."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Print the first 10 lines.\n",
    "df.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Print the DataFrame.\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Free cell for custom interaction (Play around here!)\n",
    "df.add_prefix(\"col\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.count()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Please move on to [Exercise 2](./exercise_2.ipynb) when you are ready**"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}


================================================
FILE: examples/tutorial/jupyter/execution/pandas_on_ray/local/exercise_2.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "![LOGO](../../../img/MODIN_ver2_hrz.png)\n",
    "\n",
    "<center><h2>Scale your pandas workflows by changing one line of code</h2>\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Exercise 2: Speed improvements\n",
    "\n",
    "**GOAL**: Learn about common functionality that Modin speeds up by using all of your machine's cores."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Concept for Exercise: `read_csv` speedups\n",
    "\n",
    "The most commonly used data ingestion method used in pandas is CSV files (link to pandas survey). This concept is designed to give an idea of the kinds of speedups possible, even on a non-distributed filesystem. Modin also supports other file formats for parallel and distributed reads, which can be found in the documentation.\n",
    "\n",
    "![](../../../img/read_csv_perf.png)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We will import both Modin and pandas so that the speedups are evident.\n",
    "\n",
    "**Note: Rerunning the `read_csv` cells many times may result in degraded performance, depending on the memory of the machine**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import modin.pandas as pd\n",
    "import pandas\n",
    "import time\n",
    "from IPython.display import Markdown, display\n",
    "\n",
    "def printmd(string):\n",
    "    display(Markdown(string))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Dataset: 2015 NYC taxi trip data\n",
    "\n",
    "We will be using a version of this data already in S3, originally posted in this blog post: https://matthewrocklin.com/blog/work/2017/01/12/dask-dataframes\n",
    "\n",
    "**Size: ~1.8GB**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "path = \"s3://dask-data/nyc-taxi/2015/yellow_tripdata_2015-01.csv\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Optional:** Note that the dataset takes a while to download. To speed things up a bit, if you prefer to download this file once locally, you can run the following code in the notebook:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# [Optional] Download data locally. This may take a few minutes to download.\n",
    "# import urllib.request\n",
    "# url_path = \"https://dask-data.s3.amazonaws.com/nyc-taxi/2015/yellow_tripdata_2015-01.csv\"\n",
    "# urllib.request.urlretrieve(url_path, \"taxi.csv\")\n",
    "# path = \"taxi.csv\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## `pandas.read_csv`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "start = time.time()\n",
    "\n",
    "pandas_df = pandas.read_csv(path, parse_dates=[\"tpep_pickup_datetime\", \"tpep_dropoff_datetime\"], quoting=3)\n",
    "\n",
    "end = time.time()\n",
    "pandas_duration = end - start\n",
    "print(\"Time to read with pandas: {} seconds\".format(round(pandas_duration, 3)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Expect pandas to take >3 minutes on EC2, longer locally\n",
    "\n",
    "This is a good time to chat with your neighbor.\n",
    "\n",
    "Discussion topics:\n",
    "- Do you work with a large amount of data daily?\n",
    "- How big is your data?\n",
    "- What’s the common use case of your data?\n",
    "- Do you use any big data analytics tools?\n",
    "- Do you use any interactive analytics tool?\n",
    "- What are some drawbacks of your current interactive analytics tools today?"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## `modin.pandas.read_csv`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "start = time.time()\n",
    "\n",
    "modin_df = pd.read_csv(path, parse_dates=[\"tpep_pickup_datetime\", \"tpep_dropoff_datetime\"], quoting=3)\n",
    "\n",
    "end = time.time()\n",
    "modin_duration = end - start\n",
    "print(\"Time to read with Modin: {} seconds\".format(round(modin_duration, 3)))\n",
    "\n",
    "printmd(\"### Modin is {}x faster than pandas at `read_csv`!\".format(round(pandas_duration / modin_duration, 2)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Are they equal?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pandas_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "modin_df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Concept for exercise: Reduces\n",
    "\n",
    "In pandas, a reduce would be something along the lines of a `sum` or `count`. It computes some summary statistics about the rows or columns. We will be using `count`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "start = time.time()\n",
    "\n",
    "pandas_count = pandas_df.count()\n",
    "\n",
    "end = time.time()\n",
    "pandas_duration = end - start\n",
    "\n",
    "print(\"Time to count with pandas: {} seconds\".format(round(pandas_duration, 3)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "start = time.time()\n",
    "\n",
    "modin_count = modin_df.count()\n",
    "\n",
    "end = time.time()\n",
    "modin_duration = end - start\n",
    "print(\"Time to count with Modin: {} seconds\".format(round(modin_duration, 3)))\n",
    "\n",
    "printmd(\"### Modin is {}x faster than pandas at `count`!\".format(round(pandas_duration / modin_duration, 2)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Are they equal?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pandas_count"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "modin_count"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Concept for exercise: Map operations\n",
    "\n",
    "In pandas, map operations are operations that do a single pass over the data and do not change its shape. Operations like `isnull` and `applymap` are included in this. We will be using `isnull`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "start = time.time()\n",
    "\n",
    "pandas_isnull = pandas_df.isnull()\n",
    "\n",
    "end = time.time()\n",
    "pandas_duration = end - start\n",
    "\n",
    "print(\"Time to isnull with pandas: {} seconds\".format(round(pandas_duration, 3)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "start = time.time()\n",
    "\n",
    "modin_isnull = modin_df.isnull()\n",
    "\n",
    "end = time.time()\n",
    "modin_duration = end - start\n",
    "print(\"Time to isnull with Modin: {} seconds\".format(round(modin_duration, 3)))\n",
    "\n",
    "printmd(\"### Modin is {}x faster than pandas at `isnull`!\".format(round(pandas_duration / modin_duration, 2)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Are they equal?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pandas_isnull"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "modin_isnull"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Concept for exercise: Apply over a single column\n",
    "\n",
    "Sometimes we want to compute some summary statistics on a single column from our dataset."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "start = time.time()\n",
    "rounded_trip_distance_pandas = pandas_df[\"trip_distance\"].apply(round)\n",
    "\n",
    "end = time.time()\n",
    "pandas_duration = end - start\n",
    "print(\"Time to apply with pandas: {} seconds\".format(round(pandas_duration, 3)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "start = time.time()\n",
    "\n",
    "rounded_trip_distance_modin = modin_df[\"trip_distance\"].apply(round)\n",
    "\n",
    "end = time.time()\n",
    "modin_duration = end - start\n",
    "print(\"Time to apply with Modin: {} seconds\".format(round(modin_duration, 3)))\n",
    "\n",
    "printmd(\"### Modin is {}x faster than pandas at `apply` on one column!\".format(round(pandas_duration / modin_duration, 2)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Are they equal?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "rounded_trip_distance_pandas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "rounded_trip_distance_modin"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Concept for exercise: Add a column\n",
    "\n",
    "It is common to need to add a new column to an existing dataframe, here we show that this is significantly faster in Modin due to metadata management and an efficient zero copy implementation."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "start = time.time()\n",
    "pandas_df[\"rounded_trip_distance\"] = rounded_trip_distance_pandas\n",
    "\n",
    "end = time.time()\n",
    "pandas_duration = end - start\n",
    "print(\"Time to add a column with pandas: {} seconds\".format(round(pandas_duration, 3)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "start = time.time()\n",
    "\n",
    "modin_df[\"rounded_trip_distance\"] = rounded_trip_distance_modin\n",
    "\n",
    "end = time.time()\n",
    "modin_duration = end - start\n",
    "print(\"Time to add a column with Modin: {} seconds\".format(round(modin_duration, 3)))\n",
    "\n",
    "printmd(\"### Modin is {}x faster than pandas at adding a column!\".format(round(pandas_duration / modin_duration, 2)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Are they equal?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pandas_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "modin_df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Please move on to [Exercise 3](./exercise_3.ipynb) when you are ready**"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}


================================================
FILE: examples/tutorial/jupyter/execution/pandas_on_ray/local/exercise_3.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "![LOGO](../../../img/MODIN_ver2_hrz.png)\n",
    "\n",
    "<center><h2>Scale your pandas workflows by changing one line of code</h2>\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Exercise 3: Not Implemented\n",
    "\n",
    "**GOAL**: Learn what happens when a function is not yet supported in Modin as well as how to extend Modin's functionality using the DataFrame Algebra."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "When functionality has not yet been implemented, we default to pandas\n",
    "\n",
    "![](../../../img/convert_to_pandas.png)\n",
    "\n",
    "We convert a Modin dataframe to pandas to do the operation, then convert it back once it is finished. These operations will have a high overhead due to the communication involved and will take longer than pandas.\n",
    "\n",
    "When this is happening, a warning will be given to the user to inform them that this operation will take longer than usual. For example, `DataFrame.mask` is not yet implemented. In this case, when a user tries to use it, they will see this warning:\n",
    "\n",
    "```\n",
    "UserWarning: `DataFrame.mask` defaulting to pandas implementation.\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Concept for exercise: Default to pandas\n",
    "\n",
    "In this section of the exercise we will see first-hand how the runtime is affected by operations that are not implemented."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import modin.pandas as pd\n",
    "import pandas\n",
    "import numpy as np\n",
    "import time\n",
    "\n",
    "frame_data = np.random.randint(0, 100, size=(2**18, 2**8))\n",
    "df = pd.DataFrame(frame_data).add_prefix(\"col\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pandas_df = pandas.DataFrame(frame_data).add_prefix(\"col\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "modin_start = time.time()\n",
    "\n",
    "print(df.mask(df < 50))\n",
    "\n",
    "modin_end = time.time()\n",
    "print(\"Modin mask took {} seconds.\".format(round(modin_end - modin_start, 4)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pandas_start = time.time()\n",
    "\n",
    "print(pandas_df.mask(pandas_df < 50))\n",
    "\n",
    "pandas_end = time.time()\n",
    "print(\"pandas mask took {} seconds.\".format(round(pandas_end - pandas_start, 4)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Concept for exercise: Register custom functions\n",
    "\n",
    "Modin's user-facing API is pandas, but it is possible that we do not yet support your favorite or most-needed functionalities. Your user-defined function may also be able to be executed more efficiently if you pre-define the type of function it is (e.g. map, reduce, etc.) using the DataFrame Algebra. To solve either case, it is possible to register a custom function to be applied to your data."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Registering a custom function for all query compilers\n",
    "\n",
    "To register a custom function for a query compiler, we first need to import it:\n",
    "\n",
    "```python\n",
    "from modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler\n",
    "```\n",
    "\n",
    "The `PandasQueryCompiler` is responsible for defining and compiling the queries that can be operated on by Modin, and is specific to the pandas storage format. Any queries defined here must also both be compatible with and result in a `pandas.DataFrame`. Many functionalities are very simply implemented, as you can see in the current code: [Link](https://github.com/modin-project/modin/blob/7a8158873e77cb5f1a5a3b89be4ddac89f576269/modin/core/storage_formats/pandas/query_compiler.py#L216).\n",
    "\n",
    "If we want to register a new function, we need to understand what kind of function it is. In our example, we will try to implement a `kurtosis` on the unary negation of the values in the dataframe, which is a map (unargy negation of each cell) followed by a reduce. So we next want to import the function type so we can use it in our definition:\n",
    "\n",
    "```python\n",
    "from modin.core.dataframe.algebra import TreeReduce\n",
    "```\n",
    "\n",
    "Then we can just use the `TreeReduce.register` `classmethod` and assign it to the `PandasQueryCompiler`:\n",
    "\n",
    "```python\n",
    "PandasQueryCompiler.neg_kurtosis = TreeReduce.register(lambda cell_value, **kwargs: ~cell_value, pandas.DataFrame.kurtosis)\n",
    "```\n",
    "\n",
    "We include `**kwargs` to the `lambda` function since the query compiler will pass all keyword arguments to both the map and reduce functions.\n",
    "\n",
    "Finally, we want a handle to it from the `DataFrame`, so we need to create a way to do that:\n",
    "\n",
    "```python\n",
    "def neg_kurtosis_func(self, **kwargs):\n",
    "    # The constructor allows you to pass in a query compiler as a keyword argument\n",
    "    return self.__constructor__(query_compiler=self._query_compiler.neg_kurtosis(**kwargs))\n",
    "\n",
    "pd.DataFrame.neg_kurtosis_custom = neg_kurtosis_func\n",
    "```\n",
    "\n",
    "And then you can use it like you usually would:\n",
    "\n",
    "```python\n",
    "df.neg_kurtosis_custom()\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler\n",
    "from modin.core.dataframe.algebra import TreeReduce"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "PandasQueryCompiler.neg_kurtosis_custom = TreeReduce.register(lambda cell_value, **kwargs: ~cell_value,\n",
    "                                                             pandas.DataFrame.kurtosis)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pandas._libs import lib\n",
    "# The function signature came from the pandas documentation:\n",
    "# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.kurtosis.html\n",
    "def neg_kurtosis_func(self, axis=lib.no_default, skipna=True, level=None, numeric_only=None, **kwargs):\n",
    "    # We need to specify the axis for the query compiler\n",
    "    if axis in [None, lib.no_default]:\n",
    "        axis = 0\n",
    "    # The constructor allows you to pass in a query compiler as a keyword argument\n",
    "    # Reduce dimension is used for reduces\n",
    "    # We also pass all keyword arguments here to ensure correctness\n",
    "    return self._reduce_dimension(\n",
    "        self._query_compiler.neg_kurtosis_custom(\n",
    "            axis=axis, skipna=skipna, level=level, numeric_only=numeric_only, **kwargs\n",
    "        )\n",
    "    )\n",
    "\n",
    "pd.DataFrame.neg_kurtosis_custom = neg_kurtosis_func"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Speed improvements\n",
    "If we were to try and replicate this functionality using the pandas API, we would need to call `df.applymap` with our unary negation function, and subsequently `df.kurtosis` on the result of the first call. Let's see how this compares with our new, custom function!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "start = time.time()\n",
    "\n",
    "print(pandas_df.applymap(lambda cell_value: ~cell_value).kurtosis())\n",
    "\n",
    "end = time.time()\n",
    "pandas_duration = end - start\n",
    "print(\"pandas unary negation kurtosis took {} seconds.\".format(pandas_duration))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "start = time.time()\n",
    "\n",
    "print(df.applymap(lambda x: ~x).kurtosis())\n",
    "\n",
    "end = time.time()\n",
    "modin_duration = end - start\n",
    "print(\"Modin unary negation kurtosis took {} seconds.\".format(modin_duration))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "custom_start = time.time()\n",
    "\n",
    "print(df.neg_kurtosis_custom())\n",
    "\n",
    "custom_end = time.time()\n",
    "modin_custom_duration = custom_end - custom_start\n",
    "print(\"Modin neg_kurtosis_custom took {} seconds.\".format(modin_custom_duration))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.display import Markdown, display\n",
    "\n",
    "display(Markdown(\"### As expected, Modin is {}x faster than pandas when chaining the functions; however we see that our custom function is even faster than that - beating pandas by {}x, and Modin (when chaining the functions) by {}x!\".format(round(pandas_duration / modin_duration, 2), round(pandas_duration / modin_custom_duration, 2), round(modin_duration / modin_custom_duration, 2))))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Congratulations! You have just implemented new DataFrame functionality!\n",
    "\n",
    "## Consider opening a pull request: https://github.com/modin-project/modin/pulls\n",
    "\n",
    "For a complete list of what is implemented, see the [Supported APIs](https://modin.readthedocs.io/en/latest/supported_apis/index.html) section."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Test your knowledge: Add a custom function for another tree reduce: finding `DataFrame.mad` after squaring all of the values\n",
    "\n",
    "See the pandas documentation for the correct signature: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.mad.html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "modin_mad_custom_start = time.time()\n",
    "\n",
    "# Implement your function here! Put the result of your custom squared `mad` in the variable `modin_mad_custom`\n",
    "# Hint: Look at the kurtosis walkthrough above\n",
    "\n",
    "modin_mad_custom = ...\n",
    "print(modin_mad_custom)\n",
    "\n",
    "modin_mad_custom_end = time.time()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Evaluation code, do not change!\n",
    "modin_mad_start = time.time()\n",
    "modin_mad = df.applymap(lambda x: x**2).mad()\n",
    "print(modin_mad)\n",
    "modin_mad_end = time.time()\n",
    "\n",
    "assert modin_mad_end - modin_mad_start > modin_mad_custom_end - modin_mad_custom_start, \\\n",
    "    \"Your implementation was too slow, or you used the chaining functions approach. Try again\"\n",
    "assert modin_mad._to_pandas().equals(modin_mad_custom._to_pandas()), \"Your result did not match the result of chaining the functions, try again\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Now that you are able to create custom functions, you know enough to contribute to Modin!\n",
    "\n",
    "**Please move on to [Exercise 4](./exercise_4.ipynb) when you are ready**"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}


================================================
FILE: examples/tutorial/jupyter/execution/pandas_on_ray/local/exercise_4.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "99f41d2d",
   "metadata": {},
   "source": [
    "![LOGO](../../../img/MODIN_ver2_hrz.png)\n",
    "\n",
    "<center><h2>Scale your pandas workflows by changing one line of code</h2>\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fdda1c9c",
   "metadata": {},
   "source": [
    "# Exercise 4: Experimental Features\n",
    "\n",
    "**GOAL**: Explore some of the experimental features being added to Modin."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9b487c51",
   "metadata": {},
   "source": [
    "### Concept for exercise: Progress Bar\n",
    "\n",
    "\n",
    "Sometimes when running long functions on DataFrames, it can be hard to tell how much progress has been made, as well as how much longer the function will run. A progress bar allows users to see the estimated progress and completion time of each line they run, in environments such as a shell or Jupyter notebook.\n",
    "\n",
    "To enable Modin's Progress Bar, add the following lines of code after importing `modin.pandas`:\n",
    "```python\n",
    "from modin.config import ProgressBar\n",
    "ProgressBar.enable()\n",
    "```\n",
    "\n",
    "In this exercise, we'll see how the progress bar can improve our experience running dataframe queries!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f95d4874",
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "import modin.pandas as pd\n",
    "import numpy as np\n",
    "from modin.config import ProgressBar\n",
    "ProgressBar.enable()\n",
    "\n",
    "frame_data = np.random.randint(0, 100, size=(2**18, 2**8))\n",
    "df = pd.DataFrame(frame_data).add_prefix(\"col\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6905bc6b",
   "metadata": {},
   "source": [
    "On longer functions, its nice to be able to see an estimation of how much longer things will take!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "236ec8e2",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = df.applymap(lambda x: ~x)\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e7bf87a5",
   "metadata": {},
   "source": [
    "### Concept for exercise: Spreadsheet\n",
    "\n",
    "For those who have worked with Excel, the Spreadsheet API will definitely feel familiar! The Spreadsheet API is a Jupyter notebook widget that allows us to interact with Modin DataFrames in a spreadsheet-like fashion while taking advantage of the underlying capabilities of Modin. The widget makes it quick and easy to explore, sort, filter, and edit data as well as export the changes as reproducible code.\n",
    "\n",
    "Let's look back at a subset of the 2015 NYC Taxi Data from Exercise 2, and see how the Spreadsheet API can make it easy to play with the data!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5d5c4a3e",
   "metadata": {},
   "outputs": [],
   "source": [
    "!jupyter nbextension enable --py --sys-prefix modin_spreadsheet\n",
    "ProgressBar.disable()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dc8d5903",
   "metadata": {},
   "outputs": [],
   "source": [
    "import modin.experimental.spreadsheet as mss\n",
    "\n",
    "s3_path = \"s3://dask-data/nyc-taxi/2015/yellow_tripdata_2015-01.csv\"\n",
    "modin_df = pd.read_csv(s3_path, parse_dates=[\"tpep_pickup_datetime\", \"tpep_dropoff_datetime\"], quoting=3, nrows=1000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "145e7bbe",
   "metadata": {},
   "outputs": [],
   "source": [
    "spreadsheet = mss.from_dataframe(modin_df)\n",
    "spreadsheet"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3c18b7f2",
   "metadata": {},
   "source": [
    "### Thank you for participating!"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}


================================================
FILE: examples/tutorial/jupyter/execution/pandas_on_ray/requirements.txt
================================================
fsspec>=2022.11.0
jupyterlab
ipywidgets
tqdm>=4.60.0
modin[ray]
modin[spreadsheet]


================================================
FILE: examples/tutorial/jupyter/execution/pandas_on_ray/test/test_notebooks.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import os
import sys

import nbformat

# Absolute path six directory levels above the directory containing this file.
# It is put at the front of `sys.path` before the `examples...` import below
# runs, so that the `examples` package can be resolved from there.
MODIN_DIR = os.path.abspath(
    os.path.join(os.path.dirname(__file__), *[".." for _ in range(6)])
)
sys.path.insert(0, MODIN_DIR)
from examples.tutorial.jupyter.execution.test.utils import (  # noqa: E402
    _execute_notebook,
    _find_code_cell_idx,
    _replace_str,
    download_taxi_dataset,
    test_dataset_path,
)

# Directory holding the tutorial notebooks exercised by the tests below.
# NOTE(review): this is a relative path, so it is resolved against the current
# working directory -- presumably the tests are launched from the repository
# root; confirm against the CI invocation.
local_notebooks_dir = "examples/tutorial/jupyter/execution/pandas_on_ray/local"


# the user is expected to swap 'import pandas as pd' for
# 'import modin.pandas as pd' in this notebook; the test does it for them
def test_exercise_1():
    """Apply the one-line import change to exercise 1 and execute the result."""
    original_path = os.path.join(local_notebooks_dir, "exercise_1.ipynb")
    modified_notebook_path = os.path.join(local_notebooks_dir, "exercise_1_test.ipynb")

    nb = nbformat.read(original_path, as_version=nbformat.NO_CONVERT)
    _replace_str(nb, "import pandas as pd", "import modin.pandas as pd")
    nbformat.write(nb, modified_notebook_path)

    _execute_notebook(modified_notebook_path)


# the notebook runs unmodified, but the test swaps in a smaller dataset
def test_exercise_2():
    """Point exercise 2 at the test dataset and execute the patched notebook."""
    original_path = os.path.join(local_notebooks_dir, "exercise_2.ipynb")
    modified_notebook_path = os.path.join(local_notebooks_dir, "exercise_2_test.ipynb")
    nb = nbformat.read(original_path, as_version=nbformat.NO_CONVERT)

    # Comment out the assignment of the remote S3 path.
    s3_assignment = 'path = "s3://dask-data/nyc-taxi/2015/yellow_tripdata_2015-01.csv"'
    _replace_str(nb, s3_assignment, "# " + s3_assignment)

    # Overwrite the optional download cell with one that defines `path` as the
    # test dataset location, followed by the shared download snippet.
    optional_cell_idx = _find_code_cell_idx(nb, "[Optional] Download data locally.")
    nb["cells"][optional_cell_idx]["source"] = (
        f'path = "{test_dataset_path}"\n' + download_taxi_dataset
    )

    nbformat.write(nb, modified_notebook_path)
    _execute_notebook(modified_notebook_path)


# the user is expected to supply a custom `mad` implementation in this
# notebook; the test injects a reference solution in its place
def test_exercise_3():
    """Substitute a reference solution into exercise 3 and write the notebook out."""
    mad_solution = """PandasQueryCompiler.sq_mad_custom = TreeReduce.register(lambda cell_value, **kwargs: cell_value ** 2,
                                                             pandas.DataFrame.mad)

def sq_mad_func(self, axis=None, skipna=True, level=None, **kwargs):
    if axis is None:
        axis = 0

    return self._reduce_dimension(
        self._query_compiler.sq_mad_custom(
            axis=axis, skipna=skipna, level=level, **kwargs
        )
    )

pd.DataFrame.sq_mad_custom = sq_mad_func

modin_mad_custom = df.sq_mad_custom()
    """

    original_path = os.path.join(local_notebooks_dir, "exercise_3.ipynb")
    nb = nbformat.read(original_path, as_version=nbformat.NO_CONVERT)

    # The placeholder assignment in the notebook is replaced by the solution.
    _replace_str(nb, "modin_mad_custom = ...", mad_solution)

    modified_notebook_path = os.path.join(local_notebooks_dir, "exercise_3_test.ipynb")
    nbformat.write(nb, modified_notebook_path)
    # need to update example, `.mad` doesn't exist
    # _execute_notebook(modified_notebook_path)


# the notebook runs unmodified, but the test swaps in a smaller dataset
def test_exercise_4():
    """Point exercise 4 at the test dataset and execute the patched notebook."""
    original_path = os.path.join(local_notebooks_dir, "exercise_4.ipynb")
    modified_notebook_path = os.path.join(local_notebooks_dir, "exercise_4_test.ipynb")
    nb = nbformat.read(original_path, as_version=nbformat.NO_CONVERT)

    # The remote S3 assignment is replaced by the test dataset path followed by
    # the shared download snippet.
    remote_assignment = (
        's3_path = "s3://dask-data/nyc-taxi/2015/yellow_tripdata_2015-01.csv"'
    )
    local_assignment = f's3_path = "{test_dataset_path}"\n' + download_taxi_dataset
    _replace_str(nb, remote_assignment, local_assignment)

    nbformat.write(nb, modified_notebook_path)
    _execute_notebook(modified_notebook_path)


================================================
FILE: examples/tutorial/jupyter/execution/pandas_on_unidist/Dockerfile
================================================
FROM continuumio/miniconda3

# `conda env create -f` reads the spec from the image filesystem, so the file
# has to be copied in from the build context first; without this step the
# command cannot find it.
# NOTE(review): assumes the image is built with this directory (the one that
# holds jupyter_unidist_env.yml) as the build context -- confirm.
COPY jupyter_unidist_env.yml .
RUN conda env create -f jupyter_unidist_env.yml
RUN conda install -c conda-forge psutil setproctitle


================================================
FILE: examples/tutorial/jupyter/execution/pandas_on_unidist/README.md
================================================
# Jupyter notebook examples to run with PandasOnUnidist

Currently, Modin supports `PandasOnUnidist` execution only with the MPI backend of [unidist](https://github.com/modin-project/unidist).
Running a Jupyter notebook with MPI has some specifics: namely, you should use the `mpiexec` command.

```bash
mpiexec -n 1 jupyter notebook
```

**Important**

MPI does not yet work reliably in interactive environments such as Jupyter notebooks, so some things may not work.
For example, if you are experiencing the error `The kernel appears to have died. It will restart automatically.`,
you may want to modify `kernel.json` file or create a new one in order to fix the problem.

For simplicity, you can just run `setup_kernel.py` script located in this directory. This will install a new MPI enabled kernel,
which you can then select using the dropdown menu in your browser. Otherwise, you can follow the steps below:

1. First, locate the `kernel.json` file with the `jupyter kernelspec list` command. The output should generally look like this.

```bash
jupyter kernelspec list

Available kernels:
  python3    $PREFIX/share/jupyter/kernels/python3
```

`kernel.json` file should be located in `python3` folder.

2. Second, you should make a copy of the `python3` folder, say to `python3mpi` folder.

```bash
cp -r $PREFIX/share/jupyter/kernels/python3 $PREFIX/share/jupyter/kernels/python3mpi
```

3. Third, modify `kernel.json` file in `python3mpi` folder to add `mpiexec -n 1` command
(like "mpiexec", "-n", "1") to the beginning of the launched command (`argv`).

4. Fourth, change `display_name` in `kernel.json` file to something like `Python 3 (ipykernel) with MPI`.
That way you can specifically select the MPI-enabled Python kernel using the dropdown menu in your browser.

## Run Jupyter Notebooks with PandasOnUnidist

After the `setup_kernel.py` script is run or the steps above are done, you can run a jupyter notebook with `PandasOnUnidist` in a normal way.

```bash
jupyter notebook
```


================================================
FILE: examples/tutorial/jupyter/execution/pandas_on_unidist/jupyter_unidist_env.yml
================================================
name: jupyter_modin_on_unidist
channels:
  - conda-forge
dependencies:
  - pip
  - fsspec>=2022.11.0
  - jupyterlab
  - ipywidgets
  - modin-mpi
  - pip:
      - modin[spreadsheet]


================================================
FILE: examples/tutorial/jupyter/execution/pandas_on_unidist/local/exercise_1.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "![LOGO](../../../img/MODIN_ver2_hrz.png)\n",
    "\n",
    "<center><h2>Scale your pandas workflows by changing one line of code</h2>\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Exercise 1: How to use Modin\n",
    "\n",
    "**GOAL**: Learn how to import Modin to accelerate and scale pandas workflows."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Modin is a drop-in replacement for pandas that distributes the computation \n",
    "across all of the cores in your machine or in a cluster.\n",
    "In practical terms, this means that you can continue using the same pandas scripts\n",
    "as before and expect the behavior and results to be the same. The only thing that needs\n",
    "to change is the import statement. Normally, you would change:\n",
    "\n",
    "```python\n",
    "import pandas as pd\n",
    "```\n",
    "\n",
    "to:\n",
    "\n",
    "```python\n",
    "import modin.pandas as pd\n",
    "```\n",
    "\n",
    "Changing this line of code will allow you to use all of the cores in your machine to do computation on your data. One of the major performance bottlenecks of pandas is that it only uses a single core for any given computation. Modin exposes an API that is identical to pandas, allowing you to continue interacting with your data as you would with pandas. There are no additional commands required to use Modin locally. Partitioning, scheduling, data transfer, and other related concerns are all handled by Modin under the hood."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<p style=\"text-align:left;\">\n",
    "        <h1>pandas on a multicore laptop\n",
    "    <span style=\"float:right;\">\n",
    "        Modin on a multicore laptop\n",
    "    </span>\n",
    "\n",
    "<div>\n",
    "<img align=\"left\" src=\"../../../img/pandas_multicore.png\"><img src=\"../../../img/modin_multicore.png\">\n",
    "</div>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Concept for exercise: setting Modin engine\n",
    "\n",
    "Modin uses Ray as an execution engine by default so no additional action is required to start to use it. Alternatively, if you need to use another engine, it should be specified either by setting the Modin config or by setting Modin environment variable before the first operation with Modin as it is shown below. Also, note that the full list of Modin configs and corresponding environment variables can be found in the [Modin Configuration Settings](https://modin.readthedocs.io/en/stable/flow/modin/config.html#modin-configs-list) section of the Modin documentation.\n",
    "\n",
    "One of the execution engines that Modin uses is Unidist. Currently, Modin only supports MPI through unidist, so it should be specified either by setting the Unidist config or by setting Unidist environment variable. The full list of Unidist configs and corresponding environment variables can be found in the [Unidist Configuration Settings](https://unidist.readthedocs.io/en/latest/flow/unidist/config.html#unidist-configuration-settings-list) section of the Unidist documentation."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "vscode": {
     "languageId": "python"
    }
   },
   "outputs": [],
   "source": [
    "# Modin engine and Unidist backend can be specified either by config\n",
    "import modin.config as modin_cfg\n",
    "import unidist.config as unidist_cfg\n",
    "modin_cfg.Engine.put(\"unidist\")\n",
    "unidist_cfg.Backend.put(\"mpi\")\n",
    "\n",
    "# or by setting the environment variable\n",
    "# import os\n",
    "# os.environ[\"MODIN_ENGINE\"] = \"unidist\"\n",
    "# os.environ[\"UNIDIST_BACKEND\"] = \"mpi\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Concept for exercise: Dataframe constructor\n",
    "\n",
    "Often when playing around in pandas, it is useful to create a DataFrame with the constructor. That is where we will start.\n",
    "\n",
    "```python\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "frame_data = np.random.randint(0, 100, size=(2**10, 2**5))\n",
    "df = pd.DataFrame(frame_data)\n",
    "```\n",
    "\n",
    "When creating a dataframe from a non-distributed object, it will take extra time to partition the data. When this is happening, you will see this message:\n",
    "\n",
    "```\n",
    "UserWarning: Distributing <class 'numpy.ndarray'> object. This may take some time.\n",
    "```\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "vscode": {
     "languageId": "python"
    }
   },
   "outputs": [],
   "source": [
    "# Note: Do not change this code!\n",
    "import numpy as np\n",
    "import pandas\n",
    "import sys\n",
    "import modin"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "vscode": {
     "languageId": "python"
    }
   },
   "outputs": [],
   "source": [
    "pandas.__version__"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "vscode": {
     "languageId": "python"
    }
   },
   "outputs": [],
   "source": [
    "modin.__version__"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "vscode": {
     "languageId": "python"
    }
   },
   "outputs": [],
   "source": [
    "# Implement your answer here. You are also free to play with the size\n",
    "# and shape of the DataFrame, but beware of exceeding your memory!\n",
    "\n",
    "# import pandas as pd\n",
    "import pandas as pd\n",
    "\n",
    "frame_data = np.random.randint(0, 100, size=(2**5, 2**5))\n",
    "df = pd.DataFrame(frame_data)\n",
    "\n",
    "# ***** Do not change the code below! It verifies that \n",
    "# ***** the exercise has been done correctly. *****\n",
    "\n",
    "try:\n",
    "    assert df is not None\n",
    "    assert frame_data is not None\n",
    "    assert isinstance(frame_data, np.ndarray)\n",
    "except:\n",
    "    raise AssertionError(\"Don't change too much of the original code!\")\n",
    "assert \"modin.pandas\" in sys.modules, \"Not quite correct. Remember the single line of code change (See above)\"\n",
    "\n",
    "import modin.pandas\n",
    "assert pd == modin.pandas, \"Remember the single line of code change (See above)\"\n",
    "assert hasattr(df, \"_query_compiler\"), \"Make sure that `df` is a modin.pandas DataFrame.\"\n",
    "\n",
    "print(\"Success! You only need to change one line of code!\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now that we have created a toy example for playing around with the DataFrame, let's print it out in different ways."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Concept for Exercise: Data Interaction and Printing\n",
    "\n",
    "When interacting with data, it is very imporant to look at different parts of the data (e.g. `df.head()`). Here we will show that you can print the modin.pandas DataFrame in the same ways you would pandas."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "vscode": {
     "languageId": "python"
    }
   },
   "outputs": [],
   "source": [
    "# Print the first 10 lines.\n",
    "df.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "vscode": {
     "languageId": "python"
    }
   },
   "outputs": [],
   "source": [
    "# Print the DataFrame.\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "vscode": {
     "languageId": "python"
    }
   },
   "outputs": [],
   "source": [
    "# Free cell for custom interaction (Play around here!)\n",
    "df.add_prefix(\"col\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "vscode": {
     "languageId": "python"
    }
   },
   "outputs": [],
   "source": [
    "df.count()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Please move on to [Exercise 2](./exercise_2.ipynb) when you are ready**"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel) with MPI",
   "language": "python",
   "name": "python3mpi"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}


================================================
FILE: examples/tutorial/jupyter/execution/pandas_on_unidist/local/exercise_2.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "![LOGO](../../../img/MODIN_ver2_hrz.png)\n",
    "\n",
    "<center><h2>Scale your pandas workflows by changing one line of code</h2>\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Exercise 2: Speed improvements\n",
    "\n",
    "**GOAL**: Learn about common functionality that Modin speeds up by using all of your machine's cores."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Concept for Exercise: `read_csv` speedups\n",
    "\n",
    "The most commonly used data ingestion method used in pandas is CSV files (link to pandas survey). This concept is designed to give an idea of the kinds of speedups possible, even on a non-distributed filesystem. Modin also supports other file formats for parallel and distributed reads, which can be found in the documentation. We will import both Modin and pandas so that the speedups are evident.\n",
    "\n",
    "**Note: Rerunning the `read_csv` cells many times may result in degraded performance, depending on the memory of the machine**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import modin.pandas as pd\n",
    "import pandas\n",
    "import time\n",
    "from IPython.display import Markdown, display\n",
    "\n",
    "def printmd(string):\n",
    "    display(Markdown(string))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Dataset: 2015 NYC taxi trip data\n",
    "\n",
    "\n",
    "We will be using a version of this data already in S3, originally posted in this blog post: https://matthewrocklin.com/blog/work/2017/01/12/dask-dataframes\n",
    "\n",
    "**Size: ~1.8GB**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "path = \"s3://dask-data/nyc-taxi/2015/yellow_tripdata_2015-01.csv\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Modin execution engine setting:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import modin.config as modin_cfg\n",
    "import unidist.config as unidist_cfg\n",
    "modin_cfg.Engine.put(\"unidist\")\n",
    "unidist_cfg.Backend.put(\"mpi\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## `pandas.read_csv`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "start = time.time()\n",
    "\n",
    "pandas_df = pandas.read_csv(path, parse_dates=[\"tpep_pickup_datetime\", \"tpep_dropoff_datetime\"], quoting=3)\n",
    "\n",
    "end = time.time()\n",
    "pandas_duration = end - start\n",
    "print(\"Time to read with pandas: {} seconds\".format(round(pandas_duration, 3)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Expect pandas to take >3 minutes on EC2, longer locally\n",
    "\n",
    "This is a good time to chat with your neighbor\n",
    "Dicussion topics\n",
    "- Do you work with a large amount of data daily?\n",
    "- How big is your data?\n",
    "- What’s the common use case of your data?\n",
    "- Do you use any big data analytics tools?\n",
    "- Do you use any interactive analytics tool?\n",
    "- What’s are some drawbacks of your current interative analytic tools today?"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## `modin.pandas.read_csv`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "start = time.time()\n",
    "\n",
    "modin_df = pd.read_csv(path, parse_dates=[\"tpep_pickup_datetime\", \"tpep_dropoff_datetime\"], quoting=3)\n",
    "\n",
    "end = time.time()\n",
    "modin_duration = end - start\n",
    "print(\"Time to read with Modin: {} seconds\".format(round(modin_duration, 3)))\n",
    "\n",
    "printmd(\"### Modin is {}x faster than pandas at `read_csv`!\".format(round(pandas_duration / modin_duration, 2)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Are they equal?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pandas_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "modin_df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Concept for exercise: Reduces\n",
    "\n",
    "In pandas, a reduce would be something along the lines of a `sum` or `count`. It computes some summary statistics about the rows or columns. We will be using `count`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "start = time.time()\n",
    "\n",
    "pandas_count = pandas_df.count()\n",
    "\n",
    "end = time.time()\n",
    "pandas_duration = end - start\n",
    "\n",
    "print(\"Time to count with pandas: {} seconds\".format(round(pandas_duration, 3)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "start = time.time()\n",
    "\n",
    "modin_count = modin_df.count()\n",
    "\n",
    "end = time.time()\n",
    "modin_duration = end - start\n",
    "print(\"Time to count with Modin: {} seconds\".format(round(modin_duration, 3)))\n",
    "\n",
    "printmd(\"### Modin is {}x faster than pandas at `count`!\".format(round(pandas_duration / modin_duration, 2)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Are they equal?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pandas_count"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "modin_count"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Concept for exercise: Map operations\n",
    "\n",
    "In pandas, map operations are operations that do a single pass over the data and do not change its shape. Operations like `isnull` and `applymap` are included in this. We will be using `isnull`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "start = time.time()\n",
    "\n",
    "pandas_isnull = pandas_df.isnull()\n",
    "\n",
    "end = time.time()\n",
    "pandas_duration = end - start\n",
    "\n",
    "print(\"Time to isnull with pandas: {} seconds\".format(round(pandas_duration, 3)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "start = time.time()\n",
    "\n",
    "modin_isnull = modin_df.isnull()\n",
    "\n",
    "end = time.time()\n",
    "modin_duration = end - start\n",
    "print(\"Time to isnull with Modin: {} seconds\".format(round(modin_duration, 3)))\n",
    "\n",
    "printmd(\"### Modin is {}x faster than pandas at `isnull`!\".format(round(pandas_duration / modin_duration, 2)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Are they equal?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pandas_isnull"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "modin_isnull"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Concept for exercise: Apply over a single column\n",
    "\n",
    "Sometimes we want to compute some summary statistics on a single column from our dataset."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "start = time.time()\n",
    "rounded_trip_distance_pandas = pandas_df[\"trip_distance\"].apply(round)\n",
    "\n",
    "end = time.time()\n",
    "pandas_duration = end - start\n",
    "print(\"Time to groupby with pandas: {} seconds\".format(round(pandas_duration, 3)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "start = time.time()\n",
    "\n",
    "rounded_trip_distance_modin = modin_df[\"trip_distance\"].apply(round)\n",
    "\n",
    "end = time.time()\n",
    "modin_duration = end - start\n",
    "print(\"Time to add a column with Modin: {} seconds\".format(round(modin_duration, 3)))\n",
    "\n",
    "printmd(\"### Modin is {}x faster than pandas at `apply` on one column!\".format(round(pandas_duration / modin_duration, 2)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Are they equal?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "rounded_trip_distance_pandas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "rounded_trip_distance_modin"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Concept for exercise: Add a column\n",
    "\n",
    "It is common to need to add a new column to an existing dataframe, here we show that this is significantly faster in Modin due to metadata management and an efficient zero copy implementation."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "start = time.time()\n",
    "pandas_df[\"rounded_trip_distance\"] = rounded_trip_distance_pandas\n",
    "\n",
    "end = time.time()\n",
    "pandas_duration = end - start\n",
    "print(\"Time to groupby with pandas: {} seconds\".format(round(pandas_duration, 3)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "start = time.time()\n",
    "\n",
    "modin_df[\"rounded_trip_distance\"] = rounded_trip_distance_modin\n",
    "\n",
    "end = time.time()\n",
    "modin_duration = end - start\n",
    "print(\"Time to add a column with Modin: {} seconds\".format(round(modin_duration, 3)))\n",
    "\n",
    "printmd(\"### Modin is {}x faster than pandas add a column!\".format(round(pandas_duration / modin_duration, 2)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Are they equal?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pandas_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "modin_df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Please move on to [Exercise 3](./exercise_3.ipynb) when you are ready**"
   ]
  }
 ],
 "metadata": {
    "kernelspec": {
     "display_name": "Python 3 (ipykernel) with MPI",
     "language": "python",
     "name": "python3mpi"
    },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}


================================================
FILE: examples/tutorial/jupyter/execution/pandas_on_unidist/local/exercise_3.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "![LOGO](../../../img/MODIN_ver2_hrz.png)\n",
    "\n",
    "<center><h2>Scale your pandas workflows by changing one line of code</h2>\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Exercise 3: Not Implemented\n",
    "\n",
    "**GOAL**: Learn what happens when a function is not yet supported in Modin as well as how to extend Modin's functionality using the DataFrame Algebra."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "When functionality has not yet been implemented, we default to pandas\n",
    "\n",
    "![](../../../img/convert_to_pandas.png)\n",
    "\n",
    "We convert a Modin dataframe to pandas to do the operation, then convert it back once it is finished. These operations will have a high overhead due to the communication involved and will take longer than pandas.\n",
    "\n",
    "When this is happening, a warning will be given to the user to inform them that this operation will take longer than usual. For example, `DataFrame.mask` is not yet implemented. In this case, when a user tries to use it, they will see this warning:\n",
    "\n",
    "```\n",
    "UserWarning: `DataFrame.mask` defaulting to pandas implementation.\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Concept for exercise: Default to pandas\n",
    "\n",
    "In this section of the exercise we will see first-hand how the runtime is affected by operations that are not implemented."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import modin.pandas as pd\n",
    "import pandas\n",
    "import numpy as np\n",
    "import time\n",
    "import modin.config as modin_cfg\n",
    "import unidist.config as unidist_cfg\n",
    "modin_cfg.Engine.put(\"unidist\")\n",
    "unidist_cfg.Backend.put(\"mpi\")\n",
    "\n",
    "frame_data = np.random.randint(0, 100, size=(2**18, 2**8))\n",
    "df = pd.DataFrame(frame_data).add_prefix(\"col\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pandas_df = pandas.DataFrame(frame_data).add_prefix(\"col\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "modin_start = time.time()\n",
    "\n",
    "print(df.mask(df < 50))\n",
    "\n",
    "modin_end = time.time()\n",
    "print(\"Modin mask took {} seconds.\".format(round(modin_end - modin_start, 4)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pandas_start = time.time()\n",
    "\n",
    "print(pandas_df.mask(pandas_df < 50))\n",
    "\n",
    "pandas_end = time.time()\n",
    "print(\"pandas mask took {} seconds.\".format(round(pandas_end - pandas_start, 4)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Concept for exercise: Register custom functions\n",
    "\n",
    "Modin's user-facing API is pandas, but it is possible that we do not yet support your favorite or most-needed functionalities. Your user-defined function may also be able to be executed more efficiently if you pre-define the type of function it is (e.g. map, reduce, etc.) using the DataFrame Algebra. To solve either case, it is possible to register a custom function to be applied to your data."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Registering a custom function for all query compilers\n",
    "\n",
    "To register a custom function for a query compiler, we first need to import it:\n",
    "\n",
    "```python\n",
    "from modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler\n",
    "```\n",
    "\n",
    "The `PandasQueryCompiler` is responsible for defining and compiling the queries that can be operated on by Modin, and is specific to the pandas storage format. Any queries defined here must also both be compatible with and result in a `pandas.DataFrame`. Many functionalities are very simply implemented, as you can see in the current code: [Link](https://github.com/modin-project/modin/blob/7a8158873e77cb5f1a5a3b89be4ddac89f576269/modin/core/storage_formats/pandas/query_compiler.py#L216).\n",
    "\n",
    "If we want to register a new function, we need to understand what kind of function it is. In our example, we will try to implement a `kurtosis` on the unary negation of the values in the dataframe, which is a map (unargy negation of each cell) followed by a reduce. So we next want to import the function type so we can use it in our definition:\n",
    "\n",
    "```python\n",
    "from modin.core.dataframe.algebra import TreeReduce\n",
    "```\n",
    "\n",
    "Then we can just use the `TreeReduce.register` `classmethod` and assign it to the `PandasQueryCompiler`:\n",
    "\n",
    "```python\n",
    "PandasQueryCompiler.neg_kurtosis = TreeReduce.register(lambda cell_value, **kwargs: ~cell_value, pandas.DataFrame.kurtosis)\n",
    "```\n",
    "\n",
    "We include `**kwargs` to the `lambda` function since the query compiler will pass all keyword arguments to both the map and reduce functions.\n",
    "\n",
    "Finally, we want a handle to it from the `DataFrame`, so we need to create a way to do that:\n",
    "\n",
    "```python\n",
    "def neg_kurtosis_func(self, **kwargs):\n",
    "    # The constructor allows you to pass in a query compiler as a keyword argument\n",
    "    return self.__constructor__(query_compiler=self._query_compiler.neg_kurtosis(**kwargs))\n",
    "\n",
    "pd.DataFrame.neg_kurtosis_custom = neg_kurtosis_func\n",
    "```\n",
    "\n",
    "And then you can use it like you usually would:\n",
    "\n",
    "```python\n",
    "df.neg_kurtosis_custom()\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler\n",
    "from modin.core.dataframe.algebra import TreeReduce"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "PandasQueryCompiler.neg_kurtosis_custom = TreeReduce.register(lambda cell_value, **kwargs: ~cell_value,\n",
    "                                                             pandas.DataFrame.kurtosis)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pandas._libs import lib\n",
    "# The function signature came from the pandas documentation:\n",
    "# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.kurtosis.html\n",
    "def neg_kurtosis_func(self, axis=lib.no_default, skipna=True, level=None, numeric_only=None, **kwargs):\n",
    "    # We need to specify the axis for the query compiler\n",
    "    if axis in [None, lib.no_default]:\n",
    "        axis = 0\n",
    "    # The constructor allows you to pass in a query compiler as a keyword argument\n",
    "    # Reduce dimension is used for reduces\n",
    "    # We also pass all keyword arguments here to ensure correctness\n",
    "    return self._reduce_dimension(\n",
    "        self._query_compiler.neg_kurtosis_custom(\n",
    "            axis=axis, skipna=skipna, level=level, numeric_only=numeric_only, **kwargs\n",
    "        )\n",
    "    )\n",
    "\n",
    "pd.DataFrame.neg_kurtosis_custom = neg_kurtosis_func"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Speed improvements\n",
    "If we were to try and replicate this functionality using the pandas API, we would need to call `df.applymap` with our unary negation function, and subsequently `df.kurtosis` on the result of the first call. Let's see how this compares with our new, custom function!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "start = time.time()\n",
    "\n",
    "print(pandas_df.applymap(lambda cell_value: ~cell_value).kurtosis())\n",
    "\n",
    "end = time.time()\n",
    "pandas_duration = end - start\n",
    "print(\"pandas unary negation kurtosis took {} seconds.\".format(pandas_duration))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "start = time.time()\n",
    "\n",
    "print(df.applymap(lambda x: ~x).kurtosis())\n",
    "\n",
    "end = time.time()\n",
    "modin_duration = end - start\n",
    "print(\"Modin unary negation kurtosis took {} seconds.\".format(modin_duration))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "custom_start = time.time()\n",
    "\n",
    "print(df.neg_kurtosis_custom())\n",
    "\n",
    "custom_end = time.time()\n",
    "modin_custom_duration = custom_end - custom_start\n",
    "print(\"Modin neg_kurtosis_custom took {} seconds.\".format(modin_custom_duration))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.display import Markdown, display\n",
    "\n",
    "display(Markdown(\"### As expected, Modin is {}x faster than pandas when chaining the functions; however we see that our custom function is even faster than that - beating pandas by {}x, and Modin (when chaining the functions) by {}x!\".format(round(pandas_duration / modin_duration, 2), round(pandas_duration / modin_custom_duration, 2), round(modin_duration / modin_custom_duration, 2))))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Congratulations! You have just implemented new DataFrame functionality!\n",
    "\n",
    "## Consider opening a pull request: https://github.com/modin-project/modin/pulls\n",
    "\n",
    "For a complete list of what is implemented, see the [Supported APIs](https://modin.readthedocs.io/en/latest/supported_apis/index.html) section."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Test your knowledge: Add a custom function for another tree reduce: finding `DataFrame.mad` after squaring all of the values\n",
    "\n",
    "See the pandas documentation for the correct signature: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.mad.html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "modin_mad_custom_start = time.time()\n",
    "\n",
    "# Implement your function here! Put the result of your custom squared `mad` in the variable `modin_mad_custom`\n",
    "# Hint: Look at the kurtosis walkthrough above\n",
    "\n",
    "modin_mad_custom = ...\n",
    "print(modin_mad_custom)\n",
    "\n",
    "modin_mad_custom_end = time.time()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Evaluation code, do not change!\n",
    "modin_mad_start = time.time()\n",
    "modin_mad = df.applymap(lambda x: x**2).mad()\n",
    "print(modin_mad)\n",
    "modin_mad_end = time.time()\n",
    "\n",
    "assert modin_mad_end - modin_mad_start > modin_mad_custom_end - modin_mad_custom_start, \\\n",
    "    \"Your implementation was too slow, or you used the chaining functions approach. Try again\"\n",
    "assert modin_mad._to_pandas().equals(modin_mad_custom._to_pandas()), \"Your result did not match the result of chaining the functions, try again\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Now that you are able to create custom functions, you know enough to contribute to Modin!\n",
    "\n",
    "**Please move on to [Exercise 4](./exercise_4.ipynb) when you are ready**"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel) with MPI",
   "language": "python",
   "name": "python3mpi"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}


================================================
FILE: examples/tutorial/jupyter/execution/pandas_on_unidist/local/exercise_4.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "99f41d2d",
   "metadata": {},
   "source": [
    "![LOGO](../../../img/MODIN_ver2_hrz.png)\n",
    "\n",
    "<center><h2>Scale your pandas workflows by changing one line of code</h2>\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fdda1c9c",
   "metadata": {},
   "source": [
    "# Exercise 4: Experimental Features\n",
    "\n",
    "**GOAL**: Explore some of the experimental features being added to Modin."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e7bf87a5",
   "metadata": {},
   "source": [
    "### Concept for exercise: Spreadsheet\n",
    "\n",
    "For those who have worked with Excel, the Spreadsheet API will definitely feel familiar! The Spreadsheet API is a Jupyter notebook widget that allows us to interact with Modin DataFrames in a spreadsheet-like fashion while taking advantage of the underlying capabilities of Modin. The widget makes it quick and easy to explore, sort, filter, and edit data as well as export the changes as reproducible code.\n",
    "\n",
    "Let's look back at a subset of the 2015 NYC Taxi Data from Exercise 2, and see how the Spreadsheet API can make it easy to play with the data!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5d5c4a3e",
   "metadata": {},
   "outputs": [],
   "source": [
    "!jupyter nbextension enable --py --sys-prefix modin_spreadsheet"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dc8d5903",
   "metadata": {},
   "outputs": [],
   "source": [
    "import modin.pandas as pd\n",
    "import modin.experimental.spreadsheet as mss\n",
    "import modin.config as modin_cfg\n",
    "import unidist.config as unidist_cfg\n",
    "modin_cfg.Engine.put(\"unidist\")\n",
    "unidist_cfg.Backend.put(\"mpi\")\n",
    "\n",
    "s3_path = \"s3://dask-data/nyc-taxi/2015/yellow_tripdata_2015-01.csv\"\n",
    "modin_df = pd.read_csv(s3_path, parse_dates=[\"tpep_pickup_datetime\", \"tpep_dropoff_datetime\"], quoting=3, nrows=1000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "145e7bbe",
   "metadata": {},
   "outputs": [],
   "source": [
    "spreadsheet = mss.from_dataframe(modin_df)\n",
    "spreadsheet"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3c18b7f2",
   "metadata": {},
   "source": [
    "### Thank you for participating!"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel) with MPI",
   "language": "python",
   "name": "python3mpi"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}


================================================
FILE: examples/tutorial/jupyter/execution/pandas_on_unidist/setup_kernel.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import sys

from ipykernel import kernelspec

# Keep a handle on ipykernel's original command builder: the attribute is
# overwritten further down in this module, and the replacement delegates to it.
default_make_ipkernel_cmd = kernelspec.make_ipkernel_cmd


def custom_make_ipkernel_cmd(*args, **kwargs):
    """
    Prefix the stock IPython kernel launch command with an MPI launcher.

    The command produced by `default_make_ipkernel_cmd` is returned with
    ``mpiexec -n 1`` placed in front of it.

    Parameters
    ----------
    *args : iterable
        Positional arguments forwarded untouched to `default_make_ipkernel_cmd`.
    **kwargs : dict
        Keyword arguments forwarded untouched to `default_make_ipkernel_cmd`.

    Returns
    -------
    list
        A Popen command list starting with ``["mpiexec", "-n", "1"]``.

    Notes
    -----
    The signature is intentionally generic so that it stays compatible with
    the original function this one stands in for.
    """
    return ["mpiexec", "-n", "1"] + default_make_ipkernel_cmd(*args, **kwargs)


# Monkey-patch ipykernel at import time so that its command builder is the
# MPI-prefixed wrapper (presumably what `kernelspec.install` below relies on
# when writing the kernel spec -- verify against ipykernel).
kernelspec.make_ipkernel_cmd = custom_make_ipkernel_cmd

if __name__ == "__main__":
    # Register the kernel under the current environment's prefix and report
    # where the spec was written.
    kernel_name = "python3mpi"
    dest = kernelspec.install(
        kernel_name=kernel_name,
        display_name="Python 3 (ipykernel) with MPI",
        prefix=sys.prefix,
    )
    print(f"Installed kernelspec {kernel_name} in {dest}")  # noqa: T201


================================================
FILE: examples/tutorial/jupyter/execution/pandas_on_unidist/test/test_notebooks.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import os
import sys

import nbformat

# Absolute path six directory levels above the folder holding this file
# (the repository root for this file's location). It is placed first on
# `sys.path` so the `examples...` import below can be resolved.
MODIN_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), *([".."] * 6)))
sys.path.insert(0, MODIN_DIR)
from examples.tutorial.jupyter.execution.test.utils import (  # noqa: E402
    _execute_notebook,
    _replace_str,
    download_taxi_dataset,
    set_kernel,
    test_dataset_path,
)

# the kernel name "python3mpi" must match the one
# that is set up in `examples/tutorial/jupyter/execution/pandas_on_unidist/setup_kernel.py`
# for `Unidist` engine
set_kernel(kernel_name="python3mpi")

# Directory with the tutorial notebooks exercised by the tests below.
# NOTE(review): this is a relative path, so the tests presumably have to be
# launched from the repository root -- confirm against the CI invocation.
local_notebooks_dir = "examples/tutorial/jupyter/execution/pandas_on_unidist/local"


# in this notebook user should replace 'import pandas as pd' with
# 'import modin.pandas as pd' to make notebook work
def test_exercise_1():
    """
    Execute exercise 1 after swapping the pandas import for the Modin one.

    The original notebook is read, patched via `_replace_str`, saved next to
    the original as ``exercise_1_test.ipynb`` and then executed.
    """
    source_path = os.path.join(local_notebooks_dir, "exercise_1.ipynb")
    patched_path = os.path.join(local_notebooks_dir, "exercise_1_test.ipynb")

    notebook = nbformat.read(source_path, as_version=nbformat.NO_CONVERT)
    _replace_str(notebook, "import pandas as pd", "import modin.pandas as pd")
    nbformat.write(notebook, patched_path)

    _execute_notebook(patched_path)


# this notebook works "as is" but for testing purposes we can use smaller dataset
def test_exercise_2():
    """
    Execute exercise 2 with the S3 dataset path swapped for the test dataset.

    The cell assigning ``path`` is replaced by an assignment of
    `test_dataset_path` followed by the `download_taxi_dataset` snippet; the
    patched notebook is saved as ``exercise_2_test.ipynb`` and executed.
    """
    source_path = os.path.join(local_notebooks_dir, "exercise_2.ipynb")
    patched_path = os.path.join(local_notebooks_dir, "exercise_2_test.ipynb")

    notebook = nbformat.read(source_path, as_version=nbformat.NO_CONVERT)

    original_cell = 'path = "s3://dask-data/nyc-taxi/2015/yellow_tripdata_2015-01.csv"'
    replacement_cell = f'path = "{test_dataset_path}"\n' + download_taxi_dataset
    _replace_str(notebook, original_cell, replacement_cell)

    nbformat.write(notebook, patched_path)
    _execute_notebook(patched_path)


# in this notebook user should add custom mad implementation
# to make notebook work
def test_exercise_3():
    """Write exercise 3 with the custom ``mad`` solution filled in (not executed yet)."""
    source_path = os.path.join(local_notebooks_dir, "exercise_3.ipynb")
    modified_notebook_path = os.path.join(local_notebooks_dir, "exercise_3_test.ipynb")

    notebook = nbformat.read(source_path, as_version=nbformat.NO_CONVERT)

    # Code that replaces the ``modin_mad_custom = ...`` placeholder in the notebook.
    mad_solution = """PandasQueryCompiler.sq_mad_custom = TreeReduce.register(lambda cell_value, **kwargs: cell_value ** 2,
                                                             pandas.DataFrame.mad)

def sq_mad_func(self, axis=None, skipna=True, level=None, **kwargs):
    if axis is None:
        axis = 0

    return self._reduce_dimension(
        self._query_compiler.sq_mad_custom(
            axis=axis, skipna=skipna, level=level, **kwargs
        )
    )

pd.DataFrame.sq_mad_custom = sq_mad_func

modin_mad_custom = df.sq_mad_custom()
    """
    _replace_str(notebook, "modin_mad_custom = ...", mad_solution)

    nbformat.write(notebook, modified_notebook_path)
    # need to update example, `.mad` doesn't exist; until then the patched
    # notebook is only written, not run:
    # _execute_notebook(modified_notebook_path)


# this notebook works "as is" but for testing purposes we can use smaller dataset
def test_exercise_4():
    """Execute exercise 4 with ``s3_path`` swapped for the local test dataset."""
    source_path = os.path.join(local_notebooks_dir, "exercise_4.ipynb")
    patched_path = os.path.join(local_notebooks_dir, "exercise_4_test.ipynb")

    notebook = nbformat.read(source_path, as_version=nbformat.NO_CONVERT)

    # Point ``s3_path`` at the local file and append the snippet that downloads it.
    local_dataset_code = f's3_path = "{test_dataset_path}"\n' + download_taxi_dataset
    _replace_str(
        notebook,
        's3_path = "s3://dask-data/nyc-taxi/2015/yellow_tripdata_2015-01.csv"',
        local_dataset_code,
    )
    nbformat.write(notebook, patched_path)

    _execute_notebook(patched_path)


================================================
FILE: examples/tutorial/jupyter/execution/test/utils.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import nbformat
from nbconvert.preprocessors import ExecutePreprocessor

# File name under which the taxi dataset is stored locally for the notebook tests.
test_dataset_path = "taxi.csv"
# Python source (kept as text, not executed here): it downloads the dataset to
# ``test_dataset_path`` unless a file with that name already exists.
download_taxi_dataset = f"""import os
import urllib.request
if not os.path.exists("{test_dataset_path}"):
    url_path = "https://modin-datasets.intel.com/testing/yellow_tripdata_2015-01.csv"
    urllib.request.urlretrieve(url_path, "{test_dataset_path}")
    """


# Default kernel name for ``ExecutePreprocessor`` to be created;
# ``set_kernel`` rebinds it.
_default_kernel_name = "python3"


def set_kernel(kernel_name):
    """
    Choose the kernel used by ``ExecutePreprocessor`` objects created afterwards.

    Parameters
    ----------
    kernel_name : str
        Name of the kernel to use from now on.
    """
    # Rebind the module-level default; nothing else is touched.
    globals()["_default_kernel_name"] = kernel_name


def make_execute_preprocessor():
    """
    Build an ``ExecutePreprocessor`` bound to the current `_default_kernel_name`.

    Returns
    -------
    nbconvert.preprocessors.ExecutePreprocessor
        Preprocessor configured with a 600 timeout and the current kernel name.

    Notes
    -----
    `_default_kernel_name` is read at call time, so a kernel chosen for a concrete
    execution (e.g., ``PandasOnUnidist`` with MPI backend) is picked up here.
    """
    preprocessor_options = {"timeout": 600, "kernel_name": _default_kernel_name}
    return ExecutePreprocessor(**preprocessor_options)


def _execute_notebook(notebook):
    """
    Read a jupyter notebook and run it through an ``ExecutePreprocessor``.

    Parameters
    ----------
    notebook : file-like or str
        File-like object or path to the notebook to execute.
    """
    parsed_notebook = nbformat.read(notebook, as_version=nbformat.NO_CONVERT)
    make_execute_preprocessor().preprocess(parsed_notebook)


def _find_code_cell_idx(nb, identifier):
    """
    Find code cell index by provided ``identifier``.

    Parameters
    ----------
    nb : dict
        Dictionary representation of the notebook to look for.
    identifier : str
        Unique string which target code cell should contain.

    Returns
    -------
    int
        Code cell index by provided ``identifier``.

    Notes
    -----
    Assertion will be raised if ``identifier`` is found in
    several code cells or isn't found at all.
    """
    import_cell_idx = [
        idx
        for idx, cell in enumerate(nb["cells"])
        if cell["cell_type"] == "code" and identifier in cell["source"]
    ]
    assert len(import_cell_idx) == 1
    return import_cell_idx[0]


def _replace_str(nb, original_str, str_to_replace):
    """
    Substitute ``str_to_replace`` for ``original_str`` inside a notebook code cell.

    The notebook is modified in place; nothing is returned.

    Parameters
    ----------
    nb : dict
        Dictionary representation of the notebook which requires replacement.
    original_str : str
        Original string which should be replaced.
    str_to_replace : str
        String to replace original string.

    Notes
    -----
    Assertion will be raised if ``original_str`` is found in
    several code cells or isn't found at all.
    """
    target_cell = nb["cells"][_find_code_cell_idx(nb, original_str)]
    updated_source = target_cell["source"].replace(original_str, str_to_replace)
    target_cell["source"] = updated_source


================================================
FILE: modin/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import warnings
from typing import Any, Optional, Tuple, Type, Union

from . import _version


def custom_formatwarning(
    message: Union[Warning, str],
    category: Type[Warning],
    filename: str,
    lineno: int,
    line: Optional[str] = None,
) -> str:
    """
    Format a warning as ``"<category name>: <message>"`` followed by a newline.

    The signature mirrors ``warnings.formatwarning``; ``filename``, ``lineno``
    and ``line`` are accepted but deliberately left out of the output.
    """
    return f"{category.__name__}: {message}\n"


# Replace the process-wide warning formatter so warnings are rendered as
# "<category name>: <message>" without file name and line number.
warnings.formatwarning = custom_formatwarning
# Filter numpy version warnings because they are not relevant
warnings.filterwarnings("ignore", message="numpy.dtype size changed")
# NOTE(review): the emitter of "Large object of size ..." is not visible here;
# presumably it comes from an execution engine -- confirm before changing.
warnings.filterwarnings("ignore", message="Large object of size")


def set_execution(engine: Any = None, storage_format: Any = None) -> Tuple[Any, Any]:
    """
    Set the _pair_ of execution engine and storage format simultaneously.

    This is needed because there might be cases where switching one by one would be
    impossible, as not all pairs of values are meaningful.

    Parameters
    ----------
    engine : Any, optional
        New value for the ``Engine`` config. ``None`` leaves the engine untouched.
    storage_format : Any, optional
        New value for the ``StorageFormat`` config. ``None`` leaves the storage
        format untouched.

    Returns
    -------
    tuple
        Pair ``(old_engine, old_storage_format)`` of old values, so it is easy to
        return back. An element is ``None`` when the corresponding argument was
        not given.

    Notes
    -----
    After the pair is updated, the ``Backend`` config is recomputed from the current
    engine and storage format, and its callbacks are checked as well.
    """
    from .config import Backend, Engine, Execution, StorageFormat

    old_engine, old_storage_format = None, None
    # defer callbacks until both entities are set
    if engine is not None:
        old_engine = Engine._put_nocallback(engine)
    if storage_format is not None:
        old_storage_format = StorageFormat._put_nocallback(storage_format)
    # execute callbacks if something was changed
    if old_engine is not None:
        Engine._check_callbacks(old_engine)
    if old_storage_format is not None:
        StorageFormat._check_callbacks(old_storage_format)
    # bring the backend in line with the (possibly new) engine/storage format pair;
    # its callbacks run only after the new value has been stored
    old_backend = Backend.get()
    Backend._put_nocallback(
        Backend.get_backend_for_execution(
            Execution(engine=Engine.get(), storage_format=StorageFormat.get())
        )
    )
    Backend._check_callbacks(old_backend)

    return old_engine, old_storage_format


# Package version string, computed at import time by the ``_version`` module.
__version__ = _version.get_versions()["version"]


================================================
FILE: modin/__main__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Command-line interface piece, called when user issues "python -m modin --foo"."""

import argparse


def main() -> None:
    """Parse the ``python -m modin`` command line and run the requested action."""
    cli = argparse.ArgumentParser(
        "python -m modin",
        description="Drop-in pandas replacement; refer to https://modin.readthedocs.io/ for details.",
    )
    cli.add_argument(
        "--versions",
        action="store_true",
        default=False,
        help="Show versions of all known components",
    )

    options = cli.parse_args()
    if not options.versions:
        # Nothing was requested.
        return

    # Imported only when the flag is given.
    from modin.utils import show_versions

    show_versions()


# Run the command-line interface only when this module is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()


================================================
FILE: modin/_version.py
================================================
# This file helps to compute a version number in source trees obtained from
# git-archive tarball (such as those provided by githubs download-from-tag
# feature). Distribution tarballs (built by setup.py sdist) and build
# directories (produced by setup.py build) will contain a much shorter file
# that just contains the computed version number.

# This file is released into the public domain.
# Generated by versioneer-0.29
# https://github.com/python-versioneer/python-versioneer

"""Git implementation of _version.py."""

import errno
import functools
import os
import re
import subprocess
import sys
from typing import Any, Callable, Dict, List, Optional, Tuple


def get_keywords() -> Dict[str, str]:
    """Get the keywords needed to look up the version information.

    Returns a dict with the keys "refnames", "full" and "date". In a plain
    checkout the values are still the literal "$Format:...$" placeholders;
    git-archive replaces them with the real ref names, commit hash and date.
    """
    # these strings will be replaced by git during git-archive.
    # setup.py/versioneer.py will grep for the variable names, so they must
    # each be defined on a line of their own. _version.py will just call
    # get_keywords().
    git_refnames = "$Format:%d$"
    git_full = "$Format:%H$"
    git_date = "$Format:%ci$"
    keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
    return keywords


class VersioneerConfig:
    """Container for Versioneer configuration parameters.

    The class only declares the attributes; get_config() assigns their values.
    """

    # Version control system name ("git" in get_config()).
    VCS: str
    # Name of the version rendering style, e.g. "pep440" (see render()).
    style: str
    # Prefix that version tags carry; it is stripped to obtain the version.
    tag_prefix: str
    # Directory-name prefix looked for by versions_from_parentdir().
    parentdir_prefix: str
    # Path of this version file relative to the top of the source tree.
    versionfile_source: str
    # Whether the lookup functions print diagnostic messages.
    verbose: bool


def get_config() -> VersioneerConfig:
    """Build a VersioneerConfig() carrying this project's fixed settings."""
    # these values are filled in when 'setup.py versioneer' creates
    # _version.py
    settings: Dict[str, Any] = {
        "VCS": "git",
        "style": "pep440",
        "tag_prefix": "",
        "parentdir_prefix": "modin-",
        "versionfile_source": "modin/_version.py",
        "verbose": False,
    }
    cfg = VersioneerConfig()
    for option, value in settings.items():
        setattr(cfg, option, value)
    return cfg


class NotThisMethod(Exception):
    """Exception raised if a method is not valid for the current scenario.

    The version lookup functions in this module raise it when they cannot
    determine a version; get_versions() catches it and carries on.
    """


# NOTE(review): no reads or writes of this mapping are visible in this part of
# the module -- presumably kept for parity with versioneer.py; confirm.
LONG_VERSION_PY: Dict[str, str] = {}
# Registry filled by the register_vcs_handler() decorator:
# HANDLERS[vcs][method] is the handler function.
HANDLERS: Dict[str, Dict[str, Callable]] = {}


def register_vcs_handler(vcs: str, method: str) -> Callable:  # decorator
    """Build a decorator that records a function as the ``method`` handler of ``vcs``."""

    def decorate(f: Callable) -> Callable:
        """Store f in HANDLERS[vcs][method] and hand it back unchanged."""
        HANDLERS.setdefault(vcs, {})[method] = f
        return f

    return decorate


def run_command(
    commands: List[str],
    args: List[str],
    cwd: Optional[str] = None,
    verbose: bool = False,
    hide_stderr: bool = False,
    env: Optional[Dict[str, str]] = None,
) -> Tuple[Optional[str], Optional[int]]:
    """Run the first launchable executable from ``commands`` with ``args``.

    The candidates are tried in order; one that does not exist (ENOENT) is
    skipped, any other OSError aborts the whole call.

    Returns a ``(stdout, returncode)`` pair:
    - ``(None, None)`` when no process could be started,
    - ``(None, returncode)`` when the process exited with a non-zero code,
    - ``(stripped decoded stdout, 0)`` otherwise.
    """
    assert isinstance(commands, list)

    extra_popen_args: Dict[str, Any] = {}
    if sys.platform == "win32":
        # This hides the console window if pythonw.exe is used
        startup = subprocess.STARTUPINFO()
        startup.dwFlags |= subprocess.STARTF_USESHOWWINDOW
        extra_popen_args["startupinfo"] = startup

    # With hide_stderr the child's stderr is captured (and dropped) instead of inherited.
    stderr_target = subprocess.PIPE if hide_stderr else None

    child = None
    for executable in commands:
        argv = [executable] + args
        shown = str(argv)
        try:
            # remember shell=False, so use git.cmd on windows, not just git
            child = subprocess.Popen(
                argv,
                cwd=cwd,
                env=env,
                stdout=subprocess.PIPE,
                stderr=stderr_target,
                **extra_popen_args,
            )
        except OSError as err:
            if err.errno == errno.ENOENT:
                # This spelling of the executable is absent; try the next one.
                continue
            if verbose:
                print("unable to run %s" % shown)
                print(err)
            return None, None
        else:
            break

    if child is None:
        # Every candidate was missing (or there were no candidates at all).
        if verbose:
            print("unable to find command, tried %s" % (commands,))
        return None, None

    output = child.communicate()[0].strip().decode()
    exit_code = child.returncode
    if exit_code != 0:
        if verbose:
            print("unable to run %s (error)" % shown)
            print("stdout was %s" % output)
        return None, exit_code
    return output, exit_code


def versions_from_parentdir(
    parentdir_prefix: str,
    root: str,
    verbose: bool,
) -> Dict[str, Any]:
    """Derive the version from the name of ``root`` or one of its parents.

    Source tarballs conventionally unpack into a directory whose name is the
    project prefix followed by the version. ``root`` itself and the two
    directories above it are checked, nearest first.

    Raises NotThisMethod when none of the three names starts with
    ``parentdir_prefix``.
    """
    # root, its parent and its grandparent, in lookup order.
    levels = [root]
    while len(levels) < 3:
        levels.append(os.path.dirname(levels[-1]))

    for directory in levels:
        name = os.path.basename(directory)
        if name.startswith(parentdir_prefix):
            return {
                "version": name[len(parentdir_prefix) :],
                "full-revisionid": None,
                "dirty": False,
                "error": None,
                "date": None,
            }

    if verbose:
        print(
            "Tried directories %s but none started with prefix %s"
            % (str(levels), parentdir_prefix)
        )
    raise NotThisMethod("rootdir doesn't start with parentdir_prefix")


@register_vcs_handler("git", "get_keywords")
def git_get_keywords(versionfile_abs: str) -> Dict[str, str]:
    """Read the git keyword values out of the version file at ``versionfile_abs``.

    The file is scanned textually for the ``git_refnames``, ``git_full`` and
    ``git_date`` assignments; whatever is found is returned under the keys
    "refnames", "full" and "date". An unreadable file yields the keywords
    collected so far (possibly none).
    """
    # the code embedded in _version.py can just fetch the value of these
    # keywords. When used from setup.py, we don't want to import _version.py,
    # so we do it with a regexp instead. This function is not used from
    # _version.py.
    assignments = (
        ("git_refnames =", "refnames"),
        ("git_full =", "full"),
        ("git_date =", "date"),
    )
    found: Dict[str, str] = {}
    try:
        with open(versionfile_abs, "r") as fobj:
            for line in fobj:
                stripped = line.strip()
                for marker, key in assignments:
                    if not stripped.startswith(marker):
                        continue
                    match = re.search(r'=\s*"(.*)"', line)
                    if match:
                        found[key] = match.group(1)
    except OSError:
        pass
    return found


@register_vcs_handler("git", "keywords")
def git_versions_from_keywords(
    keywords: Dict[str, str],
    tag_prefix: str,
    verbose: bool,
) -> Dict[str, Any]:
    """Work out the version from expanded git-archive keywords.

    Raises NotThisMethod when ``keywords`` has no "refnames" entry or when the
    keywords were never expanded. Otherwise returns a version dict; if no usable
    tag is among the ref names, its version is "0+unknown" with an error note.
    """
    if "refnames" not in keywords:
        raise NotThisMethod("Short version file found")

    date = keywords.get("date")
    if date is not None:
        # Use only the last line.  Previous lines may contain GPG signature
        # information.
        last_line = date.splitlines()[-1]

        # "%ci" gives an "ISO-8601-like" stamp and has been around since
        # git-1.5.3, whereas the compliant "%cI" needs git-2.2.0, so the "%ci"
        # output is edited into a compliant form here.
        date = last_line.strip().replace(" ", "T", 1).replace(" ", "", 1)

    refnames = keywords["refnames"].strip()
    if refnames.startswith("$Format"):
        if verbose:
            print("keywords are unexpanded, not using")
        raise NotThisMethod("unexpanded keywords, not a git-archive tarball")

    refs = {name.strip() for name in refnames.strip("()").split(",")}
    # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
    # just "foo-1.0". If we see a "tag: " prefix, prefer those.
    tag_marker = "tag: "
    tags = {name[len(tag_marker) :] for name in refs if name.startswith(tag_marker)}
    if not tags:
        # Either git < 1.8.3 is in use, or there really are no tags. Heuristic:
        # assume every version tag has a digit. The old %d expansion strips the
        # refs/heads/ and refs/tags/ prefixes, so branches and tags cannot be
        # told apart; requiring a digit filters out common branch names such as
        # "release" and "stabilization", as well as "HEAD" and "master".
        tags = {name for name in refs if re.search(r"\d", name)}
        if verbose:
            print("discarding '%s', no digits" % ",".join(refs - tags))
    if verbose:
        print("likely tags: %s" % ",".join(sorted(tags)))

    # sorting will prefer e.g. "2.0" over "2.0rc1"
    stripped_tags = (
        ref[len(tag_prefix) :] for ref in sorted(tags) if ref.startswith(tag_prefix)
    )
    # Skip refs that exactly match the prefix or that don't start with a number
    # once the prefix is stripped (mostly a concern when the prefix is '').
    version = next(
        (candidate for candidate in stripped_tags if re.match(r"\d", candidate)),
        None,
    )
    if version is not None:
        if verbose:
            print("picking %s" % version)
        return {
            "version": version,
            "full-revisionid": keywords["full"].strip(),
            "dirty": False,
            "error": None,
            "date": date,
        }

    # no suitable tags, so version is "0+unknown", but full hex is still there
    if verbose:
        print("no suitable tags, using unknown + full revision id")
    return {
        "version": "0+unknown",
        "full-revisionid": keywords["full"].strip(),
        "dirty": False,
        "error": "no suitable tags",
        "date": None,
    }


@register_vcs_handler("git", "pieces_from_vcs")
def git_pieces_from_vcs(
    tag_prefix: str, root: str, verbose: bool, runner: Callable = run_command
) -> Dict[str, Any]:
    """Get version from 'git describe' in the root of the source tree.

    This only gets called if the git-archive 'subst' keywords were *not*
    expanded, and _version.py hasn't already been rewritten with a short
    version string, meaning we're inside a checked out source tree.

    ``runner`` is called like run_command() and must return a
    (stdout, returncode) pair; it is always given an environment without
    GIT_DIR.

    On success the returned dict has the keys "long", "short", "error" (None),
    "branch", "dirty", "closest-tag", "distance" and "date". If the describe
    output cannot be parsed, or the tag lacks ``tag_prefix``, the dict is
    returned early with "error" set and without "closest-tag", "distance" and
    "date".

    Raises NotThisMethod when ``root`` is not under git control or one of the
    checked git commands fails.
    """
    GITS = ["git"]
    if sys.platform == "win32":
        GITS = ["git.cmd", "git.exe"]

    # GIT_DIR can interfere with correct operation of Versioneer.
    # It may be intended to be passed to the Versioneer-versioned project,
    # but that should not change where we get our version from.
    env = os.environ.copy()
    env.pop("GIT_DIR", None)
    runner = functools.partial(runner, env=env)

    # cheap probe: is root inside a git working tree at all?
    _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=not verbose)
    if rc != 0:
        if verbose:
            print("Directory %s not under git control" % root)
        raise NotThisMethod("'git rev-parse --git-dir' returned error")

    # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
    # if there isn't one, this yields HEX[-dirty] (no NUM)
    describe_out, rc = runner(
        GITS,
        [
            "describe",
            "--tags",
            "--dirty",
            "--always",
            "--long",
            "--match",
            f"{tag_prefix}[[:digit:]]*",
        ],
        cwd=root,
    )
    # --long was added in git-1.5.5
    if describe_out is None:
        raise NotThisMethod("'git describe' failed")
    describe_out = describe_out.strip()
    full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root)
    if full_out is None:
        raise NotThisMethod("'git rev-parse' failed")
    full_out = full_out.strip()

    pieces: Dict[str, Any] = {}
    pieces["long"] = full_out
    pieces["short"] = full_out[:7]  # maybe improved later
    pieces["error"] = None

    branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], cwd=root)
    # --abbrev-ref was added in git-1.6.3
    if rc != 0 or branch_name is None:
        raise NotThisMethod("'git rev-parse --abbrev-ref' returned error")
    branch_name = branch_name.strip()

    if branch_name == "HEAD":
        # If we aren't exactly on a branch, pick a branch which represents
        # the current commit. If all else fails, we are on a branchless
        # commit.
        branches, rc = runner(GITS, ["branch", "--contains"], cwd=root)
        # --contains was added in git-1.5.4
        if rc != 0 or branches is None:
            raise NotThisMethod("'git branch --contains' returned error")
        branches = branches.split("\n")

        # Remove the first line if we're running detached
        if "(" in branches[0]:
            branches.pop(0)

        # Strip off the leading "* " from the list of branches.
        branches = [branch[2:] for branch in branches]
        if "master" in branches:
            branch_name = "master"
        elif not branches:
            branch_name = None
        else:
            # Pick the first branch that is returned. Good or bad.
            branch_name = branches[0]

    pieces["branch"] = branch_name

    # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
    # TAG might have hyphens.
    git_describe = describe_out

    # look for -dirty suffix
    dirty = git_describe.endswith("-dirty")
    pieces["dirty"] = dirty
    if dirty:
        git_describe = git_describe[: git_describe.rindex("-dirty")]

    # now we have TAG-NUM-gHEX or HEX

    if "-" in git_describe:
        # TAG-NUM-gHEX
        mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe)
        if not mo:
            # unparsable. Maybe git-describe is misbehaving?
            pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out
            return pieces

        # tag
        full_tag = mo.group(1)
        if not full_tag.startswith(tag_prefix):
            if verbose:
                fmt = "tag '%s' doesn't start with prefix '%s'"
                print(fmt % (full_tag, tag_prefix))
            pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % (
                full_tag,
                tag_prefix,
            )
            return pieces
        pieces["closest-tag"] = full_tag[len(tag_prefix) :]

        # distance: number of commits since tag
        pieces["distance"] = int(mo.group(2))

        # commit: short hex revision ID
        pieces["short"] = mo.group(3)

    else:
        # HEX: no tags
        pieces["closest-tag"] = None
        out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root)
        # NOTE(review): the result is not checked; with the default runner a
        # failed command yields out=None, which would raise AttributeError here.
        pieces["distance"] = len(out.split())  # total number of commits

    # commit date: see ISO-8601 comment in git_versions_from_keywords()
    # NOTE(review): same caveat as above -- a failed 'git show' is not handled.
    date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip()
    # Use only the last line.  Previous lines may contain GPG signature
    # information.
    date = date.splitlines()[-1]
    pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1)

    return pieces


def plus_or_dot(pieces: Dict[str, Any]) -> str:
    """Return a + if we don't already have one, else return a .

    The decision is based on ``pieces["closest-tag"]``. A missing key and a
    ``None`` value (no tag was found) are both treated as a tag without "+".
    """
    # ``.get(key, "")`` alone is not enough: the key can be present with the
    # value None, and ``"+" in None`` raises TypeError.
    closest_tag = pieces.get("closest-tag") or ""
    return "." if "+" in closest_tag else "+"


def render_pep440(pieces: Dict[str, Any]) -> str:
    """Render as TAG[+DISTANCE.gHEX[.dirty]], a "local version identifier".

    A tagged build that is then dirtied renders as TAG+0.gHEX.dirty.

    Exceptions:
    1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty]
    """
    tag = pieces["closest-tag"]
    if not tag:
        # exception #1
        untagged = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"])
        return (untagged + ".dirty") if pieces["dirty"] else untagged

    if not (pieces["distance"] or pieces["dirty"]):
        # Exactly on a clean tag: the tag is the whole version.
        return tag

    local_part = plus_or_dot(pieces)
    local_part += "%d.g%s" % (pieces["distance"], pieces["short"])
    if pieces["dirty"]:
        local_part += ".dirty"
    return tag + local_part


def render_pep440_branch(pieces: Dict[str, Any]) -> str:
    """Render as TAG[[.dev0]+DISTANCE.gHEX[.dirty]] .

    ".dev0" marks a branch other than master. Note that .dev0 sorts backwards
    (a feature branch will appear "older" than the master branch).

    Exceptions:
    1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty]
    """
    tag = pieces["closest-tag"]
    if tag:
        if not (pieces["distance"] or pieces["dirty"]):
            # Exactly on a clean tag: nothing is appended.
            return tag
        chunks = [tag]
        if pieces["branch"] != "master":
            chunks.append(".dev0")
        chunks.append(plus_or_dot(pieces))
        chunks.append("%d.g%s" % (pieces["distance"], pieces["short"]))
    else:
        # exception #1
        chunks = ["0"]
        if pieces["branch"] != "master":
            chunks.append(".dev0")
        chunks.append("+untagged.%d.g%s" % (pieces["distance"], pieces["short"]))

    if pieces["dirty"]:
        chunks.append(".dirty")
    return "".join(chunks)


def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]:
    """Split pep440 version string at the post-release segment.

    Returns the release segments before the post-release and the
    post-release version number. The number is None if no post-release
    segment is present (or if ".post" occurs more than once), and 0 for a
    bare ".post" suffix without digits.
    """
    vc = ver.split(".post")
    if len(vc) != 2:
        return vc[0], None
    return vc[0], int(vc[1] or 0)


def render_pep440_pre(pieces: Dict[str, Any]) -> str:
    """Render as TAG[.postN.devDISTANCE] -- No -dirty.

    Exceptions:
    1: no tags. 0.post0.devDISTANCE
    """
    tag = pieces["closest-tag"]
    distance = pieces["distance"]

    if not tag:
        # exception #1
        return "0.post0.dev%d" % distance
    if not distance:
        # no commits, use the tag as the version
        return tag

    # update the post release segment
    release, post_number = pep440_split_post(tag)
    if post_number is None:
        return release + ".post0.dev%d" % (distance)
    return release + ".post%d.dev%d" % (post_number + 1, distance)


def render_pep440_post(pieces: Dict[str, Any]) -> str:
    """Render as TAG[.postDISTANCE[.dev0]+gHEX] .

    ".dev0" marks a dirty tree. Note that .dev0 sorts backwards
    (a dirty tree will appear "older" than the corresponding clean one),
    but you shouldn't be releasing software with -dirty anyways.

    Exceptions:
    1: no tags. 0.postDISTANCE[.dev0]
    """
    tag = pieces["closest-tag"]
    if tag:
        if not (pieces["distance"] or pieces["dirty"]):
            # Exactly on a clean tag: nothing is appended.
            return tag
        chunks = [tag, ".post%d" % pieces["distance"]]
        if pieces["dirty"]:
            chunks.append(".dev0")
        chunks.append(plus_or_dot(pieces))
        chunks.append("g%s" % pieces["short"])
    else:
        # exception #1
        chunks = ["0.post%d" % pieces["distance"]]
        if pieces["dirty"]:
            chunks.append(".dev0")
        chunks.append("+g%s" % pieces["short"])
    return "".join(chunks)


def render_pep440_post_branch(pieces: Dict[str, Any]) -> str:
    """Render as TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] .

    ".dev0" marks a branch other than master.

    Exceptions:
    1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty]
    """
    tag = pieces["closest-tag"]
    if tag:
        if not (pieces["distance"] or pieces["dirty"]):
            # Exactly on a clean tag: nothing is appended.
            return tag
        chunks = [tag, ".post%d" % pieces["distance"]]
        if pieces["branch"] != "master":
            chunks.append(".dev0")
        chunks.append(plus_or_dot(pieces))
        chunks.append("g%s" % pieces["short"])
    else:
        # exception #1
        chunks = ["0.post%d" % pieces["distance"]]
        if pieces["branch"] != "master":
            chunks.append(".dev0")
        chunks.append("+g%s" % pieces["short"])

    if pieces["dirty"]:
        chunks.append(".dirty")
    return "".join(chunks)


def render_pep440_old(pieces: Dict[str, Any]) -> str:
    """Render as TAG[.postDISTANCE[.dev0]] .

    ".dev0" marks a dirty tree.

    Exceptions:
    1: no tags. 0.postDISTANCE[.dev0]
    """
    tag = pieces["closest-tag"]
    if tag and not (pieces["distance"] or pieces["dirty"]):
        # Exactly on a clean tag: nothing is appended.
        return tag

    # Untagged trees (exception #1) start from "0" instead of the tag.
    prefix = tag if tag else "0"
    version = prefix + ".post%d" % pieces["distance"]
    if pieces["dirty"]:
        version += ".dev0"
    return version


def render_git_describe(pieces: Dict[str, Any]) -> str:
    """Render as TAG[-DISTANCE-gHEX][-dirty].

    Like 'git describe --tags --dirty --always'.

    Exceptions:
    1: no tags. HEX[-dirty]  (note: no 'g' prefix)
    """
    tag = pieces["closest-tag"]
    if not tag:
        # exception #1
        described = pieces["short"]
    elif pieces["distance"]:
        described = tag + "-%d-g%s" % (pieces["distance"], pieces["short"])
    else:
        described = tag

    return (described + "-dirty") if pieces["dirty"] else described


def render_git_describe_long(pieces: Dict[str, Any]) -> str:
    """Render as TAG-DISTANCE-gHEX[-dirty].

    Like 'git describe --tags --dirty --always --long'.
    The distance/hash is unconditional.

    Exceptions:
    1: no tags. HEX[-dirty]  (note: no 'g' prefix)
    """
    tag = pieces["closest-tag"]
    if tag:
        described = tag + "-%d-g%s" % (pieces["distance"], pieces["short"])
    else:
        # exception #1
        described = pieces["short"]

    return (described + "-dirty") if pieces["dirty"] else described


def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]:
    """Render the given version pieces into the requested style.

    When ``pieces["error"]`` is set, an "unknown" version record carrying that
    error is returned and ``style`` is not consulted. An empty style or
    "default" selects "pep440". Any other unrecognized style raises ValueError.
    """
    error = pieces["error"]
    if error:
        return {
            "version": "unknown",
            "full-revisionid": pieces.get("long"),
            "dirty": None,
            "error": error,
            "date": None,
        }

    # Map each supported style name to the function that renders it.
    renderers = {
        "pep440": render_pep440,
        "pep440-branch": render_pep440_branch,
        "pep440-pre": render_pep440_pre,
        "pep440-post": render_pep440_post,
        "pep440-post-branch": render_pep440_post_branch,
        "pep440-old": render_pep440_old,
        "git-describe": render_git_describe,
        "git-describe-long": render_git_describe_long,
    }

    if not style or style == "default":
        style = "pep440"  # the default

    renderer = renderers.get(style)
    if renderer is None:
        raise ValueError("unknown style '%s'" % style)

    return {
        "version": renderer(pieces),
        "full-revisionid": pieces["long"],
        "dirty": pieces["dirty"],
        "error": None,
        "date": pieces.get("date"),
    }


def get_versions() -> Dict[str, Any]:
    """Get version information or return default if unable to do so.

    Strategies are tried in order: expanded keywords, the VCS checkout rooted
    above this file, then the parent directory name. The first one that does
    not raise NotThisMethod wins.
    """

    def _unknown(reason: str) -> Dict[str, Any]:
        # Placeholder record returned when no strategy can produce a version.
        return {
            "version": "0+unknown",
            "full-revisionid": None,
            "dirty": None,
            "error": reason,
            "date": None,
        }

    # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
    # __file__, we can work backwards from there to the root. Some
    # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which
    # case we can only use expanded keywords.
    cfg = get_config()
    verbose = cfg.verbose

    try:
        return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose)
    except NotThisMethod:
        pass

    try:
        root = os.path.realpath(__file__)
        # versionfile_source is the relative path from the top of the source
        # tree (where the .git directory might live) to this file. Strip one
        # path component per segment to walk back up to that root.
        for _ in cfg.versionfile_source.split("/"):
            root = os.path.dirname(root)
    except NameError:
        return _unknown("unable to find root of source tree")

    try:
        return render(git_pieces_from_vcs(cfg.tag_prefix, root, verbose), cfg.style)
    except NotThisMethod:
        pass

    try:
        if cfg.parentdir_prefix:
            return versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
    except NotThisMethod:
        pass

    return _unknown("unable to compute version")


================================================
FILE: modin/config/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses config entities which can be used for Modin behavior tuning."""

from modin.config.envvars import (
    AsvDataSizeConfig,
    AsvImplementation,
    AsyncReadMode,
    AutoSwitchBackend,
    Backend,
    BackendJoinConsiderAllBackends,
    BackendMergeCastInPlace,
    BenchmarkMode,
    CIAWSAccessKeyID,
    CIAWSSecretAccessKey,
    CpuCount,
    DaskThreadsPerWorker,
    DocModule,
    DynamicPartitioning,
    Engine,
    EnvironmentVariable,
    Execution,
    GithubCI,
    GpuCount,
    IsDebug,
    IsExperimental,
    IsRayCluster,
    LazyExecution,
    LogFileSize,
    LogMemoryInterval,
    LogMode,
    Memory,
    MetricsMode,
    MinColumnPartitionSize,
    MinPartitionSize,
    MinRowPartitionSize,
    ModinNumpy,
    NativePandasDeepCopy,
    NativePandasMaxRows,
    NativePandasTransferThreshold,
    NPartitions,
    PersistentPickle,
    ProgressBar,
    RangePartitioning,
    RayInitCustomResources,
    RayRedisAddress,
    RayRedisPassword,
    RayTaskCustomResources,
    ReadSqlEngine,
    ShowBackendSwitchProgress,
    StorageFormat,
    TestDatasetSize,
    TestReadFromPostgres,
    TestReadFromSqlServer,
    TrackFileLeaks,
)
from modin.config.pubsub import Parameter, ValueSource, context

# Public names of the `modin.config` package, grouped by topic. The config
# command-line helper (`python -m modin.config`) iterates over this list to
# find the config classes it prints or exports.
__all__ = [
    "EnvironmentVariable",
    "Parameter",
    "ValueSource",
    "context",
    # General settings
    "IsDebug",
    "Engine",
    "StorageFormat",
    "CpuCount",
    "GpuCount",
    "Memory",
    "Backend",
    "BackendJoinConsiderAllBackends",
    "BackendMergeCastInPlace",
    "Execution",
    "AutoSwitchBackend",
    "ShowBackendSwitchProgress",
    # Ray specific
    "IsRayCluster",
    "RayRedisAddress",
    "RayRedisPassword",
    "RayInitCustomResources",
    "RayTaskCustomResources",
    "LazyExecution",
    # Dask specific
    "DaskThreadsPerWorker",
    # Native Pandas Specific
    "NativePandasMaxRows",
    "NativePandasTransferThreshold",
    "NativePandasDeepCopy",
    # Partitioning
    "NPartitions",
    "MinPartitionSize",
    "MinRowPartitionSize",
    "MinColumnPartitionSize",
    # ASV specific
    "TestDatasetSize",
    "AsvImplementation",
    "AsvDataSizeConfig",
    # Specific features
    "ProgressBar",
    "BenchmarkMode",
    "PersistentPickle",
    "ModinNumpy",
    "RangePartitioning",
    "AsyncReadMode",
    "ReadSqlEngine",
    "IsExperimental",
    "DynamicPartitioning",
    # For tests
    "TrackFileLeaks",
    "TestReadFromSqlServer",
    "TestReadFromPostgres",
    "GithubCI",
    "CIAWSSecretAccessKey",
    "CIAWSAccessKeyID",
    # Logging
    "LogMode",
    "LogMemoryInterval",
    "LogFileSize",
    "MetricsMode",
    # Plugin settings
    "DocModule",
]


================================================
FILE: modin/config/__main__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
Content of this file should be executed if module `modin.config` is called.

If module is called (using `python -m modin.config`) configs help will be printed.
With the `--export-path` option, the configs description is exported to the CSV file
whose path is given with this flag.
"""

import argparse
from textwrap import dedent

import pandas

import modin.config as cfg


def print_config_help() -> None:
    """Print the help text and current value of every concrete config."""
    for objname in sorted(cfg.__all__):
        candidate = getattr(cfg, objname)
        # `__all__` also lists non-class exports; only config classes qualify.
        if not isinstance(candidate, type):
            continue
        if not issubclass(candidate, cfg.Parameter) or candidate.is_abstract:
            continue
        print(  # noqa: T201
            f"{candidate.get_help()}\n\tCurrent value: {candidate.get()}"
        )


def export_config_help(filename: str) -> None:
    """
    Write a CSV table describing every concrete config.

    Parameters
    ----------
    filename : str
        Name of the file to export configs data.
    """
    column_names = [
        "Config Name",
        "Env. Variable Name",
        "Default Value",
        "Description",
        "Options",
    ]
    # Textual descriptions shown in place of the computed default for these configs.
    described_defaults = {
        "RayRedisPassword": "random string",
        "CpuCount": "multiprocessing.cpu_count()",
        "NPartitions": "equals to MODIN_CPUS env",
    }

    rows = []
    for objname in sorted(cfg.__all__):
        obj = getattr(cfg, objname)
        if not (isinstance(obj, type) and issubclass(obj, cfg.Parameter)):
            continue
        if obj.is_abstract:
            continue

        config_name = obj.__name__
        env_name = getattr(obj, "varname", "not backed by environment")
        # The default is computed for every config, even when a textual
        # description replaces it in the table.
        computed_default = obj._get_default()
        default_cell = described_defaults.get(obj.__name__, computed_default)
        # `Notes` `-` underlining can't be correctly parsed inside csv table by sphinx
        description = dedent(obj.__doc__ or "").replace("Notes\n-----", "Notes:\n")
        rows.append(
            {
                "Config Name": config_name,
                "Env. Variable Name": env_name,
                "Default Value": default_cell,
                "Description": description,
                "Options": obj.choices,
            }
        )

    table = pandas.DataFrame(rows, columns=column_names)
    table.to_csv(filename, index=False)


if __name__ == "__main__":
    # Entry point for `python -m modin.config`: export the configs table when
    # an export path is given, otherwise print the help for every config.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--export-path",
        dest="export_path",
        type=str,
        required=False,
        default=None,
        help="File path to export configs data.",
    )
    export_path = parser.parse_args().export_path
    if not export_path:
        print_config_help()
    else:
        export_config_help(export_path)


================================================
FILE: modin/config/envvars.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses Modin configs originated from environment variables."""

import os
import secrets
import sys
import warnings
from collections import namedtuple
from textwrap import dedent
from typing import Any, NoReturn, Optional

from packaging import version
from pandas.util._decorators import doc  # type: ignore[attr-defined]

from modin import set_execution
from modin.config.pubsub import (
    _TYPE_PARAMS,
    _UNSET,
    DeprecationDescriptor,
    ExactStr,
    Parameter,
    ValueSource,
)


class EnvironmentVariable(Parameter, type=str, abstract=True):
    """Base class for environment variables-based configuration."""

    # Name of the environment variable the value is read from.
    varname: Optional[str] = None

    @classmethod
    def _get_value_from_config(cls) -> Any:
        """
        Read the value from environment variable.

        Returns
        -------
        Any
            Decoded value of the variable if it is set, otherwise `_UNSET`.
        """
        name = cls.varname
        if name is None:
            raise TypeError("varname should not be None")
        if name not in os.environ:
            return _UNSET
        raw = os.environ[name]
        type_params = _TYPE_PARAMS[cls.type]
        if type_params.verify(raw):
            return type_params.decode(raw)
        # TODO: use and test a better error message, like "Invalid value
        # for {cls.varname}: {raw}"
        raise ValueError(f"Unsupported raw value: {raw}")

    @classmethod
    def get_help(cls) -> str:
        """
        Build the user-facing help text for the config.

        Returns
        -------
        str
        """
        description = dedent(cls.__doc__ or "Unknown").strip()
        type_help = _TYPE_PARAMS[cls.type].help
        text = f"{cls.varname}: {description}\n\tProvide {type_help}"
        if cls.choices:
            examples = ", ".join(str(c) for c in cls.choices)
            text += f" (valid examples are: {examples})"
        return text


class EnvWithSibilings(
    EnvironmentVariable,
    # 'type' is a mandatory parameter for '__init_subclass__', so we have to pass something here,
    # this doesn't force child classes to have 'str' type though, they actually can be any type
    type=str,
):
    """Ensure values synchronization between sibling parameters."""

    # Re-entrancy guard used by `put()`: switched off while the value is being
    # forwarded to the sibling and restored afterwards.
    _update_sibling = True

    @classmethod
    def _sibling(cls) -> type["EnvWithSibilings"]:
        """
        Return a sibling parameter.

        Subclasses must override this; the base implementation always raises.

        Returns
        -------
        type[EnvWithSibilings]

        Raises
        ------
        NotImplementedError
            Always, in this base class.
        """
        raise NotImplementedError()

    @classmethod
    def get(cls) -> Any:
        """
        Get parameter's value and ensure that it's equal to the sibling's value.

        When neither this parameter nor its sibling has a value yet, both are
        resolved through the parent `get()`, and the value that came from the
        config source (or this parameter's default, if both are defaults) is
        copied to the other one.

        Returns
        -------
        Any

        Raises
        ------
        ValueError
            If both this parameter and its sibling got their values from the
            config source.
        """
        sibling = cls._sibling()

        # Synchronize only on first access, while neither side holds a value.
        if sibling._value is _UNSET and cls._value is _UNSET:
            super().get()
            with warnings.catch_warnings():
                # filter warnings that can potentially come from the potentially deprecated sibling
                warnings.filterwarnings("ignore", category=FutureWarning)
                # call the parent implementation on the sibling to avoid recursing into this method
                super(EnvWithSibilings, sibling).get()

            if (
                cls._value_source
                == sibling._value_source
                == ValueSource.GOT_FROM_CFG_SOURCE
            ):
                raise ValueError(
                    f"Configuration is ambiguous. You cannot set '{cls.varname}' and '{sibling.varname}' at the same time."
                )

            # From here on we assume that there are only two valid sources for the variables:
            # 'GOT_FROM_CFG' and 'DEFAULT'; otherwise we wouldn't have ended up in this branch at all,
            # because every other way of setting a value changes the '._value' attribute
            # from '_UNSET' to something meaningful
            from modin.error_message import ErrorMessage

            if cls._value_source == ValueSource.GOT_FROM_CFG_SOURCE:
                ErrorMessage.catch_bugs_and_request_email(
                    failure_condition=sibling._value_source != ValueSource.DEFAULT
                )
                # this parameter was configured explicitly: copy its value to the sibling
                sibling._value = cls._value
                sibling._value_source = ValueSource.GOT_FROM_CFG_SOURCE
            elif sibling._value_source == ValueSource.GOT_FROM_CFG_SOURCE:
                ErrorMessage.catch_bugs_and_request_email(
                    failure_condition=cls._value_source != ValueSource.DEFAULT
                )
                # the sibling was configured explicitly: adopt its value here
                cls._value = sibling._value
                cls._value_source = ValueSource.GOT_FROM_CFG_SOURCE
            else:
                ErrorMessage.catch_bugs_and_request_email(
                    failure_condition=cls._value_source != ValueSource.DEFAULT
                    or sibling._value_source != ValueSource.DEFAULT
                )
                # propagating 'cls' default value to the sibling
                sibling._value = cls._value
        return super().get()

    @classmethod
    def put(cls, value: Any) -> None:
        """
        Set a new value to this parameter as well as to its sibling.

        Parameters
        ----------
        value : Any
        """
        super().put(value)
        # avoid getting into an infinite recursion
        if cls._update_sibling:
            cls._update_sibling = False
            try:
                with warnings.catch_warnings():
                    # filter potential future warnings of the sibling
                    warnings.filterwarnings("ignore", category=FutureWarning)
                    cls._sibling().put(value)
            finally:
                # restore the guard even if the sibling's put() raised
                cls._update_sibling = True


class EnvironmentVariableDisallowingExecutionAndBackendBothSet(
    EnvironmentVariable,
    type=EnvironmentVariable.type,
    abstract=True,
):
    """Subclass to disallow getting this variable from the environment when both execution and backend are set in the environment."""

    @classmethod
    @doc(EnvironmentVariable._get_value_from_config.__doc__)
    def _get_value_from_config(cls) -> str:
        env = os.environ
        backend_given = Backend.varname in env
        execution_given = Engine.varname in env or StorageFormat.varname in env
        if backend_given and execution_given:
            # Handling this case is tricky, in part because the combination of
            # Backend and Engine/StorageFormat may be invalid. For now just
            # disallow it.
            raise ValueError("Can't specify both execution and backend in environment")
        return super()._get_value_from_config()


class IsDebug(EnvironmentVariable, type=bool):
    """Force Modin engine to be "Python" unless specified by $MODIN_ENGINE."""

    # Consulted by `Engine._get_default()`: a truthy value makes "Python" the default engine.
    varname = "MODIN_DEBUG"


class Engine(
    EnvironmentVariableDisallowingExecutionAndBackendBothSet,
    type=str,
):
    """Distribution engine to run queries by."""

    varname = "MODIN_ENGINE"
    choices = ("Ray", "Dask", "Python", "Unidist", "Native")

    # engines that don't require initialization, useful for unit tests
    NOINIT_ENGINES = {"Python", "Native"}

    # set to True by `add_option()`; makes `_get_default()` return "Python"
    has_custom_engine = False

    @classmethod
    def _get_default(cls) -> str:
        """
        Pick the default engine.

        "Python" is chosen in debug mode or when a custom engine was added.
        Otherwise the first importable engine among Ray, Dask and Unidist is
        chosen, provided its installed version is recent enough.

        Returns
        -------
        str
        """
        from modin.utils import MIN_DASK_VERSION, MIN_RAY_VERSION, MIN_UNIDIST_VERSION

        # If there's a custom engine, we don't need to check for any engine
        # dependencies. Return the default "Python" engine.
        if IsDebug.get() or cls.has_custom_engine:
            return "Python"

        try:
            import ray

        except ImportError:
            pass
        else:
            ray_version = version.parse(ray.__version__)
            if ray_version < MIN_RAY_VERSION:
                raise ImportError(
                    'Please `pip install "modin[ray]"` to install compatible Ray version '
                    + f"(>={MIN_RAY_VERSION})."
                )
            return "Ray"

        try:
            import dask
            import distributed

        except ImportError:
            pass
        else:
            # both packages have to satisfy the same minimum version
            outdated = any(
                version.parse(module.__version__) < MIN_DASK_VERSION
                for module in (dask, distributed)
            )
            if outdated:
                raise ImportError(
                    f'Please `pip install "modin[dask]"` to install compatible Dask version (>={MIN_DASK_VERSION}).'
                )
            return "Dask"

        try:
            import unidist

        except ImportError:
            pass
        else:
            unidist_version = version.parse(unidist.__version__)
            if unidist_version < MIN_UNIDIST_VERSION:
                raise ImportError(
                    'Please `pip install "unidist[mpi]"` to install compatible unidist on MPI version '
                    + f"(>={MIN_UNIDIST_VERSION})."
                )
            return "Unidist"

        raise ImportError(
            "Please refer to installation documentation page to install an engine"
        )

    @classmethod
    @doc(Parameter.add_option.__doc__)
    def add_option(cls, choice: Any) -> Any:
        added = super().add_option(choice)
        # a custom engine needs no initialization and switches the default to "Python"
        cls.NOINIT_ENGINES.add(added)
        cls.has_custom_engine = True
        return added

    @classmethod
    def put(cls, value: str) -> None:
        """
        Set the engine value.

        Parameters
        ----------
        value : str
            Engine value to set.
        """
        engine = cls.normalize(value)
        execution = Execution(engine=engine, storage_format=StorageFormat.get())
        # Backend.put() will set Engine.
        Backend.put(Backend.get_backend_for_execution(execution))

    @classmethod
    def get(cls) -> str:
        """
        Get the engine value.

        Returns
        -------
        str
            Engine value.
        """
        # We have to override get() because Engine may need to get its value
        # from the OS's environment variables for Backend or Engine.

        cls._warn_if_deprecated()

        if cls._value is _UNSET:
            from_engine_var = cls._get_value_from_config()
            from_backend_var = Backend._get_value_from_config()

            # Precedence: the configured Engine value, then the engine implied
            # by the configured Backend, then the default Engine value.
            if from_engine_var is not _UNSET:
                resolved = from_engine_var
            elif from_backend_var is not _UNSET:
                resolved = Backend.get_execution_for_backend(from_backend_var).engine
            else:
                resolved = cls._get_default()
            cls._value = resolved

        return cls._value


class StorageFormat(EnvironmentVariableDisallowingExecutionAndBackendBothSet, type=str):
    """Engine to run on a single node of distribution."""

    varname = "MODIN_STORAGE_FORMAT"
    default = "Pandas"
    choices = ("Pandas", "Native")

    @classmethod
    def put(cls, value: str) -> None:
        """
        Set the storage format value.

        Parameters
        ----------
        value : str
            Storage format value to set.
        """
        storage_format = cls.normalize(value)
        execution = Execution(engine=Engine.get(), storage_format=storage_format)
        # Backend.put() will set StorageFormat.
        Backend.put(Backend.get_backend_for_execution(execution))

    @classmethod
    def get(cls) -> str:
        """
        Get the storage format value.

        Returns
        -------
        str
            Storage format value.
        """
        # We have to override get() because StorageFormat may need to get its
        # value from the OS's environment variables for Backend or StorageFormat.

        cls._warn_if_deprecated()

        if cls._value is _UNSET:
            from_storage_format_var = cls._get_value_from_config()
            from_backend_var = Backend._get_value_from_config()

            # Precedence: the configured StorageFormat value, then the storage
            # format implied by the configured Backend, then the default.
            if from_storage_format_var is not _UNSET:
                resolved = from_storage_format_var
            elif from_backend_var is not _UNSET:
                resolved = Backend.get_execution_for_backend(
                    from_backend_var
                ).storage_format
            else:
                resolved = cls._get_default()
            cls._value = resolved

        return cls._value


# Pair identifying an execution. Field order is (storage_format, engine), so a
# positional call such as `Execution("Pandas", "Ray")` means
# storage_format="Pandas", engine="Ray".
Execution = namedtuple("Execution", ["storage_format", "engine"])


class Backend(EnvironmentVariableDisallowingExecutionAndBackendBothSet, type=str):
    """
    An alias for execution, i.e. the combination of StorageFormat and Engine.

    Setting backend may change StorageFormat and/or Engine to the corresponding
    respective values, and setting Engine or StorageFormat may change Backend.

    Modin's built-in backends include:
        - "Ray" <-> (StorageFormat="Pandas", Engine="Ray")
        - "Dask" <-> (StorageFormat="Pandas", Engine="Dask")
        - "Python_Test" <-> (StorageFormat="Pandas", Engine="Python")
            - This execution mode is meant for testing only.
        - "Unidist" <-> (StorageFormat="Pandas", Engine="Unidist")
        - "Pandas" <-> (StorageFormat="Native", Engine="Native")
    """

    # Two-way mapping between registered backend names and their executions.
    # Both dictionaries are filled by `register_backend()` only.
    _BACKEND_TO_EXECUTION: dict[str, Execution] = {}
    _EXECUTION_TO_BACKEND: dict[Execution, str] = {}
    varname: str = "MODIN_BACKEND"
    # Currently active backends; may be a subset of the registered ones.
    choices: tuple[str, ...] = ("Ray", "Dask", "Python_Test", "Unidist", "Pandas")

    @classmethod
    def put(cls, value: str) -> None:
        """
        Set the backend value.

        Parameters
        ----------
        value : str
            Backend value to set.
        """
        execution = cls.get_execution_for_backend(value)
        set_execution(execution.engine, execution.storage_format)

    @classmethod
    def _get_default(cls) -> str:
        """
        Get the default backend value.

        Returns
        -------
        str
            Default backend value.
        """
        return cls._EXECUTION_TO_BACKEND[
            Execution(StorageFormat._get_default(), Engine._get_default())
        ]

    @classmethod
    def register_backend(cls: type["Backend"], name: str, execution: Execution) -> None:
        """
        Register a new backend.

        Parameters
        ----------
        name : str
            Backend name.
        execution : Execution
            Execution that corresponds to the backend.

        Raises
        ------
        ValueError
            If the name or the execution is already registered.
        """
        name = cls.normalize(name)
        # Validate before touching any state, so that a rejected registration
        # does not leave the name among the active choices without an execution.
        if name in cls._BACKEND_TO_EXECUTION:
            raise ValueError(
                f"Backend '{name}' is already registered with the execution {cls._BACKEND_TO_EXECUTION[name]}."
            )
        if execution in cls._EXECUTION_TO_BACKEND:
            raise ValueError(
                f"{execution} is already registered with the backend {cls._EXECUTION_TO_BACKEND[execution]}."
            )
        # `Backend.add_option` is deliberately disabled, so call the parent one.
        super().add_option(name)
        cls._BACKEND_TO_EXECUTION[name] = execution
        cls._EXECUTION_TO_BACKEND[execution] = name

    @classmethod
    def add_option(cls, choice: str) -> NoReturn:
        """
        Raise an exception for trying to add an option to Backend directly.

        Parameters
        ----------
        choice : str
            Choice to add. Unused.

        Raises
        ------
        ValueError
            Always.
        """
        raise ValueError(
            "Cannot add an option to Backend directly. Use Backend.register_backend instead."
        )

    @classmethod
    def set_active_backends(cls, new_choices: tuple) -> None:
        """
        Set the active backends available for manual and automatic switching.

        Other backends may have been registered, and those backends remain registered, but the
        set of engines that can be used is dynamically modified.

        Parameters
        ----------
        new_choices : tuple
            Backends that replace the current set of active backends.

        Raises
        ------
        ValueError
            Raises a ValueError when the set of new_choices are not already registered.
        """
        # `choices` is extended elsewhere by tuple concatenation, so always
        # store a tuple even if another iterable was passed.
        new_choices = tuple(new_choices)
        registered_backends = cls._BACKEND_TO_EXECUTION
        if any(choice not in registered_backends for choice in new_choices):
            raise ValueError(
                f"Active backend choices {new_choices} are not all registered."
            )
        cls.choices = new_choices

    @classmethod
    def activate(cls, backend: str) -> None:
        """
        Activate a backend that was previously registered.

        This is a no-op if the backend is already active.

        Parameters
        ----------
        backend : str
            Backend to activate.

        Raises
        ------
        ValueError
            Raises a ValueError if backend was not previously registered.
        """
        # Backends are registered under their normalized names, so look the
        # name up the same way `get_execution_for_backend()` does.
        name = cls.normalize(backend) if isinstance(backend, str) else backend
        if name not in cls._BACKEND_TO_EXECUTION:
            raise ValueError(f"Unknown backend '{backend}' is not registered.")
        # Keep the documented no-op: do not add an active backend twice.
        if name not in cls.choices:
            cls.choices = (*cls.choices, name)

    @classmethod
    def get_active_backends(cls) -> tuple[str, ...]:
        """
        Get the active backends available for manual and automatic switching.

        Returns
        -------
        tuple[str, ...]
            returns the active set of backends for switching
        """
        return cls.choices

    @classmethod
    def get_backend_for_execution(cls, execution: Execution) -> str:
        """
        Get the backend for the execution.

        Parameters
        ----------
        execution : Execution
            Execution to get the backend for.

        Returns
        -------
        str
            Backend for the execution.

        Raises
        ------
        ValueError
            If no backend is registered for the execution.
        """
        if execution not in cls._EXECUTION_TO_BACKEND:
            raise ValueError(
                f"{execution} has no known backend. Please register a "
                + "backend for it with Backend.register_backend()"
            )
        return cls._EXECUTION_TO_BACKEND[execution]

    @classmethod
    def get_execution_for_backend(cls, backend: str) -> Execution:
        """
        Get the execution for the given backend.

        Parameters
        ----------
        backend : str
            Backend to get the execution for.

        Returns
        -------
        execution : Execution
            The execution for the given backend

        Raises
        ------
        TypeError
            If backend is not a string.
        ValueError
            If the backend is unknown, registered but not active, or active
            but without a registered execution.
        """
        if not isinstance(backend, str):
            raise TypeError(
                "Backend value should be a string, but instead it is "
                + f"{repr(backend)} of type {type(backend)}."
            )
        normalized_value = cls.normalize(backend)
        if normalized_value not in cls.choices:
            if normalized_value in cls._BACKEND_TO_EXECUTION:
                raise ValueError(
                    f"Backend '{backend}' is not currently active. Activate it first with Backend.activate('{backend}')."
                )
            backend_choice_string = ", ".join(f"'{choice}'" for choice in cls.choices)
            raise ValueError(
                f"Unknown backend '{backend}'. Available backends are: "
                + backend_choice_string
            )
        if normalized_value not in cls._BACKEND_TO_EXECUTION:
            raise ValueError(
                f"Backend '{backend}' has no known execution. Please "
                + "register an execution for it with Backend.register_backend()."
            )
        return cls._BACKEND_TO_EXECUTION[normalized_value]

    @classmethod
    def get(cls) -> str:
        """
        Get the backend.

        Returns
        -------
        str
            Backend.
        """
        # We have to override get() because Backend may need to get its value
        # from the OS's environment variables for Backend or Engine.

        cls._warn_if_deprecated()

        # First, check if we've already set the Backend value.
        if cls._value is not _UNSET:
            return cls._value

        backend_config_value = Backend._get_value_from_config()

        # If Backend is in the OS's configuration, use the configured Backend
        # value. Otherwise, we need to figure out the Backend value based on
        # the Engine and StorageFormat values.
        cls._value = (
            backend_config_value
            if backend_config_value is not _UNSET
            else cls.get_backend_for_execution(
                Execution(storage_format=StorageFormat.get(), engine=Engine.get())
            )
        )

        return cls._value


# Register Modin's built-in backends at import time. `Execution` arguments are
# positional: (storage_format, engine).
Backend.register_backend("Ray", Execution("Pandas", "Ray"))
Backend.register_backend("Dask", Execution("Pandas", "Dask"))
Backend.register_backend("Python_Test", Execution("Pandas", "Python"))
Backend.register_backend("Unidist", Execution("Pandas", "Unidist"))
Backend.register_backend("Pandas", Execution("Native", "Native"))


class AutoSwitchBackend(EnvironmentVariable, type=bool):
    """
    Whether automatic backend switching is allowed.

    When this flag is set, a Modin backend can attempt to automatically choose an appropriate backend
    for different operations based on features of the input data. When disabled, backends should
    avoid implicit backend switching outside of explicit operations like `to_pandas` and `to_ray`.
    """

    varname = "MODIN_AUTO_SWITCH_BACKENDS"
    default = False

    @classmethod
    def disable(cls) -> None:
        """Turn automatic backend switching off."""
        cls.put(False)

    @classmethod
    def enable(cls) -> None:
        """Turn automatic backend switching on."""
        cls.put(True)


class ShowBackendSwitchProgress(EnvironmentVariable, type=bool):
    """
    Whether to show progress when switching between backends.

    When enabled, progress messages are displayed during backend switches to inform users
    about data transfer operations. When disabled, backend switches occur silently.
    """

    varname = "MODIN_BACKEND_SWITCH_PROGRESS"
    default = True

    @classmethod
    def disable(cls) -> None:
        """Stop displaying progress for backend switches."""
        cls.put(False)

    @classmethod
    def enable(cls) -> None:
        """Start displaying progress for backend switches."""
        cls.put(True)


class IsExperimental(EnvironmentVariable, type=bool):
    """Whether to turn on experimental features."""

    # Name of the environment variable associated with this config.
    varname = "MODIN_EXPERIMENTAL"


class IsRayCluster(EnvironmentVariable, type=bool):
    """Whether Modin is running on a pre-initialized Ray cluster."""

    # Name of the environment variable associated with this config.
    varname = "MODIN_RAY_CLUSTER"


class RayRedisAddress(EnvironmentVariable, type=ExactStr):
    """Redis address to connect to when running in a Ray cluster."""

    # ``ExactStr`` type: the address is used as given, without any
    # case or whitespace transformation.
    varname = "MODIN_REDIS_ADDRESS"


class RayRedisPassword(EnvironmentVariable, type=ExactStr):
    """What password to use for connecting to Redis."""

    varname = "MODIN_REDIS_PASSWORD"
    # Random hex token (32 random bytes) generated once, when this class body
    # is executed; it is the default when no password is configured.
    default = secrets.token_hex(32)


class RayInitCustomResources(EnvironmentVariable, type=dict):
    """
    Ray node's custom resources to initialize with.

    Visit Ray documentation for more details:
    https://docs.ray.io/en/latest/ray-core/scheduling/resources.html#custom-resources

    Notes
    -----
    When relying on Modin to initialize Ray, you should set this config
    for the proper initialization with custom resources.
    """

    varname = "MODIN_RAY_INIT_CUSTOM_RESOURCES"
    # No custom resources unless explicitly configured.
    default = None


class RayTaskCustomResources(EnvironmentVariable, type=dict):
    """
    Ray node's custom resources to request in tasks or actors.

    Visit Ray documentation for more details:
    https://docs.ray.io/en/latest/ray-core/scheduling/resources.html#custom-resources

    Notes
    -----
    You can use this config to limit the parallelism for the entire workflow
    by setting the config at the very beginning.
    >>> import modin.config as cfg
    >>> cfg.RayTaskCustomResources.put({"special_hardware": 0.001})
    This way each single remote task or actor will require 0.001 of "special_hardware" to run.
    You can also use this config to limit the parallelism for a certain operation
    by setting the config with context.
    >>> with context(RayTaskCustomResources={"special_hardware": 0.001}):
    ...     df.<op>
    This way each single remote task or actor will require 0.001 of "special_hardware" to run
    within the context only.
    """

    varname = "MODIN_RAY_TASK_CUSTOM_RESOURCES"
    # No custom resources are requested unless explicitly configured.
    default = None


class CpuCount(EnvironmentVariable, type=int):
    """How many CPU cores to use during initialization of the Modin engine."""

    varname = "MODIN_CPUS"

    @classmethod
    def _put(cls, value: int) -> None:
        """
        Store `value` unless ``CpuCount`` already has a non-default value.

        Parameters
        ----------
        value : int
            Config value to set.

        Notes
        -----
        Internal helper for setting ``CpuCount`` from cluster resources;
        it is not meant to be called by a user.
        """
        # Anything other than the default source is left untouched.
        if cls.get_value_source() != ValueSource.DEFAULT:
            return
        cls.put(value)

    @classmethod
    def _get_default(cls) -> int:
        """
        Compute the default value of the config.

        Returns
        -------
        int
            The CPU count reported by ``multiprocessing.cpu_count()``.
        """
        from multiprocessing import cpu_count

        return cpu_count()

    @classmethod
    def get(cls) -> int:
        """
        Return ``CpuCount`` after validating that it is positive.

        Returns
        -------
        int

        Raises
        ------
        ValueError
            If the stored value is not greater than zero.
        """
        n_cpus = super().get()
        if n_cpus <= 0:
            message = f"`CpuCount` should be > 0; current value: {n_cpus}"
            raise ValueError(message)
        return n_cpus


class GpuCount(EnvironmentVariable, type=int):
    """How many GPU devices to utilize across the whole distribution."""

    # Name of the environment variable associated with this config.
    varname = "MODIN_GPUS"


class Memory(EnvironmentVariable, type=int):
    """
    How much memory (in bytes) to give to an execution engine.

    Notes
    -----
    * In Ray case: the amount of memory to start the Plasma object store with.
    * In Dask case: the amount of memory that is given to each worker depending on CPUs used.
    """

    # Name of the environment variable associated with this config.
    varname = "MODIN_MEMORY"


class NPartitions(EnvironmentVariable, type=int):
    """How many partitions to use for a Modin DataFrame (along each axis)."""

    varname = "MODIN_NPARTITIONS"

    @classmethod
    def _put(cls, value: int) -> None:
        """
        Store `value` unless ``NPartitions`` already has a non-default value.

        Parameters
        ----------
        value : int
            Config value to set.

        Notes
        -----
        Internal helper for setting ``NPartitions`` from cluster resources;
        it is not meant to be called by a user.
        """
        # Anything other than the default source is left untouched.
        if cls.get_value_source() != ValueSource.DEFAULT:
            return
        cls.put(value)

    @classmethod
    def _get_default(cls) -> int:
        """
        Compute the default value of the config.

        Returns
        -------
        int
            The current ``CpuCount`` value.
        """
        default_partitions = CpuCount.get()
        return default_partitions

    @classmethod
    def get(cls) -> int:
        """
        Return ``NPartitions`` after validating that it is positive.

        Returns
        -------
        int

        Raises
        ------
        ValueError
            If the stored value is not greater than zero.
        """
        partition_count = super().get()
        if partition_count <= 0:
            message = f"`NPartitions` should be > 0; current value: {partition_count}"
            raise ValueError(message)
        return partition_count


class TestDatasetSize(EnvironmentVariable, type=str):
    """Dataset size for running some tests."""

    varname = "MODIN_TEST_DATASET_SIZE"
    # Allowed values for this config.
    choices = ("Small", "Normal", "Big")


class TrackFileLeaks(EnvironmentVariable, type=bool):
    """Whether to track leakage of open file handles during testing."""

    varname = "MODIN_TEST_TRACK_FILE_LEAKS"
    # Tracking is off by default on Windows (and on by default elsewhere) because
    # psutil's open_files() can be extremely slow on Windows (up to adding a few hours).
    # see https://github.com/giampaolo/psutil/pull/597
    default = sys.platform != "win32"


class AsvImplementation(EnvironmentVariable, type=ExactStr):
    """Select the library that will be used for performance testing."""

    varname = "MODIN_ASV_USE_IMPL"
    # Allowed values; ``ExactStr`` means they are matched as written.
    choices = ("modin", "pandas")

    default = "modin"


class AsvDataSizeConfig(EnvironmentVariable, type=ExactStr):
    """Allows overriding the default size of data (shapes)."""

    varname = "MODIN_ASV_DATASIZE_CONFIG"
    # No override unless explicitly configured.
    default = None


class ProgressBar(EnvironmentVariable, type=bool):
    """Whether or not to show the progress bar."""

    varname = "MODIN_PROGRESS_BAR"
    default = False

    @classmethod
    def put(cls, value: bool) -> None:
        """
        Set ``ProgressBar`` value, refusing to enable it alongside ``BenchmarkMode``.

        Parameters
        ----------
        value : bool
            Config value to set.

        Raises
        ------
        ValueError
            If `value` is truthy while ``BenchmarkMode`` is on.
        """
        # ``BenchmarkMode`` is only consulted when the bar is being switched on.
        if value:
            if BenchmarkMode.get():
                raise ValueError("ProgressBar isn't compatible with BenchmarkMode")
        super().put(value)

    @classmethod
    def disable(cls) -> None:
        """Switch the ``ProgressBar`` feature off."""
        cls.put(False)

    @classmethod
    def enable(cls) -> None:
        """Switch the ``ProgressBar`` feature on."""
        cls.put(True)


class BenchmarkMode(EnvironmentVariable, type=bool):
    """Whether or not to perform computations synchronously."""

    varname = "MODIN_BENCHMARK_MODE"
    default = False

    @classmethod
    def put(cls, value: bool) -> None:
        """
        Set ``BenchmarkMode`` value, refusing to enable it alongside ``ProgressBar``.

        Parameters
        ----------
        value : bool
            Config value to set.

        Raises
        ------
        ValueError
            If `value` is truthy while ``ProgressBar`` is on.
        """
        # ``ProgressBar`` is only consulted when benchmark mode is being switched on.
        if value:
            if ProgressBar.get():
                raise ValueError("BenchmarkMode isn't compatible with ProgressBar")
        super().put(value)


class LogMode(EnvironmentVariable, type=ExactStr):
    """Set ``LogMode`` value if users want to opt-in."""

    varname = "MODIN_LOG_MODE"
    choices = ("enable", "disable")
    default = "disable"

    @classmethod
    def disable(cls) -> None:
        """Switch the logging feature off."""
        cls.put("disable")

    @classmethod
    def enable(cls) -> None:
        """Switch all logging levels on."""
        cls.put("enable")


class LogMemoryInterval(EnvironmentVariable, type=int):
    """Interval (in seconds) to profile memory utilization for logging."""

    varname = "MODIN_LOG_MEMORY_INTERVAL"
    default = 5

    @classmethod
    def put(cls, value: int) -> None:
        """
        Store a new ``LogMemoryInterval`` after checking that it is positive.

        Parameters
        ----------
        value : int
            Config value to set.

        Raises
        ------
        ValueError
            If `value` is not greater than zero.
        """
        if value <= 0:
            message = f"Log memory Interval should be > 0, passed value {value}"
            raise ValueError(message)
        super().put(value)

    @classmethod
    def get(cls) -> int:
        """
        Return ``LogMemoryInterval`` after checking that it is positive.

        Returns
        -------
        int

        Raises
        ------
        ValueError
            If the stored value is not greater than zero.
        """
        interval = super().get()
        if interval <= 0:
            message = f"`LogMemoryInterval` should be > 0; current value: {interval}"
            raise ValueError(message)
        return interval


class LogFileSize(EnvironmentVariable, type=int):
    """Max size of logs (in MBs) to store per Modin job."""

    varname = "MODIN_LOG_FILE_SIZE"
    default = 10

    @classmethod
    def put(cls, value: int) -> None:
        """
        Store a new ``LogFileSize`` after checking that it is positive.

        Parameters
        ----------
        value : int
            Config value to set.

        Raises
        ------
        ValueError
            If `value` is not greater than zero.
        """
        if value <= 0:
            message = f"Log file size should be > 0 MB, passed value {value}"
            raise ValueError(message)
        super().put(value)

    @classmethod
    def get(cls) -> int:
        """
        Return ``LogFileSize`` after checking that it is positive.

        Returns
        -------
        int

        Raises
        ------
        ValueError
            If the stored value is not greater than zero.
        """
        size_mb = super().get()
        if size_mb <= 0:
            message = f"`LogFileSize` should be > 0; current value: {size_mb}"
            raise ValueError(message)
        return size_mb


class MetricsMode(EnvironmentVariable, type=ExactStr):
    """
    Set ``MetricsMode`` value to disable/enable metrics collection.

    Metric handlers are registered through `add_metric_handler` and can
    be used to record graphite-style timings or values. It is the
    responsibility of the handler to define how those emitted metrics
    are handled.
    """

    varname = "MODIN_METRICS_MODE"
    choices = ("enable", "disable")
    default = "enable"

    @classmethod
    def disable(cls) -> None:
        """Switch all metric collection off."""
        cls.put("disable")

    @classmethod
    def enable(cls) -> None:
        """Switch all metric collection on."""
        cls.put("enable")


class PersistentPickle(EnvironmentVariable, type=bool):
    """Whether serialization should be persistent."""

    varname = "MODIN_PERSISTENT_PICKLE"
    # When off, serialization is faster but is only valid within the
    # current run (i.e. useless for saving to disk).
    # When on, Modin objects can be saved to disk and loaded back,
    # but serialization/deserialization may take more time.
    default = False


class MinPartitionSize(EnvironmentVariable, type=int):
    """
    Minimum number of rows/columns in a single pandas partition split.

    Once a partition for a pandas dataframe has more than this many elements,
    Modin adds another partition.
    """

    varname = "MODIN_MIN_PARTITION_SIZE"
    default = 32

    @classmethod
    def put(cls, value: int) -> None:
        """
        Store a new ``MinPartitionSize`` after checking that it is positive.

        Parameters
        ----------
        value : int
            Config value to set.

        Raises
        ------
        ValueError
            If `value` is not greater than zero.
        """
        if value <= 0:
            message = f"Min partition size should be > 0, passed value {value}"
            raise ValueError(message)
        super().put(value)

    @classmethod
    def get(cls) -> int:
        """
        Return ``MinPartitionSize``, emitting a deprecation warning first.

        Returns
        -------
        int

        Raises
        ------
        ValueError
            If the stored value is not greater than zero.
        """
        from modin.error_message import ErrorMessage

        # The deprecation notice is issued before the value is read.
        deprecation_text = (
            "`MinPartitionSize` is deprecated and will be removed in a future version. "
            "This config has no longer effect, "
            "use `MinRowPartitionSize` and `MinColumnPartitionSize` instead."
        )
        ErrorMessage.single_warning(deprecation_text, FutureWarning)

        partition_size = super().get()
        if partition_size <= 0:
            message = (
                f"`MinPartitionSize` should be > 0; current value: {partition_size}"
            )
            raise ValueError(message)
        return partition_size


class MinRowPartitionSize(EnvironmentVariable, type=int):
    """
    Minimum number of rows in a single pandas partition split.

    Once a partition for a pandas dataframe has more than this many elements,
    Modin adds another partition.
    """

    varname = "MODIN_MIN_ROW_PARTITION_SIZE"
    default = 32

    @classmethod
    def put(cls, value: int) -> None:
        """
        Store a new ``MinRowPartitionSize`` after checking that it is positive.

        Parameters
        ----------
        value : int
            Config value to set.

        Raises
        ------
        ValueError
            If `value` is not greater than zero.
        """
        if value <= 0:
            message = f"Min row partition size should be > 0, passed value {value}"
            raise ValueError(message)
        super().put(value)

    @classmethod
    def get(cls) -> int:
        """
        Return ``MinRowPartitionSize`` after checking that it is positive.

        Returns
        -------
        int

        Raises
        ------
        ValueError
            If the stored value is not greater than zero.
        """
        row_size = super().get()
        if row_size <= 0:
            message = (
                f"`MinRowPartitionSize` should be > 0; current value: {row_size}"
            )
            raise ValueError(message)
        return row_size


class MinColumnPartitionSize(EnvironmentVariable, type=int):
    """
    Minimum number of columns in a single pandas partition split.

    Once a partition for a pandas dataframe has more than this many elements,
    Modin adds another partition.
    """

    varname = "MODIN_MIN_COLUMN_PARTITION_SIZE"
    default = 32

    @classmethod
    def put(cls, value: int) -> None:
        """
        Store a new ``MinColumnPartitionSize`` after checking that it is positive.

        Parameters
        ----------
        value : int
            Config value to set.

        Raises
        ------
        ValueError
            If `value` is not greater than zero.
        """
        if value <= 0:
            message = f"Min column partition size should be > 0, passed value {value}"
            raise ValueError(message)
        super().put(value)

    @classmethod
    def get(cls) -> int:
        """
        Return ``MinColumnPartitionSize`` after checking that it is positive.

        Returns
        -------
        int

        Raises
        ------
        ValueError
            If the stored value is not greater than zero.
        """
        column_size = super().get()
        if column_size <= 0:
            message = f"`MinColumnPartitionSize` should be > 0; current value: {column_size}"
            raise ValueError(message)
        return column_size


class TestReadFromSqlServer(EnvironmentVariable, type=bool):
    """Set to true to test reading from SQL server."""

    varname = "MODIN_TEST_READ_FROM_SQL_SERVER"
    # Off unless explicitly requested.
    default = False


class TestReadFromPostgres(EnvironmentVariable, type=bool):
    """Set to true to test reading from Postgres."""

    varname = "MODIN_TEST_READ_FROM_POSTGRES"
    # Off unless explicitly requested.
    default = False


class GithubCI(EnvironmentVariable, type=bool):
    """Set to true when running Modin in GitHub CI."""

    varname = "MODIN_GITHUB_CI"
    # Off unless explicitly set.
    default = False


class ModinNumpy(EnvironmentVariable, type=bool):
    """Set to true to use Modin's implementation of the NumPy API."""

    varname = "MODIN_NUMPY"
    # Off unless explicitly requested.
    default = False


class RangePartitioning(EnvironmentVariable, type=bool):
    """
    Set to true to use Modin's range-partitioning implementation where possible.

    Please refer to documentation for cases where enabling this option would be beneficial:
    https://modin.readthedocs.io/en/stable/flow/modin/experimental/range_partitioning_groupby.html
    """

    varname = "MODIN_RANGE_PARTITIONING"
    # Off unless explicitly requested.
    default = False


class CIAWSSecretAccessKey(EnvironmentVariable, type=ExactStr):
    """Set to AWS_SECRET_ACCESS_KEY when running mock S3 tests for Modin in GitHub CI."""

    # ``ExactStr`` keeps the value exactly as provided. AWS credentials are
    # case-sensitive, whereas plain ``str`` configs are stripped and
    # title-cased, which would alter a key read from the environment.
    varname = "AWS_SECRET_ACCESS_KEY"
    default = "foobar_secret"


class CIAWSAccessKeyID(EnvironmentVariable, type=ExactStr):
    """Set to AWS_ACCESS_KEY_ID when running mock S3 tests for Modin in GitHub CI."""

    # ``ExactStr`` keeps the value exactly as provided. AWS credentials are
    # case-sensitive, whereas plain ``str`` configs are stripped and
    # title-cased, which would alter a key read from the environment.
    varname = "AWS_ACCESS_KEY_ID"
    default = "foobar_key"


class AsyncReadMode(EnvironmentVariable, type=bool):
    """
    Do not wait for the end of reading information from the source.

    It basically means that the reading function only launches tasks for the dataframe
    to be read/created, but does not ensure that the construction is finalized by the time
    the reading function returns a dataframe.

    This option was brought to improve performance of reading/construction
    of Modin DataFrames, however it may also:

    1. Increase the peak memory consumption, since the garbage collection of the
    temporary objects created during the reading is now also lazy and will only
    be performed when the reading/construction is actually finished.

    2. Break situations when the source is manually deleted after the reading
    function returns a result, for example, when reading inside of a context-block
    that deletes the file on ``__exit__()``.
    """

    varname = "MODIN_ASYNC_READ_MODE"
    # Off unless explicitly requested.
    default = False


class ReadSqlEngine(EnvironmentVariable, type=str):
    """Engine to run `read_sql`."""

    varname = "MODIN_READ_SQL_ENGINE"
    default = "Pandas"
    # Allowed values for this config.
    choices = ("Pandas", "Connectorx")


class LazyExecution(EnvironmentVariable, type=str):
    """
    Lazy execution mode.

    Supported values:
        `Auto` - the execution mode is chosen by the engine for each operation (default value).
        `On`   - the lazy execution is performed wherever it's possible.
        `Off`  - the lazy execution is disabled.
    """

    varname = "MODIN_LAZY_EXECUTION"
    # Allowed values; their meaning is described in the class docstring.
    choices = ("Auto", "On", "Off")
    default = "Auto"


class DocModule(EnvironmentVariable, type=ExactStr):
    """
    The module that will be used for docstrings.

    The value set here must be a valid, importable module. It should have
    a `DataFrame`, `Series`, and/or several APIs directly (e.g. `read_csv`).
    """

    varname = "MODIN_DOC_MODULE"
    # ``ExactStr`` type: the module name is used exactly as written.
    default = "pandas"


class DaskThreadsPerWorker(EnvironmentVariable, type=int):
    """Number of threads per Dask worker."""

    varname = "MODIN_DASK_THREADS_PER_WORKER"
    # One thread per worker unless configured otherwise.
    default = 1


class NativePandasMaxRows(EnvironmentVariable, type=int):
    """Maximum number of rows which can be processed using local, native pandas."""

    varname = "MODIN_NATIVE_MAX_ROWS"
    # Ten million rows unless configured otherwise.
    default = 10_000_000


class NativePandasTransferThreshold(EnvironmentVariable, type=int):
    """
    Targeted max number of dataframe rows which should be transferred between engines.

    This is often the same value as MODIN_NATIVE_MAX_ROWS but it can be independently
    set to change how transfer costs are considered.
    """

    varname = "MODIN_NATIVE_MAX_XFER_ROWS"
    # Ten million rows unless configured otherwise.
    default = 10_000_000


class NativePandasDeepCopy(EnvironmentVariable, type=bool):
    """
    Whether to perform deep copies when transferring data with the native pandas backend.

    Copies occur when constructing a Modin frame from a native pandas object with
    `pd.DataFrame(pandas.DataFrame([]))`, or when creating a native pandas frame from a Modin one
    via `df.modin.to_pandas()`.

    Leaving this flag disabled produces significant performance improvements by reducing the number
    of copy operations performed. However, it may create unexpected results if the user mutates
    the Modin frame or native pandas frame in-place.

    >>> import pandas  # doctest: +SKIP
    >>> import modin.pandas as pd  # doctest: +SKIP
    >>> from modin.config import Backend  # doctest: +SKIP
    >>> Backend.put("Pandas")  # doctest: +SKIP
    >>> pandas.set_option("mode.copy_on_write", False)  # doctest: +SKIP
    >>> native_df = pandas.DataFrame([0])  # doctest: +SKIP
    >>> modin_df = pd.DataFrame(native_df)  # doctest: +SKIP
    >>> native_df.loc[0, 0] = -1  # doctest: +SKIP
    >>> modin_df  # doctest: +SKIP
       0
    0 -1
    """

    varname = "MODIN_NATIVE_DEEP_COPY"
    default = False

    @classmethod
    def disable(cls) -> None:
        """Turn deep copying off for frames with the native pandas backend."""
        cls.put(False)

    @classmethod
    def enable(cls) -> None:
        """Turn deep copying on for frames with the native pandas backend."""
        cls.put(True)


class BackendMergeCastInPlace(EnvironmentVariable, type=bool):
    """
    Whether to cast a DataFrame in-place when performing a merge when using hybrid mode.

    This flag modifies the behavior of a cast performed on operations involving more
    than one type of query compiler. If enabled the actual cast will be performed in-place
    and the input DataFrame will have a new backend. If disabled the original DataFrame
    will remain on the same underlying engine.
    """

    varname = "MODIN_BACKEND_MERGE_CAST_IN_PLACE"
    default = True

    @classmethod
    def disable(cls) -> None:
        """Turn off in-place casting for merge operations between two different compilers."""
        cls.put(False)

    @classmethod
    def enable(cls) -> None:
        """Turn on in-place casting for merge operations between two different compilers."""
        cls.put(True)


class BackendJoinConsiderAllBackends(EnvironmentVariable, type=bool):
    """
    Whether to consider all active backends when performing a pre-operation switch for join operations.

    Only used when AutoSwitchBackend is active.
    By default, only backends already present in the arguments of a join operation are considered when
    switching backends. Enabling this flag will allow join operations that are registered
    as pre-op switches to consider backends other than those directly present in the arguments.
    """

    varname = "MODIN_BACKEND_JOIN_CONSIDER_ALL_BACKENDS"
    default = True

    @classmethod
    def disable(cls) -> None:
        """Stop considering all active backends for pre-operation switches of join operations."""
        cls.put(False)

    @classmethod
    def enable(cls) -> None:
        """Consider all active backends for pre-operation switches of join operations."""
        cls.put(True)


class DynamicPartitioning(EnvironmentVariable, type=bool):
    """
    Set to true to use Modin's dynamic-partitioning implementation where possible.

    Please refer to documentation for cases where enabling this option would be beneficial:
    https://modin.readthedocs.io/en/stable/usage_guide/optimization_notes/index.html#dynamic-partitioning-in-modin
    """

    varname = "MODIN_DYNAMIC_PARTITIONING"
    # Off unless explicitly requested.
    default = False


def _check_vars() -> None:
    """
    Validate the ``MODIN_``-prefixed environment variables.

    Two kinds of warnings may be emitted:

    * one warning listing every variable with the "MODIN_" prefix that does
      not match any known config - it might be a typo;
    * one ``FutureWarning`` per set variable that belongs to a config
      carrying a deprecation descriptor.
    """
    # Every concrete ``EnvironmentVariable`` subclass reachable from this
    # module's globals.
    concrete_configs = [
        candidate
        for candidate in globals().values()
        if isinstance(candidate, type)
        and issubclass(candidate, EnvironmentVariable)
        and not candidate.is_abstract
    ]

    valid_names = {config.varname for config in concrete_configs}
    found_names = {name for name in os.environ if name.startswith("MODIN_")}
    unknown = found_names - valid_names

    # Environment variable name -> deprecation descriptor of its config.
    deprecated: dict[str, DeprecationDescriptor] = {}
    for config in concrete_configs:
        if config.varname is None or config._deprecation_descriptor is None:
            continue
        deprecated[config.varname] = config._deprecation_descriptor
    found_deprecated = found_names & deprecated.keys()

    if unknown:
        several = len(unknown) > 1
        warnings.warn(
            f"Found unknown environment variable{'s' if several else ''},"
            + f" please check {'their' if several else 'its'} spelling: "
            + ", ".join(sorted(unknown))
        )

    for depr_var in found_deprecated:
        descriptor = deprecated[depr_var]
        warnings.warn(
            descriptor.deprecation_message(use_envvar_names=True),
            FutureWarning,
        )


# Run the environment check once, when this module is executed on import.
_check_vars()


================================================
FILE: modin/config/pubsub.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses ``Parameter`` class - base class for all configs."""

import contextlib
import warnings
from collections import defaultdict
from enum import IntEnum
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    DefaultDict,
    Iterator,
    NamedTuple,
    Optional,
    Tuple,
    cast,
)

if TYPE_CHECKING:
    from modin.config.envvars import EnvironmentVariable


class DeprecationDescriptor:
    """
    Hold the details of a parameter's deprecation.

    Parameters
    ----------
    parameter : type[Parameter]
        Deprecated parameter.
    new_parameter : type[Parameter], optional
        Replacement for the deprecated parameter, if there is one.
    when_removed : str, optional
        The release in which the deprecated parameter is planned to be removed,
        if known. When omitted, messages refer to "a future" version.
    """

    _parameter: type["Parameter"]
    _new_parameter: Optional[type["Parameter"]]
    _when_removed: str

    def __init__(
        self,
        parameter: type["Parameter"],
        new_parameter: Optional[type["Parameter"]] = None,
        when_removed: Optional[str] = None,
    ):
        self._parameter = parameter
        self._new_parameter = new_parameter
        self._when_removed = when_removed if when_removed is not None else "a future"

    def deprecation_message(self, use_envvar_names: bool = False) -> str:
        """
        Build the text of the warning raised when the deprecated parameter is used.

        Parameters
        ----------
        use_envvar_names : bool, default: False
            Whether to refer to parameters by their environment variable names
            instead of their class names. If ``True``, both ``self._parameter``
            and ``self._new_parameter`` have to be a type of ``EnvironmentVariable``.

        Returns
        -------
        str
        """

        def shown_name(param):
            # Either the environment variable name or the class name.
            if use_envvar_names:
                return cast("EnvironmentVariable", param).varname
            return param.__name__

        pieces = [
            f"'{shown_name(self._parameter)}' is deprecated and will be removed in {self._when_removed} version."
        ]
        # A replacement hint is appended only when a new parameter is known.
        if self._new_parameter is not None:
            pieces.append(f" Use '{shown_name(self._new_parameter)}' instead.")
        return "".join(pieces)


class TypeDescriptor(NamedTuple):
    """
    Class for config data manipulating of exact type.

    Parameters
    ----------
    decode : callable
        Callable to decode config value from the raw data.
    normalize : callable
        Callable to bring different config value variations to
        the single form.
    verify : callable
        Callable to check that config value satisfies given config
        type requirements.
    help : str
        Class description string.
    """

    decode: Callable[[str], object]
    normalize: Callable[[object], object]
    verify: Callable[[object], bool]
    help: str


class ExactStr(str):
    """Class to be used in type params where no transformations are needed."""


def _normalize_bool(value: object) -> bool:
    """
    Bring a boolean config value to a real ``bool``.

    Parameters
    ----------
    value : object
        Value to normalize; either a string spelling of a flag or any other object.

    Returns
    -------
    bool
        For strings, ``True`` only for 'true', 'yes' or '1' (case-insensitive,
        surrounding whitespace ignored), exactly as ``decode`` interprets raw
        values. For anything else, the result of ``bool(value)``.

    Notes
    -----
    Plain ``bool(value)`` must not be applied to strings: ``verify`` accepts
    'false', 'no' and '0', and ``bool`` of any non-empty string is ``True``.
    """
    if isinstance(value, str):
        return value.strip().lower() in {"true", "yes", "1"}
    return bool(value)


# Maps a config value type to the callables used to decode raw values,
# normalize user-provided values and verify both.
_TYPE_PARAMS = {
    str: TypeDescriptor(
        # Plain strings are case-insensitive: they are stripped and title-cased.
        decode=lambda value: value.strip().title(),
        normalize=lambda value: str(value).strip().title(),
        verify=lambda value: True,
        help="a case-insensitive string",
    ),
    ExactStr: TypeDescriptor(
        # Exact strings are passed through untouched.
        decode=lambda value: value,
        normalize=lambda value: value,
        verify=lambda value: True,
        help="a string",
    ),
    bool: TypeDescriptor(
        decode=lambda value: value.strip().lower() in {"true", "yes", "1"},
        # String spellings accepted by ``verify`` have to be interpreted,
        # not merely tested for emptiness.
        normalize=_normalize_bool,
        verify=lambda value: isinstance(value, bool)
        or (
            isinstance(value, str)
            and value.strip().lower() in {"true", "yes", "1", "false", "no", "0"}
        ),
        help="a boolean flag (any of 'true', 'yes' or '1' in case insensitive manner is considered positive)",
    ),
    int: TypeDescriptor(
        decode=lambda value: int(value.strip()),
        normalize=int,  # type: ignore
        verify=lambda value: isinstance(value, int)
        or (isinstance(value, str) and value.strip().isdigit()),
        help="an integer value",
    ),
    dict: TypeDescriptor(
        # 'K1=V1,K2=V2' -> {'K1': V1, 'K2': V2}; all-digit values become ints,
        # every other value stays a string.
        decode=lambda value: {
            key: int(val) if val.isdigit() else val
            for key_value in value.split(",")
            for key, val in [[v.strip() for v in key_value.split("=", maxsplit=1)]]
        },
        normalize=lambda value: (
            value
            if isinstance(value, dict)
            else {
                key: int(val) if val.isdigit() else val
                for key_value in str(value).split(",")
                for key, val in [[v.strip() for v in key_value.split("=", maxsplit=1)]]
            }
        ),
        # Every comma-separated item needs an '=' that is not its last character.
        verify=lambda value: isinstance(value, dict)
        or (
            isinstance(value, str)
            and all(
                key_value.find("=") not in (-1, len(key_value) - 1)
                for key_value in value.split(",")
            )
        ),
        help="a sequence of KEY=VALUE values separated by comma (Example: 'KEY1=VALUE1,KEY2=VALUE2,KEY3=VALUE3')",
    ),
}

# Special marker to distinguish an unset value from a ``None`` value,
# as someone may want to use ``None`` as a real value for a parameter.
# It is a unique object, so it is compared by identity (``is _UNSET``).
_UNSET = object()


class ValueSource(IntEnum):  # noqa: PR01
    """
    Class that describes the method of getting the value for a parameter.

    Members are integers because the class derives from ``IntEnum``.
    """

    # got from default, i.e. neither the user nor a configuration source had the value
    DEFAULT = 0
    # set by the user
    SET_BY_USER = 1
    # got from the parameter's configuration source, like an environment variable
    GOT_FROM_CFG_SOURCE = 2


class Parameter(object):
    """
    Base class describing interface for configuration entities.

    Every subclass keeps its own state (value, subscribers, one-shot callbacks);
    that state is reset in ``__init_subclass__`` so it is never shared with the
    parent class.

    Attributes
    ----------
    choices : Optional[Sequence[str]]
        Array with possible options of ``Parameter`` values.
    type : type
        Python type of the ``Parameter`` value; must be one of the keys
        of ``_TYPE_PARAMS``.
    default : Optional[Any]
        ``Parameter`` default value.
    is_abstract : bool, default: True
        Whether or not ``Parameter`` is abstract.
    _value_source : Optional[ValueSource]
        Source of the ``Parameter`` value, should be set by
        ``ValueSource``.
    _deprecation_descriptor : Optional[DeprecationDescriptor]
        Indicate whether this parameter is deprecated.
    """

    choices: Optional[Tuple[str, ...]] = None
    type = str
    default: Optional[Any] = None
    is_abstract = True
    _value_source: Optional[ValueSource] = None
    # current value, ``_UNSET`` until the first ``get()`` or ``put()``
    _value: Any = _UNSET
    # callbacks executed on every value change (see ``subscribe``)
    _subs: list = []
    # one-shot callbacks keyed by the value that triggers them (see ``once``)
    _once: DefaultDict[Any, list] = defaultdict(list)
    _deprecation_descriptor: Optional[DeprecationDescriptor] = None

    @classmethod
    def _warn_if_deprecated(cls) -> None:
        """Warn that the variable is deprecated if it has a deprecation descriptor."""
        if cls._deprecation_descriptor is not None:
            warnings.warn(
                cls._deprecation_descriptor.deprecation_message(), FutureWarning
            )

    @classmethod
    def _get_value_from_config(cls) -> Any:
        """
        Read the value from config storage.

        Returns
        -------
        Any
            Config raw value if it's set, otherwise `_UNSET`.

        Notes
        -----
        Config storage can be config file or environment variable or whatever.
        Method should be implemented in the child class.
        """
        raise NotImplementedError()

    @classmethod
    def get_help(cls) -> str:
        """
        Generate user-presentable help for the option.

        Returns
        -------
        str

        Notes
        -----
        Method should be implemented in the child class.
        """
        raise NotImplementedError()

    def __init_subclass__(cls, type: Any, abstract: bool = False, **kw: dict):
        """
        Initialize subclass.

        Parameters
        ----------
        type : Any
            Type of the config.
        abstract : bool, default: False
            Whether config is abstract.
        **kw : dict
            Optional arguments for config initialization.
        """
        assert type in _TYPE_PARAMS, f"Unsupported variable type: {type}"
        cls.type = type
        cls.is_abstract = abstract
        # give every subclass its own state instead of sharing the parent's
        cls._value = _UNSET
        cls._subs = []
        cls._once = defaultdict(list)
        super().__init_subclass__(**kw)

    @classmethod
    def subscribe(cls, callback: Callable) -> None:
        """
        Add `callback` to the `_subs` list and then execute it.

        Parameters
        ----------
        callback : callable
            Callable to execute. It is called with the config class as its
            only argument, once immediately and then on every value change.
        """
        cls._subs.append(callback)
        callback(cls)

    @classmethod
    def _get_default(cls) -> Any:
        """
        Get default value of the config.

        Returns
        -------
        Any
        """
        return cls.default

    @classmethod
    def get_value_source(cls) -> ValueSource:
        """
        Get value source of the config.

        Returns
        -------
        ValueSource
        """
        if cls._value_source is None:
            # dummy call to .get() to initialize the value
            cls.get()
        assert (
            cls._value_source is not None
        ), "_value_source must be initialized by now in get()"
        return cls._value_source

    @classmethod
    def get(cls) -> Any:
        """
        Get config value.

        On the first call the value is taken from the config storage
        (``_get_value_from_config``) or, if nothing is set there, from
        ``_get_default``; the result is cached for subsequent calls.

        Returns
        -------
        Any
            Current config value.
        """
        cls._warn_if_deprecated()
        if cls._value is _UNSET:
            # get the value from env
            config_value = cls._get_value_from_config()
            if config_value is _UNSET:
                cls._value = cls._get_default()
                cls._value_source = ValueSource.DEFAULT
            else:
                cls._value = config_value
                cls._value_source = ValueSource.GOT_FROM_CFG_SOURCE
        return cls._value

    @classmethod
    def put(cls, value: Any) -> None:
        """
        Set config value.

        Parameters
        ----------
        value : Any
            Config value to set.

        Raises
        ------
        ValueError
            If `value` does not pass verification for the config type.
        """
        cls._warn_if_deprecated()
        oldvalue = cls._put_nocallback(value)
        # Record the new source right after the value is stored and *before*
        # the callbacks run: subscribers must observe a consistent
        # (value, source) pair, and a failing callback must not leave the
        # user-set value labelled with a stale source.
        cls._value_source = ValueSource.SET_BY_USER
        cls._check_callbacks(oldvalue)

    @classmethod
    def normalize(cls, value: Any) -> Any:
        """
        Normalize config value.

        Parameters
        ----------
        value : Any
            Config value to normalize.

        Returns
        -------
        Any
            Normalized config value.
        """
        return _TYPE_PARAMS[cls.type].normalize(value)

    @classmethod
    def once(cls, onvalue: Any, callback: Callable) -> None:
        """
        Execute `callback` if config value matches `onvalue` value.

        Otherwise accumulate callbacks associated with the given `onvalue`
        in the `_once` container.

        Parameters
        ----------
        onvalue : Any
            Config value to set.
        callback : callable
            Callable that should be executed if config value matches `onvalue`.
        """
        onvalue = cls.normalize(onvalue)
        if onvalue == cls.get():
            callback(cls)
        else:
            cls._once[onvalue].append(callback)

    @classmethod
    def _put_nocallback(cls, value: Any) -> Any:
        """
        Set config value without executing callbacks.

        Parameters
        ----------
        value : Any
            Config value to set.

        Returns
        -------
        Any
            Replaced (old) config value.

        Raises
        ------
        ValueError
            If `value` does not pass verification for the config type.
        """
        if not _TYPE_PARAMS[cls.type].verify(value):
            raise ValueError(f"Unsupported value: {value}")
        value = cls.normalize(value)
        # ``get()`` is evaluated first, so the old value is initialized
        # (if it was still unset) before being replaced
        oldvalue, cls._value = cls.get(), value
        return oldvalue

    @classmethod
    def _check_callbacks(cls, oldvalue: Any) -> None:
        """
        Execute all needed callbacks if config value was changed.

        Parameters
        ----------
        oldvalue : Any
            Previous (old) config value.
        """
        if oldvalue == cls.get():
            return
        for callback in cls._subs:
            callback(cls)
        # one-shot callbacks are removed from the container before being run
        for callback in cls._once.pop(cls.get(), ()):
            callback(cls)

    @classmethod
    def add_option(cls, choice: Any) -> Any:
        """
        Add a new choice for the parameter.

        Parameters
        ----------
        choice : Any
            New choice to add to the available choices.

        Returns
        -------
        Any
            Added choice normalized according to the parameter type.

        Raises
        ------
        ValueError
            If `choice` does not pass verification for the config type.
        TypeError
            If the parameter has no list of choices (``choices`` is None).
        """
        if cls.choices is not None:
            if not _TYPE_PARAMS[cls.type].verify(choice):
                raise ValueError(f"Unsupported choice value: {choice}")
            choice = cls.normalize(choice)
            if choice not in cls.choices:
                cls.choices += (choice,)
            return choice
        raise TypeError("Cannot add a choice to a parameter where choices is None")


@contextlib.contextmanager
def context(**config: Any) -> Iterator[None]:
    """
    Set a value(s) for the specified config(s) from ``modin.config`` in the scope of the context.

    The previous values are put back when the context exits. They are also
    put back if applying one of the new values fails, so a failed ``context()``
    call does not leave some of the configs overridden.

    Parameters
    ----------
    **config : Any
        Keyword describing a name of a config variable from ``modin.config`` as a key
        and a new value as a value.

    Raises
    ------
    AttributeError
        If a keyword does not name an attribute of ``modin.config``.

    Examples
    --------
    >>> RangePartitioning.get()
    False
    >>> with context(RangePartitioning=True):
    ...     print(RangePartitioning.get()) # True
    True
    >>> RangePartitioning.get()
    False
    >>> with context(RangePartitioning=True, AsyncReadMode=True):
    ...     print(RangePartitioning.get()) # True
    ...     print(AsyncReadMode.get()) # True
    True
    True
    >>> RangePartitioning.get()
    False
    >>> AsyncReadMode.get()
    False
    """
    import modin.config as cfg

    old_values = {}
    try:
        # The overrides are applied inside ``try`` so that a failure on a later
        # config (unknown name, rejected value) still restores the earlier ones.
        for name, val in config.items():
            var = getattr(cfg, name)
            old_values[var] = var.get()
            var.put(val)
        yield
    finally:
        for var, val in old_values.items():
            var.put(val)


# Names exported by ``from <this module> import *``.
__all__ = ["Parameter", "context"]


================================================
FILE: modin/conftest.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

# We turn off mypy type checks in this file because it's not imported anywhere
# type: ignore

import copy
import logging
import os
import platform
import shutil
import subprocess
import sys
import time
from collections import defaultdict
from contextlib import contextmanager
from typing import Iterable, Optional

import boto3
import numpy as np
import pandas
import pytest
import requests
import s3fs
from pandas.util._decorators import doc

from modin.config import Backend, Execution

assert (
    "modin.utils" not in sys.modules
), "Do not import modin.utils before patching, or tests could fail"
# every import under this assert has to be postfixed with 'noqa: E402'
# as flake8 complains about that... but we _have_ to make sure we
# monkey-patch at the right spot, otherwise testing doc URLs might
# not catch all of them
import modin.utils  # noqa: E402

# Every URL returned by the patched ``modin.utils._make_api_url`` is collected here
# (see ``_saving_make_api_url``); tests read it via the ``get_generated_doc_urls`` fixture.
_generated_doc_urls = set()


def _saving_make_api_url(token, _make_api_url=modin.utils._make_api_url):
    """
    Build a documentation URL for `token` and remember it.

    The original ``modin.utils._make_api_url`` is captured as a default argument
    at definition time, so this wrapper keeps delegating to it even after the
    module attribute is replaced by the wrapper itself.
    """
    generated_url = _make_api_url(token)
    _generated_doc_urls.add(generated_url)
    return generated_url


# Install the recording wrapper; this must happen before the remaining modin
# imports below so that URLs generated while importing them are recorded too.
modin.utils._make_api_url = _saving_make_api_url

import uuid  # noqa: E402

import modin  # noqa: E402
import modin.config  # noqa: E402
import modin.pandas as pd  # noqa: E402
import modin.tests.config  # noqa: E402
from modin.config import (  # noqa: E402
    AsyncReadMode,
    BenchmarkMode,
    GithubCI,
    IsExperimental,
    MinRowPartitionSize,
    NPartitions,
)
from modin.core.execution.dispatching.factories import factories  # noqa: E402
from modin.core.execution.python.implementations.pandas_on_python.io import (  # noqa: E402
    PandasOnPythonIO,
)
from modin.core.storage_formats import (  # noqa: E402
    BaseQueryCompiler,
    PandasQueryCompiler,
)
from modin.core.storage_formats.pandas.query_compiler_caster import (  # noqa: E402
    _CLASS_AND_BACKEND_TO_POST_OP_SWITCH_METHODS,
    _CLASS_AND_BACKEND_TO_PRE_OP_SWITCH_METHODS,
    _GENERAL_EXTENSIONS,
)
from modin.tests.pandas.utils import (  # noqa: E402
    NROWS,
    _make_csv_file,
    get_unique_filename,
    make_default_file,
)


def pytest_addoption(parser):
    """Register the ``--execution`` command line option on `parser`."""
    execution_option = {
        "action": "store",
        "default": None,
        "help": "specifies execution to run tests on",
    }
    parser.addoption("--execution", **execution_option)


def set_experimental_env(mode):
    """Turn ``IsExperimental`` on when `mode` is ``"experimental"`` and off otherwise."""
    experimental_requested = mode == "experimental"
    IsExperimental.put(experimental_requested)


@pytest.fixture(scope="session", autouse=True)
def enforce_config():
    """
    A fixture that ensures that all checks for MODIN_* variables
    are done using modin.config to prevent leakage

    For the duration of the session ``os.environ`` is replaced by a proxy that
    forwards every operation to the real environment, but asserts when a
    ``MODIN_*`` variable is touched from a module inside the ``modin`` package
    other than ``modin.config`` or ``modin.tests.config``.
    """
    orig_env = os.environ
    modin_start = os.path.dirname(modin.__file__)
    modin_exclude = [
        os.path.dirname(modin.config.__file__),
        os.path.dirname(modin.tests.config.__file__),
    ]
    # marks "no default given" for ``pop`` so that a missing key raises
    # ``KeyError`` exactly like ``os.environ.pop(name)`` does
    missing = object()

    class PatchedEnv:
        @staticmethod
        def __check_var(name):
            if name.upper().startswith("MODIN_"):
                frame = sys._getframe()
                try:
                    # get the path to module where caller of caller is defined;
                    # caller of this function is inside PatchedEnv, and we're
                    # interested in whomever called a method on PatchedEnv
                    caller_file = frame.f_back.f_back.f_code.co_filename
                finally:
                    del frame
                pkg_name = os.path.dirname(caller_file)
                if pkg_name.startswith(modin_start):
                    assert any(
                        pkg_name.startswith(excl) for excl in modin_exclude
                    ), "Do not access MODIN_ environment variable bypassing modin.config"

        def __getitem__(self, name):
            self.__check_var(name)
            return orig_env[name]

        def __setitem__(self, name, value):
            self.__check_var(name)
            orig_env[name] = value

        def __delitem__(self, name):
            self.__check_var(name)
            del orig_env[name]

        def pop(self, name, default=missing):
            self.__check_var(name)
            if default is missing:
                # no default supplied: let the real mapping raise KeyError
                # instead of handing a meaningless placeholder to the caller
                return orig_env.pop(name)
            return orig_env.pop(name, default)

        def get(self, name, default=None):
            self.__check_var(name)
            return orig_env.get(name, default)

        def __contains__(self, name):
            self.__check_var(name)
            return name in orig_env

        def __getattr__(self, name):
            # anything not intercepted above goes straight to the real environment
            return getattr(orig_env, name)

        def __iter__(self):
            return iter(orig_env)

    os.environ = PatchedEnv()
    yield
    os.environ = orig_env


# Value of ``--execution`` that makes ``pytest_configure`` install the test-only
# execution via ``set_base_execution``.
BASE_EXECUTION_NAME = "BaseOnPython"


class TestQC(BaseQueryCompiler):
    """
    Query compiler used by the test-only "Base" storage format on the "Python" engine.

    It wraps a modin frame, delegates finalization/execution to it, refuses the
    DataFrame exchange protocol and borrows ``to_pandas``/``default_to_pandas``
    from ``PandasQueryCompiler``.
    """

    def __init__(self, modin_frame):
        self._modin_frame = modin_frame

    # Constant identifiers of this execution; docstrings are reused from the base class.
    storage_format = property(
        lambda self: "Base", doc=BaseQueryCompiler.storage_format.__doc__
    )
    engine = property(lambda self: "Python", doc=BaseQueryCompiler.engine.__doc__)

    def finalize(self):
        self._modin_frame.finalize()

    def execute(self):
        # finalize first, then block until the frame's computations are done
        self.finalize()
        self._modin_frame.wait_computations()

    @classmethod
    def from_pandas(cls, df, data_cls):
        return cls(data_cls.from_pandas(df))

    @classmethod
    def from_arrow(cls, at, data_cls):
        return cls(data_cls.from_arrow(at))

    def free(self):
        # nothing to release
        pass

    def to_interchange_dataframe(
        self, nan_as_null: bool = False, allow_copy: bool = True
    ):
        raise NotImplementedError(
            "The selected execution does not implement the DataFrame exchange protocol."
        )

    @classmethod
    def from_interchange_dataframe(cls, df, data_cls):
        raise NotImplementedError(
            "The selected execution does not implement the DataFrame exchange protocol."
        )

    # Reuse the pandas query compiler implementations unchanged.
    to_pandas = PandasQueryCompiler.to_pandas
    default_to_pandas = PandasQueryCompiler.default_to_pandas


class BaseOnPythonIO(PandasOnPythonIO):
    """``PandasOnPythonIO`` variant whose query compiler class is ``TestQC``."""

    query_compiler_cls = TestQC


class BaseOnPythonFactory(factories.BaseFactory):
    """Factory for the test-only execution; ``prepare`` selects ``BaseOnPythonIO`` as its IO class."""

    @classmethod
    def prepare(cls):
        cls.io_cls = BaseOnPythonIO


def set_base_execution(name=BASE_EXECUTION_NAME):
    """
    Install the test-only execution and make it the current one.

    The factory is attached to ``factories`` as ``f"{name}Factory"`` and the
    storage format passed to ``modin.set_execution`` is the part of `name`
    before ``"On"``. The backend itself is always registered under the literal
    name ``"BaseOnPython"`` with engine ``"Python"`` and storage format ``"Base"``.
    """
    factory_attr = f"{name}Factory"
    setattr(factories, factory_attr, BaseOnPythonFactory)

    base_execution = Execution(engine="Python", storage_format="Base")
    Backend.register_backend("BaseOnPython", base_execution)

    storage_format = name.split("On")[0]
    modin.set_execution(engine="python", storage_format=storage_format)


@pytest.fixture(scope="function")
def get_unique_base_execution():
    """Setup unique execution for a single function and yield its QueryCompiler that's suitable for inplace modifications."""
    # A decimal ID is preferable to a hex one because of how factory names are formatted
    unique_id = uuid.uuid4().int
    format_name = f"Base{unique_id}"
    engine_name = "Python"
    execution_name = f"{format_name}On{engine_name}"
    factory_attr = f"{execution_name}Factory"

    # Build, on the fly, the three classes that make up a new execution
    def get_backend(self):
        return execution_name

    base_qc = type(format_name, (TestQC,), {"get_backend": get_backend})
    base_io = type(
        f"{execution_name}IO", (BaseOnPythonIO,), {"query_compiler_cls": base_qc}
    )

    def prepare(cls):
        cls.io_cls = base_io

    base_factory = type(
        factory_attr, (BaseOnPythonFactory,), {"prepare": classmethod(prepare)}
    )

    # Register the new execution and switch to it, remembering the previous one
    setattr(factories, factory_attr, base_factory)
    unique_execution = Execution(engine=engine_name, storage_format=format_name)
    Backend.register_backend(execution_name, unique_execution)
    old_engine, old_format = modin.set_execution(
        engine=engine_name, storage_format=format_name
    )
    yield base_qc

    # Switch back and drop the factory (it may already be gone)
    modin.set_execution(engine=old_engine, storage_format=old_format)
    try:
        delattr(factories, factory_attr)
    except AttributeError:
        pass


def pytest_configure(config):
    """
    Select the modin execution requested through ``--execution``.

    Nothing is changed when the option is absent. ``BASE_EXECUTION_NAME``
    installs the test-only execution and adds a warning filter; any other
    value is split on ``"On"`` into a storage format and an engine.
    """
    requested = config.option.execution
    if requested is None:
        return

    if requested != BASE_EXECUTION_NAME:
        storage_format, engine = requested.split("On")
        modin.set_execution(engine=engine, storage_format=storage_format)
        return

    set_base_execution(BASE_EXECUTION_NAME)
    config.addinivalue_line(
        "filterwarnings", "default:.*defaulting to pandas.*:UserWarning"
    )


def pytest_runtest_call(item):
    """
    Translate ``xfail_executions``/``skip_executions`` markers into real markers.

    For each such marker on `item`, an ``xfail``/``skip`` marker is added whose
    condition is whether the current execution is among the listed ones. The
    ``reason`` keyword is removed from the marker's kwargs and embedded in the
    generated reason; the remaining kwargs are forwarded as is.
    """
    # dynamically adding custom markers to tests
    for marker_kind in ("xfail", "skip"):
        for marker in item.iter_markers(name=f"{marker_kind}_executions"):
            executions = marker.args[0]
            if not isinstance(executions, list):
                executions = [executions]

            current_execution = modin.utils.get_current_execution()
            reason = marker.kwargs.pop("reason", "")

            make_marker = getattr(pytest.mark, marker_kind)
            generated = make_marker(
                condition=current_execution in executions,
                reason=f"Execution {current_execution} does not pass this test. {reason}",
                **marker.kwargs,
            )
            item.add_marker(generated)


# Docstring template for the file-making fixtures below; it is passed to the
# ``doc`` decorator together with a ``file_type`` value for the placeholder.
_doc_pytest_fixture = """
Pytest fixture factory that makes temp {file_type} files for testing.

Yields:
    Function that generates {file_type} files
"""


@pytest.fixture(scope="class")
def TestReadCSVFixture(tmp_path_factory):
    """
    Create the CSV files shared by the read_csv tests of one class.

    The generated file names are published as ``pytest.csvs_names``, keyed by
    the name of the scenario they are meant for.
    """
    data_dir = tmp_path_factory.mktemp("TestReadCSVFixture")
    creator = _make_csv_file(data_dir=data_dir)

    # (key, arguments for the creator), in creation order
    csv_variants = (
        # test_read_csv_col_handling, test_read_csv_parsing
        ("test_read_csv_regular", {}),
        # test_read_csv_parsing
        (
            "test_read_csv_yes_no",
            {"additional_col_values": ["Yes", "true", "No", "false"]},
        ),
        # test_read_csv_col_handling
        ("test_read_csv_blank_lines", {"add_blank_lines": True}),
        # test_read_csv_nans_handling
        (
            "test_read_csv_nans",
            {
                "add_blank_lines": True,
                "additional_col_values": [
                    "<NA>",
                    "N/A",
                    "NA",
                    "NULL",
                    "custom_nan",
                    "73",
                ],
            },
        ),
        # test_read_csv_error_handling
        ("test_read_csv_bad_lines", {"add_bad_lines": True}),
    )

    # each xdist worker spawned in separate process with separate namespace and dataset
    pytest.csvs_names = {}
    for csv_key, creator_kwargs in csv_variants:
        pytest.csvs_names[csv_key] = creator(**creator_kwargs)
    yield


@pytest.fixture
@doc(_doc_pytest_fixture, file_type="csv")
def make_csv_file(tmp_path):
    # The docstring comes from the ``doc`` decorator, so none is written here.
    csv_factory = _make_csv_file(data_dir=tmp_path)
    yield csv_factory


def create_fixture(file_type):
    """
    Build a (not yet registered) fixture function for `file_type` files.

    The returned generator function yields ``make_default_file`` bound to
    `file_type` and the test's ``tmp_path``; its docstring is produced from
    ``_doc_pytest_fixture`` by the ``doc`` decorator.
    """

    @doc(_doc_pytest_fixture, file_type=file_type)
    def fixture(tmp_path):
        yield make_default_file(file_type=file_type, data_dir=tmp_path)

    return fixture


# Generate a ``make_<file_type>_file`` fixture for every listed format and
# publish it at module level so that pytest can discover it by name.
for file_type in ("json", "html", "excel", "feather", "stata", "hdf", "pickle", "fwf"):
    fixture = create_fixture(file_type)
    # the function must carry the final name before it is wrapped as a fixture
    fixture.__name__ = f"make_{file_type}_file"
    globals()[fixture.__name__] = pytest.fixture(fixture)


@pytest.fixture
def make_parquet_file():
    """Pytest fixture factory that makes a parquet file/dir for testing.

    Every file or directory written through the yielded function is removed
    when the test finishes.

    Yields:
        Function that generates a parquet file/dir
    """
    filenames = []

    def _make_parquet_file(
        filename,
        nrows=NROWS,
        ncols=2,
        force=True,
        range_index_start=0,
        range_index_step=1,
        range_index_name=None,
        partitioned_columns=None,
        row_group_size: Optional[int] = None,
    ):
        """Helper function to generate parquet files/directories.

        Args:
            filename: The name of test file, that should be created.
            nrows: Number of rows for the dataframe.
            ncols: Number of cols for the dataframe.
            force: Create a new file/directory even if one already exists.
            range_index_start: First value of the dataframe's RangeIndex.
            range_index_step: Step of the dataframe's RangeIndex.
            range_index_name: Name of the dataframe's RangeIndex.
            partitioned_columns: Create a partitioned directory using pandas.
                ``None`` (the default) or an empty sequence writes a plain file.
            row_group_size: Maximum size of each row group.
        """
        # ``None`` replaces the former mutable ``[]`` default
        if partitioned_columns is None:
            partitioned_columns = []
        if force or not os.path.exists(filename):
            df = pandas.DataFrame(
                {f"col{x + 1}": np.arange(nrows) for x in range(ncols)}
            )
            index = pandas.RangeIndex(
                start=range_index_start,
                stop=range_index_start + (nrows * range_index_step),
                step=range_index_step,
                name=range_index_name,
            )
            if (
                range_index_start == 0
                and range_index_step == 1
                and range_index_name is None
            ):
                # the requested index is the default one: keep it, but make
                # sure the assumption holds
                assert df.index.equals(index)
            else:
                df.index = index
            if len(partitioned_columns) > 0:
                df.to_parquet(
                    filename,
                    partition_cols=partitioned_columns,
                    row_group_size=row_group_size,
                )
            else:
                df.to_parquet(filename, row_group_size=row_group_size)
            # only paths written by this call are scheduled for removal
            filenames.append(filename)

    # Return function that generates parquet files
    yield _make_parquet_file

    # Delete parquet file that was created
    for path in filenames:
        if os.path.exists(path):
            if os.path.isdir(path):
                shutil.rmtree(path)
            else:
                os.remove(path)


@pytest.fixture
def make_sql_connection():
    """Provide a factory of sqlite connection strings for tests.

    Yields:
        Factory that generates sql connection objects
    """

    def _sql_connection(filename, table=""):
        # Start from a clean database file
        if os.path.exists(filename):
            os.remove(filename)

        conn = f"sqlite:///{filename}"
        if not table:
            return conn

        # A table was requested: fill it with a small fixed dataset
        columns = {f"col{i + 1}": list(range(7 * i, 7 * i + 7)) for i in range(4)}
        columns["col5"] = [0] * 7
        pandas.DataFrame(columns).to_sql(table, conn)
        return conn

    yield _sql_connection


@pytest.fixture(scope="class")
def TestReadGlobCSVFixture(tmp_path_factory):
    """
    Create eleven CSV files sharing a unique base name for glob-reading tests.

    The glob pattern and the list of files are published as ``pytest.glob_path``
    and ``pytest.files``.
    """
    data_dir = tmp_path_factory.mktemp("TestReadGlobCSVFixture")
    base_name = get_unique_filename(extension="")

    pytest.glob_path = str(data_dir / f"{base_name}_*.csv")
    pytest.files = [str(data_dir / f"{base_name}_{i}.csv") for i in range(11)]

    for csv_path in pytest.files:
        # Glob does not guarantee ordering so we have to remove the randomness in the generated csvs.
        creator = _make_csv_file(data_dir=data_dir)
        creator(csv_path, row_size=11, remove_randomness=True)

    yield


@pytest.fixture
def get_generated_doc_urls():
    """Return a callable giving access to the set of recorded documentation URLs."""

    def _current_urls():
        return _generated_doc_urls

    return _current_urls


@pytest.fixture
def set_num_partitions(request):
    """Set ``NPartitions`` to ``request.param`` for the test, then put the previous value back."""
    previous = NPartitions.get()
    NPartitions.put(request.param)
    yield
    # teardown: restore what was configured before the test
    NPartitions.put(previous)


@pytest.fixture()
def set_benchmark_mode(request):
    """Set ``BenchmarkMode`` to ``request.param`` for the test, then put the previous value back."""
    previous = BenchmarkMode.get()
    BenchmarkMode.put(request.param)
    yield
    # teardown: restore what was configured before the test
    BenchmarkMode.put(previous)


@pytest.fixture
def set_async_read_mode(request):
    """Set ``AsyncReadMode`` to ``request.param`` for the test, then put the previous value back."""
    previous = AsyncReadMode.get()
    AsyncReadMode.put(request.param)
    yield
    # teardown: restore what was configured before the test
    AsyncReadMode.put(previous)


@pytest.fixture
def set_min_row_partition_size(request):
    """Set ``MinRowPartitionSize`` to ``request.param`` for the test, then put the previous value back."""
    previous = MinRowPartitionSize.get()
    MinRowPartitionSize.put(request.param)
    yield
    # teardown: restore what was configured before the test
    MinRowPartitionSize.put(previous)


# NOTE(review): not referenced anywhere in the visible part of this file;
# presumably a placeholder for a Ray client server handle - confirm it is still needed.
ray_client_server = None


@pytest.fixture
def s3_storage_options(worker_id):
    """
    Return ``storage_options`` pointing at the mock S3 endpoint of this worker.

    The endpoint must be the one that the ``s3_base`` fixture serves: the fixed
    CI service on port 5000, or, locally, a per-worker moto server.
    """
    # # copied from pandas conftest.py:
    # https://github.com/pandas-dev/pandas/blob/32f789fbc5d5a72d9d1ac14935635289eeac9009/pandas/tests/io/conftest.py#L45
    # worker_id is a pytest fixture
    if GithubCI.get():
        url = "http://localhost:5000/"
    else:
        # If we hit this else-case, this test is being run locally. In that case, we want
        # each worker to point to a different port for its mock S3 service. The easiest way
        # to do that is to use the `worker_id`, which is unique, to determine what port to point
        # to. The port is computed exactly as in `s3_base` (5500 for the master worker,
        # 5550 + N for worker `gwN`), otherwise the client would talk to a port
        # where no moto server is listening.
        endpoint_port = (
            5500 if worker_id == "master" else (5550 + int(worker_id.lstrip("gw")))
        )
        url = f"http://127.0.0.1:{endpoint_port}/"
    return {"client_kwargs": {"endpoint_url": url}}


@pytest.fixture(scope="session")
def monkeysession():
    """Session-scoped ``MonkeyPatch``; its changes are undone when the context closes."""
    with pytest.MonkeyPatch.context() as session_patch:
        yield session_patch


@pytest.fixture(scope="session")
def s3_base(worker_id, monkeysession):
    """
    Fixture for mocking S3 interaction.

    Sets up moto server in separate process locally.

    In CI no process is started: the fixture either skips (Windows, macOS, ARM)
    or yields the address of the moto service expected on port 5000.

    Yields
    ------
    str
        URL for motoserver/moto CI service.
    """
    # copied from pandas conftest.py
    # still need access keys for https://github.com/getmoto/moto/issues/1924
    monkeysession.setenv("AWS_ACCESS_KEY_ID", "foobar_key")
    monkeysession.setenv("AWS_SECRET_ACCESS_KEY", "foobar_secret")
    monkeysession.setenv("AWS_REGION", "us-west-2")
    if GithubCI.get():
        if sys.platform in ("darwin", "win32", "cygwin") or (
            platform.machine() in ("arm64", "aarch64")
            or platform.machine().startswith("armv")
        ):
            # pandas comments say:
            # DO NOT RUN on Windows/macOS/ARM, only Ubuntu
            # - subprocess in CI can cause timeouts
            # - GitHub Actions do not support
            #   container services for the above OSs
            pytest.skip(
                "S3 tests do not have a corresponding service in Windows, macOS "
                + "or ARM platforms"
            )
        else:
            # assume CI has started moto in docker container:
            # https://docs.getmoto.org/en/latest/docs/server_mode.html#run-using-docker
            # It would be nice to start moto on another thread as in the
            # instructions here:
            # https://docs.getmoto.org/en/latest/docs/server_mode.html#start-within-python
            # but that gives 403 forbidden error when we try to create the bucket
            yield "http://localhost:5000"
    else:
        # Launching moto in server mode, i.e., as a separate process
        # with an S3 endpoint on localhost

        # If we hit this else-case, this test is being run locally. In that case, we want
        # each worker to point to a different port for its mock S3 service. The easiest way
        # to do that is to use the `worker_id`, which is unique, to determine what port to point
        # to.
        # The master worker gets 5500, worker `gwN` gets 5550 + N.
        # NOTE(review): `s3_storage_options` has to resolve to the same port.
        endpoint_port = (
            5500 if worker_id == "master" else (5550 + int(worker_id.lstrip("gw")))
        )
        endpoint_uri = f"http://127.0.0.1:{endpoint_port}/"

        # pipe to null to avoid logging in terminal
        # TODO any way to throw the error from here? e.g. i had an annoying problem
        # where I didn't have flask-cors and moto just failed .if there's an error
        # in the popen command and we throw an error within the body of the context
        # manager, the test just hangs forever.
        with subprocess.Popen(
            ["moto_server", "s3", "-p", str(endpoint_port)],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,
        ) as proc:
            # Poll the endpoint up to 50 times; the ``else`` branch of the loop
            # runs only when no attempt succeeded.
            for _ in range(50):
                try:
                    # OK to go once server is accepting connections
                    if requests.get(endpoint_uri).ok:
                        break
                except Exception:
                    # try again while we still have retries
                    time.sleep(0.1)
            else:
                proc.terminate()
                _, errs = proc.communicate()
                raise RuntimeError(
                    "Could not connect to moto server after 50 tries. "
                    + f"See stderr for extra info: {errs}"
                )
            yield endpoint_uri

            # teardown: stop the server; leaving the ``with`` block waits for it
            proc.terminate()


@pytest.fixture
def s3_resource(s3_base):
    """
    Set up S3 bucket with contents. The primary bucket name is "modin-test".

    When running locally, this function should be safe even if there are multiple pytest
    workers running in parallel because each worker gets its own endpoint. When running
    in CI, we use a single endpoint for all workers, so we can't have multiple pytest
    workers running in parallel.
    """
    bucket = "modin-test"
    resource = boto3.resource("s3", endpoint_url=s3_base)
    client = boto3.client("s3", endpoint_url=s3_base)

    # https://github.com/getmoto/moto/issues/3292
    # without location, I get
    # botocore.exceptions.ClientError: An error occurred
    # (IllegalLocationConstraintException) when calling the CreateBucket operation:
    # The unspecified location constraint is incompatible for the region specific
    # endpoint this request was sent to.
    # even if I delete os.environ['AWS_REGION'] but somehow pandas can get away with
    # this.
    bucket_location = {"LocationConstraint": "us-west-2"}
    try:
        client.create_bucket(Bucket=bucket, CreateBucketConfiguration=bucket_location)
    except Exception as err:
        # An already existing bucket is fine; anything else is re-raised.
        # ``create_bucket`` errors are produced by a factory, so the error code
        # is read from the response instead of checking the exception type.
        error_response = getattr(err, "response", {})
        error_code = error_response.get("Error", {}).get("Code", "")
        tolerated_codes = ("BucketAlreadyOwnedByYou", "BucketAlreadyExists")
        if error_code not in tolerated_codes:
            raise

    # Give the bucket creation some time to become visible.
    for _ in range(20):
        if client.list_buckets()["Buckets"]:
            break
        time.sleep(0.1)
    if not client.list_buckets()["Buckets"]:
        raise RuntimeError("Could not create bucket")

    s3fs.S3FileSystem.clear_instance_cache()
    filesystem = s3fs.S3FileSystem(client_kwargs={"endpoint_url": s3_base})

    # (key inside the bucket, local path); keys ending with "/" are directories
    uploads = (
        ("modin-bugs/multiple_csv/", "modin/tests/pandas/data/multiple_csv/"),
        (
            "modin-bugs/test_data_dir.parquet/",
            "modin/tests/pandas/data/test_data_dir.parquet/",
        ),
        ("modin-bugs/test_data.parquet", "modin/tests/pandas/data/test_data.parquet"),
        ("modin-bugs/test_data.json", "modin/tests/pandas/data/test_data.json"),
        ("modin-bugs/test_data.fwf", "modin/tests/pandas/data/test_data.fwf"),
        ("modin-bugs/test_data.feather", "modin/tests/pandas/data/test_data.feather"),
        ("modin-bugs/issue5159.parquet/", "modin/tests/pandas/data/issue5159.parquet/"),
    )
    for s3_key, local_path in uploads:
        is_directory = s3_key.endswith("/")
        filesystem.put(local_path, f"{bucket}/{s3_key}", recursive=is_directory)

    yield resource

    filesystem.rm(bucket, recursive=True)
    # Give the deletion some time to finish.
    for _ in range(20):
        if not client.list_buckets()["Buckets"]:
            break
        time.sleep(0.1)


@pytest.fixture
def modify_config(request):
    """
    Temporarily override configuration parameters for the duration of a test.

    ``request.param`` is expected to be a mapping whose keys are config
    objects exposing ``get()``/``put()`` (and ``type``) and whose values are
    the settings to apply. The previous values are put back once the test
    has finished.
    """
    overrides = request.param
    saved = {}

    for param, new_value in overrides.items():
        saved[param] = param.get()
        param.put(new_value)

    yield  # hand control over to the test

    # put the previous settings back
    for param, previous in saved.items():
        try:
            param.put(previous)
        except ValueError:
            # Some bool parameters report 'None' as their default, but
            # 'None' is rejected by ``put`` because it is not a bool.
            # Only in that situation do we fall back to 'False'; any other
            # failure is propagated unchanged.
            if not (param.type == bool and previous is None):
                raise
            param.put(False)


@contextmanager
def copy_and_restore(
    dicts: Iterable[defaultdict],
) -> None:
    """
    Make deep copies of defaultdicts and restore them upon exiting this context.

    Ideally this function would be a fixture, but we want to pass it parameters
    and use it in other fixtures, and it does not seem to be possible to pass
    parameters from one fixture to another.

    Parameters
    ----------
    dicts : Iterable[defaultdict]
        The dicts to copy and restore. Each one is cleared and refilled
        in place from its snapshot when the context exits, whether or not
        the body raised.
    """
    # Take the snapshots *before* entering the ``try`` block: if iterating
    # ``dicts`` or deep-copying fails, the original error must propagate
    # instead of being masked by an unbound-variable error in ``finally``.
    #
    # Use a tuple of tuples instead of a dict mapping each original dict
    # to its copy, because the original dict is not hashable.
    original_dict_to_copy = tuple(
        (original_dict, copy.deepcopy(original_dict)) for original_dict in dicts
    )
    try:
        yield
    finally:
        for original_dict, dict_copy in original_dict_to_copy:
            original_dict.clear()
            original_dict.update(dict_copy)


@pytest.fixture(autouse=True)
def clean_up_extensions():
    """
    Undo any extension registrations made while a test was running.

    The extension registries listed below are snapshotted before the test
    and restored afterwards. Then every attribute recorded in
    ``_attrs_to_delete_on_test`` is deleted from its owner and that
    bookkeeping mapping is emptied.
    """
    extension_registries = (
        pd.dataframe.DataFrame._extensions,
        pd.Series._extensions,
        pd.base.BasePandasDataset._extensions,
        _GENERAL_EXTENSIONS,
        pd.groupby.DataFrameGroupBy._extensions,
        pd.groupby.SeriesGroupBy._extensions,
    )
    with copy_and_restore(extension_registries):
        yield

    from modin.pandas.api.extensions.extensions import _attrs_to_delete_on_test

    for owner, attr_names in _attrs_to_delete_on_test.items():
        for attr_name in attr_names:
            delattr(owner, attr_name)
    _attrs_to_delete_on_test.clear()


@pytest.fixture(autouse=True)
def clean_up_auto_backend_switching():
    """
    Snapshot the pre-/post-op backend-switch registries and restore them
    after each test.
    """
    switch_method_registries = (
        _CLASS_AND_BACKEND_TO_POST_OP_SWITCH_METHODS,
        _CLASS_AND_BACKEND_TO_PRE_OP_SWITCH_METHODS,
    )
    with copy_and_restore(switch_method_registries):
        yield


@pytest.fixture(autouse=True)
def assert_no_root_logging(caplog):
    """
    Fail the test if anything was logged through the root logger.

    Records whose ``pathname`` lies inside the installed xgboost package are
    tolerated.
    """
    xgboost_path = None
    try:
        import xgboost
    except ImportError:
        pass
    else:
        xgboost_path = os.path.dirname(xgboost.__file__)
    root_logger = logging.getLogger()

    def _is_disallowed_root_record(record):
        # Only records emitted under the root logger's name are of interest.
        if record.name != root_logger.name:
            return False
        # Allow xgboost to log to root.
        # TODO(https://github.com/modin-project/modin/issues/5194): Check
        # whether we can remove this exception once we use a newer version of
        # xgboost.
        came_from_xgboost = xgboost_path is not None and record.pathname.startswith(
            xgboost_path
        )
        return not came_from_xgboost

    # Capture logs at any level, i.e. at level >= logging.NOTSET.
    with caplog.at_level(logging.NOTSET):
        yield
    # Note that because this code is in a fixture, we have to use
    # caplog.get_records(when="call") instead of caplog.records:
    # https://github.com/pytest-dev/pytest/issues/4033
    assert not any(
        _is_disallowed_root_record(record)
        for record in caplog.get_records(when="call")
    )


================================================
FILE: modin/core/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Modin's core functionality."""


================================================
FILE: modin/core/computation/__init__.py
================================================


================================================
FILE: modin/core/computation/align.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
Core eval alignment algorithms. Forked from pandas.core.computation.align
"""

from __future__ import annotations

import warnings
from collections.abc import Sequence
from functools import (
    partial,
    wraps,
)
from typing import (
    Callable,
)

import numpy as np
import pandas
import pandas.core.common as com
from pandas._typing import F
from pandas.core.base import PandasObject
from pandas.errors import PerformanceWarning

from modin.core.computation.common import result_type_many
from modin.pandas import DataFrame, Series
from modin.pandas.base import BasePandasDataset


def _align_core_single_unary_op(
    term,
) -> tuple[partial | type[BasePandasDataset], dict[str, pandas.Index] | None]:
    """
    Work out the result constructor and axes for an expression with one term.

    For a numpy array the constructor is ``np.asanyarray`` bound to the
    array's dtype and there are no axes. Otherwise the constructor is the
    value's own type, and axes are reported only when the value exposes an
    ``axes`` attribute.
    """
    value = term.value

    if isinstance(value, np.ndarray):
        return partial(np.asanyarray, dtype=value.dtype), None

    typ = type(value)
    if hasattr(value, "axes"):
        return typ, _zip_axes_from_type(typ, value.axes)
    return typ, None


def _zip_axes_from_type(
    typ: type[BasePandasDataset], new_axes: Sequence[pandas.Index]
) -> dict[str, pandas.Index]:
    """
    Pair every axis name in ``typ._AXIS_ORDERS`` with the entry of
    `new_axes` at the same position.
    """
    axes_by_name = {}
    for position, axis_name in enumerate(typ._AXIS_ORDERS):
        axes_by_name[axis_name] = new_axes[position]
    return axes_by_name


def _any_pandas_objects(terms) -> bool:
    """
    Report whether at least one term's value is a ``PandasObject`` instance.
    """
    for term in terms:
        if isinstance(term.value, PandasObject):
            return True
    return False


def _filter_special_cases(f) -> Callable[[F], F]:
    """
    Decorate an alignment function so trivial inputs never reach it.

    The wrapped function is only invoked when there is more than one term
    and at least one of them holds a pandas object.
    """

    @wraps(f)
    def wrapper(terms):
        # single unary operand
        if len(terms) == 1:
            return _align_core_single_unary_op(terms[0])

        if _any_pandas_objects(terms):
            return f(terms)

        # we don't have any pandas objects
        return result_type_many(*(term.value for term in terms)), None

    return wrapper


@_filter_special_cases
def _align_core(terms):
    """
    Align every axis-bearing term to a common set of axes.

    The common axes start as the axes of the term with the largest ``ndim``
    and are widened with the union of the matching axis of every other
    axis-bearing term. Each such term is then reindexed onto those axes and
    finally replaced (via ``term.update``) by its ``.values``.

    Parameters
    ----------
    terms : sequence
        Terms of the expression; each exposes ``value``, ``name`` and
        ``update``. They are modified in place.

    Returns
    -------
    tuple
        ``(typ, axes)`` where ``typ`` is the ``_constructor`` of the
        largest-``ndim`` term and ``axes`` maps axis names to the aligned
        axes.
    """
    # positions of the terms whose value exposes an ``axes`` attribute
    term_index = [i for i, term in enumerate(terms) if hasattr(term.value, "axes")]
    term_dims = [terms[i].value.ndim for i in term_index]

    # term position -> number of dimensions
    ndims = pandas.Series(dict(zip(term_index, term_dims)))

    # initial axes are the axes of the largest-axis'd term
    biggest = terms[ndims.idxmax()].value
    typ = biggest._constructor
    axes = biggest.axes
    naxes = len(axes)
    gt_than_one_axis = naxes > 1

    # First pass: widen ``axes`` so that it covers every axis-bearing term.
    for value in (terms[i].value for i in term_index):
        is_series = isinstance(value, Series)
        is_series_and_gt_one_axis = is_series and gt_than_one_axis

        for axis, items in enumerate(value.axes):
            if is_series_and_gt_one_axis:
                # a Series combined with a higher-dimensional object is
                # matched against the last axis, using the Series' index
                ax, itm = naxes - 1, value.index
            else:
                ax, itm = axis, items

            # only compute a union when the two axes are not the same object
            if not axes[ax].is_(itm):
                axes[ax] = axes[ax].union(itm)

    # Second pass: reindex every axis-bearing term onto the common axes.
    for i, ndim in ndims.items():
        for axis, items in zip(range(ndim), axes):
            ti = terms[i].value

            if hasattr(ti, "reindex"):
                transpose = isinstance(ti, Series) and naxes > 1
                reindexer = axes[naxes - 1] if transpose else items

                term_axis_size = len(ti.axes[axis])
                reindexer_size = len(reindexer)

                # order of magnitude of the size difference; warn when it is
                # at least 1 and the target axis has 10000 or more entries
                ordm = np.log10(max(1, abs(reindexer_size - term_axis_size)))
                if ordm >= 1 and reindexer_size >= 10000:
                    w = (
                        f"Alignment difference on axis {axis} is larger "
                        + f"than an order of magnitude on term {repr(terms[i].name)}, "
                        + f"by more than {ordm:.4g}; performance may suffer."
                    )
                    warnings.warn(w, category=PerformanceWarning)

                obj = ti.reindex(reindexer, axis=axis, copy=False)
                terms[i].update(obj)

        # once aligned, the term only carries the underlying values
        terms[i].update(terms[i].value.values)

    return typ, _zip_axes_from_type(typ, axes)


def align_terms(terms):
    """
    Align a set of terms and report the result type and axes.

    Returns a ``(typ, axes)`` pair; ``axes`` is ``None`` whenever no
    axis-bearing object is involved.
    """
    try:
        # flatten the parse tree (a nested list, really)
        flat_terms = list(com.flatten(terms))
    except TypeError:
        # can't iterate so it must just be a constant or single variable
        single_value = terms.value
        if not isinstance(single_value, (Series, DataFrame)):
            return np.result_type(terms.type), None
        typ = type(single_value)
        return typ, _zip_axes_from_type(typ, single_value.axes)

    # if all resolved variables are numeric scalars
    if all(term.is_scalar for term in flat_terms):
        scalar_values = (term.value for term in flat_terms)
        return result_type_many(*scalar_values).type, None

    # perform the main alignment
    typ, axes = _align_core(flat_terms)
    return typ, axes


def reconstruct_object(typ, obj, axes, dtype):
    """
    Rebuild a result of type `typ` from a raw evaluated value.

    Parameters
    ----------
    typ : object
        A type, a ``functools.partial`` constructor, or an object exposing
        the type to use through a ``type`` attribute.
    obj : object
        The value to use in the type constructor.
    axes : dict
        The axes to use to construct the resulting pandas object; only
        consumed when `typ` is a ``PandasObject`` subclass.
    dtype : object
        Dtype that is combined with ``obj.dtype`` through
        ``np.result_type`` to obtain the dtype of the result.

    Returns
    -------
    ret : typ
        An object of type ``typ`` with the value `obj` and possible axes
        `axes`.
    """
    # unwrap objects that carry their scalar type in a ``type`` attribute
    typ = getattr(typ, "type", typ)

    res_t = np.result_type(obj.dtype, dtype)

    builds_pandas_object = not isinstance(typ, partial) and issubclass(
        typ, PandasObject
    )
    if builds_pandas_object:
        return typ(obj, dtype=res_t, **axes)

    # special case for pathological things like ~True/~False
    bool_promoted = hasattr(res_t, "type") and typ == np.bool_ and res_t != np.bool_
    if bool_promoted:
        return res_t.type(obj)

    ret_value = typ(obj).astype(res_t)
    # The condition is to distinguish 0-dim array (returned in case of
    # scalar) and 1 element array
    # e.g. np.array(0) and np.array([0])
    came_from_one_element_array = len(obj.shape) == 1 and len(obj) == 1
    if came_from_one_element_array and not isinstance(ret_value, np.ndarray):
        ret_value = np.array([ret_value]).astype(res_t)
    return ret_value


================================================
FILE: modin/core/computation/check.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
Forked from pandas.core.computation.check
"""

from __future__ import annotations

from pandas.compat._optional import import_optional_dependency

# ``ne`` holds whatever ``import_optional_dependency`` returned for numexpr;
# it is compared against None below, so None means "no usable numexpr".
ne = import_optional_dependency("numexpr", errors="warn")
# True exactly when the call above returned something other than None.
NUMEXPR_INSTALLED = ne is not None

__all__ = ["NUMEXPR_INSTALLED"]


================================================
FILE: modin/core/computation/common.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
Forked from pandas.core.computation.common
"""

from __future__ import annotations

from functools import reduce

import numpy as np
from pandas._config import get_option
from pandas.core.dtypes.cast import find_common_type
from pandas.core.dtypes.common import is_extension_array_dtype


def ensure_decoded(s) -> str:
    """
    Decode bytes using the ``display.encoding`` option.

    Anything that is not ``bytes``/``np.bytes_`` is returned untouched.
    """
    if not isinstance(s, (np.bytes_, bytes)):
        return s
    return s.decode(get_option("display.encoding"))


def result_type_many(*arrays_and_dtypes):
    """
    Wrapper around numpy.result_type which overcomes the NPY_MAXARGS (32)
    argument limit.

    Parameters
    ----------
    *arrays_and_dtypes
        Arrays and/or dtypes (numpy or extension dtypes) to combine.

    Returns
    -------
    dtype
        The result of ``np.result_type`` when numpy can handle every
        argument; otherwise the result of ``find_common_type`` over the
        extension dtypes and the combined numpy dtype of the remaining
        arguments.
    """
    try:
        return np.result_type(*arrays_and_dtypes)
    except ValueError:
        # we have > NPY_MAXARGS terms in our expression
        return reduce(np.result_type, arrays_and_dtypes)
    except TypeError:
        # numpy rejected at least one argument: split extension dtypes from
        # everything numpy can handle and combine the two groups separately.
        ea_dtypes, non_ea_dtypes = [], []
        for arr_or_dtype in arrays_and_dtypes:
            if is_extension_array_dtype(arr_or_dtype):
                ea_dtypes.append(arr_or_dtype)
            else:
                non_ea_dtypes.append(arr_or_dtype)

        if non_ea_dtypes:
            try:
                np_dtype = np.result_type(*non_ea_dtypes)
            except ValueError:
                # Too many arguments again: fold pairwise, but only over the
                # numpy-compatible inputs. Folding over all inputs would feed
                # the extension dtypes that numpy has just rejected back
                # into ``np.result_type``.
                np_dtype = reduce(np.result_type, non_ea_dtypes)
            return find_common_type(ea_dtypes + [np_dtype])

        return find_common_type(ea_dtypes)


================================================
FILE: modin/core/computation/engines.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
Engine classes for :func:`~pandas.eval`. Forked from pandas.core.computation.engines
"""

from __future__ import annotations

import abc
from typing import TYPE_CHECKING

from pandas.errors import NumExprClobberingError
from pandas.io.formats import printing

from modin.core.computation.align import (
    align_terms,
    reconstruct_object,
)
from modin.core.computation.ops import (
    MATHOPS,
    REDUCTIONS,
)

if TYPE_CHECKING:
    from modin.core.computation.expr import Expr

_ne_builtins = frozenset(MATHOPS + REDUCTIONS)


def _check_ne_builtin_clash(expr: Expr) -> None:
    """
    Attempt to prevent foot-shooting in a helpful way.

    Parameters
    ----------
    expr : Expr
        Expression whose ``names`` are compared against the names listed in
        ``MATHOPS`` and ``REDUCTIONS``.

    Raises
    ------
    NumExprClobberingError
        If any name used by the expression is also one of those names.
    """
    overlap = expr.names & _ne_builtins
    if not overlap:
        return

    s = ", ".join(map(repr, overlap))
    raise NumExprClobberingError(
        f'Variables in expression "{expr}" overlap with builtins: ({s})'
    )


class AbstractEngine(metaclass=abc.ABCMeta):
    """Object serving as a base class for all engines."""

    has_neg_frac = False

    def __init__(self, expr) -> None:
        self.expr = expr
        # Both stay None until ``evaluate`` has aligned the terms.
        self.aligned_axes = None
        self.result_type = None

    def convert(self) -> str:
        """
        Convert an expression for evaluation.

        Defaults to return the expression as a string.
        """
        as_text = printing.pprint_thing(self.expr)
        return as_text

    def evaluate(self) -> object:
        """
        Run the engine on the expression.

        This method performs alignment which is necessary no matter what engine
        is being used, thus its implementation is in the base class.

        Returns
        -------
        object
            The result of the passed expression.
        """
        if not self._is_aligned:
            self.result_type, self.aligned_axes = align_terms(self.expr.terms)

        # Let the concrete engine compute the raw value, then wrap it back
        # into the aligned type/axes.
        raw_result = self._evaluate()
        return reconstruct_object(
            self.result_type,
            raw_result,
            self.aligned_axes,
            self.expr.terms.return_type,
        )

    @property
    def _is_aligned(self) -> bool:
        """Whether both the aligned axes and the result type are known."""
        return not (self.aligned_axes is None or self.result_type is None)

    @abc.abstractmethod
    def _evaluate(self):
        """
        Return an evaluated expression.

        Notes
        -----
        Must be implemented by subclasses.
        """


class NumExprEngine(AbstractEngine):
    """NumExpr engine class"""

    has_neg_frac = True

    def _evaluate(self):
        """Evaluate the converted expression with ``numexpr.evaluate``."""
        import numexpr as ne

        # convert the expression to a valid numexpr expression
        ne_source = self.convert()

        full_scope = self.expr.env.full_scope
        _check_ne_builtin_clash(self.expr)
        return ne.evaluate(ne_source, local_dict=full_scope)


class PythonEngine(AbstractEngine):
    """
    Evaluate an expression in Python space.

    Mostly for testing purposes.
    """

    has_neg_frac = False

    def evaluate(self):
        """Call the expression object directly and return its result."""
        python_result = self.expr()
        return python_result

    def _evaluate(self) -> None:
        """Unused: ``evaluate`` is overridden and never calls this hook."""
        return None


# Registry of the available engines, keyed by the engine name.
ENGINES: dict[str, type[AbstractEngine]] = {
    "numexpr": NumExprEngine,
    "python": PythonEngine,
}


================================================
FILE: modin/core/computation/eval.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
Top level ``eval`` module. Forked from pandas.core.computation.eval
"""

from __future__ import annotations

import tokenize
import warnings

from pandas.core.dtypes.common import is_extension_array_dtype
from pandas.io.formats.printing import pprint_thing
from pandas.util._validators import validate_bool_kwarg

from modin.core.computation.check import NUMEXPR_INSTALLED
from modin.core.computation.engines import ENGINES
from modin.core.computation.expr import (
    PARSERS,
    Expr,
)
from modin.core.computation.ops import BinOp
from modin.core.computation.parsing import tokenize_string
from modin.core.computation.scope import ensure_scope
from modin.pandas.base import BasePandasDataset


def _check_engine(engine: str | None) -> str:
    """
    Validate an engine name, choosing a default when none is given.

    Parameters
    ----------
    engine : str or None
        String to validate. ``None`` selects ``'numexpr'`` when it is
        installed and ``'python'`` otherwise.

    Raises
    ------
    KeyError
      * If an invalid engine is passed.
    ImportError
      * If numexpr was requested but doesn't exist.

    Returns
    -------
    str
        Engine name.
    """
    if engine is None:
        if NUMEXPR_INSTALLED:
            engine = "numexpr"
        else:
            engine = "python"

    if engine not in ENGINES:
        raise KeyError(
            f"Invalid engine '{engine}' passed, "
            f"valid engines are {list(ENGINES.keys())}"
        )

    # TODO: validate this in a more general way (thinking of future engines
    # that won't necessarily be import-able)
    # Could potentially be done on engine instantiation
    numexpr_requested_but_missing = engine == "numexpr" and not NUMEXPR_INSTALLED
    if numexpr_requested_but_missing:
        raise ImportError(
            "'numexpr' is not installed or an unsupported version. Cannot use "
            "engine='numexpr' for query/eval if 'numexpr' is not installed"
        )

    return engine


def _check_parser(parser: str):
    """
    Reject parser names that are not registered in ``PARSERS``.

    Parameters
    ----------
    parser : str
        Name of the parser to validate.

    Raises
    ------
    KeyError
      * If an invalid parser is passed
    """
    if parser in PARSERS:
        return
    raise KeyError(
        f"Invalid parser '{parser}' passed, valid parsers are {PARSERS.keys()}"
    )


def _check_resolvers(resolvers):
    """
    Ensure every resolver supports item lookup.

    Raises
    ------
    TypeError
      * If a resolver has no ``__getitem__`` attribute. ``None`` is
        accepted and skips the check entirely.
    """
    if resolvers is None:
        return

    for candidate in resolvers:
        if hasattr(candidate, "__getitem__"):
            continue
        type_name = type(candidate).__name__
        raise TypeError(
            f"Resolver of type '{type_name}' does not "
            "implement the __getitem__ method"
        )


def _check_expression(expr):
    """
    Refuse falsy expressions such as the empty string.

    Parameters
    ----------
    expr : object
        An object that can be converted to a string

    Raises
    ------
    ValueError
      * If expr is an empty string
    """
    if expr:
        return
    raise ValueError("expr cannot be an empty string")


def _convert_expression(expr) -> str:
    """
    Render an object as a non-empty expression string.

    The object is formatted with ``pprint_thing`` and the result is checked
    for emptiness. This is used to convert operators to their string
    representation for recursive calls to :func:`~pandas.eval`.

    Parameters
    ----------
    expr : object
        The object to be converted to a string.

    Returns
    -------
    str
        The string representation of an object.

    Raises
    ------
    ValueError
      * If the expression is empty.
    """
    expression_text = pprint_thing(expr)
    _check_expression(expression_text)
    return expression_text


def _check_for_locals(expr: str, stack_level: int, parser: str):
    """
    Raise ``SyntaxError`` when ``@`` is used where it is not supported.

    The ``@`` prefix is rejected for any parser other than ``'pandas'`` and,
    for the pandas parser, when `stack_level` is 0. In every other case the
    expression is not inspected at all.
    """
    if parser != "pandas":
        msg = "The '@' prefix is only supported by the pandas parser"
    elif stack_level == 0:
        msg = (
            "The '@' prefix is not allowed in top-level eval calls.\n"
            "please refer to your variables by name without the '@' prefix."
        )
    else:
        # pandas parser below the top of the stack: '@' is allowed
        return

    uses_at_prefix = any(
        toknum == tokenize.OP and tokval == "@"
        for toknum, tokval in tokenize_string(expr)
    )
    if uses_at_prefix:
        raise SyntaxError(msg)


def eval(
    expr: str | BinOp,  # we leave BinOp out of the docstr bc it isn't for users
    parser: str = "pandas",
    engine: str | None = None,
    local_dict=None,
    global_dict=None,
    resolvers=(),
    level: int = 0,
    target=None,
    inplace: bool = False,
):
    """
    Evaluate a Python expression as a string using various backends.

    The following arithmetic operations are supported: ``+``, ``-``, ``*``,
    ``/``, ``**``, ``%``, ``//`` (python engine only) along with the following
    boolean operations: ``|`` (or), ``&`` (and), and ``~`` (not).
    Additionally, the ``'pandas'`` parser allows the use of :keyword:`and`,
    :keyword:`or`, and :keyword:`not` with the same semantics as the
    corresponding bitwise operators.  :class:`~pandas.Series` and
    :class:`~pandas.DataFrame` objects are supported and behave as they would
    with plain ol' Python evaluation.

    Parameters
    ----------
    expr : str
        The expression to evaluate. This string cannot contain any Python
        `statements
        <https://docs.python.org/3/reference/simple_stmts.html#simple-statements>`__,
        only Python `expressions
        <https://docs.python.org/3/reference/simple_stmts.html#expression-statements>`__.
        A string containing several non-blank lines is evaluated line by
        line.
    parser : {'pandas', 'python'}, default 'pandas'
        The parser to use to construct the syntax tree from the expression. The
        default of ``'pandas'`` parses code slightly different than standard
        Python. Alternatively, you can parse an expression using the
        ``'python'`` parser to retain strict Python semantics.  See the
        :ref:`enhancing performance <enhancingperf.eval>` documentation for
        more details.
    engine : {'python', 'numexpr'} or None, default None

        The engine used to evaluate the expression. Supported engines are

        - None : tries to use ``numexpr``, falls back to ``python``
        - ``'numexpr'`` : This engine evaluates pandas objects using
          numexpr for large speed ups in complex expressions with large frames.
        - ``'python'`` : Performs operations as if you had ``eval``'d in top
          level python. This engine is generally not that useful.

        More backends may be available in the future.
    local_dict : dict or None, optional
        A dictionary of local variables, taken from locals() by default.
    global_dict : dict or None, optional
        A dictionary of global variables, taken from globals() by default.
    resolvers : list of dict-like or None, optional
        A list of objects implementing the ``__getitem__`` special method that
        you can use to inject an additional collection of namespaces to use for
        variable lookup. For example, this is used in the
        :meth:`~DataFrame.query` method to inject the
        ``DataFrame.index`` and ``DataFrame.columns``
        variables that refer to their respective :class:`~pandas.DataFrame`
        instance attributes.
    level : int, optional
        The number of prior stack frames to traverse and add to the current
        scope. Most users will **not** need to change this parameter.
    target : object, optional, default None
        This is the target object for assignment. It is used when there is
        variable assignment in the expression. If so, then `target` must
        support item assignment with string keys, and if a copy is being
        returned, it must also support `.copy()`.
    inplace : bool, default False
        If `target` is provided, and the expression mutates `target`, whether
        to modify `target` inplace. Otherwise, return a copy of `target` with
        the mutation.

    Returns
    -------
    ndarray, numeric scalar, DataFrame, Series, or None
        The completion value of evaluating the given code or None if ``inplace=True``.

    Raises
    ------
    ValueError
        There are many instances where such an error can be raised:

        - `target=None`, but the expression is multiline.
        - The expression is multiline, but not all them have item assignment.
          An example of such an arrangement is this:

          a = b + 1
          a + 2

          Here, there are expressions on different lines, making it multiline,
          but the last line has no variable assigned to the output of `a + 2`.
        - `inplace=True`, but the expression is missing item assignment.
        - Item assignment is provided, but the `target` does not support
          string item assignment.
        - Item assignment is provided and `inplace=False`, but the `target`
          does not support the `.copy()` method

    See Also
    --------
    DataFrame.query : Evaluates a boolean expression to query the columns
            of a frame.
    DataFrame.eval : Evaluate a string describing operations on
            DataFrame columns.

    Notes
    -----
    The ``dtype`` of any objects involved in an arithmetic ``%`` operation are
    recursively cast to ``float64``.

    See the :ref:`enhancing performance <enhancingperf.eval>` documentation for
    more details.

    Examples
    --------
    >>> df = pd.DataFrame({"animal": ["dog", "pig"], "age": [10, 20]})
    >>> df
      animal  age
    0    dog   10
    1    pig   20

    We can add a new column using ``pd.eval``:

    >>> pd.eval("double_age = df.age * 2", target=df)
      animal  age  double_age
    0    dog   10          20
    1    pig   20          40
    """
    inplace = validate_bool_kwarg(inplace, "inplace")

    exprs: list[str | BinOp]
    if isinstance(expr, str):
        _check_expression(expr)
        # one expression per non-blank line, surrounding whitespace removed
        exprs = [e.strip() for e in expr.splitlines() if e.strip() != ""]
    else:
        # ops.BinOp; for internal compat, not intended to be passed by users
        exprs = [expr]
    multi_line = len(exprs) > 1

    if multi_line and target is None:
        raise ValueError(
            "multi-line expressions are only valid in the "
            + "context of data, use DataFrame.eval"
        )
    engine = _check_engine(engine)
    _check_parser(parser)
    _check_resolvers(resolvers)

    ret = None
    first_expr = True
    target_modified = False

    # NOTE: the loop variable deliberately rebinds the ``expr`` parameter.
    for expr in exprs:
        expr = _convert_expression(expr)
        _check_for_locals(expr, level, parser)

        # get our (possibly passed-in) scope
        env = ensure_scope(
            level + 1,
            global_dict=global_dict,
            local_dict=local_dict,
            resolvers=resolvers,
            target=target,
        )

        parsed_expr = Expr(expr, engine=engine, parser=parser, env=env)

        # numexpr cannot handle extension array dtypes, neither as the return
        # type nor as an operand type. ``engine`` is rebound here, so the
        # switch also applies to every remaining line of the expression.
        if engine == "numexpr" and (
            is_extension_array_dtype(parsed_expr.terms.return_type)
            or getattr(parsed_expr.terms, "operand_types", None) is not None
            and any(
                is_extension_array_dtype(elem)
                for elem in parsed_expr.terms.operand_types
            )
        ):
            warnings.warn(
                "Engine has switched to 'python' because numexpr does not support "
                + "extension array dtypes. Please set your engine to python manually.",
                RuntimeWarning,
            )
            engine = "python"

        # construct the engine and evaluate the parsed expression
        eng = ENGINES[engine]
        eng_inst = eng(parsed_expr)
        ret = eng_inst.evaluate()

        # an expression without an assignment is only acceptable as a single,
        # non-inplace expression
        if parsed_expr.assigner is None:
            if multi_line:
                raise ValueError(
                    "Multi-line expressions are only valid "
                    + "if all expressions contain an assignment"
                )
            if inplace:
                raise ValueError("Cannot operate inplace if there is no assignment")

        # assign if needed
        assigner = parsed_expr.assigner
        if env.target is not None and assigner is not None:
            target_modified = True

            # if returning a copy, copy only on the first assignment
            if not inplace and first_expr:
                try:
                    target = env.target
                    if isinstance(target, BasePandasDataset):
                        target = target.copy(deep=True)
                    else:
                        target = target.copy()
                except AttributeError as err:
                    raise ValueError("Cannot return a copy of the target") from err
            else:
                target = env.target

            # TypeError is most commonly raised (e.g. int, list), but you
            # get IndexError if you try to do this assignment on np.ndarray.
            # we will ignore numpy warnings here; e.g. if trying
            # to use a non-numeric indexer
            try:
                if inplace and isinstance(target, BasePandasDataset):
                    target.loc[:, assigner] = ret
                else:
                    target[assigner] = ret  # pyright: ignore[reportGeneralTypeIssues]
            except (TypeError, IndexError) as err:
                raise ValueError("Cannot assign expression output to target") from err

            # make the freshly assigned name available to the scope built for
            # the following lines (``resolvers`` is passed to ``ensure_scope``)
            if not resolvers:
                resolvers = ({assigner: ret},)
            else:
                # existing resolver needs updated to handle
                # case of mutating existing column in copy
                for resolver in resolvers:
                    if assigner in resolver:
                        resolver[assigner] = ret
                        break
                else:
                    resolvers += ({assigner: ret},)

            # an assignment has no completion value of its own
            ret = None
            first_expr = False

    # We want to exclude `inplace=None` as being False.
    return (target if target_modified else ret) if inplace is False else None


================================================
FILE: modin/core/computation/expr.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
:func:`~pandas.eval` parsers.

Forked from pandas.core.computation.expr
"""

from __future__ import annotations

import ast
import tokenize
from functools import (
    partial,
    reduce,
)
from keyword import iskeyword
from typing import (
    Callable,
    ClassVar,
    TypeVar,
)

import numpy as np
import pandas.core.common as com
from pandas.errors import UndefinedVariableError
from pandas.io.formats import printing

from modin.core.computation.ops import (
    ARITH_OPS_SYMS,
    BOOL_OPS_SYMS,
    CMP_OPS_SYMS,
    LOCAL_TAG,
    UNARY_OPS_SYMS,
    BinOp,
    Constant,
    FuncNode,
    Op,
    Term,
    UnaryOp,
    is_term,
)
from modin.core.computation.parsing import (
    clean_backtick_quoted_toks,
    tokenize_string,
)
from modin.core.computation.scope import Scope


def _rewrite_assign(tok: tuple[int, str]) -> tuple[int, str]:
    """
    Rewrite the assignment operator for PyTables expressions that use ``=``
    as a substitute for ``==``.

    Parameters
    ----------
    tok : tuple of int, str
        ints correspond to the all caps constants in the tokenize module

    Returns
    -------
    tuple of int, str
        Either the input or token or the replacement values
    """
    toknum, tokval = tok
    return toknum, "==" if tokval == "=" else tokval


def _replace_booleans(tok: tuple[int, str]) -> tuple[int, str]:
    """
    Replace ``&`` with ``and`` and ``|`` with ``or`` so that bitwise
    precedence is changed to boolean precedence.

    Parameters
    ----------
    tok : tuple of int, str
        ints correspond to the all caps constants in the tokenize module

    Returns
    -------
    tuple of int, str
        Either the input or token or the replacement values
    """
    toknum, tokval = tok
    if toknum == tokenize.OP:
        if tokval == "&":
            return tokenize.NAME, "and"
        elif tokval == "|":
            return tokenize.NAME, "or"
        return toknum, tokval
    return toknum, tokval


def _replace_locals(tok: tuple[int, str]) -> tuple[int, str]:
    """
    Replace local variables with a syntactically valid name.

    Parameters
    ----------
    tok : tuple of int, str
        ints correspond to the all caps constants in the tokenize module

    Returns
    -------
    tuple of int, str
        Either the input or token or the replacement values

    Notes
    -----
    This is somewhat of a hack in that we rewrite a string such as ``'@a'`` as
    ``'__pd_eval_local_a'`` by telling the tokenizer that ``__pd_eval_local_``
    is a ``tokenize.OP`` and to replace the ``'@'`` symbol with it.
    """
    toknum, tokval = tok
    if toknum == tokenize.OP and tokval == "@":
        return tokenize.OP, LOCAL_TAG
    return toknum, tokval


def _compose2(f, g):
    """
    Compose 2 callables.
    """
    return lambda *args, **kwargs: f(g(*args, **kwargs))


def _compose(*funcs):
    """
    Compose 2 or more callables.
    """
    assert len(funcs) > 1, "At least 2 callables must be passed to compose"
    return reduce(_compose2, funcs)


def _preparse(
    source: str,
    f=_compose(
        _replace_locals, _replace_booleans, _rewrite_assign, clean_backtick_quoted_toks
    ),
) -> str:
    """
    Rewrite the tokens of ``source`` with ``f`` and reassemble the result.

    Parameters
    ----------
    source : str
        A Python source code string
    f : callable
        This takes a tuple of (toknum, tokval) as its argument and returns a
        tuple with the same structure but possibly different elements. Defaults
        to the composition of ``clean_backtick_quoted_toks``,
        ``_rewrite_assign``, ``_replace_booleans`` and ``_replace_locals``,
        applied to each token in that order.

    Returns
    -------
    str
        Valid Python source code

    Notes
    -----
    The `f` parameter can be any callable that takes *and* returns input of the
    form ``(toknum, tokval)``, where ``toknum`` is one of the constants from
    the ``tokenize`` module and ``tokval`` is a string.
    """
    assert callable(f), "f must be callable"
    rewritten_tokens = map(f, tokenize_string(source))
    return tokenize.untokenize(rewritten_tokens)


def _is_type(t):
    """
    Factory for a type checking function of type ``t`` or tuple of types.
    """
    return lambda x: isinstance(x.value, t)


# predicates telling whether an object's ``value`` attribute is a list / a str
_is_list = _is_type(list)
_is_str = _is_type(str)


# partition all AST nodes
# (every attribute of the ``ast`` module that is a class deriving from ``ast.AST``)
_all_nodes = frozenset(
    node
    for node in (getattr(ast, name) for name in dir(ast))
    if isinstance(node, type) and issubclass(node, ast.AST)
)


def _filter_nodes(superclass, all_nodes=_all_nodes):
    """
    Collect the names of the AST node classes that subclass ``superclass``.

    Parameters
    ----------
    superclass : type
        Class the nodes are tested against with ``issubclass``.
    all_nodes : iterable of type
        Candidate node classes; defaults to ``_all_nodes``.

    Returns
    -------
    frozenset of str
        ``__name__`` of every matching class.
    """
    return frozenset(
        node.__name__ for node in all_nodes if issubclass(node, superclass)
    )


# names of every AST node class, and of the node classes deriving from each
# of the abstract grammar categories below
_all_node_names = frozenset(x.__name__ for x in _all_nodes)
_mod_nodes = _filter_nodes(ast.mod)
_stmt_nodes = _filter_nodes(ast.stmt)
_expr_context_nodes = _filter_nodes(ast.expr_context)
_boolop_nodes = _filter_nodes(ast.boolop)
_handler_nodes = _filter_nodes(ast.excepthandler)
_arguments_nodes = _filter_nodes(ast.arguments)
_keyword_nodes = _filter_nodes(ast.keyword)
_alias_nodes = _filter_nodes(ast.alias)


# nodes that we don't support directly but are needed for parsing
_hacked_nodes = frozenset(["Assign", "Module", "Expr"])


# expression-level node names that are rejected explicitly
_unsupported_expr_nodes = frozenset(
    [
        "Yield",
        "GeneratorExp",
        "IfExp",
        "DictComp",
        "SetComp",
        "Repr",
        "Lambda",
        "Set",
        "AST",
        "Is",
        "IsNot",
    ]
)

# these nodes are low priority or won't ever be supported (e.g., AST)
_unsupported_nodes = (
    _stmt_nodes
    | _mod_nodes
    | _handler_nodes
    | _arguments_nodes
    | _keyword_nodes
    | _alias_nodes
    | _expr_context_nodes
    | _unsupported_expr_nodes
) - _hacked_nodes

# we're adding a different assignment in some cases to be equality comparison
# and we don't want `stmt` and friends in there so get only the classes whose
# names are capitalized
_base_supported_nodes = (_all_node_names - _unsupported_nodes) | _hacked_nodes
# sanity check at import time: a node name must not be both supported and unsupported
intersection = _unsupported_nodes & _base_supported_nodes
_msg = f"cannot both support and not support {intersection}"
assert not intersection, _msg


def _node_not_implemented(node_name: str) -> Callable[..., None]:
    """
    Return a function that raises a NotImplementedError with a passed node name.
    """

    def f(self, *args, **kwargs):
        raise NotImplementedError(f"'{node_name}' nodes are not implemented")

    return f


# type variable used in the annotations of ``disallow`` for the decorated class
_T = TypeVar("_T")


def disallow(nodes: set[str]) -> Callable[[type[_T]], type[_T]]:
    """
    Class decorator that forbids parsing of the given node names.

    For every name in ``nodes`` a ``visit_<name>`` method raising
    NotImplementedError is installed on the decorated class, and the
    installed method names are recorded in ``cls.unsupported_nodes``.

    Returns
    -------
    callable
    """

    def disallowed(cls: type[_T]) -> type[_T]:
        method_names = []
        for node in nodes:
            method_name = f"visit_{node}"
            setattr(cls, method_name, _node_not_implemented(node))
            method_names.append(method_name)
        # error: "Type[_T]" has no attribute "unsupported_nodes"
        cls.unsupported_nodes = tuple(method_names)  # type: ignore[attr-defined]
        return cls

    return disallowed


def _op_maker(op_class, op_symbol):
    """
    Return a function to create an op class with its symbol already passed.

    Returns
    -------
    callable
    """

    def f(self, node, *args, **kwargs):
        """
        Return a partial function with an Op subclass with an operator already passed.

        Returns
        -------
        callable
        """
        return partial(op_class, op_symbol, *args, **kwargs)

    return f


# maps the attribute-name prefix looked up by ``add_ops`` (``<prefix>_ops`` and
# ``<prefix>_op_nodes_map``) to the Op subclass built for those operators
_op_classes = {"binary": BinOp, "unary": UnaryOp}


def add_ops(op_classes):
    """
    Class decorator installing default ``visit_<OpNode>`` methods.

    For each ``prefix -> op_class`` entry of ``op_classes`` the decorated
    class must expose ``<prefix>_ops`` (operator symbols) and
    ``<prefix>_op_nodes_map`` (symbol -> AST node name). Symbols mapped to
    ``None`` are skipped.
    """

    def decorate(cls):
        for prefix, op_class in op_classes.items():
            symbols = getattr(cls, f"{prefix}_ops")
            node_for_symbol = getattr(cls, f"{prefix}_op_nodes_map")
            for symbol in symbols:
                node_name = node_for_symbol[symbol]
                if node_name is None:
                    continue
                setattr(cls, f"visit_{node_name}", _op_maker(op_class, symbol))
        return cls

    return decorate


@disallow(_unsupported_nodes)
@add_ops(_op_classes)
class BaseExprVisitor(ast.NodeVisitor):
    """
    Custom ast walker. Parsers of other engines should subclass this class
    if necessary.

    Parameters
    ----------
    env : Scope
    engine : str
    parser : str
    preparser : callable
    """

    const_type: ClassVar[type[Term]] = Constant
    term_type: ClassVar[type[Term]] = Term

    # operator symbols and the AST node names they correspond to, position by
    # position; ``add_ops`` turns these maps into ``visit_<Node>`` methods
    binary_ops = CMP_OPS_SYMS + BOOL_OPS_SYMS + ARITH_OPS_SYMS
    binary_op_nodes = (
        "Gt",
        "Lt",
        "GtE",
        "LtE",
        "Eq",
        "NotEq",
        "In",
        "NotIn",
        "BitAnd",
        "BitOr",
        "And",
        "Or",
        "Add",
        "Sub",
        "Mult",
        "Div",
        "Pow",
        "FloorDiv",
        "Mod",
    )
    binary_op_nodes_map = dict(zip(binary_ops, binary_op_nodes))

    unary_ops = UNARY_OPS_SYMS
    unary_op_nodes = "UAdd", "USub", "Invert", "Not"
    unary_op_nodes_map = dict(zip(unary_ops, unary_op_nodes))

    # comparison node -> membership node used when an operand is a list/str
    rewrite_map = {
        ast.Eq: ast.In,
        ast.NotEq: ast.NotIn,
        ast.In: ast.In,
        ast.NotIn: ast.NotIn,
    }

    unsupported_nodes: tuple[str, ...]

    def __init__(self, env, engine, parser, preparser=_preparse) -> None:
        self.env = env
        self.engine = engine
        self.parser = parser
        self.preparser = preparser
        # name assigned to by the expression, set by ``visit_Assign``
        self.assigner = None

    def visit(self, node, **kwargs):
        """
        Dispatch ``node`` to the matching ``visit_<NodeClass>`` method.

        A string is first run through the preparser and parsed into an AST.
        A SyntaxError raised while parsing is re-raised; its message is
        replaced when any whitespace-separated piece of the preparsed source
        is a Python keyword.
        """
        if isinstance(node, str):
            clean = self.preparser(node)
            try:
                node = ast.fix_missing_locations(ast.parse(clean))
            except SyntaxError as e:
                if any(iskeyword(x) for x in clean.split()):
                    e.msg = "Python keyword not valid identifier in numexpr query"
                # bare raise keeps the original traceback intact
                raise

        method = f"visit_{type(node).__name__}"
        visitor = getattr(self, method)
        return visitor(node, **kwargs)

    def visit_Module(self, node, **kwargs):
        """Visit the single statement of a module; anything else is a SyntaxError."""
        if len(node.body) != 1:
            raise SyntaxError("only a single expression is allowed")
        expr = node.body[0]
        return self.visit(expr, **kwargs)

    def visit_Expr(self, node, **kwargs):
        """Visit the value wrapped by an expression statement."""
        return self.visit(node.value, **kwargs)

    def _rewrite_membership_op(self, node, left, right):
        """
        Turn ``==``/``!=`` into ``in``/``not in`` when an operand is a list or str.

        Returns the visited operator, the operator node instance actually
        used, and the (possibly replaced) left and right operands.
        """
        # the kind of the operator (is actually an instance)
        op_instance = node.op
        op_type = type(op_instance)

        # must be two terms and the comparison operator must be ==/!=/in/not in
        if is_term(left) and is_term(right) and op_type in self.rewrite_map:
            left_list, right_list = map(_is_list, (left, right))
            left_str, right_str = map(_is_str, (left, right))

            # if there are any strings or lists in the expression
            if left_list or right_list or left_str or right_str:
                op_instance = self.rewrite_map[op_type]()

            # pop the string variable out of locals and replace it with a list
            # of one string, kind of a hack
            if right_str:
                name = self.env.add_tmp([right.value])
                right = self.term_type(name, self.env)

            if left_str:
                name = self.env.add_tmp([left.value])
                left = self.term_type(name, self.env)

        op = self.visit(op_instance)
        return op, op_instance, left, right

    def _maybe_transform_eq_ne(self, node, left=None, right=None):
        """
        Visit whichever operands were not supplied, then apply
        ``_rewrite_membership_op``.
        """
        if left is None:
            left = self.visit(node.left, side="left")
        if right is None:
            right = self.visit(node.right, side="right")
        op, op_class, left, right = self._rewrite_membership_op(node, left, right)
        return op, op_class, left, right

    def _maybe_downcast_constants(self, left, right):
        """
        Re-wrap a scalar operand as ``np.float32`` when the other operand is
        a non-scalar whose return type is float32.
        """
        f32 = np.dtype(np.float32)
        if (
            left.is_scalar
            and hasattr(left, "value")
            and not right.is_scalar
            and right.return_type == f32
        ):
            # right is a float32 array, left is a scalar
            name = self.env.add_tmp(np.float32(left.value))
            left = self.term_type(name, self.env)
        if (
            right.is_scalar
            and hasattr(right, "value")
            and not left.is_scalar
            and left.return_type == f32
        ):
            # left is a float32 array, right is a scalar
            name = self.env.add_tmp(np.float32(right.value))
            right = self.term_type(name, self.env)

        return left, right

    def _maybe_eval(self, binop, eval_in_python):
        """Evaluate ``binop`` through its own ``evaluate`` method."""
        # eval `in` and `not in` (for now) in "partial" python space
        # things that can be evaluated in "eval" space will be turned into
        # temporary variables. for example,
        # [1,2] in a + 2 * b
        # in that case a + 2 * b will be evaluated using numexpr, and the "in"
        # call will be evaluated using isin (in python space)
        return binop.evaluate(
            self.env, self.engine, self.parser, self.term_type, eval_in_python
        )

    def _maybe_evaluate_binop(
        self,
        op,
        op_class,
        lhs,
        rhs,
        eval_in_python=("in", "not in"),
        maybe_eval_in_python=("==", "!=", "<", ">", "<=", ">="),
    ):
        """
        Build ``op(lhs, rhs)`` and evaluate it eagerly where required.

        Raises
        ------
        TypeError
            If the resulting op reports an invalid return type.
        """
        res = op(lhs, rhs)

        if res.has_invalid_return_type:
            raise TypeError(
                f"unsupported operand type(s) for {res.op}: "
                + f"'{lhs.type}' and '{rhs.type}'"
            )

        # NOTE(review): the parentheses below only spell out the precedence the
        # original ``A and B or C`` already had -- a datetime ``rhs`` triggers
        # python evaluation for *any* operator, not just comparisons. Confirm
        # whether ``A and (B or C)`` was intended before changing it.
        if self.engine != "pytables" and (
            (res.op in CMP_OPS_SYMS and getattr(lhs, "is_datetime", False))
            or getattr(rhs, "is_datetime", False)
        ):
            # all date ops must be done in python bc numexpr doesn't work
            # well with NaT
            return self._maybe_eval(res, self.binary_ops)

        if res.op in eval_in_python:
            # "in"/"not in" ops are always evaluated in python
            return self._maybe_eval(res, eval_in_python)
        elif self.engine != "pytables":
            if (
                getattr(lhs, "return_type", None) == object
                or getattr(rhs, "return_type", None) == object
            ):
                # evaluate "==" and "!=" in python if either of our operands
                # has an object return type
                return self._maybe_eval(res, eval_in_python + maybe_eval_in_python)
        return res

    def visit_BinOp(self, node, **kwargs):
        """Visit a binary operation node."""
        op, op_class, left, right = self._maybe_transform_eq_ne(node)
        left, right = self._maybe_downcast_constants(left, right)
        return self._maybe_evaluate_binop(op, op_class, left, right)

    def visit_UnaryOp(self, node, **kwargs):
        """Visit a unary operation node."""
        op = self.visit(node.op)
        operand = self.visit(node.operand)
        return op(operand)

    def visit_Name(self, node, **kwargs) -> Term:
        return self.term_type(node.id, self.env, **kwargs)

    # TODO(py314): deprecated since Python 3.8. Remove after Python 3.14 is min
    def visit_NameConstant(self, node, **kwargs) -> Term:
        return self.const_type(node.value, self.env)

    # TODO(py314): deprecated since Python 3.8. Remove after Python 3.14 is min
    def visit_Num(self, node, **kwargs) -> Term:
        return self.const_type(node.value, self.env)

    def visit_Constant(self, node, **kwargs) -> Term:
        return self.const_type(node.value, self.env)

    # TODO(py314): deprecated since Python 3.8. Remove after Python 3.14 is min
    def visit_Str(self, node, **kwargs) -> Term:
        name = self.env.add_tmp(node.s)
        return self.term_type(name, self.env)

    def visit_List(self, node, **kwargs) -> Term:
        """Evaluate every element and store the resulting list as a temporary."""
        name = self.env.add_tmp([self.visit(e)(self.env) for e in node.elts])
        return self.term_type(name, self.env)

    visit_Tuple = visit_List

    def visit_Index(self, node, **kwargs):
        """df.index[4]"""
        return self.visit(node.value)

    def visit_Subscript(self, node, **kwargs) -> Term:
        """Evaluate ``value[slice]`` and store the result as a temporary."""
        from modin.core.computation.eval import eval as pd_eval

        value = self.visit(node.value)
        slobj = self.visit(node.slice)
        result = pd_eval(
            slobj, local_dict=self.env, engine=self.engine, parser=self.parser
        )
        try:
            # a Term instance
            v = value.value[result]
        except AttributeError:
            # an Op instance
            lhs = pd_eval(
                value, local_dict=self.env, engine=self.engine, parser=self.parser
            )
            v = lhs[result]
        name = self.env.add_tmp(v)
        return self.term_type(name, env=self.env)

    def visit_Slice(self, node, **kwargs) -> slice:
        """df.index[slice(4,6)]"""
        lower = node.lower
        if lower is not None:
            lower = self.visit(lower).value
        upper = node.upper
        if upper is not None:
            upper = self.visit(upper).value
        step = node.step
        if step is not None:
            step = self.visit(step).value

        return slice(lower, upper, step)

    def visit_Assign(self, node, **kwargs):
        """
        support a single assignment node, like

        c = a + b

        set the assigner at the top level, must be a Name node which
        might or might not exist in the resolvers

        """
        if len(node.targets) != 1:
            raise SyntaxError("can only assign a single expression")
        if not isinstance(node.targets[0], ast.Name):
            raise SyntaxError("left hand side of an assignment must be a single name")
        if self.env.target is None:
            raise ValueError("cannot assign without a target object")

        try:
            assigner = self.visit(node.targets[0], **kwargs)
        except UndefinedVariableError:
            # the target does not exist yet: fall back to its plain name
            assigner = node.targets[0].id

        self.assigner = getattr(assigner, "name", assigner)
        if self.assigner is None:
            raise SyntaxError(
                "left hand side of an assignment must be a single resolvable name"
            )

        return self.visit(node.value, **kwargs)

    def visit_Attribute(self, node, **kwargs):
        """
        Resolve ``value.attr`` in a load context and store it as a temporary.

        Raises
        ------
        ValueError
            If the attribute is not used in a load context.
        """
        attr = node.attr
        value = node.value

        ctx = node.ctx
        if isinstance(ctx, ast.Load):
            # resolve the value
            resolved = self.visit(value).value
            try:
                v = getattr(resolved, attr)
                name = self.env.add_tmp(v)
                return self.term_type(name, self.env)
            except AttributeError:
                # something like datetime.datetime where scope is overridden
                if isinstance(value, ast.Name) and value.id == attr:
                    return resolved
                raise

        raise ValueError(f"Invalid Attribute context {type(ctx).__name__}")

    def visit_Call(self, node, side=None, **kwargs):
        """
        Visit a function call.

        A ``FuncNode`` is applied to the visited (unevaluated) arguments and
        rejects keyword arguments; any other callable is invoked right away
        with evaluated arguments and its result is stored as a temporary.
        """
        if isinstance(node.func, ast.Attribute) and node.func.attr != "__call__":
            res = self.visit_Attribute(node.func)
        elif not isinstance(node.func, ast.Name):
            raise TypeError("Only named functions are supported")
        else:
            try:
                res = self.visit(node.func)
            except UndefinedVariableError:
                # Not a variable in scope: check if this is a supported
                # function name. Any ValueError raised by FuncNode propagates
                # unchanged (the former ``except ValueError: raise`` wrapper
                # was a no-op).
                res = FuncNode(node.func.id)

        if res is None:
            # error: "expr" has no attribute "id"
            raise ValueError(
                f"Invalid function call {node.func.id}"  # type: ignore[attr-defined]
            )
        if hasattr(res, "value"):
            res = res.value

        if isinstance(res, FuncNode):
            new_args = [self.visit(arg) for arg in node.args]

            if node.keywords:
                raise TypeError(
                    f'Function "{res.name}" does not support keyword arguments'
                )

            return res(*new_args)

        else:
            new_args = [self.visit(arg)(self.env) for arg in node.args]

            for key in node.keywords:
                if not isinstance(key, ast.keyword):
                    # error: "expr" has no attribute "id"
                    raise ValueError(
                        "keyword error in function call "
                        + f"'{node.func.id}'"  # type: ignore[attr-defined]
                    )

                if key.arg:
                    kwargs[key.arg] = self.visit(key.value)(self.env)

            name = self.env.add_tmp(res(*new_args, **kwargs))
            return self.term_type(name=name, env=self.env)

    def translate_In(self, op):
        """Hook for subclasses; the base implementation returns ``op`` unchanged."""
        return op

    def visit_Compare(self, node, **kwargs):
        """
        Visit a comparison, expanding chained comparisons into a conjunction
        of pairwise comparisons.
        """
        ops = node.ops
        comps = node.comparators

        # base case: we have something like a CMP b
        if len(comps) == 1:
            op = self.translate_In(ops[0])
            binop = ast.BinOp(op=op, left=node.left, right=comps[0])
            return self.visit(binop)

        # recursive case: we have a chained comparison, a CMP b CMP c, etc.
        left = node.left
        values = []
        for op, comp in zip(ops, comps):
            new_node = self.visit(
                ast.Compare(comparators=[comp], left=left, ops=[self.translate_In(op)])
            )
            left = comp
            values.append(new_node)
        return self.visit(ast.BoolOp(op=ast.And(), values=values))

    def _try_visit_binop(self, bop):
        """Visit ``bop`` unless it is already an ``Op`` or ``Term``."""
        if isinstance(bop, (Op, Term)):
            return bop
        return self.visit(bop)

    def visit_BoolOp(self, node, **kwargs):
        """Fold the operands of a boolean operation pairwise, left to right."""

        def visitor(x, y):
            lhs = self._try_visit_binop(x)
            rhs = self._try_visit_binop(y)

            op, op_class, lhs, rhs = self._maybe_transform_eq_ne(node, lhs, rhs)
            return self._maybe_evaluate_binop(op, node.op, lhs, rhs)

        operands = node.values
        return reduce(visitor, operands)


# extra node names added to the disallowed set of the concrete visitors below
# (PandasExprVisitor subtracts several of them again)
_python_not_supported = frozenset(["Dict", "BoolOp", "In", "NotIn"])


@disallow(
    (_unsupported_nodes | _python_not_supported)
    - (_boolop_nodes | frozenset(["BoolOp", "Attribute", "In", "NotIn", "Tuple"]))
)
class PandasExprVisitor(BaseExprVisitor):
    """
    Visitor registered under the ``"pandas"`` key of ``PARSERS``.

    Boolean-operator nodes, ``BoolOp``, ``Attribute``, ``In``, ``NotIn`` and
    ``Tuple`` are removed from the disallowed set. The default preparser
    composes ``clean_backtick_quoted_toks``, ``_replace_booleans`` and
    ``_replace_locals`` (``_rewrite_assign`` is not part of it).
    """

    def __init__(
        self,
        env,
        engine,
        parser,
        preparser=partial(
            _preparse,
            f=_compose(_replace_locals, _replace_booleans, clean_backtick_quoted_toks),
        ),
    ) -> None:
        super().__init__(env, engine, parser, preparser)


@disallow(_unsupported_nodes | _python_not_supported | frozenset(["Not"]))
class PythonExprVisitor(BaseExprVisitor):
    """
    Visitor registered under the ``"python"`` key of ``PARSERS``.

    In addition to the base restrictions it disallows ``Dict``, ``BoolOp``,
    ``In``, ``NotIn`` and ``Not`` nodes. The default preparser returns the
    source string unchanged.
    """

    def __init__(
        self, env, engine, parser, preparser=lambda source, f=None: source
    ) -> None:
        super().__init__(env, engine, parser, preparser=preparser)


class Expr:
    """
    Object encapsulating an expression.

    Parameters
    ----------
    expr : str
    engine : str, optional, default 'numexpr'
    parser : str, optional, default 'pandas'
    env : Scope, optional, default None
    level : int, optional, default 2
    """

    env: Scope
    engine: str
    parser: str

    def __init__(
        self,
        expr,
        engine: str = "numexpr",
        parser: str = "pandas",
        env: Scope | None = None,
        level: int = 0,
    ) -> None:
        self.expr = expr
        self.env = env or Scope(level=level + 1)
        self.engine = engine
        self.parser = parser
        self._visitor = PARSERS[parser](self.env, self.engine, self.parser)
        self.terms = self.parse()

    @property
    def assigner(self):
        return getattr(self._visitor, "assigner", None)

    def __call__(self):
        return self.terms(self.env)

    def __repr__(self) -> str:
        return printing.pprint_thing(self.terms)

    def __len__(self) -> int:
        return len(self.expr)

    def parse(self):
        """
        Parse an expression.
        """
        return self._visitor.visit(self.expr)

    @property
    def names(self):
        """
        Get the names in an expression.
        """
        if is_term(self.terms):
            return frozenset([self.terms.name])
        return frozenset(term.name for term in com.flatten(self.terms))


# parser name -> visitor class; looked up by ``Expr.__init__``
PARSERS = {"python": PythonExprVisitor, "pandas": PandasExprVisitor}


================================================
FILE: modin/core/computation/ops.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
Operator classes for eval. Forked from pandas.core.computation.ops
"""

from __future__ import annotations

import operator
from datetime import datetime
from functools import partial
from typing import (
    TYPE_CHECKING,
    Callable,
    Literal,
)

import numpy as np
import pandas
import pandas.core.common as com
from pandas.core.dtypes.cast import find_common_type
from pandas.core.dtypes.common import (
    is_list_like,
    is_scalar,
)
from pandas.io.formats.printing import (
    pprint_thing,
    pprint_thing_encoded,
)

from modin.core.computation.common import (
    ensure_decoded,
    result_type_many,
)
from modin.core.computation.scope import DEFAULT_GLOBALS

if TYPE_CHECKING:
    from collections.abc import (
        Iterable,
        Iterator,
    )

# names of reduction functions
# NOTE(review): how these are consumed is not visible in this part of the file
REDUCTIONS = ("sum", "prod", "min", "max")

# names of one-argument math functions
_unary_math_ops = (
    "sin",
    "cos",
    "exp",
    "log",
    "expm1",
    "log1p",
    "sqrt",
    "sinh",
    "cosh",
    "tanh",
    "arcsin",
    "arccos",
    "arctan",
    "arccosh",
    "arcsinh",
    "arctanh",
    "abs",
    "log10",
    "floor",
    "ceil",
)
# names of two-argument math functions
_binary_math_ops = ("arctan2",)

# all math function names, unary first
MATHOPS = _unary_math_ops + _binary_math_ops


# prefix marking a name as a local variable; ``Term.local_name`` strips it
LOCAL_TAG = "__pd_eval_local_"


class Term:
    """
    A named operand whose value is resolved from an environment.

    Parameters
    ----------
    name : str or object
        Variable name. A non-string ``name`` makes ``Term(...)`` build a
        ``Constant`` instead.
    env : object
        Environment used for resolution; its ``scope``, ``resolve`` and
        ``swapkey`` members are used.
    side : optional
    encoding : optional
    """

    def __new__(cls, name, env, side=None, encoding=None):
        # anything that is not a string name is a literal -> Constant
        klass = cls if isinstance(name, str) else Constant
        # error: Argument 2 for "super" not an instance of argument 1
        supr_new = super(Term, klass).__new__  # type: ignore[misc]
        return supr_new(klass)

    is_local: bool

    def __init__(self, name, env, side=None, encoding=None) -> None:
        # name is a str for Term, but may be something else for subclasses
        self._name = name
        self.env = env
        self.side = side
        text = str(name)
        self.is_local = text in DEFAULT_GLOBALS or text.startswith(LOCAL_TAG)
        self._value = self._resolve_name()
        self.encoding = encoding

    @property
    def local_name(self) -> str:
        """The name with every occurrence of ``LOCAL_TAG`` removed."""
        return self.name.replace(LOCAL_TAG, "")

    def __repr__(self) -> str:
        return pprint_thing(self.name)

    def __call__(self, *args, **kwargs):
        # arguments are accepted for interface compatibility and ignored
        return self.value

    def evaluate(self, *args, **kwargs) -> Term:
        # a term is already "evaluated"; arguments are ignored
        return self

    def _resolve_name(self):
        """
        Look the name up in ``env``, record the value and return it.

        Raises
        ------
        NotImplementedError
            If the resolved object has more than two dimensions.
        """
        lookup = str(self.local_name)
        scope = self.env.scope
        # a name bound to a class in the scope is never treated as local
        shadowed_by_type = lookup in scope and isinstance(scope[lookup], type)
        is_local = self.is_local and not shadowed_by_type

        resolved = self.env.resolve(lookup, is_local=is_local)
        self.update(resolved)

        if hasattr(resolved, "ndim") and resolved.ndim > 2:
            raise NotImplementedError(
                "N-dimensional objects, where N > 2, are not supported with eval"
            )
        return resolved

    def update(self, value) -> None:
        """
        search order for local (i.e., @variable) variables:

        scope, key_variable
        [('locals', 'local_name'),
         ('globals', 'local_name'),
         ('locals', 'key'),
         ('globals', 'key')]
        """
        key = self.name

        # only variable names (strings) are re-keyed; constants are not
        if isinstance(key, str):
            self.env.swapkey(self.local_name, key, new_value=value)

        self.value = value

    @property
    def is_scalar(self) -> bool:
        return is_scalar(self._value)

    @property
    def type(self):
        held = self._value
        try:
            # potentially very slow for large, mixed dtype frames
            return find_common_type(held.dtypes.values)
        except AttributeError:
            try:
                # ndarray
                return held.dtype
            except AttributeError:
                # scalar
                return type(held)

    return_type = type

    @property
    def raw(self) -> str:
        cls_name = type(self).__name__
        return f"{cls_name}(name={self.name!r}, type={self.type})"

    @property
    def is_datetime(self) -> bool:
        kind = self.type
        # dtypes expose their scalar class as ``.type``; plain classes do not
        kind = getattr(kind, "type", kind)
        return issubclass(kind, (datetime, np.datetime64))

    @property
    def value(self):
        return self._value

    @value.setter
    def value(self, new_value) -> None:
        self._value = new_value

    @property
    def name(self):
        return self._name

    @property
    def ndim(self) -> int:
        return self._value.ndim


class Constant(Term):
    """
    A term holding a literal value: nothing is looked up in the environment
    and the value doubles as the name.
    """

    def _resolve_name(self):
        # the "name" passed to the constructor is the value itself
        return self._name

    @property
    def name(self):
        return self.value

    def __repr__(self) -> str:
        # in python 2 str() of float
        # can truncate shorter than repr()
        return repr(self.name)


# keyword operator -> symbol stored by ``Op.__init__`` in its place
_bool_op_map = {"not": "~", "and": "&", "or": "|"}


class Op:
    """
    Hold an operator of arbitrary arity.

    Parameters
    ----------
    op : str
        Operator symbol; ``not``/``and``/``or`` are stored as ``~``/``&``/``|``.
    operands : iterable of Term or Op
    encoding : optional
    """

    op: str

    def __init__(self, op: str, operands: Iterable[Term | Op], encoding=None) -> None:
        self.op = _bool_op_map.get(op, op)
        self.operands = operands
        self.encoding = encoding

    def __iter__(self) -> Iterator:
        return iter(self.operands)

    def __repr__(self) -> str:
        """
        Print a generic n-ary operator and its operands using infix notation.
        """
        # recurse over the operands
        parened = (f"({pprint_thing(opr)})" for opr in self.operands)
        return pprint_thing(f" {self.op} ".join(parened))

    @property
    def return_type(self):
        # clobber types to bool if the op is a boolean operator
        if self.op in (CMP_OPS_SYMS + BOOL_OPS_SYMS):
            return np.bool_
        return result_type_many(*(term.type for term in com.flatten(self)))

    @property
    def has_invalid_return_type(self) -> bool:
        """
        Whether the op returns ``object`` while some operand type is not the
        object dtype.
        """
        types = self.operand_types
        obj_dtype_set = frozenset([np.dtype("object")])
        # coerce to bool: the bare ``and`` expression would otherwise hand the
        # leftover frozenset back to the caller, contradicting the annotation
        return bool(self.return_type == object and types - obj_dtype_set)

    @property
    def operand_types(self):
        """Set of the ``type`` of every term reached by flattening this op."""
        return frozenset(term.type for term in com.flatten(self))

    @property
    def is_scalar(self) -> bool:
        return all(operand.is_scalar for operand in self.operands)

    @property
    def is_datetime(self) -> bool:
        try:
            # dtypes expose their scalar class as ``.type``
            t = self.return_type.type
        except AttributeError:
            t = self.return_type

        return issubclass(t, (datetime, np.datetime64))


def _in(x, y):
    """
    Compute the vectorized membership of ``x in y`` if possible, otherwise
    use Python.
    """
    try:
        return x.isin(y)
    except AttributeError:
        if is_list_like(x):
            try:
                return y.isin(x)
            except AttributeError:
                pass
        return x in y


def _not_in(x, y):
    """
    Compute the vectorized membership of ``x not in y`` if possible,
    otherwise use Python.
    """
    try:
        return ~x.isin(y)
    except AttributeError:
        if is_list_like(x):
            try:
                return ~y.isin(x)
            except AttributeError:
                pass
        return x not in y


# Comparison and membership operator tokens. ``_cmp_ops_funcs`` is aligned
# with this tuple by position; the two are zipped into ``_cmp_ops_dict``.
CMP_OPS_SYMS = (">", "<", ">=", "<=", "==", "!=", "in", "not in")
_cmp_ops_funcs = (
    operator.gt,
    operator.lt,
    operator.ge,
    operator.le,
    operator.eq,
    operator.ne,
    _in,
    _not_in,
)
_cmp_ops_dict = dict(zip(CMP_OPS_SYMS, _cmp_ops_funcs))

# Boolean operator tokens; both the symbolic and the keyword spelling map to
# the same bitwise function.
BOOL_OPS_SYMS = ("&", "|", "and", "or")
_bool_ops_funcs = (operator.and_, operator.or_, operator.and_, operator.or_)
_bool_ops_dict = dict(zip(BOOL_OPS_SYMS, _bool_ops_funcs))

# Arithmetic operator tokens, positionally aligned with ``_arith_ops_funcs``.
ARITH_OPS_SYMS = ("+", "-", "*", "/", "**", "//", "%")
_arith_ops_funcs = (
    operator.add,
    operator.sub,
    operator.mul,
    operator.truediv,
    operator.pow,
    operator.floordiv,
    operator.mod,
)
_arith_ops_dict = dict(zip(ARITH_OPS_SYMS, _arith_ops_funcs))

# Subset of the arithmetic tokens that is singled out by name.
# NOTE(review): the special handling itself is not visible here.
SPECIAL_CASE_ARITH_OPS_SYMS = ("**", "//", "%")


# Union of all binary operator tables; ``BinOp.__init__`` looks its function
# up here.
_binary_ops_dict = {}

for d in (_cmp_ops_dict, _bool_ops_dict, _arith_ops_dict):
    _binary_ops_dict.update(d)


def is_term(obj) -> bool:
    """Return whether `obj` is an instance of ``Term``."""
    return isinstance(obj, Term)


class BinOp(Op):
    """
    Hold a binary operator and its operands.

    Parameters
    ----------
    op : str
    lhs : Term or Op
    rhs : Term or Op

    Raises
    ------
    NotImplementedError
        If a boolean operator is applied with a scalar operand and the
        operands are not both boolean.
    ValueError
        If `op` is not a known binary operator.
    """

    def __init__(self, op: str, lhs, rhs) -> None:
        super().__init__(op, (lhs, rhs))
        self.lhs = lhs
        self.rhs = rhs

        # validation and datetime coercion run before the operator lookup
        self._disallow_scalar_only_bool_ops()
        self.convert_values()

        try:
            self.func = _binary_ops_dict[op]
        except KeyError as err:
            keys = list(_binary_ops_dict)
            raise ValueError(
                f"Invalid binary operator {op!r}, valid operators are {keys}"
            ) from err

    def __call__(self, env):
        """
        Recursively evaluate an expression in Python space.

        Parameters
        ----------
        env : Scope

        Returns
        -------
        object
            The result of an evaluated expression.
        """
        # evaluate the left node first, then the right one
        lhs_value = self.lhs(env)
        rhs_value = self.rhs(env)
        return self.func(lhs_value, rhs_value)

    def evaluate(self, env, engine: str, parser, term_type, eval_in_python):
        """
        Evaluate a binary operation *before* being passed to the engine.

        Parameters
        ----------
        env : Scope
        engine : str
        parser : str
        term_type : type
        eval_in_python : list

        Returns
        -------
        term_type
            The "pre-evaluated" expression as an instance of ``term_type``
        """
        if engine == "python":
            res = self(env)
        else:
            # pre-evaluate both children with the same settings
            left = self.lhs.evaluate(
                env,
                engine=engine,
                parser=parser,
                term_type=term_type,
                eval_in_python=eval_in_python,
            )
            right = self.rhs.evaluate(
                env,
                engine=engine,
                parser=parser,
                term_type=term_type,
                eval_in_python=eval_in_python,
            )

            if self.op not in eval_in_python:
                # every other operator is delegated to the engine
                from modin.core.computation.eval import eval

                res = eval(self, local_dict=env, engine=engine, parser=parser)
            else:
                # operators listed in `eval_in_python` are applied directly
                res = self.func(left.value, right.value)

        # store the result as a temporary and wrap it in a term
        name = env.add_tmp(res)
        return term_type(name, env=env)

    def convert_values(self) -> None:
        """
        Convert datetimes to a comparable value in an expression.

        When one side is a datetime term and the other a scalar term, the
        scalar is replaced by a ``pandas.Timestamp`` (converted to UTC if it
        is timezone-aware).
        """

        def stringify(value):
            if self.encoding is not None:
                return pprint_thing_encoded(value, encoding=self.encoding)
            return pprint_thing(value)

        def coerce_scalar(datetime_side, scalar_side) -> None:
            # only act on a (datetime term, scalar term) pairing
            if not (
                is_term(datetime_side)
                and datetime_side.is_datetime
                and is_term(scalar_side)
                and scalar_side.is_scalar
            ):
                return
            v = scalar_side.value
            if isinstance(v, (int, float)):
                v = stringify(v)
            v = pandas.Timestamp(ensure_decoded(v))
            if v.tz is not None:
                v = v.tz_convert("UTC")
            scalar_side.update(v)

        lhs, rhs = self.lhs, self.rhs
        coerce_scalar(lhs, rhs)
        coerce_scalar(rhs, lhs)

    def _disallow_scalar_only_bool_ops(self):
        """
        Reject boolean operators that involve a scalar non-boolean operand.

        Raises
        ------
        NotImplementedError
            If either operand is scalar, the operator is a boolean operator
            and the operands are not both of a boolean type.
        """
        rhs = self.rhs
        lhs = self.lhs

        # GH#24883 unwrap dtype if necessary to ensure we have a type object
        rhs_rt = rhs.return_type
        rhs_rt = getattr(rhs_rt, "type", rhs_rt)
        lhs_rt = lhs.return_type
        lhs_rt = getattr(lhs_rt, "type", lhs_rt)

        bool_types = (bool, np.bool_)
        if not (lhs.is_scalar or rhs.is_scalar):
            return
        if self.op not in _bool_ops_dict:
            return
        if issubclass(rhs_rt, bool_types) and issubclass(lhs_rt, bool_types):
            return
        raise NotImplementedError("cannot evaluate scalar only bool ops")


def isnumeric(dtype) -> bool:
    """Return whether `dtype` resolves to a NumPy scalar type derived from ``np.number``."""
    scalar_type = np.dtype(dtype).type
    return issubclass(scalar_type, np.number)


# Unary operator tokens, positionally aligned with ``_unary_ops_funcs``;
# ``~`` and ``not`` both map to ``operator.invert``.
UNARY_OPS_SYMS = ("+", "-", "~", "not")
_unary_ops_funcs = (operator.pos, operator.neg, operator.invert, operator.invert)
_unary_ops_dict = dict(zip(UNARY_OPS_SYMS, _unary_ops_funcs))


class UnaryOp(Op):
    """
    Hold a unary operator and its operands.

    Parameters
    ----------
    op : str
        The token used to represent the operator.
    operand : Term or Op
        The Term or Op operand to the operator.

    Raises
    ------
    ValueError
        * If no function associated with the passed operator token is found.
    """

    def __init__(self, op: Literal["+", "-", "~", "not"], operand) -> None:
        super().__init__(op, (operand,))
        self.operand = operand

        try:
            self.func = _unary_ops_dict[op]
        except KeyError as err:
            raise ValueError(
                f"Invalid unary operator {op!r}, valid operators are {UNARY_OPS_SYMS}"
            ) from err

    def __call__(self, env) -> MathCall:
        """Evaluate the operand in `env` and apply the unary function to it."""
        # NOTE(review): the value returned is whatever ``self.func`` yields;
        # the ``MathCall`` annotation looks inaccurate - confirm.
        value = self.operand(env)
        # error: Cannot call function of unknown type
        return self.func(value)  # type: ignore[operator]

    def __repr__(self) -> str:
        return pprint_thing(f"{self.op}({self.operand})")

    @property
    def return_type(self) -> np.dtype:
        """
        ``bool`` dtype when the operand is boolean-valued, ``int`` dtype otherwise.

        The operand counts as boolean-valued if its own return type is bool,
        or if it is an ``Op`` whose operator is a comparison or boolean one.
        """
        operand = self.operand
        bool_dtype = np.dtype("bool")
        is_boolean = operand.return_type == bool_dtype or (
            isinstance(operand, Op)
            and (operand.op in _cmp_ops_dict or operand.op in _bool_ops_dict)
        )
        return bool_dtype if is_boolean else np.dtype("int")


class MathCall(Op):
    """
    Hold a call to a math function together with its argument nodes.

    Parameters
    ----------
    func : object
        Object exposing ``name`` (used as the operator token) and ``func``
        (the callable applied to the evaluated arguments).
    args : iterable
        Operand nodes passed to the function.
    """

    def __init__(self, func, args) -> None:
        super().__init__(func.name, args)
        self.func = func

    def __call__(self, env):
        """Evaluate every operand in `env` and call the wrapped function on them."""
        # error: "Op" not callable
        evaluated = [operand(env) for operand in self.operands]  # type: ignore[operator]
        return self.func.func(*evaluated)

    def __repr__(self) -> str:
        joined = ",".join(str(operand) for operand in self.operands)
        return pprint_thing(f"{self.op}({joined})")


class FuncNode:
    """
    Named math function that can be applied to expression nodes.

    Parameters
    ----------
    name : str
        Name of the function; it must be listed in ``MATHOPS`` and is looked
        up as an attribute of ``numpy``.

    Raises
    ------
    ValueError
        If `name` is not in ``MATHOPS``.
    """

    def __init__(self, name: str) -> None:
        if name not in MATHOPS:
            raise ValueError(f'"{name}" is not a supported function')
        self.name = name
        self.func = getattr(np, name)

    def __call__(self, *args) -> MathCall:
        """Build a ``MathCall`` node applying this function to `args`."""
        return MathCall(self, args)


================================================
FILE: modin/core/computation/parsing.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
:func:`~pandas.eval` source string parsing functions. Forked from pandas.core.computation.parsing
"""

from __future__ import annotations

import token
import tokenize
from io import StringIO
from keyword import iskeyword
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from collections.abc import (
        Hashable,
        Iterator,
    )

# A token value Python's tokenizer probably will never use.
# It tags the tokens produced for backtick quoted strings so that they can
# later be recognized and rewritten into NAME tokens.
BACKTICK_QUOTED_STRING = 100


def create_valid_python_identifier(name: str) -> str:
    """
    Create valid Python identifiers from any string.

    A name that already is a valid, non-keyword identifier is returned
    unchanged. Otherwise every known special character is replaced by a
    descriptive placeholder and the result is prefixed with
    ``BACKTICK_QUOTED_STRING_``.

    Raises
    ------
    SyntaxError
        If the returned name is not a Python valid identifier, raise an exception.
        This can happen if there is a hashtag in the name, as the tokenizer will
        then terminate and not find the backtick.
        But also for characters that fall out of the range of (U+0001..U+007F).
    """
    if name.isidentifier() and not iskeyword(name):
        return name

    # Placeholders derived from the tokenizer's exact-token table:
    # token.tok_name gives a readable name for each token type.
    from_tokenizer = {
        char: f"_{token.tok_name[tokval]}_"
        for char, tokval in tokenize.EXACT_TOKEN_TYPES.items()
    }
    # Hand-written placeholders; they take precedence over the derived ones.
    handwritten = {
        " ": "_",
        "?": "_QUESTIONMARK_",
        "!": "_EXCLAMATIONMARK_",
        "$": "_DOLLARSIGN_",
        "€": "_EUROSIGN_",
        "°": "_DEGREESIGN_",
        # Including quotes works, but there are exceptions.
        "'": "_SINGLEQUOTE_",
        '"': "_DOUBLEQUOTE_",
        # Currently not possible. Terminates parser and won't find backtick.
        # "#": "_HASH_",
    }
    replacements = {**from_tokenizer, **handwritten}

    translated = "".join(replacements.get(char, char) for char in name)
    candidate = f"BACKTICK_QUOTED_STRING_{translated}"

    if candidate.isidentifier():
        return candidate
    raise SyntaxError(f"Could not convert '{candidate}' to a valid Python identifier.")


def clean_backtick_quoted_toks(tok: tuple[int, str]) -> tuple[int, str]:
    """
    Clean up a column name if surrounded by backticks.

    Backtick quoted strings are indicated by a certain tokval value. If a
    string is a backtick quoted token it will be processed by
    :func:`create_valid_python_identifier` so that the parser can find this
    string when the query is executed.
    In this case the tok will get the NAME tokval.

    Parameters
    ----------
    tok : tuple of int, str
        ints correspond to the all caps constants in the tokenize module

    Returns
    -------
    tok : Tuple[int, str]
        Either the input token or the replacement values
    """
    toknum, tokval = tok
    if toknum != BACKTICK_QUOTED_STRING:
        # ordinary token: pass through untouched
        return toknum, tokval
    return tokenize.NAME, create_valid_python_identifier(tokval)


def clean_column_name(name: Hashable) -> Hashable:
    """
    Function to emulate the cleaning of a backtick quoted name.

    The purpose of this function is to see what happens to the name of an
    identifier when it is parsed as Python code inside a backtick quoted
    string and then cleaned (stripped of any special characters).

    Parameters
    ----------
    name : hashable
        Name to be cleaned.

    Returns
    -------
    name : hashable
        Returns the name after tokenizing and cleaning.

    Notes
    -----
        For some cases, a name cannot be converted to a valid Python identifier.
        In that case :func:`tokenize_string` raises a SyntaxError.
        In that case, we just return the name unmodified.

        If this name was used in the query string (this makes the query call impossible)
        an error will be raised by :func:`tokenize_backtick_quoted_string` instead,
        which is not caught and propagates to the user level.
    """
    try:
        # only the first token (the backtick quoted string) is of interest
        first_token = next(tokenize_string(f"`{name}`"))
        return create_valid_python_identifier(first_token[1])
    except SyntaxError:
        return name


def tokenize_backtick_quoted_string(
    token_generator: Iterator[tokenize.TokenInfo], source: str, string_start: int
) -> tuple[int, str]:
    """
    Creates a token from a backtick quoted string.

    Moves the token_generator forwards till right after the next backtick.

    Parameters
    ----------
    token_generator : Iterator[tokenize.TokenInfo]
        The generator that yields the tokens of the source string (Tuple[int, str]).
        The generator is at the first token after the backtick (`)

    source : str
        The Python source code string.

    string_start : int
        This is the start of backtick quoted string inside the source string.

    Returns
    -------
    tok: Tuple[int, str]
        The token that represents the backtick quoted string.
        The integer is equal to BACKTICK_QUOTED_STRING (100).

    Raises
    ------
    SyntaxError
        If the generator is exhausted before a closing backtick is found.
    """
    for _, tokval, start, _, _ in token_generator:
        if tokval == "`":
            # the column of the closing backtick marks the end of the name
            return BACKTICK_QUOTED_STRING, source[string_start : start[1]]

    # Raised explicitly rather than asserted: an ``assert`` is removed under
    # ``python -O``, which would silently return the rest of `source`.
    raise SyntaxError(f"Unterminated backtick quoted string in '{source}'.")


def tokenize_string(source: str) -> Iterator[tuple[int, str]]:
    """
    Tokenize a Python source code string.

    Ordinary tokens are passed through as ``(toknum, tokval)``. Everything
    between a pair of backticks is collapsed into one backtick quoted string
    token.

    Parameters
    ----------
    source : str
        The Python source code string.

    Returns
    -------
    tok_generator : Iterator[Tuple[int, str]]
        An iterator yielding all tokens with only toknum and tokval (Tuple[int, str]).

    Raises
    ------
    SyntaxError
        If a backtick quoted string cannot be parsed.
    """
    readline = StringIO(source).readline
    tokens = tokenize.generate_tokens(readline)

    for toknum, tokval, start, _, _ in tokens:
        if tokval != "`":
            yield toknum, tokval
            continue

        # An opening backtick: consume tokens up to the closing backtick and
        # emit the enclosed text as a single token.
        try:
            yield tokenize_backtick_quoted_string(
                tokens, source, string_start=start[1] + 1
            )
        except Exception as err:
            raise SyntaxError(f"Failed to parse backticks in '{source}'.") from err


================================================
FILE: modin/core/computation/scope.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
Module for scope operations. Forked from pandas.core.computation.scope
"""

from __future__ import annotations

import datetime
import inspect
import itertools
import pprint
import struct
import sys
from collections import ChainMap
from io import StringIO
from typing import TypeVar

import numpy as np
import pandas
from pandas.errors import UndefinedVariableError

_KT = TypeVar("_KT")
_VT = TypeVar("_VT")


# https://docs.python.org/3/library/collections.html#chainmap-examples-and-recipes
class DeepChainMap(ChainMap[_KT, _VT]):
    """
    Variant of ChainMap that allows direct updates to inner scopes.

    Writes and deletes act on the first mapping that already holds the key,
    instead of always acting on the first mapping.

    Only works when all passed mappings are mutable.
    """

    def __setitem__(self, key: _KT, value: _VT) -> None:
        """Update `key` where it already lives, else insert it into the first mapping."""
        for layer in self.maps:
            if key in layer:
                target = layer
                break
        else:
            target = self.maps[0]
        target[key] = value

    def __delitem__(self, key: _KT) -> None:
        """
        Remove `key` from the first mapping that contains it.

        Raises
        ------
        KeyError
            If `key` doesn't exist.
        """
        for layer in self.maps:
            if key in layer:
                break
        else:
            raise KeyError(key)
        del layer[key]


def ensure_scope(
    level: int, global_dict=None, local_dict=None, resolvers=(), target=None
) -> Scope:
    """
    Ensure that we are grabbing the correct scope.

    Builds a :class:`Scope` from the given arguments, passing ``level + 1``
    as its level (presumably to account for this wrapper's own stack frame;
    ``Scope`` uses the level to pick a frame via ``sys._getframe``).

    Parameters
    ----------
    level : int
    global_dict : dict or None, optional
    local_dict : dict or Scope or None, optional
    resolvers : list-like, optional
    target : object, optional

    Returns
    -------
    Scope
    """
    return Scope(
        level + 1,
        global_dict=global_dict,
        local_dict=local_dict,
        resolvers=resolvers,
        target=target,
    )


def _replacer(x) -> str:
    """
    Replace a number with its hexadecimal representation. Used to tag
    temporary variables with their calling scope's id.
    """
    # get the hex repr of the binary char and remove 0x and pad by pad_size
    # zeros
    try:
        hexin = ord(x)
    except TypeError:
        # bytes literals masquerade as ints when iterating in py3
        hexin = x

    return hex(hexin)


def _raw_hex_id(obj) -> str:
    """
    Return a hexadecimal string built from the bytes of ``id(obj)``.

    The id is packed as a native pointer and the ``hex`` form of every
    resulting byte is concatenated.
    """
    # interpret as a pointer since that's really what id returns
    pointer_bytes = struct.pack("@P", id(obj))
    return "".join(map(_replacer, pointer_bytes))


# Names available in every ``Scope``: a copy of this dict seeds the scope
# chain in ``Scope.__init__``.
DEFAULT_GLOBALS = {
    "Timestamp": pandas.Timestamp,
    "datetime": datetime.datetime,
    "True": True,
    "False": False,
    "list": list,
    "tuple": tuple,
    "inf": np.inf,
    "Inf": np.inf,
}


def _get_pretty_string(obj) -> str:
    """
    Return a prettier version of obj.

    Parameters
    ----------
    obj : object
        Object to pretty print

    Returns
    -------
    str
        Pretty print object repr
    """
    sio = StringIO()
    pprint.pprint(obj, stream=sio)  # noqa: T203
    return sio.getvalue()


class Scope:
    """
    Object to hold scope, with a few bells to deal with some custom syntax
    and contexts added by pandas.

    Parameters
    ----------
    level : int
    global_dict : dict or None, optional, default None
    local_dict : dict or Scope or None, optional, default None
    resolvers : list-like or None, optional, default ()
    target : object

    Attributes
    ----------
    level : int
    scope : DeepChainMap
    target : object
    temps : dict
    """

    __slots__ = ["level", "scope", "target", "resolvers", "temps"]
    level: int
    scope: DeepChainMap
    resolvers: DeepChainMap
    temps: dict

    def __init__(
        self, level: int, global_dict=None, local_dict=None, resolvers=(), target=None
    ) -> None:
        self.level = level + 1

        # shallow copy because we don't want to keep filling this up with what
        # was there before if there are multiple calls to Scope/_ensure_scope
        self.scope = DeepChainMap(DEFAULT_GLOBALS.copy())
        self.target = target

        if isinstance(local_dict, Scope):
            # inherit the variables, target and frame locals of the given Scope
            self.scope.update(local_dict.scope)
            if local_dict.target is not None:
                self.target = local_dict.target
            self._update(local_dict.level)

        # frame whose globals/locals are used when no dicts were passed in
        frame = sys._getframe(self.level)

        try:
            # shallow copy here because we don't want to replace what's in
            # scope when we align terms (alignment accesses the underlying
            # numpy array of pandas objects)
            scope_global = self.scope.new_child(
                (global_dict if global_dict is not None else frame.f_globals).copy()
            )
            self.scope = DeepChainMap(scope_global)
            if not isinstance(local_dict, Scope):
                scope_local = self.scope.new_child(
                    (local_dict if local_dict is not None else frame.f_locals).copy()
                )
                self.scope = DeepChainMap(scope_local)
        finally:
            del frame

        # assumes that resolvers are going from outermost scope to inner
        # Work on a private tuple: a caller-supplied list must not be extended
        # in place, and ``None`` is treated as "no resolvers".
        resolvers = () if resolvers is None else tuple(resolvers)
        if isinstance(local_dict, Scope):
            resolvers += tuple(local_dict.resolvers.maps)
        self.resolvers = DeepChainMap(*resolvers)
        self.temps = {}

    def __repr__(self) -> str:
        scope_keys = _get_pretty_string(list(self.scope.keys()))
        res_keys = _get_pretty_string(list(self.resolvers.keys()))
        return f"{type(self).__name__}(scope={scope_keys}, resolvers={res_keys})"

    @property
    def has_resolvers(self) -> bool:
        """
        Return whether we have any extra scope.

        For example, DataFrames pass their columns as resolvers during calls to
        ``DataFrame.eval()`` and ``DataFrame.query()``.

        Returns
        -------
        hr : bool
        """
        return bool(len(self.resolvers))

    def resolve(self, key: str, is_local: bool):
        """
        Resolve a variable name in a possibly local context.

        Parameters
        ----------
        key : str
            A variable name
        is_local : bool
            Flag indicating whether the variable is local or not (prefixed with
            the '@' symbol)

        Returns
        -------
        value : object
            The value of a particular variable

        Raises
        ------
        UndefinedVariableError
            If `key` is found neither in the relevant mapping nor in the
            temporaries.
        """
        try:
            # only look for locals in outer scope
            if is_local:
                return self.scope[key]

            # not a local variable so check in resolvers if we have them
            if self.has_resolvers:
                return self.resolvers[key]

            # if we're here that means that we have no locals and we also have
            # no resolvers
            assert not is_local and not self.has_resolvers
            return self.scope[key]
        except KeyError:
            try:
                # last ditch effort we look in temporaries
                # these are created when parsing indexing expressions
                # e.g., df[df > 0]
                return self.temps[key]
            except KeyError as err:
                raise UndefinedVariableError(key, is_local) from err

    def swapkey(self, old_key: str, new_key: str, new_value=None) -> None:
        """
        Replace a variable name, with a potentially new value.

        The first mapping (resolvers, then scope, then temporaries) that
        contains `old_key` receives ``new_key -> new_value``. Nothing happens
        if no mapping contains `old_key`.

        Parameters
        ----------
        old_key : str
            Current variable name to replace
        new_key : str
            New variable name to replace `old_key` with
        new_value : object
            Value to be replaced along with the possible renaming
        """
        # Build a fresh list in both branches: appending ``self.temps`` to
        # ``self.scope.maps`` itself would grow the scope chain on every call.
        if self.has_resolvers:
            maps = self.resolvers.maps + self.scope.maps
        else:
            maps = list(self.scope.maps)

        maps.append(self.temps)

        for mapping in maps:
            if old_key in mapping:
                mapping[new_key] = new_value
                return

    def _get_vars(self, stack, scopes: list[str]) -> None:
        """
        Get specifically scoped variables from a list of stack frames.

        Parameters
        ----------
        stack : list
            A list of stack frames as returned by ``inspect.stack()``
        scopes : sequence of strings
            A sequence containing valid stack frame attribute names that
            evaluate to a dictionary. For example, ('locals', 'globals')
        """
        variables = itertools.product(scopes, stack)
        for scope, (frame, _, _, _, _, _) in variables:
            try:
                # e.g. ``frame.f_locals``; pushed as a new front-most mapping
                d = getattr(frame, f"f_{scope}")
                self.scope = DeepChainMap(self.scope.new_child(d))
            finally:
                # won't remove it, but DECREF it
                # in Py3 this probably isn't necessary since frame won't be
                # scope after the loop
                del frame

    def _update(self, level: int) -> None:
        """
        Update the current scope by going back `level` levels.

        Parameters
        ----------
        level : int
        """
        sl = level + 1

        # add sl frames to the scope starting with the
        # most distant and overwriting with more current
        # makes sure that we can capture variable scope
        stack = inspect.stack()

        try:
            self._get_vars(stack[:sl], scopes=["locals"])
        finally:
            # explicitly delete the stack according to the advice here:
            # https://docs.python.org/3/library/inspect.html#inspect.Traceback
            del stack[:], stack

    def add_tmp(self, value) -> str:
        """
        Add a temporary variable to the scope.

        Parameters
        ----------
        value : object
            An arbitrary object to be assigned to a temporary variable.

        Returns
        -------
        str
            The name of the temporary variable created.
        """
        # name = <type name>_<current number of temps>_<hex id of this scope>
        name = f"{type(value).__name__}_{self.ntemps}_{_raw_hex_id(self)}"

        # add to inner most scope
        assert name not in self.temps
        self.temps[name] = value
        assert name in self.temps

        # only increment if the variable gets put in the scope
        return name

    @property
    def ntemps(self) -> int:
        """The number of temporary variables in this scope"""
        return len(self.temps)

    @property
    def full_scope(self) -> DeepChainMap:
        """
        Return the full scope for use with passing to engines transparently
        as a mapping.

        Returns
        -------
        vars : DeepChainMap
            All variables in this scope.
        """
        # lookup order: temporaries, then resolvers, then the scope chain
        maps = [self.temps] + self.resolvers.maps + self.scope.maps
        return DeepChainMap(*maps)


================================================
FILE: modin/core/dataframe/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Base Modin Dataframe functionality."""


================================================
FILE: modin/core/dataframe/algebra/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Modin Dataframe algebra (core operators)."""

from .binary import Binary
from .fold import Fold
from .groupby import GroupByReduce
from .map import Map
from .operator import Operator
from .reduce import Reduce
from .tree_reduce import TreeReduce

# Operator builder classes re-exported by this package.
__all__ = [
    "Operator",
    "Map",
    "TreeReduce",
    "Reduce",
    "Fold",
    "Binary",
    "GroupByReduce",
]


================================================
FILE: modin/core/dataframe/algebra/binary.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses builder class for Binary operator."""

from __future__ import annotations

import warnings
from typing import TYPE_CHECKING, Any, Callable, Optional, Union

import numpy as np
import pandas
from pandas.api.types import is_bool_dtype, is_scalar

from modin.error_message import ErrorMessage

from .operator import Operator

if TYPE_CHECKING:
    from pandas._typing import DtypeObj

    from modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler


def maybe_compute_dtypes_common_cast(
    first: PandasQueryCompiler,
    second: Union[PandasQueryCompiler, dict, list, tuple, np.ndarray, str, DtypeObj],
    trigger_computations: bool = False,
    axis: int = 0,
    func: Optional[
        Callable[[pandas.DataFrame, pandas.DataFrame], pandas.DataFrame]
    ] = None,
) -> Optional[pandas.Series]:
    """
    Predict the dtypes of a binary operation's result from the dtypes of its operands.

    Parameters
    ----------
    first : PandasQueryCompiler
        Left operand of the binary operation that would be performed later.
    second : PandasQueryCompiler, dict, list, tuple, np.ndarray, str or DtypeObj
        Right operand of the binary operation that would be performed later.
    trigger_computations : bool, default: False
        Whether reading lazy metadata of the operands is allowed. If False, the
        function returns None as soon as `first` (or `second`, when it is a query
        compiler) reports that its dtypes are not materialized.
    axis : int, default: 0
        Axis to perform the binary operation along.
    func : callable(pandas.DataFrame, pandas.DataFrame) -> pandas.DataFrame, optional
        If specified, dtypes of the shared columns are taken from the result of
        running this function on one-row sample frames that carry the operands'
        dtypes (the "try_sample" method, see ``Binary.register()`` docs for more
        details). If the attempt raises ``TypeError``, the common-type rule is used.

    Returns
    -------
    pandas.Series or None
        Precomputed dtypes indexed by column label, or None if there is not
        enough materialized metadata to compute them.

    Raises
    ------
    NotImplementedError
        If `second` is of a type this function doesn't know how to handle.

    Notes
    -----
    Columns that are present in only one operand get the dtype of NaN, since the
    result of the operation is NaN for them.
    """
    to_dtype = pandas.api.types.pandas_dtype
    second_is_qc = isinstance(second, type(first))

    if not trigger_computations and (
        not first.frame_has_materialized_dtypes
        or (second_is_qc and not second.frame_has_materialized_dtypes)
    ):
        return None

    left_dtypes = first.dtypes.to_dict()
    if second_is_qc:
        right_dtypes = second.dtypes.to_dict()
        left_labels = set(first.columns)
        right_labels = set(second.columns)
        shared_labels = left_labels & right_labels
        # Symmetric difference: labels owned by exactly one operand,
        # these columns are going to be NaN in the result
        nan_labels = left_labels ^ right_labels
    elif isinstance(second, dict):
        right_dtypes = {
            label: to_dtype(type(item)) for label, item in second.items()
        }
        left_labels = set(first.columns)
        right_labels = set(second.keys())
        shared_labels = left_labels & right_labels
        # Labels the dictionary has no value for, these columns are
        # going to be NaN in the result
        nan_labels = left_labels - right_labels
    else:
        n_columns = len(left_dtypes)
        if isinstance(second, (list, tuple)):
            if axis == 1:
                # one value per column, so each column gets its own dtype
                per_column = [to_dtype(type(item)) for item in second]
            else:
                # The sequence is a single column and so has a single dtype,
                # `np.array` is used to infer it
                per_column = [np.array(second).dtype] * n_columns
        elif is_scalar(second) or isinstance(second, np.ndarray):
            try:
                operand_dtype = getattr(second, "dtype", None)
                if not operand_dtype:
                    operand_dtype = to_dtype(type(second))
            except TypeError:
                # Some scalar types are not understood as dtypes
                # (e.g. 'datetime.datetime'), let pandas infer the dtype instead
                operand_dtype = pandas.Series(second).dtype
            per_column = [operand_dtype] * n_columns
        else:
            raise NotImplementedError(
                f"Can't compute common type for {type(first)} and {type(second)}."
            )
        # Shapes of the operands are validated at the front-end, so a mismatch
        # at the query compiler level is treated as an internal error
        ErrorMessage.catch_bugs_and_request_email(
            failure_condition=len(per_column) != n_columns,
            extra_log="Shapes of the operands of a binary operation don't match",
        )
        right_dtypes = {
            label: per_column[pos] for pos, label in enumerate(left_dtypes)
        }
        shared_labels = first.columns
        nan_labels = []

    nan_dtype = to_dtype(type(np.nan))

    dtypes = None
    if func is not None:
        try:
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore")
                # One-row frames with positional column labels, one per operand
                samples = [
                    pandas.DataFrame([[1] * len(shared_labels)]).astype(
                        {pos: side[label] for pos, label in enumerate(shared_labels)}
                    )
                    for side in (left_dtypes, right_dtypes)
                ]
                dtypes = func(*samples).dtypes.set_axis(shared_labels)
        except TypeError:
            # Sampling is known to misbehave for some dtypes (e.g. strings),
            # the common-type rule below is used in this case
            pass

    if dtypes is None:
        find_common = pandas.core.dtypes.cast.find_common_type
        dtypes = pandas.Series(
            [
                find_common([left_dtypes[label], right_dtypes[label]])
                for label in shared_labels
            ],
            index=shared_labels,
        )

    nan_part = pandas.Series([nan_dtype] * len(nan_labels), index=nan_labels)
    return pandas.concat([dtypes, nan_part])


def maybe_build_dtypes_series(
    first: PandasQueryCompiler,
    second: Union[PandasQueryCompiler, Any],
    dtype: DtypeObj,
    trigger_computations: bool = False,
) -> Optional[pandas.Series]:
    """
    Assign a single known dtype to every column of a binary operation's result.

    Parameters
    ----------
    first : PandasQueryCompiler
        Left operand of the binary operation that would be performed later.
    second : PandasQueryCompiler, list-like or scalar
        Right operand of the binary operation that would be performed later.
    dtype : DtypeObj
        Dtype that every column of the result has.
    trigger_computations : bool, default: False
        Whether reading lazy metadata of the operands is allowed. If False, the
        function returns None when `first` (or `second`, when it is a query
        compiler) doesn't have its columns cached.

    Returns
    -------
    pandas.Series or None
        Series mapping each result column to `dtype`, or None if there is not
        enough cached metadata to build it.

    Notes
    -----
    The result columns are the union of the operands' columns when `second` is a
    query compiler, and the columns of `first` otherwise.
    """
    second_is_qc = isinstance(second, type(first))

    if not trigger_computations and (
        not first.frame_has_columns_cache
        or (second_is_qc and not second.frame_has_columns_cache)
    ):
        return None

    result_labels = set(first.columns)
    if second_is_qc:
        result_labels = result_labels | set(second.columns)

    return pandas.Series([dtype] * len(result_labels), index=result_labels)


def try_compute_new_dtypes(
    first: PandasQueryCompiler,
    second: Union[PandasQueryCompiler, Any],
    infer_dtypes: Optional[str] = None,
    result_dtype: Optional[Union[DtypeObj, str]] = None,
    axis: int = 0,
    func: Optional[
        Callable[[pandas.DataFrame, pandas.DataFrame], pandas.DataFrame]
    ] = None,
) -> Optional[pandas.Series]:
    """
    Try to precompute dtypes of a binary operation's result.

    None is returned instead of dtypes if no inference method is requested, if
    the requested method is not one of the known ones, if the operands lack the
    metadata the method needs or if the type of `second` is not supported.

    Parameters
    ----------
    first : PandasQueryCompiler
        First operand of the binary operation.
    second : PandasQueryCompiler, list-like or scalar
        Second operand of the binary operation.
    infer_dtypes : {"common_cast", "try_sample", "bool", None}, default: None
        How dtypes should be inferred (see ``Binary.register`` doc for more info).
    result_dtype : np.dtype, optional
        NumPy dtype of the result. Only a boolean dtype is acted upon: it selects
        the same path as ``infer_dtypes="bool"``.
    axis : int, default: 0
        Axis to perform the binary operation along.
    func : callable(pandas.DataFrame, pandas.DataFrame) -> pandas.DataFrame, optional
        A callable to be used for the "try_sample" method.

    Returns
    -------
    pandas.Series or None

    Raises
    ------
    ValueError
        If `infer_dtypes` is "try_sample" and `func` is not specified.
    """
    if infer_dtypes is None and result_dtype is None:
        return None

    try:
        if infer_dtypes == "bool" or is_bool_dtype(result_dtype):
            return maybe_build_dtypes_series(
                first, second, dtype=pandas.api.types.pandas_dtype(bool)
            )
        if infer_dtypes == "common_cast":
            return maybe_compute_dtypes_common_cast(
                first, second, axis=axis, func=None
            )
        if infer_dtypes == "try_sample":
            if func is None:
                raise ValueError(
                    "'func' must be specified if dtypes infering method is 'try_sample'"
                )
            return maybe_compute_dtypes_common_cast(
                first, second, axis=axis, func=func
            )
    except NotImplementedError:
        # the helper doesn't know how to treat this kind of `second`
        return None

    # Only a boolean `result_dtype` is handled for now as that's the only value
    # being passed here; how an arbitrary dtype should be treated is left open
    # until a real case for it appears.
    return None


class Binary(Operator):
    """Builder class for Binary operator."""

    @classmethod
    def register(
        cls,
        func: Callable[..., pandas.DataFrame],
        join_type: str = "outer",
        sort: bool = None,
        labels: str = "replace",
        infer_dtypes: Optional[str] = None,
    ) -> Callable[..., PandasQueryCompiler]:
        """
        Build template binary operator.

        Parameters
        ----------
        func : callable(pandas.DataFrame, [pandas.DataFrame, list-like, scalar]) -> pandas.DataFrame
            Binary function to execute. Has to accept at least two arguments.
        join_type : {'left', 'right', 'outer', 'inner', None}, default: 'outer'
            Type of join that will be used if indices of operands are not aligned.
        sort : bool, default: None
            Whether to sort index and columns or not.
        labels : {"keep", "replace", "drop"}, default: "replace"
            Whether to keep labels from the left Modin DataFrame, replace them with
            labels from the joined DataFrame or drop them altogether so that they
            are computed lazily later.
        infer_dtypes : {"common_cast", "try_sample", "bool", None}, default: None
            How dtypes should be inferred.
                * If "common_cast", casts to common dtype of operand columns.
                * If "try_sample", creates small pandas DataFrames with dtypes of operands and
                  runs the `func` on them to determine output dtypes. If a ``TypeError`` is raised
                  during this process, fallback to "common_cast" method.
                * If "bool", dtypes would be a boolean series with same size as that of operands.
                * If ``None``, do not infer new dtypes (they will be computed manually once accessed).

        Returns
        -------
        callable
            Function that takes query compiler and executes binary operation.
        """

        def caller(
            query_compiler: PandasQueryCompiler,
            other: Union[PandasQueryCompiler, Any],
            broadcast: bool = False,
            *args: tuple,
            dtypes: Optional[Union[DtypeObj, str]] = None,
            **kwargs: dict,
        ) -> PandasQueryCompiler:
            """
            Apply binary `func` to passed operands.

            Parameters
            ----------
            query_compiler : PandasQueryCompiler
                Left operand of `func`.
            other : PandasQueryCompiler, list-like object or scalar
                Right operand of `func`.
            broadcast : bool, default: False
                If `other` is a one-column query compiler, indicates whether it is a Series or not.
                Frames and Series have to be processed differently, however they can't be told
                apart at the query compiler level, so this parameter is a hint passed from
                the high-level API.
            *args : tuple,
                Arguments that will be passed to `func`.
            dtypes : "copy", scalar dtype or None, default: None
                Dtypes of the result. "copy" to keep old dtypes and None to compute them on demand.
            **kwargs : dict,
                Arguments that will be passed to `func`.

            Returns
            -------
            PandasQueryCompiler
                Result of binary function.
            """
            axis: int = kwargs.get("axis", 0)
            left_frame = query_compiler._modin_frame

            if broadcast and isinstance(other, type(query_compiler)):
                assert (
                    len(other.columns) == 1
                ), "Invalid broadcast argument for `broadcast_apply`, too many columns: {}".format(
                    len(other.columns)
                )
                # An individual column or row is always stored as a single-column
                # Modin DataFrame, so it has to be transposed to act along `axis=1`
                if axis == 1:
                    other = other.transpose()

            if dtypes != "copy":
                dtypes = try_compute_new_dtypes(
                    query_compiler, other, infer_dtypes, dtypes, axis, func
                )

            shape_hint = None
            if isinstance(other, type(query_compiler)):
                # Both operands being the same single column lets the result
                # be hinted as a column; only already materialized labels are read
                if (
                    query_compiler.frame_has_materialized_columns
                    and other.frame_has_materialized_columns
                    and len(query_compiler.columns) == 1
                    and len(other.columns) == 1
                    and query_compiler.columns.equals(other.columns)
                ):
                    shape_hint = "column"

                if broadcast:
                    new_modin_frame = left_frame.broadcast_apply(
                        axis,
                        lambda left, right: func(
                            left, right.squeeze(), *args, **kwargs
                        ),
                        other._modin_frame,
                        join_type=join_type,
                        labels=labels,
                        dtypes=dtypes,
                    )
                else:
                    new_modin_frame = left_frame.n_ary_op(
                        lambda x, y: func(x, y, *args, **kwargs),
                        [other._modin_frame],
                        join_type=join_type,
                        sort=sort,
                        labels=labels,
                        dtypes=dtypes,
                    )
            elif isinstance(other, (dict, list, np.ndarray, pandas.Series)):
                # TODO: it's possible to chunk the `other` and broadcast them to partitions
                # accordingly, in that way we will be able to use more efficient `._modin_frame.map()`
                new_modin_frame = left_frame.apply_full_axis(
                    axis,
                    lambda df: func(df, other, *args, **kwargs),
                    new_index=query_compiler.index,
                    new_columns=query_compiler.columns,
                    dtypes=dtypes,
                )
            else:
                if (
                    query_compiler.frame_has_materialized_columns
                    and len(left_frame.columns) == 1
                    and is_scalar(other)
                ):
                    shape_hint = "column"
                new_modin_frame = left_frame.map(
                    func,
                    func_args=(other, *args),
                    func_kwargs=kwargs,
                    dtypes=dtypes,
                    lazy=True,
                )

            return query_compiler.__constructor__(
                new_modin_frame, shape_hint=shape_hint
            )

        return caller


================================================
FILE: modin/core/dataframe/algebra/default2pandas/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module default2pandas provides templates for a query compiler default-to-pandas methods."""

from .binary import BinaryDefault
from .cat import CatDefault
from .dataframe import DataFrameDefault
from .datetime import DateTimeDefault
from .default import DefaultMethod
from .groupby import GroupByDefault, SeriesGroupByDefault
from .list import ListDefault
from .resample import ResampleDefault
from .rolling import ExpandingDefault, RollingDefault
from .series import SeriesDefault
from .str import StrDefault
from .struct import StructDefault

# Default-to-pandas builder classes imported above that form the public interface of this package.
__all__ = [
    "DataFrameDefault",
    "DateTimeDefault",
    "SeriesDefault",
    "StrDefault",
    "BinaryDefault",
    "ResampleDefault",
    "RollingDefault",
    "ExpandingDefault",
    "DefaultMethod",
    "CatDefault",
    "GroupByDefault",
    "SeriesGroupByDefault",
    "ListDefault",
    "StructDefault",
]


================================================
FILE: modin/core/dataframe/algebra/default2pandas/binary.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses default binary functions builder class."""

import pandas
from pandas.core.dtypes.common import is_list_like

from .default import DefaultMethod


class BinaryDefault(DefaultMethod):
    """Build default-to-pandas methods which executes binary functions."""

    @classmethod
    def build_default_to_pandas(cls, fn, fn_name):
        """
        Build function that do fallback to pandas for passed binary `fn`.

        Parameters
        ----------
        fn : callable
            Binary function to apply to the casted to pandas frame and other operand.
        fn_name : str
            Function name which will be shown in default-to-pandas warning message.

        Returns
        -------
        callable
            Function that takes query compiler, does fallback to pandas and applies binary `fn`
            to the casted to pandas frame.

        Notes
        -----
        The built wrapper consumes the ``broadcast``, ``squeeze_other`` and
        ``squeeze_self`` keyword arguments itself, they are never forwarded to `fn`.
        """

        def bin_ops_wrapper(df, other, *args, **kwargs):
            """Apply specified binary function to the passed operands."""
            # Every control flag is popped unconditionally. Combining the pops
            # with a short-circuiting `or` would leave "squeeze_other" in `kwargs`
            # whenever "broadcast" is truthy, leaking it into the `fn` call.
            broadcast = kwargs.pop("broadcast", False)
            squeeze_other = kwargs.pop("squeeze_other", False)
            squeeze_self = kwargs.pop("squeeze_self", False)

            # `broadcast` is treated as a synonym of `squeeze_other`
            if broadcast or squeeze_other:
                other = other.squeeze(axis=1)

            if squeeze_self:
                df = df.squeeze(axis=1)

            result = fn(df, other, *args, **kwargs)
            # Wrap a list-like result that is not a pandas object into a DataFrame
            if not isinstance(
                result, (pandas.Series, pandas.DataFrame)
            ) and is_list_like(result):
                result = pandas.DataFrame(result)
            return result

        return super().build_default_to_pandas(bin_ops_wrapper, fn_name)


================================================
FILE: modin/core/dataframe/algebra/default2pandas/cat.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses default applied-on-category functions builder class."""

from .series import SeriesDefault


class CatDefault(SeriesDefault):
    """Builder for default-to-pandas methods which is executed under category accessor."""

    @classmethod
    def frame_wrapper(cls, df):
        """
        Squeeze the passed frame along the column axis and return its ``.cat`` accessor.

        Parameters
        ----------
        df : pandas.DataFrame
            Frame to take the accessor from.

        Returns
        -------
        pandas.core.arrays.categorical.CategoricalAccessor
        """
        squeezed = df.squeeze(axis=1)
        return squeezed.cat


================================================
FILE: modin/core/dataframe/algebra/default2pandas/dataframe.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses default DataFrame functions builder class."""

import pandas

from modin.utils import _inherit_docstrings

from .default import DefaultMethod


# No docstrings are written here on purpose: the decorator is expected to
# supply them from ``DefaultMethod`` (NOTE(review): confirm against
# ``modin.utils._inherit_docstrings``).
@_inherit_docstrings(DefaultMethod)
class DataFrameDefault(DefaultMethod):
    # Functions registered by their string name are looked up on ``pandas.DataFrame``
    # when no explicit `obj_type` is given to ``register``.
    DEFAULT_OBJECT_TYPE = pandas.DataFrame


================================================
FILE: modin/core/dataframe/algebra/default2pandas/datetime.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses default applied-on-datetime functions builder class."""

from .series import SeriesDefault


class DateTimeDefault(SeriesDefault):
    """Builder for default-to-pandas methods which is executed under datetime accessor."""

    @classmethod
    def frame_wrapper(cls, df):
        """
        Squeeze the passed frame along the column axis and return its ``.dt`` accessor.

        Parameters
        ----------
        df : pandas.DataFrame
            Frame to take the accessor from.

        Returns
        -------
        pandas.core.indexes.accessors.DatetimeProperties
        """
        squeezed = df.squeeze(axis=1)
        return squeezed.dt


================================================
FILE: modin/core/dataframe/algebra/default2pandas/default.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses default functions builder class."""

import pandas
from pandas.core.dtypes.common import is_list_like

from modin.core.dataframe.algebra.operator import Operator
from modin.utils import MODIN_UNNAMED_SERIES_LABEL, try_cast_to_pandas


class ObjTypeDeterminer:
    """
    Class that routes work to the frame.

    Any attribute that is not found on an instance in the regular way resolves
    to a function that looks the attribute of the same name up on the object
    passed to that function.
    """

    def __getattr__(self, key):
        """
        Build function that resolves attribute `key` on the object passed to it.

        Parameters
        ----------
        key : str
            Name of the attribute to resolve.

        Returns
        -------
        callable
            Function that takes an object as its first argument and returns either
            the result of calling its `key` attribute with the remaining arguments
            or, if the attribute is not callable, the attribute itself.
        """

        def func(df, *args, **kwargs):
            """Access specified attribute of the passed object and call it if it's callable."""
            attr = getattr(df, key)
            # plain (non-callable) attributes are returned as they are
            return attr(*args, **kwargs) if callable(attr) else attr

        return func


class DefaultMethod(Operator):
    """
    Builder for default-to-pandas methods.

    Attributes
    ----------
    OBJECT_TYPE : str
        Object type name that will be shown in default-to-pandas warning message.
    DEFAULT_OBJECT_TYPE : object
        Default place to search for a function when it is passed to ``register``
        by its name.
    """

    OBJECT_TYPE = "DataFrame"
    DEFAULT_OBJECT_TYPE = ObjTypeDeterminer

    @classmethod
    def register(cls, func, obj_type=None, inplace=None, fn_name=None):
        """
        Build function that falls back to the default pandas implementation of the passed `func`.

        Parameters
        ----------
        func : callable or str
            Function to apply to the casted to pandas frame or to its property accessed
            by ``cls.frame_wrapper``.
        obj_type : object, optional
            If `func` is a string with a function name then `obj_type` provides an
            object to search the function in. Defaults to ``cls.DEFAULT_OBJECT_TYPE``.
        inplace : bool, optional
            If True return the object to which `func` was applied, otherwise return
            the result of `func`. If not specified, the ``inplace`` keyword argument
            of the actual call is used (False when it is absent).
        fn_name : str, optional
            Function name which will be shown in default-to-pandas warning message.
            If not specified, the name is deduced from `func`.

        Returns
        -------
        callable
            Function that takes query compiler, does fallback to pandas and applies `func`
            to the casted to pandas frame or to its property accessed by ``cls.frame_wrapper``.
        """
        if isinstance(func, str):
            if obj_type is None:
                obj_type = cls.DEFAULT_OBJECT_TYPE
            if isinstance(obj_type, type) and issubclass(obj_type, ObjTypeDeterminer):
                # ``ObjTypeDeterminer`` resolves names through an instance-level
                # ``__getattr__``; a lookup on the bare class would raise
                # ``AttributeError``, so the class is instantiated first.
                obj_type = obj_type()
                if fn_name is None:
                    # The function built by the determiner has a generic name,
                    # the requested name is far more informative for the warning.
                    fn_name = func
            fn = getattr(obj_type, func)
        else:
            fn = func

        if type(fn) is property:
            if fn_name is None and hasattr(fn, "fget"):
                # When `fn` is a property, `str(fn)` will be something like
                # "<property object at 0x7f8671e09d10>". We instead check its `fget` method to get
                # the name of the property.
                # Note that this method is still imperfect because we cannot get the class name
                # of the property. For example, we can only get "hour" from `Series.dt.hour`.
                fn_name = f"<property fget:{getattr(fn.fget, '__name__', 'noname')}>"
            fn = cls.build_property_wrapper(fn)
        else:
            fn_name = getattr(fn, "__name__", str(fn)) if fn_name is None else fn_name

        # Results of these functions must be returned as they are even when they are
        # neither a Series nor a DataFrame. The membership test does not depend on the
        # call arguments, so it is evaluated once here instead of on every call.
        keep_raw_result = func in (
            "to_numpy",
            pandas.DataFrame.to_numpy,
            "align",
            pandas.DataFrame.align,
            "divmod",
            pandas.Series.divmod,
            "rdivmod",
            pandas.Series.rdivmod,
            "to_list",
            pandas.Series.to_list,
            "corr",
            pandas.Series.corr,
            "to_dict",
            pandas.Series.to_dict,
            "mean",
            pandas.DataFrame.mean,
            "median",
            pandas.DataFrame.median,
            "skew",
            pandas.DataFrame.skew,
            "kurt",
            pandas.DataFrame.kurt,
        )

        def applyier(df, *args, **kwargs):
            """
            Apply target function to the casted to pandas frame.

            This function is directly applied to the casted to pandas frame, executes target
            function under it and processes result so it is possible to create a valid
            query compiler from it.
            """
            # pandas default implementation doesn't know how to handle `dtypes` keyword argument
            kwargs.pop("dtypes", None)
            df = cls.frame_wrapper(df)
            result = fn(df, *args, **kwargs)

            if (
                not isinstance(result, (pandas.Series, pandas.DataFrame))
                and not keep_raw_result
            ):
                # When applying a DatetimeProperties or TimedeltaProperties function,
                # if we don't specify the dtype for the DataFrame, the frame might
                # get the wrong dtype, e.g. for to_pydatetime in
                # https://github.com/modin-project/modin/issues/4436
                astype_kwargs = {}
                dtype = getattr(result, "dtype", None)
                if dtype is not None and isinstance(
                    df,
                    (
                        pandas.core.indexes.accessors.DatetimeProperties,
                        pandas.core.indexes.accessors.TimedeltaProperties,
                    ),
                ):
                    astype_kwargs["dtype"] = dtype
                # Scalars are wrapped into a list to get a single-cell frame
                result = (
                    pandas.DataFrame(result, **astype_kwargs)
                    if is_list_like(result)
                    else pandas.DataFrame([result], **astype_kwargs)
                )
            if isinstance(result, pandas.Series):
                # Unnamed Series get the special label before being turned into a frame
                if result.name is None:
                    result.name = MODIN_UNNAMED_SERIES_LABEL
                result = result.to_frame()

            # An explicit `inplace` passed to `register` takes precedence over
            # the `inplace` keyword argument of the call itself.
            inplace_method = kwargs.get("inplace", False)
            if inplace is not None:
                inplace_method = inplace
            return result if not inplace_method else df

        return cls.build_wrapper(applyier, fn_name)

    @classmethod
    # FIXME: this method is almost a duplicate of `cls.build_default_to_pandas`.
    # Those two methods should be merged into a single one.
    def build_wrapper(cls, fn, fn_name):
        """
        Build function that falls back to pandas for the passed `fn`.

        In comparison with ``cls.build_default_to_pandas`` this method also
        casts function arguments to pandas before doing fallback.

        Parameters
        ----------
        fn : callable
            Function to apply to the defaulted frame.
        fn_name : str
            Function name which will be shown in default-to-pandas warning message.

        Returns
        -------
        callable
            Method that does fallback to pandas and applies `fn` to the pandas frame.
        """
        wrapper = cls.build_default_to_pandas(fn, fn_name)

        def args_cast(self, *args, **kwargs):
            """
            Preprocess `default_to_pandas` function arguments and apply default function.

            Cast all Modin objects that function arguments contain to its pandas representation.
            """
            args = try_cast_to_pandas(args)
            kwargs = try_cast_to_pandas(kwargs)
            return wrapper(self, *args, **kwargs)

        return args_cast

    @classmethod
    def build_property_wrapper(cls, prop):
        """
        Build function that accesses specified property of the frame.

        Parameters
        ----------
        prop : property
            Property object whose getter (``prop.fget``) is applied to the frame.

        Returns
        -------
        callable
            Function that takes DataFrame and returns its value of `prop` property.
        """

        def property_wrapper(df):
            """Get specified property of the passed object."""
            return prop.fget(df)

        return property_wrapper

    @classmethod
    def build_default_to_pandas(cls, fn, fn_name):
        """
        Build function that falls back to pandas for the passed `fn`.

        Parameters
        ----------
        fn : callable
            Function to apply to the defaulted frame. Its ``__name__`` is overwritten
            with a description built from ``cls.OBJECT_TYPE`` and `fn_name`.
        fn_name : str
            Function name which will be shown in default-to-pandas warning message.

        Returns
        -------
        callable
            Method that does fallback to pandas and applies `fn` to the pandas frame.
        """
        fn.__name__ = f"<function {cls.OBJECT_TYPE}.{fn_name}>"

        def wrapper(self, *args, **kwargs):
            """Do fallback to pandas for the specified function."""
            return self.default_to_pandas(fn, *args, **kwargs)

        return wrapper

    @classmethod
    def frame_wrapper(cls, df):
        """
        Extract frame property to apply function on.

        This method is executed under casted to pandas frame right before applying
        a function passed to `register`, which gives an ability to transform frame somehow
        or access its properties, by overriding this method in a child class.

        Parameters
        ----------
        df : pandas.DataFrame

        Returns
        -------
        pandas.DataFrame

        Notes
        -----
        Being a base implementation, this particular method does nothing with passed frame.
        """
        return df


================================================
FILE: modin/core/dataframe/algebra/default2pandas/groupby.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses default GroupBy functions builder class."""

import warnings
from typing import Any

import pandas
from pandas.core.dtypes.common import is_list_like

# Defines a set of string names of functions that are executed in a transform-way in groupby
from pandas.core.groupby.base import transformation_kernels

from modin.utils import MODIN_UNNAMED_SERIES_LABEL, hashable

from .default import DefaultMethod


# FIXME: there is no sense in keeping `GroupBy` and `GroupByDefault` logic in different
# classes. They should be combined.
class GroupBy:
    """Builder for GroupBy aggregation functions."""

    # Names and pandas methods that are treated as the generic "aggregate" entry point
    agg_aliases = [
        "agg",
        "dict_agg",
        pandas.core.groupby.DataFrameGroupBy.agg,
        pandas.core.groupby.DataFrameGroupBy.aggregate,
    ]

    @staticmethod
    def is_transformation_kernel(agg_func: Any) -> bool:
        """
        Check whether a passed aggregation function is a transformation.

        Transformation means that the result of the function will be broadcasted
        to the frame's original shape.

        Parameters
        ----------
        agg_func : Any

        Returns
        -------
        bool
        """
        return hashable(agg_func) and agg_func in transformation_kernels.union(
            # these methods also produce a transformation-like result in the sense we
            # understand it (they're non-aggregative functions), however they are
            # missing in the pandas dictionary
            {"nth", "head", "tail"}
        )

    @classmethod
    def _call_groupby(cls, df, *args, **kwargs):  # noqa: PR01
        """Call .groupby() on passed `df` with ``FutureWarning`` suppressed."""
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=FutureWarning)
            return df.groupby(*args, **kwargs)

    @classmethod
    def validate_by(cls, by):
        """
        Build valid `by` parameter for `pandas.DataFrame.groupby`.

        Cast all DataFrames in `by` parameter to Series or list of Series in case
        of multi-column frame.

        Parameters
        ----------
        by : DataFrame, Series, index label or list of such
            Object which indicates groups for GroupBy.

        Returns
        -------
        Series, index label or list of such
            By parameter with all DataFrames casted to Series.

        Notes
        -----
        A Series named with ``MODIN_UNNAMED_SERIES_LABEL`` gets its name reset
        to None in place.
        """

        def try_cast_series(df):
            """Cast one-column frame to Series and strip the special unnamed label."""
            if isinstance(df, pandas.DataFrame):
                df = df.squeeze(axis=1)
            if not isinstance(df, pandas.Series):
                return df
            if df.name == MODIN_UNNAMED_SERIES_LABEL:
                df.name = None
            return df

        if isinstance(by, pandas.DataFrame):
            by = [try_cast_series(column) for _, column in by.items()]
        elif isinstance(by, pandas.Series):
            by = [try_cast_series(by)]
        elif isinstance(by, list):
            by = [try_cast_series(o) for o in by]
        return by

    @classmethod
    def inplace_applyier_builder(cls, key, func=None):
        """
        Bind actual aggregation function to the GroupBy aggregation method.

        Parameters
        ----------
        key : callable
            Function that takes GroupBy object and evaluates passed aggregation function.
        func : callable or str, optional
            Function that takes DataFrame and aggregate its data. Will be applied
            to each group at the grouped frame.

        Returns
        -------
        callable
            Function that executes aggregation under GroupBy object.
        """
        # `func` (when given) is always passed to `key` right after the GroupBy object
        inplace_args = [] if func is None else [func]

        def inplace_applyier(grp, *func_args, **func_kwargs):
            """Call `key` on the GroupBy object with the bound function and call arguments."""
            return key(grp, *inplace_args, *func_args, **func_kwargs)

        return inplace_applyier

    @classmethod
    def get_func(cls, key, **kwargs):
        """
        Extract aggregation function from groupby arguments.

        Parameters
        ----------
        key : callable or str
            Default aggregation function. If aggregation function is not specified
            via groupby arguments, then `key` function is used.
        **kwargs : dict
            GroupBy arguments that may contain aggregation function.

        Returns
        -------
        callable
            Aggregation function.

        Notes
        -----
        There are two ways of how groupby aggregation can be invoked:
            1. Explicitly with query compiler method: `qc.groupby_sum()`.
            2. By passing aggregation function as an argument: `qc.groupby_agg("sum")`.
        Both are going to produce the same result, however in the first case actual aggregation
        function can be extracted from the method name, while for the second only from the method arguments.
        """
        # "agg_func" takes precedence over "func_dict" when both are present
        if "agg_func" in kwargs:
            return cls.inplace_applyier_builder(key, kwargs["agg_func"])
        elif "func_dict" in kwargs:
            return cls.inplace_applyier_builder(key, kwargs["func_dict"])
        else:
            return cls.inplace_applyier_builder(key)

    @classmethod
    def build_aggregate_method(cls, key):
        """
        Build function for `QueryCompiler.groupby_agg` that can be executed as default-to-pandas.

        Parameters
        ----------
        key : callable or str
            Default aggregation function. If aggregation function is not specified
            via groupby arguments, then `key` function is used.

        Returns
        -------
        callable
            Function that executes groupby aggregation.
        """

        def fn(
            df,
            by,
            axis,
            groupby_kwargs,
            agg_args,
            agg_kwargs,
            drop=False,
            **kwargs,
        ):
            """Group DataFrame and apply aggregation function to each group."""
            by = cls.validate_by(by)

            grp = cls._call_groupby(df, by, axis=axis, **groupby_kwargs)
            agg_func = cls.get_func(key, **kwargs)
            result = agg_func(grp, *agg_args, **agg_kwargs)

            return result

        return fn

    @classmethod
    def build_groupby_reduce_method(cls, agg_func):
        """
        Build function for `QueryCompiler.groupby_*` that can be executed as default-to-pandas.

        Parameters
        ----------
        agg_func : callable or str
            Default aggregation function. If aggregation function is not specified
            via groupby arguments, then `agg_func` function is used.

        Returns
        -------
        callable
            Function that executes groupby aggregation.
        """

        def fn(
            df, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False, **kwargs
        ):
            """Group DataFrame and apply aggregation function to each group."""
            # Anything but a pandas Series/DataFrame `by` is grouped on directly,
            # without any post-processing of the `as_index` parameter.
            if not isinstance(by, (pandas.Series, pandas.DataFrame)):
                by = cls.validate_by(by)
                grp = cls._call_groupby(df, by, axis=axis, **groupby_kwargs)
                grp_agg_func = cls.get_func(agg_func, **kwargs)
                return grp_agg_func(
                    grp,
                    *agg_args,
                    **agg_kwargs,
                )

            # A single-column frame becomes a Series, a multi-column one stays a frame
            if isinstance(by, pandas.DataFrame):
                by = by.squeeze(axis=1)
            # A `by` Series that is equal to a column of `df` is replaced by its label
            if (
                drop
                and isinstance(by, pandas.Series)
                and by.name in df
                and df[by.name].equals(by)
            ):
                by = [by.name]
            # Columns of a multi-column `by` that are missing in `df` are attached
            # to it, so grouping can be done on column labels.
            if isinstance(by, pandas.DataFrame):
                df = pandas.concat([df] + [by[[o for o in by if o not in df]]], axis=1)
                by = list(by.columns)

            # The aggregation itself is always computed with `as_index=True`,
            # the requested value of `as_index` is handled afterwards.
            groupby_kwargs = groupby_kwargs.copy()
            as_index = groupby_kwargs.pop("as_index", True)
            groupby_kwargs["as_index"] = True

            grp = cls._call_groupby(df, by, axis=axis, **groupby_kwargs)
            func = cls.get_func(agg_func, **kwargs)
            result = func(grp, *agg_args, **agg_kwargs)
            method = kwargs.get("method")

            if isinstance(result, pandas.Series):
                result = result.to_frame(
                    MODIN_UNNAMED_SERIES_LABEL if result.name is None else result.name
                )

            if not as_index:
                if isinstance(by, pandas.Series):
                    # 1. If `drop` is True then 'by' Series represents a column from the
                    #    source frame and so the 'by' is internal.
                    # 2. If method is 'size' then any 'by' is considered to be internal.
                    #    This is a hacky legacy from the ``groupby_size`` implementation:
                    #    https://github.com/modin-project/modin/issues/3739
                    internal_by = (by.name,) if drop or method == "size" else tuple()
                else:
                    internal_by = by

                # NOTE(review): when `by` is still a Series here, ``len(by)`` is its number
                # of rows rather than the number of grouping keys - confirm this is intended.
                cls.handle_as_index_for_dataframe(
                    result,
                    internal_by,
                    by_cols_dtypes=(
                        df.index.dtypes.values
                        if isinstance(df.index, pandas.MultiIndex)
                        else (df.index.dtype,)
                    ),
                    by_length=len(by),
                    drop=drop,
                    method=method,
                    inplace=True,
                )

            if result.index.name == MODIN_UNNAMED_SERIES_LABEL:
                result.index.name = None

            return result

        return fn

    @classmethod
    def is_aggregate(cls, key):  # noqa: PR01
        """Check whether `key` is an alias for pandas.GroupBy.aggregation method."""
        return key in cls.agg_aliases

    @classmethod
    def build_groupby(cls, func):
        """
        Build function that groups DataFrame and applies aggregation function to every group.

        Parameters
        ----------
        func : callable or str
            Default aggregation function. If aggregation function is not specified
            via groupby arguments, then `func` function is used.

        Returns
        -------
        callable
            Function that takes pandas DataFrame and does GroupBy aggregation.
        """
        if cls.is_aggregate(func):
            return cls.build_aggregate_method(func)
        return cls.build_groupby_reduce_method(func)

    @classmethod
    def handle_as_index_for_dataframe(
        cls,
        result,
        internal_by_cols,
        by_cols_dtypes=None,
        by_length=None,
        selection=None,
        partition_idx=0,
        drop=True,
        method=None,
        inplace=False,
    ):
        """
        Handle `as_index=False` parameter for the passed GroupBy aggregation result.

        Parameters
        ----------
        result : DataFrame
            Frame containing GroupBy aggregation result computed with `as_index=True`
            parameter (group names are located at the frame's index).
        internal_by_cols : list-like
            Internal 'by' columns.
        by_cols_dtypes : list-like, optional
            Data types of the internal 'by' columns. Required to do special casing
            in case of categorical 'by'. If not specified, assume that there is no
            categorical data in 'by'.
        by_length : int, optional
            Amount of keys to group on (including frame columns and external objects like list, Series, etc.)
            If not specified, consider `by_length` to be equal ``len(internal_by_cols)``.
        selection : label or list of labels, optional
            Set of columns that were explicitly selected for aggregation (for example
            via dict-aggregation). If not specified assuming that aggregation was
            applied to all of the available columns.
        partition_idx : int, default: 0
            Positional index of the current partition.
        drop : bool, default: True
            Indicates whether or not any of the `by` data came from the same frame.
        method : str, optional
            Name of the groupby function. This is a hint to be able to do special casing.
            Note: this parameter is a legacy from the ``groupby_size`` implementation,
            it's a hacky one and probably will be removed in the future: https://github.com/modin-project/modin/issues/3739.
        inplace : bool, default: False
            Modify the DataFrame in place (do not create a new object).

        Returns
        -------
        DataFrame
            GroupBy aggregation result with the considered `as_index=False` parameter.
        """
        if not inplace:
            result = result.copy()

        reset_index, drop, lvls_to_drop, cols_to_drop = cls.handle_as_index(
            result_cols=result.columns,
            result_index_names=result.index.names,
            internal_by_cols=internal_by_cols,
            by_cols_dtypes=by_cols_dtypes,
            by_length=by_length,
            selection=selection,
            partition_idx=partition_idx,
            drop=drop,
            method=method,
        )

        # Apply the computed hints: first get rid of the conflicting index levels
        # and columns, then reset the index if required.
        if len(lvls_to_drop) > 0:
            result.index = result.index.droplevel(lvls_to_drop)
        if len(cols_to_drop) > 0:
            result.drop(columns=cols_to_drop, inplace=True)
        if reset_index:
            result.reset_index(drop=drop, inplace=True)
        return result

    @staticmethod
    def handle_as_index(
        result_cols,
        result_index_names,
        internal_by_cols,
        by_cols_dtypes=None,
        by_length=None,
        selection=None,
        partition_idx=0,
        drop=True,
        method=None,
    ):
        """
        Compute hints to process ``as_index=False`` parameter for the GroupBy result.

        This function resolves naming conflicts of the index levels to insert and the column labels
        for the GroupBy result. The logic of this function assumes that the initial GroupBy result
        was computed as ``as_index=True``.

        Parameters
        ----------
        result_cols : pandas.Index
            Columns of the GroupBy result.
        result_index_names : list-like
            Index names of the GroupBy result.
        internal_by_cols : list-like
            Internal 'by' columns.
        by_cols_dtypes : list-like, optional
            Data types of the internal 'by' columns. Required to do special casing
            in case of categorical 'by'. If not specified, assume that there is no
            categorical data in 'by'.
        by_length : int, optional
            Amount of keys to group on (including frame columns and external objects like list, Series, etc.)
            If not specified, consider `by_length` to be equal ``len(internal_by_cols)``.
        selection : label or list of labels, optional
            Set of columns that were explicitly selected for aggregation (for example
            via dict-aggregation). If not specified assuming that aggregation was
            applied to all of the available columns.
        partition_idx : int, default: 0
            Positional index of the current partition.
        drop : bool, default: True
            Indicates whether or not any of the `by` data came from the same frame.
        method : str, optional
            Name of the groupby function. This is a hint to be able to do special casing.
            Note: this parameter is a legacy from the ``groupby_size`` implementation,
            it's a hacky one and probably will be removed in the future: https://github.com/modin-project/modin/issues/3739.

        Returns
        -------
        reset_index : bool
            Indicates whether to reset index to the default one (0, 1, 2 ... n) at this partition.
        drop_index : bool
            If `reset_index` is True, indicates whether to drop all index levels (True) or insert them into the
            resulting columns (False).
        lvls_to_drop : list of ints
            Contains numeric indices of the levels of the result index to drop as intersected.
        cols_to_drop : list of labels
            Contains labels of the columns to drop from the result as intersected.

        Examples
        --------
        >>> groupby_result = compute_groupby_without_processing_as_index_parameter()
        >>> if not as_index:
        >>>     reset_index, drop, lvls_to_drop, cols_to_drop = handle_as_index(**extract_required_params(groupby_result))
        >>>     if len(lvls_to_drop) > 0:
        >>>         groupby_result.index = groupby_result.index.droplevel(lvls_to_drop)
        >>>     if len(cols_to_drop) > 0:
        >>>         groupby_result = groupby_result.drop(columns=cols_to_drop)
        >>>     if reset_index:
        >>>         groupby_result_with_processed_as_index_parameter = groupby_result.reset_index(drop=drop)
        >>> else:
        >>>     groupby_result_with_processed_as_index_parameter = groupby_result
        """
        if by_length is None:
            by_length = len(internal_by_cols)

        reset_index = method != "transform" and (by_length > 0 or selection is not None)

        # If the method is "size" then the result contains only one unique named column
        # and we don't have to worry about any naming conflicts, so inserting all of
        # the "by" into the result (just a fast-path)
        if method == "size":
            return reset_index, False, [], []

        # Pandas logic of resolving naming conflicts is the following:
        #   1. If any categorical is in 'by' and 'by' is multi-column, then the categorical
        #      index is prioritized: drop intersected columns and insert all of the 'by' index
        #      levels to the frame as columns.
        #   2. Otherwise, aggregation result is prioritized: drop intersected index levels and
        #      insert the filtered ones to the frame as columns.
        if by_cols_dtypes is not None:
            keep_index_levels = (
                by_length > 1
                and selection is None
                and any(isinstance(x, pandas.CategoricalDtype) for x in by_cols_dtypes)
            )
        else:
            keep_index_levels = False

        # 1. We insert 'by'-columns to the result at the beginning of the frame and so only to the
        #    first partition, if partition_idx != 0 we just drop the index. If there are no columns
        #    that are required to drop (keep_index_levels is True) then we can exit here.
        # 2. We don't insert 'by'-columns to the result if 'by'-data came from a different
        #    frame (drop is False), there's only one exception for this rule: if the `method` is "size",
        #    so if (drop is False) and method is not "size" we just drop the index and so can exit here.
        if (not keep_index_levels and partition_idx != 0) or (
            not drop and method != "size"
        ):
            return reset_index, True, [], []

        if not isinstance(internal_by_cols, pandas.Index):
            if not is_list_like(internal_by_cols):
                internal_by_cols = [internal_by_cols]
            internal_by_cols = pandas.Index(internal_by_cols)

        # Labels generated for unnamed Series are never inserted into the result;
        # the filter is applicable only to indices exposing the `.str` accessor.
        internal_by_cols = (
            internal_by_cols[
                ~internal_by_cols.str.startswith(MODIN_UNNAMED_SERIES_LABEL, na=False)
            ]
            if hasattr(internal_by_cols, "str")
            else internal_by_cols
        )

        if selection is not None and not isinstance(selection, pandas.Index):
            # `pandas.Index` can't be built from a scalar, so a single selected
            # label has to be wrapped into a list first
            if not is_list_like(selection):
                selection = [selection]
            selection = pandas.Index(selection)

        lvls_to_drop = []
        cols_to_drop = []

        if not keep_index_levels:
            # We want to insert only these internal-by-cols that are not presented
            # in the result in order to not create naming conflicts
            if selection is None:
                cols_to_insert = frozenset(internal_by_cols) - frozenset(result_cols)
            else:
                cols_to_insert = frozenset(
                    # We have to use explicit 'not in' check and not just difference
                    # of sets because of specific '__contains__' operator in case of
                    # scalar 'col' and MultiIndex 'selection'.
                    col
                    for col in internal_by_cols
                    if col not in selection
                )
        else:
            cols_to_insert = internal_by_cols
            # We want to drop such internal-by-cols that are presented
            # in the result in order to not create naming conflicts
            cols_to_drop = list(frozenset(internal_by_cols) & frozenset(result_cols))

        if partition_idx == 0:
            lvls_to_drop = [
                i
                for i, name in enumerate(result_index_names)
                if name not in cols_to_insert
            ]
        else:
            lvls_to_drop = result_index_names

        # If every index level has to be dropped, it's expressed via dropping
        # the whole index at the reset stage instead of enumerating the levels.
        drop = False
        if len(lvls_to_drop) == len(result_index_names):
            drop = True
            lvls_to_drop = []

        return reset_index, drop, lvls_to_drop, cols_to_drop


class SeriesGroupBy(GroupBy):
    """Builder for GroupBy aggregation functions for Series."""

    @classmethod
    def _call_groupby(cls, df, *args, **kwargs):  # noqa: PR01
        """Call .groupby() on passed `df` squeezed to Series."""
        # We can end up here by two means - either by "true" call
        # like Series().groupby() or by df.groupby()[item].

        # ``FutureWarning`` is suppressed for both cases, the same way
        # it is done in the base class implementation.
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=FutureWarning)
            if len(df.columns) == 1:
                # Series().groupby() case
                return df.squeeze(axis=1).groupby(*args, **kwargs)
            # In second case surrounding logic will supplement grouping columns,
            # so we need to drop them after grouping is over; our originally
            # selected column is always the first, so use it
            return df.groupby(*args, **kwargs)[df.columns[0]]


class GroupByDefault(DefaultMethod):
    """Builder for default-to-pandas GroupBy aggregation functions."""

    _groupby_cls = GroupBy

    OBJECT_TYPE = "GroupBy"

    @classmethod
    def register(cls, func, **kwargs):
        """
        Build default-to-pandas GroupBy aggregation function.

        Parameters
        ----------
        func : callable or str
            Default aggregation function. If aggregation function is not specified
            via groupby arguments, then `func` function is used.
        **kwargs : kwargs
            Additional arguments that will be passed to function builder.

        Returns
        -------
        callable
            Function that takes query compiler and defaults to pandas to do GroupBy
            aggregation.
        """
        # `func` is documented to also accept a ``str``, which has no
        # ``__name__``; fall back to its string representation in that case.
        fn_name = getattr(func, "__name__", str(func))
        return super().register(
            cls._groupby_cls.build_groupby(func), fn_name=fn_name, **kwargs
        )

    # This specifies a `pandas.DataFrameGroupBy` method to pass the `agg_func` to,
    # it's based on `how` to apply it. Going by pandas documentation:
    #   1. `.aggregate(func)` applies func row/column wise.
    #   2. `.apply(func)` applies func to a DataFrames, holding a whole group (group-wise).
    #   3. `.transform(func)` is the same as `.apply()` but also broadcast the `func`
    #      result to the group's original shape.
    #   4. 'direct' mode means that the passed `func` has to be applied directly
    #      to the `pandas.DataFrameGroupBy` object.
    _aggregation_methods_dict = {
        "axis_wise": pandas.core.groupby.DataFrameGroupBy.aggregate,
        "group_wise": pandas.core.groupby.DataFrameGroupBy.apply,
        "transform": pandas.core.groupby.DataFrameGroupBy.transform,
        "direct": lambda grp, func, *args, **kwargs: func(grp, *args, **kwargs),
    }

    @classmethod
    def get_aggregation_method(cls, how):
        """
        Return `pandas.DataFrameGroupBy` method that implements the passed `how` UDF applying strategy.

        Parameters
        ----------
        how : {"axis_wise", "group_wise", "transform", "direct"}
            `how` parameter of the ``BaseQueryCompiler.groupby_agg``.

        Returns
        -------
        callable(pandas.DataFrameGroupBy, callable, *args, **kwargs) -> [pandas.DataFrame | pandas.Series]

        Raises
        ------
        KeyError
            If `how` is not one of the supported strategies.

        Notes
        -----
        Visit ``BaseQueryCompiler.groupby_agg`` doc-string for more information about `how` parameter.
        """
        return cls._aggregation_methods_dict[how]


class SeriesGroupByDefault(GroupByDefault):
    """Builder for default-to-pandas GroupBy aggregation functions for Series."""

    # Use the Series-specific GroupBy builder instead of the generic one.
    _groupby_cls = SeriesGroupBy

    # Same `how` -> method mapping as in the parent class, but pointing at
    # ``pandas.core.groupby.SeriesGroupBy`` methods. In 'direct' mode the passed
    # `func` is applied to the GroupBy object itself.
    _aggregation_methods_dict = {
        "axis_wise": pandas.core.groupby.SeriesGroupBy.aggregate,
        "group_wise": pandas.core.groupby.SeriesGroupBy.apply,
        "transform": pandas.core.groupby.SeriesGroupBy.transform,
        "direct": lambda grp, func, *args, **kwargs: func(grp, *args, **kwargs),
    }


================================================
FILE: modin/core/dataframe/algebra/default2pandas/list.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses default applied-on-list accessor functions builder class."""

from .series import SeriesDefault


class ListDefault(SeriesDefault):
    """Builder for default-to-pandas methods which is executed under list accessor."""

    @classmethod
    def frame_wrapper(cls, df):
        """
        Squeeze the passed frame along columns and return its ``list`` accessor.

        Parameters
        ----------
        df : pandas.DataFrame
            Frame to take the accessor from.

        Returns
        -------
        pandas.core.arrays.arrow.ListAccessor
        """
        squeezed = df.squeeze(axis=1)
        return squeezed.list


================================================
FILE: modin/core/dataframe/algebra/default2pandas/resample.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses default Resample functions builder class."""

from .default import DefaultMethod


# FIXME: there is no sense in keeping `Resampler` and `ResampleDefault` logic in
# different classes. They should be combined.
class Resampler:
    """Builder class for resampled aggregation functions."""

    @classmethod
    def build_resample(cls, func, squeeze_self):
        """
        Create a function that resamples a frame and aggregates the resampled data.

        Parameters
        ----------
        func : callable
            Aggregation to run against the resampler object. A ``property``
            is also accepted, in which case its getter is invoked.
        squeeze_self : bool
            Whether the frame has to be squeezed along columns before resampling.

        Returns
        -------
        callable
            Function taking a pandas DataFrame and resample arguments, that
            applies the aggregation to the resampled time-series data.
        """

        def fn(df, resample_kwargs, *args, **kwargs):
            """Resample the passed frame with `resample_kwargs` and run the aggregation on it."""
            source = df.squeeze(axis=1) if squeeze_self else df
            resampled = source.resample(**resample_kwargs)

            if type(func) is not property:
                return func(resampled, *args, **kwargs)

            # Properties are not callable, so their getter is invoked;
            # extra arguments are not forwarded to it.
            return func.fget(resampled)

        return fn


class ResampleDefault(DefaultMethod):
    """Builder for default-to-pandas resampled aggregation functions."""

    OBJECT_TYPE = "Resampler"

    @classmethod
    def register(cls, func, squeeze_self=False, **kwargs):
        """
        Build function that do fallback to pandas and aggregate resampled data.

        Parameters
        ----------
        func : callable or property
            Aggregation function (or property) to execute under resampled frame.
        squeeze_self : bool, default: False
            Whether or not to squeeze frame before resampling.
        **kwargs : kwargs
            Additional arguments that will be passed to function builder.

        Returns
        -------
        callable
            Function that takes query compiler and does fallback to pandas to resample
            time-series data and apply aggregation on it.
        """
        # ``Resampler.build_resample`` accepts properties, but a ``property``
        # object has no ``__name__`` before Python 3.13, so the name is taken
        # from its getter in that case.
        named_obj = func.fget if type(func) is property else func
        return super().register(
            Resampler.build_resample(func, squeeze_self),
            fn_name=getattr(named_obj, "__name__", str(func)),
            **kwargs
        )


================================================
FILE: modin/core/dataframe/algebra/default2pandas/rolling.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses default Rolling functions builder class."""

from .default import DefaultMethod


class RollingDefault(DefaultMethod):
    """Builder for default-to-pandas aggregation on a rolling window functions."""

    OBJECT_TYPE = "Rolling"

    @classmethod
    def _build_rolling(cls, func):
        """
        Build function that creates a rolling window and executes `func` on it.

        Parameters
        ----------
        func : callable or property
            Function (or property) to execute on a rolling window.

        Returns
        -------
        callable
            Function that takes pandas DataFrame and applies `func` on a rolling window.
        """

        def fn(df, rolling_kwargs, *args, **kwargs):
            """Create rolling window for the passed frame and execute specified `func` on it."""
            roller = df.rolling(**rolling_kwargs)

            # Properties are not callable, so their getter is invoked instead
            if type(func) is property:
                return func.fget(roller)

            return func(roller, *args, **kwargs)

        return fn

    @classmethod
    def register(cls, func, **kwargs):
        """
        Build function that do fallback to pandas to apply `func` on a rolling window.

        Parameters
        ----------
        func : callable or property
            Function (or property) to execute on a rolling window.
        **kwargs : kwargs
            Additional arguments that will be passed to function builder.

        Returns
        -------
        callable
            Function that takes query compiler and defaults to pandas to apply aggregation
            `func` on a rolling window.
        """
        # ``_build_rolling`` accepts properties, but a ``property`` object has
        # no ``__name__`` before Python 3.13, so the name is taken from its
        # getter in that case.
        named_obj = func.fget if type(func) is property else func
        return super().register(
            cls._build_rolling(func),
            fn_name=getattr(named_obj, "__name__", str(func)),
            **kwargs
        )


class ExpandingDefault(DefaultMethod):
    """Builder for default-to-pandas aggregation on an expanding window functions."""

    OBJECT_TYPE = "Expanding"

    @classmethod
    def _build_expanding(cls, func, squeeze_self):
        """
        Build function that creates an expanding window and executes `func` on it.

        Parameters
        ----------
        func : callable or property
            Function (or property) to execute on an expanding window.
        squeeze_self : bool
            Whether or not to squeeze frame before executing the window function.

        Returns
        -------
        callable
            Function that takes pandas DataFrame and applies `func` on an expanding window.
        """

        def fn(df, rolling_args, *args, **kwargs):
            """Create expanding window for the passed frame and execute specified `func` on it."""
            if squeeze_self:
                df = df.squeeze(axis=1)
            # `rolling_args` holds the positional arguments of `.expanding()`
            roller = df.expanding(*rolling_args)

            # Properties are not callable, so their getter is invoked instead
            if type(func) is property:
                return func.fget(roller)

            return func(roller, *args, **kwargs)

        return fn

    @classmethod
    def register(cls, func, squeeze_self=False, **kwargs):
        """
        Build function that do fallback to pandas to apply `func` on an expanding window.

        Parameters
        ----------
        func : callable or property
            Function (or property) to execute on an expanding window.
        squeeze_self : bool, default: False
            Whether or not to squeeze frame before executing the window function.
        **kwargs : kwargs
            Additional arguments that will be passed to function builder.

        Returns
        -------
        callable
            Function that takes query compiler and defaults to pandas to apply aggregation
            `func` on an expanding window.
        """
        # ``_build_expanding`` accepts properties, but a ``property`` object
        # has no ``__name__`` before Python 3.13, so the name is taken from
        # its getter in that case.
        named_obj = func.fget if type(func) is property else func
        return super().register(
            cls._build_expanding(func, squeeze_self=squeeze_self),
            fn_name=getattr(named_obj, "__name__", str(func)),
            **kwargs
        )


================================================
FILE: modin/core/dataframe/algebra/default2pandas/series.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses default Series functions builder class."""

from .default import DefaultMethod


class SeriesDefault(DefaultMethod):
    """Builder for default-to-pandas methods which is executed under Series."""

    OBJECT_TYPE = "Series"

    @classmethod
    def frame_wrapper(cls, df):
        """
        Squeeze the passed frame along columns so Series-specific functions can run on it.

        Parameters
        ----------
        df : pandas.DataFrame
            One-column DataFrame to squeeze.

        Returns
        -------
        pandas.Series
        """
        squeezed = df.squeeze(axis=1)
        return squeezed


================================================
FILE: modin/core/dataframe/algebra/default2pandas/str.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses default applied-on-str functions builder class."""

from .series import SeriesDefault


class StrDefault(SeriesDefault):
    """Builder for default-to-pandas methods which is executed under `str` accessor."""

    @classmethod
    def frame_wrapper(cls, df):
        """
        Squeeze the passed frame along columns and return its `str` accessor.

        Parameters
        ----------
        df : pandas.DataFrame
            Frame to take the accessor from.

        Returns
        -------
        pandas.core.strings.accessor.StringMethods
        """
        squeezed = df.squeeze(axis=1)
        return squeezed.str


================================================
FILE: modin/core/dataframe/algebra/default2pandas/struct.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses default applied-on-struct accessor functions builder class."""

from .series import SeriesDefault


class StructDefault(SeriesDefault):
    """Builder for default-to-pandas methods which is executed under struct accessor."""

    @classmethod
    def frame_wrapper(cls, df):
        """
        Squeeze the passed frame along columns and return its ``struct`` accessor.

        Parameters
        ----------
        df : pandas.DataFrame
            Frame to take the accessor from.

        Returns
        -------
        pandas.core.arrays.arrow.StructAccessor
        """
        squeezed = df.squeeze(axis=1)
        return squeezed.struct


================================================
FILE: modin/core/dataframe/algebra/fold.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses builder class for Fold operator."""

from __future__ import annotations

from typing import TYPE_CHECKING, Callable, Optional

from .operator import Operator

if TYPE_CHECKING:
    import pandas

    from modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler


class Fold(Operator):
    """Builder class for Fold functions."""

    @classmethod
    def register(
        cls, fold_function: Callable[..., pandas.DataFrame], shape_preserved=False
    ) -> Callable[..., PandasQueryCompiler]:
        """
        Create a Fold operator that is applied across full rows/columns.

        Parameters
        ----------
        fold_function : callable(pandas.DataFrame, *args, **kwargs) -> pandas.DataFrame
            Function to apply across rows/columns.
        shape_preserved : bool, default: False
            Whether applying the function keeps the shape of the dataframe
            unchanged.

        Returns
        -------
        callable
            Function that takes query compiler and executes Fold function.
        """

        def caller(
            query_compiler: PandasQueryCompiler,
            fold_axis: Optional[int] = None,
            *args: tuple,
            new_index=None,
            new_columns=None,
            **kwargs: dict,
        ) -> PandasQueryCompiler:
            """
            Run the Fold function on the frame of the passed query compiler.

            Parameters
            ----------
            query_compiler : PandasQueryCompiler
                The query compiler to execute the function on.
            fold_axis : int, optional
                0 or None means apply across full column partitions. 1 means
                apply across full row partitions.
            *args : tuple
                Additional arguments passed to `fold_function`.
            new_index : list-like, optional
                The index of the result.
            new_columns : list-like, optional
                The columns of the result.
            **kwargs : dict
                Additional keyword arguments passed to `fold_function`.

            Returns
            -------
            PandasQueryCompiler
                A new query compiler representing the result of executing the
                function.
            """
            build_compiler = query_compiler.__constructor__

            def apply_to_partition(x):
                # Bind the extra arguments so that only the data is left to pass
                return fold_function(x, *args, **kwargs)

            folded_frame = query_compiler._modin_frame.fold(
                cls.validate_axis(fold_axis),
                apply_to_partition,
                new_index=new_index,
                new_columns=new_columns,
                shape_preserved=shape_preserved,
            )
            return build_compiler(folded_frame)

        return caller


================================================
FILE: modin/core/dataframe/algebra/groupby.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses builder class for GroupByReduce operator."""

from __future__ import annotations

from typing import TYPE_CHECKING, Callable, Optional, Union

import pandas

from modin.core.dataframe.pandas.metadata import ModinIndex
from modin.error_message import ErrorMessage
from modin.utils import MODIN_UNNAMED_SERIES_LABEL, hashable

from .default2pandas.groupby import GroupBy, GroupByDefault
from .tree_reduce import TreeReduce

if TYPE_CHECKING:
    from modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler


class GroupByReduce(TreeReduce):
    """
    Builder class for GroupBy aggregation functions.

    Attributes
    ----------
    ID_LEVEL_NAME : str
        It's supposed that implementations may produce multiple temporary
        columns per one source column in an intermediate phase. In order
        for these columns to be processed accordingly at the Reduce phase,
        an implementation must store unique names for such temporary
        columns in the ``ID_LEVEL_NAME`` level. Duplicated names are not allowed.
    _GROUPBY_REDUCE_IMPL_FLAG : str
        Attribute indicating that a callable should be treated as an
        implementation for one of the TreeReduce phases rather than an
        arbitrary aggregation. Note: this attribute should be considered private.
    """

    ID_LEVEL_NAME: str = "__ID_LEVEL_NAME__"
    _GROUPBY_REDUCE_IMPL_FLAG: str = "__groupby_reduce_impl_func__"

    @classmethod
    def register(
        cls,
        map_func: Union[str, dict, Callable[..., pandas.DataFrame]],
        reduce_func: Optional[Union[str, dict, Callable[..., pandas.DataFrame]]] = None,
        **call_kwds: dict,
    ) -> Callable[..., PandasQueryCompiler]:
        """
        Create a template GroupBy aggregation function.

        The resulting function is applied in parallel via TreeReduce algorithm.

        Parameters
        ----------
        map_func : str, dict or callable(pandas.core.groupby.DataFrameGroupBy) -> pandas.DataFrame
            Function to apply to the `GroupByObject` at the map phase. If ``str`` was passed it will
            be treated as a DataFrameGroupBy's method name.
        reduce_func : str, dict or callable(pandas.core.groupby.DataFrameGroupBy) -> pandas.DataFrame, optional
            Function to apply to the ``DataFrameGroupBy`` at the reduce phase. If not specified
            will be set the same as 'map_func'.
        **call_kwds : dict
            Kwargs that will be passed to the returned function.

        Returns
        -------
        callable
            Function that takes query compiler and executes GroupBy aggregation
            with TreeReduce algorithm.
        """

        def resolve(func):
            # A string is a method name to look up on the object the function
            # is called with; anything else is passed through untouched.
            if not isinstance(func, str):
                return func
            return lambda df, *args, **kwargs: getattr(df, func)(*args, **kwargs)

        if reduce_func is None:
            reduce_func = map_func
        map_func, reduce_func = resolve(map_func), resolve(reduce_func)

        same_dictness = isinstance(map_func, dict) == isinstance(reduce_func, dict)
        same_callability = callable(map_func) == callable(reduce_func)
        assert (
            same_dictness and same_callability
        ), "Map and reduce functions must be either both dict or both callable."

        return lambda *args, **kwargs: cls.caller(
            *args, map_func=map_func, reduce_func=reduce_func, **kwargs, **call_kwds
        )

    @classmethod
    def register_implementation(
        cls,
        map_func: Callable[..., pandas.DataFrame],
        reduce_func: Callable[..., pandas.DataFrame],
    ) -> None:
        """
        Mark callables so they are recognized as implementations of tree-reduce phases.

        Parameters
        ----------
        map_func : callable(pandas.core.groupby.DataFrameGroupBy) -> pandas.DataFrame
            Callable to register.
        reduce_func : callable(pandas.core.groupby.DataFrameGroupBy) -> pandas.DataFrame
            Callable to register.
        """
        # The marker is a plain attribute set on the callable itself
        for phase_func in (map_func, reduce_func):
            setattr(phase_func, cls._GROUPBY_REDUCE_IMPL_FLAG, True)

    @classmethod
    def map(
        cls,
        df: pandas.DataFrame,
        map_func: Callable[..., pandas.DataFrame],
        axis: int,
        groupby_kwargs: dict,
        agg_args: list,
        agg_kwargs: dict,
        other: Optional[pandas.DataFrame] = None,
        by=None,
        drop: bool = False,
    ) -> pandas.DataFrame:
        """
        Execute Map phase of GroupByReduce.

        Groups DataFrame and applies map function. Groups will be
        preserved in the results index for the following reduce phase.

        Parameters
        ----------
        df : pandas.DataFrame
            Serialized frame to group.
        map_func : dict or callable(pandas.DataFrameGroupBy) -> pandas.DataFrame
            Function to apply to the `GroupByObject`.
        axis : {0, 1}
            Axis to group and apply aggregation function along. 0 means index axis
            when 1 means column axis.
        groupby_kwargs : dict
            Dictionary which carries arguments for `pandas.DataFrame.groupby`.
            The passed dictionary is not modified.
        agg_args : list-like
            Positional arguments to pass to the aggregation functions.
        agg_kwargs : dict
            Keyword arguments to pass to the aggregation functions.
        other : pandas.DataFrame, optional
            Serialized frame, whose columns are used to determine the groups.
            If not specified, `by` parameter is used.
        by : level index name or list of such labels, optional
            Index levels, that is used to determine groups.
            If not specified, `other` parameter is used.
        drop : bool, default: False
            Indicates whether or not by-data came from the `self` frame.

        Returns
        -------
        pandas.DataFrame
            GroupBy aggregation result for one particular partition.
        """
        # Work on a copy so that the overrides below don't leak into the
        # caller's dictionary (the Reduce phase copies it as well).
        groupby_kwargs = groupby_kwargs.copy()
        # Set `as_index` to True to track the metadata of the grouping object
        # It is used to make sure that between phases we are constructing the
        # right index and placing columns in the correct order.
        groupby_kwargs["as_index"] = True
        groupby_kwargs["observed"] = True
        # We have to filter func-dict BEFORE inserting broadcasted 'by' columns
        # to avoid multiple aggregation results for 'by' cols in case they're
        # present in the func-dict:
        apply_func = cls.get_callable(
            map_func,
            df,
            # We won't be able to preserve the order as the Map phase would likely
            # produce some temporary columns that won't fit into the original
            # aggregation order. It doesn't matter much as we restore the original
            # order at the Reduce phase.
            preserve_aggregation_order=False,
        )
        if other is not None:
            # Other is a broadcasted partition that represents 'by' data to group on.
            # If 'drop' then the 'by' data came from the 'self' frame, thus
            # inserting missed columns to the partition to group on them.
            # Note: `other` is squeezed only when `drop` is False (short-circuit).
            if drop or isinstance(
                other := other.squeeze(axis=axis ^ 1), pandas.DataFrame
            ):
                missing_cols = [o for o in other if o not in df]
                df = pandas.concat([df, other[missing_cols]], axis=1)
                other = list(other.columns)
            by_part = other
        else:
            by_part = by

        result = apply_func(
            df.groupby(by=by_part, axis=axis, **groupby_kwargs), *agg_args, **agg_kwargs
        )
        # Result could not always be a frame, so wrapping it into DataFrame
        return pandas.DataFrame(result)

    @classmethod
    def reduce(
        cls,
        df: pandas.DataFrame,
        reduce_func: Union[dict, Callable[..., pandas.DataFrame]],
        axis: int,
        groupby_kwargs: dict,
        agg_args: list,
        agg_kwargs: dict,
        partition_idx: int = 0,
        drop: bool = False,
        method: Optional[str] = None,
        finalizer_fn: Optional[Callable[[pandas.DataFrame], pandas.DataFrame]] = None,
    ) -> pandas.DataFrame:
        """
        Execute Reduce phase of GroupByReduce.

        Regroups the output of the Map phase on its index levels and applies
        the reduce function.

        Parameters
        ----------
        df : pandas.DataFrame
            Serialized frame which contain groups to combine.
        reduce_func : dict or callable(pandas.DataFrameGroupBy) -> pandas.DataFrame
            Function to apply to the `GroupByObject`.
        axis : {0, 1}
            Axis to group and apply aggregation function along. 0 means index axis
            when 1 means column axis.
        groupby_kwargs : dict
            Dictionary which carries arguments for `pandas.DataFrame.groupby`.
        agg_args : list-like
            Positional arguments to pass to the aggregation functions.
        agg_kwargs : dict
            Keyword arguments to pass to the aggregation functions.
        partition_idx : int, default: 0
            Internal index of column partition to which this function is applied.
        drop : bool, default: False
            Indicates whether or not by-data came from the `self` frame.
        method : str, optional
            Name of the groupby function. This is a hint to be able to do special casing.
        finalizer_fn : callable(pandas.DataFrame) -> pandas.DataFrame, optional
            A callable to execute at the end a groupby kernel against groupby result.

        Returns
        -------
        pandas.DataFrame
            GroupBy aggregation result.
        """
        # Wrapping names into an Index should be unnecessary, however
        # there is a bug in pandas with intersection that forces us to do so:
        # https://github.com/pandas-dev/pandas/issues/39699
        index_names = pandas.Index(df.index.names)

        # Remember what the user asked for before overriding it on a copy
        wants_index = groupby_kwargs.get("as_index", True)
        level_kwargs = groupby_kwargs.copy()
        level_kwargs.update(
            # `as_index` is forced to True to track the metadata of the grouping object
            as_index=True,
            # since the index levels now contain our 'by', in the reduce phase
            # we want to group on these levels
            level=list(range(len(df.index.names))),
        )

        aggregate = cls.get_callable(reduce_func, df)
        grouped = df.groupby(axis=axis, **level_kwargs)
        result = aggregate(grouped, *agg_args, **agg_kwargs)

        if not wants_index:
            source_index = df.index
            if isinstance(source_index, pandas.MultiIndex) and hasattr(
                source_index, "dtypes"
            ):
                by_dtypes = source_index.dtypes.values
            else:
                by_dtypes = (source_index.dtype,)
            selection = reduce_func.keys() if isinstance(reduce_func, dict) else None

            # `result` is adjusted in place, the return value is not used
            GroupBy.handle_as_index_for_dataframe(
                result,
                index_names,
                by_cols_dtypes=by_dtypes,
                by_length=len(index_names),
                selection=selection,
                partition_idx=partition_idx,
                drop=drop,
                method=method,
                inplace=True,
            )

        # Result could not always be a frame, so wrapping it into DataFrame
        frame = pandas.DataFrame(result)
        if frame.index.name == MODIN_UNNAMED_SERIES_LABEL:
            frame.index.name = None

        if finalizer_fn is None:
            return frame
        return finalizer_fn(frame)

    @classmethod
    def caller(
        cls,
        query_compiler: PandasQueryCompiler,
        by,
        map_func: Union[dict, Callable[..., pandas.DataFrame]],
        reduce_func: Union[dict, Callable[..., pandas.DataFrame]],
        axis: int,
        groupby_kwargs: dict,
        agg_args: list,
        agg_kwargs: dict,
        drop: bool = False,
        method: Optional[str] = None,
        default_to_pandas_func: Optional[Callable[..., pandas.DataFrame]] = None,
        finalizer_fn: Optional[Callable[[pandas.DataFrame], pandas.DataFrame]] = None,
    ) -> PandasQueryCompiler:
        """
        Run a GroupBy aggregation using the TreeReduce (map + reduce) scheme.

        Falls back to the default-to-pandas implementation when the grouping axis
        is not 0, when the frame is empty, or when grouping is done via a `by`
        argument that is a ``pandas.Grouper`` or is neither hashable nor a query
        compiler of the same type as `query_compiler`.

        Parameters
        ----------
        query_compiler : PandasQueryCompiler
            Frame to group.
        by : PandasQueryCompiler, column or index label, Grouper or list of such
            Object that determines the groups.
        map_func : dict or callable(pandas.DataFrameGroupBy) -> pandas.DataFrame
            Function to apply to the `GroupByObject` at the Map phase.
        reduce_func : dict or callable(pandas.DataFrameGroupBy) -> pandas.DataFrame
            Function to apply to the `GroupByObject` at the Reduce phase.
        axis : {0, 1}
            Axis to group and aggregate along: 0 is the index axis,
            1 is the column axis.
        groupby_kwargs : dict
            Arguments for ``pandas.DataFrame.groupby``.
        agg_args : list-like
            Positional arguments to pass to the aggregation functions.
        agg_kwargs : dict
            Keyword arguments to pass to the aggregation functions.
        drop : bool, default: False
            Indicates whether or not by-data came from the `self` frame.
        method : str, optional
            Name of the GroupBy aggregation function. This is a hint to be able
            to do special casing.
        default_to_pandas_func : callable(pandas.DataFrameGroupBy) -> pandas.DataFrame, optional
            The pandas aggregation function equivalent to `map_func + reduce_func`.
            Used when defaulting to pandas. If not specified, `map_func` is used.
        finalizer_fn : callable(pandas.DataFrame) -> pandas.DataFrame, optional
            A callable to execute at the end of a groupby kernel against
            the groupby result.

        Returns
        -------
        PandasQueryCompiler
            QueryCompiler which carries the result of the GroupBy aggregation.
        """
        by_is_query_compiler = isinstance(by, type(query_compiler))

        axis_not_supported = axis != 0
        # An empty frame can't be processed natively, so we default to pandas for it.
        # Higher API levels don't pass empty data here unless the frame has delayed
        # computations, which means that the index access below costs us some laziness.
        frame_is_empty = (
            len(query_compiler.columns) == 0 or len(query_compiler.index) == 0
        )
        groups_via_by_arg = (
            by is not None and groupby_kwargs.get("level", None) is None
        )
        by_not_supported = isinstance(by, pandas.Grouper) or not (
            hashable(by) or by_is_query_compiler
        )

        if (
            axis_not_supported
            or frame_is_empty
            or (groups_via_by_arg and by_not_supported)
        ):
            pandas_func = default_to_pandas_func
            if pandas_func is None:
                pandas_func = (
                    map_func
                    if not isinstance(map_func, dict)
                    else (lambda grp: grp.agg(map_func))
                )
            return GroupByDefault.register(pandas_func)(
                query_compiler,
                by=by,
                axis=axis,
                groupby_kwargs=groupby_kwargs,
                agg_args=agg_args,
                agg_kwargs=agg_kwargs,
                drop=drop,
            )

        # The mismatch only shows up for a Categorical 'by'. Checking the 'by' dtypes
        # first would narrow this branch down, but computing not-yet-materialized
        # dtypes may be expensive, so the check is deliberately skipped.
        if by_is_query_compiler and not groupby_kwargs.get("sort", True):
            ErrorMessage.mismatch_with_pandas(
                operation="df.groupby(categorical_by, sort=False)",
                message=(
                    "the groupby keys will be sorted anyway, although the 'sort=False' was passed. "
                    "See the following issue for more details: "
                    "https://github.com/modin-project/modin/issues/3571"
                ),
            )
            # Work on a copy so the caller's dictionary is left untouched
            groupby_kwargs = dict(groupby_kwargs, sort=True)

        map_fn, reduce_fn = cls.build_map_reduce_functions(
            by=by,
            axis=axis,
            groupby_kwargs=groupby_kwargs,
            map_func=map_func,
            reduce_func=reduce_func,
            agg_args=agg_args,
            agg_kwargs=agg_kwargs,
            drop=drop,
            method=method,
            finalizer_fn=finalizer_fn,
        )

        # A ModinFrame `by` gets its partitions broadcasted to every `self` partition
        # by the engine (see `modin_frame.groupby_reduce`). Any other `by` has already
        # been bound to the Map function inside `build_map_reduce_functions`.
        by_frame = getattr(by, "_modin_frame", None)

        apply_indices = None
        if isinstance(map_func, dict):
            apply_indices = list(map_func)

        new_index = None
        if (
            by_frame is not None
            and groupby_kwargs.get("as_index", True)
            and by_frame.has_materialized_dtypes
        ):
            new_index = ModinIndex(
                # actual value will be assigned on a parent update
                value=None,
                axis=0,
                dtypes=by_frame.dtypes,
            )

        grouped_frame = query_compiler._modin_frame.groupby_reduce(
            axis,
            by_frame,
            map_fn,
            reduce_fn,
            apply_indices=apply_indices,
            new_index=new_index,
        )
        return query_compiler.__constructor__(grouped_frame)

    @classmethod
    def get_callable(
        cls,
        agg_func: Union[dict, Callable[..., pandas.DataFrame]],
        df: pandas.DataFrame,
        preserve_aggregation_order: bool = True,
    ) -> Callable[..., pandas.DataFrame]:
        """
        Prepare the aggregation function for the groups of one particular partition.

        A non-dictionary `agg_func` is returned untouched. A dictionary is narrowed
        down to the keys that are present among the columns of `df` and then
        turned into a callable.

        Parameters
        ----------
        agg_func : dict or callable(pandas.DataFrameGroupBy) -> pandas.DataFrame
            Aggregation function.
        df : pandas.DataFrame
            Serialized partition which contains available columns.
        preserve_aggregation_order : bool, default: True
            Whether to manually restore the order of columns for the result specified
            by the `agg_func` keys (only makes sense when `agg_func` is a dictionary).

        Returns
        -------
        callable(pandas.core.groupby.DataFrameGroupBy) -> pandas.DataFrame
            Aggregation function that can be safely applied to this particular partition.
        """
        if not isinstance(agg_func, dict):
            return agg_func

        columns = df.columns
        has_id_level = columns.names[0] == cls.ID_LEVEL_NAME
        if has_id_level:
            # With the 'id' level in place the original column labels can't be
            # looked up directly, so the level is stripped first.
            columns = columns.droplevel(0)
        available_columns = frozenset(columns)

        applicable_aggs = {
            col: func for col, func in agg_func.items() if col in available_columns
        }
        return cls._build_callable_for_dict(
            applicable_aggs, preserve_aggregation_order, has_id_level
        )

    @classmethod
    def _build_callable_for_dict(
        cls,
        agg_dict: dict,
        preserve_aggregation_order: bool = True,
        grp_has_id_level: bool = False,
    ) -> Callable[..., pandas.DataFrame]:
        """
        Build callable for an aggregation dictionary.

        The aggregations of `agg_dict` are split into two groups: the ones for which
        ``cls.is_registered_implementation`` returns True ("custom" aggregations, each
        of them is called on the column selection of the groupby object) and all the
        others ("native" aggregations, passed to the ``.agg()`` of the groupby object).
        The returned callable computes both groups and concatenates the results
        along the columns axis.

        Parameters
        ----------
        agg_dict : dict
            Aggregation dictionary.
        preserve_aggregation_order : bool, default: True
            Whether to manually restore the order of columns for the result specified
            by the `agg_func` keys (only makes sense when `agg_func` is a dictionary).
        grp_has_id_level : bool, default: False
            Whether the frame we're grouping on has intermediate columns
            (see ``cls.ID_LEVEL_NAME``).

        Returns
        -------
        callable(pandas.core.groupby.DataFrameGroupBy) -> pandas.DataFrame
            Function taking a groupby object (extra positional and keyword arguments
            are accepted but not used) and returning the aggregation result.
        """
        # We have to keep this import away from the module level to avoid circular import
        from modin.pandas.utils import walk_aggregation_dict

        # We now filter aggregation functions into those that could be applied natively
        # using pandas (pandas_grp_obj.agg(**native_aggs)) and those that require
        # special treatment (custom_aggs).
        custom_aggs = {}
        native_aggs = {}

        # Column labels of the final result, in the order the aggregations were walked
        result_columns = []
        for col, func, func_name, col_renaming_required in walk_aggregation_dict(
            agg_dict
        ):
            # Filter dictionary
            dict_to_add = (
                custom_aggs if cls.is_registered_implementation(func) else native_aggs
            )

            # A `(name, func)` pair is stored when an explicit function name is available
            new_value = func if func_name is None else (func_name, func)
            old_value = dict_to_add.get(col, None)

            if old_value is not None:
                # The column already has an aggregation registered in this dictionary,
                # so its value is expected to be a list we can append to
                ErrorMessage.catch_bugs_and_request_email(
                    failure_condition=not isinstance(old_value, list),
                    extra_log="Expected for all aggregation values to be a list when at least "
                    + f"one column has multiple aggregations. Got: {old_value} {type(old_value)}",
                )
                old_value.append(new_value)
            else:
                # Pandas knows that it has to modify the resulting columns if it meets
                # a function wrapped into a list. Renaming is required if either a new
                # column name was explicitly specified, or multiple functions were
                # specified per one column, or if any other column in the aggregation
                # is going to be renamed.
                dict_to_add[col] = [new_value] if col_renaming_required else new_value

            # Construct resulting columns
            if col_renaming_required:
                func_name = str(func) if func_name is None else func_name
                # The function name is appended as an extra (last) level of the label
                result_columns.append(
                    (*(col if isinstance(col, tuple) else (col,)), func_name)
                )
            else:
                result_columns.append(col)

        result_columns = pandas.Index(result_columns)

        def aggregate_on_dict(grp_obj, *args, **kwargs):
            """
            Aggregate the passed groupby object.

            Native aggregations are computed with a single ``grp_obj.agg()`` call,
            custom aggregations are computed one by one, then everything is
            concatenated along the columns axis.
            """
            if len(native_aggs) == 0:
                native_agg_res = None
            elif grp_has_id_level:
                # Adding the 'id' level to the aggregation keys so they match `grp_obj` columns
                native_aggs_modified = {
                    (
                        cls.ID_LEVEL_NAME,
                        *(key if isinstance(key, tuple) else (key,)),
                    ): value
                    for key, value in native_aggs.items()
                }
                native_agg_res = grp_obj.agg(native_aggs_modified)
                # Dropping the 'id' level from the resulted frame
                native_agg_res.columns = native_agg_res.columns.droplevel(0)
            else:
                native_agg_res = grp_obj.agg(native_aggs)

            custom_results = []
            # Becomes True as soon as any custom result carries the 'id' level
            insert_id_levels = False

            for col, func, func_name, col_renaming_required in walk_aggregation_dict(
                custom_aggs
            ):
                if grp_has_id_level:
                    # Locate `col` by position among the columns with the 'id' level
                    # removed, then take the full (with the 'id' level) labels
                    cols_without_ids = grp_obj.obj.columns.droplevel(0)
                    if isinstance(cols_without_ids, pandas.MultiIndex):
                        # We may have multiple columns matching the `col` in
                        # a MultiIndex case, that's why use `.get_locs` here
                        col_pos = cols_without_ids.get_locs(col)
                    else:
                        # `pandas.Index` doesn't have `.get_locs` method
                        col_pos = cols_without_ids.get_loc(col)
                    agg_key = grp_obj.obj.columns[col_pos]
                else:
                    agg_key = [col]

                result = func(grp_obj[agg_key])
                # The `func` may have discarded an ID-level if there were any.
                # So checking for this again.
                result_has_id_level = result.columns.names[0] == cls.ID_LEVEL_NAME
                insert_id_levels |= result_has_id_level

                if col_renaming_required:
                    func_name = str(func) if func_name is None else func_name
                    if result_has_id_level:
                        result.columns = pandas.MultiIndex.from_tuples(
                            [
                                # `old_col[0]` stores values from the 'id'
                                # level, the ones we want to preserve here
                                (old_col[0], col, func_name)
                                for old_col in result.columns
                            ],
                            names=[
                                result.columns.names[0],
                                result.columns.names[1],
                                None,
                            ],
                        )
                    else:
                        # Every column of the result gets the same `(col, func_name)` label
                        result.columns = pandas.MultiIndex.from_tuples(
                            [(col, func_name)] * len(result.columns),
                            names=[result.columns.names[0], None],
                        )

                custom_results.append(result)

            if insert_id_levels:
                # As long as any `result` has an id-level we have to insert the level
                # into every `result` so the number of levels matches
                for idx, ext_result in enumerate(custom_results):
                    if ext_result.columns.names[0] != cls.ID_LEVEL_NAME:
                        custom_results[idx] = pandas.concat(
                            [ext_result],
                            keys=[cls.ID_LEVEL_NAME],
                            names=[cls.ID_LEVEL_NAME],
                            axis=1,
                            copy=False,
                        )

                # The 'id' level was dropped from the native result above (if it
                # was ever there), so it has to be inserted back as well
                if native_agg_res is not None:
                    native_agg_res = pandas.concat(
                        [native_agg_res],
                        keys=[cls.ID_LEVEL_NAME],
                        names=[cls.ID_LEVEL_NAME],
                        axis=1,
                        copy=False,
                    )

            native_res_part = [] if native_agg_res is None else [native_agg_res]
            parts = [*native_res_part, *custom_results]
            if parts:
                result = pandas.concat(parts, axis=1, copy=False)
            else:
                # Nothing was aggregated: return an empty frame with the expected columns
                result = pandas.DataFrame(columns=result_columns)

            # The order is naturally preserved if there's no custom aggregations
            if preserve_aggregation_order and len(custom_aggs):
                result = result.reindex(result_columns, axis=1)
            return result

        return aggregate_on_dict

    @classmethod
    def is_registered_implementation(cls, func: Callable) -> bool:
        """
        Tell whether `func` is marked as a registered TreeReduce implementation.

        Parameters
        ----------
        func : callable
            Object to check. Non-callable objects are never considered registered.

        Returns
        -------
        bool
            True if `func` is callable and carries the attribute named by
            ``cls._GROUPBY_REDUCE_IMPL_FLAG``, False otherwise.
        """
        if not callable(func):
            return False
        return hasattr(func, cls._GROUPBY_REDUCE_IMPL_FLAG)

    @classmethod
    def build_map_reduce_functions(
        cls,
        by,
        axis: int,
        groupby_kwargs: dict,
        map_func: Union[dict, Callable[..., pandas.DataFrame]],
        reduce_func: Union[dict, Callable[..., pandas.DataFrame]],
        agg_args: list,
        agg_kwargs: dict,
        drop: bool = False,
        method: Optional[str] = None,
        finalizer_fn: Callable[[pandas.DataFrame], pandas.DataFrame] = None,
    ) -> tuple[Callable, Callable]:
        """
        Create the Map and Reduce kernels with all the groupby arguments bound.

        Parameters
        ----------
        by : BaseQueryCompiler, column or index label, Grouper or list of such
            Object that determines the groups.
        axis : {0, 1}
            Axis to group and aggregate along: 0 is the index axis,
            1 is the column axis.
        groupby_kwargs : dict
            Arguments for ``pandas.DataFrame.groupby``.
        map_func : dict or callable(pandas.DataFrameGroupBy) -> pandas.DataFrame
            Function to apply to the `GroupByObject` at the Map phase.
        reduce_func : dict or callable(pandas.DataFrameGroupBy) -> pandas.DataFrame
            Function to apply to the `GroupByObject` at the Reduce phase.
        agg_args : list-like
            Positional arguments to pass to the aggregation functions.
        agg_kwargs : dict
            Keyword arguments to pass to the aggregation functions.
        drop : bool, default: False
            Indicates whether or not by-data came from the `self` frame.
        method : str, optional
            Name of the GroupBy aggregation function. This is a hint to be able
            to do special casing.
        finalizer_fn : callable(pandas.DataFrame) -> pandas.DataFrame, optional
            A callable to execute at the end of a groupby kernel against
            the groupby result.

        Returns
        -------
        Tuple of callable
            Tuple of map and reduce functions with bound arguments.
        """
        # A query compiler `by` is broadcasted explicitly by the `groupby_reduce`
        # method of the modin frame, so it must not be broadcasted a second time
        # implicitly as a bound function argument.
        bound_by = None if hasattr(by, "_modin_frame") else by

        def _map(
            df: pandas.DataFrame,
            other: Optional[pandas.DataFrame] = None,
            **kwargs: dict,
        ) -> pandas.DataFrame:
            def run_map(
                part: pandas.DataFrame, by_part: Optional[pandas.DataFrame] = None
            ) -> pandas.DataFrame:
                return cls.map(
                    part,
                    other=by_part,
                    axis=axis,
                    by=bound_by,
                    groupby_kwargs=groupby_kwargs.copy(),
                    map_func=map_func,
                    agg_args=agg_args,
                    agg_kwargs=agg_kwargs,
                    drop=drop,
                    **kwargs,
                )

            # Copying on every call would be wasteful, so the copy-free path is tried
            # first; Arrow buffer read-only errors show up as a ValueError.
            try:
                return run_map(df, other)
            except ValueError:
                df_copy = df.copy()
                other_copy = None if other is None else other.copy()
                return run_map(df_copy, other_copy)

        def _reduce(df: pandas.DataFrame, **call_kwargs: dict) -> pandas.DataFrame:
            def run_reduce(part: pandas.DataFrame):
                return cls.reduce(
                    part,
                    axis=axis,
                    groupby_kwargs=groupby_kwargs,
                    reduce_func=reduce_func,
                    agg_args=agg_args,
                    agg_kwargs=agg_kwargs,
                    drop=drop,
                    method=method,
                    finalizer_fn=finalizer_fn,
                    **call_kwargs,
                )

            # Copying on every call would be wasteful, so the copy-free path is tried
            # first; Arrow buffer read-only errors show up as a ValueError.
            try:
                return run_reduce(df)
            except ValueError:
                return run_reduce(df.copy())

        return _map, _reduce


================================================
FILE: modin/core/dataframe/algebra/map.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses builder class for Map operator."""

from __future__ import annotations

from typing import TYPE_CHECKING, Callable

from .operator import Operator

if TYPE_CHECKING:
    import pandas

    from modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler


class Map(Operator):
    """Builder class for Map operator."""

    @classmethod
    def register(
        cls,
        function: Callable[..., pandas.DataFrame],
        *call_args: tuple,
        **call_kwds: dict,
    ) -> Callable[..., PandasQueryCompiler]:
        """
        Build Map operator that will be performed across each partition.

        Parameters
        ----------
        function : callable(pandas.DataFrame, *args, **kwargs) -> pandas.DataFrame
            Function that will be applied to the each partition.
            Function takes `pandas.DataFrame` and returns `pandas.DataFrame`
            of the same shape.
        *call_args : tuple
            Args that will be passed to the ``map`` method of the Modin frame.
        **call_kwds : dict
            Kwargs that will be passed to the ``map`` method of the Modin frame.
            A ``shape_hint`` key, if present, is not forwarded to ``map``: it is
            passed to the constructor of the resulting query compiler instead.

        Returns
        -------
        callable
            Function that takes query compiler and executes map function.
        """
        # The hint has to be extracted exactly once, at registration time. Popping it
        # inside of the `caller` would mutate the dictionary shared by all the calls,
        # so the hint would only be honored by the very first invocation.
        registered_shape_hint = call_kwds.pop("shape_hint", None)

        def caller(
            query_compiler: PandasQueryCompiler, *args: tuple, **kwargs: dict
        ) -> PandasQueryCompiler:
            """Execute Map function against passed query compiler."""
            # Fall back to the hint of the source frame if no hint was registered
            shape_hint = registered_shape_hint or query_compiler._shape_hint
            return query_compiler.__constructor__(
                query_compiler._modin_frame.map(
                    lambda x: function(x, *args, **kwargs), *call_args, **call_kwds
                ),
                shape_hint=shape_hint,
            )

        return caller


================================================
FILE: modin/core/dataframe/algebra/operator.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module contains an interface for operator builder classes."""

from __future__ import annotations

from typing import Callable, Optional


class Operator:
    """Base interface for builders of operators that run in parallel across partitions."""

    def __init__(self) -> None:
        # Operators are never instantiated: they are built via `register`
        class_name = type(self).__name__
        raise ValueError(
            "Please use {}.register instead of the constructor".format(class_name)
        )

    @classmethod
    def register(cls, func: Callable, **kwargs: dict):
        """
        Build an operator applying the source function across the entire dataset.

        The base class provides no implementation, child classes have to override it.

        Parameters
        ----------
        func : callable
            Source function.
        **kwargs : dict
            Kwargs that will be passed to the builder function.

        Returns
        -------
        callable

        Raises
        ------
        NotImplementedError
            Always, unless overridden in a child class.
        """
        raise NotImplementedError("Please implement in child class")

    @classmethod
    def validate_axis(cls, axis: Optional[int]) -> int:
        """
        Convert the passed axis into its integer representation.

        Parameters
        ----------
        axis : int, optional
            0 or None means apply on index, 1 means apply on columns.

        Returns
        -------
        int
            0 if `axis` is None, otherwise `axis` itself.
        """
        if axis is None:
            return 0
        return axis


================================================
FILE: modin/core/dataframe/algebra/reduce.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses builder class for Reduce operator."""

from __future__ import annotations

from typing import TYPE_CHECKING, Callable, Optional

from .operator import Operator

if TYPE_CHECKING:
    import pandas

    from modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler


class Reduce(Operator):
    """Builder class for Reduce operator."""

    @classmethod
    def register(
        cls,
        reduce_function: Callable[..., pandas.Series],
        axis: Optional[int] = None,
        shape_hint: Optional[str] = None,
    ) -> Callable[..., PandasQueryCompiler]:
        """
        Build Reduce operator that will be performed across rows/columns.

        It's used if `func` reduces the dimension of partitions in contrast to `Fold`.

        Parameters
        ----------
        reduce_function : callable(pandas.DataFrame, *args, **kwargs) -> pandas.Series
            Source function.
        axis : int, optional
            Axis to apply function along. If not specified, the ``axis`` keyword
            argument passed at call time is used instead.
        shape_hint : {"row", "column", None}, default: None
            Shape hint for the results known to be a column or a row, otherwise None.

        Returns
        -------
        callable
            Function that takes query compiler and executes Reduce function.
        """

        def caller(
            query_compiler: PandasQueryCompiler, *args: tuple, **kwargs: dict
        ) -> PandasQueryCompiler:
            """Execute Reduce function against passed query compiler."""
            # The axis given at registration time wins over the call-time one
            if axis is not None:
                effective_axis = axis
            else:
                effective_axis = kwargs.get("axis")

            build_result = query_compiler.__constructor__
            reduced_frame = query_compiler._modin_frame.reduce(
                cls.validate_axis(effective_axis),
                lambda x: reduce_function(x, *args, **kwargs),
            )
            return build_result(reduced_frame, shape_hint=shape_hint)

        return caller


================================================
FILE: modin/core/dataframe/algebra/tree_reduce.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses builder class for TreeReduce operator."""

from __future__ import annotations

from typing import TYPE_CHECKING, Callable, Optional

from .operator import Operator

if TYPE_CHECKING:
    import pandas
    from pandas._typing import DtypeObj

    from modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler


class TreeReduce(Operator):
    """Builder class for TreeReduce operator."""

    @classmethod
    def register(
        cls,
        map_function: Optional[Callable[..., pandas.DataFrame]],
        reduce_function: Optional[Callable[..., pandas.Series]] = None,
        axis: Optional[int] = None,
        compute_dtypes: Optional[Callable[..., DtypeObj]] = None,
    ) -> Callable[..., PandasQueryCompiler]:
        """
        Build TreeReduce operator.

        Parameters
        ----------
        map_function : callable(pandas.DataFrame, *args, **kwargs) -> pandas.DataFrame
            Function used for the map phase.
        reduce_function : callable(pandas.DataFrame, *args, **kwargs) -> pandas.Series, optional
            Function used for the reduce phase. If not specified, `map_function`
            is used for the reduce phase as well.
        axis : int, optional
            Axis to apply the functions along. If not specified, the axis is taken
            from the ``axis`` keyword argument of every call of the built function.
        compute_dtypes : callable(pandas.Series, *func_args, **func_kwargs) -> DtypeObj, optional
            Callable that computes the dtype of the result from the dtypes of the
            source frame. It is only invoked when the query compiler reports that
            its dtypes are already materialized.

        Returns
        -------
        callable
            Function that takes query compiler and executes passed functions
            with TreeReduce algorithm.
        """
        # The reduce phase falls back to the map function when no reducer is given.
        reducer = map_function if reduce_function is None else reduce_function

        def caller(
            query_compiler: PandasQueryCompiler, *args: tuple, **kwargs: dict
        ) -> PandasQueryCompiler:
            """Execute TreeReduce function against passed query compiler."""
            # An axis fixed at registration time wins over the per-call keyword.
            if axis is None:
                target_axis = kwargs.get("axis")
            else:
                target_axis = axis

            # The result dtype is passed on as a string, and only when it can be
            # derived from dtypes that are already materialized.
            if compute_dtypes and query_compiler.frame_has_materialized_dtypes:
                result_dtypes = str(
                    compute_dtypes(query_compiler.dtypes, *args, **kwargs)
                )
            else:
                result_dtypes = None

            reduced_frame = query_compiler._modin_frame.tree_reduce(
                cls.validate_axis(target_axis),
                lambda part: map_function(part, *args, **kwargs),
                lambda part: reducer(part, *args, **kwargs),
                dtypes=result_dtypes,
            )
            return query_compiler.__constructor__(reduced_frame)

        return caller


================================================
FILE: modin/core/dataframe/base/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Base Modin Dataframe classes."""


================================================
FILE: modin/core/dataframe/base/dataframe/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Base Modin Dataframe class and Axis and JoinType Enums."""


================================================
FILE: modin/core/dataframe/base/dataframe/dataframe.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
Module contains class ModinDataframe.

ModinDataframe is a parent abstract class for any dataframe class.
"""

from abc import ABC, abstractmethod
from typing import Callable, Dict, Hashable, List, Optional, Union

from modin.core.dataframe.base.dataframe.utils import Axis, JoinType


class ModinDataframe(ABC):
    """
    An abstract class that represents the Parent class for any Dataframe class.

    This class is intended to specify the behaviors that a Dataframe must implement.
    Every method declared here is abstract, so a concrete dataframe class has to
    override all of them before it can be instantiated.

    For more details about how these methods were chosen, please refer to this
    (https://people.eecs.berkeley.edu/~totemtang/paper/Modin.pdf) paper, which specifies
    a Dataframe algebra that this class exposes.
    """

    @abstractmethod
    def take_2d_labels_or_positional(
        self,
        row_labels: Optional[List[Hashable]] = None,
        row_positions: Optional[List[int]] = None,
        col_labels: Optional[List[Hashable]] = None,
        col_positions: Optional[List[int]] = None,
    ) -> "ModinDataframe":
        """
        Mask rows and columns in the dataframe.

        Allow users to perform selection and projection on the row and column labels (named notation),
        in addition to the row and column number (positional notation).

        Parameters
        ----------
        row_labels : list of hashable, optional
            The row labels to extract.
        row_positions : list of int, optional
            The row positions to extract.
        col_labels : list of hashable, optional
            The column labels to extract.
        col_positions : list of int, optional
            The column positions to extract.

        Returns
        -------
        ModinDataframe
            A new ModinDataframe from the mask provided.

        Notes
        -----
        If both `row_labels` and `row_positions` are provided, a ValueError is raised.
        The same rule applies for `col_labels` and `col_positions`.
        """
        pass

    @abstractmethod
    def filter_by_types(self, types: List[Hashable]) -> "ModinDataframe":
        """
        Allow the user to specify a type or set of types by which to filter the columns.

        Parameters
        ----------
        types : list of hashables
            The types to filter columns by.

        Returns
        -------
        ModinDataframe
            A new ModinDataframe with only the columns whose dtypes appear in `types`.
        """
        pass

    @abstractmethod
    def map(
        self,
        function: Callable,
        axis: Optional[Union[int, Axis]] = None,
        dtypes: Optional[str] = None,
        new_columns: Optional[List[Hashable]] = None,
    ) -> "ModinDataframe":
        """
        Apply a user-defined function row-wise if `axis`=0, column-wise if `axis`=1, and cell-wise if `axis` is None.

        Parameters
        ----------
        function : callable(row|col|cell) -> row|col|cell
            The function to map across the dataframe.
        axis : int or modin.core.dataframe.base.dataframe.utils.Axis, optional
            The axis to map over.
        dtypes : str, optional
            The data types for the result. This is an optimization
            because there are functions that always result in a particular data
            type, and this allows us to avoid (re)computing it.
        new_columns : List[Hashable], optional
            New column labels of the result, its length has to be identical
            to the older columns. If not specified, old column labels are preserved.

        Returns
        -------
        ModinDataframe
            A new ModinDataframe with the map applied.

        Notes
        -----
        This does not change the shape of the dataframe.
        """
        pass

    @abstractmethod
    def filter(self, axis: Union[int, Axis], condition: Callable) -> "ModinDataframe":
        """
        Filter data based on the function provided along the specified axis.

        Parameters
        ----------
        axis : int or modin.core.dataframe.base.dataframe.utils.Axis
            The axis to filter over.
        condition : callable(row|col) -> bool
            The function to use for the filter. This function should filter the
            data itself. It accepts either a row or column (depending on the axis argument) and
            returns True to keep the row/col, and False to drop it.

        Returns
        -------
        ModinDataframe
            A new ModinDataframe filtered by content according to the filter provided by condition.
        """
        pass

    @abstractmethod
    def explode(
        self,
        axis: Union[int, Axis],
        function: Callable,
        result_schema: Optional[Dict[Hashable, type]] = None,
    ) -> "ModinDataframe":
        """
        Explode data based on the function provided along the specified axis.

        Parameters
        ----------
        axis : int or modin.core.dataframe.base.dataframe.utils.Axis
            The axis to expand over.
        function : callable
            The function to use to expand the data. This function should accept one
            row/column, and return multiple.
        result_schema : dictionary, optional
            Mapping from column labels to data types that represents the types of the output dataframe.

        Returns
        -------
        ModinDataframe
            A new ModinDataframe with the specified axis expanded.

        Notes
        -----
        Only one axis can be expanded at a time.

        The user-defined function may increase the number of rows (columns if axis=1),
        but it should not remove or drop rows.
        """
        pass

    @abstractmethod
    def window(
        self,
        axis: Union[int, Axis],
        reduce_fn: Callable,
        window_size: int,
        result_schema: Optional[Dict[Hashable, type]] = None,
    ) -> "ModinDataframe":
        """
        Apply a sliding window operator that acts as a GROUPBY on each window, reducing each window to a single row (column).

        Parameters
        ----------
        axis : int or modin.core.dataframe.base.dataframe.utils.Axis
            The axis to slide over.
        reduce_fn : callable(rowgroup|colgroup) -> row|col
            The reduce function to apply over the data.
        window_size : int
            The number of rows/columns to pass to the function.
            (The size of the sliding window).
        result_schema : dictionary, optional
            Mapping from column labels to data types that represents the types of the output dataframe.

        Returns
        -------
        ModinDataframe
            A new ModinDataframe with the reduce function applied over windows of the specified
            axis.

        Notes
        -----
        The user-defined reduce function must reduce each window's column
        (row if axis=1) down to a single value.
        """
        pass

    @abstractmethod
    def groupby(
        self,
        axis: Union[int, Axis],
        by: Union[str, List[str]],
        operator: Callable,
        result_schema: Optional[Dict[Hashable, type]] = None,
    ) -> "ModinDataframe":
        """
        Generate groups based on values in the input column(s) and perform the specified operation on each.

        Parameters
        ----------
        axis : int or modin.core.dataframe.base.dataframe.utils.Axis
            The axis to apply the grouping over.
        by : string or list of strings
            One or more column labels to use for grouping.
        operator : callable
            The operation to carry out on each of the groups. The operator is another
            algebraic operator with its own user-defined function parameter, depending
            on the output desired by the user.
        result_schema : dictionary, optional
            Mapping from column labels to data types that represents the types of the output dataframe.

        Returns
        -------
        ModinDataframe
            A new ModinDataframe containing the groupings specified, with the operator
            applied to each group.

        Notes
        -----
        No communication between groups is allowed in this algebra implementation.

        The number of rows (columns if axis=1) returned by the user-defined function
        passed to the groupby may be at most the number of rows in the group, and
        may be as small as a single row.

        Unlike the pandas API, an intermediate "GROUP BY" object is not present in this
        algebra implementation.
        """
        pass

    @abstractmethod
    def reduce(
        self,
        axis: Union[int, Axis],
        function: Callable,
        dtypes: Optional[str] = None,
    ) -> "ModinDataframe":
        """
        Perform a user-defined aggregation on the specified axis, where the axis reduces down to a singleton.

        Parameters
        ----------
        axis : int or modin.core.dataframe.base.dataframe.utils.Axis
            The axis to perform the reduce over.
        function : callable(row|col) -> single value
            The reduce function to apply to each column.
        dtypes : str, optional
            The data types for the result. This is an optimization
            because there are functions that always result in a particular data
            type, and this allows us to avoid (re)computing it.

        Returns
        -------
        ModinDataframe
            A new ModinDataframe with the same columns as the previous, with only a single row.

        Notes
        -----
        The user-defined function must reduce to a single value.
        """
        pass

    @abstractmethod
    def tree_reduce(
        self,
        axis: Union[int, Axis],
        map_func: Callable,
        reduce_func: Optional[Callable] = None,
        dtypes: Optional[str] = None,
    ) -> "ModinDataframe":
        """
        Perform a user-defined aggregation on the specified axis, where the axis reduces down to a singleton using a tree-reduce computation pattern.

        The map function is applied first over multiple partitions of a column, and then the reduce
        function (if specified, otherwise the map function is applied again) is applied to the
        results to produce a single value.

        Parameters
        ----------
        axis : int or modin.core.dataframe.base.dataframe.utils.Axis
            The axis to perform the tree reduce over.
        map_func : callable(row|col) -> row|col|single value
            The map function to apply to each column.
        reduce_func : callable(row|col) -> single value, optional
            The reduce function to apply to the results of the map function.
        dtypes : str, optional
            The data types for the result. This is an optimization
            because there are functions that always result in a particular data
            type, and this allows us to avoid (re)computing it.

        Returns
        -------
        ModinDataframe
            A new ModinDataframe with the same columns as the previous, with only a single row.

        Notes
        -----
        The user-defined function must reduce to a single value.

        If the user-defined function requires access to the entire column, please use reduce instead.
        """
        pass

    @abstractmethod
    def infer_types(self, columns_list: List[str]) -> "ModinDataframe":
        """
        Determine the compatible type shared by all values in the specified columns, and coerce them to that type.

        Parameters
        ----------
        columns_list : list of strings
            List of column labels to infer and induce types over.

        Returns
        -------
        ModinDataframe
            A new ModinDataframe with the inferred schema.
        """
        pass

    @abstractmethod
    def join(
        self,
        axis: Union[int, Axis],
        condition: Callable,
        other: "ModinDataframe",
        join_type: Union[str, JoinType],
    ) -> "ModinDataframe":
        """
        Join this dataframe with the other.

        Parameters
        ----------
        axis : int or modin.core.dataframe.base.dataframe.utils.Axis
            The axis to perform the join on.
        condition : callable
            Function that determines which rows should be joined. The condition can be a
            simple equality, e.g. "left.col1 == right.col1" or can be arbitrarily complex.
        other : ModinDataframe
            The other data to join with, i.e. the right dataframe.
        join_type : string {"inner", "left", "right", "outer"} or modin.core.dataframe.base.dataframe.utils.JoinType
            The type of join to perform.

        Returns
        -------
        ModinDataframe
            A new ModinDataframe that is the result of applying the specified join over the two
            dataframes.

        Notes
        -----
        During the join, this dataframe is considered the left, while the other is
        treated as the right.

        Only inner joins, left outer, right outer, and full outer joins are currently supported.
        Support for other join types (e.g. natural join) may be implemented in the future.
        """
        pass

    @abstractmethod
    def concat(
        self,
        axis: Union[int, Axis],
        others: Union["ModinDataframe", List["ModinDataframe"]],
    ) -> "ModinDataframe":
        """
        Append rows/columns along the specified axis from multiple dataframes.

        Parameters
        ----------
        axis : int or modin.core.dataframe.base.dataframe.utils.Axis
            The axis on which to perform the concatenation.
        others : ModinDataframe or list of ModinDataframes
            The other ModinDataframe(s) to concatenate.

        Returns
        -------
        ModinDataframe
            A new ModinDataframe that is the result of concatenating the dataframes over the
            specified axis.

        Notes
        -----
        The concat operator incurs fixed overheads, and so this algebra places no
        limit to the number of dataframes that may be concatenated in this way.
        """
        pass

    @abstractmethod
    def transpose(self) -> "ModinDataframe":
        """
        Swap the row and column axes.

        Returns
        -------
        ModinDataframe
            A new ModinDataframe with the row and column axes swapped.

        Notes
        -----
        Transposing a dataframe is expensive, and so it is performed lazily. The axes are swapped
        logically immediately, but the physical swap does not occur until absolutely necessary,
        which helps motivate the axis argument to the other operators in this algebra.
        """
        pass

    @abstractmethod
    def to_labels(self, column_labels: Union[str, List[str]]) -> "ModinDataframe":
        """
        Replace the row labels with one or more columns of data.

        Parameters
        ----------
        column_labels : string or list of strings
            Column label(s) to use as the new row labels.

        Returns
        -------
        ModinDataframe
            A new ModinDataframe with the row labels replaced by the specified columns.

        Notes
        -----
        When multiple column labels are specified, a hierarchical set of labels is created, ordered by the ordering
        of labels in the input.
        """
        pass

    @abstractmethod
    def from_labels(self) -> "ModinDataframe":
        """
        Move the row labels into the data at position 0, and set the row labels to the positional notation.

        Returns
        -------
        ModinDataframe
            A new ModinDataframe with the row labels moved into the data.

        Notes
        -----
        In the case that the dataframe has hierarchical labels, all label "levels" are inserted into the dataframe
        in the order they occur in the labels, with the outermost being in position 0.
        """
        pass

    @abstractmethod
    def rename(
        self,
        new_row_labels: Optional[Union[Dict[Hashable, Hashable], Callable]] = None,
        new_col_labels: Optional[Union[Dict[Hashable, Hashable], Callable]] = None,
    ) -> "ModinDataframe":
        """
        Replace the row and column labels with the specified new labels.

        Parameters
        ----------
        new_row_labels : dictionary or callable, optional
            Mapping or callable that relates old row labels to new labels.
        new_col_labels : dictionary or callable, optional
            Mapping or callable that relates old col labels to new labels.

        Returns
        -------
        ModinDataframe
            A new ModinDataframe with the new row and column labels.
        """
        pass

    @abstractmethod
    def sort_by(
        self,
        axis: Union[int, Axis],
        labels: Union[str, List[str]],
        ascending: bool = True,
    ) -> "ModinDataframe":
        """
        Logically reorder rows (columns if axis=1) lexicographically by the data in a column or set of columns.

        Parameters
        ----------
        axis : int or modin.core.dataframe.base.dataframe.utils.Axis
            The axis to perform the sort over.
        labels : string or list of strings
            Column (row if axis=1) label(s) to use to determine lexicographical ordering. If multiple
            columns (rows if axis=1) are provided, the sort is performed on the first column (row if axis=1),
            with ties broken by the other columns (rows if axis=1) provided.
        ascending : boolean, default: True
            Whether to sort in ascending or descending order.

        Returns
        -------
        ModinDataframe
            A new ModinDataframe sorted into lexicographical order by the specified column(s).
        """
        pass


================================================
FILE: modin/core/dataframe/base/dataframe/utils.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
Module contains useful enums for Modin.

Axis is an enum that represents the `axis` argument for dataframe operations.
JoinType is an enum that represents the `join_type` or `how` argument for the join algebra operator.
"""

from enum import Enum
from typing import Dict, List, Sequence, Tuple, cast

import pandas
from pandas._typing import IndexLabel
from pandas.api.types import is_scalar
from pandas.core.dtypes.common import is_integer_dtype


class Axis(Enum):  # noqa: PR01
    """
    An enum that represents the `axis` argument provided to the algebra operators.

    The enum has 3 values - ROW_WISE (0) to represent the row axis, COL_WISE (1) to
    represent the column axis, and CELL_WISE (None) to represent no axis. ROW_WISE
    operations iterate over the rows, COL_WISE operations over the columns, and CELL_WISE
    operations over any of the partitioning schemes that are supported in Modin
    (row-wise, column-wise, or block-wise).
    """

    ROW_WISE = 0
    COL_WISE = 1
    CELL_WISE = None


class JoinType(Enum):  # noqa: PR01
    """
    An enum that represents the `join_type` argument provided to the algebra operators.

    The enum has 4 values - INNER to represent inner joins, LEFT to represent left joins, RIGHT to
    represent right joins, and OUTER to represent outer joins. The value of each member is the
    lowercase string spelling of the join ("inner", "left", "right", "outer").
    """

    INNER = "inner"
    LEFT = "left"
    RIGHT = "right"
    OUTER = "outer"


def join_columns(
    left: pandas.Index,
    right: pandas.Index,
    left_on: IndexLabel,
    right_on: IndexLabel,
    suffixes: Tuple[str, str],
) -> Tuple[pandas.Index, Dict[IndexLabel, IndexLabel], Dict[IndexLabel, IndexLabel]]:
    """
    Compute the column labels that result from merging two dataframes.

    Parameters
    ----------
    left : pandas.Index
        Columns of the left frame to join.
    right : pandas.Index
        Columns of the right frame to join.
    left_on : list-like or scalar
        Column names on which the frames are joined in the left DataFrame.
    right_on : list-like or scalar
        Column names on which the frames are joined in the right DataFrame.
    suffixes : tuple[str, str]
        A 2-length sequence of suffixes: the first one is appended to conflicting
        labels coming from `left`, the second one to those coming from `right`.

    Returns
    -------
    pandas.Index, dict[IndexLabel -> IndexLabel], dict[IndexLabel -> IndexLabel]
        Columns of the resulting frame, followed by the old-to-new label mappings
        for `left` and for `right`.

    Raises
    ------
    NotImplementedError
        Raised when one of the keys to join is an index level, pandas behaviour is really
        complicated in this case, so we're not supporting this case for now.
    """
    # `cast` lets `mypy` know that both key collections are `Sequence[IndexLabel]` from now on
    left_keys = cast(Sequence[IndexLabel], [left_on] if is_scalar(left_on) else left_on)
    right_keys = cast(
        Sequence[IndexLabel], [right_on] if is_scalar(right_on) else right_on
    )

    # simple case: both frames are merged on one and the same key, which may live in an index
    if len(left_keys) == 1 and len(right_keys) == 1 and left_keys[0] == right_keys[0]:
        missing_in_left = left_keys[0] not in left
        missing_in_right = right_keys[0] not in right
        if missing_in_left and missing_in_right:
            # the key stays in the index of the result, so it takes no part
            # in the computation of the resulting columns
            left_keys, right_keys = [], []
        # otherwise the index name is added to the columns of the frame lacking it;
        # on python 3.9 with pandas-stubs 2.2, these lines will warn about insert being an untyped call,
        # but this error is no longer present on higher versions
        elif missing_in_left:
            left = left.insert(loc=0, item=left_keys[0])  # type: ignore[no-untyped-call, unused-ignore]
        elif missing_in_right:
            right = right.insert(loc=0, item=right_keys[0])  # type: ignore[no-untyped-call, unused-ignore]

    every_key_is_a_column = all(key in left for key in left_keys) and all(
        key in right for key in right_keys
    )
    if not every_key_is_a_column:
        raise NotImplementedError(
            "Cases, where one of the keys to join is an index level, are not yet supported."
        )

    # labels present in both frames need a suffix, unless they are join keys of the other side
    left_labels = set(left)
    right_labels = set(right)
    conflicting_cols = (left_labels & (right_labels - set(right_keys))) | (
        right_labels & (left_labels - set(left_keys))
    )

    def _with_suffix(label: IndexLabel, suffix: str) -> IndexLabel:
        if label not in conflicting_cols:
            return label
        if isinstance(label, tuple):
            # only the first level of a tuple label receives the suffix
            return (f"{label[0]}{suffix}", *label[1:])
        return f"{label}{suffix}"

    new_left: List = [_with_suffix(label, suffixes[0]) for label in left]
    left_renamer: Dict[IndexLabel, IndexLabel] = dict(zip(left, new_left))

    # A column that is a join key on both sides was already taken from the 'left',
    # so it must not be taken again from the 'right'.
    right_taken = [
        label for label in right if label not in left_keys or label not in right_keys
    ]
    new_right: List = [_with_suffix(label, suffixes[1]) for label in right_taken]
    right_renamer: Dict[IndexLabel, IndexLabel] = dict(zip(right_taken, new_right))

    return pandas.Index(new_left + new_right), left_renamer, right_renamer


def is_trivial_index(index: pandas.Index) -> bool:
    """
    Check if the index is trivial, i.e. holds the sequence 0, 1, ..., len(index) - 1.

    Parameters
    ----------
    index : pandas.Index
        An index to check.

    Returns
    -------
    bool
        True for an empty index, for a ``RangeIndex`` that starts at 0 with step 1
        and for an integer index holding exactly 0..len(index) - 1 in increasing
        order; False otherwise.
    """
    if not len(index):
        return True
    if isinstance(index, pandas.RangeIndex):
        # a range index is fully described by its start and step
        return index.start == 0 and index.step == 1
    if isinstance(index, pandas.Index) and is_integer_dtype(index):
        # sorted and unique integers spanning 0..len-1 must be exactly that sequence
        return (
            index.is_monotonic_increasing
            and index.is_unique
            and index.min() == 0
            and index.max() == len(index) - 1
        )
    return False


================================================
FILE: modin/core/dataframe/base/interchange/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Base Modin Dataframe functionality related to data exchange protocols."""


================================================
FILE: modin/core/dataframe/base/interchange/dataframe_protocol/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
Base Modin Dataframe functionality related to the dataframe exchange protocol.

See more in https://data-apis.org/dataframe-protocol/latest/index.html.
"""


================================================
FILE: modin/core/dataframe/base/interchange/dataframe_protocol/dataframe.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
Dataframe exchange protocol implementation.

See more in https://data-apis.org/dataframe-protocol/latest/index.html.
"""

from abc import ABC, abstractmethod
from typing import Any, Dict, Iterable, Optional, Sequence, Tuple, TypedDict

from .utils import ColumnNullType, DlpackDeviceType, DTypeKind


class ColumnBuffers(TypedDict):  # noqa: GL08
    # Typed layout of the dictionary returned by ``ProtocolColumn.get_buffers``.
    # Every non-None entry is a two-element tuple ``(buffer, dtype)``.

    # "data": first element is the buffer holding the column data;
    # second element is the dtype associated with that data buffer
    data: Tuple["ProtocolBuffer", Any]

    # "validity": first element is the buffer holding the mask values that
    # mark missing data; second element is the dtype associated with that
    # mask buffer. None if the null representation is not a bit or byte mask
    validity: Optional[Tuple["ProtocolBuffer", Any]]

    # "offsets": first element is the buffer holding the offset values for
    # variable-size binary data (e.g., variable-length strings);
    # second element is the dtype associated with that offsets buffer.
    # None if the data buffer does not have an associated offsets buffer
    offsets: Optional[Tuple["ProtocolBuffer", Any]]


class CategoricalDescription(TypedDict):  # noqa: GL08
    # Typed layout of the dictionary returned by ``ProtocolColumn.describe_categorical``.

    # whether the ordering of dictionary indices is semantically meaningful
    is_ordered: bool
    # whether a dictionary-style mapping of categorical values to other objects exists
    is_dictionary: bool
    # column representing the (implicit) mapping of indices to category values;
    # None if not a dictionary-style categorical.
    categories: Optional["ProtocolColumn"]


class ProtocolBuffer(ABC):
    """
    A contiguous block of memory that backs (part of) a column.

    A buffer has no dtype attribute of its own; it can be thought of as plain
    memory. If the column owning the buffer has a dtype supported by DLPack and
    ``__dlpack__`` is implemented, the dtype information is carried by the
    value returned from ``__dlpack__``.

    Keeping the two apart makes it possible to support both (a) data exchange
    via DLPack on a buffer and (b) dtypes, like variable-length strings, which
    do not have a fixed number of bytes per element.
    """

    @property
    @abstractmethod
    def bufsize(self) -> int:
        """
        Get the size of the buffer in bytes.

        Returns
        -------
        int
        """

    @property
    @abstractmethod
    def ptr(self) -> int:
        """
        Get the address of the start of the buffer as an integer.

        Returns
        -------
        int
        """

    @abstractmethod
    def __dlpack__(self) -> Any:
        """
        Export the buffer as a DLPack capsule (see array API standard).

        DLPack is not implemented in NumPy yet, so it is left out here.

        Raises
        ------
        ``TypeError`` if the buffer contains unsupported dtypes.
        ``NotImplementedError`` if DLPack support is not implemented.

        Notes
        -----
        This is useful for connecting to array libraries. Support is optional
        because it is not completely trivial to implement for a Python-only library.
        """

    @abstractmethod
    def __dlpack_device__(self) -> Tuple[DlpackDeviceType, Optional[int]]:
        """
        Report the device type and device ID where the buffer data resides.

        The device type codes match the ones used by DLPack. Enum members are:
            - CPU = 1
            - CUDA = 2
            - CPU_PINNED = 3
            - OPENCL = 4
            - VULKAN = 7
            - METAL = 8
            - VPI = 9
            - ROCM = 10

        Returns
        -------
        tuple
            Device type and device ID.

        Notes
        -----
        This method has to be implemented even when ``__dlpack__`` is not.
        """


class ProtocolColumn(ABC):
    """
    A column object, with only the methods and properties required by the interchange protocol defined.

    A column can contain one or more chunks. Each chunk can contain up to three
    buffers - a data buffer, a mask buffer (depending on null representation),
    and an offsets buffer (if variable-size binary; e.g., variable-length strings).

    TBD: Arrow has a separate "null" dtype, and has no separate mask concept.
         Instead, it seems to use "children" for both columns with a bit mask,
         and for nested dtypes. Unclear whether this is elegant or confusing.
         This design requires checking the null representation explicitly.
         The Arrow design requires checking:
         1. the ARROW_FLAG_NULLABLE (for sentinel values)
         2. if a column has two children, combined with one of those children
            having a null dtype.
         Making the mask concept explicit seems useful. One null dtype would
         not be enough to cover both bit and byte masks, so that would mean
         even more checking if we did it the Arrow way.
    TBD: there's also the "chunk" concept here, which is implicit in Arrow as
         multiple buffers per array (= column here). Semantically it may make
         sense to have both: chunks were meant for example for lazy evaluation
         of data which doesn't fit in memory, while multiple buffers per column
         could also come from doing a selection operation on a single
         contiguous buffer.
         Given these concepts, one would expect chunks to be all of the same
         size (say a 10,000 row dataframe could have 10 chunks of 1,000 rows),
         while multiple buffers could have data-dependent lengths. Not an issue
         in pandas if one column is backed by a single NumPy array, but in
         Arrow it seems possible.
         Are multiple chunks *and* multiple buffers per column necessary for
         the purposes of this interchange protocol, or must producers either
         reuse the chunk concept for this or copy the data?

    Notes
    -----
    This ProtocolColumn object can only be produced by ``__dataframe__``,
    so doesn't need its own version or ``__column__`` protocol.
    """

    @abstractmethod
    def size(self) -> int:
        """
        Size of the column, in elements.

        Corresponds to `DataFrame.num_rows()` if column is a single chunk;
        equal to size of this current chunk otherwise.

        Is a method rather than a property because it may cause a (potentially
        expensive) computation for some dataframe implementations.

        Returns
        -------
        int
            Size of the column, in elements.
        """
        pass

    @property
    @abstractmethod
    def offset(self) -> int:
        """
        Get the offset of first element.

        May be > 0 if using chunks; for example for a column
        with N chunks of equal size M (only the last chunk may be shorter),
        ``offset = n * M``, ``n = 0 .. N-1``.

        Returns
        -------
        int
            The offset of first element.
        """
        pass

    @property
    @abstractmethod
    def dtype(self) -> Tuple[DTypeKind, int, str, str]:
        """
        Dtype description as a tuple ``(kind, bit-width, format string, endianness)``.

        * Kind : DTypeKind
        * Bit-width : the number of bits as an integer
        * Format string : data type description format string in Apache Arrow C
                        Data Interface format.
        * Endianness : currently only native endianness (``=``) is supported

        Returns
        -------
        tuple
            ``(kind, bit-width, format string, endianness)``.

        Notes
        -----
        - Kind specifiers are aligned with DLPack where possible
          (hence the jump to 20, leave enough room for future extension).
        - Masks must be specified as boolean with either bit width 1 (for bit masks)
          or 8 (for byte masks).
        - Dtype width in bits was preferred over bytes
        - Endianness isn't too useful, but included now in case in the future
          we need to support non-native endianness
        - Went with Apache Arrow format strings over NumPy format strings
          because they're more complete from a dataframe perspective
        - Format strings are mostly useful for datetime specification, and for categoricals.
        - For categoricals, the format string describes the type of the categorical
          in the data buffer. In case of a separate encoding of the categorical
          (e.g. an integer to string mapping), this can be derived from ``self.describe_categorical``.
        - Data types not included: complex, Arrow-style null, binary, decimal,
          and nested (list, struct, map, union) dtypes.
        """
        pass

    @property
    @abstractmethod
    def describe_categorical(self) -> CategoricalDescription:
        """
        If the dtype is categorical, there are two options.

        - There are only values in the data buffer.
        - There is a separate non-categorical Column encoding categorical values.

        TBD: are there any other in-memory representations that are needed?

        Returns
        -------
        dict
            Content of returned dict:
            - "is_ordered" : bool, whether the ordering of dictionary indices is
                             semantically meaningful.
            - "is_dictionary" : bool, whether a mapping of
                                categorical values to other objects exists
            - "categories" : Column representing the (implicit) mapping of indices to
                             category values (e.g. an array of cat1, cat2, ...).
                             None if not a dictionary-style categorical.

        Raises
        ------
        ``TypeError`` if the dtype is not categorical.
        """
        pass

    @property
    @abstractmethod
    def describe_null(self) -> Tuple[ColumnNullType, Any]:
        """
        Return the missing value (or "null") representation the column dtype uses.

        Return as a tuple ``(kind, value)``.
        * Kind: ColumnNullType
        * Value : if kind is "sentinel value", the actual value. If kind is a bit
          mask or a byte mask, the value (0 or 1) indicating a missing value. None
          otherwise.

        Returns
        -------
        tuple
            ``(kind, value)``.
        """
        pass

    @property
    @abstractmethod
    def null_count(self) -> Optional[int]:
        """
        Get number of null elements, if known.

        Returns
        -------
        int or None
            Number of null elements, ``None`` if the number is not known.

        Notes
        -----
        Arrow uses -1 to indicate "unknown", but None seems cleaner.
        """
        pass

    @property
    @abstractmethod
    def metadata(self) -> Dict[str, Any]:
        """
        Get the metadata for the column.

        See `DataFrame.metadata` for more details.

        Returns
        -------
        dict
        """
        pass

    @abstractmethod
    def num_chunks(self) -> int:
        """
        Return the number of chunks the column consists of.

        Returns
        -------
        int
           The number of chunks the column consists of.
        """
        pass

    @abstractmethod
    def get_chunks(self, n_chunks: Optional[int] = None) -> Iterable["ProtocolColumn"]:
        """
        Return an iterator yielding the chunks.

        By default ``n_chunks=None``, yields the chunks that the data is stored as by the producer.
        If given, ``n_chunks`` must be a multiple of ``self.num_chunks()``,
        meaning the producer must subdivide each chunk before yielding it.

        Parameters
        ----------
        n_chunks : int, optional
            Number of chunks to yield.

        Yields
        ------
        ProtocolColumn
            A ``ProtocolColumn`` object(s).

        Raises
        ------
        ``RuntimeError`` if ``n_chunks`` is not a multiple of ``self.num_chunks()``.
        """
        pass

    @abstractmethod
    def get_buffers(self) -> ColumnBuffers:
        """
        Return a dictionary containing the underlying buffers.

        Returns
        -------
        dict
            - "data": a two-element tuple whose first element is a buffer
              containing the data and whose second element is the data buffer's associated dtype.
            - "validity": a two-element tuple whose first element is a buffer
              containing mask values indicating missing data and
              whose second element is the mask value buffer's
              associated dtype. None if the null representation is not a bit or byte mask.
            - "offsets": a two-element tuple whose first element is a buffer
              containing the offset values for variable-size binary data
              (e.g., variable-length strings) and whose second element is the offsets
              buffer's associated dtype. None if the data buffer does not have
              an associated offsets buffer.
        """
        pass


class ProtocolDataframe(ABC):
    """
    A data frame class exposing only the methods required by the interchange protocol.

    Objects with the methods and attributes defined on this (private) class
    are what
    ``modin.core.dataframe.base.dataframe.dataframe.ModinDataframe.__dataframe__``
    returns.

    A "data frame" represents an ordered collection of named columns.
    A column's "name" must be a unique string. Columns may be accessed by name or by position.
    This could be a public data frame class, or an object with the methods and
    attributes defined on this ProtocolDataframe class could be returned from the
    ``__dataframe__`` method of a public data frame class in a library adhering
    to the dataframe interchange protocol specification.
    """

    version = 0  # version of the protocol

    @abstractmethod
    def __dataframe__(
        self, nan_as_null: bool = False, allow_copy: bool = True
    ) -> "ProtocolDataframe":
        """
        Build a new dataframe interchange object, potentially with changed parameters.

        See more about the protocol in https://data-apis.org/dataframe-protocol/latest/index.html.

        Parameters
        ----------
        nan_as_null : bool, default: False
            A keyword intended for the consumer to tell the producer
            to overwrite null values in the data with ``NaN``.
            This currently has no effect; once support for nullable extension
            dtypes is added, this value should be propagated to columns.
        allow_copy : bool, default: True
            A keyword that defines whether or not the library is allowed
            to make a copy of the data. For example, copying data would be necessary
            if a library supports strided buffers, given that this protocol
            specifies contiguous buffers. Currently, if the flag is set to ``False``
            and a copy is needed, a ``RuntimeError`` will be raised.

        Returns
        -------
        ProtocolDataframe
        """

    @property
    @abstractmethod
    def metadata(self) -> Dict[str, Any]:
        """
        Get the metadata for the data frame, as a dictionary with string keys.

        The contents of `metadata` may be anything; they are meant for a library
        to store information that it needs to, e.g., roundtrip losslessly or
        for two implementations to share data that is not (yet) part of the
        interchange protocol specification. To avoid collisions with other
        entries, please name the keys with the name of the library
        followed by a period and the desired name, e.g., ``pandas.indexcol``.

        Returns
        -------
        dict
        """

    @abstractmethod
    def num_columns(self) -> int:
        """
        Get the number of columns in the ProtocolDataframe.

        Returns
        -------
        int
            The number of columns in the ProtocolDataframe.
        """

    @abstractmethod
    def num_rows(self) -> Optional[int]:
        """
        Get the number of rows in the ProtocolDataframe, if available.

        Returns
        -------
        int or None
            The number of rows in the ProtocolDataframe, if available.
        """

    @abstractmethod
    def num_chunks(self) -> int:
        """
        Get the number of chunks the ProtocolDataframe consists of.

        Returns
        -------
        int
            The number of chunks the ProtocolDataframe consists of.
        """

    @abstractmethod
    def column_names(self) -> Iterable[str]:
        """
        Get an iterator yielding the column names.

        Yields
        ------
        str
            The name of the column(s).
        """

    @abstractmethod
    def get_column(self, i: int) -> ProtocolColumn:
        """
        Get the column located at the given position.

        Parameters
        ----------
        i : int
            Positional index of the column to be returned.

        Returns
        -------
        ProtocolColumn
            The column at the indicated position.
        """

    @abstractmethod
    def get_column_by_name(self, name: str) -> ProtocolColumn:
        """
        Get the column that carries the given name.

        Parameters
        ----------
        name : str
            String label of the column to be returned.

        Returns
        -------
        ProtocolColumn
            The column whose name is the indicated name.
        """

    @abstractmethod
    def get_columns(self) -> Iterable[ProtocolColumn]:
        """
        Get an iterator yielding the columns.

        Yields
        ------
        ProtocolColumn
            The ``ProtocolColumn`` object(s).
        """

    @abstractmethod
    def select_columns(self, indices: Sequence[int]) -> "ProtocolDataframe":
        """
        Build a new ProtocolDataframe out of a subset of columns picked by index.

        Parameters
        ----------
        indices : Sequence[int]
            Column indices to be selected out of the ProtocolDataframe.

        Returns
        -------
        ProtocolDataframe
            A new ProtocolDataframe holding the subset of columns selected by index.
        """

    @abstractmethod
    def select_columns_by_name(self, names: Sequence[str]) -> "ProtocolDataframe":
        """
        Build a new ProtocolDataframe out of a subset of columns picked by name.

        Parameters
        ----------
        names : Sequence[str]
            Column names to be selected out of the ProtocolDataframe.

        Returns
        -------
        ProtocolDataframe
            A new ProtocolDataframe holding the subset of columns selected by name.
        """

    @abstractmethod
    def get_chunks(
        self, n_chunks: Optional[int] = None
    ) -> Iterable["ProtocolDataframe"]:
        """
        Get an iterator yielding the chunks.

        With the default ``n_chunks=None`` the chunks are yielded the way the
        producer stores the data. If given, ``n_chunks`` must be a multiple of
        ``self.num_chunks()``, meaning the producer must subdivide each chunk
        before yielding it.

        Parameters
        ----------
        n_chunks : int, optional
            Number of chunks to yield.

        Yields
        ------
        ProtocolDataframe
            A ``ProtocolDataframe`` object(s).

        Raises
        ------
        ``RuntimeError`` if ``n_chunks`` is not a multiple of ``self.num_chunks()``.
        """


================================================
FILE: modin/core/dataframe/base/interchange/dataframe_protocol/utils.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
Dataframe exchange protocol implementation.

See more in https://data-apis.org/dataframe-protocol/latest/index.html.
"""

import enum
import re
from typing import Optional, Union

import numpy as np
import pandas
from pandas.api.types import is_datetime64_dtype


class DTypeKind(enum.IntEnum):  # noqa PR01
    """
    Integer enum for data types.

    Attributes
    ----------
    INT : int
        Matches to signed integer data type.
    UINT : int
        Matches to unsigned integer data type.
    FLOAT : int
        Matches to floating point data type.
    BOOL : int
        Matches to boolean data type.
    STRING : int
        Matches to string data type (UTF-8 encoded).
    DATETIME : int
        Matches to datetime data type.
    CATEGORICAL : int
        Matches to categorical data type.
    """

    INT = 0
    UINT = 1
    FLOAT = 2
    # The numbering jumps to 20 here; per the notes on ``ProtocolColumn.dtype``
    # this leaves room for future extension of the DLPack-aligned kinds above.
    BOOL = 20
    STRING = 21  # UTF-8
    DATETIME = 22
    CATEGORICAL = 23


class ColumnNullType(enum.IntEnum):  # noqa PR01
    """
    Integer enum for null type representation.

    Attributes
    ----------
    NON_NULLABLE : int
        Non-nullable column.
    USE_NAN : int
        Use explicit float NaN value.
    USE_SENTINEL : int
        Sentinel value besides NaN.
    USE_BITMASK : int
        The bit is set/unset representing a null on a certain position.
    USE_BYTEMASK : int
        The byte is set/unset representing a null on a certain position.
    """

    NON_NULLABLE = 0
    USE_NAN = 1
    USE_SENTINEL = 2
    USE_BITMASK = 3
    USE_BYTEMASK = 4


class DlpackDeviceType(enum.IntEnum):  # noqa PR01
    """Integer enum for device type codes matching DLPack."""

    # The values are not contiguous (there are no members for 5 and 6):
    # each member keeps the numeric code of the DLPack device it matches.
    CPU = 1
    CUDA = 2
    CPU_PINNED = 3
    OPENCL = 4
    VULKAN = 7
    METAL = 8
    VPI = 9
    ROCM = 10


class ArrowCTypes:
    """
    Collection of Apache Arrow C type format strings.

    This is a plain class used as a namespace of string constants rather
    than an ``enum.Enum`` subclass.

    The Arrow C data interface:
    https://arrow.apache.org/docs/format/CDataInterface.html#data-type-description-format-strings
    """

    NULL = "n"
    BOOL = "b"
    INT8 = "c"
    UINT8 = "C"
    INT16 = "s"
    UINT16 = "S"
    INT32 = "i"
    UINT32 = "I"
    INT64 = "l"
    UINT64 = "L"
    FLOAT16 = "e"
    FLOAT32 = "f"
    FLOAT64 = "g"
    STRING = "u"  # utf-8
    DATE32 = "tdD"
    DATE64 = "tdm"
    # The two entries below are ``str.format`` templates, not ready-to-use
    # format strings; the placeholders have to be filled in by the caller.
    # Resolution:
    #   - seconds -> 's'
    #   - milliseconds -> 'm'
    #   - microseconds -> 'u'
    #   - nanoseconds -> 'n'
    TIMESTAMP = "ts{resolution}:{tz}"
    TIME = "tt{resolution}"


class Endianness:
    """
    String constants indicating the byte-order of a data-type.

    This is a plain class used as a namespace rather than an ``enum.Enum`` subclass.
    """

    LITTLE = "<"
    BIG = ">"
    NATIVE = "="
    NA = "|"


def pandas_dtype_to_arrow_c(dtype: Union[np.dtype, pandas.CategoricalDtype]) -> str:
    """
    Translate pandas `dtype` into a format string in Apache Arrow C notation.

    Parameters
    ----------
    dtype : np.dtype or pandas.CategoricalDtype
        Datatype of pandas DataFrame to represent.

    Returns
    -------
    str
        Format string in Apache Arrow C notation of the given `dtype`.

    Raises
    ------
    NotImplementedError
        If no Arrow C format string is known for the given `dtype`.
    """
    # Categorical dtypes are always reported with the 64-bit integer format
    if isinstance(dtype, pandas.CategoricalDtype):
        return ArrowCTypes.INT64

    # The object dtype is reported with the string format
    if dtype == pandas.api.types.pandas_dtype("O"):
        return ArrowCTypes.STRING

    # Look for a constant named after the dtype, e.g. "int8" -> ``ArrowCTypes.INT8``
    arrow_code = getattr(ArrowCTypes, dtype.name.upper(), None)
    if arrow_code is not None:
        return arrow_code

    if not is_datetime64_dtype(dtype):
        raise NotImplementedError(
            f"Convertion of {dtype} to Arrow C format string is not implemented."
        )

    # The unit sits between the square brackets of ``dtype.str`` ('<M8[ns]' -> 'ns'),
    # only its first character is used as the Arrow resolution code
    unit = re.findall(r"\[(.*)\]", dtype.str)[0]
    return ArrowCTypes.TIMESTAMP.format(resolution=unit[:1], tz="")


def raise_copy_alert(copy_reason: Optional[str] = None) -> None:
    """
    Raise a ``RuntimeError`` saying that a copy is required but not allowed.

    Parameters
    ----------
    copy_reason : str, optional
        The reason of making a copy. Should fit to the following format:
        'The copy occurred due to {copy_reason}.'.
        A missing or empty reason leaves the message without that sentence.
    """
    # The explanatory sentence is appended only for a non-empty reason
    reason_note = f" The copy occurred due to {copy_reason}." if copy_reason else ""
    raise RuntimeError("Copy required but 'allow_copy=False' is set." + reason_note)


================================================
FILE: modin/core/dataframe/base/partitioning/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Base Modin Dataframe classes related to its partitioning."""


================================================
FILE: modin/core/dataframe/base/partitioning/axis_partition.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Base class of an axis partition for a Modin Dataframe."""

from abc import ABC, abstractmethod
from typing import Any, Callable, Iterable, Optional, Tuple, Type, Union

from modin.logging import ClassLogger
from modin.logging.config import LogLevel


class BaseDataframeAxisPartition(
    ABC, ClassLogger, modin_layer="VIRTUAL-PARTITION", log_level=LogLevel.DEBUG
):  # pragma: no cover
    """
    An abstract class that represents the parent class for any axis partition class.

    This class is intended to simplify the way that operations are performed.

    Attributes
    ----------
    _PARTITIONS_METADATA_LEN : int
        The number of metadata values that the object of `partition_type` consumes.
    """

    @property
    @abstractmethod
    def list_of_blocks(self) -> list:
        """Get the list of physical partition objects that compose this partition."""
        pass

    def apply(
        self,
        func: Callable,
        *args: Iterable,
        num_splits: Optional[int] = None,
        other_axis_partition: Optional["BaseDataframeAxisPartition"] = None,
        maintain_partitioning: bool = True,
        lengths: Optional[Iterable] = None,
        manual_partition: bool = False,
        **kwargs: dict,
    ) -> Any:
        """
        Apply a function to this axis partition along full axis.

        Parameters
        ----------
        func : callable
            The function to apply. This will be preprocessed according to
            the corresponding `BaseDataframePartition` objects.
        *args : iterable
            Positional arguments to pass to `func`.
        num_splits : int, default: None
            The number of times to split the result object.
        other_axis_partition : BaseDataframeAxisPartition, default: None
            Another `BaseDataframeAxisPartition` object to be applied
            to func. This is for operations that are between two data sets.
        maintain_partitioning : bool, default: True
            Whether to keep the partitioning in the same
            orientation as it was previously or not. This is important because we may be
            operating on an individual axis partition and not touching the rest.
            In this case, we have to return the partitioning to its previous
            orientation (the lengths will remain the same). This is ignored between
            two axis partitions.
        lengths : iterable, default: None
            The list of lengths to shuffle the partition into.
        manual_partition : bool, default: False
            If True, partition the result with `lengths`.
        **kwargs : dict
            Additional keywords arguments to be passed in `func`.

        Returns
        -------
        list
            A list of `BaseDataframePartition` objects.

        Notes
        -----
        The procedures that invoke this method assume full axis
        knowledge. Implement this method accordingly.

        You must return a list of `BaseDataframePartition` objects from this method.
        """
        pass

    # Child classes must have these in order to correctly subclass.
    partition_type: Type
    _PARTITIONS_METADATA_LEN = 0

    def _wrap_partitions(
        self, partitions: list, extract_metadata: Optional[bool] = None
    ) -> list:
        """
        Wrap remote partition objects with `BaseDataframePartition` class.

        Parameters
        ----------
        partitions : list
            List of remotes partition objects to be wrapped with `BaseDataframePartition` class.
        extract_metadata : bool, optional
            Whether the partitions list contains information about partition's metadata.
            If `None` was passed will take the argument's value from the value of `cls._PARTITIONS_METADATA_LEN`.

        Returns
        -------
        list
            List of wrapped remote partition objects.
        """
        assert self.partition_type is not None

        if extract_metadata is None:
            # If `_PARTITIONS_METADATA_LEN == 0` then the execution doesn't support metadata
            # and thus we should never try extracting it, otherwise assuming that the common
            # approach of always passing the metadata is used.
            extract_metadata = bool(self._PARTITIONS_METADATA_LEN)

        if extract_metadata:
            # Here we recieve a 1D array of futures describing partitions and their metadata as:
            # [object_id{partition_idx}, metadata{partition_idx}_{metadata_idx}, ...]
            # Here's an example of such array:
            # [
            #  object_id1, metadata1_1, metadata1_2, ..., metadata1_PARTITIONS_METADATA_LEN,
            #  object_id2, metadata2_1, ..., metadata2_PARTITIONS_METADATA_LEN,
            #  ...
            #  object_idN, metadataN_1, ..., metadataN_PARTITIONS_METADATA_LEN,
            # ]
            return [
                self.partition_type(*init_args)
                for init_args in zip(
                    # `partition_type` consumes `(object_id, *metadata)`, thus adding `+1`
                    *[iter(partitions)]
                    * (1 + self._PARTITIONS_METADATA_LEN)
                )
            ]
        else:
            return [self.partition_type(object_id) for object_id in partitions]

    def force_materialization(
        self, get_ip: bool = False
    ) -> "BaseDataframeAxisPartition":
        """
        Collapse this axis partition into one that holds a single partition.

        Parameters
        ----------
        get_ip : bool, default: False
            Forwarded as the `get_ip` keyword to the constructor of the new
            axis partition (whether to get the node ip address or not).

        Returns
        -------
        BaseDataframeAxisPartition
            A new object of the same type as `self`, built from the result of
            applying an identity function with ``num_splits=1``.
        """

        def identity(frame):
            return frame

        # The data is left untouched; only the partitioning is changed by
        # requesting exactly one split from `apply`.
        single_split = self.apply(
            identity, num_splits=1, maintain_partitioning=False
        )
        axis_partition_cls = type(self)
        return axis_partition_cls(single_split, get_ip=get_ip)  # type: ignore[call-arg]

    def unwrap(
        self, squeeze: bool = False, get_ip: bool = False
    ) -> Union[list, Tuple[list, list]]:
        """
        Hand out the partitions stored in this axis partition.

        Parameters
        ----------
        squeeze : bool, default: False
            If True and there is exactly one block, return that block itself
            instead of a one-element list.
        get_ip : bool, default: False
            Whether to pair every block with the matching entry of
            `self.list_of_ips`.

        Returns
        -------
        list
            List of partitions from this axis partition (or the single
            partition when `squeeze` applies).

        Notes
        -----
        With `get_ip=True` the result is a list of ``(ip, block)`` pairs, or a
        single ``(ip, block)`` tuple when `squeeze` applies. For the Ray/Dask
        engines the elements are Ray.ObjectRef/Dask.Future objects.
        """
        want_single = squeeze and len(self.list_of_blocks) == 1
        if not get_ip:
            blocks = self.list_of_blocks
            return blocks[0] if want_single else blocks

        # TODO(https://github.com/modin-project/modin/issues/5176): Stop ignoring the list_of_ips
        # check once we know that we're not calling list_of_ips on python axis partitions
        ips = self.list_of_ips  # type: ignore[attr-defined]
        blocks = self.list_of_blocks
        if want_single:
            return ips[0], blocks[0]
        return list(zip(ips, blocks))


================================================
FILE: modin/core/dataframe/pandas/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Base Modin Dataframe classes optimized for pandas storage format."""


================================================
FILE: modin/core/dataframe/pandas/dataframe/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Base Modin Dataframe class optimized for pandas storage format."""


================================================
FILE: modin/core/dataframe/pandas/dataframe/dataframe.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
Module contains class PandasDataframe.

PandasDataframe is a parent abstract class for any dataframe class
for pandas storage format.
"""

from __future__ import annotations

import datetime
import re
from abc import ABC, abstractmethod
from functools import cached_property
from typing import TYPE_CHECKING, Callable, Dict, Hashable, List, Optional, Union

import numpy as np
import pandas
from pandas._libs.lib import no_default
from pandas.api.types import is_object_dtype
from pandas.core.dtypes.common import is_dtype_equal, is_list_like, is_numeric_dtype
from pandas.core.indexes.api import Index, RangeIndex

from modin.config import (
    IsRayCluster,
    MinColumnPartitionSize,
    MinRowPartitionSize,
    NPartitions,
)
from modin.core.dataframe.base.dataframe.dataframe import ModinDataframe
from modin.core.dataframe.base.dataframe.utils import Axis, JoinType, is_trivial_index
from modin.core.dataframe.pandas.dataframe.utils import (
    ShuffleSortFunctions,
    add_missing_categories_to_groupby,
    lazy_metadata_decorator,
)
from modin.core.dataframe.pandas.metadata import (
    DtypesDescriptor,
    LazyProxyCategoricalDtype,
    ModinDtypes,
    ModinIndex,
)
from modin.core.storage_formats.pandas.parsers import (
    find_common_type_cat as find_common_type,
)
from modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler
from modin.core.storage_formats.pandas.utils import get_length_list
from modin.error_message import ErrorMessage
from modin.logging import ClassLogger
from modin.logging.config import LogLevel
from modin.pandas.indexing import is_range_like
from modin.pandas.utils import (
    check_both_not_none,
    get_pandas_backend,
    is_full_grab_slice,
)
from modin.utils import MODIN_UNNAMED_SERIES_LABEL

if TYPE_CHECKING:
    from pandas._typing import npt

    from modin.core.dataframe.base.interchange.dataframe_protocol.dataframe import (
        ProtocolDataframe,
    )
    from modin.core.dataframe.pandas.partitioning.partition_manager import (
        PandasDataframePartitionManager,
    )


class PandasDataframe(
    ABC, ClassLogger, modin_layer="CORE-DATAFRAME", log_level=LogLevel.DEBUG
):
    """
    An abstract class that represents the parent class for any pandas storage format dataframe class.

    This class provides interfaces to run operations on dataframe partitions.

    Parameters
    ----------
    partitions : np.ndarray
        A 2D NumPy array of partitions.
    index : sequence or callable, optional
        The index for the dataframe. Converted to a ``pandas.Index``.
        Is computed from partitions on demand if not specified.
        If ``callable() -> (pandas.Index, list of row lengths or None)`` type,
        then the calculation will be delayed until `self.index` is called.
    columns : sequence, optional
        The columns object for the dataframe. Converted to a ``pandas.Index``.
        Is computed from partitions on demand if not specified.
    row_lengths : list, optional
        The length of each partition in the rows. The "height" of
        each of the block partitions. Is computed if not provided.
    column_widths : list, optional
        The width of each partition in the columns. The "width" of
        each of the block partitions. Is computed if not provided.
    dtypes : pandas.Series or callable, optional
        The data types for the dataframe columns.
    pandas_backend : {"pyarrow", None}, optional
        Backend used by pandas.
    """

    _partition_mgr_cls: PandasDataframePartitionManager
    _query_compiler_cls = PandasQueryCompiler
    # These properties flag whether or not we are deferring the metadata synchronization
    _deferred_index: bool = False
    _deferred_column: bool = False

    _index_cache: ModinIndex = None
    _columns_cache: ModinIndex = None
    _dtypes: Optional[ModinDtypes] = None
    _pandas_backend: Optional[str] = None

    @property
    def storage_format(self) -> str:
        """
        Name of the storage format that backs this frame's data.

        Returns
        -------
        str
            Always the string ``"Pandas"``.
        """
        format_name = "Pandas"
        return format_name

    @property
    @abstractmethod
    def engine(self) -> str:
        """
        Name of the execution engine of this frame.

        This property is abstract; the base implementation has no body and
        yields ``None``.

        Returns
        -------
        str
            The engine.
        """

    @cached_property
    def __constructor__(self) -> type[PandasDataframe]:
        """
        Create a new instance of this object.

        Returns
        -------
        callable
        """
        return type(self)

    def __init__(
        self,
        partitions,
        index=None,
        columns=None,
        row_lengths=None,
        column_widths=None,
        dtypes: Optional[Union[pandas.Series, ModinDtypes, Callable]] = None,
        pandas_backend: Optional[str] = None,
    ):
        self._partitions = partitions
        self.set_index_cache(index)
        self.set_columns_cache(columns)
        self._row_lengths_cache = row_lengths
        self._column_widths_cache = column_widths
        self._pandas_backend = pandas_backend
        if pandas_backend != "pyarrow" or len(partitions) == 0:
            # If the backend is pyarrow and there are no partitions, the computed dtype otherwise becomes NaN,
            # which means we lost the dtype, so actually set it in that case
            self.set_dtypes_cache(dtypes)
        else:
            # In this case, the type precomputation may be incorrect; we need
            # to know the type algebra precisely. Considering the number of operations
            # and different combinations of backends, the best solution would be to
            # introduce optimizations gradually, with a large number of tests.
            self.set_dtypes_cache(None)

        self._validate_axes_lengths()
        self._filter_empties(compute_metadata=False)

    def _validate_axes_lengths(self):
        """Validate that labels are split correctly if split is known."""
        if (
            self._row_lengths_cache is not None
            and self.has_materialized_index
            and len(self.index) > 0
        ):
            # An empty frame can have 0 rows but a nonempty index. If the frame
            # does have rows, the number of rows must equal the size of the
            # index.
            num_rows = sum(self._row_lengths_cache)
            if num_rows > 0:
                ErrorMessage.catch_bugs_and_request_email(
                    num_rows != len(self.index),
                    f"Row lengths: {num_rows} != {len(self.index)}",
                )
            ErrorMessage.catch_bugs_and_request_email(
                any(val < 0 for val in self._row_lengths_cache),
                f"Row lengths cannot be negative: {self._row_lengths_cache}",
            )
        if (
            self._column_widths_cache is not None
            and self.has_materialized_columns
            and len(self.columns) > 0
        ):
            # An empty frame can have 0 column but a nonempty column index. If
            # the frame does have columns, the number of columns must equal the
            # size of the columns.
            num_columns = sum(self._column_widths_cache)
            if num_columns > 0:
                ErrorMessage.catch_bugs_and_request_email(
                    num_columns != len(self.columns),
                    f"Column widths: {num_columns} != {len(self.columns)}",
                )
            ErrorMessage.catch_bugs_and_request_email(
                any(val < 0 for val in self._column_widths_cache),
                f"Column widths cannot be negative: {self._column_widths_cache}",
            )

    @property
    def num_parts(self) -> int:
        """
        Get the total number of partitions for this frame.

        Returns
        -------
        int
            Product of the dimensions of ``self._partitions.shape``.
        """
        # `np.prod` yields a NumPy scalar; convert it so that the declared
        # built-in `int` return type actually holds.
        return int(np.prod(self._partitions.shape))

    @property
    def row_lengths(self):
        """
        Get the lengths of the row partitions, computing them on first use.

        The lengths are measured on the first column of partitions and stored
        in ``self._row_lengths_cache``; an empty list is stored if the
        transposed partitions array is empty.

        Returns
        -------
        list
            A list of row partitions lengths.
        """
        if self._row_lengths_cache is not None:
            return self._row_lengths_cache

        transposed = self._partitions.T
        if len(transposed) > 0:
            lengths = self._get_lengths(transposed[0], Axis.ROW_WISE)
        else:
            lengths = []
        self._row_lengths_cache = lengths
        return self._row_lengths_cache

    @classmethod
    def _get_lengths(cls, parts, axis):
        """
        Get the sizes of all the provided parts along one axis.

        Parameters
        ----------
        parts : list
            List of partitions.
        axis : Axis
            ``Axis.ROW_WISE`` to collect ``part.length()`` of every part; any
            other value collects ``part.width()``.

        Returns
        -------
        list
        """
        measure_rows = axis == Axis.ROW_WISE
        if measure_rows:
            sizes = [part.length() for part in parts]
        else:
            sizes = [part.width() for part in parts]
        return sizes

    def __len__(self) -> int:
        """
        Return length of index axis.

        Returns
        -------
        int
        """
        if self.has_materialized_index:
            _len = len(self.index)
        else:
            _len = sum(self.row_lengths)
        return _len

    @property
    def column_widths(self):
        """
        Get the widths of the column partitions, computing them on first use.

        The widths are measured on the first row of partitions and stored in
        ``self._column_widths_cache``; an empty list is stored if there are no
        partition rows.

        Returns
        -------
        list
            A list of column partitions widths.
        """
        if self._column_widths_cache is not None:
            return self._column_widths_cache

        if len(self._partitions) > 0:
            widths = self._get_lengths(self._partitions[0], Axis.COL_WISE)
        else:
            widths = []
        self._column_widths_cache = widths
        return self._column_widths_cache

    def _set_axis_lengths_cache(self, value, axis=0):
        """
        Set the row/column lengths cache for the specified axis.

        Parameters
        ----------
        value : list of ints
        axis : int, default: 0
            0 for row lengths and 1 for column widths.
        """
        if axis == 0:
            self._row_lengths_cache = value
        else:
            self._column_widths_cache = value

    def _get_axis_lengths_cache(self, axis=0):
        """
        Get partition's shape caches along the specified axis if avaliable.

        Parameters
        ----------
        axis : int, default: 0
            0 - get row lengths cache, 1 - get column widths cache.

        Returns
        -------
        list of ints or None
            If the cache is computed return a list of ints, ``None`` otherwise.
        """
        return self._row_lengths_cache if axis == 0 else self._column_widths_cache

    def _get_axis_lengths(self, axis: int = 0) -> List[int]:
        """
        Get row lengths/column widths.

        Parameters
        ----------
        axis : int, default: 0

        Returns
        -------
        list of ints
        """
        return self.row_lengths if axis == 0 else self.column_widths

    @property
    def has_dtypes_cache(self) -> bool:
        """
        Tell whether a dtypes cache object is attached to the frame.

        Returns
        -------
        bool
            True if ``self._dtypes`` is not ``None``.
        """
        cache = self._dtypes
        return cache is not None

    @property
    def has_materialized_dtypes(self) -> bool:
        """
        Check if dataframe has materialized dtypes cache.

        Returns
        -------
        bool
            False when there is no dtypes cache, otherwise the cache's
            ``is_materialized`` flag.
        """
        if not self.has_dtypes_cache:
            return False
        return self._dtypes.is_materialized

    def copy_dtypes_cache(self):
        """
        Make a copy of the dtypes cache.

        Returns
        -------
        pandas.Series, callable or None
            The result of ``self._dtypes.copy()`` if the cache exists,
            ``None`` otherwise.
        """
        if not self.has_dtypes_cache:
            return None
        return self._dtypes.copy()

    def _maybe_update_proxies(self, dtypes, new_parent=None):
        """
        Update lazy proxies stored inside of `dtypes` with a new parent inplace.

        Parameters
        ----------
        dtypes : pandas.Series, ModinDtypes or callable
        new_parent : object, optional
            A new parent to link the proxies to. If not specified
            will consider the `self` to be a new parent.

        Returns
        -------
        pandas.Series, ModinDtypes or callable
            `dtypes` itself, or the object returned by
            ``ModinDtypes.maybe_specify_new_frame_ref`` for a ``ModinDtypes`` input.
        """
        # Compare with `None` explicitly instead of relying on truthiness:
        # frames define `__len__`, so `new_parent or self` would evaluate the
        # length of `new_parent` (which may compute row lengths) and would
        # silently replace an empty frame with `self`.
        if new_parent is None:
            new_parent = self
        if isinstance(dtypes, ModinDtypes):
            dtypes = dtypes.maybe_specify_new_frame_ref(new_parent)
        if isinstance(dtypes, pandas.Series):
            LazyProxyCategoricalDtype.update_dtypes(dtypes, new_parent)
        return dtypes

    def set_dtypes_cache(self, dtypes):
        """
        Install a new dtypes cache.

        Parameters
        ----------
        dtypes : pandas.Series, ModinDtypes, callable or None
            ``ModinDtypes`` objects are stored as is, other non-``None`` values
            are wrapped into ``ModinDtypes``. For ``None`` a descriptor marking
            all columns as having unknown dtypes is stored when the columns are
            materialized, otherwise the cache is reset to ``None``.
        """
        dtypes = self._maybe_update_proxies(dtypes)

        if dtypes is not None:
            if isinstance(dtypes, ModinDtypes):
                self._dtypes = dtypes
            else:
                self._dtypes = ModinDtypes(dtypes)
            return

        if not self.has_materialized_columns:
            self._dtypes = None
            return

        # try to set a descriptor instead of 'None' to be more flexible in
        # dtypes computing
        try:
            descriptor = DtypesDescriptor(
                cols_with_unknown_dtypes=self.columns.tolist(), parent_df=self
            )
            self._dtypes = ModinDtypes(descriptor)
        except NotImplementedError:
            self._dtypes = None

    @property
    def dtypes(self):
        """
        Get the data types of the columns, computing them if there is no cache.

        Returns
        -------
        pandas.Series
            A pandas Series containing the data types for this dataframe.
        """
        if not self.has_dtypes_cache:
            computed = self._compute_dtypes()
            self.set_dtypes_cache(computed)
            # During materialization, we can find out the backend and, if it
            # is suitable, use the ability to pre-calculate types.
            self._pandas_backend = get_pandas_backend(computed)
            return computed
        return self._dtypes.get()

    def get_dtypes_set(self):
        """
        Get a set of dtypes that are in this dataframe.

        The query is delegated to the ``ModinDtypes`` cache when there is one;
        otherwise the set is built from the values of ``self.dtypes``.

        Returns
        -------
        set
        """
        cache = self._dtypes
        if not isinstance(cache, ModinDtypes):
            return set(self.dtypes.values)
        return cache.get_dtypes_set()

    def _compute_dtypes(self, columns=None) -> pandas.Series:
        """
        Compute the data types via TreeReduce pattern for the specified columns.

        Parameters
        ----------
        columns : list-like, optional
            Columns to compute dtypes for. If not specified compute dtypes
            for all the columns in the dataframe.

        Returns
        -------
        pandas.Series
            A pandas Series containing the data types for this dataframe.
        """

        def dtype_builder(df):
            return df.apply(lambda col: find_common_type(col.values), axis=0)

        if columns is not None:
            # Sorting positions to request columns in the order they're stored (it's more efficient)
            numeric_indices = sorted(self.columns.get_indexer_for(columns))
            if any(pos < 0 for pos in numeric_indices):
                raise KeyError(
                    f"Some of the columns are not in index: subset={columns}; columns={self.columns}"
                )
            obj = self.take_2d_labels_or_positional(
                col_labels=self.columns[numeric_indices].tolist()
            )
        else:
            obj = self

        # For now we will use a pandas Series for the dtypes.
        if len(obj.columns) > 0:
            dtypes = (
                obj.tree_reduce(0, lambda df: df.dtypes, dtype_builder)
                .to_pandas()
                .iloc[0]
            )
        else:
            dtypes = pandas.Series([])
        # reset name to None because we use MODIN_UNNAMED_SERIES_LABEL internally
        dtypes.name = None
        return dtypes

    def set_index_cache(self, index):
        """
        Install a new index cache.

        Parameters
        ----------
        index : sequence, callable or None
            ``None`` creates a ``ModinIndex`` bound to this frame along axis 0,
            a ``ModinIndex`` is re-linked to this frame, anything else is
            wrapped into a new ``ModinIndex``.
        """
        if isinstance(index, ModinIndex):
            # update reference with the new frame to not pollute memory
            new_cache = index.maybe_specify_new_frame_ref(self, axis=0)
        elif index is None:
            new_cache = ModinIndex(self, axis=0)
        else:
            new_cache = ModinIndex(index)
        self._index_cache = new_cache

    def set_columns_cache(self, columns):
        """
        Install a new columns cache.

        Parameters
        ----------
        columns : sequence, callable or None
            ``None`` creates a ``ModinIndex`` bound to this frame along axis 1,
            a ``ModinIndex`` is re-linked to this frame, anything else is
            wrapped into a new ``ModinIndex``.
        """
        if isinstance(columns, ModinIndex):
            # update reference with the new frame to not pollute memory
            new_cache = columns.maybe_specify_new_frame_ref(self, axis=1)
        elif columns is None:
            new_cache = ModinIndex(self, axis=1)
        else:
            new_cache = ModinIndex(columns)
        self._columns_cache = new_cache

    def set_axis_cache(self, value, axis=0):
        """
        Install a labels cache for the specified axis (index or columns).

        Parameters
        ----------
        value : sequence, callable or None
        axis : int, default: 0
            0 targets the index cache, any other value the columns cache.
        """
        setter = self.set_index_cache if axis == 0 else self.set_columns_cache
        setter(value)

    def has_axis_cache(self, axis=0) -> bool:
        """
        Tell whether the labels cache for the specified axis exists.

        Parameters
        ----------
        axis : int, default: 0
            0 checks the index cache, any other value the columns cache.

        Returns
        -------
        bool
        """
        if axis == 0:
            return self.has_index_cache
        return self.has_columns_cache

    @property
    def has_index_cache(self):
        """
        Tell whether an index cache object is attached to the frame.

        Returns
        -------
        bool
            True if ``self._index_cache`` is not ``None``.
        """
        cache = self._index_cache
        return cache is not None

    def copy_index_cache(self, copy_lengths=False):
        """
        Make a copy of the index cache.

        Parameters
        ----------
        copy_lengths : bool, default: False
            Whether to copy the stored partition lengths to the
            new index object.

        Returns
        -------
        pandas.Index, callable or ModinIndex
            ``self._index_cache.copy(copy_lengths)`` if the cache exists,
            otherwise the stored (empty) cache value itself.
        """
        if not self.has_index_cache:
            return self._index_cache
        return self._index_cache.copy(copy_lengths)

    def _get_axis_cache(self, axis=0) -> ModinIndex:
        """
        Get axis cache for the specified axis if available.

        Parameters
        ----------
        axis : int, default: 0

        Returns
        -------
        ModinIndex
        """
        return self._index_cache if axis == 0 else self._columns_cache

    @property
    def has_columns_cache(self):
        """
        Tell whether a columns cache object is attached to the frame.

        Returns
        -------
        bool
            True if ``self._columns_cache`` is not ``None``.
        """
        cache = self._columns_cache
        return cache is not None

    def copy_columns_cache(self, copy_lengths=False):
        """
        Make a copy of the columns cache.

        Parameters
        ----------
        copy_lengths : bool, default: False
            Whether to copy the stored partition lengths to the
            new index object.

        Returns
        -------
        pandas.Index or None
            ``self._columns_cache.copy(copy_lengths)`` if the cache is set,
            ``None`` otherwise.
        """
        cache = self._columns_cache
        return None if cache is None else cache.copy(copy_lengths)

    def copy_axis_cache(self, axis=0, copy_lengths=False):
        """
        Make a copy of the labels cache of the specified axis.

        Parameters
        ----------
        axis : int, default: 0
            0 copies the index cache, any other value the columns cache.
        copy_lengths : bool, default: False
            Whether to copy the stored partition lengths to the
            new index object.

        Returns
        -------
        pandas.Index, callable or None
            Whatever ``copy_index_cache`` / ``copy_columns_cache`` returns.
        """
        copier = self.copy_index_cache if axis == 0 else self.copy_columns_cache
        return copier(copy_lengths)

    @property
    def has_materialized_index(self):
        """
        Tell whether the index cache exists and is materialized.

        Returns
        -------
        bool
            False when there is no index cache, otherwise the cache's
            ``is_materialized`` flag.
        """
        if not self.has_index_cache:
            return False
        return self._index_cache.is_materialized

    @property
    def has_materialized_columns(self):
        """
        Tell whether the columns cache exists and is materialized.

        Returns
        -------
        bool
            False when there is no columns cache, otherwise the cache's
            ``is_materialized`` flag.
        """
        if not self.has_columns_cache:
            return False
        return self._columns_cache.is_materialized

    def _validate_set_axis(self, new_labels, old_labels):
        """
        Check that the old labels can be replaced with the new ones.

        Parameters
        ----------
        new_labels : list-like
            The labels to replace with.
        old_labels : list-like
            The labels to replace.

        Returns
        -------
        ModinIndex
            The validated labels: `new_labels` itself if it already was a
            ``ModinIndex``, otherwise `new_labels` wrapped into one.

        Raises
        ------
        ValueError
            If the numbers of old and new labels differ.
        """
        if not isinstance(new_labels, ModinIndex):
            new_labels = ModinIndex(new_labels)
        old_len, new_len = len(old_labels), len(new_labels)
        if old_len == new_len:
            return new_labels
        raise ValueError(
            f"Length mismatch: Expected axis has {old_len} elements, "
            + f"new values have {new_len} elements"
        )

    def _get_index(self):
        """
        Get the index from the cache object.

        Returns
        -------
        pandas.Index
            An index object containing the row labels.
        """
        if self.has_index_cache:
            index, row_lengths = self._index_cache.get(return_lengths=True)
        else:
            index, row_lengths = self._compute_axis_labels_and_lengths(0)
            self.set_index_cache(index)
        if self._row_lengths_cache is None:
            self._row_lengths_cache = row_lengths
        return index

    def _get_columns(self):
        """
        Get the columns from the cache object.

        Returns
        -------
        pandas.Index
            An index object containing the column labels.
        """
        if self.has_columns_cache:
            columns, column_widths = self._columns_cache.get(return_lengths=True)
        else:
            columns, column_widths = self._compute_axis_labels_and_lengths(1)
            self.set_columns_cache(columns)
        if self._column_widths_cache is None:
            self._column_widths_cache = column_widths
        return columns

    def _set_index(self, new_index):
        """
        Replace the current row labels with new labels.

        Parameters
        ----------
        new_index : list-like
            The new row labels.
        """
        if self.has_materialized_index:
            new_index = self._validate_set_axis(new_index, self._index_cache)
        self.set_index_cache(new_index)
        self.synchronize_labels(axis=0)

    def _set_columns(self, new_columns):
        """
        Replace the current column labels with new labels.

        Parameters
        ----------
        new_columns : list-like
           The new column labels.
        """
        if self.has_materialized_columns:
            # do not set new columns if they're identical to the previous ones
            if (
                isinstance(new_columns, pandas.Index)
                and self.columns.identical(new_columns)
            ) or (
                not isinstance(new_columns, pandas.Index)
                and np.array_equal(self.columns.values, new_columns)
            ):
                return
            new_columns = self._validate_set_axis(new_columns, self._columns_cache)
        if isinstance(self._dtypes, ModinDtypes):
            try:
                new_dtypes = self._dtypes.set_index(new_columns)
            except NotImplementedError:
                # can raise on duplicated labels
                new_dtypes = None
        elif isinstance(self._dtypes, pandas.Series):
            new_dtypes = self.dtypes.set_axis(new_columns)
        else:
            new_dtypes = None
        self.set_columns_cache(new_columns)
        # we have to set new dtypes cache after columns,
        # so the 'self.columns' and 'new_dtypes.index' indices would match
        self.set_dtypes_cache(new_dtypes)
        self.synchronize_labels(axis=1)

    # Public `columns` / `index` attributes: reads go through `_get_columns` /
    # `_get_index`, assignments go through `_set_columns` / `_set_index`.
    columns = property(_get_columns, _set_columns)
    index = property(_get_index, _set_index)

    @property
    def axes(self):
        """
        Get both label objects so that they can be addressed by an `axis` integer.

        Returns
        -------
        list
            List with two values: index (position 0) and columns (position 1).
        """
        row_labels = self.index
        column_labels = self.columns
        return [row_labels, column_labels]

    def get_axis(self, axis: int = 0) -> pandas.Index:
        """
        Get the labels of the requested axis.

        Parameters
        ----------
        axis : {0, 1}, default: 0
            0 returns ``self.index``, any other value returns ``self.columns``.

        Returns
        -------
        pandas.Index
        """
        if axis == 0:
            return self.index
        return self.columns

    def _compute_axis_labels_and_lengths(self, axis: int, partitions=None):
        """
        Compute the labels for specific `axis`.

        Parameters
        ----------
        axis : int
            Axis to compute labels along.
        partitions : np.ndarray, optional
            A 2D NumPy array of partitions from which labels will be grabbed.
            If not specified, partitions will be taken from `self._partitions`.

        Returns
        -------
        pandas.Index
            Labels for the specified `axis`.
        List of int
            Size of partitions alongside specified `axis`.
        """
        if partitions is None:
            partitions = self._partitions
        new_index, internal_idx = self._partition_mgr_cls.get_indices(axis, partitions)
        return new_index, list(map(len, internal_idx))

    def _filter_empties(self, compute_metadata=True):
        """
        Remove empty partitions from `self._partitions` to avoid triggering excess computation.

        Partitions sitting in a row with zero length or in a column with zero
        width are dropped, the sizes caches are updated accordingly, and the
        labels cache of a modified axis is re-created without the stored
        partition lengths. Frames that look empty are left untouched.

        Parameters
        ----------
        compute_metadata : bool, default: True
            Trigger the computations for partition sizes and labels if they're not done already.
            If False and at least one of the sizes caches is missing, the method
            returns without doing anything.
        """
        if not compute_metadata and (
            self._row_lengths_cache is None or self._column_widths_cache is None
        ):
            # do not trigger the computations
            return

        # NOTE: `and` binds tighter than `or`, so this reads as
        # (index materialized and empty) or (columns materialized and empty)
        # or (no rows in partitions) or (no columns in partitions).
        if (
            self.has_materialized_index
            and len(self.index) == 0
            or self.has_materialized_columns
            and len(self.columns) == 0
            or sum(self.row_lengths) == 0
            or sum(self.column_widths) == 0
        ):
            # This is the case for an empty frame. We don't want to completely remove
            # all metadata and partitions so for the moment, we won't prune if the frame
            # is empty.
            # TODO: Handle empty dataframes better
            return
        # Keep only the partitions whose row length and column width are both
        # non-zero; positions without a matching size entry are dropped as well.
        self._partitions = np.array(
            [
                [
                    self._partitions[i][j]
                    for j in range(len(self._partitions[i]))
                    if j < len(self.column_widths) and self.column_widths[j] != 0
                ]
                for i in range(len(self._partitions))
                if i < len(self.row_lengths) and self.row_lengths[i] != 0
            ]
        )
        new_col_widths = [w for w in self.column_widths if w != 0]
        new_row_lengths = [r for r in self.row_lengths if r != 0]

        # check whether an axis partitioning was modified and if we should reset the lengths id for 'ModinIndex'
        if new_col_widths != self.column_widths:
            self.set_columns_cache(self.copy_columns_cache(copy_lengths=False))
        if new_row_lengths != self.row_lengths:
            self.set_index_cache(self.copy_index_cache(copy_lengths=False))

        # The sizes caches are replaced only after the comparisons above, which
        # still need the old values.
        self._column_widths_cache = new_col_widths
        self._row_lengths_cache = new_row_lengths

    def synchronize_labels(self, axis=None):
        """
        Mark axis labels of the ``PandasDataframe`` as deferred.

        Parameters
        ----------
        axis : int, optional
            The deferred axis.
            0 for the index, any other non-None value for the columns.
            If not specified, both axes are marked.
        """
        both_axes = axis is None
        if both_axes or axis == 0:
            self._deferred_index = True
        if both_axes or not (axis == 0):
            self._deferred_column = True

    def _propagate_index_objs(self, axis=None) -> None:
        """
        Synchronize labels by applying the index object for specific `axis` to the `self._partitions` lazily.

        Adds `set_axis` function to call-queue of each partition from `self._partitions`
        to apply new axis.

        Parameters
        ----------
        axis : int, optional
            The axis to apply to. If it's None applies to both axes.

        Notes
        -----
        Empty partitions are pruned first, but only if both sizes caches are
        already populated (``compute_metadata=False``). Every partition receives
        the slice of ``self.index`` and/or ``self.columns`` matching its position
        in the grid, and the corresponding ``_deferred_index`` /
        ``_deferred_column`` flag is reset afterwards. An `axis` other than
        ``None``, 0 or 1 is reported via ``ErrorMessage.catch_bugs_and_request_email``.
        """
        self._filter_empties(compute_metadata=False)
        # Cumulative offsets: partition number ``k`` along an axis owns the labels
        # located at positions ``[cum[k], cum[k + 1])`` of that axis.
        if axis is None or axis == 0:
            cum_row_lengths = np.cumsum([0] + self.row_lengths)
        if axis is None or axis == 1:
            cum_col_widths = np.cumsum([0] + self.column_widths)

        if axis is None:

            def apply_idx_objs(df, idx, cols):
                # We should make at least one copy to avoid the data modification problem
                # that may arise when sharing buffers from distributed storage
                # (zero-copy pickling).
                return df.set_axis(idx, axis="index").set_axis(
                    cols, axis="columns", copy=False
                )

            # Queue the relabeling of both axes on every partition; both sizes
            # are known here since they were used to build the offsets above.
            self._partitions = np.array(
                [
                    [
                        self._partitions[i][j].add_to_apply_calls(
                            apply_idx_objs,
                            idx=self.index[
                                slice(cum_row_lengths[i], cum_row_lengths[i + 1])
                            ],
                            cols=self.columns[
                                slice(cum_col_widths[j], cum_col_widths[j + 1])
                            ],
                            length=self.row_lengths[i],
                            width=self.column_widths[j],
                        )
                        for j in range(len(self._partitions[i]))
                    ]
                    for i in range(len(self._partitions))
                ]
            )
            self._deferred_index = False
            self._deferred_column = False
        elif axis == 0:

            def apply_idx_objs(df, idx):
                return df.set_axis(idx, axis="index")

            self._partitions = np.array(
                [
                    [
                        self._partitions[i][j].add_to_apply_calls(
                            apply_idx_objs,
                            idx=self.index[
                                slice(cum_row_lengths[i], cum_row_lengths[i + 1])
                            ],
                            length=self.row_lengths[i],
                            # The width is forwarded only when the widths cache is
                            # populated; otherwise ``None`` is passed.
                            width=(
                                self.column_widths[j]
                                if self._column_widths_cache is not None
                                else None
                            ),
                        )
                        for j in range(len(self._partitions[i]))
                    ]
                    for i in range(len(self._partitions))
                ]
            )
            self._deferred_index = False
        elif axis == 1:

            def apply_idx_objs(df, cols):
                return df.set_axis(cols, axis="columns")

            self._partitions = np.array(
                [
                    [
                        self._partitions[i][j].add_to_apply_calls(
                            apply_idx_objs,
                            cols=self.columns[
                                slice(cum_col_widths[j], cum_col_widths[j + 1])
                            ],
                            # The length is forwarded only when the lengths cache is
                            # populated; otherwise ``None`` is passed.
                            length=(
                                self.row_lengths[i]
                                if self._row_lengths_cache is not None
                                else None
                            ),
                            width=self.column_widths[j],
                        )
                        for j in range(len(self._partitions[i]))
                    ]
                    for i in range(len(self._partitions))
                ]
            )
            self._deferred_column = False
        else:
            # Reached only for an unsupported `axis` value, so the condition
            # passed below is always true at this point.
            ErrorMessage.catch_bugs_and_request_email(
                axis is not None and axis not in [0, 1]
            )

    @lazy_metadata_decorator(apply_axis=None)
    def take_2d_labels_or_positional(
        self,
        row_labels: Optional[List[Hashable]] = None,
        row_positions: Optional[List[int]] = None,
        col_labels: Optional[List[Hashable]] = None,
        col_positions: Optional[List[int]] = None,
    ) -> PandasDataframe:
        """
        Lazily select rows and/or columns, addressed by labels or by positions.

        Labels, when given, are translated into positions and the selection is
        then delegated to ``_take_2d_positional``.

        Parameters
        ----------
        row_labels : list of hashable, optional
            The row labels to extract.
        row_positions : list-like of ints, optional
            The row positions to extract.
        col_labels : list of hashable, optional
            The column labels to extract.
        col_positions : list-like of ints, optional
            The column positions to extract.

        Returns
        -------
        PandasDataframe
             A new PandasDataframe from the mask provided.

        Raises
        ------
        ValueError
            If both `row_labels` and `row_positions`, or both `col_labels`
            and `col_positions`, are provided.
        """

        def positions_from_labels(axis_labels, labels):
            """Translate `labels` into integer positions within `axis_labels`."""
            if not isinstance(axis_labels, pandas.MultiIndex):
                return axis_labels.get_indexer_for(labels)
            # ``.get_locs(labels)`` can't be called on the whole list because it
            # requires a different format, so every label is resolved on its own.
            positions = np.zeros(len(labels), dtype="int64")
            for pos, label in enumerate(labels):
                key = [label] if isinstance(label, str) else label
                # Only the first matching location is taken for each label.
                positions[pos] = axis_labels.get_locs(key)[0]
            return positions

        if check_both_not_none(row_labels, row_positions):
            raise ValueError(
                "Both row_labels and row_positions were provided - "
                "please provide only one of row_labels and row_positions."
            )
        if check_both_not_none(col_labels, col_positions):
            raise ValueError(
                "Both col_labels and col_positions were provided - "
                "please provide only one of col_labels and col_positions."
            )

        if row_labels is not None:
            row_positions = positions_from_labels(self.index, row_labels)
        if col_labels is not None:
            col_positions = positions_from_labels(self.columns, col_labels)

        return self._take_2d_positional(row_positions, col_positions)

    def _get_sorted_positions(self, positions):
        """
        Return `positions` in ascending order.

        Range-like positions with a positive step are returned as is,
        anything else goes through ``np.sort``.

        Parameters
        ----------
        positions : Sequence[int]

        Returns
        -------
        Sequence[int]
        """
        # Helper for take_2d_positional
        already_ascending = is_range_like(positions) and positions.step > 0
        return positions if already_ascending else np.sort(positions)

    def _get_new_lengths(self, partitions_dict, *, axis: int) -> List[int]:
        """
        Find lengths of new partitions.

        Parameters
        ----------
        partitions_dict : dict
        axis : int

        Returns
        -------
        list[int]
        """
        # Helper for take_2d_positional
        if axis == 0:
            axis_lengths = self.row_lengths
        else:
            axis_lengths = self.column_widths

        new_lengths = [
            len(
                # Row lengths for slice are calculated as the length of the slice
                # on the partition. Often this will be the same length as the current
                # length, but sometimes it is different, thus the extra calculation.
                range(*part_indexer.indices(axis_lengths[part_idx]))
                if isinstance(part_indexer, slice)
                else part_indexer
            )
            for part_idx, part_indexer in partitions_dict.items()
        ]
        return new_lengths

    def _get_new_index_obj(
        self, positions, sorted_positions, axis: int
    ) -> tuple[pandas.Index, slice | npt.NDArray[np.intp]]:
        """
        Build the labels of the take_2d_positional result for `axis`.

        Parameters
        ----------
        positions : Sequence[int]
            Requested positions in their original order.
        sorted_positions : Sequence[int]
            The same positions in ascending order.
        axis : int
            0 to index ``self.index``, otherwise ``self.columns``.

        Returns
        -------
        pandas.Index
            Labels selected by the monotonic indexer.
        slice or Sequence[int]
            The monotonic indexer that was applied to the labels.
        """
        # Helper for take_2d_positional
        axis_labels = self.index if axis == 0 else self.columns

        # TODO: Support fast processing of negative-step ranges
        takes_slice = is_range_like(positions) and positions.step > 0
        if takes_slice:
            # A slice indexer gives pandas Index a better chance
            # of preserving its metadata
            indexer = slice(positions.start, positions.stop, positions.step)
        else:
            indexer = np.asarray(sorted_positions, dtype=np.intp)

        return axis_labels[indexer], indexer

    def _take_2d_positional(
        self,
        row_positions: Optional[List[int]] = None,
        col_positions: Optional[List[int]] = None,
    ) -> PandasDataframe:
        """
        Lazily select columns or rows from given indices.

        Parameters
        ----------
        row_positions : list-like of ints, optional
            The row positions to extract.
        col_positions : list-like of ints, optional
            The column positions to extract.

        Returns
        -------
        PandasDataframe
             A new PandasDataframe from the mask provided.

        Notes
        -----
        A ``None`` indexer (or a unit-step range spanning the whole axis) means
        that the full axis is taken. The data is first masked out of the partitions
        using positions in ascending order, and the requested order is restored
        afterwards by ``_maybe_reorder_labels``. For row-only selections the method
        may delegate directly to ``_reorder_labels`` based on a size heuristic.
        """
        # Normalize both indexers: full-axis ranges become ``None``, python ranges
        # become ``pandas.RangeIndex`` and lists become int64 NumPy arrays.
        indexers = []
        for axis, indexer in enumerate((row_positions, col_positions)):
            if is_range_like(indexer):
                if indexer.step == 1 and len(indexer) == len(self.get_axis(axis)):
                    # By this function semantics, `None` indexer is a full-axis access
                    indexer = None
                elif indexer is not None and not isinstance(indexer, pandas.RangeIndex):
                    # Pure python's range is not fully compatible with a list of ints,
                    # converting it to ``pandas.RangeIndex``` that is compatible.
                    indexer = pandas.RangeIndex(
                        indexer.start, indexer.stop, indexer.step
                    )
            else:
                ErrorMessage.catch_bugs_and_request_email(
                    failure_condition=not (indexer is None or is_list_like(indexer)),
                    extra_log="Mask takes only list-like numeric indexers, "
                    + f"received: {type(indexer)}",
                )
                if isinstance(indexer, list):
                    indexer = np.array(indexer, dtype=np.int64)
            indexers.append(indexer)
        row_positions, col_positions = indexers

        # Nothing to select along either axis: the result is a copy of the frame.
        if col_positions is None and row_positions is None:
            return self.copy()

        # quite fast check that allows skip sorting
        must_sort_row_pos = row_positions is not None and not np.all(
            row_positions[1:] >= row_positions[:-1]
        )
        must_sort_col_pos = col_positions is not None and not np.all(
            col_positions[1:] >= col_positions[:-1]
        )

        if col_positions is None and row_positions is not None:
            # Check if the optimization that first takes part of the data using the mask
            # operation so that later less data is concatenated into a whole column is useful.
            # In the case when only a small portion of the data is discarded, the overhead of the
            # engine (for putting data in and out of storage) can exceed the resulting speedup.
            # The total number of rows stays ``None`` when it is not already known
            # and the positions do not require sorting.
            all_rows = None
            if self.has_materialized_index:
                all_rows = len(self.index)
            elif self._row_lengths_cache or must_sort_row_pos:
                all_rows = sum(self.row_lengths)

            # 'base_num_cols' specifies the number of columns that the dataframe should have
            # in order to jump to 'reordered_labels' in case of len(row_positions) / len(self) >= base_ratio;
            # these variables may be a subject to change in order to tune performance more accurately
            base_num_cols = 10
            base_ratio = 0.2
            # Example:
            #   len(self.columns): 10 == base_num_cols -> min ratio to jump to reorder_labels: 0.2 == base_ratio
            #   len(self.columns): 15 -> min ratio to jump to reorder_labels: 0.3
            #   len(self.columns): 20 -> min ratio to jump to reorder_labels: 0.4
            #   ...
            #   len(self.columns): 49 -> min ratio to jump to reorder_labels: 0.98
            #   len(self.columns): 50 -> min ratio to jump to reorder_labels: 1.0
            #   len(self.columns): 55 -> min ratio to jump to reorder_labels: 1.0
            #   ...
            if (all_rows and len(row_positions) > 0.9 * all_rows) or (
                must_sort_row_pos
                and len(row_positions) * base_num_cols
                >= min(
                    all_rows * len(self.columns) * base_ratio,
                    len(row_positions) * base_num_cols,
                )
            ):
                return self._reorder_labels(
                    row_positions=row_positions, col_positions=col_positions
                )
        sorted_row_positions = sorted_col_positions = None
        if row_positions is not None:
            if must_sort_row_pos:
                sorted_row_positions = self._get_sorted_positions(row_positions)
            else:
                sorted_row_positions = row_positions
            # Get dict of row_parts as {row_index: row_internal_indices}
            row_partitions_dict = self._get_dict_of_block_index(
                0, sorted_row_positions, are_indices_sorted=True
            )
            new_row_lengths = self._get_new_lengths(row_partitions_dict, axis=0)
            new_index, _ = self._get_new_index_obj(
                row_positions, sorted_row_positions, axis=0
            )
        else:
            # All rows are taken: every row partition is selected entirely and
            # the existing row metadata is reused.
            row_partitions_dict = {i: slice(None) for i in range(len(self._partitions))}
            new_row_lengths = self._row_lengths_cache
            new_index = self.copy_index_cache(copy_lengths=True)

        if col_positions is not None:
            if must_sort_col_pos:
                sorted_col_positions = self._get_sorted_positions(col_positions)
            else:
                sorted_col_positions = col_positions
            # Get dict of col_parts as {col_index: col_internal_indices}
            col_partitions_dict = self._get_dict_of_block_index(
                1, sorted_col_positions, are_indices_sorted=True
            )
            new_col_widths = self._get_new_lengths(col_partitions_dict, axis=1)
            new_columns, monotonic_col_idx = self._get_new_index_obj(
                col_positions, sorted_col_positions, axis=1
            )

            # Sanity check: the new widths must add up to the number of new columns.
            ErrorMessage.catch_bugs_and_request_email(
                failure_condition=sum(new_col_widths) != len(new_columns),
                extra_log=f"{sum(new_col_widths)} != {len(new_columns)}.\n"
                + f"{col_positions}\n{self.column_widths}\n{col_partitions_dict}",
            )

            # Derive dtypes of the selected columns without materializing them
            # when that is possible; otherwise leave them unknown (``None``).
            if self.has_materialized_dtypes:
                new_dtypes = self.dtypes.iloc[monotonic_col_idx]
            elif isinstance(self._dtypes, ModinDtypes):
                try:
                    supported_monotonic_col_idx = monotonic_col_idx
                    if isinstance(monotonic_col_idx, slice):
                        # The slice is expanded into an explicit list of positions
                        # before being handed to ``lazy_get``.
                        supported_monotonic_col_idx = pandas.RangeIndex(
                            monotonic_col_idx.start,
                            monotonic_col_idx.stop,
                            monotonic_col_idx.step,
                        ).to_list()
                    new_dtypes = self._dtypes.lazy_get(
                        supported_monotonic_col_idx, numeric_index=True
                    )
                # can raise either on missing cache or on duplicated labels
                except (ValueError, NotImplementedError):
                    new_dtypes = None
            else:
                new_dtypes = None
        else:
            # All columns are taken: every column partition is selected entirely
            # and the existing column metadata is reused.
            col_partitions_dict = {
                i: slice(None) for i in range(len(self._partitions.T))
            }
            new_col_widths = self._column_widths_cache
            new_columns = self.copy_columns_cache(copy_lengths=True)
            new_dtypes = self.copy_dtypes_cache()

        # Apply the per-partition indexers to every selected block.
        new_partitions = np.array(
            [
                [
                    self._partitions[row_idx][col_idx].mask(
                        row_internal_indices, col_internal_indices
                    )
                    for col_idx, col_internal_indices in col_partitions_dict.items()
                ]
                for row_idx, row_internal_indices in row_partitions_dict.items()
            ]
        )
        intermediate = self.__constructor__(
            new_partitions,
            new_index,
            new_columns,
            new_row_lengths,
            new_col_widths,
            new_dtypes,
            pandas_backend=self._pandas_backend,
        )

        # `intermediate` holds the data in ascending positions order; restore
        # the order that was originally requested if it differs.
        return self._maybe_reorder_labels(
            intermediate,
            row_positions,
            col_positions,
        )

    def _maybe_reorder_labels(
        self,
        intermediate: PandasDataframe,
        row_positions,
        col_positions,
    ) -> PandasDataframe:
        """
        Restore the requested order of a take_2d_labels_or_positional result if needed.

        Parameters
        ----------
        intermediate : PandasDataFrame
            The frame holding the selected data in ascending positions order.
        row_positions : list-like of ints, optional
            The row positions to extract.
        col_positions : list-like of ints, optional
            The column positions to extract.

        Returns
        -------
        PandasDataframe
            `intermediate` itself when both indexers are already monotonically
            increasing, otherwise the result of ``intermediate._reorder_labels``.
        """

        def is_already_ordered(positions):
            """Check whether `positions` are absent or monotonically increasing."""
            return (
                positions is None
                # Fast range processing of non-positive-step ranges is not yet supported
                or (is_range_like(positions) and positions.step > 0)
                or len(positions) == 1
                or np.all(positions[1:] >= positions[:-1])
            )

        def restoring_order(positions):
            """Build the order mapping sorted `positions` back to the requested ones."""
            if is_range_like(positions):
                if positions.step < 0:
                    return pandas.RangeIndex(len(positions) - 1, -1, -1)
                # do not need to re-order positive-step-ranges
                return None
            if positions is None:
                return None
            return np.argsort(np.argsort(np.asarray(positions, dtype=np.intp)))

        # Fast track for the common case: nothing to do if both indexers are
        # monotonically increasing.
        if is_already_ordered(row_positions) and is_already_ordered(col_positions):
            return intermediate

        # The new labels are often smaller than the old labels, so the original
        # order values can't be reused, since they were mapped to the original data.
        # The indices are "unsorted" instead by applying np.argsort() twice, as inspired by
        # https://stackoverflow.com/questions/2483696/undo-or-reverse-argsort-python,
        # so that `np.sort(positions)[new_order] == positions` holds.
        # The first argsort computes the indices which would sort `positions`
        # (bringing us to the proper "index space" of the smaller labels count),
        # and the second one sorts those "sorting indices" themselves, which
        # re-orders them to match the original data.
        new_row_order = restoring_order(row_positions)
        new_col_order = restoring_order(col_positions)
        return intermediate._reorder_labels(
            row_positions=new_row_order, col_positions=new_col_order
        )

    @lazy_metadata_decorator(apply_axis="rows")
    def from_labels(self) -> PandasDataframe:
        """
        Convert the row labels to a column of data, inserted at the first position.

        Gives result by similar way as `pandas.DataFrame.reset_index`. Each level
        of `self.index` will be added as separate column of data.

        Returns
        -------
        PandasDataframe
            A PandasDataframe with new columns from index labels.

        Notes
        -----
        The result gets a ``pandas.RangeIndex`` as its row labels, which is marked
        as deferred (``synchronize_labels(axis=0)``) rather than applied to the
        partitions right away. The new columns are accounted to the first column
        partition: its width grows by ``self.index.nlevels``.
        """
        new_row_labels = pandas.RangeIndex(len(self.index))
        # Pick the names of the new columns: unnamed levels get "level_<i>" names;
        # a single unnamed level is called "index" unless such a column exists.
        if self.index.nlevels > 1:
            level_names = [
                (
                    self.index.names[i]
                    if self.index.names[i] is not None
                    else "level_{}".format(i)
                )
                for i in range(self.index.nlevels)
            ]
        else:
            level_names = [
                (
                    self.index.names[0]
                    if self.index.names[0] is not None
                    else (
                        "index" if "index" not in self.columns else "level_{}".format(0)
                    )
                )
            ]
        names = tuple(level_names) if len(level_names) > 1 else level_names[0]
        # Dtypes of the new columns are taken from the index converted to a frame
        # and are put in front of the existing dtypes.
        new_dtypes = self.index.to_frame(name=names).dtypes
        try:
            new_dtypes = ModinDtypes.concat([new_dtypes, self._dtypes])
        except NotImplementedError:
            # can raise on duplicated labels
            new_dtypes = None

        # We will also use the `new_column_names` in the calculation of the internal metadata, so this is a
        # lightweight way of ensuring the metadata matches.
        if self.columns.nlevels > 1:
            # Column labels are different for multilevel index.
            new_column_names = pandas.MultiIndex.from_tuples(
                # Set level names on the 1st columns level and fill up empty level names with empty string.
                # Expand tuples in level names. This is how reset_index works when col_level col_fill are not specified.
                [
                    tuple(
                        list(level) + [""] * (self.columns.nlevels - len(level))
                        if isinstance(level, tuple)
                        else [level] + [""] * (self.columns.nlevels - 1)
                    )
                    for level in level_names
                ],
                names=self.columns.names,
            )
        else:
            new_column_names = pandas.Index(level_names, tupleize_cols=False)
        new_columns = new_column_names.append(self.columns)

        def from_labels_executor(
            df: pandas.DataFrame, **kwargs
        ) -> pandas.DataFrame:  # pragma: no cover
            # Function applied to the selected partitions: moves the index of `df`
            # into its leading columns while keeping the index of the result intact.
            # Setting the names here ensures that external and internal metadata always match.
            df.index.names = new_column_names

            # Handling of a case when columns have the same name as one of index levels names.
            # In this case `df.reset_index` provides errors related to columns duplication.
            # This case is possible because columns metadata updating is deferred. To workaround
            # `df.reset_index` error we allow columns duplication in "if" branch via `concat`.
            if any(name_level in df.columns for name_level in df.index.names):
                columns_to_add = df.index.to_frame()
                columns_to_add.reset_index(drop=True, inplace=True)
                df = df.reset_index(drop=True)
                result = pandas.concat([columns_to_add, df], axis=1, copy=False)
            else:
                result = df.reset_index()
            # Put the index back to the original due to GH#4394
            result.index = df.index
            return result

        # The executor is applied with ``[0]`` as the selected indices while the
        # remaining partitions are kept (``keep_remaining=True``).
        new_parts = self._partition_mgr_cls.apply_func_to_select_indices(
            0,
            self._partitions,
            from_labels_executor,
            [0],
            keep_remaining=True,
        )
        # Only the first column partition becomes wider: by one column per index level.
        new_column_widths = [
            self.index.nlevels + self.column_widths[0]
        ] + self.column_widths[1:]
        result = self.__constructor__(
            new_parts,
            new_row_labels,
            new_columns,
            row_lengths=self._row_lengths_cache,
            column_widths=new_column_widths,
            dtypes=new_dtypes,
            pandas_backend=self._pandas_backend,
        )
        # Set flag for propagating deferred row labels across dataframe partitions
        result.synchronize_labels(axis=0)
        return result

    def to_labels(self, column_list: List[Hashable]) -> PandasDataframe:
        """
        Turn one or more columns into the row labels, dropping the previous labels.

        The selected columns are materialized as a pandas object in order to
        build the new index.

        Parameters
        ----------
        column_list : list of hashable
            The list of column names to place as the new row labels.

        Returns
        -------
        PandasDataframe
            A new PandasDataframe that has the updated labels.
        """
        labels_frame = self.take_2d_labels_or_positional(
            col_labels=column_list
        ).to_pandas()

        # A single column makes a flat index, several columns make a MultiIndex.
        new_labels = (
            pandas.Index(labels_frame.squeeze(axis=1), name=column_list[0])
            if len(column_list) == 1
            else pandas.MultiIndex.from_frame(labels_frame, names=column_list)
        )

        # Keep only the columns that were not moved into the labels.
        moved_columns = labels_frame.columns
        remaining_columns = [
            label for label in self.columns if label not in moved_columns
        ]
        result = self.take_2d_labels_or_positional(col_labels=remaining_columns)
        result.index = new_labels
        return result

    @lazy_metadata_decorator(apply_axis="both")
    def _reorder_labels(self, row_positions=None, col_positions=None):
        """
        Reorder the column and or rows in this DataFrame.

        Parameters
        ----------
        row_positions : list of int, optional
            The ordered list of new row orders such that each position within the list
            indicates the new position.
        col_positions : list of int, optional
            The ordered list of new column orders such that each position within the
            list indicates the new position.

        Returns
        -------
        PandasDataframe
            A new PandasDataframe with reordered columns and/or rows.

        Notes
        -----
        Rows are reordered first and columns are reordered on top of that result.
        An axis whose positions are ``None`` keeps its partitions and metadata.
        The positions are applied with ``iloc``, so the result may have a different
        number of rows/columns than the original frame; in that case the new
        partition sizes are computed with ``get_length_list``.
        """
        new_dtypes = self.copy_dtypes_cache()
        if row_positions is not None:
            # We want to preserve the frame's partitioning so passing in ``keep_partitioning=True``
            # in order to use the cached `row_lengths` values for the new frame.
            # If the frame's is re-partitioned using the "standard" partitioning,
            # then knowing that, we can compute new row lengths.
            ordered_rows = self._partition_mgr_cls.map_axis_partitions(
                0,
                self._partitions,
                lambda df: df.iloc[row_positions],
                keep_partitioning=True,
            )
            # The row labels are reordered in the same way as the data.
            row_idx = self.index[row_positions]

            if len(row_idx) != len(self.index):
                # The frame was re-partitioned along the 0 axis during reordering using
                # the "standard" partitioning. Knowing the standard partitioning scheme
                # we are able to compute new row lengths.
                new_lengths = get_length_list(
                    axis_len=len(row_idx),
                    num_splits=ordered_rows.shape[0],
                    min_block_size=MinRowPartitionSize.get(),
                )
            else:
                # If the frame's partitioning was preserved then
                # we can use previous row lengths cache
                new_lengths = self._row_lengths_cache
        else:
            # Rows are untouched: reuse the partitions and the row metadata.
            ordered_rows = self._partitions
            row_idx = self.copy_index_cache(copy_lengths=True)
            new_lengths = self._row_lengths_cache
        if col_positions is not None:
            # We want to preserve the frame's partitioning so passing in ``keep_partitioning=True``
            # in order to use the cached `column_widths` values for the new frame.
            # If the frame's is re-partitioned using the "standard" partitioning,
            # then knowing that, we can compute new column widths.
            ordered_cols = self._partition_mgr_cls.map_axis_partitions(
                1,
                ordered_rows,
                lambda df: df.iloc[:, col_positions],
                keep_partitioning=True,
            )
            col_idx = self.columns[col_positions]
            # Dtypes follow the columns: they are reordered eagerly if they are
            # materialized, lazily if possible, and dropped otherwise.
            if self.has_materialized_dtypes:
                new_dtypes = self.dtypes.iloc[col_positions]
            elif isinstance(self._dtypes, ModinDtypes):
                try:
                    new_dtypes = self._dtypes.lazy_get(col_idx)
                # can raise on duplicated labels
                except NotImplementedError:
                    new_dtypes = None

            if len(col_idx) != len(self.columns):
                # The frame was re-partitioned along the 1 axis during reordering using
                # the "standard" partitioning. Knowing the standard partitioning scheme
                # we are able to compute new column widths.
                new_widths = get_length_list(
                    axis_len=len(col_idx),
                    num_splits=ordered_cols.shape[1],
                    min_block_size=MinColumnPartitionSize.get(),
                )
            else:
                # If the frame's partitioning was preserved then
                # we can use previous column widths cache
                new_widths = self._column_widths_cache
        else:
            # Columns are untouched: reuse the (possibly row-reordered) partitions
            # and the column metadata.
            ordered_cols = ordered_rows
            col_idx = self.copy_columns_cache(copy_lengths=True)
            new_widths = self._column_widths_cache
        return self.__constructor__(
            ordered_cols,
            row_idx,
            col_idx,
            new_lengths,
            new_widths,
            new_dtypes,
            pandas_backend=self._pandas_backend,
        )

    @lazy_metadata_decorator(apply_axis=None)
    def copy(self):
        """
        Build a new frame object mirroring this one.

        The partitions array and the cached row lengths / column widths are
        passed to the new frame as they are, while the index, columns and dtypes
        are obtained through the dedicated ``copy_*_cache`` helpers.

        Returns
        -------
        PandasDataframe
            A copied version of this object.
        """
        index_cache = self.copy_index_cache(copy_lengths=True)
        columns_cache = self.copy_columns_cache(copy_lengths=True)
        dtypes_cache = self.copy_dtypes_cache()
        return self.__constructor__(
            self._partitions,
            index_cache,
            columns_cache,
            self._row_lengths_cache,
            self._column_widths_cache,
            dtypes_cache,
            pandas_backend=self._pandas_backend,
        )

    @lazy_metadata_decorator(apply_axis="both")
    def astype(self, col_dtypes, errors: str = "raise"):
        """
        Convert the columns dtypes to given dtypes.

        Parameters
        ----------
        col_dtypes : dictionary of {col: dtype,...} or str
            Where col is the column name and dtype is a NumPy dtype.
            A non-dict value is treated as a single dtype that is applied
            to every column of the frame.
        errors : {'raise', 'ignore'}, default: 'raise'
            Control raising of exceptions on invalid data for provided dtype.
            The value is forwarded to ``astype`` of every partition's frame.

        Returns
        -------
        PandasDataframe
            Dataframe with updated dtypes. If none of the requested dtypes differs
            from the current ones, a copy of this frame is returned instead.
        """
        # Stays ``None`` until at least one column actually needs a different dtype
        new_dtypes = None
        self_dtypes = self.dtypes
        # When casting to "category" we have to make up the whole axis partition
        # to get the properly encoded table of categories. Every block partition
        # will store the encoded table. That can lead to higher memory footprint.
        # TODO: Revisit if this hurts users.
        use_full_axis_cast = False
        if isinstance(col_dtypes, dict):
            for column, dtype in col_dtypes.items():
                if not is_dtype_equal(dtype, self_dtypes[column]):
                    # Copy the dtypes lazily, only once the first real change is found
                    if new_dtypes is None:
                        new_dtypes = self_dtypes.copy()
                    # Update the new dtype series to the proper pandas dtype
                    new_dtype = pandas.api.types.pandas_dtype(dtype)
                    if self.engine == "Dask" and hasattr(dtype, "_is_materialized"):
                        # FIXME: https://github.com/dask/distributed/issues/8585
                        _ = dtype._materialize_categories()

                    # We cannot infer without computing the dtype if new dtype is categorical
                    if isinstance(new_dtype, pandas.CategoricalDtype):
                        new_dtypes[column] = LazyProxyCategoricalDtype._build_proxy(
                            # Actual parent will substitute `None` at `.set_dtypes_cache`
                            parent=None,
                            column_name=column,
                            materializer=lambda parent, column: parent._compute_dtypes(
                                columns=[column]
                            )[column],
                        )
                        use_full_axis_cast = True
                    else:
                        new_dtypes[column] = new_dtype

            def astype_builder(df):
                """Compute new partition frame with dtypes updated."""
                # A partition holds only a subset of columns, so only the
                # requested columns present in this partition are cast
                return df.astype(
                    {k: v for k, v in col_dtypes.items() if k in df}, errors=errors
                )

        else:
            # Assume that the dtype is a scalar.
            if not (self_dtypes == col_dtypes).all():
                new_dtypes = self_dtypes.copy()
                new_dtype = pandas.api.types.pandas_dtype(col_dtypes)
                if self.engine == "Dask" and hasattr(new_dtype, "_is_materialized"):
                    # FIXME: https://github.com/dask/distributed/issues/8585
                    _ = new_dtype._materialize_categories()
                if isinstance(new_dtype, pandas.CategoricalDtype):
                    # A single lazy proxy (built with the first column's label) is
                    # assigned to every entry of the dtypes series.
                    # NOTE(review): presumably the per-column name is fixed up together
                    # with the parent at `.set_dtypes_cache` - confirm.
                    new_dtypes[:] = new_dtypes.to_frame().apply(
                        lambda column: LazyProxyCategoricalDtype._build_proxy(
                            # Actual parent will substitute `None` at `.set_dtypes_cache`
                            parent=None,
                            column_name=column.index[0],
                            materializer=lambda parent, column: parent._compute_dtypes(
                                columns=[column]
                            )[column],
                        )
                    )[0]
                    use_full_axis_cast = True
                else:
                    new_dtypes[:] = new_dtype

            def astype_builder(df):
                """Compute new partition frame with dtypes updated."""
                return df.astype(col_dtypes, errors=errors)

        # Nothing to cast: every requested dtype already matches the current one
        if new_dtypes is None:
            return self.copy()
        if use_full_axis_cast:
            # Categorical casts are applied to whole column partitions (axis 0)
            new_frame = self._partition_mgr_cls.map_axis_partitions(
                0, self._partitions, astype_builder, keep_partitioning=True
            )
        else:
            # Any other cast is applied to each block partition via the lazy map
            new_frame = self._partition_mgr_cls.lazy_map_partitions(
                self._partitions, astype_builder
            )
        # Labels and partition sizes are reused from this frame; only the dtypes
        # (and the backend derived from them) are new
        return self.__constructor__(
            new_frame,
            self.copy_index_cache(copy_lengths=True),
            self.copy_columns_cache(copy_lengths=True),
            self._row_lengths_cache,
            self._column_widths_cache,
            new_dtypes,
            pandas_backend=get_pandas_backend(new_dtypes),
        )

    def numeric_columns(self, include_bool=True):
        """
        Collect the labels of the columns holding numeric data.

        Parameters
        ----------
        include_bool : bool, default: True
            Whether to consider boolean columns as numeric.

        Returns
        -------
        list
            List of column names.
        """
        # A column qualifies when its dtype is numeric and, if booleans are
        # excluded, the dtype is not ``np.bool_``.
        return [
            label
            for label, dtype in zip(self.columns, self.dtypes)
            if is_numeric_dtype(dtype) and (include_bool or dtype != np.bool_)
        ]

    def _get_dict_of_block_index(self, axis, indices, are_indices_sorted=False):
        """
        Convert indices to an ordered dict mapping partition (or block) index to internal indices in said partition.

        Parameters
        ----------
        axis : {0, 1}
            The axis along which to get the indices (0 - rows, 1 - columns).
        indices : list of int, slice
            A list of global indices to convert.
        are_indices_sorted : bool, default: False
            Flag indicating whether the `indices` sequence is sorted by ascending or not.
            Note: the internal algorithm requires for the `indices` to be sorted, this
            flag is used for optimization in order to not sort already sorted data.
            Be careful when passing ``True`` for this flag, if the data appears to be unsorted
            with the flag set to ``True`` this would lead to undefined behavior.

        Returns
        -------
        dict
            A mapping from partition index to list of internal indices which correspond to `indices` in each
            partition.

        Notes
        -----
        Slices and range-like indexers with step 1 take a fast path whose result
        holds ``slice`` objects as values. Any other indexer is converted into
        sorted positions and the result holds arrays of internal positions.
        """
        # TODO: Support handling of slices with specified 'step'. For now, converting them into a range
        if isinstance(indices, slice) and (
            indices.step is not None and indices.step != 1
        ):
            indices = range(*indices.indices(len(self.get_axis(axis))))
        # Fasttrack slices
        if isinstance(indices, slice) or (is_range_like(indices) and indices.step == 1):
            # Converting range-like indexer to slice
            indices = slice(indices.start, indices.stop, indices.step)
            if is_full_grab_slice(indices, sequence_len=len(self.get_axis(axis))):
                # Everything is selected: take every partition along the axis in full
                return dict(
                    zip(
                        range(self._partitions.shape[axis]),
                        [slice(None)] * self._partitions.shape[axis],
                    )
                )
            # Empty selection case
            if indices.start == indices.stop and indices.start is not None:
                return dict()
            if indices.start is None or indices.start == 0:
                # Slice anchored at the beginning: locate the partition holding the stop
                # position (via a recursive call for that single position), take all
                # preceding partitions in full and cut the last one at the internal stop
                last_part, last_idx = list(
                    self._get_dict_of_block_index(axis, [indices.stop]).items()
                )[0]
                dict_of_slices = dict(zip(range(last_part), [slice(None)] * last_part))
                dict_of_slices.update({last_part: slice(last_idx[0])})
                return dict_of_slices
            elif indices.stop is None or indices.stop >= len(self.get_axis(axis)):
                # Slice running to the end: cut the first partition at the internal start
                # and take all the following partitions in full
                first_part, first_idx = list(
                    self._get_dict_of_block_index(axis, [indices.start]).items()
                )[0]
                dict_of_slices = dict({first_part: slice(first_idx[0], None)})
                num_partitions = np.size(self._partitions, axis=axis)
                part_list = range(first_part + 1, num_partitions)
                dict_of_slices.update(
                    dict(zip(part_list, [slice(None)] * len(part_list)))
                )
                return dict_of_slices
            else:
                # Slice bounded on both sides: resolve both boundaries separately
                first_part, first_idx = list(
                    self._get_dict_of_block_index(axis, [indices.start]).items()
                )[0]
                last_part, last_idx = list(
                    self._get_dict_of_block_index(axis, [indices.stop]).items()
                )[0]
                if first_part == last_part:
                    # Both boundaries fall into the same partition
                    return dict({first_part: slice(first_idx[0], last_idx[0])})
                else:
                    if last_part - first_part == 1:
                        # Boundaries fall into two adjacent partitions
                        return dict(
                            # FIXME: this dictionary creation feels wrong - it might not maintain the order
                            {
                                first_part: slice(first_idx[0], None),
                                last_part: slice(None, last_idx[0]),
                            }
                        )
                    else:
                        # Partitions strictly between the boundary ones are taken in full
                        dict_of_slices = dict({first_part: slice(first_idx[0], None)})
                        part_list = range(first_part + 1, last_part)
                        dict_of_slices.update(
                            dict(zip(part_list, [slice(None)] * len(part_list)))
                        )
                        dict_of_slices.update({last_part: slice(None, last_idx[0])})
                        return dict_of_slices
        if isinstance(indices, list):
            # Converting python list to numpy for faster processing
            indices = np.array(indices, dtype=np.int64)
        # Fasttrack empty numpy array
        if isinstance(indices, np.ndarray) and indices.size == 0:
            # This will help preserve metadata stored in empty dataframes (indexes and dtypes)
            # Otherwise, we will get an empty `new_partitions` array, from which it will
            #  no longer be possible to obtain metadata
            return dict([(0, np.array([], dtype=np.int64))])
        negative_mask = np.less(indices, 0)
        has_negative = np.any(negative_mask)
        if has_negative:
            # We're going to modify 'indices' inplace in a numpy way, so doing a copy/converting indices to numpy.
            indices = (
                indices.copy()
                if isinstance(indices, np.ndarray)
                else np.array(indices, dtype=np.int64)
            )
            # Negative positions are wrapped around using the length of the axis
            indices[negative_mask] = indices[negative_mask] % len(self.get_axis(axis))
        # If the `indices` array was modified because of the negative indices conversion
        # then the original order was broken and so we have to sort anyway:
        if has_negative or not are_indices_sorted:
            indices = np.sort(indices)
        # Sizes of the partitions along the requested axis
        if axis == 0:
            bins = np.array(self.row_lengths)
        else:
            bins = np.array(self.column_widths)
        # INT_MAX to make sure we don't try to compute on partitions that don't exist.
        # `cumulative[i]` is the (exclusive) upper bound of global positions stored
        # in the i-th partition; the last bound is replaced by the dtype's maximum.
        cumulative = np.append(bins[:-1].cumsum(), np.iinfo(bins.dtype).max)

        def internal(block_idx: int, global_index):
            """Transform global index to internal one for given block (identified by its index)."""
            # Positions in the very first block need no shifting; for the others
            # the upper bound of the preceding block is subtracted
            return (
                global_index
                if not block_idx
                else np.subtract(
                    global_index, cumulative[min(block_idx, len(cumulative) - 1) - 1]
                )
            )

        # For every (sorted) position find the partition it belongs to
        partition_ids = np.digitize(indices, cumulative)
        # Running totals of positions per partition; since `indices` are sorted,
        # consecutive totals delimit the run of positions belonging to each partition
        count_for_each_partition = np.array(
            [(partition_ids == i).sum() for i in range(len(cumulative))]
        ).cumsum()
        # Compute the internal indices and pair those with the partition index.
        # If the first partition has any values we need to return, compute those
        # first to make the list comprehension easier. Otherwise, just append the
        # rest of the values to an empty list.
        if count_for_each_partition[0] > 0:
            first_partition_indices = [
                (0, internal(0, indices[slice(count_for_each_partition[0])]))
            ]
        else:
            first_partition_indices = []
        # Partitions that received no positions are left out of the result
        partition_ids_with_indices = first_partition_indices + [
            (
                i,
                internal(
                    i,
                    indices[
                        slice(
                            count_for_each_partition[i - 1],
                            count_for_each_partition[i],
                        )
                    ],
                ),
            )
            for i in range(1, len(count_for_each_partition))
            if count_for_each_partition[i] > count_for_each_partition[i - 1]
        ]
        return dict(partition_ids_with_indices)

    @staticmethod
    def _join_index_objects(axis, indexes, how, sort, fill_value=None):
        """
        Join the pair of index objects (columns or rows) by a given strategy.

        Unlike Index.join() in pandas, if `axis` is 1, `sort` is False,
        and `how` is "outer", the result will _not_ be sorted.

        Parameters
        ----------
        axis : {0, 1}
            The axis index object to join (0 - rows, 1 - columns).
        indexes : list(Index)
            The indexes to join on.
        how : {'left', 'right', 'inner', 'outer', None}
            The type of join to join to make. If `None` then joined index
            considered to be the first index in the `indexes` list.
        sort : boolean
            Whether or not to sort the joined index.
        fill_value : any, optional
            Value to use for missing values.

        Returns
        -------
        (Index, func)
            Joined index with make_reindexer func.
        """
        assert isinstance(indexes, list)

        # define helper functions
        def merge(left_index, right_index):
            """Combine a pair of indices depending on `axis`, `how` and `sort` from outside."""
            if axis == 1 and how == "outer" and not sort:
                return left_index.union(right_index, sort=False)
            else:
                return left_index.join(right_index, how=how, sort=sort)

        # define condition for joining indexes
        all_indices_equal = all(indexes[0].equals(index) for index in indexes[1:])
        do_join_index = how is not None and not all_indices_equal

        # define condition for joining indexes with getting indexers
        need_indexers = (
            axis == 0
            and not all_indices_equal
            and any(not index.is_unique for index in indexes)
        )
        indexers = None

        # perform joining indexes
        if do_join_index:
            if len(indexes) == 2 and need_indexers:
                # in case of count of indexes > 2 we should perform joining all indexes
                # after that get indexers
                # in the fast path we can obtain joined_index and indexers in one call
                indexers = [None, None]
                joined_index, indexers[0], indexers[1] = indexes[0].join(
                    indexes[1], how=how, sort=sort, return_indexers=True
                )
            else:
                joined_index = indexes[0]
                # TODO: revisit for performance
                for index in indexes[1:]:
                    joined_index = merge(joined_index, index)
        else:
            joined_index = indexes[0].copy()

        if need_indexers and indexers is None:
            indexers = [index.get_indexer_for(joined_index) for index in indexes]

        def make_reindexer(do_reindex: bool, frame_idx: int):
            """Create callback that reindexes the dataframe using newly computed index."""
            # the order of the frames must match the order of the indexes
            if not do_reindex:
                return lambda df: df

            if need_indexers:
                assert indexers is not None

                return lambda df: df._reindex_with_indexers(
                    {0: [joined_index, indexers[frame_idx]]},
                    copy=True,
                    allow_dups=True,
                    fill_value=fill_value,
                )
            return lambda df: df.reindex(joined_index, axis=axis, fill_value=fill_value)

        return joined_index, make_reindexer

    # Internal methods
    # These methods are for building the correct answer in a modular way.
    # Please be careful when changing these!

    def _build_treereduce_func(self, axis, func):
        """
        Properly formats a TreeReduce result so that the partitioning is correct.

        Parameters
        ----------
        axis : int
            The axis along which to apply the function.
        func : callable
            The function to apply.

        Returns
        -------
        callable
            A function to be shipped to the partitions to be executed.

        Notes
        -----
        This should be used for any TreeReduce style operation that results in a
        reduced data dimensionality (dataframe -> series).
        """

        def _tree_reduce_func(df, *args, **kwargs):
            """Tree-reducer function itself executing `func`, presenting the resulting pandas.Series as pandas.DataFrame."""
            series_result = func(df, *args, **kwargs)
            if axis == 0 and isinstance(series_result, pandas.Series):
                # In the case of axis=0, we need to keep the shape of the data
                # consistent with what we have done. In the case of a reduce, the
                # data for axis=0 should be a single value for each column. By
                # transposing the data after we convert to a DataFrame, we ensure that
                # the columns of the result line up with the columns from the data.
                # axis=1 does not have this requirement because the index already will
                # line up with the index of the data based on how pandas creates a
                # DataFrame from a Series.
                result = pandas.DataFrame(series_result).T
                result.index = [MODIN_UNNAMED_SERIES_LABEL]
            else:
                result = pandas.DataFrame(series_result)
                if isinstance(series_result, pandas.Series):
                    result.columns = [MODIN_UNNAMED_SERIES_LABEL]
            return result

        return _tree_reduce_func

    def _compute_tree_reduce_metadata(self, axis, new_parts, dtypes=None):
        """
        Assemble the frame describing the outcome of a reduce function.

        Parameters
        ----------
        axis : int
            The axis on which reduce function was applied.
        new_parts : NumPy 2D array
            Partitions with the result of applied function.
        dtypes : str, optional
            The data types for the result. This is an optimization
            because there are functions that always result in a particular data
            type, and this allows us to avoid (re)computing it.

        Returns
        -------
        PandasDataframe
            Modin series (1xN frame) containing the reduced data.
        """
        kept_axis = axis ^ 1
        axes_labels = [0, 0]
        axes_lengths = [0, 0]

        # The reduced axis collapses into a single specially labeled element,
        # the opposite axis keeps the labels and the partitioning of this frame.
        axes_labels[axis] = [MODIN_UNNAMED_SERIES_LABEL]
        axes_labels[kept_axis] = self.get_axis(kept_axis)

        axes_lengths[axis] = [1]
        axes_lengths[kept_axis] = self._get_axis_lengths(kept_axis)

        if dtypes == "copy":
            result_dtypes = self.copy_dtypes_cache()
        elif dtypes is None:
            result_dtypes = None
        else:
            # The same dtype is assigned to every column of the result
            column_labels = axes_labels[1]
            result_dtypes = pandas.Series(
                [pandas.api.types.pandas_dtype(dtypes)] * len(column_labels),
                index=column_labels,
            )

        return self.__constructor__(
            new_parts,
            axes_labels[0],
            axes_labels[1],
            axes_lengths[0],
            axes_lengths[1],
            result_dtypes,
            pandas_backend=self._pandas_backend,
        )

    @lazy_metadata_decorator(apply_axis="both")
    def reduce(
        self,
        axis: Union[int, Axis],
        function: Callable,
        dtypes: Optional[str] = None,
    ) -> PandasDataframe:
        """
        Aggregate the frame along an axis with a user-defined function, collapsing that axis to a singleton.

        The function is applied to full-axis partitions, i.e. it sees the whole axis at once.

        Parameters
        ----------
        axis : int or modin.core.dataframe.base.utils.Axis
            The axis to perform the reduce over.
        function : callable(row|col) -> single value
            The reduce function to apply to each column.
        dtypes : str, optional
            The data types for the result. This is an optimization
            because there are functions that always result in a particular data
            type, and this allows us to avoid (re)computing it.

        Returns
        -------
        PandasDataframe
            Modin series (1xN frame) containing the reduced data.

        Notes
        -----
        The user-defined function must reduce to a single value.
        """
        axis_num = Axis(axis).value
        reducer = self._build_treereduce_func(axis_num, function)
        reduced_parts = self._partition_mgr_cls.map_axis_partitions(
            axis_num, self._partitions, reducer
        )
        return self._compute_tree_reduce_metadata(
            axis_num, reduced_parts, dtypes=dtypes
        )

    @lazy_metadata_decorator(apply_axis="opposite", axis_arg=0)
    def tree_reduce(
        self,
        axis: Union[int, Axis],
        map_func: Callable,
        reduce_func: Optional[Callable] = None,
        dtypes: Optional[str] = None,
    ) -> PandasDataframe:
        """
        Reduce the data to a pandas Series in two stages: a per-partition map followed by a full-axis reduce.

        Parameters
        ----------
        axis : int or modin.core.dataframe.base.utils.Axis
            The axis to perform the tree reduce over.
        map_func : callable(row|col) -> row|col
            Callable function to map the dataframe.
        reduce_func : callable(row|col) -> single value, optional
            Callable function to reduce the dataframe.
            If none, then apply map_func twice.
        dtypes : str, optional
            The data types for the result. This is an optimization
            because there are functions that always result in a particular data
            type, and this allows us to avoid (re)computing it.

        Returns
        -------
        PandasDataframe
            A new dataframe.
        """
        axis_num = Axis(axis).value
        mapper = self._build_treereduce_func(axis_num, map_func)
        # Without a dedicated reducer the (already wrapped) mapper is reused
        reducer = (
            mapper
            if reduce_func is None
            else self._build_treereduce_func(axis_num, reduce_func)
        )

        mapped_parts = self._partition_mgr_cls.map_partitions(self._partitions, mapper)
        reduced_parts = self._partition_mgr_cls.map_axis_partitions(
            axis_num, mapped_parts, reducer
        )
        return self._compute_tree_reduce_metadata(
            axis_num, reduced_parts, dtypes=dtypes
        )

    @lazy_metadata_decorator(apply_axis=None)
    def map(
        self,
        func: Callable,
        dtypes: Optional[str] = None,
        new_columns: Optional[pandas.Index] = None,
        func_args=None,
        func_kwargs=None,
        lazy=False,
    ) -> PandasDataframe:
        """
        Apply a function to every partition of the dataset.

        Parameters
        ----------
        func : callable(row|col|cell) -> row|col|cell
            The function to apply.
        dtypes : dtypes of the result, optional
            The data types for the result. This is an optimization
            because there are functions that always result in a particular data
            type, and this allows us to avoid (re)computing it.
        new_columns : pandas.Index, optional
            New column labels of the result, its length has to be identical
            to the older columns. If not specified, old column labels are preserved.
        func_args : iterable, optional
            Positional arguments for the 'func' callable.
        func_kwargs : dict, optional
            Keyword arguments for the 'func' callable.
        lazy : bool, default: False
            Whether to prefer lazy execution or not.

        Returns
        -------
        PandasDataframe
            A new dataframe.
        """
        if lazy:
            apply_to_partitions = self._partition_mgr_cls.lazy_map_partitions
        else:
            apply_to_partitions = self._partition_mgr_cls.map_partitions
        new_partitions = apply_to_partitions(
            self._partitions, func, func_args, func_kwargs
        )

        if new_columns is None:
            new_columns = self.copy_columns_cache(copy_lengths=True)
        elif self.has_materialized_columns:
            # The length can only be validated against already known columns
            assert len(new_columns) == len(
                self.columns
            ), "New column's length must be identical to the previous columns"

        if isinstance(dtypes, str) and dtypes == "copy":
            result_dtypes = self.copy_dtypes_cache()
        elif dtypes is None or isinstance(dtypes, pandas.Series):
            # Either unknown dtypes or a ready-to-use series of dtypes
            result_dtypes = dtypes
        else:
            if isinstance(new_columns, ModinIndex):
                # Materializing lazy columns in order to build dtype's index
                new_columns = new_columns.get(return_lengths=False)
            # A single dtype is broadcast over all the columns of the result
            result_dtypes = pandas.Series(
                [pandas.api.types.pandas_dtype(dtypes)] * len(new_columns),
                index=new_columns,
            )

        return self.__constructor__(
            new_partitions,
            self.copy_index_cache(copy_lengths=True),
            new_columns,
            self._row_lengths_cache,
            self._column_widths_cache,
            dtypes=result_dtypes,
            pandas_backend=self._pandas_backend,
        )

    def window(
        self,
        axis: Union[int, Axis],
        reduce_fn: Callable,
        window_size: int,
        result_schema: Optional[Dict[Hashable, type]] = None,
    ) -> PandasDataframe:
        """
        Apply a sliding window operator that acts as a GROUPBY on each window, and reduces down to a single row (column) per window.

        Parameters
        ----------
        axis : int or modin.core.dataframe.base.utils.Axis
            The axis to slide over.
        reduce_fn : callable(rowgroup|colgroup) -> row|col
            The reduce function to apply over the data.
        window_size : int
            The number of row/columns to pass to the function.
            (The size of the sliding window).
        result_schema : dict, optional
            Mapping from column labels to data types that represents the types of the output dataframe.

        Returns
        -------
        PandasDataframe
            A new PandasDataframe with the reduce function applied over windows of the specified
            axis.

        Notes
        -----
        The user-defined reduce function must reduce each window's column
        (row if axis=1) down to a single value.

        The method body is currently a placeholder (``pass``): none of the arguments
        is used and ``None`` is returned instead of a dataframe.
        """
        pass

    @lazy_metadata_decorator(apply_axis="both")
    def fold(self, axis, func, new_index=None, new_columns=None, shape_preserved=False):
        """
        Apply a function to full-axis partitions while keeping the partitioning.

        Parameters
        ----------
        axis : int
            The axis to apply over.
        func : callable
            The function to apply.
        new_index : list-like, optional
            The index of the result.
        new_columns : list-like, optional
            The columns of the result.
        shape_preserved : bool, default: False
            Whether the shape of the dataframe is preserved or not
            after applying a function.

        Returns
        -------
        PandasDataframe
            A new dataframe.
        """
        row_lengths = column_widths = None
        if shape_preserved:
            # An unchanged shape lets the result reuse the labels that were not
            # overridden by the caller as well as the cached partition sizes.
            new_index = (
                self.copy_index_cache(copy_lengths=True)
                if new_index is None
                else new_index
            )
            new_columns = (
                self.copy_columns_cache(copy_lengths=True)
                if new_columns is None
                else new_columns
            )
            row_lengths = self._row_lengths_cache
            column_widths = self._column_widths_cache

        folded_partitions = self._partition_mgr_cls.map_axis_partitions(
            axis, self._partitions, func, keep_partitioning=True
        )
        return self.__constructor__(
            folded_partitions,
            new_index,
            new_columns,
            row_lengths=row_lengths,
            column_widths=column_widths,
            pandas_backend=self._pandas_backend,
        )

    def infer_objects(self) -> PandasDataframe:
        """
        Attempt to infer better dtypes for object columns.

        Attempts soft conversion of object-dtyped columns, leaving non-object and unconvertible
        columns unchanged. The inference rules are the same as during normal Series/DataFrame
        construction.

        Returns
        -------
        PandasDataframe
            A new PandasDataframe with the inferred schema.
        """
        # ``infer_types`` selects and updates columns by label, so collect the labels
        # of the object-dtyped columns (the index of the dtypes series) rather than
        # their positions: the two only coincide for default integer column labels.
        obj_cols = [
            label for label, dtype in self.dtypes.items() if is_object_dtype(dtype)
        ]
        return self.infer_types(obj_cols)

    def infer_types(self, col_labels: List[str]) -> PandasDataframe:
        """
        Find the type compatible with all the values of each specified column and record it as the column's dtype.

        Parameters
        ----------
        col_labels : list
            List of column labels to infer and induce types over.

        Returns
        -------
        PandasDataframe
            A new PandasDataframe with the inferred schema.
        """
        # Infer the dtypes on the selected columns only ...
        selected_columns = self.take_2d_labels_or_positional(col_labels=col_labels)
        inferred_dtypes = selected_columns.tree_reduce(
            0, pandas.DataFrame.infer_objects
        ).dtypes
        # ... and write them over a copy of the current dtypes
        updated_dtypes = self.dtypes.copy()
        updated_dtypes[col_labels] = inferred_dtypes

        index_cache = self.copy_index_cache(copy_lengths=True)
        columns_cache = self.copy_columns_cache(copy_lengths=True)
        return self.__constructor__(
            self._partitions,
            index_cache,
            columns_cache,
            self._row_lengths_cache,
            self._column_widths_cache,
            updated_dtypes,
            pandas_backend=self._pandas_backend,
        )

    def join(
        self,
        axis: Union[int, Axis],
        condition: Callable,
        other: ModinDataframe,
        join_type: Union[str, JoinType],
    ) -> PandasDataframe:
        """
        Join this dataframe (the left side) with `other` (the right side).

        Parameters
        ----------
        axis : int or modin.core.dataframe.base.utils.Axis
            The axis to perform the join on.
        condition : callable
            Predicate deciding which rows are joined. It may be a plain equality,
            e.g. "left.col1 == right.col1", or an arbitrarily complex expression.
        other : ModinDataframe
            The right-hand dataframe to join with.
        join_type : string {"inner", "left", "right", "outer"} or modin.core.dataframe.base.utils.JoinType
            The type of join to perform.

        Returns
        -------
        PandasDataframe
            Intended to be the dataframe produced by joining the two frames.

        Notes
        -----
        This method is a placeholder: it performs no work and currently returns ``None``.

        The documented contract covers inner, left outer, right outer and full outer
        joins; other join types (e.g. natural join) may be added in the future.
        """
        return None

    def rename(
        self,
        new_row_labels: Optional[Union[Dict[Hashable, Hashable], Callable]] = None,
        new_col_labels: Optional[Union[Dict[Hashable, Hashable], Callable]] = None,
    ) -> PandasDataframe:
        """
        Replace the row and column labels with the specified new labels.

        Parameters
        ----------
        new_row_labels : dictionary or callable, optional
            Mapping or callable that relates old row labels to new labels.
            Labels missing from a dictionary are left unchanged.
        new_col_labels : dictionary or callable, optional
            Mapping or callable that relates old col labels to new labels.
            Labels missing from a dictionary are left unchanged.

        Returns
        -------
        PandasDataframe
            A new PandasDataframe with the new row and column labels.

        Notes
        -----
        A value that is neither a dictionary nor a callable is assigned to the
        corresponding axis as is.
        """
        result = self.copy()

        def resolve_labels(axis_name, new_labels):
            """Turn a dictionary or callable into the labels to assign to `axis_name`."""
            if isinstance(new_labels, dict):
                # A dictionary must be translated label by label; assigning it
                # directly would install the dictionary itself as the new axis.
                return getattr(result, axis_name).map(
                    lambda label: new_labels.get(label, label)
                )
            if callable(new_labels):
                return getattr(result, axis_name).map(new_labels)
            # Already a collection of labels: the current axis is not even read.
            return new_labels

        if new_row_labels is not None:
            result.index = resolve_labels("index", new_row_labels)
        if new_col_labels is not None:
            result.columns = resolve_labels("columns", new_col_labels)
        return result

    def combine_and_apply(
        self, func, new_index=None, new_columns=None, new_dtypes=None
    ):
        """
        Gather all partitions into a single one and run the passed function on it.

        All the data ends up on the same worker, so this method should only be
        used on small or already reduced datasets.

        Parameters
        ----------
        func : callable(pandas.DataFrame) -> pandas.DataFrame
            A function to apply to the combined partition.
        new_index : sequence, optional
            Index of the result.
        new_columns : sequence, optional
            Columns of the result.
        new_dtypes : dict-like, optional
            Dtypes of the result.

        Returns
        -------
        PandasDataframe
        """
        frame_to_apply = self
        if self._partitions.shape[1] > 1:
            # Several column partitions: first glue them into row partitions so
            # that the frame is left with a single partition per row.
            row_parts = self._partition_mgr_cls.row_partitions(self._partitions)
            single_column_grid = np.array([[part] for part in row_parts])
            # Every partition now spans all the columns, hence the single width.
            widths = [len(self.columns)] if self.has_materialized_columns else None
            frame_to_apply = self.__constructor__(
                single_column_grid,
                self.copy_index_cache(copy_lengths=True),
                self.copy_columns_cache(),
                self._row_lengths_cache,
                widths,
                self.copy_dtypes_cache(),
                pandas_backend=self._pandas_backend,
            )
        return frame_to_apply.apply_full_axis(
            axis=0,
            func=func,
            new_index=new_index,
            new_columns=new_columns,
            dtypes=new_dtypes,
        )

    @lazy_metadata_decorator(apply_axis="both")
    def _apply_func_to_range_partitioning(
        self,
        key_columns,
        func,
        ascending=True,
        preserve_columns=False,
        data=None,
        data_key_columns=None,
        level=None,
        shuffle_func_cls=ShuffleSortFunctions,
        **kwargs,
    ):
        """
        Reshuffle data so it would be range partitioned and then apply the passed function row-wise.

        Parameters
        ----------
        key_columns : list of hashables
            Columns to build the range partitioning for. Can't be specified along with `level`.
        func : callable(pandas.DataFrame) -> pandas.DataFrame
            Function to apply against partitions.
        ascending : bool, default: True
            Whether the range should be built in ascending or descending order.
            If a list-like is passed, only its first element is used.
        preserve_columns : bool, default: False
            If the columns cache should be preserved (specify this flag if `func` doesn't change column labels).
        data : PandasDataframe, optional
            Dataframe to range-partition along with the `self` frame. If specified, the `func` will receive
            a dataframe with an additional MultiIndex level in columns that separates `self` and `data`:
            ``df["grouper"] # self`` and ``df["data"] # data``.
        data_key_columns : list of hashables, optional
            Additional key columns from `data`. Will be combined with `key_columns`.
        level : list of ints or labels, optional
            Index level(s) to build the range partitioning for. Can't be specified along with `key_columns`.
        shuffle_func_cls : cls, default: ShuffleSortFunctions
            A class implementing ``modin.core.dataframe.pandas.utils.ShuffleFunctions`` to be used
            as a shuffle function.
        **kwargs : dict
            Additional arguments to forward to the range builder function.

        Returns
        -------
        PandasDataframe
            A new dataframe.

        Raises
        ------
        ValueError
            If the data has to be reshuffled and neither `key_columns` nor `level` is specified.
        """
        if data is not None:
            # adding an extra MultiIndex level in order to separate `self grouper` from the `data`
            # after concatenation
            new_grouper_cols = pandas.MultiIndex.from_tuples(
                [
                    ("grouper", *col) if isinstance(col, tuple) else ("grouper", col)
                    for col in self.columns
                ]
            )
            # relabel a copy so that the columns of `self` are left untouched
            grouper = self.copy()
            grouper.columns = new_grouper_cols

            new_data_cols = pandas.MultiIndex.from_tuples(
                [
                    ("data", *col) if isinstance(col, tuple) else ("data", col)
                    for col in data.columns
                ]
            )
            # same for `data`: the caller's frame is not modified
            data = data.copy()
            data.columns = new_data_cols

            grouper = grouper.concat(axis=1, others=[data], how="right", sort=False)

            # since original column names were modified, have to modify 'key_columns' as well
            key_columns = [
                ("grouper", *col) if isinstance(col, tuple) else ("grouper", col)
                for col in key_columns
            ]
            if data_key_columns is None:
                data_key_columns = []
            else:
                data_key_columns = [
                    ("data", *col) if isinstance(col, tuple) else ("data", col)
                    for col in data_key_columns
                ]
            # `key_columns` was rebuilt as a new list above, so extending it in place
            # doesn't affect the list passed by the caller
            key_columns += data_key_columns
        else:
            grouper = self

        # If there's only one row partition can simply apply the function row-wise without the need to reshuffle
        if grouper._partitions.shape[0] == 1:
            result = grouper.apply_full_axis(
                axis=1,
                func=func,
                new_columns=grouper.copy_columns_cache() if preserve_columns else None,
            )
            if preserve_columns:
                result._set_axis_lengths_cache(grouper._column_widths_cache, axis=1)
            return result

        # don't want to inherit over-partitioning so doing this 'min' check
        ideal_num_new_partitions = min(len(grouper._partitions), NPartitions.get())
        # `m` is the average amount of rows per desired partition
        m = len(grouper) / ideal_num_new_partitions
        sampling_probability = (1 / m) * np.log(ideal_num_new_partitions * len(grouper))
        # If this df is overpartitioned, we try to sample each partition with probability
        # greater than 1, which leads to an error. In this case, we can do one of the following
        # two things. If there is only enough rows for one partition, and we have only 1 column
        # partition, we can just combine the overpartitioned df into one partition, and sort that
        # partition. If there is enough data for more than one partition, we can tell the sorting
        # algorithm how many partitions we want to end up with, so it samples and finds pivots
        # according to that.
        if sampling_probability >= 1:
            from modin.config import MinRowPartitionSize

            ideal_num_new_partitions = round(len(grouper) / MinRowPartitionSize.get())
            if len(grouper) < MinRowPartitionSize.get() or ideal_num_new_partitions < 2:
                # If the data is too small, we shouldn't try reshuffling/repartitioning but rather
                # simply combine all partitions and apply the sorting to the whole dataframe
                return grouper.combine_and_apply(func=func)

            if ideal_num_new_partitions < len(grouper._partitions):
                # there are more row partitions than desired, so group neighbouring
                # row partitions together
                if len(grouper._partitions) % ideal_num_new_partitions == 0:
                    # the partitions can be split into equally sized groups
                    joining_partitions = np.split(
                        grouper._partitions, ideal_num_new_partitions
                    )
                else:
                    # no even split is possible: cut the partitions at every `step` rows
                    # of the partition grid instead
                    step = round(len(grouper._partitions) / ideal_num_new_partitions)
                    joining_partitions = np.split(
                        grouper._partitions,
                        range(step, len(grouper._partitions), step),
                    )

                # build column partitions out of every group of row partitions
                new_partitions = np.array(
                    [
                        grouper._partition_mgr_cls.column_partitions(
                            ptn_grp, full_axis=False
                        )
                        for ptn_grp in joining_partitions
                    ]
                )
            else:
                new_partitions = grouper._partitions
        else:
            new_partitions = grouper._partitions

        shuffling_functions = shuffle_func_cls(
            grouper,
            key_columns,
            # only the first element of a list-like `ascending` is used here
            ascending[0] if is_list_like(ascending) else ascending,
            ideal_num_new_partitions,
            level=level,
            **kwargs,
        )

        if key_columns:
            # here we want to get indices of those partitions that hold the key columns
            key_indices = grouper.columns.get_indexer_for(key_columns)
            partition_indices = np.unique(
                np.digitize(key_indices, np.cumsum(grouper.column_widths))
            )
        elif level is not None:
            # each partition contains an index, so taking the first one
            partition_indices = [0]
        else:
            raise ValueError("Must specify either 'level' or 'key_columns'")

        new_partitions = grouper._partition_mgr_cls.shuffle_partitions(
            new_partitions,
            partition_indices,
            shuffling_functions,
            func,
        )

        # only the partitions are passed to the constructor here; the columns cache
        # is set below when `preserve_columns` is specified
        result = grouper.__constructor__(new_partitions)
        if preserve_columns:
            result.set_columns_cache(grouper.copy_columns_cache())
            # We perform the final steps of the sort on full axis partitions, so we know that the
            # length of each partition is the full length of the dataframe.
            if grouper.has_materialized_columns:
                result._set_axis_lengths_cache([len(grouper.columns)], axis=1)
        return result

    @lazy_metadata_decorator(apply_axis="both")
    def sort_by(
        self,
        axis: Union[int, Axis],
        columns: Union[str, List[str]],
        ascending: bool = True,
        **kwargs,
    ) -> PandasDataframe:
        """
        Reorder rows lexicographically by the values of one or several columns.

        Parameters
        ----------
        axis : int or modin.core.dataframe.base.utils.Axis
            The axis to perform the sort over. Only the row-wise sort is implemented.
        columns : string or list
            Column label(s) to use to determine lexicographical ordering.
        ascending : boolean, default: True
            Whether to sort in ascending or descending order.
        **kwargs : dict
            Keyword arguments to pass when sorting partitions.

        Returns
        -------
        PandasDataframe
            A new PandasDataframe sorted into lexicographical order by the specified column(s).

        Raises
        ------
        NotImplementedError
            If a non-empty frame is requested to be sorted along an axis other than rows.
        """
        columns = columns if isinstance(columns, list) else [columns]

        def sort_partition(df):  # pragma: no cover
            # Sorting the result of Series.value_counts happens before the index
            # gets renamed, so the index and a column may share a name, although
            # the index name should really be None. pandas fails to sort such a
            # frame, hence the index names are dropped for the duration of the
            # sort and put back afterwards.
            original_names = df.index.names
            names_clash = any(name in df.columns for name in original_names)
            if names_clash:
                df.index = df.index.set_names([None] * len(original_names))
            df = df.sort_values(by=columns, ascending=ascending, **kwargs)
            if names_clash:
                df.index = df.index.set_names(original_names)
            return df

        # Nothing to shuffle or sort in an empty frame.
        if len(self.get_axis(1)) == 0 or len(self) == 0:
            return self.copy()

        axis = Axis(axis)
        if axis != Axis.ROW_WISE:
            raise NotImplementedError(
                f"Algebra sort only implemented row-wise. {axis.name} sort not implemented yet!"
            )

        # The range partitioning is built on the first sorting column only; the
        # partition-level sort above then orders rows by all of `columns`.
        sorted_frame = self._apply_func_to_range_partitioning(
            key_columns=columns[:1],
            func=sort_partition,
            ascending=ascending,
            preserve_columns=True,
            **kwargs,
        )
        # Sorting rows doesn't change the schema.
        sorted_frame.set_dtypes_cache(self.copy_dtypes_cache())

        if kwargs.get("ignore_index", False):
            sorted_frame.index = RangeIndex(len(self.get_axis(axis.value)))

        # The pivots are picked by random sampling, so poor ones may be chosen,
        # which skews the partitions. A check for such skew followed by a
        # rebalance should be added here. `rebalance_partitions` is not enough for
        # that: it only fixes a wrong amount of partitions, not uneven lengths.
        return sorted_frame

    @lazy_metadata_decorator(apply_axis="both")
    def filter(self, axis: Union[Axis, int], condition: Callable) -> PandasDataframe:
        """
        Run a filtering function over every full-axis partition along `axis`.

        Parameters
        ----------
        axis : int or modin.core.dataframe.base.utils.Axis
            The axis to filter over.
        condition : callable(row|col) -> bool
            The function to use for the filter. It is expected to filter the
            data it receives by itself.

        Returns
        -------
        PandasDataframe
            A new filtered dataframe.
        """
        axis = Axis(axis)
        assert axis in (
            Axis.ROW_WISE,
            Axis.COL_WISE,
        ), "Axis argument to filter operator must be 0 (rows) or 1 (columns)"

        filtered_partitions = self._partition_mgr_cls.map_axis_partitions(
            axis.value, self._partitions, condition, keep_partitioning=True
        )

        # Labels and lengths along `axis` are carried over from this frame, the
        # ones for the opposite axis are left unset (None).
        kept_axis = axis.value
        labels = [None, None]
        lengths = [None, None]
        labels[kept_axis] = self.copy_axis_cache(kept_axis, copy_lengths=True)
        if kept_axis == 0:
            lengths[kept_axis] = self._row_lengths_cache
        else:
            lengths[kept_axis] = self._column_widths_cache

        # The dtypes are only carried over for the column-wise filter.
        dtypes = self.copy_dtypes_cache() if axis == Axis.COL_WISE else None

        return self.__constructor__(
            filtered_partitions,
            *labels,
            *lengths,
            dtypes,
            pandas_backend=self._pandas_backend,
        )

    def filter_by_types(self, types: List[Hashable]) -> PandasDataframe:
        """
        Keep only the columns whose dtype is one of the given types.

        Parameters
        ----------
        types : list
            The types to filter columns by.

        Returns
        -------
        PandasDataframe
             A new PandasDataframe holding the columns that matched.
        """
        # Columns are selected by position, so the column labels play no role in
        # the selection.
        matching_positions = []
        for position, dtype in enumerate(self.dtypes):
            if dtype in types:
                matching_positions.append(position)
        return self.take_2d_labels_or_positional(col_positions=matching_positions)

    @lazy_metadata_decorator(apply_axis="both")
    def explode(self, axis: Union[int, Axis], func: Callable) -> PandasDataframe:
        """
        Explode list-like entries along an entire axis.

        Parameters
        ----------
        axis : int or modin.core.dataframe.base.utils.Axis
            The axis specifying how to explode. If axis=1, explode according
            to columns.
        func : callable
            The function to use to explode a single element.

        Returns
        -------
        PandasDataframe
            A new dataframe holding the result of applying `func`.
        """
        axis = Axis(axis)
        exploded = self._partition_mgr_cls.map_axis_partitions(
            axis.value, self._partitions, func, keep_partitioning=True
        )
        if axis != Axis.COL_WISE:
            # The rows are taken from this frame, the columns have to be
            # recomputed from the new partitions.
            new_index, row_lengths = self.index, self._row_lengths_cache
            new_columns, column_widths = self._compute_axis_labels_and_lengths(
                1, exploded
            )
        else:
            # The other way around: recompute the rows and keep the columns.
            new_index, row_lengths = self._compute_axis_labels_and_lengths(
                0, exploded
            )
            new_columns, column_widths = self.columns, self._column_widths_cache
        return self.__constructor__(
            exploded,
            index=new_index,
            columns=new_columns,
            row_lengths=row_lengths,
            column_widths=column_widths,
            pandas_backend=self._pandas_backend,
        )

    def combine(self) -> PandasDataframe:
        """
        Merge all partitions of this dataframe into a single-partition PandasDataframe.

        Returns
        -------
        PandasDataframe
            A single partition PandasDataframe.
        """
        # Labels are only handed to the partition manager for the axes that are
        # flagged as deferred on this frame.
        index_to_pass = self.index if self._deferred_index else None
        columns_to_pass = self.columns if self._deferred_column else None
        combined = self._partition_mgr_cls.combine(
            self._partitions, index_to_pass, columns_to_pass
        )

        # The single resulting partition spans the sum of the cached lengths,
        # provided that those are known.
        total_rows = (
            None if self._row_lengths_cache is None else [sum(self._row_lengths_cache)]
        )
        total_columns = (
            None
            if self._column_widths_cache is None
            else [sum(self._column_widths_cache)]
        )
        return self.__constructor__(
            combined,
            index=self.copy_index_cache(),
            columns=self.copy_columns_cache(),
            row_lengths=total_rows,
            column_widths=total_columns,
            dtypes=self.copy_dtypes_cache(),
            pandas_backend=self._pandas_backend,
        )

    @lazy_metadata_decorator(apply_axis="both")
    def apply_full_axis(
        self,
        axis,
        func,
        new_index=None,
        new_columns=None,
        apply_indices=None,
        enumerate_partitions: bool = False,
        dtypes=None,
        keep_partitioning=True,
        num_splits=None,
        sync_labels=True,
        pass_axis_lengths_to_partitions=False,
    ) -> PandasDataframe:
        """
        Apply a function over an entire axis.

        Parameters
        ----------
        axis : {0, 1}
            The axis to apply over (0 - rows, 1 - columns).
        func : callable
            The function to apply.
        new_index : list-like, optional
            The index of the result, when known in advance; it has to be
            computed if not provided.
        new_columns : list-like, optional
            The columns of the result, when known in advance; they have to be
            computed if not provided.
        apply_indices : list-like, optional
            Indices of `axis ^ 1` to apply function over.
        enumerate_partitions : bool, default: False
            Whether to pass the partition index into the applied `func`.
            Note that `func` must then accept a `partition_idx` kwarg.
        dtypes : list-like or scalar, optional
            The data types of the result. An optimization for functions that
            always produce a particular data type: providing it avoids
            (re)computing the dtypes.
        keep_partitioning : boolean, default: True
            Whether to keep the partition boundaries of the Modin Frame if possible.
            When True, data is not shuffled between partitions in case the resulting
            number of splits is equal to the initial number of splits.
        num_splits : int, optional
            The number of partitions to split the result into across the `axis`.
            If None, the number of splits is inferred automatically; if None together
            with `keep_partitioning=True`, the number of splits is preserved.
        sync_labels : boolean, default: True
            Synchronize external indexes (`new_index`, `new_columns`) with internal indexes.
            May be disabled to save time when the indices in partitions are known
            to be equal to the provided hints.
        pass_axis_lengths_to_partitions : bool, default: False
            Whether to pass the partition lengths along `axis ^ 1` to the kernel `func`.
            Note that `func` must then accept `df, *axis_lengths`.

        Returns
        -------
        PandasDataframe
            A new dataframe.

        Notes
        -----
        The data shape may change as a result of the function.
        """
        # A full-axis apply is a broadcasted full-axis apply with nothing to
        # broadcast (`other=None`); everything else is forwarded unchanged.
        return self.broadcast_apply_full_axis(
            other=None,
            axis=axis,
            func=func,
            apply_indices=apply_indices,
            enumerate_partitions=enumerate_partitions,
            new_index=new_index,
            new_columns=new_columns,
            dtypes=dtypes,
            sync_labels=sync_labels,
            keep_partitioning=keep_partitioning,
            num_splits=num_splits,
            pass_axis_lengths_to_partitions=pass_axis_lengths_to_partitions,
        )

    @lazy_metadata_decorator(apply_axis="both")
    def apply_full_axis_select_indices(
        self,
        axis,
        func,
        apply_indices=None,
        numeric_indices=None,
        new_index=None,
        new_columns=None,
        keep_remaining=False,
        new_dtypes: Optional[Union[pandas.Series, ModinDtypes]] = None,
    ):
        """
        Apply a function across an entire axis, but only for a subset of the data.

        Parameters
        ----------
        axis : int
            The axis to apply over.
        func : callable
            The function to apply.
        apply_indices : list-like, optional
            The labels to apply over. Takes precedence over `numeric_indices`.
        numeric_indices : list-like, optional
            The indices to apply over.
        new_index : list-like, optional
            The index of the result, when known in advance. If not provided,
            the current index is reused for `axis=1`.
        new_columns : list-like, optional
            The columns of the result, when known in advance. If not provided,
            the current columns are reused for `axis=0`.
        keep_remaining : boolean, default: False
            Whether or not to drop the data that is not computed over.
        new_dtypes : ModinDtypes or pandas.Series, optional
            The data types of the result. An optimization for functions that
            always produce a particular data type: providing it avoids
            (re)computing the dtypes.

        Returns
        -------
        PandasDataframe
            A new dataframe.
        """
        assert apply_indices is not None or numeric_indices is not None
        # Labels, when given, are translated into positions first.
        labels_to_search = self.index if axis else self.columns
        if apply_indices is not None:
            numeric_indices = labels_to_search.get_indexer_for(apply_indices)
        # The positions refer to the axis opposite to the one the function is
        # applied over, so the block indices are built for `axis ^ 1`.
        block_indices = self._get_dict_of_block_index(axis ^ 1, numeric_indices)
        applied_partitions = (
            self._partition_mgr_cls.apply_func_to_select_indices_along_full_axis(
                axis,
                self._partitions,
                func,
                block_indices,
                keep_remaining=keep_remaining,
            )
        )
        # TODO Infer columns and index from `keep_remaining` and `apply_indices`
        if new_index is None and axis == 1:
            new_index = self.index
        if new_columns is None and axis == 0:
            new_columns = self.columns
        return self.__constructor__(
            applied_partitions,
            index=new_index,
            columns=new_columns,
            row_lengths=None,
            column_widths=None,
            dtypes=new_dtypes,
            pandas_backend=self._pandas_backend,
        )

    @lazy_metadata_decorator(apply_axis="both")
    def apply_select_indices(
        self,
        axis,
        func,
        apply_indices=None,
        row_labels=None,
        col_labels=None,
        new_index=None,
        new_columns=None,
        new_dtypes: Optional[pandas.Series] = None,
        keep_remaining=False,
        item_to_distribute=no_default,
    ) -> PandasDataframe:
        """
        Apply a function to a subset of the data.

        Parameters
        ----------
        axis : {0, 1}
            The axis to apply over. Pass None to apply over both axes.
        func : callable
            The function to apply.
        apply_indices : list-like, optional
            The labels to apply over. Must be given if axis is provided.
        row_labels : list-like, optional
            The row labels to apply over. Must be provided with
            `col_labels` to apply over both axes.
        col_labels : list-like, optional
            The column labels to apply over. Must be provided
            with `row_labels` to apply over both axes.
        new_index : list-like, optional
            The index of the result, if known in advance.
        new_columns : list-like, optional
            The columns of the result, if known in advance.
        new_dtypes : pandas.Series, optional
            The dtypes of the result, if known in advance.
        keep_remaining : boolean, default: False
            Whether or not to drop the data that is not computed over.
            Has to be True when applying over both axes.
        item_to_distribute : np.ndarray or scalar, default: no_default
            The item to split up so it can be applied over both axes.

        Returns
        -------
        PandasDataframe
            A new dataframe.
        """
        # TODO Infer columns and index from `keep_remaining` and `apply_indices`
        if new_index is None and axis == 1:
            new_index = self.index
        if new_columns is None and axis == 0:
            new_columns = self.columns
        if new_columns is not None and isinstance(new_dtypes, pandas.Series):
            assert new_dtypes.index.equals(
                new_columns
            ), f"{new_dtypes=} doesn't have the same columns as in {new_columns=}"

        if axis is None:
            # Applying over both axes, make sure that everything required for
            # that has been provided.
            assert row_labels is not None and col_labels is not None
            assert keep_remaining
            assert item_to_distribute is not no_default
            row_blocks = self._get_dict_of_block_index(0, row_labels).items()
            col_blocks = self._get_dict_of_block_index(1, col_labels).items()
            both_axes_partitions = (
                self._partition_mgr_cls.apply_func_to_indices_both_axis(
                    self._partitions,
                    func,
                    row_blocks,
                    col_blocks,
                    item_to_distribute,
                    # The caches are passed instead of the values in order not
                    # to trigger a recomputation of the shapes when the callee
                    # doesn't use them.
                    self._row_lengths_cache,
                    self._column_widths_cache,
                )
            )
            return self.__constructor__(
                both_axes_partitions,
                new_index,
                new_columns,
                self._row_lengths_cache,
                self._column_widths_cache,
                new_dtypes,
                pandas_backend=self._pandas_backend,
            )

        assert apply_indices is not None
        # Translate the labels into positions ...
        labels_to_search = self.index if axis else self.columns
        positions = labels_to_search.get_indexer_for(apply_indices)
        # ... that refer to the axis opposite to the one being applied over.
        block_indices = self._get_dict_of_block_index(axis ^ 1, positions)
        single_axis_partitions = self._partition_mgr_cls.apply_func_to_select_indices(
            axis,
            self._partitions,
            func,
            block_indices,
            keep_remaining=keep_remaining,
        )
        # Partition lengths of the result keyed by axis: 0 holds the row lengths
        # and 1 the column widths. The opposite axis keeps its current lengths;
        # `axis` keeps them as well when the remaining data is kept, otherwise
        # it is described by a single length equal to the amount of
        # `apply_indices`.
        # TODO Determine lengths from current lengths if `keep_remaining=False`
        current_lengths = [self.row_lengths, self.column_widths]
        lengths_by_axis = {axis ^ 1: current_lengths[axis ^ 1]}
        if keep_remaining:
            lengths_by_axis[axis] = current_lengths[axis]
        else:
            lengths_by_axis[axis] = [len(apply_indices)]
        return self.__constructor__(
            single_axis_partitions,
            new_index,
            new_columns,
            lengths_by_axis[0],
            lengths_by_axis[1],
            new_dtypes,
            pandas_backend=self._pandas_backend,
        )

    @lazy_metadata_decorator(apply_axis="both")
    def broadcast_apply(
        self,
        axis,
        func,
        other,
        join_type="left",
        copartition=True,
        labels="keep",
        dtypes=None,
    ):
        """
        Apply `func` to partitions of `self` with axis partitions of `other` broadcasted to them.

        Parameters
        ----------
        axis : {0, 1}
            Axis along which `other` is broadcasted.
        func : callable
            Kernel to run on the paired partitions.
        other : PandasDataframe
            Frame whose partitions are broadcasted.
        join_type : str, default: "left"
            Join strategy forwarded to ``_copartition`` when `copartition` is True.
        copartition : bool, default: True
            If True, `self` and `other` are aligned through ``_copartition`` first.
            If False, the partitions of both frames are used as they are, so the
            caller must guarantee that labels and partitioning along `axis`
            already match (e.g. one frame is a projection of the other);
            otherwise the behavior is undefined.
        labels : {"keep", "replace", "drop"}, default: "keep"
            What to do with the labels along `axis`: "keep" reuses the labels of
            `self`, "replace" takes the joined labels, and "drop" leaves them
            unset so they are computed lazily later.
        dtypes : "copy", pandas.Series or None, optional
            Result dtypes. "copy" reuses the dtypes cache of `self`; None leaves
            them to be computed on demand.

        Returns
        -------
        PandasDataframe
            New Modin DataFrame.
        """
        if not copartition:
            # The caller vouches for the alignment: take partitions untouched
            # and only read the metadata caches of `self`.
            left_parts = self._partitions
            right_parts = other._partitions
            partition_sizes_along_axis = self._get_axis_lengths_cache(axis)
            joined_index = self.copy_axis_cache(axis)
        else:
            (
                left_parts,
                aligned_right_list,
                joined_index,
                partition_sizes_along_axis,
            ) = self._copartition(axis, other, join_type)
            # `_copartition` returns a list of right-hand partition arrays;
            # only one frame was passed, so take its single entry.
            right_parts = aligned_right_list[0]

        new_frame = self._partition_mgr_cls.broadcast_apply(
            axis, func, left_parts, right_parts
        )

        wants_dtypes_copy = isinstance(dtypes, str) and dtypes == "copy"
        if wants_dtypes_copy:
            dtypes = self.copy_dtypes_cache()

        def _resolve_broadcast_axis(copy_labels_cache, lengths_cache):
            # Choose labels and partition sizes for the broadcasting axis
            # according to the `labels` policy.
            if labels == "keep":
                return copy_labels_cache(), lengths_cache
            if labels == "replace":
                return joined_index, partition_sizes_along_axis
            assert labels == "drop", f"Unexpected `labels`: {labels}"
            return None, None

        # Shape caches (not the computed values) are forwarded so that shape
        # computation is not triggered here.
        if axis == 0:
            new_index, new_row_lengths = _resolve_broadcast_axis(
                self.copy_index_cache, self._row_lengths_cache
            )
            new_columns = self.copy_columns_cache()
            new_column_widths = self._column_widths_cache
        else:
            new_index = self.copy_index_cache()
            new_row_lengths = self._row_lengths_cache
            new_columns, new_column_widths = _resolve_broadcast_axis(
                self.copy_columns_cache, self._column_widths_cache
            )

        return self.__constructor__(
            new_frame,
            new_index,
            new_columns,
            new_row_lengths,
            new_column_widths,
            dtypes=dtypes,
            pandas_backend=self._pandas_backend,
        )

    def _prepare_frame_to_broadcast(self, axis, indices, broadcast_all):
        """
        Build the mapping that tells which partitions of `self` to broadcast.

        Parameters
        ----------
        axis : {0, 1}
            Axis to broadcast along.
        indices : dict
            Mapping from a partition number of the frame being broadcasted to
            onto the internal indices selected in that partition.
        broadcast_all : bool
            True to broadcast the whole axis of `self`, False to broadcast only
            the slice matching the selected internal indices.

        Returns
        -------
        dict
            Dictionary with indices of partitions to broadcast.

        Notes
        -----
        The result is keyed by the partition numbers found in `indices`.
        For example, ``{key: {key1: [0, 1], key2: [5]}}`` means that
        ``self[key1]`` and ``self[key2]`` are broadcasted to ``other[key]`` with
        internal indices ``[[0, 1], [5]]``. When `broadcast_all` is True every key
        maps to ``{position: size}`` built from the row lengths (truthy `axis`)
        or the column widths (``axis == 0``) of `self`.
        """
        if not broadcast_all:
            broadcast_map = {}
            # Running offset: every target partition consumes a consecutive
            # range of positions as long as its list of internal indices.
            offset = 0
            for part_num, internal in indices.items():
                count = len(internal)
                broadcast_map[part_num] = self._get_dict_of_block_index(
                    axis ^ 1, np.arange(offset, offset + count)
                )
                offset += count
            return broadcast_map

        sizes = self.column_widths if not axis else self.row_lengths
        return {key: dict(enumerate(sizes)) for key in indices}

    def _extract_partitions(self):
        """
        Return the partitions of the frame, or a placeholder if there are none.

        When the partitions array is empty, a partition is created from the
        metadata of the frame (index, columns and, if already materialized,
        dtypes) instead.

        Returns
        -------
        np.ndarray
            NumPy array with extracted partitions.
        """
        parts = self._partitions
        if parts.size > 0:
            return parts

        # Only hand over dtypes that are already materialized.
        known_dtypes = self.dtypes if self.has_materialized_dtypes else None
        return self._partition_mgr_cls.create_partition_from_metadata(
            index=self.index, columns=self.columns, dtypes=known_dtypes
        )

    @lazy_metadata_decorator(apply_axis="both")
    def broadcast_apply_select_indices(
        self,
        axis,
        func,
        other: PandasDataframe,
        apply_indices=None,
        numeric_indices=None,
        keep_remaining=False,
        broadcast_all=True,
        new_index=None,
        new_columns=None,
    ) -> PandasDataframe:
        """
        Run `func` on selected indices along `axis`, broadcasting partitions of `other`.

        Parameters
        ----------
        axis : {0, 1}
            Axis to apply function along.
        func : callable
            Function to apply.
        other : PandasDataframe
            Frame whose partitions are broadcasted. If None, the call is
            delegated to ``apply_select_indices``.
        apply_indices : list, optional
            Labels to apply over (used when `numeric_indices` is not given).
        numeric_indices : list, optional
            Positions to apply over (used when `apply_indices` is not given).
        keep_remaining : bool, default: False
            Whether to keep the data that the function is not applied to.
        broadcast_all : bool, default: True
            Whether to broadcast the whole axis of the right frame to every
            partition or just a subset of it.
        new_index : pandas.Index, optional
            Index of the result, if known in advance; otherwise it has to be
            computed.
        new_columns : pandas.Index, optional
            Columns of the result, if known in advance; otherwise they have to
            be computed.

        Returns
        -------
        PandasDataframe
            New Modin DataFrame.
        """
        assert not (
            apply_indices is None and numeric_indices is None
        ), "Indices to apply must be specified!"

        if other is None:
            # Nothing to broadcast: fall back to the label-based variant,
            # deriving labels from positions when only positions were given.
            if apply_indices is None:
                apply_indices = self.get_axis(axis)[numeric_indices]
            return self.apply_select_indices(
                axis=axis,
                func=func,
                apply_indices=apply_indices,
                keep_remaining=keep_remaining,
                new_index=new_index,
                new_columns=new_columns,
            )

        if numeric_indices is None:
            source_labels = self.index if axis else self.columns
            numeric_indices = source_labels.get_indexer_for(apply_indices)

        dict_indices = self._get_dict_of_block_index(axis ^ 1, numeric_indices)
        broadcasted_dict = other._prepare_frame_to_broadcast(
            axis, dict_indices, broadcast_all=broadcast_all
        )
        result_partitions = self._partition_mgr_cls.broadcast_apply_select_indices(
            axis,
            func,
            self._partitions,
            other._partitions,
            dict_indices,
            broadcasted_dict,
            keep_remaining,
        )
        return self.__constructor__(
            result_partitions,
            index=new_index,
            columns=new_columns,
            pandas_backend=self._pandas_backend,
        )

    @lazy_metadata_decorator(apply_axis="both")
    def broadcast_apply_full_axis(
        self,
        axis,
        func,
        other,
        new_index=None,
        new_columns=None,
        apply_indices=None,
        enumerate_partitions=False,
        dtypes=None,
        keep_partitioning=True,
        num_splits=None,
        sync_labels=True,
        pass_axis_lengths_to_partitions=False,
    ):
        """
        Broadcast partitions of `other` Modin DataFrame and apply a function along full axis.

        Parameters
        ----------
        axis : {0, 1}
            Axis to apply over (0 - rows, 1 - columns).
        func : callable
            Function to apply.
        other : PandasDataframe or list
            Modin DataFrame(s) to broadcast.
        new_index : list-like, optional
            Index of the result. We may know this in advance,
            and if not provided it must be computed.
        new_columns : list-like, optional
            Columns of the result. We may know this in
            advance, and if not provided it must be computed.
        apply_indices : list-like, optional
            Indices of `axis ^ 1` to apply function over.
        enumerate_partitions : bool, default: False
            Whether pass partition index into applied `func` or not.
            Note that `func` must be able to obtain `partition_idx` kwarg.
        dtypes : list-like or scalar, optional
            Data types of the result. This is an optimization
            because there are functions that always result in a particular data
            type, and allows us to avoid (re)computing it.
            The string "copy" reuses the dtypes cache of `self`.
        keep_partitioning : boolean, default: True
            The flag to keep partition boundaries for Modin Frame if possible.
            Setting it to True disables shuffling data from one partition to another in case the resulting
            number of splits is equal to the initial number of splits.
        num_splits : int, optional
            The number of partitions to split the result into across the `axis`. If None, then the number
            of splits will be inferred automatically. If `num_splits` is None and `keep_partitioning=True`
            then the number of splits is preserved.
        sync_labels : boolean, default: True
            Synchronize external indexes (`new_index`, `new_columns`) with internal indexes.
            This could be used when you're certain that the indices in partitions are equal to
            the provided hints in order to save time on syncing them.
        pass_axis_lengths_to_partitions : bool, default: False
            Whether pass partition lengths along `axis ^ 1` to the kernel `func`.
            Note that `func` must be able to obtain `df, *axis_lengths`.

        Returns
        -------
        PandasDataframe
            New Modin DataFrame.
        """
        # Normalize `other` to a list of partition arrays; an empty list is
        # treated the same as "nothing to broadcast" (None).
        if other is not None:
            if not isinstance(other, list):
                other = [other]
            other = [o._extract_partitions() for o in other] if len(other) else None

        # Translate the labels along `axis ^ 1` into positions and keep only the
        # keys of the block-index dict (presumably the partition positions that
        # hold those labels -- confirm against `_get_dict_of_block_index`).
        if apply_indices is not None:
            numeric_indices = self.get_axis(axis ^ 1).get_indexer_for(apply_indices)
            apply_indices = self._get_dict_of_block_index(
                axis ^ 1, numeric_indices
            ).keys()

        # Optionally hand the partition sizes along `axis ^ 1` to the kernel.
        # Cached sizes are preferred; otherwise each partition is asked with
        # `materialize=False`.
        apply_func_args = None
        if pass_axis_lengths_to_partitions:
            if axis == 0:
                apply_func_args = (
                    self._column_widths_cache
                    if self._column_widths_cache is not None
                    else [part.width(materialize=False) for part in self._partitions[0]]
                )
            else:
                apply_func_args = (
                    self._row_lengths_cache
                    if self._row_lengths_cache is not None
                    else [
                        part.length(materialize=False) for part in self._partitions.T[0]
                    ]
                )

        new_partitions = self._partition_mgr_cls.broadcast_axis_partitions(
            axis=axis,
            left=self._partitions,
            right=other,
            apply_func=self._build_treereduce_func(axis, func),
            apply_indices=apply_indices,
            enumerate_partitions=enumerate_partitions,
            keep_partitioning=keep_partitioning,
            num_splits=num_splits,
            apply_func_args=apply_func_args,
        )
        # Constructor keyword arguments; the lengths stay None unless one of
        # the checks below can infer them without computation.
        kw = {"row_lengths": None, "column_widths": None}
        # Normalize the `dtypes` hint into something the constructor accepts.
        if isinstance(dtypes, str) and dtypes == "copy":
            kw["dtypes"] = self.copy_dtypes_cache()
        elif isinstance(dtypes, DtypesDescriptor):
            kw["dtypes"] = ModinDtypes(dtypes)
        elif dtypes is not None:
            if isinstance(dtypes, (pandas.Series, ModinDtypes)):
                kw["dtypes"] = dtypes.copy()
            else:
                if new_columns is None:
                    # Column labels are unknown, so only a single dtype that
                    # applies to every column can be recorded.
                    assert not is_list_like(dtypes)
                    dtype = pandas.api.types.pandas_dtype(dtypes)
                    kw["dtypes"] = ModinDtypes(DtypesDescriptor(remaining_dtype=dtype))
                else:
                    # Column labels are known: build a per-column Series, repeating
                    # a scalar dtype for every column if needed.
                    kw["dtypes"] = (
                        pandas.Series(dtypes, index=new_columns)
                        if is_list_like(dtypes)
                        else pandas.Series(
                            [pandas.api.types.pandas_dtype(dtypes)] * len(new_columns),
                            index=new_columns,
                        )
                    )
        is_index_materialized = ModinIndex.is_materialized_index(new_index)
        is_columns_materialized = ModinIndex.is_materialized_index(new_columns)
        # With a single partition along the orthogonal axis, that partition's
        # size equals the number of provided labels.
        if axis == 0:
            if (
                is_columns_materialized
                and len(new_partitions.shape) > 1
                and new_partitions.shape[1] == 1
            ):
                kw["column_widths"] = [len(new_columns)]
        elif axis == 1:
            if is_index_materialized and new_partitions.shape[0] == 1:
                kw["row_lengths"] = [len(new_index)]
        if not keep_partitioning:
            # Along the applied axis the sizes are derived with `get_length_list`
            # from the number of labels and of resulting partitions; along the
            # other axis the cached sizes are reused only if they still add up
            # to the number of provided labels.
            if kw["row_lengths"] is None and is_index_materialized:
                if axis == 0:
                    kw["row_lengths"] = get_length_list(
                        axis_len=len(new_index),
                        num_splits=new_partitions.shape[0],
                        min_block_size=MinRowPartitionSize.get(),
                    )
                elif axis == 1:
                    if self._row_lengths_cache is not None and len(new_index) == sum(
                        self._row_lengths_cache
                    ):
                        kw["row_lengths"] = self._row_lengths_cache
            if kw["column_widths"] is None and is_columns_materialized:
                if axis == 1:
                    kw["column_widths"] = get_length_list(
                        axis_len=len(new_columns),
                        num_splits=new_partitions.shape[1],
                        min_block_size=MinColumnPartitionSize.get(),
                    )
                elif axis == 0:
                    if self._column_widths_cache is not None and len(
                        new_columns
                    ) == sum(self._column_widths_cache):
                        kw["column_widths"] = self._column_widths_cache
        else:
            # Partitioning is kept: reuse the cached sizes along `axis` when the
            # provided labels have the same total length and no cached size is 0.
            if axis == 0:
                if (
                    kw["row_lengths"] is None
                    and self._row_lengths_cache is not None
                    and is_index_materialized
                    and len(new_index) == sum(self._row_lengths_cache)
                    # to avoid problems that may arise when filtering empty dataframes
                    and all(r != 0 for r in self._row_lengths_cache)
                ):
                    kw["row_lengths"] = self._row_lengths_cache
            elif axis == 1:
                if (
                    kw["column_widths"] is None
                    and self._column_widths_cache is not None
                    and is_columns_materialized
                    and len(new_columns) == sum(self._column_widths_cache)
                    # to avoid problems that may arise when filtering empty dataframes
                    and all(w != 0 for w in self._column_widths_cache)
                ):
                    kw["column_widths"] = self._column_widths_cache

        result = self.__constructor__(
            new_partitions,
            index=new_index,
            columns=new_columns,
            **kw,
            pandas_backend=self._pandas_backend,
        )
        # Labels are synchronized only for the axes whose labels were provided
        # by the caller, and only if `sync_labels` is requested.
        if sync_labels and new_index is not None:
            result.synchronize_labels(axis=0)
        if sync_labels and new_columns is not None:
            result.synchronize_labels(axis=1)
        return result

    def _check_if_axes_identical(self, other: PandasDataframe, axis: int = 0) -> bool:
        """
        Tell whether `self` and `other` share labels and partitioning along `axis`.

        Parameters
        ----------
        other : PandasDataframe
            Dataframe to compare indices/partitioning with.
        axis : int, default: 0
            Axis along which labels and partition lengths are compared.

        Returns
        -------
        bool
            True if both the labels and the partition lengths along `axis` match.
        """
        both_cached = self.has_axis_cache(axis) and other.has_axis_cache(axis)
        if not both_cached:
            # At least one cache is missing: compare the labels and partition
            # lengths directly.
            return self.get_axis(axis).equals(
                other.get_axis(axis)
            ) and self._get_axis_lengths(axis) == other._get_axis_lengths(axis)

        own_cache = self._get_axis_cache(axis)
        their_cache = other._get_axis_cache(axis)
        if not own_cache.equals(their_cache):
            return False

        # The caches may be able to compare partition lengths themselves; a
        # non-bool answer means they could not, so compare the lengths directly.
        verdict = own_cache.compare_partition_lengths_if_possible(their_cache)
        if isinstance(verdict, bool):
            return verdict
        return self._get_axis_lengths(axis) == other._get_axis_lengths(axis)

    def _copartition(
        self, axis, other, how, sort=None, force_repartition=False, fill_value=None
    ):
        """
        Align the labels and partition boundaries of several Modin DataFrames.

        The first non-empty frame serves as the base; the frames that follow it
        are reindexed/repartitioned to match the base along `axis`.

        Parameters
        ----------
        axis : {0, 1}
            Axis to copartition along (0 - rows, 1 - columns).
        other : PandasDataframe
            Other Modin DataFrame(s) to copartition against.
        how : str
            How to manage joining the index object ("left", "right", etc.).
        sort : bool, default: None
            Whether to sort the joined index. If ``None``, sorting is enabled
            only when the labels along `axis` are not equal for all frames.
        force_repartition : bool, default: False
            Whether to repartition unconditionally. By default repartitioning
            is skipped whenever possible, because reindexing is extremely
            inefficient. This method backs `join` and `append`, so it is vital
            that the internal indices match.
        fill_value : any, optional
            Value to use for missing values.

        Returns
        -------
        tuple
            Tuple containing:
                1) 2-d NumPy array of aligned left partitions
                2) list of 2-d NumPy arrays of aligned right partitions
                3) joined index along ``axis``, may be ``ModinIndex`` if not materialized
                4) If materialized, list with sizes of partitions along axis that partitioning
                   was done on, otherwise ``None``. This list will be empty if and only if all
                   the frames are empty.
        """
        if isinstance(other, type(self)):
            other = [other]

        # Fast path: every frame already matches `self` along `axis`.
        already_aligned = not force_repartition and all(
            o._check_if_axes_identical(self, axis) for o in other
        )
        if already_aligned:
            return (
                self._partitions,
                [o._partitions for o in other],
                self.copy_axis_cache(axis, copy_lengths=True),
                self._get_axis_lengths_cache(axis),
            )

        if sort is None:
            # Sort only if some frame's labels differ from the labels of `self`.
            sort = any(
                not self.get_axis(axis).equals(o.get_axis(axis)) for o in other
            )

        all_labels = [self.get_axis(axis), *(o.get_axis(axis) for o in other)]
        joined_index, make_reindexer = self._join_index_objects(
            axis, all_labels, how, sort, fill_value
        )

        frames = [self] + other
        non_empty_positions = [
            pos for pos, frame in enumerate(frames) if frame._partitions.size != 0
        ]

        if not non_empty_positions:
            # All frames are empty: there are no partitions, hence no sizes.
            return (
                self._partitions,
                [o._partitions for o in other],
                joined_index,
                [],
            )

        # The first non-empty frame defines the target partitioning.
        base_frame_idx = non_empty_positions[0]
        base_frame = frames[base_frame_idx]
        other_frames = frames[base_frame_idx + 1 :]
        base_index = base_frame.get_axis(axis)

        do_reindex_base = not base_index.equals(joined_index)
        do_repartition_base = force_repartition or do_reindex_base

        # Reindex/repartition the base if required and record its partition
        # sizes; the remaining frames are aligned against these sizes.
        if not do_repartition_base:
            reindexed_base = base_frame._partitions
            base_lengths = base_frame.column_widths if axis else base_frame.row_lengths
        else:
            reindexed_base = base_frame._partition_mgr_cls.map_axis_partitions(
                axis,
                base_frame._partitions,
                make_reindexer(do_reindex_base, base_frame_idx),
            )
            if axis:
                base_lengths = [part.width() for part in reindexed_base[0]]
            else:
                base_lengths = [part.length() for part in reindexed_base.T[0]]

        others_lengths = [frame._get_axis_lengths(axis) for frame in other_frames]
        do_reindex_others = [
            not frame.get_axis(axis).equals(joined_index) for frame in other_frames
        ]
        # A frame has to be repartitioned if it is forced, if its labels differ
        # from the joined ones, or if its partition sizes differ from the base.
        do_repartition_others = [
            force_repartition or must_reindex or lengths != base_lengths
            for must_reindex, lengths in zip(do_reindex_others, others_lengths)
        ]

        reindexed_other_list = []
        for offset, (frame, repartition) in enumerate(
            zip(other_frames, do_repartition_others)
        ):
            if not repartition:
                reindexed_other_list.append(frame._partitions)
                continue
            # Positions of these frames in `frames` start at `base_frame_idx + 1`.
            reindexed_other_list.append(
                frame._partition_mgr_cls.map_axis_partitions(
                    axis,
                    frame._partitions,
                    make_reindexer(repartition, base_frame_idx + 1 + offset),
                    lengths=base_lengths,
                )
            )

        # Frames located before the base are empty and are passed through as is.
        reindexed_frames = (
            [frame._partitions for frame in frames[:base_frame_idx]]
            + [reindexed_base]
            + reindexed_other_list
        )
        return (reindexed_frames[0], reindexed_frames[1:], joined_index, base_lengths)

    @lazy_metadata_decorator(apply_axis="both")
    def n_ary_op(
        self,
        op,
        right_frames: list[PandasDataframe],
        join_type="outer",
        sort=None,
        copartition_along_columns=True,
        labels="replace",
        dtypes: Optional[pandas.Series] = None,
    ) -> PandasDataframe:
        """
        Perform an n-ary operation by joining with other Modin DataFrame(s).

        Parameters
        ----------
        op : callable
            Function to apply after the join.
        right_frames : list of PandasDataframe
            Modin DataFrames to join with.
        join_type : str, default: "outer"
            Type of join to apply.
        sort : bool, default: None
            Whether to sort index and columns or not.
        copartition_along_columns : bool, default: True
            Whether to copartition along columns as well. Some operations
            (e.g., `fillna`) do not need it.
        labels : {"replace", "drop"}, default: "replace"
            "replace" uses the labels of the joined frame; "drop" leaves labels
            and partition sizes unset so they are computed lazily later.
        dtypes : pandas.Series, optional
            Precomputed dtypes of the result, if they are known.

        Returns
        -------
        PandasDataframe
            New Modin DataFrame.
        """
        # Align rows first.
        left_parts, list_of_right_parts, joined_index, row_lengths = self._copartition(
            0,
            right_frames,
            join_type,
            sort=sort,
        )

        if not copartition_along_columns:
            joined_columns = self.copy_columns_cache(copy_lengths=True)
            column_widths = self._column_widths_cache
        else:
            # Wrap the row-aligned partitions into frames so that they can be
            # copartitioned along columns too.
            row_aligned_left = self.__constructor__(
                left_parts,
                joined_index,
                self.copy_columns_cache(copy_lengths=True),
                row_lengths,
                self._column_widths_cache,
                pandas_backend=self._pandas_backend,
            )
            row_aligned_rights = []
            for aligned_parts, source_frame in zip(list_of_right_parts, right_frames):
                row_aligned_rights.append(
                    self.__constructor__(
                        aligned_parts,
                        joined_index,
                        source_frame.copy_columns_cache(copy_lengths=True),
                        row_lengths,
                        source_frame._column_widths_cache,
                        pandas_backend=self._pandas_backend,
                    )
                )

            (
                left_parts,
                list_of_right_parts,
                joined_columns,
                column_widths,
            ) = row_aligned_left._copartition(
                1,
                row_aligned_rights,
                join_type,
                sort=sort,
            )

        # If any operand has no partitions, the result has none either.
        has_empty_operand = len(left_parts) == 0 or any(
            len(parts) == 0 for parts in list_of_right_parts
        )
        if has_empty_operand:
            new_frame = np.array([])
        else:
            new_frame = self._partition_mgr_cls.n_ary_operation(
                left_parts, op, list_of_right_parts
            )

        if labels == "drop":
            joined_index = None
            joined_columns = None
            row_lengths = None
            column_widths = None

        return self.__constructor__(
            new_frame,
            joined_index,
            joined_columns,
            row_lengths,
            column_widths,
            dtypes,
            pandas_backend=self._pandas_backend,
        )

    @lazy_metadata_decorator(apply_axis="both")
    def concat(
        self,
        axis: Union[int, Axis],
        others: Union[PandasDataframe, List[PandasDataframe]],
        how,
        sort,
    ) -> PandasDataframe:
        """
        Concatenate `self` with one or more other Modin DataFrames.

        Parameters
        ----------
        axis : int or modin.core.dataframe.base.utils.Axis
            Axis to concatenate over.
        others : PandasDataframe or list of PandasDataframe
            Modin DataFrame(s) to concatenate with. A single frame is treated
            as a one-element list.
        how : str
            Type of join to use for the axis.
        sort : bool
            Whether sort the result or not.

        Returns
        -------
        PandasDataframe
            New Modin DataFrame.
        """
        axis = Axis(axis)
        # The signature allows a single frame, but everything below iterates
        # over `others`; normalize it the same way `_copartition` does.
        if isinstance(others, type(self)):
            others = [others]
        new_widths = None
        new_lengths = None

        def _compute_new_widths():
            # Column widths of the result are the widths of all the frames put
            # one after another; known only if every frame has them cached.
            widths = None
            if self._column_widths_cache is not None and all(
                o._column_widths_cache is not None for o in others
            ):
                widths = self._column_widths_cache + [
                    width for o in others for width in o._column_widths_cache
                ]
            return widths

        # Fast path for equivalent columns and partitioning
        if axis == Axis.ROW_WISE and all(
            o._check_if_axes_identical(self, axis=1) for o in others
        ):
            joined_index = self.copy_columns_cache(copy_lengths=True)
            left_parts = self._partitions
            right_parts = [o._partitions for o in others]
            new_widths = self._column_widths_cache
        elif axis == Axis.COL_WISE and all(
            o._check_if_axes_identical(self, axis=0) for o in others
        ):
            joined_index = self.copy_index_cache(copy_lengths=True)
            left_parts = self._partitions
            right_parts = [o._partitions for o in others]
            new_lengths = self._row_lengths_cache
            # we can only do this for COL_WISE because `concat` might rebalance partitions for ROW_WISE
            new_widths = _compute_new_widths()
        else:
            # Labels/partitioning differ along the axis orthogonal to the
            # concatenation: align the frames along that axis first.
            (
                left_parts,
                right_parts,
                joined_index,
                partition_sizes_along_axis,
            ) = self._copartition(
                axis.value ^ 1, others, how, sort=sort, force_repartition=False
            )
            if axis == Axis.COL_WISE:
                new_lengths = partition_sizes_along_axis
                new_widths = _compute_new_widths()
            else:
                new_widths = partition_sizes_along_axis
        # The partition manager returns the new partitions and a second value
        # that is used as the row lengths if none were determined above.
        new_partitions, new_lengths2 = self._partition_mgr_cls.concat(
            axis.value, left_parts, right_parts
        )
        if new_lengths is None:
            new_lengths = new_lengths2
        new_dtypes = None
        new_index = None
        new_columns = None
        if axis == Axis.ROW_WISE:
            # The index is built eagerly only if every frame has a materialized one.
            if all(obj.has_materialized_index for obj in (self, *others)):
                new_index = self.index.append([other.index for other in others])
            new_columns = joined_index
            frames = [self, *others]
            # TODO: should we wrap all `concat` call into "try except" block?
            # `ModinDtypes.concat` can throw exception in case of duplicate values
            new_dtypes = ModinDtypes.concat([frame._dtypes for frame in frames], axis=1)
            # If we have already cached the length of each row in at least one
            # of the row's partitions, we can build new_lengths for the new
            # frame. Typically, if we know the length for any partition in a
            # row, we know the length for the first partition in the row. So
            # just check the lengths of the first column of partitions.
            if not new_lengths:
                new_lengths = []
                if new_partitions.size > 0:
                    if all(
                        part._length_cache is not None for part in new_partitions.T[0]
                    ):
                        new_lengths = self._get_lengths(new_partitions.T[0], axis)
                    else:
                        new_lengths = None
        else:
            # The columns are built eagerly only if every frame has materialized ones.
            if all(obj.has_materialized_columns for obj in (self, *others)):
                new_columns = self.columns.append([other.columns for other in others])
            new_index = joined_index
            try:
                new_dtypes = ModinDtypes.concat(
                    [self.copy_dtypes_cache()] + [o.copy_dtypes_cache() for o in others]
                )
            except NotImplementedError:
                # Dtypes cannot be combined here; leave them unset.
                new_dtypes = None
            # If we have already cached the width of each column in at least one
            # of the column's partitions, we can build new_widths for the new
            # frame. Typically, if we know the width for any partition in a
            # column, we know the width for the first partition in the column.
            # So just check the widths of the first row of partitions.
            if not new_widths:
                new_widths = []
                if new_partitions.size > 0:
                    if all(part._width_cache is not None for part in new_partitions[0]):
                        new_widths = self._get_lengths(new_partitions[0], axis)
                    else:
                        new_widths = None

        return self.__constructor__(
            new_partitions,
            new_index,
            new_columns,
            new_lengths,
            new_widths,
            new_dtypes,
            pandas_backend=self._pandas_backend,
        )

    def _apply_func_to_range_partitioning_broadcast(
        self,
        right,
        func,
        key,
        new_index=None,
        new_columns=None,
        new_dtypes: Optional[Union[ModinDtypes, pandas.Series]] = None,
    ):
        """
        Apply a binary `func` to `self` and `right` using range-partitioning.

        When `self` has a single row partition, `func` is applied along the full
        axis with `right` broadcasted and no shuffling happens. Otherwise `self` is
        reshuffled by the values of `self[key]` and `func` is applied to the
        reshuffled row partitions together with the partitions of `right`.

        Parameters
        ----------
        right : PandasDataframe
            The frame whose partitions are handed to `func` as the second operand.
        func : callable(left : pandas.DataFrame, right : pandas.DataFrame) -> pandas.DataFrame
            Kernel to run against the two frames.
        key : label or list of labels
            Columns to use to build range-partitioning. Must be present in both
            dataframes. A single label is wrapped into a list.
        new_index : pandas.Index, optional
            Index values to write to the result's cache. Only used by the
            shuffling branch.
        new_columns : pandas.Index, optional
            Column values to write to the result's cache.
        new_dtypes : pandas.Series or ModinDtypes, optional
            Dtype values to write to the result's cache.

        Returns
        -------
        PandasDataframe
        """
        n_row_parts = self._partitions.shape[0]
        if n_row_parts == 1:
            # Nothing to shuffle when all the rows live in one row partition
            return self.broadcast_apply_full_axis(
                axis=1,
                func=func,
                new_columns=new_columns,
                dtypes=new_dtypes,
                other=right,
            )

        key = key if isinstance(key, list) else [key]

        shuffle_funcs = ShuffleSortFunctions(
            self,
            key,
            ascending=True,
            ideal_num_new_partitions=n_row_parts,
        )

        # Translate positions of the key columns into indices of the column
        # partitions that hold them
        key_positions = self.columns.get_indexer_for(key)
        width_boundaries = np.cumsum(self.column_widths)
        key_partition_indices = np.unique(np.digitize(key_positions, width_boundaries))

        shuffled_partitions = self._partition_mgr_cls.shuffle_partitions(
            self._partitions,
            key_partition_indices,
            shuffle_funcs,
            func,
            right_partitions=right._partitions,
        )

        return self.__constructor__(
            shuffled_partitions,
            index=new_index,
            columns=new_columns,
            dtypes=new_dtypes,
            pandas_backend=self._pandas_backend,
        )

    @lazy_metadata_decorator(apply_axis="both")
    def groupby(
        self,
        axis: Union[int, Axis],
        internal_by: List[str],
        external_by: List[PandasDataframe],
        by_positions: List[int],
        operator: Callable,
        result_schema: Optional[Dict[Hashable, type]] = None,
        align_result_columns: bool = False,
        series_groupby: bool = False,
        add_missing_cats: bool = False,
        **kwargs: dict,
    ) -> PandasDataframe:
        """
        Generate groups based on values in the input column(s) and perform the specified operation on each.

        Parameters
        ----------
        axis : int or modin.core.dataframe.base.utils.Axis
            The axis to apply the grouping over.
        internal_by : list of strings
            One or more column labels from the `self` dataframe to use for grouping.
        external_by : list of PandasDataframes
            PandasDataframes to group by (may be specified along with or without `internal_by`).
        by_positions : list of ints
            Specifies the order of grouping by `internal_by` and `external_by` columns.
            Each element in `by_positions` specifies an index from either `external_by` or `internal_by`.
            Indices for `external_by` are positive and start from 0. Indices for `internal_by` are negative
            and start from -1 (so in order to convert them to a valid indices one should do ``-idx - 1``).
            '''
            by_positions = [0, -1, 1, -2, 2, 3]
            internal_by = ["col1", "col2"]
            external_by = [sr1, sr2, sr3, sr4]

            df.groupby([sr1, "col1", sr2, "col2", sr3, sr4])
            '''.
        operator : callable(pandas.core.groupby.DataFrameGroupBy) -> pandas.DataFrame
            The operation to carry out on each of the groups. The operator is another
            algebraic operator with its own user-defined function parameter, depending
            on the output desired by the user.
        result_schema : dict, optional
            Mapping from column labels to data types that represents the types of the output dataframe.
        align_result_columns : bool, default: False
            Whether to manually align columns between all the resulted row partitions.
            This flag is helpful when dealing with UDFs as they can change the partition's shape
            and labeling unpredictably, resulting in an invalid dataframe.
        series_groupby : bool, default: False
            Whether to convert a one-column DataFrame to a Series before performing groupby.
        add_missing_cats : bool, default: False
            Whether to add missing categories from `by` columns to the result.
        **kwargs : dict
            Additional arguments to pass to the ``df.groupby`` method (besides the 'by' argument).
            The 'observed' argument is always overwritten with ``True``.

        Returns
        -------
        PandasDataframe
            A new PandasDataframe containing the groupings specified, with the operator
                applied to each group.

        Raises
        ------
        NotImplementedError
            If `axis` is not row-wise, or if columns have to be aligned while the
            intermediate result has more than one column partition.

        Notes
        -----
        No communication between groups is allowed in this algebra implementation.

        The number of rows (columns if axis=1) returned by the user-defined function
        passed to the groupby may be at most the number of rows in the group, and
        may be as small as a single row.

        Unlike the pandas API, an intermediate "GROUP BY" object is not present in this
        algebra implementation.
        """
        axis = Axis(axis)
        if axis != Axis.ROW_WISE:
            raise NotImplementedError(
                f"Algebra groupby only implemented row-wise. {axis.name} axis groupby not implemented yet!"
            )

        has_external_grouper = len(external_by) > 0
        skip_on_aligning_flag = "__skip_me_on_aligning__"
        duplicated_suffix = "__duplicated_suffix__"
        duplicated_pattern = r"_[\d]*__duplicated_suffix__"
        # the kernels below always run with 'observed=True', whatever the caller passed
        kwargs["observed"] = True
        level = kwargs.get("level")

        if level is not None and not isinstance(level, list):
            level = [level]

        def apply_func(df):  # pragma: no cover
            if has_external_grouper:
                external_grouper = df["grouper"]
                external_grouper = [
                    # `df.groupby()` can only take a list of Series'es, so splitting
                    # the df into a list of individual Series'es
                    external_grouper.iloc[:, i]
                    for i in range(len(external_grouper.columns))
                ]

                # renaming 'None' and duplicated names back to their original names
                for obj in external_grouper:
                    if not isinstance(obj, pandas.Series):
                        continue
                    name = obj.name
                    if isinstance(name, str):
                        if name.startswith(MODIN_UNNAMED_SERIES_LABEL):
                            name = None
                        elif name.endswith(duplicated_suffix):
                            name = re.sub(duplicated_pattern, "", name)
                    elif isinstance(name, tuple):
                        # the suffix can only be carried by a string, so a tuple label
                        # ending with a non-string element was never renamed
                        if isinstance(name[-1], str) and name[-1].endswith(
                            duplicated_suffix
                        ):
                            name = (
                                *name[:-1],
                                re.sub(duplicated_pattern, "", name[-1]),
                            )
                    obj.name = name

                df = df["data"]
            else:
                external_grouper = []

            by = []
            # restoring original order of 'by' columns
            for idx in by_positions:
                if idx >= 0:
                    by.append(external_grouper[idx])
                else:
                    by.append(internal_by[-idx - 1])

            if series_groupby:
                df = df.squeeze(axis=1)

            if kwargs.get("level") is not None:
                assert len(by) == 0
                # passing an empty list triggers an error
                by = None

            result = operator(df.groupby(by, **kwargs))

            if align_result_columns and df.empty and result.empty:
                # We want to align columns only of those frames that actually performed
                # some groupby aggregation, if an empty frame was originally passed
                # (an empty bin on reshuffling was created) then there were no groupby
                # executed over this partition and so it has incorrect columns
                # that shouldn't be considered on the aligning phase
                result.attrs[skip_on_aligning_flag] = True
            return result

        if has_external_grouper:
            grouper = (
                external_by[0]
                if len(external_by) == 1
                else external_by[0].concat(
                    axis=1, others=external_by[1:], how="left", sort=False
                )
            )

            new_grouper_cols = []
            columns_were_changed = False
            same_columns = {}
            # duplicated names break range-partitioning mechanism, so renaming them.
            # original names will be reverted in the actual groupby kernel
            for col in grouper.columns:
                suffix = same_columns.get(col)
                if suffix is None:
                    same_columns[col] = 0
                else:
                    same_columns[col] += 1
                    col = (
                        (*col[:-1], f"{col[-1]}_{suffix}{duplicated_suffix}")
                        if isinstance(col, tuple)
                        else f"{col}_{suffix}{duplicated_suffix}"
                    )
                    columns_were_changed = True
                new_grouper_cols.append(col)

            if columns_were_changed:
                grouper.columns = pandas.Index(new_grouper_cols)
            grouper_key_columns = grouper.columns
            data = self
            data_key_columns = internal_by
        else:
            grouper = self
            grouper_key_columns = internal_by
            data, data_key_columns = None, None

        result = grouper._apply_func_to_range_partitioning(
            key_columns=grouper_key_columns,
            func=apply_func,
            data=data,
            data_key_columns=data_key_columns,
            level=level,
        )
        # no need aligning columns if there's only one row partition; adding missing
        # categories, however, is performed regardless of the number of row partitions
        if add_missing_cats or (
            align_result_columns and result._partitions.shape[0] > 1
        ):
            # FIXME: the current reshuffling implementation guarantees us that there's only one column
            # partition in the result, so we should never hit this exception for now, however
            # in the future, we might want to make this implementation broader
            if result._partitions.shape[1] > 1:
                raise NotImplementedError(
                    "Aligning columns is not yet implemented for multiple column partitions."
                )

            # There're two implementations:
            #   1. The first one work faster, but may stress the network a lot in cluster mode since
            #      it gathers all the dataframes in a single ray-kernel.
            #   2. The second one works slower, but only gathers light pandas.Index objects,
            #      so there should be less stress on the network.
            if add_missing_cats or not IsRayCluster.get():
                if self.has_materialized_dtypes:
                    original_dtypes = pandas.Series(
                        {
                            # lazy proxies hold a reference to another modin's DataFrame which can be
                            # a problem during serialization, in this scenario we don't need actual
                            # categorical values, so a "category" string will be enough
                            name: (
                                "category"
                                if isinstance(dtype, LazyProxyCategoricalDtype)
                                else dtype
                            )
                            for name, dtype in self.dtypes.items()
                        }
                    )
                else:
                    original_dtypes = None

                def compute_aligned_columns(*dfs, initial_columns=None, by=None):
                    """Take row partitions, filter empty ones, and return joined columns for them."""
                    if align_result_columns:
                        valid_dfs = [
                            df
                            for df in dfs
                            if not df.attrs.get(skip_on_aligning_flag, False)
                        ]

                        # every partition was flagged as skipped: fall back to all of them
                        if len(valid_dfs) == 0 and len(dfs) != 0:
                            valid_dfs = dfs

                        # Using '.concat()' on empty-slices instead of 'Index.join()'
                        # in order to get identical behavior to pandas when it joins
                        # results of different groups
                        combined_cols = pandas.concat(
                            [df.iloc[:0] for df in valid_dfs], axis=0, join="outer"
                        ).columns
                    else:
                        combined_cols = dfs[0].columns

                    masks = None
                    if add_missing_cats:
                        masks, combined_cols = add_missing_categories_to_groupby(
                            dfs,
                            by,
                            operator,
                            initial_columns,
                            combined_cols,
                            is_udf_agg=align_result_columns,
                            kwargs=kwargs.copy(),
                            initial_dtypes=original_dtypes,
                        )
                    return (
                        (combined_cols, masks)
                        if align_result_columns
                        else (None, masks)
                    )

                external_by_cols = [
                    # only string labels can carry the 'unnamed series' marker; labels of
                    # other types (ints, tuples, ...) are passed through untouched
                    (
                        None
                        if isinstance(col, str)
                        and col.startswith(MODIN_UNNAMED_SERIES_LABEL)
                        else col
                    )
                    for obj in external_by
                    for col in obj.columns
                ]
                by = []
                # restoring original order of 'by' columns
                for idx in by_positions:
                    if idx >= 0:
                        by.append(external_by_cols[idx])
                    else:
                        by.append(internal_by[-idx - 1])

                # Passing all partitions to the 'compute_aligned_columns' kernel to get
                # aligned columns
                parts = result._partitions.flatten()
                aligned_columns = parts[0].apply(
                    compute_aligned_columns,
                    *[part._data for part in parts[1:]],
                    initial_columns=pandas.Index(external_by_cols).append(self.columns),
                    by=by,
                )

                def apply_aligned(df, args, partition_idx):
                    combined_cols, mask = args
                    if mask is not None and mask.get(partition_idx) is not None:
                        values = mask[partition_idx]

                        original_names = df.index.names
                        # TODO: inserting 'values' based on 'searchsorted' result might be more efficient
                        # in cases of small amount of 'values'
                        df = pandas.concat([df, values])
                        # pandas' groupby sorts by default, so do the same when the
                        # caller didn't specify 'sort' explicitly
                        if kwargs.get("sort", True):
                            df = df.sort_index(axis=0)
                        df.index.names = original_names
                    if combined_cols is not None:
                        df = df.reindex(columns=combined_cols)
                    return df

                # Lazily applying aligned columns to partitions
                new_partitions = self._partition_mgr_cls.lazy_map_partitions(
                    result._partitions,
                    apply_aligned,
                    func_args=(aligned_columns._data,),
                    enumerate_partitions=True,
                )
            else:

                def join_cols(df, *cols):
                    """Join `cols` and apply the joined columns to `df`."""
                    valid_cols = [
                        pandas.DataFrame(columns=col) for col in cols if col is not None
                    ]
                    if len(valid_cols) == 0:
                        return df
                    # Using '.concat()' on empty-slices instead of 'Index.join()'
                    # in order to get identical behavior to pandas when it joins
                    # results of different groups
                    result_col = pandas.concat(valid_cols, axis=0, join="outer").columns
                    return df.reindex(columns=result_col)

                # Getting futures for columns of non-empty partitions
                cols = [
                    part.apply(
                        lambda df: (
                            None
                            if df.attrs.get(skip_on_aligning_flag, False)
                            else df.columns
                        )
                    )._data
                    for part in result._partitions.flatten()
                ]

                # Lazily joining and applying the aligned columns
                new_partitions = self._partition_mgr_cls.lazy_map_partitions(
                    result._partitions,
                    join_cols,
                    func_args=cols,
                )
            result = self.__constructor__(
                new_partitions,
                index=result.copy_index_cache(),
                row_lengths=result._row_lengths_cache,
                pandas_backend=self._pandas_backend,
            )

        # When grouping on internal columns only, the dtypes of the result's index
        # are the dtypes of the 'by' columns, so they can be attached to a lazy index
        if (
            not result.has_materialized_index
            and not has_external_grouper
            and level is None
        ):
            by_dtypes = ModinDtypes(self._dtypes).lazy_get(internal_by)
            if by_dtypes.is_materialized:
                new_index = ModinIndex(value=result, axis=0, dtypes=by_dtypes)
                result.set_index_cache(new_index)

        if result_schema is not None:
            new_dtypes = pandas.Series(result_schema)

            result.set_dtypes_cache(new_dtypes)
            result.set_columns_cache(new_dtypes.index)

        return result

    @lazy_metadata_decorator(apply_axis="both")
    def groupby_reduce(
        self,
        axis,
        by,
        map_func,
        reduce_func,
        new_index=None,
        new_columns=None,
        apply_indices=None,
    ):
        """
        Group this frame by another Modin DataFrame and aggregate in map-reduce fashion.

        Parameters
        ----------
        axis : {0, 1}
            Axis to groupby and aggregate over.
        by : PandasDataframe or None
            A Modin DataFrame to group by.
        map_func : callable
            Map component of the aggregation.
        reduce_func : callable
            Reduce component of the aggregation.
        new_index : pandas.Index, optional
            Index of the result. We may know this in advance,
            and if not provided it must be computed.
        new_columns : pandas.Index, optional
            Columns of the result. We may know this in advance,
            and if not provided it must be computed.
        apply_indices : list-like, optional
            Indices of `axis ^ 1` to apply groupby over.

        Returns
        -------
        PandasDataframe
            New Modin DataFrame.
        """
        if by is None:
            by_parts = None
            self._propagate_index_objs(axis=0)
        else:
            by_parts = by._partitions

        if apply_indices is not None:
            # Labels -> positions -> indices of the blocks holding those positions
            opposite_axis = axis ^ 1
            positions = self.get_axis(opposite_axis).get_indexer_for(apply_indices)
            block_mapping = self._get_dict_of_block_index(opposite_axis, positions)
            apply_indices = list(block_mapping.keys())

        partitioning_differs = (
            by_parts is not None
            and by_parts.shape[axis] != self._partitions.shape[axis]
        )
        if partitioning_differs:
            # inplace operation
            self._filter_empties(compute_metadata=False)

        reduced_partitions = self._partition_mgr_cls.groupby_reduce(
            axis, self._partitions, by_parts, map_func, reduce_func, apply_indices
        )
        return self.__constructor__(
            reduced_partitions,
            index=new_index,
            columns=new_columns,
            pandas_backend=self._pandas_backend,
        )

    @classmethod
    def from_pandas(cls, df):
        """
        Build a Modin DataFrame out of a pandas DataFrame.

        The labels and dtypes of `df` are reused as the metadata of the new frame,
        the data itself is split by the partition manager.

        Parameters
        ----------
        df : pandas.DataFrame
            A pandas DataFrame.

        Returns
        -------
        PandasDataframe
            New Modin DataFrame.
        """
        index, columns, dtypes = df.index, df.columns, df.dtypes
        # The partition manager also reports the backend and the partitions' dimensions
        partitions, backend, row_lengths, column_widths = (
            cls._partition_mgr_cls.from_pandas(df, True)
        )
        return cls(
            partitions,
            index,
            columns,
            row_lengths,
            column_widths,
            dtypes=dtypes,
            pandas_backend=backend,
        )

    @classmethod
    def from_arrow(cls, at):
        """
        Build a Modin DataFrame out of an Arrow Table.

        The result gets a range index of ``at.num_rows`` elements, columns named
        after the table's columns and dtypes converted from the arrow types.

        Parameters
        ----------
        at : pyarrow.table
            Arrow Table.

        Returns
        -------
        PandasDataframe
            New Modin DataFrame.
        """
        partitions, backend, row_lengths, column_widths = (
            cls._partition_mgr_cls.from_arrow(at, return_dims=True)
        )
        columns = Index.__new__(Index, data=at.column_names, dtype="O")
        index = Index.__new__(RangeIndex, data=range(at.num_rows))
        converted_dtypes = [
            cls._arrow_type_to_dtype(column.type) for column in at.columns
        ]
        dtypes = pandas.Series(converted_dtypes, index=at.column_names)
        return cls(
            partitions=partitions,
            index=index,
            columns=columns,
            row_lengths=row_lengths,
            column_widths=column_widths,
            dtypes=dtypes,
            pandas_backend=backend,
        )

    @classmethod
    def _arrow_type_to_dtype(cls, arrow_type):
        """
        Convert an arrow data type to a pandas data type.

        Parameters
        ----------
        arrow_type : arrow dtype
            Arrow data type to be converted to a pandas data type.

        Returns
        -------
        object
            Any dtype compatible with pandas.
        """
        import pyarrow

        try:
            # TODO: should we map arrow types to pyarrow-backed pandas types?
            # It seems like this might help avoid the expense of transferring
            # data between backends (numpy and pyarrow), but we need to be sure
            # how this fits into the type inference system in pandas.
            res = arrow_type.to_pandas_dtype()
        # Conversion to pandas is not implemented for some arrow types,
        # perform manual conversion for them:
        except NotImplementedError:
            if pyarrow.types.is_time(arrow_type):
                res = np.dtype(datetime.time)
            else:
                raise

        if not isinstance(res, (np.dtype, str)):
            return np.dtype(res)
        return res

    @lazy_metadata_decorator(apply_axis="both")
    def to_pandas(self):
        """
        Materialize this Modin DataFrame as a pandas DataFrame.

        Returns
        -------
        pandas.DataFrame
        """
        df = self._partition_mgr_cls.to_pandas(self._partitions)

        if df.empty:
            # Rebuilding an empty frame from the external labels (and dtypes, if known)
            df = pandas.DataFrame(columns=self.columns, index=self.index)
            if len(df.columns) and self.has_materialized_dtypes:
                df = df.astype(self.dtypes)
            return df

        materialization_flags = ("has_materialized_index", "has_materialized_columns")
        for axis, flag_name in enumerate(materialization_flags):
            # no need to check external and internal axes since in that case
            # external axes will be computed from internal partitions
            if not getattr(self, flag_name):
                continue
            external_index = self.columns if axis else self.index
            labels_mismatch = not df.axes[axis].equals(external_index)
            ErrorMessage.catch_bugs_and_request_email(
                labels_mismatch,
                f"Internal and external indices on axis {axis} do not match.",
            )
            # have to do this in order to assign some potentially missing metadata,
            # the ones that were set to the external index but were never propagated
            # into the internal ones
            df = df.set_axis(axis=axis, labels=external_index, copy=False)

        return df

    def to_numpy(self, **kwargs):
        """
        Convert this Modin DataFrame to a NumPy array.

        The shape of the resulting array is validated against the labels that are
        already materialized; a mismatch is reported as an internal error.

        Parameters
        ----------
        **kwargs : dict
            Additional keyword arguments to be passed in `to_numpy`.

        Returns
        -------
        np.ndarray
        """
        arr = self._partition_mgr_cls.to_numpy(self._partitions, **kwargs)
        # The width is taken from the array's shape rather than from `len(arr[0])`,
        # since indexing the first row raises an IndexError for zero-row results
        shape_mismatch = (
            self.has_materialized_index and len(arr) != len(self.index)
        ) or (
            self.has_materialized_columns
            and arr.ndim > 1
            and arr.shape[1] != len(self.columns)
        )
        ErrorMessage.catch_bugs_and_request_email(shape_mismatch)
        return arr

    @lazy_metadata_decorator(apply_axis=None, transpose=True)
    def transpose(self):
        """
        Swap the rows and the columns of this Modin DataFrame.

        Every partition is lazily transposed and the grid of partitions is
        transposed as well; the row and column metadata trade places.

        Returns
        -------
        PandasDataframe
            New Modin DataFrame.
        """
        transposed_grid = self._partition_mgr_cls.lazy_map_partitions(
            self._partitions, lambda df: df.T
        ).T

        new_dtypes = None
        if self.has_materialized_dtypes:
            # Each new column is made of the values of an old row, thus it takes
            # the common type of all the old columns
            n_new_columns = len(self.index)
            common_dtype = find_common_type(self.dtypes.values)
            new_dtypes = pandas.Series(
                np.full(n_new_columns, common_dtype),
                index=self.index,
            )

        return self.__constructor__(
            transposed_grid,
            index=self.copy_columns_cache(copy_lengths=True),
            columns=self.copy_index_cache(copy_lengths=True),
            row_lengths=self._column_widths_cache,
            column_widths=self._row_lengths_cache,
            dtypes=new_dtypes,
            pandas_backend=self._pandas_backend,
        )

    @lazy_metadata_decorator(apply_axis="both")
    def finalize(self):
        """
        Execute every call that is still deferred on the partitions.

        After this, `self` Modin Dataframe no longer depends on the history of
        queries that were used to build it.
        """
        partitions = self._partitions
        self._partition_mgr_cls.finalize(partitions)

    def wait_computations(self):
        """Block until all the computations are done, without materializing data."""
        flat_partitions = self._partitions.flatten()
        self._partition_mgr_cls.wait_partitions(flat_partitions)

    def support_materialization_in_worker_process(self) -> bool:
        """
        Tell whether `to_pandas` may be called while the object is being recreated during pickling.

        Returns
        -------
        bool
            Always ``True`` for this implementation.
        """
        supported = True
        return supported

    def __dataframe__(self, nan_as_null: bool = False, allow_copy: bool = True):
        """
        Wrap this frame into an object implementing the dataframe exchange protocol.

        See more about the protocol in https://data-apis.org/dataframe-protocol/latest/index.html.

        Parameters
        ----------
        nan_as_null : bool, default: False
            A keyword intended for the consumer to tell the producer
            to overwrite null values in the data with ``NaN`` (or ``NaT``).
            This currently has no effect; once support for nullable extension
            dtypes is added, this value should be propagated to columns.
        allow_copy : bool, default: True
            A keyword that defines whether or not the library is allowed
            to make a copy of the data. For example, copying data would be necessary
            if a library supports strided buffers, given that this protocol
            specifies contiguous buffers. Currently, if the flag is set to ``False``
            and a copy is needed, a ``RuntimeError`` will be raised.

        Returns
        -------
        ProtocolDataframe
            A dataframe object following the dataframe protocol specification.
        """
        # Imported at call time, as the original implementation does
        from modin.core.dataframe.pandas.interchange.dataframe_protocol.dataframe import (
            PandasProtocolDataframe as protocol_frame_cls,
        )

        protocol_options = {"nan_as_null": nan_as_null, "allow_copy": allow_copy}
        return protocol_frame_cls(self, **protocol_options)

    @classmethod
    def from_interchange_dataframe(cls, df: ProtocolDataframe) -> PandasDataframe:
        """
        Build a Core Modin Dataframe out of an object supporting the dataframe exchange protocol.

        The protocol is described at https://data-apis.org/dataframe-protocol/latest/index.html.

        Parameters
        ----------
        df : ProtocolDataframe
            The object implementing the dataframe exchange protocol.

        Returns
        -------
        PandasDataframe
            `df` itself when its type is exactly `cls`, otherwise a new
            Core Modin Dataframe built from the converted pandas object.

        Raises
        ------
        ValueError
            If `df` has no ``__dataframe__`` attribute.
        """
        # Exact type match: the object is already what the caller asks for.
        if type(df) is cls:
            return df

        supports_protocol = hasattr(df, "__dataframe__")
        if not supports_protocol:
            raise ValueError(
                "`df` does not support DataFrame exchange protocol, i.e. `__dataframe__` method"
            )

        from modin.core.dataframe.pandas.interchange.dataframe_protocol.from_dataframe import (
            from_dataframe_to_pandas,
        )

        # The conversion goes through a pandas object, so report the fallback
        # before performing it.
        ErrorMessage.default_to_pandas(message="`from_dataframe`")
        return cls.from_pandas(from_dataframe_to_pandas(df))

    def case_when(self, caselist):
        """
        Replace values where the conditions are True.

        This is Series.case_when() implementation and, thus, it's designed to work
        only with single-column DataFrames.

        Parameters
        ----------
        caselist : list of tuples
            Each tuple is processed as a ``(condition, replacement)`` pair. An element
            of a pair may be a frame of the same class as `self`, a callable,
            a ``pandas.Series``, another list-like object or any other value that is
            passed through untouched.

        Returns
        -------
        PandasDataframe
            The result of applying ``Series.case_when()`` with the given cases
            to the only column of this frame.
        """
        # The import is here to avoid an incorrect module initialization when running tests.
        # This module is loaded before `pytest_configure()` is called. If `pytest_configure()`
        # changes the engine, the `remote_function` decorator will not be valid.
        from modin.core.execution.utils import remote_function

        @remote_function
        def remote_fn(df, name, caselist):  # pragma: no cover
            # ``Series.case_when()`` is what gets called below, so every pandas
            # DataFrame found in the cases is squeezed along the column axis first.
            caselist = [
                tuple(
                    (
                        data.squeeze(axis=1)
                        if isinstance(data, pandas.DataFrame)
                        else data
                    )
                    for data in case_tuple
                )
                for case_tuple in caselist
            ]
            # Wrap the result back into a single-column frame named `name`.
            return pandas.DataFrame({name: df.squeeze(axis=1).case_when(caselist)})

        cls = type(self)
        # Stays True only while no case element requires per-partition slicing
        # (i.e. no modin frames, no pandas Series and no other list-likes).
        use_map = True
        # Computed lazily, the first time a pandas Series is met in the cases.
        is_trivial_idx = None
        name = self.columns[0]
        # Lists of modin frames: first for conditions, second for replacements
        modin_lists = [[], []]
        # Fill values for conditions and replacements respectively
        fill_values = [True, None]
        new_caselist = []
        for case_tuple in caselist:
            new_case = []
            for data, modin_list, fill_value in zip(
                case_tuple, modin_lists, fill_values
            ):
                if isinstance(data, cls):
                    # Only collected here; the frames are copartitioned with
                    # `self` later, once all of them are known.
                    modin_list.append(data)
                elif callable(data):
                    data = remote_function(data)
                elif isinstance(data, pandas.Series):
                    use_map = False
                    if is_trivial_idx is None:
                        # `self_idx` and `length` are bound together with
                        # `is_trivial_idx`, so they are always available below.
                        self_idx = self.index
                        length = len(self_idx)
                        is_trivial_idx = is_trivial_index(self_idx)
                    if is_trivial_idx and is_trivial_index(data.index):
                        # Both indices are trivial: align by length only, cutting
                        # the series or padding it with the fill value.
                        data = data[:length]
                        diff = length - len(data)
                        if diff > 0:
                            data = pandas.concat(
                                [data, pandas.Series([fill_value] * diff)],
                                ignore_index=True,
                            )
                    else:
                        # Otherwise align the series to the labels of this frame.
                        data = data.reindex(self_idx, fill_value=fill_value)
                elif use_map and is_list_like(data):
                    use_map = False
                new_case.append(data)
            new_caselist.append(tuple(new_case))

        if modin_lists[0] or modin_lists[1]:
            # Copartition modin frames
            use_map = False
            columns = self.columns
            column_widths = [1]
            for modin_list, fill_value in zip(modin_lists, fill_values):
                _, list_of_right_parts, joined_index, row_lengths = self._copartition(
                    Axis.ROW_WISE.value,
                    modin_list,
                    how="left",
                    sort=False,
                    fill_value=fill_value,
                )
                # Replace the collected frames in place with their copartitioned
                # counterparts, keeping the original order.
                modin_list.clear()
                modin_list.extend(
                    self.__constructor__(
                        part,
                        joined_index,
                        columns,
                        row_lengths,
                        column_widths,
                        pandas_backend=self._pandas_backend,
                    )
                    for part in list_of_right_parts
                )

            # Replace modin frames with copartitioned
            caselist = new_caselist
            new_caselist = []
            # Iterators are consumed in the same order in which the frames were
            # collected above, so each frame is matched with its own replacement.
            for i in range(2):
                modin_lists[i] = iter(modin_lists[i])
            for case_tuple in caselist:
                new_case = tuple(
                    next(modin_list) if isinstance(data, cls) else data
                    for data, modin_list in zip(case_tuple, modin_lists)
                )
                new_caselist.append(new_case)

        # If all the conditions are callable and the replacements are either
        # callable or scalar, use map().
        if use_map:
            return self.map(func=remote_fn, func_args=[name, new_caselist], lazy=True)

        # Get the chunk of data corresponding to the specified partition
        def map_data(
            part_idx,
            part_len,
            data,
            data_offset,
            fill_value,
        ):
            # NOTE: `fill_value` is accepted but not used by this helper.
            if isinstance(data, cls):
                # Take the data of the row partition with the same position.
                return data._partitions[part_idx][0]._data
            if isinstance(data, pandas.Series):
                return data[data_offset : data_offset + part_len]
            # Other list-likes are sliced the same way, anything else is passed as is.
            return (
                data[data_offset : data_offset + part_len]
                if is_list_like(data)
                else data
            )

        # Only the first column partition of each row is used.
        parts = [p[0] for p in self._partitions]
        lengths = self.row_lengths
        new_parts = []
        # Position of the first row of the current partition within the whole frame.
        data_offset = 0

        # Split the data and apply the remote function to each partition
        # with the corresponding chunk of data
        for i, part, part_len in zip(range(len(parts)), parts, lengths):
            cases = [
                tuple(
                    map_data(i, part_len, data, data_offset, fill_value)
                    for data, fill_value in zip(c, (True, None))
                )
                for c in new_caselist
            ]
            new_parts.append(
                part.add_to_apply_calls(
                    remote_fn,
                    name,
                    cases,
                    length=part_len,
                    width=1,
                )
            )
            data_offset += part_len
        # Arrange the resulting partitions as a single-column 2D array.
        new_parts = np.array([[p] for p in new_parts])
        return self.__constructor__(
            new_parts,
            columns=self.columns,
            index=self.index,
            row_lengths=lengths,
            column_widths=[1],
            pandas_backend=self._pandas_backend,
        )


================================================
FILE: modin/core/dataframe/pandas/dataframe/utils.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Collection of algebra utility functions, used to shuffle data across partitions."""

import abc
from collections import namedtuple
from typing import TYPE_CHECKING, Callable, Optional, Union

import numpy as np
import pandas
from pandas._libs.tslibs import to_offset
from pandas.core.dtypes.common import is_list_like, is_numeric_dtype
from pandas.core.resample import _get_timestamp_range_edges

from modin.error_message import ErrorMessage
from modin.utils import _inherit_docstrings

if TYPE_CHECKING:
    from modin.core.dataframe.pandas.dataframe.dataframe import PandasDataframe

ColumnInfo = namedtuple("ColumnInfo", ["name", "pivots", "is_numeric"])


class ShuffleFunctions:
    """
    Interface of the three stages used to build a range-partitioning: sampling, pivot picking and splitting.

    Parameters
    ----------
    modin_frame : PandasDataframe
        The frame the range-partitioning is built for.
    columns : str or list of strings
        The column/columns serving as a key.
    ascending : bool
        Whether the ranges go in ascending or descending order.
    ideal_num_new_partitions : int
        The ideal number of new partitions.
    **kwargs : dict
        Additional keyword arguments.
    """

    def __init__(
        self, modin_frame, columns, ascending, ideal_num_new_partitions, **kwargs
    ):
        # The interface itself stores nothing; implementations keep what they need.
        pass

    @abc.abstractmethod
    def sample_fn(self, partition: pandas.DataFrame) -> pandas.DataFrame:
        """
        Take samples from the passed partition.

        Parameters
        ----------
        partition : pandas.DataFrame

        Returns
        -------
        pandas.DataFrame:
            Samples picked from the partition.
        """

    @abc.abstractmethod
    def pivot_fn(self, samples: "list[pandas.DataFrame]") -> int:
        """
        Compute quantiles out of the samples and remember them for the later ``.split_fn()`` calls.

        Parameters
        ----------
        samples : list of pandas.DataFrames

        Returns
        -------
        int
            How many bins the ``.split_fn()`` is going to return.
        """

    @abc.abstractmethod
    def split_fn(self, partition: pandas.DataFrame) -> "tuple[pandas.DataFrame, ...]":
        """
        Break the passed dataframe into the range-partitions defined by the previous ``.pivot_fn()`` call.

        Parameters
        ----------
        partition : pandas.DataFrame

        Returns
        -------
        tuple of pandas.DataFrames

        Notes
        -----
        The ``.pivot_fn()`` has to be called before this method.
        """


@_inherit_docstrings(ShuffleFunctions)
class ShuffleSortFunctions(ShuffleFunctions):
    """
    Perform the sampling, quantiles picking, and the splitting stages for the range-partitioning building.

    Parameters
    ----------
    modin_frame : PandasDataframe
        The frame to build the range-partitioning for.
    columns : str, list of strings or None
        The column/columns to use as a key. Can't be specified along with `level`.
    ascending : bool
        Whether the ranges should be in ascending or descending order.
    ideal_num_new_partitions : int
        The ideal number of new partitions.
    level : list of strings or ints, or None
        Index level(s) to use as a key. Can't be specified along with `columns`.
    closed_on_right : bool, default: False
        Whether to include the right limit in range-partitioning.
            True:  bins[i - 1] < x <= bins[i]
            False: bins[i - 1] <= x < bins[i]
    **kwargs : dict
        Additional keyword arguments.
    """

    def __init__(
        self,
        modin_frame: "PandasDataframe",
        columns: Optional[Union[str, list]],
        ascending: Union[list, bool],
        ideal_num_new_partitions: int,
        level: Optional[list[Union[str, int]]] = None,
        closed_on_right: bool = False,
        **kwargs: dict,
    ):
        self.frame_len = len(modin_frame)
        self.ideal_num_new_partitions = ideal_num_new_partitions
        # Keys are always stored as a list, a single label gets wrapped.
        self.columns = columns if is_list_like(columns) else [columns]
        self.ascending = ascending
        self.kwargs = kwargs.copy()
        self.level = level
        # Filled by `pivot_fn()`; `split_fn()` refuses to work until then.
        self.columns_info = None
        self.closed_on_right = closed_on_right

    def sample_fn(self, partition: pandas.DataFrame) -> pandas.DataFrame:
        # Narrow the partition down to the key data (index levels or columns)
        # before sampling it.
        if self.level is not None:
            partition = self._index_to_df_zero_copy(partition, self.level)
        else:
            partition = partition[self.columns]
        return self.pick_samples_for_quantiles(
            partition, self.ideal_num_new_partitions, self.frame_len
        )

    def pivot_fn(self, samples: "list[pandas.DataFrame]") -> int:
        key = self.kwargs.get("key", None)
        samples = pandas.concat(samples, axis=0, copy=False)

        columns_info: "list[ColumnInfo]" = []
        number_of_groups = 1
        for i, col in enumerate(samples.columns):
            # Every next key column subdivides the groups produced by the previous
            # ones, so it only gets the remaining share of the partitions budget.
            num_pivots = int(self.ideal_num_new_partitions / number_of_groups)
            # The first column is always processed; further columns are skipped
            # as soon as they can't contribute at least two bins.
            if num_pivots < 2 and len(columns_info):
                break
            column_val = samples[col]
            is_numeric = is_numeric_dtype(column_val.dtype)

            # When we are not sorting numbers, we need our quantiles to not do arithmetic on the values
            method = "linear" if is_numeric else "inverted_cdf"
            pivots = self.pick_pivots_from_samples_for_sort(
                column_val, num_pivots, method, key
            )
            columns_info.append(
                ColumnInfo(
                    self.level[i] if self.level is not None else col,
                    pivots,
                    is_numeric,
                )
            )
            number_of_groups *= len(pivots) + 1
        self.columns_info = columns_info
        return number_of_groups

    def split_fn(
        self,
        partition: pandas.DataFrame,
    ) -> "tuple[pandas.DataFrame, ...]":
        ErrorMessage.catch_bugs_and_request_email(
            failure_condition=self.columns_info is None,
            extra_log="The 'split_fn' doesn't have proper metadata, the probable reason is that it was called before 'pivot_fn'",
        )
        return self.split_partitions_using_pivots_for_sort(
            partition,
            self.columns_info,
            self.ascending,
            keys_are_index_levels=self.level is not None,
            closed_on_right=self.closed_on_right,
            **self.kwargs,
        )

    @staticmethod
    def _find_quantiles(
        df: Union[pandas.DataFrame, pandas.Series], quantiles: list, method: str
    ) -> np.ndarray:
        """
        Find quantiles of a given dataframe using the specified method.

        We use this method to provide backwards compatibility with NumPy versions < 1.23 (e.g. when
        the user is using Modin in compat mode). This is basically a wrapper around `np.quantile` that
        ensures we provide the correct `method` argument - i.e. if we are dealing with objects (which
        may or may not support algebra), we do not want to use a method to find quantiles that will
        involve algebra operations (e.g. mean) between the objects, since that may fail.

        Parameters
        ----------
        df : pandas.DataFrame or pandas.Series
            The data to pick quantiles from.
        quantiles : list[float]
            The quantiles to compute.
        method : str
            The method to use. `linear` if dealing with numeric types, otherwise `inverted_cdf`.

        Returns
        -------
        np.ndarray
            A NumPy array with the unique quantiles of the data.
        """
        if method == "linear":
            # This is the default method for finding quantiles, so it does not need to be specified,
            # which keeps backwards compatibility with older versions of NumPy that do not have a
            # `method` keyword argument in np.quantile.
            return np.unique(np.quantile(df, quantiles))
        else:
            try:
                return np.unique(np.quantile(df, quantiles, method=method))
            except Exception:
                # In this case, we're dealing with an array of objects, but the current version of
                # NumPy does not have a `method` kwarg. We need to use the older kwarg, `interpolation`
                # instead.
                return np.unique(np.quantile(df, quantiles, interpolation="lower"))

    @staticmethod
    def pick_samples_for_quantiles(
        df: pandas.DataFrame,
        num_partitions: int,
        length: int,
    ) -> pandas.DataFrame:
        """
        Pick samples over the given partition.

        This function picks samples from the given partition using the TeraSort algorithm - each
        value is sampled with probability 1 / m * ln(n * t) where m = total_length / num_partitions,
        t = num_partitions, and n = total_length. The probability is capped at 1, and an empty
        sample is returned when `length` or `num_partitions` is not positive.

        Parameters
        ----------
        df : pandas.Dataframe
            The masked dataframe to pick samples from.
        num_partitions : int
            The number of partitions.
        length : int
            The total length.

        Returns
        -------
        pandas.DataFrame:
            The samples for the partition.

        Notes
        -----
        This sampling algorithm is inspired by TeraSort. You can find more information about TeraSort
        and the sampling algorithm at https://www.cse.cuhk.edu.hk/~taoyf/paper/sigmod13-mr.pdf.
        """
        if length <= 0 or num_partitions <= 0:
            # The formula below is undefined here (division by zero / log of
            # a non-positive number); an empty slice keeps the schema intact.
            return df.iloc[:0]
        m = length / num_partitions
        probability = (1 / m) * np.log(num_partitions * length)
        # For small frames the formula yields a value above 1, and
        # `DataFrame.sample` rejects `frac > 1` when sampling without
        # replacement, so take every row at most once.
        return df.sample(frac=min(probability, 1.0))

    def pick_pivots_from_samples_for_sort(
        self,
        samples: pandas.Series,
        ideal_num_new_partitions: int,
        method: str = "linear",
        key: Optional[Callable] = None,
    ) -> np.ndarray:
        """
        Determine quantiles from the given samples.

        This function takes the samples of a single key collected over all partitions
        and computes ``ideal_num_new_partitions - 1`` evenly spaced quantiles of them,
        which are then used as pivots to roughly sort the entire dataframe.

        Parameters
        ----------
        samples : pandas.Series
            The samples of a key collected from all the partitions.
        ideal_num_new_partitions : int
            The ideal number of new partitions.
        method : str, default: linear
            The method to use when picking quantiles.
        key : Callable, default: None
            The key to use on the samples when picking pivots.

        Returns
        -------
        np.ndarray
            A list of overall quantiles.
        """
        samples = samples.to_numpy()
        # We don't call `np.unique` on the samples, since if a quantile shows up in multiple
        # partition's samples, this is probably an indicator of skew in the dataset, and we
        # want our final partitions to take this into account.
        if key is not None:
            samples = key(samples)
        num_quantiles = ideal_num_new_partitions
        quantiles = [i / num_quantiles for i in range(1, num_quantiles)]
        # If we only desire 1 partition, we need to ensure that we're not trying to find quantiles
        # from an empty list of pivots.
        if len(quantiles) > 0:
            return self._find_quantiles(samples, quantiles, method)
        return np.array([])

    @staticmethod
    def split_partitions_using_pivots_for_sort(
        df: pandas.DataFrame,
        columns_info: "list[ColumnInfo]",
        ascending: bool,
        keys_are_index_levels: bool = False,
        closed_on_right: bool = False,
        **kwargs: dict,
    ) -> "tuple[pandas.DataFrame, ...]":
        """
        Split the given dataframe into the partitions specified by `pivots` in `columns_info`.

        This function takes as input a row-axis partition, as well as the pivots stored
        in `columns_info`. It then splits the input dataframe into one dataframe per bin,
        with the elements in the i-th split belonging to the i-th bin, as determined
        by the pivots. Rows having a missing value in any of the keys are appended to
        the first or to the last split, depending on the `na_position` keyword.

        Parameters
        ----------
        df : pandas.Dataframe
            The partition to split.
        columns_info : list of ColumnInfo
            Information regarding keys and pivots for range partitioning.
        ascending : bool
            The ascending flag.
        keys_are_index_levels : bool, default: False
            Whether `columns_info` describes index levels or actual columns from `df`.
        closed_on_right : bool, default: False
            Whether to include the right limit in range-partitioning.
                True:  bins[i - 1] < x <= bins[i]
                False: bins[i - 1] <= x < bins[i]
        **kwargs : dict
            Additional keyword arguments. The recognized ones are `key` (a callable
            applied to the values of every key before binning) and `na_position`
            ("last" by default).

        Returns
        -------
        tuple[pandas.DataFrame]
            A tuple of the splits from this partition.
        """
        if len(columns_info) == 0:
            # We can return the dataframe with zero changes if there were no pivots passed
            return (df,)

        key_names = [col_info.name for col_info in columns_info]
        key_data = (
            ShuffleSortFunctions._index_to_df_zero_copy(df, key_names)
            if keys_are_index_levels
            else df[key_names]
        )
        # A row is treated as NA if any of its keys is missing.
        na_index = key_data.isna().squeeze(axis=1)
        if na_index.ndim == 2:
            na_index = na_index.any(axis=1)
        na_rows = df[na_index]
        non_na_rows = df[~na_index]

        def empty_slice():
            """Build an empty frame carrying the index metadata, columns and dtypes of `df`."""
            return pandas.DataFrame(index=df.index[:0], columns=df.columns).astype(
                df.dtypes
            )

        def get_group(grp, group_key):
            """Get a group with the `group_key` from the `grp`, if it doesn't exist return an empty slice of `df`."""
            try:
                return grp.get_group(group_key)
            except KeyError:
                return empty_slice()

        # The pivots of every key were computed on `key`-transformed samples, so
        # the callable has to be applied to the values of every key as well.
        # It's therefore extracted once, before iterating over the keys.
        key = kwargs.pop("key", None)

        groupby_codes = []
        group_keys = []
        for col_info in columns_info:
            pivots = col_info.pivots
            if len(pivots) == 0:
                continue
            # If `ascending=False` and we are dealing with a numeric dtype, we can pass in a reversed list
            # of pivots, and `np.digitize` will work correctly. For object dtypes, we use `np.searchsorted`
            # which breaks when we reverse the pivots.
            if not ascending and col_info.is_numeric:
                # `key` is already applied to `pivots` in the `pick_pivots_from_samples_for_sort` function.
                pivots = pivots[::-1]
            group_keys.append(range(len(pivots) + 1))
            cols_to_digitize = (
                non_na_rows.index.get_level_values(col_info.name)
                if keys_are_index_levels
                else non_na_rows[col_info.name]
            )
            if key is not None:
                cols_to_digitize = key(cols_to_digitize)

            if cols_to_digitize.ndim == 2:
                cols_to_digitize = cols_to_digitize.squeeze()

            if col_info.is_numeric:
                groupby_col = np.digitize(
                    cols_to_digitize, pivots, right=closed_on_right
                )
                # `np.digitize` returns results based off of the sort order of the pivots it is passed.
                # When we only have one unique value in our pivots, `np.digitize` assumes that the pivots
                # are sorted in ascending order, and gives us results based off of that assumption - so if
                # we actually want to sort in descending order, we need to swap the new indices.
                if not ascending and len(np.unique(pivots)) == 1:
                    groupby_col = len(pivots) - groupby_col
            else:
                groupby_col = np.searchsorted(
                    pivots,
                    cols_to_digitize,
                    side="left" if closed_on_right else "right",
                )
                # Since np.searchsorted requires the pivots to be in ascending order, if we want to sort
                # in descending order, we need to swap the new indices.
                if not ascending:
                    groupby_col = len(pivots) - groupby_col
            groupby_codes.append(groupby_col)

        if len(group_keys) == 0:
            # We can return the dataframe with zero changes if there were no pivots passed
            return (df,)
        elif len(group_keys) == 1:
            group_keys = group_keys[0]
        else:
            group_keys = pandas.MultiIndex.from_product(group_keys)

        if len(non_na_rows) == 1:
            # Build the key of the only row in the same form the `group_keys` use:
            # a scalar for a single key and a tuple for several keys. `np.ravel`
            # makes it work for both one-element arrays and scalars.
            row_key = tuple(np.ravel(codes)[0] for codes in groupby_codes)
            if len(row_key) == 1:
                row_key = row_key[0]
            groups = [
                # taking an empty slice for an index's metadata
                non_na_rows if group_key == row_key else empty_slice()
                for group_key in group_keys
            ]
        else:
            grouped = non_na_rows.groupby(groupby_codes)
            groups = [get_group(grouped, group_key) for group_key in group_keys]
        index_to_insert_na_vals = (
            -1 if kwargs.get("na_position", "last") == "last" else 0
        )
        groups[index_to_insert_na_vals] = pandas.concat(
            [groups[index_to_insert_na_vals], na_rows]
        ).astype(df.dtypes)
        return tuple(groups)

    @staticmethod
    def _index_to_df_zero_copy(
        df: pandas.DataFrame, levels: list[Union[str, int]]
    ) -> pandas.DataFrame:
        """
        Convert index `level` of `df` to a ``pandas.DataFrame``.

        Parameters
        ----------
        df : pandas.DataFrame
        levels : list of labels or ints
            Index level to convert to a dataframe.

        Returns
        -------
        pandas.DataFrame
            The columns in the resulting dataframe use the same data arrays as the index levels
            in the original `df`, so no copies.
        """
        # calling 'df.index.to_frame()' creates a copy of the index, so doing the conversion manually
        # to avoid the copy
        data = {
            (
                # positional levels are translated to their names to serve as column labels
                df.index.names[lvl] if isinstance(lvl, int) else lvl
            ): df.index.get_level_values(lvl)
            for lvl in levels
        }
        index_data = pandas.DataFrame(data, index=df.index, copy=False)
        return index_data


@_inherit_docstrings(ShuffleSortFunctions)
class ShuffleResample(ShuffleSortFunctions):
    """
    Perform the sampling, pivots picking, and the splitting stages of range-partitioning for resampling.

    Parameters
    ----------
    modin_frame : PandasDataframe
        The frame to build the range-partitioning for.
    columns : str or list of strings
        The column/columns to use as a key.
    ascending : bool
        Whether the ranges should be in ascending or descending order.
    ideal_num_new_partitions : int
        The ideal number of new partitions.
    resample_kwargs : dict
        Resample parameters. The "rule" and "closed" keys are read at construction,
        "origin" and "offset" are read when the pivots are picked.
    **kwargs : dict
        Additional keyword arguments passed to the ``ShuffleSortFunctions`` constructor.
    """

    def __init__(
        self,
        modin_frame: "PandasDataframe",
        columns: Union[str, list],
        ascending: Union[list, bool],
        ideal_num_new_partitions: int,
        resample_kwargs: dict,
        **kwargs: dict,
    ):
        # Work on a copy, the caller's dictionary must stay untouched.
        resample_kwargs = resample_kwargs.copy()
        rule = resample_kwargs.pop("rule")

        if resample_kwargs["closed"] is None:
            # this rule regarding the default value of 'closed' is inherited
            # from pandas documentation for 'pandas.DataFrame.resample'
            if rule in ("ME", "YE", "QE", "BME", "BA", "BQE", "W"):
                resample_kwargs["closed"] = "right"
            else:
                resample_kwargs["closed"] = "left"

        super().__init__(
            modin_frame,
            columns,
            ascending,
            ideal_num_new_partitions,
            closed_on_right=resample_kwargs["closed"] == "right",
            **kwargs,
        )

        # The rule is stored as an offset object under the "freq" key.
        resample_kwargs["freq"] = to_offset(rule)
        self.resample_kwargs = resample_kwargs

    @staticmethod
    def pick_samples_for_quantiles(
        df: pandas.DataFrame,
        num_partitions: int,
        length: int,
    ) -> pandas.DataFrame:
        # to build proper bins we need min and max timestamp of the whole DatetimeIndex,
        # so computing it in each partition
        return pandas.concat([df.min().to_frame().T, df.max().to_frame().T])

    def pick_pivots_from_samples_for_sort(
        self,
        samples: pandas.Series,
        ideal_num_new_partitions: int,
        method: str = "linear",
        key: Optional[Callable] = None,
    ) -> "list[pandas.Timestamp]":
        # `samples` has to be a datetime-like series since its `.dt.unit` is read
        # below; `method` is not used, the pivots are taken from the resample bins.
        if key is not None:
            raise NotImplementedError(key)

        max_value = samples.max()

        first, last = _get_timestamp_range_edges(
            samples.min(),
            max_value,
            self.resample_kwargs["freq"],
            unit=samples.dt.unit,
            closed=self.resample_kwargs["closed"],
            origin=self.resample_kwargs["origin"],
            offset=self.resample_kwargs["offset"],
        )

        all_bins = pandas.date_range(
            start=first,
            end=last,
            freq=self.resample_kwargs["freq"],
            ambiguous=True,
            nonexistent="shift_forward",
            unit=samples.dt.unit,
        )

        all_bins = self._adjust_bin_edges(
            all_bins,
            max_value,
            freq=self.resample_kwargs["freq"],
            closed=self.resample_kwargs["closed"],
        )

        # take pivot values with an even interval
        step = 1 / ideal_num_new_partitions
        bins = [
            all_bins[int(len(all_bins) * i * step)]
            for i in range(1, ideal_num_new_partitions)
        ]
        return bins

    def _adjust_bin_edges(
        self,
        binner: pandas.DatetimeIndex,
        end_timestamp,
        freq,
        closed,
    ) -> pandas.DatetimeIndex:
        """
        Adjust bin edges.

        This function was copied & simplified from ``pandas.core.resample.TimeGrouper._adjust_bin_edges()``.

        Parameters
        ----------
        binner : pandas.DatetimeIndex
            The bin edges to adjust.
        end_timestamp : pandas.Timestamp
            The maximum timestamp of the data being binned.
        freq : offset object
            The resampling frequency, its ``.name`` attribute is inspected.
        closed : str
            Which side of a bin is closed, the edges are shifted only for "right".

        Returns
        -------
        pandas.DatetimeIndex
        """
        # Some hacks for > daily data, see pandas-dev/pandas#1471, pandas-dev/pandas#1458, pandas-dev/pandas#1483

        if freq.name not in ("BME", "ME", "W") and freq.name.split("-")[0] not in (
            "BQE",
            "BYE",
            "QE",
            "YE",
            "W",
        ):
            return binner

        # If the right end-point is on the last day of the month, roll forwards
        # until the last moment of that day. Note that we only do this for offsets
        # which correspond to the end of a super-daily period - "month start", for
        # example, is excluded.
        if closed == "right":
            # GH 21459, GH 9119: Adjust the bins relative to the wall time
            edges_dti = binner.tz_localize(None)
            edges_dti = (
                edges_dti
                + pandas.Timedelta(days=1, unit=edges_dti.unit).as_unit(edges_dti.unit)
                - pandas.Timedelta(1, unit=edges_dti.unit).as_unit(edges_dti.unit)
            )
            binner = edges_dti.tz_localize(binner.tz)

        # intraday values on last day
        if binner[-2] > end_timestamp:
            binner = binner[:-1]
        return binner

    @staticmethod
    def split_partitions_using_pivots_for_sort(
        df: pandas.DataFrame,
        columns_info: "list[ColumnInfo]",
        ascending: bool,
        closed_on_right: bool = True,
        **kwargs: dict,
    ) -> "tuple[pandas.DataFrame, ...]":
        def add_attr(df, timestamp):
            # Append `timestamp` to the bounds already recorded for this split.
            if "bin_bounds" in df.attrs:
                df.attrs["bin_bounds"] = (*df.attrs["bin_bounds"], timestamp)
            else:
                df.attrs["bin_bounds"] = (timestamp,)
            return df

        # NOTE(review): `closed_on_right` is accepted here but is not forwarded to
        # the parent implementation, which therefore runs with its own default.
        # Confirm whether this is intentional before changing it.
        result = ShuffleSortFunctions.split_partitions_using_pivots_for_sort(
            df, columns_info, ascending, **kwargs
        )
        # it's required for each bin to know its bounds in order for resampling to work
        # properly when down-sampling occurs. Reach here for an example:
        # https://github.com/modin-project/modin/pull/7140#discussion_r1549246505
        # We're writing the bounds as 'attrs' to avoid duplications in the final partition
        for i, pivot in enumerate(columns_info[0].pivots):
            add_attr(result[i], pivot - pandas.Timedelta(1, unit="ns"))
            # strict comparison: `i + 1` must be a valid position in `result`
            if i + 1 < len(result):
                add_attr(result[i + 1], pivot + pandas.Timedelta(1, unit="ns"))
        return result


def lazy_metadata_decorator(apply_axis=None, axis_arg=-1, transpose=False):
    """
    Build a decorator that lazily propagates metadata for the ``PandasDataframe``.

    Before the decorated method runs, every ``PandasDataframe`` found among its
    arguments (``self``, positional and keyword arguments, and the elements of
    list arguments) gets the reindexing operations requested by `apply_axis`
    added via ``_propagate_index_objs``. Once the method has returned, the flags
    for deferred metadata synchronization are copied from ``self`` onto the
    result where that is applicable.

    Parameters
    ----------
    apply_axis : str, default: None
        The axes on which to apply the reindexing operations to the `self._partitions` lazily.
        Case None: No lazy metadata propagation.
        Case "both": Add reindexing operations on both axes to partition queue.
        Case "opposite": Add reindexing operations complementary to given axis.
        Case "rows": Add reindexing operations on row axis to partition queue.
    axis_arg : int, default: -1
        Position of the axis value among the positional arguments (``self`` excluded).
        Only consulted for ``apply_axis="opposite"`` when ``axis`` is not passed
        as a keyword argument.
    transpose : bool, default: False
        Boolean for if a transpose operation is being used.

    Returns
    -------
    Wrapped Function.
    """

    def decorator(f):
        from functools import wraps

        @wraps(f)
        def run_f_on_minimally_updated_metadata(self, *args, **kwargs):
            from .dataframe import PandasDataframe

            def frames_in(values):
                return [v for v in values if isinstance(v, PandasDataframe)]

            # Collection order: `self`, positional frames, keyword frames, frames
            # nested in positional lists, frames nested in keyword lists.
            frames = [self]
            frames += frames_in(args)
            frames += frames_in(kwargs.values())
            for group in (args, kwargs.values()):
                for item in group:
                    if isinstance(item, list):
                        frames += frames_in(item)

            if apply_axis == "opposite":
                # the axis is the same for every frame, so it's resolved only once
                axis = kwargs["axis"] if "axis" in kwargs else args[axis_arg]

            for frame in frames:
                if apply_axis == "both":
                    if frame._deferred_index:
                        # 'axis=None' flushes both axes at once
                        frame._propagate_index_objs(
                            axis=None if frame._deferred_column else 0
                        )
                    elif frame._deferred_column:
                        frame._propagate_index_objs(axis=1)
                elif apply_axis == "opposite":
                    if axis == 0 and frame._deferred_column:
                        frame._propagate_index_objs(axis=1)
                    elif axis == 1 and frame._deferred_index:
                        frame._propagate_index_objs(axis=0)
                elif apply_axis == "rows":
                    frame._propagate_index_objs(axis=0)

            result = f(self, *args, **kwargs)

            if apply_axis is None:
                if transpose:
                    # a transposed result swaps the roles of the two axes
                    result._deferred_index = self._deferred_column
                    result._deferred_column = self._deferred_index
                else:
                    result._deferred_index = self._deferred_index
                    result._deferred_column = self._deferred_column
            elif apply_axis == "opposite":
                if axis == 0:
                    result._deferred_index = self._deferred_index
                else:
                    result._deferred_column = self._deferred_column
            elif apply_axis == "rows":
                result._deferred_column = self._deferred_column
            return result

        return run_f_on_minimally_updated_metadata

    return decorator


def add_missing_categories_to_groupby(
    dfs,
    by,
    operator,
    initial_columns,
    combined_cols,
    is_udf_agg,
    kwargs,
    initial_dtypes=None,
):
    """
    Generate values for missing categorical values to be inserted into groupby result.

    This function is used to emulate behavior of ``groupby(observed=False)`` parameter,
    it takes groupby result that was computed using ``groupby(observed=True)``
    and computes results for categorical values that are not presented in `dfs`.

    Parameters
    ----------
    dfs : list of pandas.DataFrames
        Row partitions containing groupby results.
    by : list of hashable
        Column labels that were used to perform groupby.
    operator : callable
        Aggregation function that was used during groupby. It's called with a pandas
        GroupBy object built from an empty frame in order to compute the fill value.
    initial_columns : pandas.Index
        Column labels of the original dataframe.
    combined_cols : pandas.Index
        Column labels of the groupby result.
    is_udf_agg : bool
        Whether ``operator`` is a UDF.
    kwargs : dict
        Parameters that were passed to ``groupby(by, **kwargs)``. Has to contain
        the ``"sort"`` key. The dictionary is modified in place: ``"observed"``
        is set to ``False``.
    initial_dtypes : pandas.Series, optional
        Dtypes of the original dataframe. If not specified, assume it's ``int64``.

    Returns
    -------
    masks : dict[int, pandas.DataFrame]
        Mapping between partition idx and a dataframe with results for missing categorical values
        to insert to this partition. Empty if there's nothing to insert.
    new_combined_cols : pandas.Index
        New column labels of the groupby result. If ``is_udf_agg is True``, then ``operator``
        may change the resulted columns.
    """
    # NOTE: this mutates the dictionary passed by the caller
    kwargs["observed"] = False
    new_combined_cols = combined_cols

    ### At first we need to compute missing categorical values
    indices = [df.index for df in dfs]
    # total_index contains all categorical values that resided in the result,
    # missing values are computed differently depending on whether we're grouping
    # on multiple groupers or not
    total_index = indices[0].append(indices[1:])
    if isinstance(total_index, pandas.MultiIndex):
        # nothing to do if none of the groupers is categorical
        if all(
            not isinstance(level, pandas.CategoricalIndex)
            for level in total_index.levels
        ):
            return {}, new_combined_cols
        missing_cats_dtype = {
            name: (
                level.dtype
                if isinstance(level.dtype, pandas.CategoricalDtype)
                # it's a bit confusing but we have to convert the remaining 'by' columns to categoricals
                # in order to compute a proper fill value later in the code
                else pandas.CategoricalDtype(level)
            )
            for level, name in zip(total_index.levels, total_index.names)
        }
        # if we're grouping on multiple groupers, then the missing categorical values are a
        # Cartesian product of (actual_missing_categorical_values X all_values_of_the_other_groupers)
        complete_index = pandas.MultiIndex.from_product(
            [
                value.categories.astype(total_level.dtype)
                for total_level, value in zip(
                    total_index.levels, missing_cats_dtype.values()
                )
            ],
            names=by,
        )
        # keep only those combinations that are not present in the result yet
        missing_index = complete_index[~complete_index.isin(total_index)]
    else:
        # nothing to do if the only grouper is not categorical
        if not isinstance(total_index, pandas.CategoricalIndex):
            return {}, new_combined_cols
        # if we're grouping on a single grouper then we simply compute the difference
        # between categorical values in the result and the values defined in categorical dtype
        missing_index = total_index.categories.difference(total_index.values)
        missing_cats_dtype = {by[0]: pandas.CategoricalDtype(missing_index)}
    missing_index.names = by

    if len(missing_index) == 0:
        return {}, new_combined_cols

    ### At this stage we want to get a fill_value for missing categorical values
    if is_udf_agg and isinstance(total_index, pandas.MultiIndex):
        # if grouping on multiple columns and aggregating with an UDF, then the
        # fill value is always `np.nan`
        missing_values = pandas.DataFrame({0: [np.nan]})
    else:
        # In case of a udf aggregation we're forced to run the operator against each
        # missing category, as in theory it can return different results for each
        # empty group. In other cases it's enough to run the operator against a single
        # missing categorical and then broadcast the fill value to each missing value
        if not is_udf_agg:
            missing_cats_dtype = {
                key: pandas.CategoricalDtype(value.categories[:1])
                for key, value in missing_cats_dtype.items()
            }

        empty_df = pandas.DataFrame(columns=initial_columns)
        # HACK: default 'object' dtype doesn't fit our needs, as most of the aggregations
        # fail on a non-numeric columns, ideally, we need dtypes of the original dataframe,
        # however, 'int64' also works fine here if the original schema is not available
        empty_df = empty_df.astype(
            "int64" if initial_dtypes is None else initial_dtypes
        )
        empty_df = empty_df.astype(missing_cats_dtype)
        # with 'observed=False' (set above) the aggregation is run over the
        # categories of the empty frame
        missing_values = operator(empty_df.groupby(by, **kwargs))

    if is_udf_agg and not isinstance(total_index, pandas.MultiIndex):
        # the operator's output is used as is (one row per missing category); the
        # resulting column labels are the outer union of the already known columns
        # and the columns produced by the operator
        missing_values = missing_values.drop(columns=by, errors="ignore")
        new_combined_cols = pandas.concat(
            [
                pandas.DataFrame(columns=combined_cols),
                missing_values.iloc[:0],
            ],
            axis=0,
            join="outer",
        ).columns
    else:
        # HACK: If the aggregation has failed, the result would be empty. Assuming the
        # fill value to be `np.nan` here (this may not always be correct!!!)
        fill_value = np.nan if len(missing_values) == 0 else missing_values.iloc[0, 0]
        # broadcast the single fill value to every missing category and every column
        missing_values = pandas.DataFrame(
            fill_value, index=missing_index, columns=combined_cols
        )

    # restoring original categorical dtypes for the indices (MultiIndex already have proper dtypes)
    if not isinstance(missing_values.index, pandas.MultiIndex):
        missing_values.index = missing_values.index.astype(total_index.dtype)

    ### Then we decide which missing categorical values should go to which partition
    if not kwargs["sort"]:
        # If the result is allowed to be unsorted, simply insert all the missing
        # categories to the last partition
        mask = {len(indices) - 1: missing_values}
        return mask, new_combined_cols

    # If the result has to be sorted, we have to assign missing categoricals to proper partitions.
    # For that purpose we define bins with corner values of each partition and then using either
    # np.digitize or np.searchsorted find correct bins for each missing categorical value.
    # Example: part0-> [0, 1, 2]; part1-> [3, 4, 10, 12]; part2-> [15, 17, 20, 100]
    #          bins -> [2, 12] # took last values of each partition excluding the last partition
    #                            (every value that's matching 'x > part[-2][-1]' should go to the
    #                             last partition, meaning that including the last value of the last
    #                             partitions doesn't make sense)
    #          missing_cats ->                    [-2, 5, 6, 14, 21, 120]
    #          np.digitize(missing_cats, bins) -> [ 0, 1, 1,  2,  2,  2]
    #                                               ^-- mapping between values and partition idx to insert
    bins = []
    # maps a bin's position in 'bins' to the number of empty partitions skipped before it
    old_bins_to_new = {}
    offset = 0
    # building bins by taking last values of each partition excluding the last partition
    for idx in indices[:-1]:
        if len(idx) == 0:
            # if a partition is empty, we can't use its values to define a bin, thus we simply
            # skip it and remember the number of skipped partitions as an 'offset'
            offset += 1
            continue
        # remember the number of skipped partitions before this bin, in order to restore original
        # indexing at the end
        old_bins_to_new[len(bins)] = offset
        # for MultiIndices we always use the very first level for bins as using multiple levels
        # doesn't affect the result
        bins.append(idx[-1][0] if isinstance(idx, pandas.MultiIndex) else idx[-1])
    # the entry for the values that fall behind the last bin (i.e. go to the last partition)
    old_bins_to_new[len(bins)] = offset

    if len(bins) == 0:
        # every partition except the last one was empty (and skipped), so the stored
        # offset points at the last partition: insert all the values there
        return {old_bins_to_new.get(0, 0): missing_values}, new_combined_cols

    # we used the very first level of MultiIndex to build bins, meaning that we also have
    # to use values of the first index's level for 'digitize'
    lvl_zero = (
        missing_values.index.levels[0]
        if isinstance(missing_values.index, pandas.MultiIndex)
        else missing_values.index
    )
    if pandas.api.types.is_any_real_numeric_dtype(lvl_zero):
        part_idx = np.digitize(lvl_zero, bins, right=True)
    else:
        part_idx = np.searchsorted(bins, lvl_zero)

    ### In the end we build a dictionary mapping partition index to a dataframe with missing categoricals
    ### to be inserted into this partition
    masks = {}
    if isinstance(total_index, pandas.MultiIndex):
        # group positions of the first level's values by the destination partition and
        # select the rows whose first-level code is among these positions
        for idx, values in pandas.RangeIndex(len(lvl_zero)).groupby(part_idx).items():
            masks[idx] = missing_values[
                pandas.Index(missing_values.index.codes[0]).isin(values)
            ]
    else:
        # group the index values themselves by the destination partition
        frame_idx = missing_values.index.to_frame()
        for idx, values in lvl_zero.groupby(part_idx).items():
            masks[idx] = missing_values[frame_idx.iloc[:, 0].isin(values)]

    # Restore the original indexing by adding the amount of skipped missing partitions
    masks = {key + old_bins_to_new[key]: value for key, value in masks.items()}
    return masks, new_combined_cols


================================================
FILE: modin/core/dataframe/pandas/interchange/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Base Modin Dataframe functionality related to data exchange protocols and optimized for pandas storage format."""


================================================
FILE: modin/core/dataframe/pandas/interchange/dataframe_protocol/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
Base Modin Dataframe functionality related to the dataframe exchange protocol and optimized for pandas storage format.

See more in https://data-apis.org/dataframe-protocol/latest/index.html.
"""


================================================
FILE: modin/core/dataframe/pandas/interchange/dataframe_protocol/buffer.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
Dataframe exchange protocol implementation.

See more in https://data-apis.org/dataframe-protocol/latest/index.html.

Notes
-----
- Interpreting a raw pointer (as in ``Buffer.ptr``) is annoying and unsafe to
  do in pure Python. It's more general but definitely less friendly than having
  ``to_arrow`` and ``to_numpy`` methods. So for the buffers which lack
  ``__dlpack__`` (e.g., because the column dtype isn't supported by DLPack),
  this is worth looking at again.
"""

import enum
from typing import Tuple

import numpy as np

from modin.core.dataframe.base.interchange.dataframe_protocol.dataframe import (
    ProtocolBuffer,
)
from modin.utils import _inherit_docstrings


@_inherit_docstrings(ProtocolBuffer)
class PandasProtocolBuffer(ProtocolBuffer):
    """
    A contiguous block of memory exposed through the interchange protocol.

    The buffer itself carries no dtype attribute, it can be thought of as
    plain memory. If the column the buffer belongs to has a dtype supported
    by DLPack and ``__dlpack__`` is implemented, the dtype information is
    contained in the return value of ``__dlpack__``.

    Keeping the dtype out of the buffer makes it possible to support both
    (a) data exchange via DLPack on a buffer and (b) dtypes like
    variable-length strings which do not have a fixed number of bytes per
    element.

    Parameters
    ----------
    x : np.ndarray
        Data to be held by ``Buffer``.
    allow_copy : bool, default: True
        A keyword that defines whether or not the library is allowed
        to make a copy of the data. For example, copying data would be necessary
        if a library supports strided buffers, given that this protocol
        specifies contiguous buffers. Currently, if the flag is set to ``False``
        and a copy is needed, a ``RuntimeError`` will be raised.
    """

    def __init__(self, x: np.ndarray, allow_copy: bool = True) -> None:
        # strides of a one-dimensional array without gaps between its elements
        contiguous_strides = (x.dtype.itemsize,)
        if x.strides != contiguous_strides:
            # The protocol does not support strided buffers, so the data has to
            # be copied. If copying is forbidden, the export is impossible.
            if not allow_copy:
                raise RuntimeError(
                    "Exports cannot be zero-copy in the case of a non-contiguous buffer"
                )
            x = x.copy()

        # The numpy array is kept as a private attribute, the public
        # attributes are derived from it on demand
        self._x = x

    @property
    def bufsize(self) -> int:
        data = self._x
        return data.size * data.dtype.itemsize

    @property
    def ptr(self) -> int:
        # the "data" entry of the array interface is a pair whose first
        # element is the address of the first element of the array
        address = self._x.__array_interface__["data"][0]
        return address

    def __dlpack__(self):
        raise NotImplementedError("__dlpack__")

    def __dlpack_device__(self) -> Tuple[enum.IntEnum, int]:
        class Device(enum.IntEnum):
            CPU = 1

        return Device.CPU, None

    def __repr__(self) -> str:
        """
        Build a string representation of this ``PandasProtocolBuffer``.

        Returns
        -------
        str
        """
        summary = {
            "bufsize": self.bufsize,
            "ptr": self.ptr,
            "device": self.__dlpack_device__()[0].name,
        }
        return f"Buffer({summary})"


================================================
FILE: modin/core/dataframe/pandas/interchange/dataframe_protocol/column.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
Dataframe exchange protocol implementation.

See more in https://data-apis.org/dataframe-protocol/latest/index.html.

Notes
-----
- Interpreting a raw pointer (as in ``Buffer.ptr``) is annoying and unsafe to
  do in pure Python. It's more general but definitely less friendly than having
  ``to_arrow`` and ``to_numpy`` methods. So for the buffers which lack
  ``__dlpack__`` (e.g., because the column dtype isn't supported by DLPack),
  this is worth looking at again.
"""

from __future__ import annotations

from functools import cached_property
from typing import Any, Dict, Iterable, Optional, Tuple

import numpy as np
import pandas

from modin.core.dataframe.base.interchange.dataframe_protocol.dataframe import (
    CategoricalDescription,
    ProtocolColumn,
)
from modin.core.dataframe.base.interchange.dataframe_protocol.utils import (
    ColumnNullType,
    DTypeKind,
    pandas_dtype_to_arrow_c,
)
from modin.core.dataframe.pandas.dataframe.dataframe import PandasDataframe
from modin.utils import _inherit_docstrings

from .buffer import PandasProtocolBuffer
from .exception import NoOffsetsBuffer, NoValidityBuffer

# Explanations of why a column has no validity (mask) buffer, keyed by the
# null representation of the column.
# NOTE(review): presumably used as the message of ``NoValidityBuffer``; the usage
# is outside of this excerpt, confirm.
_NO_VALIDITY_BUFFER = {
    ColumnNullType.NON_NULLABLE: "This column is non-nullable so does not have a mask",
    ColumnNullType.USE_NAN: "This column uses NaN as null so does not have a separate mask",
    ColumnNullType.USE_SENTINEL: "This column uses a sentinel value so does not have a mask",
}


@_inherit_docstrings(ProtocolColumn)
class PandasProtocolColumn(ProtocolColumn):
    """
    A column object, with only the methods and properties required by the interchange protocol defined.

    A column can contain one or more chunks. Each chunk can contain up to three
    buffers - a data buffer, a mask buffer (depending on null representation),
    and an offsets buffer (if variable-size binary; e.g., variable-length strings).

    TBD: Arrow has a separate "null" dtype, and has no separate mask concept.
         Instead, it seems to use "children" for both columns with a bit mask,
         and for nested dtypes. Unclear whether this is elegant or confusing.
         This design requires checking the null representation explicitly.
         The Arrow design requires checking:
         1. the ARROW_FLAG_NULLABLE (for sentinel values)
         2. if a column has two children, combined with one of those children
            having a null dtype.
         Making the mask concept explicit seems useful. One null dtype would
         not be enough to cover both bit and byte masks, so that would mean
         even more checking if we did it the Arrow way.
    TBD: there's also the "chunk" concept here, which is implicit in Arrow as
         multiple buffers per array (= column here). Semantically it may make
         sense to have both: chunks were meant for example for lazy evaluation
         of data which doesn't fit in memory, while multiple buffers per column
         could also come from doing a selection operation on a single
         contiguous buffer.
         Given these concepts, one would expect chunks to be all of the same
         size (say a 10,000 row dataframe could have 10 chunks of 1,000 rows),
         while multiple buffers could have data-dependent lengths. Not an issue
         in pandas if one column is backed by a single NumPy array, but in
         Arrow it seems possible.
         Are multiple chunks *and* multiple buffers per column necessary for
         the purposes of this interchange protocol, or must producers either
         reuse the chunk concept for this or copy the data?

    Parameters
    ----------
    column : PandasDataframe
        A ``PandasDataframe`` object.
    allow_copy : bool, default: True
        A keyword that defines whether or not the library is allowed
        to make a copy of the data. For example, copying data would be necessary
        if a library supports strided buffers, given that this protocol
        specifies contiguous buffers. Currently, if the flag is set to ``False``
        and a copy is needed, a ``RuntimeError`` will be raised.

    Notes
    -----
    This Column object can only be produced by ``__dataframe__``,
    so doesn't need its own version or ``__column__`` protocol.
    """

    def __init__(self, column: PandasDataframe, allow_copy: bool = True) -> None:
        # only Modin frames of the pandas storage format can back a column
        if not isinstance(column, PandasDataframe):
            column_type = type(column)
            raise NotImplementedError(f"Columns of type {column_type} not handled yet")

        self._allow_copy = allow_copy
        self._col = column

    def size(self) -> int:
        # the number of elements equals the number of row labels of the frame
        row_labels = self._col.index
        return len(row_labels)

    @property
    def offset(self) -> int:
        # this implementation always reports a zero offset
        return 0

    @cached_property
    def dtype(self) -> Tuple[DTypeKind, int, str, str]:
        # the frame behind a column holds a single column, so its first dtype is the one
        column_dtype = self._col.dtypes.iloc[0]

        if isinstance(column_dtype, pandas.CategoricalDtype):
            # a categorical column is described by the integer dtype of its codes;
            # only the bit width and the format string of that dtype are reused
            codes = self._col.to_pandas().squeeze(axis=1).values.codes
            codes_description = self._dtype_from_primitive_pandas_dtype(codes.dtype)
            return (
                DTypeKind.CATEGORICAL,
                codes_description[1],
                codes_description[2],
                "=",
            )

        if pandas.api.types.is_string_dtype(column_dtype):
            return (DTypeKind.STRING, 8, pandas_dtype_to_arrow_c(column_dtype), "=")

        return self._dtype_from_primitive_pandas_dtype(column_dtype)

    def _dtype_from_primitive_pandas_dtype(
        self, dtype
    ) -> Tuple[DTypeKind, int, str, str]:
        """
        Describe a primitive pandas dtype in terms of the protocol.

        See `self.dtype` for details.

        Parameters
        ----------
        dtype : any
            A pandas dtype.

        Returns
        -------
        tuple
            Kind, bit width, format string and byte order of the dtype.

        Raises
        ------
        NotImplementedError
            If the kind of `dtype` is not an integer, unsigned integer, float,
            boolean or datetime one.
        """
        # NumPy kind codes that have a counterpart in the protocol
        kind_by_numpy_code = {
            "i": DTypeKind.INT,
            "u": DTypeKind.UINT,
            "f": DTypeKind.FLOAT,
            "b": DTypeKind.BOOL,
            "M": DTypeKind.DATETIME,
        }
        if dtype.kind not in kind_by_numpy_code:
            raise NotImplementedError(
                f"Data type {dtype} not supported by the dataframe exchange protocol"
            )

        bit_width = dtype.itemsize * 8
        return (
            kind_by_numpy_code[dtype.kind],
            bit_width,
            pandas_dtype_to_arrow_c(dtype),
            dtype.byteorder,
        )

    @property
    def describe_categorical(self) -> CategoricalDescription:
        if self.dtype[0] != DTypeKind.CATEGORICAL:
            raise TypeError(
                "`describe_categorical only works on a column with categorical dtype!"
            )

        # materialize the column to reach its categorical accessor
        cat_accessor = self._col.to_pandas().squeeze(axis=1).cat
        # the categories are exposed as a column of their own, built with the
        # same frame class as the one backing this column
        frame_cls = type(self._col)
        categories_frame = frame_cls.from_pandas(
            pandas.DataFrame({"cat": cat_accessor.categories})
        )
        return {
            "is_ordered": cat_accessor.ordered,
            "is_dictionary": True,
            "categories": PandasProtocolColumn(categories_frame, self._allow_copy),
        }

    @property
    def describe_null(self) -> Tuple[int, Any]:
        kind = self.dtype[0]
        # null representation (and the value standing for a null) per dtype kind
        null_representation = {
            DTypeKind.FLOAT: (ColumnNullType.USE_NAN, None),
            DTypeKind.DATETIME: (ColumnNullType.USE_NAN, None),
            DTypeKind.INT: (ColumnNullType.NON_NULLABLE, None),
            DTypeKind.UINT: (ColumnNullType.NON_NULLABLE, None),
            DTypeKind.BOOL: (ColumnNullType.NON_NULLABLE, None),
            # Null values for categoricals are stored as `-1` sentinel values
            # in the category data (e.g., `col.values.codes` is int8 np.ndarray)
            DTypeKind.CATEGORICAL: (ColumnNullType.USE_SENTINEL, -1),
            # follow Arrow in using 1 as valid value and 0 for missing/null value
            DTypeKind.STRING: (ColumnNullType.USE_BYTEMASK, 0),
        }

        try:
            null, value = null_representation[kind]
        except KeyError:
            raise NotImplementedError(f"Data type {kind} not yet supported")
        return null, value

    @cached_property
    def null_count(self) -> int:
        def flag_missing(df):
            return df.isna()

        def total_missing(df):
            return pandas.DataFrame(df.sum())

        reduced = self._col.tree_reduce(0, flag_missing, total_missing)
        # Both axes are relabeled with ``pandas.RangeIndex(1)`` because
        # 1) `MODIN_UNNAMED_SERIES_LABEL` is used internally for labels of a reduced axis
        # 2) The frame returned by the reduce function has its index and
        # column labels set to ``pandas.RangeIndex(1)``
        # 3) `to_pandas().squeeze()` is used below to get the null count as an integer.
        # Otherwise, internal and external indices would mismatch on both axes
        reduced.index = pandas.RangeIndex(1)
        reduced.columns = pandas.RangeIndex(1)
        as_series = reduced.to_pandas().squeeze(axis=1)
        return as_series.item()

    @property
    def metadata(self) -> Dict[str, Any]:
        # the only metadata exposed is the row labels of the underlying frame
        row_labels = self._col.index
        return {"modin.index": row_labels}

    def num_chunks(self) -> int:
        # one chunk per entry along the first axis of the partition grid
        partition_grid = self._col._partitions
        return partition_grid.shape[0]

    def get_chunks(
        self, n_chunks: Optional[int] = None
    ) -> Iterable["PandasProtocolColumn"]:
        # Yields one ``PandasProtocolColumn`` per chunk. When `n_chunks` is omitted or
        # equals the current number of chunks, the existing row partitioning is used.
        # Otherwise the column is re-split into `n_chunks` pieces.
        #
        # Raises ``RuntimeError`` if `n_chunks` is not a multiple of the current number
        # of chunks or exceeds the number of rows.
        cur_n_chunks = self.num_chunks()
        n_rows = self.size()
        if n_chunks is None or n_chunks == cur_n_chunks:
            chunked_frame = self._col
        else:
            if n_chunks % cur_n_chunks != 0:
                raise RuntimeError(
                    "The passed `n_chunks` must be a multiple of `self.num_chunks()`."
                )

            if n_chunks > n_rows:
                raise RuntimeError(
                    "The passed `n_chunks` value is bigger than `self.num_rows()`."
                )

            # equally sized chunks, the remainder of the division goes to the last one
            chunksize = n_rows // n_chunks
            new_lengths = [chunksize] * n_chunks
            new_lengths[-1] = n_rows % n_chunks + new_lengths[-1]

            # identity function: the data is only re-split according to `new_lengths`
            new_partitions = self._col._partition_mgr_cls.map_axis_partitions(
                0,
                self._col._partitions,
                lambda df: df,
                keep_partitioning=False,
                lengths=new_lengths,
            )
            chunked_frame = self._col.__constructor__(
                new_partitions,
                self._col.index,
                self._col.columns,
                new_lengths,
                self._col.column_widths,
            )

        # positional bounds of every chunk: [0, len0, len0 + len1, ...]
        cum_row_lengths = np.cumsum([0] + chunked_frame.row_lengths)
        for i in range(len(cum_row_lengths) - 1):
            yield PandasProtocolColumn(
                chunked_frame.take_2d_labels_or_positional(
                    row_positions=range(cum_row_lengths[i], cum_row_lengths[i + 1]),
                    col_positions=None,
                ),
                # the flag lives on the column wrapper, not on the underlying frame
                allow_copy=self._allow_copy,
            )

    def get_buffers(self) -> Dict[str, Any]:
        buffers = {}
        buffers["data"] = self._get_data_buffer()
        try:
            buffers["validity"] = self._get_validity_buffer()
        except NoValidityBuffer:
            buffers["validity"] = None

        try:
            buffers["offsets"] = self._get_offsets_buffer()
        except NoOffsetsBuffer:
            buffers["offsets"] = None

        return buffers

    # Class-level default meaning "not computed yet"; ``_get_data_buffer`` stores the
    # resulting ``(buffer, dtype)`` tuple on the instance after its first call.
    _data_buffer_cache = None

    def _get_data_buffer(
        self,
    ) -> Tuple[PandasProtocolBuffer, Any]:  # Any is for self.dtype tuple
        """
        Return the buffer containing the data and the buffer's associated dtype.

        Returns
        -------
        tuple
            The data buffer.
        """
        if self._data_buffer_cache is not None:
            return self._data_buffer_cache

        dtype = self.dtype
        if dtype[0] in (
            DTypeKind.INT,
            DTypeKind.UINT,
            DTypeKind.FLOAT,
            DTypeKind.BOOL,
            DTypeKind.DATETIME,
        ):
            buffer = PandasProtocolBuffer(
                self._col.to_numpy().flatten(), allow_copy=self._allow_copy
            )
        elif dtype[0] == DTypeKind.CATEGORICAL:
            pandas_series = self._col.to_pandas().squeeze(axis=1)
            codes = pandas_series.values.codes
            buffer = PandasProtocolBuffer(codes, allow_copy=self._allow_copy)
            dtype = self._dtype_from_primitive_pandas_dtype(codes.dtype)
        elif dtype[0] == DTypeKind.STRING:
            # Marshal the strings from a NumPy object array into a byte array
            buf = self._col.to_numpy().flatten()
            b = bytearray()

            # TODO: this for-loop is slow; can be implemented in Cython/C/C++ later
            for i in range(buf.size):
                if type(buf[i]) is str:
                    b.extend(buf[i].encode(encoding="utf-8"))

            # Convert the byte array to a pandas "buffer" using a NumPy array as the backing store
            buffer = PandasProtocolBuffer(np.frombuffer(b, dtype="uint8"))

            # Define the dtype for the returned buffer
            dtype = (
                DTypeKind.STRING,
                8,
                "u",
                "=",
            )  # note: currently only support native endianness
        else:
            raise NotImplementedError(f"Data type {self._col.dtype[0]} not handled yet")

        self._data_buffer_cache = (buffer, dtype)
        return self._data_buffer_cache

    # Class-level default meaning "not computed yet"; ``_get_validity_buffer`` stores
    # the resulting ``(buffer, dtype)`` tuple on the instance once it has built a mask.
    _validity_buffer_cache = None

    def _get_validity_buffer(self) -> Tuple[PandasProtocolBuffer, Any]:
        """
        Get the validity buffer.

        The buffer contains the mask values indicating
        missing data and the buffer's associated dtype.

        Returns
        -------
        tuple
            The validity buffer.

        Raises
        ------
        ``NoValidityBuffer`` if null representation is not a bit or byte mask.
        """
        if self._validity_buffer_cache is not None:
            return self._validity_buffer_cache

        null, invalid = self.describe_null

        if self.dtype[0] == DTypeKind.STRING:
            # For now, have the mask array be comprised of bytes, rather than a bit array
            buf = self._col.to_numpy().flatten()

            # Determine the encoding for valid values
            valid = invalid == 0
            invalid = not valid

            mask = np.empty(shape=(len(buf),), dtype=np.bool_)
            for i, obj in enumerate(buf):
                mask[i] = valid if isinstance(obj, str) else invalid

            # Convert the mask array to a Pandas "buffer" using a NumPy array as the backing store
            buffer = PandasProtocolBuffer(mask)

            # Define the dtype of the returned buffer
            dtype = (DTypeKind.BOOL, 8, "b", "=")

            self._validity_buffer_cache = (buffer, dtype)
            return self._validity_buffer_cache

        try:
            msg = _NO_VALIDITY_BUFFER[null]
        except KeyError:
            raise NotImplementedError("See self.describe_null")

        raise NoValidityBuffer(msg)

    # Class-level default meaning "not computed yet"; ``_get_offsets_buffer`` stores
    # the resulting ``(buffer, dtype)`` tuple on the instance after its first call.
    _offsets_buffer_cache = None

    def _get_offsets_buffer(self) -> Tuple[PandasProtocolBuffer, Any]:
        """
        Get the offsets buffer.

        The buffer contains the offset values for variable-size binary data
        (e.g., variable-length strings) and the buffer's associated dtype.

        Returns
        -------
        tuple
            The offsets buffer.

        Raises
        ------
        ``NoOffsetsBuffer`` if the data buffer does not have an associated offsets buffer.
        """
        if self._offsets_buffer_cache is not None:
            return self._offsets_buffer_cache

        if self.dtype[0] == DTypeKind.STRING:
            # For each string, we need to manually determine the next offset
            values = self._col.to_numpy().flatten()
            ptr = 0
            offsets = [ptr] + [None] * len(values)
            for i, v in enumerate(values):
                # For missing values (in this case, `np.nan` values), we don't increment the pointer)
                if type(v) is str:
                    b = v.encode(encoding="utf-8")
                    ptr += len(b)

                offsets[i + 1] = ptr

            # Convert the list of offsets to a NumPy array of signed 64-bit integers (note: Arrow allows the offsets array to be either `int32` or `int64`; here, we default to the latter)
            buf = np.asarray(offsets, dtype="int64")

            # Convert the offsets to a Pandas "buffer" using the NumPy array as the backing store
            buffer = PandasProtocolBuffer(buf)

            # Assemble the buffer dtype info
            dtype = (
                DTypeKind.INT,
                64,
                "l",
                "=",
            )  # note: currently only support native endianness
        else:
            raise NoOffsetsBuffer(
                "This column has a fixed-length dtype so does not have an offsets buffer"
            )

        self._offsets_buffer_cache = (buffer, dtype)
        return self._offsets_buffer_cache


================================================
FILE: modin/core/dataframe/pandas/interchange/dataframe_protocol/dataframe.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
Dataframe exchange protocol implementation.

See more in https://data-apis.org/dataframe-protocol/latest/index.html.

Notes
-----
- Interpreting a raw pointer (as in ``Buffer.ptr``) is annoying and unsafe to
  do in pure Python. It's more general but definitely less friendly than having
  ``to_arrow`` and ``to_numpy`` methods. So for the buffers which lack
  ``__dlpack__`` (e.g., because the column dtype isn't supported by DLPack),
  this is worth looking at again.
"""

import collections
from typing import Any, Dict, Iterable, Optional, Sequence

import numpy as np

from modin.core.dataframe.base.interchange.dataframe_protocol.dataframe import (
    ProtocolDataframe,
)
from modin.core.dataframe.pandas.dataframe.dataframe import PandasDataframe
from modin.utils import _inherit_docstrings

from .column import PandasProtocolColumn


@_inherit_docstrings(ProtocolDataframe)
class PandasProtocolDataframe(ProtocolDataframe):
    """
    A data frame class, with only the methods required by the interchange protocol defined.

    Instances of this (private) class are returned from ``modin.pandas.DataFrame.__dataframe__``
    as objects with the methods and attributes defined on this class.

    A "data frame" represents an ordered collection of named columns.
    A column's "name" must be a unique string. Columns may be accessed by name or by position.
    This could be a public data frame class, or an object with the methods and
    attributes defined on this DataFrame class could be returned from the
    ``__dataframe__`` method of a public data frame class in a library adhering
    to the dataframe interchange protocol specification.

    Parameters
    ----------
    df : PandasDataframe
        A ``PandasDataframe`` object.
    nan_as_null : bool, default: False
        A keyword intended for the consumer to tell the producer
        to overwrite null values in the data with ``NaN`` (or ``NaT``).
        This currently has no effect; once support for nullable extension
        dtypes is added, this value should be propagated to columns.
    allow_copy : bool, default: True
        A keyword that defines whether or not the library is allowed
        to make a copy of the data. For example, copying data would be necessary
        if a library supports strided buffers, given that this protocol
        specifies contiguous buffers. Currently, if the flag is set to ``False``
        and a copy is needed, a ``RuntimeError`` will be raised.
    """

    def __init__(
        self,
        df: PandasDataframe,
        nan_as_null: bool = False,
        allow_copy: bool = True,
    ) -> None:
        self._df = df
        self._nan_as_null = nan_as_null
        self._allow_copy = allow_copy

    def __dataframe__(self, nan_as_null: bool = False, allow_copy: bool = True):
        return PandasProtocolDataframe(
            self._df, nan_as_null=nan_as_null, allow_copy=allow_copy
        )

    def _wrap_frame(self, df: PandasDataframe) -> "PandasProtocolDataframe":
        """
        Wrap `df` into a protocol dataframe that shares the settings of this object.

        Parameters
        ----------
        df : PandasDataframe
            Frame derived from the one wrapped by this object.

        Returns
        -------
        PandasProtocolDataframe
            Protocol dataframe carrying the same `nan_as_null` and `allow_copy` flags.
        """
        return PandasProtocolDataframe(
            df, nan_as_null=self._nan_as_null, allow_copy=self._allow_copy
        )

    @property
    def metadata(self) -> Dict[str, Any]:
        return {"modin.index": self._df.index}

    def num_columns(self) -> int:
        return len(self._df.columns)

    def num_rows(self) -> int:
        return len(self._df.index)

    def num_chunks(self) -> int:
        # One chunk per row partition of the wrapped frame
        return self._df._partitions.shape[0]

    def column_names(self) -> Iterable[str]:
        yield from self._df.columns

    def get_column(self, i: int) -> PandasProtocolColumn:
        return PandasProtocolColumn(
            self._df.take_2d_labels_or_positional(
                row_positions=None, col_positions=[i]
            ),
            allow_copy=self._allow_copy,
        )

    def get_column_by_name(self, name: str) -> PandasProtocolColumn:
        return PandasProtocolColumn(
            self._df.take_2d_labels_or_positional(
                row_positions=None, col_labels=[name]
            ),
            allow_copy=self._allow_copy,
        )

    def get_columns(self) -> Iterable[PandasProtocolColumn]:
        # Select by position so that every yielded object holds exactly one
        # column even if a label happens to be repeated
        for position in range(self.num_columns()):
            yield self.get_column(position)

    def select_columns(self, indices: Sequence[int]) -> "PandasProtocolDataframe":
        if not isinstance(indices, collections.abc.Sequence):
            raise ValueError("`indices` is not a sequence")

        return self._wrap_frame(
            self._df.take_2d_labels_or_positional(
                row_positions=None, col_positions=indices
            )
        )

    def select_columns_by_name(self, names: Sequence[str]) -> "PandasProtocolDataframe":
        if not isinstance(names, collections.abc.Sequence):
            raise ValueError("`names` is not a sequence")

        return self._wrap_frame(
            self._df.take_2d_labels_or_positional(row_positions=None, col_labels=names)
        )

    def get_chunks(
        self, n_chunks: Optional[int] = None
    ) -> Iterable["PandasProtocolDataframe"]:
        cur_n_chunks = self.num_chunks()
        n_rows = self.num_rows()
        if n_chunks is None or n_chunks == cur_n_chunks:
            # The existing row partitioning already matches the request
            chunked = self._df
        else:
            if n_chunks <= 0:
                raise RuntimeError("The passed `n_chunks` must be a positive integer.")

            if n_chunks % cur_n_chunks != 0:
                raise RuntimeError(
                    "The passed `n_chunks` must be a multiple of `self.num_chunks()`."
                )

            if n_chunks > n_rows:
                raise RuntimeError(
                    "The passed `n_chunks` value is bigger than `self.num_rows()`."
                )

            chunksize = n_rows // n_chunks
            new_lengths = [chunksize] * n_chunks
            # The last chunk absorbs the rows that do not divide evenly
            new_lengths[-1] += n_rows % n_chunks

            # Identity map along the row axis, used only to repartition the data
            new_partitions = self._df._partition_mgr_cls.map_axis_partitions(
                0,
                self._df._partitions,
                lambda df: df,
                keep_partitioning=False,
                lengths=new_lengths,
            )
            chunked = self._df.__constructor__(
                new_partitions,
                self._df.index,
                self._df.columns,
                new_lengths,
                self._df.column_widths,
            )

        cum_row_lengths = np.cumsum([0] + chunked.row_lengths)
        for start, stop in zip(cum_row_lengths[:-1], cum_row_lengths[1:]):
            yield self._wrap_frame(
                chunked.take_2d_labels_or_positional(
                    row_positions=range(start, stop),
                    col_positions=None,
                )
            )


================================================
FILE: modin/core/dataframe/pandas/interchange/dataframe_protocol/exception.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Exceptions that can be caught by dataframe exchange protocol implementation for pandas storage format."""


class NoValidityBuffer(Exception):
    """Signal that a ``PandasProtocolColumn`` has no validity buffer to provide."""


class NoOffsetsBuffer(Exception):
    """Signal that a ``PandasProtocolColumn`` has no offsets buffer to provide."""


================================================
FILE: modin/core/dataframe/pandas/interchange/dataframe_protocol/from_dataframe.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses the functions building a ``pandas.DataFrame`` from a DataFrame exchange protocol object."""

import ctypes
import re
from typing import Any, Optional, Tuple, Union

import numpy as np
import pandas

from modin.core.dataframe.base.interchange.dataframe_protocol.dataframe import (
    ProtocolBuffer,
    ProtocolColumn,
    ProtocolDataframe,
)
from modin.core.dataframe.base.interchange.dataframe_protocol.utils import (
    ArrowCTypes,
    ColumnNullType,
    DTypeKind,
    Endianness,
)

# Lookup table used by ``buffer_to_ndarray``: protocol dtype kind -> bit width ->
# NumPy scalar type the raw buffer memory is viewed as. A (kind, width) pair that
# is absent from the table is reported there as not supported.
np_types_map = {
    DTypeKind.INT: {8: np.int8, 16: np.int16, 32: np.int32, 64: np.int64},
    DTypeKind.UINT: {8: np.uint8, 16: np.uint16, 32: np.uint32, 64: np.uint64},
    DTypeKind.FLOAT: {32: np.float32, 64: np.float64},
    # Consider bitmask to be a uint8 dtype to parse the bits later
    DTypeKind.BOOL: {1: np.uint8, 8: bool},
}


def from_dataframe_to_pandas(df: ProtocolDataframe, n_chunks: Optional[int] = None):
    """
    Build a ``pandas.DataFrame`` from an object supporting the DataFrame exchange protocol, i.e. `__dataframe__` method.

    Every chunk of the protocol object is converted separately and the
    results are concatenated row-wise. If the protocol metadata carries a
    ``"modin.index"`` (or, failing that, a ``"pandas.index"``) entry, it is
    set as the index of the result.

    Parameters
    ----------
    df : ProtocolDataframe
        Object supporting the exchange protocol, i.e. `__dataframe__` method.
    n_chunks : int, optional
        Number of chunks to split `df`.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    ValueError
        If `df` has no ``__dataframe__`` attribute.
    """
    if not hasattr(df, "__dataframe__"):
        raise ValueError("`df` does not support __dataframe__")

    protocol_df = df.__dataframe__()
    # When a dict is returned, the protocol object is taken from its "dataframe" entry
    if isinstance(protocol_df, dict):
        protocol_df = protocol_df["dataframe"]

    chunk_frames = [
        protocol_df_chunk_to_pandas(chunk)
        for chunk in protocol_df.get_chunks(n_chunks)
    ]
    result = pandas.concat(chunk_frames, axis=0, ignore_index=True)

    # Prefer the Modin-specific index entry and fall back to the pandas one
    fallback_index = protocol_df.metadata.get("pandas.index", None)
    index_obj = protocol_df.metadata.get("modin.index", fallback_index)
    if index_obj is not None:
        result.index = index_obj

    return result


def protocol_df_chunk_to_pandas(df):
    """
    Convert exchange protocol chunk to ``pandas.DataFrame``.

    Parameters
    ----------
    df : ProtocolDataframe

    Returns
    -------
    pandas.DataFrame
        Frame built from the unpacked columns; the objects owning the memory
        of the columns are attached to it as the ``_buffers`` attribute.

    Raises
    ------
    ValueError
        If a column name is not a string or occurs more than once.
    """
    # Each column is kept as a NumPy array (at least for now, non-NumPy dtypes
    # are to be dealt with later).
    unpacked = {}
    memory_owners = []  # referenced from the result so the memory stays alive
    for label in df.column_names():
        if not isinstance(label, str):
            raise ValueError(f"Column {label} is not a string")
        if label in unpacked:
            raise ValueError(f"Column {label} is not unique")
        values, owner = unpack_protocol_column(df.get_column_by_name(label))
        unpacked[label] = values
        memory_owners.append(owner)

    result = pandas.DataFrame(unpacked)
    result._buffers = memory_owners
    return result


def unpack_protocol_column(
    col: ProtocolColumn,
) -> Tuple[Union[np.ndarray, pandas.Series], Any]:
    """
    Unpack an interchange protocol column to a pandas-ready column.

    The conversion routine is chosen by the dtype kind of the column.

    Parameters
    ----------
    col : ProtocolColumn
        Column to unpack.

    Returns
    -------
    tuple
        Tuple of resulting column (either an ndarray or a series) and the object
        which keeps memory referenced by the column alive.

    Raises
    ------
    NotImplementedError
        If the dtype kind of the column is not handled.
    """
    kind = col.dtype[0]
    primitive_kinds = (
        DTypeKind.INT,
        DTypeKind.UINT,
        DTypeKind.FLOAT,
        DTypeKind.BOOL,
    )

    if kind in primitive_kinds:
        return primitive_column_to_ndarray(col)
    if kind == DTypeKind.CATEGORICAL:
        return categorical_column_to_series(col)
    if kind == DTypeKind.STRING:
        return string_column_to_ndarray(col)
    if kind == DTypeKind.DATETIME:
        return datetime_column_to_ndarray(col)

    raise NotImplementedError(f"Data type {kind} not handled yet")


def primitive_column_to_ndarray(col: ProtocolColumn) -> Tuple[np.ndarray, Any]:
    """
    Convert a column holding one of the primitive dtypes (int, uint, float or bool) to a NumPy array.

    Parameters
    ----------
    col : ProtocolColumn

    Returns
    -------
    tuple
        Tuple of np.ndarray holding the data and the memory owner object that keeps the memory alive.
    """
    col_buffers = col.get_buffers()

    raw_buffer, raw_dtype = col_buffers["data"]
    values = buffer_to_ndarray(raw_buffer, raw_dtype, col.offset, col.size())

    # The buffers are returned alongside the array as its memory owner
    return set_nulls(values, col, col_buffers["validity"]), col_buffers


def categorical_column_to_series(col: ProtocolColumn) -> Tuple[pandas.Series, Any]:
    """
    Convert a column holding categorical data to a pandas Series.

    Parameters
    ----------
    col : ProtocolColumn

    Returns
    -------
    tuple
        Tuple of pandas.Series holding the data and the memory owner object that keeps the memory alive.

    Raises
    ------
    NotImplementedError
        If the column is not dictionary-encoded or provides no categories.
    """
    cat_descr = col.describe_categorical
    ordered, is_dict, categories = (
        cat_descr["is_ordered"],
        cat_descr["is_dictionary"],
        cat_descr["categories"],
    )

    if not is_dict or categories is None:
        raise NotImplementedError("Non-dictionary categoricals not supported yet")

    buffers = col.get_buffers()

    codes_buff, codes_dtype = buffers["data"]
    codes = buffer_to_ndarray(codes_buff, codes_dtype, col.offset, col.size())

    cat_values, categories_buf = unpack_protocol_column(categories)
    if len(cat_values) > 0:
        # Doing modulo in order to not get ``IndexError`` for out-of-bounds sentinel values in `codes`
        values = cat_values[codes % len(cat_values)]
    else:
        # With no categories there is nothing to look up (and the modulo would
        # divide by zero); the raw codes match no category and thus become missing
        values = codes

    cat = pandas.Categorical(values, categories=cat_values, ordered=ordered)
    data = pandas.Series(cat)

    data = set_nulls(data, col, buffers["validity"])
    return data, [buffers, categories_buf]


def _inverse_null_buf(buf: np.ndarray, null_kind: ColumnNullType) -> np.ndarray:
    """
    Inverse the boolean value of buffer storing either bit- or bytemask.

    Parameters
    ----------
    buf : np.ndarray
        Buffer to inverse the boolean value for.
    null_kind : {ColumnNullType.USE_BYTEMASK, ColumnNullType.USE_BITMASK}
        How to treat the buffer.

    Returns
    -------
    np.ndarray
        Logically inversed buffer.
    """
    if null_kind != ColumnNullType.USE_BITMASK:
        assert (
            null_kind == ColumnNullType.USE_BYTEMASK
        ), f"Unexpected null kind: {null_kind}"
        # A bytemask stores 0 for `False` and any other value for `True`, so the
        # logical negation is a comparison with zero rather than a bitwise flip
        return buf == 0

    # Bitmasks are inverted with the bitwise operator
    return ~buf


def string_column_to_ndarray(col: ProtocolColumn) -> Tuple[np.ndarray, Any]:
    """
    Convert a column holding string data to a NumPy array.

    Parameters
    ----------
    col : ProtocolColumn

    Returns
    -------
    tuple
        Tuple of np.ndarray holding the data and the memory owner object that keeps the memory alive.

    Raises
    ------
    NotImplementedError
        If the null kind of the column is neither non-nullable nor a bit/byte mask.
    """
    null_kind, sentinel_val = col.describe_null
    mask_kinds = (ColumnNullType.USE_BITMASK, ColumnNullType.USE_BYTEMASK)

    if null_kind not in (ColumnNullType.NON_NULLABLE, *mask_kinds):
        raise NotImplementedError(
            f"{null_kind} null kind is not yet supported for string columns."
        )

    buffers = col.get_buffers()
    n_strings = col.size()

    # The data buffer stores the UTF-8 code units of all the strings back to back
    data_buff, protocol_data_dtype = buffers["data"]
    # The buffer is reinterpreted as uint8 below, so make sure that is safe
    assert protocol_data_dtype[1] == 8  # bitwidth == 8
    assert protocol_data_dtype[2] == ArrowCTypes.STRING  # format_str == utf-8
    # Claim the buffer to be uint8 (i.e., a byte array) to get an ndarray out of it
    byte_dtype = (
        DTypeKind.UINT,
        8,
        ArrowCTypes.UINT8,
        Endianness.NATIVE,
    )
    # Zero offset: the string data itself is not chunked, only the offsets are
    code_units = buffer_to_ndarray(data_buff, byte_dtype, offset=0, length=n_strings)

    # The offsets buffer stores start/stop positions of the strings in the data
    # buffer, hence it has one element more than there are strings
    offset_buff, offset_dtype = buffers["offsets"]
    bounds = buffer_to_ndarray(
        offset_buff, offset_dtype, col.offset, length=n_strings + 1
    )

    is_null = None
    if null_kind in mask_kinds:
        valid_buff, valid_dtype = buffers["validity"]
        is_null = buffer_to_ndarray(valid_buff, valid_dtype, col.offset, n_strings)
        if sentinel_val == 0:
            # Zero marks a missing value, so flip the mask to get null positions
            is_null = _inverse_null_buf(is_null, null_kind)

    def decode_at(position):
        """Decode the string whose code units are delimited by `bounds` at `position`."""
        units = code_units[bounds[position] : bounds[position + 1]]
        return bytes(units).decode(encoding="utf-8")

    if is_null is None:
        strings = [decode_at(i) for i in range(n_strings)]
    else:
        strings = [np.nan if is_null[i] else decode_at(i) for i in range(n_strings)]

    return np.asarray(strings, dtype="object"), buffers


def datetime_column_to_ndarray(col: ProtocolColumn) -> Tuple[np.ndarray, Any]:
    """
    Convert a column holding DateTime data to a NumPy array.

    Parameters
    ----------
    col : ProtocolColumn

    Returns
    -------
    tuple
        Tuple of np.ndarray holding the data and the memory owner object that keeps the memory alive.

    Raises
    ------
    NotImplementedError
        If the format string of the column describes a timezone-aware timestamp
        or a datetime kind that is not handled.
    """
    buffers = col.get_buffers()

    _, _, format_str, _ = col.dtype
    dbuf, dtype = buffers["data"]
    bit_width = dtype[1]
    # Read the raw values as *signed* integers counting units since 01.01.1970:
    # values before the epoch are negative and an unsigned view would turn them
    # into huge positive numbers once widened (e.g. 32-bit day counts)
    data = buffer_to_ndarray(
        dbuf,
        (
            DTypeKind.INT,
            bit_width,
            getattr(ArrowCTypes, f"INT{bit_width}"),
            Endianness.NATIVE,
        ),
        col.offset,
        col.size(),
    )

    def parse_format_str(format_str, data):
        """Parse datetime `format_str` to interpret the `data`."""
        # timestamp 'ts{unit}:tz'
        timestamp_meta = re.match(r"ts([smun]):(.*)", format_str)
        if timestamp_meta:
            unit, tz = timestamp_meta.group(1), timestamp_meta.group(2)
            if tz != "":
                raise NotImplementedError("Timezones are not supported yet")
            if unit != "s":
                # the format string describes only a first letter of the unit, add one extra
                # letter to make the unit in numpy-style: 'm' -> 'ms', 'u' -> 'us', 'n' -> 'ns'
                unit += "s"
            return data.astype(f"datetime64[{unit}]")

        # date 'td{Days/Ms}'
        date_meta = re.match(r"td([Dm])", format_str)
        if date_meta:
            unit = date_meta.group(1)
            if unit == "D":
                # Days are converted to seconds before the cast; widening to
                # int64 first keeps the sign and avoids overflow
                return (data.astype(np.int64) * (24 * 60 * 60)).astype(
                    "datetime64[s]"
                )
            if unit == "m":
                return data.astype("datetime64[ms]")
            raise NotImplementedError(f"Date unit is not supported: {unit}")

        raise NotImplementedError(f"DateTime kind is not supported: {format_str}")

    data = parse_format_str(format_str, data)
    data = set_nulls(data, col, buffers["validity"])
    return data, buffers


def buffer_to_ndarray(
    buffer: ProtocolBuffer,
    dtype: Tuple[DTypeKind, int, str, str],
    offset: int = 0,
    length: Optional[int] = None,
) -> np.ndarray:
    """
    Build a NumPy array from the passed buffer.

    Parameters
    ----------
    buffer : ProtocolBuffer
        Buffer to build a NumPy array from.
    dtype : tuple
        Data type of the buffer conforming protocol dtypes format.
    offset : int, default: 0
        Number of elements to offset from the start of the buffer.
    length : int, optional
        If the buffer is a bit-mask, specifies a number of bits to read
        from the buffer. Has no effect otherwise.

    Returns
    -------
    np.ndarray
        For a bit-mask, a boolean array of `length` elements; otherwise a view
        spanning from the `offset`-th element to the end of the buffer.

    Raises
    ------
    NotImplementedError
        If there is no NumPy type registered for the kind and bit width of `dtype`.

    Notes
    -----
    The returned array doesn't own the memory. A user of the function must keep the memory
    owner object alive as long as the returned NumPy array is being used.
    """
    kind, bit_width, _, _ = dtype

    column_dtype = np_types_map.get(kind, {}).get(bit_width, None)
    if column_dtype is None:
        raise NotImplementedError(f"Convertion for {dtype} is not yet supported.")

    # TODO: No DLPack yet, so need to construct a new ndarray from the data pointer
    # and size in the buffer plus the dtype on the column. Use DLPack as NumPy supports
    # it since https://github.com/numpy/numpy/pull/19083
    ctypes_type = np.ctypeslib.as_ctypes_type(column_dtype)
    byte_offset = offset * bit_width // 8
    data_pointer = ctypes.cast(
        buffer.ptr + byte_offset, ctypes.POINTER(ctypes_type)
    )
    # The pointer was advanced by `byte_offset`, so only the rest of the buffer
    # may be exposed; spanning `bufsize` bytes from here would run past its end
    remaining_bytes = max(buffer.bufsize - byte_offset, 0)

    if bit_width == 1:
        assert length is not None, "`length` must be specified for a bit-mask buffer."
        arr = np.ctypeslib.as_array(data_pointer, shape=(remaining_bytes,))
        # Whole bytes were skipped via the pointer, the bit remainder is skipped here
        return bitmask_to_bool_ndarray(arr, length, first_byte_offset=offset % 8)

    return np.ctypeslib.as_array(
        data_pointer, shape=(remaining_bytes // (bit_width // 8),)
    )


def bitmask_to_bool_ndarray(
    bitmask: np.ndarray, mask_length: int, first_byte_offset: int = 0
) -> np.ndarray:
    """
    Convert bit-mask to a boolean NumPy array.

    Bits are read starting from the least significant bit of each byte. Only
    the bytes that hold the requested bits are inspected, so trailing bytes
    of `bitmask` (e.g. padding) do not affect the result.

    Parameters
    ----------
    bitmask : np.ndarray[uint8]
        NumPy array of uint8 dtype representing the bitmask.
    mask_length : int
        Number of elements in the mask to interpret.
    first_byte_offset : int, default: 0
        Number of elements to offset from the start of the first byte.

    Returns
    -------
    np.ndarray[bool]
        Newly allocated array of `mask_length` elements.

    Raises
    ------
    ValueError
        If `mask_length` or `first_byte_offset` is negative, or if `bitmask`
        holds fewer bits than requested.
    """
    if mask_length < 0 or first_byte_offset < 0:
        raise ValueError(
            "`mask_length` and `first_byte_offset` must be non-negative."
        )
    if mask_length == 0:
        # Nothing to read, do not touch `bitmask` at all (it may be empty)
        return np.zeros(0, dtype=bool)

    bytes_to_skip, bit_offset = divmod(first_byte_offset, 8)
    # Number of bytes spanned by `mask_length` bits starting at `bit_offset`
    n_bytes = (bit_offset + mask_length + 7) // 8
    used_bytes = np.asarray(
        bitmask[bytes_to_skip : bytes_to_skip + n_bytes], dtype=np.uint8
    )
    if len(used_bytes) < n_bytes:
        raise ValueError(
            f"The bitmask is too short: {n_bytes} byte(s) are required after "
            + f"skipping {bytes_to_skip}, but only {len(used_bytes)} are available."
        )

    # 'little' bit order maps bit `j` of a byte to position `j`, matching `1 << j`
    bits = np.unpackbits(used_bytes, bitorder="little")
    return bits[bit_offset : bit_offset + mask_length].astype(bool)


def set_nulls(
    data: Union[np.ndarray, pandas.Series],
    col: ProtocolColumn,
    validity: Tuple[ProtocolBuffer, Tuple[DTypeKind, int, str, str]],
    allow_modify_inplace: bool = True,
):
    """
    Put nulls into the data at the positions dictated by the column null kind.

    Parameters
    ----------
    data : np.ndarray or pandas.Series
        Data to set nulls in.
    col : ProtocolColumn
        Column object that describes the `data`.
    validity : tuple(ProtocolBuffer, dtype) or None
        The return value of ``col.buffers()``. It's passed explicitly rather than
        being fetched from `col` here in order to not take the ownership of the
        memory of buffer objects. Only unpacked for the mask-based null kinds.
    allow_modify_inplace : bool, default: True
        Whether the nulls may be written directly into `data` (True) or have to be
        written into its copy (False).

    Returns
    -------
    np.ndarray or pandas.Series
        Data with the nulls being set.

    Raises
    ------
    NotImplementedError
        If the null kind reported by `col` is not one of the handled kinds.
    """
    null_kind, sentinel_val = col.describe_null

    if null_kind == ColumnNullType.USE_SENTINEL:
        nulls_at = data == sentinel_val
    elif null_kind in (ColumnNullType.USE_BITMASK, ColumnNullType.USE_BYTEMASK):
        mask_buffer, mask_dtype = validity
        nulls_at = buffer_to_ndarray(mask_buffer, mask_dtype, col.offset, col.size())
        if sentinel_val == 0:
            # here zero marks a null, so the mask is passed through the inversion helper
            nulls_at = _inverse_null_buf(nulls_at, null_kind)
    elif null_kind in (ColumnNullType.NON_NULLABLE, ColumnNullType.USE_NAN):
        # nothing to set for these kinds
        return data
    else:
        raise NotImplementedError(f"Null kind {null_kind} is not yet supported.")

    if nulls_at is None or not np.any(nulls_at):
        return data

    result = data if allow_modify_inplace else data.copy()
    try:
        result[nulls_at] = None
    except TypeError:
        # TypeError is raised when the dtype of the data can't hold nulls in numpy
        # notation (bool, int, uint), the data is then cast to a nullable float dtype
        result = result.astype(float)
        result[nulls_at] = None
    return result


================================================
FILE: modin/core/dataframe/pandas/metadata/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Utilities and classes to handle work with metadata."""

from .dtypes import (
    DtypesDescriptor,
    LazyProxyCategoricalDtype,
    ModinDtypes,
    extract_dtype,
)
from .index import ModinIndex

__all__ = [
    "ModinDtypes",
    "ModinIndex",
    "LazyProxyCategoricalDtype",
    "DtypesDescriptor",
    "extract_dtype",
]


================================================
FILE: modin/core/dataframe/pandas/metadata/dtypes.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module contains class ``ModinDtypes``."""

from __future__ import annotations

from typing import TYPE_CHECKING, Callable, Optional, Union

import pandas
from pandas._typing import DtypeObj, IndexLabel
from pandas.core.dtypes.cast import find_common_type

if TYPE_CHECKING:
    from modin.core.dataframe.pandas.dataframe.dataframe import PandasDataframe
    from .index import ModinIndex

from modin.error_message import ErrorMessage


class DtypesDescriptor:
    """
    Describes partial dtypes.

    Parameters
    ----------
    known_dtypes : dict[IndexLabel, DtypeObj] or pandas.Series, optional
        Columns that we know dtypes for.
    cols_with_unknown_dtypes : list[IndexLabel], optional
        Column names that have unknown dtypes. If specified together with `remaining_dtype`, must describe all
        columns with unknown dtypes, otherwise, the missing columns will be assigned to `remaining_dtype`.
        If `cols_with_unknown_dtypes` is incomplete, you must specify `know_all_names=False`.
    remaining_dtype : DtypeObj, optional
        Dtype for columns that are present neither in `known_dtypes` nor in `cols_with_unknown_dtypes`.
        This parameter is intended to describe columns that we know dtypes for, but don't know their
        names yet. Note that this parameter DOESN'T describe dtypes for columns from `cols_with_unknown_dtypes`.
    parent_df : PandasDataframe, optional
        Dataframe object for which we describe dtypes. This dataframe will be used to compute
        missing dtypes on ``.materialize()``.
    columns_order : dict[int, IndexLabel], optional
        Order of columns in the dataframe. If specified, must describe all the columns of the dataframe.
    know_all_names : bool, default: True
        Whether `known_dtypes` and `cols_with_unknown_dtypes` contain all column names for this dataframe besides those,
        that are being described by `remaining_dtype`.
        One can't pass `know_all_names=False` together with `remaining_dtype` as this creates ambiguity
        on how to interpret missing columns (whether they belong to `remaining_dtype` or not).
    _schema_is_known : bool, optional
        Whether `known_dtypes` describe all columns in the dataframe. This parameter intended mostly
        for internal use.
    """

    def __init__(
        self,
        known_dtypes: Optional[Union[dict[IndexLabel, DtypeObj], pandas.Series]] = None,
        cols_with_unknown_dtypes: Optional[list[IndexLabel]] = None,
        remaining_dtype: Optional[DtypeObj] = None,
        parent_df: Optional[PandasDataframe] = None,
        columns_order: Optional[dict[int, IndexLabel]] = None,
        know_all_names: bool = True,
        _schema_is_known: Optional[bool] = None,
    ):
        if not know_all_names and remaining_dtype is not None:
            raise ValueError(
                "It's not allowed to pass 'remaining_dtype' and 'know_all_names=False' at the same time."
            )
        # columns with known dtypes
        self._known_dtypes: dict[IndexLabel, DtypeObj] = (
            {} if known_dtypes is None else dict(known_dtypes)
        )
        if known_dtypes is not None and len(self._known_dtypes) != len(known_dtypes):
            raise NotImplementedError(
                "Duplicated column names are not yet supported by DtypesDescriptor"
            )
        # columns with unknown dtypes (they're not described by 'remaining_dtype')
        if cols_with_unknown_dtypes is not None and len(
            set(cols_with_unknown_dtypes)
        ) != len(cols_with_unknown_dtypes):
            raise NotImplementedError(
                "Duplicated column names are not yet supported by DtypesDescriptor"
            )
        self._cols_with_unknown_dtypes: list[IndexLabel] = (
            [] if cols_with_unknown_dtypes is None else cols_with_unknown_dtypes
        )
        # whether 'known_dtypes' describe all columns in the dataframe
        self._schema_is_known: Optional[bool] = _schema_is_known
        if self._schema_is_known is None:
            self._schema_is_known = False
            if (
                # if 'cols_with_unknown_dtypes' was explicitly specified as an empty list and
                # we don't have any 'remaining_dtype', then we assume that 'known_dtypes' are complete
                cols_with_unknown_dtypes is not None
                and know_all_names
                and remaining_dtype is None
                and len(self._known_dtypes) > 0
            ):
                self._schema_is_known = len(cols_with_unknown_dtypes) == 0

        self._know_all_names: bool = know_all_names
        # a common dtype for columns that are not present in 'known_dtypes' nor in 'cols_with_unknown_dtypes'
        self._remaining_dtype: Optional[DtypeObj] = remaining_dtype
        self._parent_df: Optional[PandasDataframe] = parent_df
        if columns_order is None:
            self._columns_order: Optional[dict[int, IndexLabel]] = None
            # try to compute '._columns_order' using 'parent_df'
            self.columns_order
        else:
            if remaining_dtype is not None:
                raise ValueError(
                    "Passing 'columns_order' and 'remaining_dtype' is ambiguous. You have to manually "
                    + "complete 'known_dtypes' using the information from 'columns_order' and 'remaining_dtype'."
                )
            elif not self._know_all_names:
                raise ValueError(
                    "Passing 'columns_order' and 'know_all_names=False' is ambiguous. You have to manually "
                    + "complete 'cols_with_unknown_dtypes' using the information from 'columns_order' "
                    + "and pass 'know_all_names=True'."
                )
            elif len(columns_order) != (
                len(self._cols_with_unknown_dtypes) + len(self._known_dtypes)
            ):
                raise ValueError(
                    "The length of 'columns_order' doesn't match to 'known_dtypes' and 'cols_with_unknown_dtypes'"
                )
            self._columns_order: Optional[dict[int, IndexLabel]] = columns_order

    def update_parent(self, new_parent: PandasDataframe):
        """
        Make the descriptor refer to another parent dataframe.

        Besides storing the reference, the known dtypes are handed over to
        ``LazyProxyCategoricalDtype.update_dtypes`` along with the new parent.

        Parameters
        ----------
        new_parent : PandasDataframe
        """
        self._parent_df = new_parent
        LazyProxyCategoricalDtype.update_dtypes(self._known_dtypes, new_parent)
        # the value itself is not needed, the property is accessed in order to
        # give it a chance to compute '._columns_order' using 'new_parent'
        _ = self.columns_order

    @property
    def columns_order(self) -> Optional[dict[int, IndexLabel]]:
        """
        Get order of columns for the described dataframe if available.

        Returns
        -------
        dict[int, IndexLabel] or None
        """
        if self._columns_order is not None:
            return self._columns_order
        if self._parent_df is None or not self._parent_df.has_materialized_columns:
            return None

        actual_columns = self._parent_df.columns
        self._normalize_self_levels(actual_columns)

        self._columns_order = {i: col for i, col in enumerate(actual_columns)}
        # we got information about new columns and thus can potentially
        # extend our knowledge about missing dtypes
        if len(self._columns_order) > (
            len(self._known_dtypes) + len(self._cols_with_unknown_dtypes)
        ):
            new_cols = [
                col
                for col in self._columns_order.values()
                if col not in self._known_dtypes
                and col not in self._cols_with_unknown_dtypes
            ]
            if self._remaining_dtype is not None:
                self._known_dtypes.update(
                    {col: self._remaining_dtype for col in new_cols}
                )
                self._remaining_dtype = None
                if len(self._cols_with_unknown_dtypes) == 0:
                    self._schema_is_known = True
            else:
                self._cols_with_unknown_dtypes.extend(new_cols)
        self._know_all_names = True
        return self._columns_order

    def __repr__(self):  # noqa: GL08
        return (
            f"DtypesDescriptor:\n\tknown dtypes: {self._known_dtypes};\n\t"
            + f"remaining dtype: {self._remaining_dtype};\n\t"
            + f"cols with unknown dtypes: {self._cols_with_unknown_dtypes};\n\t"
            + f"schema is known: {self._schema_is_known};\n\t"
            + f"has parent df: {self._parent_df is not None};\n\t"
            + f"columns order: {self._columns_order};\n\t"
            + f"know all names: {self._know_all_names}"
        )

    def __str__(self):  # noqa: GL08
        return self.__repr__()

    def lazy_get(
        self, ids: list[Union[IndexLabel, int]], numeric_index: bool = False
    ) -> DtypesDescriptor:
        """
        Get dtypes descriptor for a subset of columns without triggering any computations.

        Parameters
        ----------
        ids : list of index labels or positional indexers
            Columns for the subset.
        numeric_index : bool, default: False
            Whether `ids` are positional indixes or column labels to take.

        Returns
        -------
        DtypesDescriptor
            Descriptor that describes dtypes for columns specified in `ids`.
        """
        if len(set(ids)) != len(ids):
            raise NotImplementedError(
                "Duplicated column names are not yet supported by DtypesDescriptor"
            )

        if numeric_index:
            if self.columns_order is not None:
                ids = [self.columns_order[i] for i in ids]
            else:
                raise ValueError(
                    "Can't lazily get columns by positional indixers if the columns order is unknown"
                )

        result = {}
        unknown_cols = []
        columns_order = {}
        for i, col in enumerate(ids):
            columns_order[i] = col
            if col in self._cols_with_unknown_dtypes:
                unknown_cols.append(col)
                continue
            dtype = self._known_dtypes.get(col)
            if dtype is None and self._remaining_dtype is None:
                unknown_cols.append(col)
            elif dtype is None and self._remaining_dtype is not None:
                result[col] = self._remaining_dtype
            else:
                result[col] = dtype
        remaining_dtype = self._remaining_dtype if len(unknown_cols) != 0 else None
        return DtypesDescriptor(
            result,
            unknown_cols,
            remaining_dtype,
            self._parent_df,
            columns_order=columns_order,
        )

    def copy(self) -> DtypesDescriptor:
        """
        Get a copy of this descriptor.

        Returns
        -------
        DtypesDescriptor
        """
        return type(self)(
            # should access '.columns_order' first, as it may compute columns order
            # and complete the metadata for 'self'
            columns_order=(
                None if self.columns_order is None else self.columns_order.copy()
            ),
            known_dtypes=self._known_dtypes.copy(),
            cols_with_unknown_dtypes=self._cols_with_unknown_dtypes.copy(),
            remaining_dtype=self._remaining_dtype,
            parent_df=self._parent_df,
            know_all_names=self._know_all_names,
            _schema_is_known=self._schema_is_known,
        )

    def set_index(self, new_index: Union[pandas.Index, ModinIndex]) -> DtypesDescriptor:
        """
        Set new column names for this descriptor.

        Parameters
        ----------
        new_index : pandas.Index or ModinIndex

        Returns
        -------
        DtypesDescriptor
            New descriptor with updated column names.

        Notes
        -----
        Calling this method on a descriptor that returns ``None`` for ``.columns_order``
        will result into information lose.
        """
        if len(new_index) != len(set(new_index)):
            raise NotImplementedError(
                "Duplicated column names are not yet supported by DtypesDescriptor"
            )

        if self.columns_order is None:
            # we can't map new columns to old columns and lost all dtypes :(
            return DtypesDescriptor(
                cols_with_unknown_dtypes=new_index,
                columns_order={i: col for i, col in enumerate(new_index)},
                parent_df=self._parent_df,
                know_all_names=True,
            )

        new_self = self.copy()
        renamer = {old_c: new_index[i] for i, old_c in new_self.columns_order.items()}
        new_self._known_dtypes = {
            renamer[old_col]: value for old_col, value in new_self._known_dtypes.items()
        }
        new_self._cols_with_unknown_dtypes = [
            renamer[old_col] for old_col in new_self._cols_with_unknown_dtypes
        ]
        new_self._columns_order = {
            i: renamer[old_col] for i, old_col in new_self._columns_order.items()
        }
        return new_self

    def equals(self, other: DtypesDescriptor) -> bool:
        """
        Compare two descriptors for equality.

        Parameters
        ----------
        other : DtypesDescriptor

        Returns
        -------
        bool
        """
        return (
            self._known_dtypes == other._known_dtypes
            and set(self._cols_with_unknown_dtypes)
            == set(other._cols_with_unknown_dtypes)
            and self._remaining_dtype == other._remaining_dtype
            and self._schema_is_known == other._schema_is_known
            and self.columns_order == other.columns_order
            and self._know_all_names == other._know_all_names
        )

    @property
    def is_materialized(self) -> bool:
        """
        Whether this descriptor contains information about all dtypes in the dataframe.

        Returns
        -------
        bool
        """
        return self._schema_is_known

    def _materialize_all_names(self):
        """Materialize missing column names."""
        if self._know_all_names:
            return

        all_cols = self._parent_df.columns
        self._normalize_self_levels(all_cols)
        for col in all_cols:
            if (
                col not in self._known_dtypes
                and col not in self._cols_with_unknown_dtypes
            ):
                self._cols_with_unknown_dtypes.append(col)

        self._know_all_names = True

    def _materialize_cols_with_unknown_dtypes(self):
        """Compute dtypes for cols specified in `._cols_with_unknown_dtypes`."""
        if (
            len(self._known_dtypes) == 0
            and len(self._cols_with_unknown_dtypes) == 0
            and not self._know_all_names
        ):
            # here we have to compute dtypes for all columns in the dataframe,
            # so avoiding columns materialization by setting 'subset=None'
            subset = None
        else:
            if not self._know_all_names:
                self._materialize_all_names()
            subset = self._cols_with_unknown_dtypes

        if subset is None or len(subset) > 0:
            self._known_dtypes.update(self._parent_df._compute_dtypes(subset))

        self._know_all_names = True
        self._cols_with_unknown_dtypes = []

    def materialize(self):
        """Complete information about dtypes."""
        if self.is_materialized:
            return
        if self._parent_df is None:
            raise RuntimeError(
                "It's not allowed to call '.materialize()' before '._parent_df' is specified."
            )

        self._materialize_cols_with_unknown_dtypes()

        if self._remaining_dtype is not None:
            cols = self._parent_df.columns
            self._normalize_self_levels(cols)
            self._known_dtypes.update(
                {
                    col: self._remaining_dtype
                    for col in cols
                    if col not in self._known_dtypes
                }
            )

        # we currently not guarantee for dtypes to be in a proper order:
        # https://github.com/modin-project/modin/blob/8a332c1597c54d36f7ccbbd544e186b689f9ceb1/modin/pandas/test/utils.py#L644-L646
        # so restoring the order only if it's possible
        if self.columns_order is not None:
            assert len(self.columns_order) == len(self._known_dtypes)
            self._known_dtypes = {
                self.columns_order[i]: self._known_dtypes[self.columns_order[i]]
                for i in range(len(self.columns_order))
            }

        self._schema_is_known = True
        self._remaining_dtype = None
        self._parent_df = None

    def to_series(self) -> pandas.Series:
        """
        Convert descriptor to a pandas Series.

        Returns
        -------
        pandas.Series
        """
        self.materialize()
        return pandas.Series(self._known_dtypes)

    def get_dtypes_set(self) -> set[DtypeObj]:
        """
        Get a set of dtypes from the descriptor.

        Returns
        -------
        set[DtypeObj]
        """
        if len(self._cols_with_unknown_dtypes) > 0 or not self._know_all_names:
            self._materialize_cols_with_unknown_dtypes()
        known_dtypes: set[DtypeObj] = set(self._known_dtypes.values())
        if self._remaining_dtype is not None:
            known_dtypes.add(self._remaining_dtype)
        return known_dtypes

    @classmethod
    def _merge_dtypes(
        cls, values: list[Union[DtypesDescriptor, pandas.Series, None]]
    ) -> DtypesDescriptor:
        """
        Union columns described by ``values`` and compute common dtypes for them.

        Every descriptor or Series from `values` becomes a column of an intermediate
        dataframe (a "dtypes matrix") indexed by the joined column names. The result
        dtype for a column name is then computed out of the corresponding row,
        unless the row contains an "unknown" marker, in which case the column is
        reported as a column with unknown dtype.

        Parameters
        ----------
        values : list of DtypesDescriptors, pandas.Series or Nones

        Returns
        -------
        DtypesDescriptor
            The result never has a remaining dtype nor a parent dataframe. If `values`
            contains ``None``, then all the collected columns are reported as columns
            with unknown dtypes.

        Raises
        ------
        NotImplementedError
            If `values` contains an object of an unsupported type.
        """
        known_dtypes = {}
        cols_with_unknown_dtypes = []
        know_all_names = True
        dtypes_are_unknown = False

        # index - joined column names, columns - dtypes taken from 'values'
        #        0     1     2      3
        #  col1  int   bool  float  int
        #  col2  int   int   int    int
        #  colN  bool  bool  bool   int
        dtypes_matrix = pandas.DataFrame()

        for i, val in enumerate(values):
            if isinstance(val, cls):
                know_all_names &= val._know_all_names
                dtypes = val._known_dtypes.copy()
                # columns with unknown dtypes take part in the matrix under the "unknown" marker
                dtypes.update({col: "unknown" for col in val._cols_with_unknown_dtypes})
                if val._remaining_dtype is not None:
                    # we can't process remaining dtypes, so just discarding them
                    know_all_names = False

                # setting a custom name to the Series to prevent duplicated names
                # in the 'dtypes_matrix'
                series = pandas.Series(dtypes, name=i)
                dtypes_matrix = pandas.concat([dtypes_matrix, series], axis=1)
                if not (val._know_all_names and val._remaining_dtype is None):
                    dtypes_matrix.fillna(
                        value={
                            # If we encountered a 'NaN' while 'val' describes all the columns, then
                            # it means, that the missing columns for this instance will be filled with NaNs (floats),
                            # otherwise, it may indicate missing columns that this 'val' has no info about,
                            # meaning that we shouldn't try computing a new dtype for this column,
                            # so marking it as 'unknown'
                            i: "unknown",
                        },
                        inplace=True,
                    )
            elif isinstance(val, pandas.Series):
                # a Series is appended to the matrix as is, under its own name
                dtypes_matrix = pandas.concat([dtypes_matrix, val], axis=1)
            elif val is None:
                # one of the 'dtypes' is None, meaning that we wouldn't been infer a valid result dtype,
                # however, we're continuing our loop so we would at least know the columns we're missing
                # dtypes for
                dtypes_are_unknown = True
                know_all_names = False
            else:
                raise NotImplementedError(type(val))

        if dtypes_are_unknown:
            # only the column names collected so far are reported, all of them as unknown
            return DtypesDescriptor(
                cols_with_unknown_dtypes=dtypes_matrix.index.tolist(),
                know_all_names=know_all_names,
            )

        def combine_dtypes(row):
            # reduces a row of the matrix (dtypes of a single column name) to one value:
            # either the "unknown" marker or a common dtype for the row
            if (row == "unknown").any():
                return "unknown"
            # the NaNs that are left in the row are replaced with the float dtype
            # before looking for a common type
            row = row.fillna(pandas.api.types.pandas_dtype("float"))
            return find_common_type(list(row.values))

        dtypes = dtypes_matrix.apply(combine_dtypes, axis=1)

        # splitting the combined values into known and unknown dtypes
        for col, dtype in dtypes.items():
            if dtype == "unknown":
                cols_with_unknown_dtypes.append(col)
            else:
                known_dtypes[col] = dtype

        return DtypesDescriptor(
            known_dtypes,
            cols_with_unknown_dtypes,
            remaining_dtype=None,
            know_all_names=know_all_names,
        )

    @classmethod
    def concat(
        cls, values: list[Union[DtypesDescriptor, pandas.Series, None]], axis: int = 0
    ) -> DtypesDescriptor:
        """
        Concatenate dtypes descriptors into a single descriptor.

        Parameters
        ----------
        values : list of DtypesDescriptors and pandas.Series
        axis : int, default: 0
            If ``axis == 0``: concatenate column names. This implements the logic of
            how dtypes are combined on ``pd.concat([df1, df2], axis=1)``.
            If ``axis == 1``: perform a union join for the column names described by
            `values` and then find common dtypes for the columns appeared to be in
            an intersection. This implements the logic of how dtypes are combined on
            ``pd.concat([df1, df2], axis=0).dtypes``.

        Returns
        -------
        DtypesDescriptor
        """
        if axis == 1:
            return cls._merge_dtypes(values)
        known_dtypes = {}
        cols_with_unknown_dtypes = []
        schema_is_known = True
        # some default value to not mix it with 'None'
        remaining_dtype = "default"
        know_all_names = True

        for val in values:
            if isinstance(val, cls):
                all_new_cols = (
                    list(val._known_dtypes.keys()) + val._cols_with_unknown_dtypes
                )
                if any(
                    col in known_dtypes or col in cols_with_unknown_dtypes
                    for col in all_new_cols
                ):
                    raise NotImplementedError(
                        "Duplicated column names are not yet supported by DtypesDescriptor"
                    )
                know_all_names &= val._know_all_names
                known_dtypes.update(val._known_dtypes)
                cols_with_unknown_dtypes.extend(val._cols_with_unknown_dtypes)
                if know_all_names:
                    if (
                        remaining_dtype == "default"
                        and val._remaining_dtype is not None
                    ):
                        remaining_dtype = val._remaining_dtype
                    elif (
                        remaining_dtype != "default"
                        and val._remaining_dtype is not None
                        and remaining_dtype != val._remaining_dtype
                    ):
                        remaining_dtype = None
                        know_all_names = False
                else:
                    remaining_dtype = None
                schema_is_known &= val._schema_is_known
            elif isinstance(val, pandas.Series):
                if any(
                    col in known_dtypes or col in cols_with_unknown_dtypes
                    for col in val.index
                ):
                    raise NotImplementedError(
                        "Duplicated column names are not yet supported by DtypesDescriptor"
                    )
                known_dtypes.update(val)
            elif val is None:
                remaining_dtype = None
                schema_is_known = False
                know_all_names = False
            else:
                raise NotImplementedError(type(val))
        return cls(
            known_dtypes,
            cols_with_unknown_dtypes,
            None if remaining_dtype == "default" else remaining_dtype,
            parent_df=None,
            _schema_is_known=schema_is_known,
            know_all_names=know_all_names,
        )

    @staticmethod
    def _normalize_levels(columns, reference=None):
        """
        Normalize levels of MultiIndex column names.

        The function fills missing levels with empty strings as pandas do:
        '''
        >>> columns = ["a", ("l1", "l2"), ("l1a", "l2a", "l3a")]
        >>> _normalize_levels(columns)
        [("a", "", ""), ("l1", "l2", ""), ("l1a", "l2a", "l3a")]
        >>> # with a reference
        >>> idx = pandas.MultiIndex(...)
        >>> idx.nlevels
        4
        >>> _normalize_levels(columns, reference=idx)
        [("a", "", "", ""), ("l1", "l2", "", ""), ("l1a", "l2a", "l3a", "")]
        '''

        Parameters
        ----------
        columns : sequence
            Labels to normalize. If dictionary, will replace keys with normalized columns.
        reference : pandas.Index, optional
            An index to match the number of levels with. If reference is a MultiIndex, then the reference number
            of levels should not be greater than the maximum number of levels in `columns`. If not specified,
            the `columns` themselves become a `reference`.

        Returns
        -------
        sequence
            Column values with normalized levels.
        dict[hashable, hashable]
            Mapping from old column names to new names, only contains column names that
            were changed.

        Raises
        ------
        ValueError
            When the reference number of levels is greater than the maximum number of levels
            in `columns`.
        """
        if reference is None:
            reference = columns

        if isinstance(reference, pandas.Index):
            max_nlevels = reference.nlevels
        else:
            max_nlevels = 1
            for col in reference:
                if isinstance(col, tuple):
                    max_nlevels = max(max_nlevels, len(col))

        # if the reference is a regular flat index, then no actions are required (the result will be
        # a flat index containing tuples of different lengths, this behavior fully matches pandas).
        # Yes, this shortcut skips the 'if max_columns_nlevels > max_nlevels' below check on purpose.
        if max_nlevels == 1:
            return columns, {}

        max_columns_nlevels = 1
        for col in columns:
            if isinstance(col, tuple):
                max_columns_nlevels = max(max_columns_nlevels, len(col))

        if max_columns_nlevels > max_nlevels:
            raise ValueError(
                f"The reference number of levels is greater than the maximum number of levels in columns: {max_columns_nlevels} > {max_nlevels}"
            )

        new_columns = []
        old_to_new_mapping = {}
        for col in columns:
            old_col = col
            if not isinstance(col, tuple):
                col = (col,)
            col = col + ("",) * (max_nlevels - len(col))
            new_columns.append(col)
            if old_col != col:
                old_to_new_mapping[old_col] = col

        return new_columns, old_to_new_mapping

    def _normalize_self_levels(self, reference=None):
        """
        Call ``self._normalize_levels()`` for known and unknown dtypes of this object.

        Parameters
        ----------
        reference : pandas.Index, optional
        """
        _, old_to_new_mapping = self._normalize_levels(
            self._known_dtypes.keys(), reference
        )
        for old_col, new_col in old_to_new_mapping.items():
            value = self._known_dtypes.pop(old_col)
            self._known_dtypes[new_col] = value
        self._cols_with_unknown_dtypes, _ = self._normalize_levels(
            self._cols_with_unknown_dtypes, reference
        )


class ModinDtypes:
    """
    A class that hides the various implementations of the dtypes needed for optimization.

    Parameters
    ----------
    value : pandas.Series, callable, DtypesDescriptor or ModinDtypes, optional
        A callable or a ``pandas.Series`` is stored as is.
        A ``DtypesDescriptor`` is converted to a ``pandas.Series`` if it's already
        materialized, otherwise it's stored as is.
        For a ``ModinDtypes`` the internal value of its copy is taken.
        If ``None`` is passed, an empty ``DtypesDescriptor`` is created.

    Raises
    ------
    ValueError
        If `value` is of an unsupported type.
    """

    def __init__(
        self,
        value: Optional[Union[Callable, pandas.Series, DtypesDescriptor, ModinDtypes]],
    ):
        if callable(value) or isinstance(value, pandas.Series):
            self._value = value
        elif isinstance(value, DtypesDescriptor):
            self._value = value.to_series() if value.is_materialized else value
        elif isinstance(value, type(self)):
            self._value = value.copy()._value
        elif value is None:
            # 'isinstance(value, None)' is not a valid check (it raises a 'TypeError'),
            # 'None' has to be detected by identity
            self._value = DtypesDescriptor()
        else:
            raise ValueError(f"ModinDtypes doesn't work with '{value}'")

    def __repr__(self):  # noqa: GL08
        return f"ModinDtypes:\n\tvalue type: {type(self._value)};\n\tvalue:\n\t{self._value}"

    def __str__(self):  # noqa: GL08
        return self.__repr__()

    @property
    def is_materialized(self) -> bool:
        """
        Check if the internal representation is materialized.

        Returns
        -------
        bool
            ``True`` if the stored value is a ``pandas.Series``.
        """
        return isinstance(self._value, pandas.Series)

    def get_dtypes_set(self) -> set[DtypeObj]:
        """
        Get a set of dtypes from the descriptor.

        Returns
        -------
        set[DtypeObj]

        Notes
        -----
        If the stored value is a callable, executing this function materializes the data.
        """
        if isinstance(self._value, DtypesDescriptor):
            return self._value.get_dtypes_set()
        if not self.is_materialized:
            self.get()
        return set(self._value.values)

    def maybe_specify_new_frame_ref(self, new_parent: PandasDataframe) -> ModinDtypes:
        """
        Set a new parent for the stored value if needed.

        Parameters
        ----------
        new_parent : PandasDataframe

        Returns
        -------
        ModinDtypes
            A copy of ``ModinDtypes`` with updated parent.
        """
        new_self = self.copy()
        if new_self.is_materialized:
            LazyProxyCategoricalDtype.update_dtypes(new_self._value, new_parent)
            return new_self
        if isinstance(self._value, DtypesDescriptor):
            new_self._value.update_parent(new_parent)
            return new_self
        # a callable doesn't hold a parent that could be updated here
        return new_self

    def lazy_get(self, ids: list, numeric_index: bool = False) -> ModinDtypes:
        """
        Get new ``ModinDtypes`` for a subset of columns without triggering any computations.

        Parameters
        ----------
        ids : list of index labels or positional indexers
            Columns for the subset.
        numeric_index : bool, default: False
            Whether `ids` are positional indices or column labels to take.

        Returns
        -------
        ModinDtypes
            ``ModinDtypes`` that describes dtypes for columns specified in `ids`.
        """
        if isinstance(self._value, DtypesDescriptor):
            res = self._value.lazy_get(ids, numeric_index)
            return ModinDtypes(res)
        elif callable(self._value):
            new_self = self.copy()
            old_value = new_self._value
            # the subset is taken only when the new callable is invoked
            new_self._value = lambda: (
                old_value().iloc[ids] if numeric_index else old_value()[ids]
            )
            return new_self
        ErrorMessage.catch_bugs_and_request_email(
            failure_condition=not self.is_materialized
        )
        return ModinDtypes(self._value.iloc[ids] if numeric_index else self._value[ids])

    @classmethod
    def concat(cls, values: list, axis: int = 0) -> ModinDtypes:
        """
        Concatenate dtypes.

        Parameters
        ----------
        values : list of DtypesDescriptors, pandas.Series, ModinDtypes and Nones
        axis : int, default: 0
            If ``axis == 0``: concatenate column names. This implements the logic of
            how dtypes are combined on ``pd.concat([df1, df2], axis=1)``.
            If ``axis == 1``: perform a union join for the column names described by
            `values` and then find common dtypes for the columns appeared to be in
            an intersection. This implements the logic of how dtypes are combined on
            ``pd.concat([df1, df2], axis=0).dtypes``.

        Returns
        -------
        ModinDtypes

        Raises
        ------
        NotImplementedError
            If one of the `values` is of an unsupported type, or if
            ``DtypesDescriptor.concat()`` raised this error and the pure pandas
            fallback is not applicable.
        """
        preprocessed_vals = []
        for val in values:
            if isinstance(val, cls):
                val = val._value
            if isinstance(val, (DtypesDescriptor, pandas.Series)) or val is None:
                preprocessed_vals.append(val)
            else:
                raise NotImplementedError(type(val))

        try:
            desc = DtypesDescriptor.concat(preprocessed_vals, axis=axis)
        except NotImplementedError as e:
            # 'DtypesDescriptor' doesn't support duplicated labels, however, if all values are pandas Series,
            # we still can perform concatenation using pure pandas
            if (
                # 'pd.concat(axis=1)' fails on duplicated labels anyway, so doing this logic
                # only in case 'axis=0'
                axis == 1
                # using 'str(e)' so an exception without a string message can't break the handler
                or "duplicated" not in str(e).lower()
                or not all(isinstance(val, pandas.Series) for val in values)
            ):
                raise
            desc = pandas.concat(values)
        return ModinDtypes(desc)

    def set_index(self, new_index: Union[pandas.Index, ModinIndex]) -> ModinDtypes:
        """
        Set new column names for stored dtypes.

        Parameters
        ----------
        new_index : pandas.Index or ModinIndex

        Returns
        -------
        ModinDtypes
            New ``ModinDtypes`` with updated column names.
        """
        new_self = self.copy()
        if self.is_materialized:
            new_self._value.index = new_index
            return new_self
        elif callable(self._value):
            old_val = new_self._value
            # the labels are replaced only when the new callable is invoked
            new_self._value = lambda: old_val().set_axis(new_index)
            return new_self
        elif isinstance(new_self._value, DtypesDescriptor):
            new_self._value = new_self._value.set_index(new_index)
            return new_self
        else:
            raise NotImplementedError()

    def get(self) -> pandas.Series:
        """
        Get the materialized internal representation.

        Returns
        -------
        pandas.Series

        Notes
        -----
        The materialized value replaces the stored one, so the computation
        is performed only once.
        """
        if not self.is_materialized:
            if callable(self._value):
                self._value = self._value()
                if self._value is None:
                    # the callable produced nothing, treating it as empty dtypes
                    self._value = pandas.Series([])
            elif isinstance(self._value, DtypesDescriptor):
                self._value = self._value.to_series()
            else:
                raise NotImplementedError(type(self._value))
        return self._value

    def __len__(self):
        """
        Redirect the 'len' request to the internal representation.

        Returns
        -------
        int

        Notes
        -----
        Executing this function materializes the data.
        """
        if not self.is_materialized:
            self.get()
        return len(self._value)

    def __reduce__(self):
        """
        Serialize an object of this class.

        Returns
        -------
        tuple

        Notes
        -----
        The default implementation generates a recursion error. In a short:
        during the construction of the object, `__getattr__` function is called, which
        is not intended to be used in situations where the object is not initialized.
        """
        return (self.__class__, (self._value,))

    def __getattr__(self, name):
        """
        Redirect access to non-existent attributes to the internal representation.

        This is necessary so that objects of this class in most cases mimic the behavior
        of the ``pandas.Series``. The main limitations of the current approach are type
        checking and the use of this object where pandas dtypes are supposed to be used.

        Parameters
        ----------
        name : str
            Attribute name.

        Returns
        -------
        object
            Attribute.

        Notes
        -----
        Executing this function materializes the data.
        """
        if not self.is_materialized:
            self.get()
        return self._value.__getattribute__(name)

    def copy(self) -> ModinDtypes:
        """
        Copy an object without materializing the internal representation.

        Returns
        -------
        ModinDtypes

        Notes
        -----
        A callable value is shared between the original and the copy,
        any other value is copied via its ``.copy()`` method.
        """
        idx_cache = self._value
        if not callable(idx_cache):
            idx_cache = idx_cache.copy()
        return ModinDtypes(idx_cache)

    def __getitem__(self, key):  # noqa: GL08
        if not self.is_materialized:
            self.get()
        return self._value.__getitem__(key)

    def __setitem__(self, key, item):  # noqa: GL08
        if not self.is_materialized:
            self.get()
        self._value.__setitem__(key, item)

    def __iter__(self):  # noqa: GL08
        if not self.is_materialized:
            self.get()
        return iter(self._value)

    def __contains__(self, key):  # noqa: GL08
        if not self.is_materialized:
            self.get()
        return key in self._value


class LazyProxyCategoricalDtype(pandas.CategoricalDtype):
    """
    A ``pandas.CategoricalDtype`` whose categories are extracted from a parent on demand.

    Parameters
    ----------
    categories : list-like, optional
    ordered : bool, default: False

    Notes
    -----
    The constructor exists for compatibility purposes only and must not be used to
    create a proxy. Proxy instances have to be built with the `._build_proxy(...)`
    class method.
    """

    def __init__(self, categories=None, ordered=False):
        # Placeholders only, the real values are assigned by `._build_proxy()` afterwards
        self._parent = None
        self._column_name = None
        self._categories_val = None
        self._materializer = None
        super().__init__(categories, ordered)

    @staticmethod
    def update_dtypes(dtypes, new_parent):
        """
        Re-point every categorical proxy found in `dtypes` to a new parent.

        Parameters
        ----------
        dtypes : dict-like
            A dict-like object describing dtypes. Values that are categorical proxies
            are replaced inplace, all other values are left untouched.
        new_parent : object
        """
        for label, dtype in dtypes.items():
            if not isinstance(dtype, LazyProxyCategoricalDtype):
                continue
            dtypes[label] = dtype._update_proxy(new_parent, column_name=label)

    def _update_proxy(self, parent, column_name):
        """
        Get a dtype that is valid for the passed parent and column name.

        Parameters
        ----------
        parent : object
            Source object to extract categories on demand.
        column_name : str
            Column name of the categorical column in the source object.

        Returns
        -------
        pandas.CategoricalDtype or LazyProxyCategoricalDtype
            A plain ``pandas.CategoricalDtype`` if the categories are already materialized,
            `self` if neither parent nor column name differ, a new proxy otherwise.
        """
        if self._is_materialized:
            # Categories are already known, so a proxy is not needed anymore.
            return pandas.CategoricalDtype(self.categories, ordered=self._ordered)
        same_source = parent is self._parent and column_name == self._column_name
        if same_source:
            return self
        return self._build_proxy(parent, column_name, self._materializer)

    @classmethod
    def _build_proxy(cls, parent, column_name, materializer, dtype=None):
        """
        Create a lazy proxy instance.

        Parameters
        ----------
        parent : object
            Source object to extract categories on demand.
        column_name : str
            Column name of the categorical column in the source object.
        materializer : callable(parent, column_name) -> pandas.CategoricalDtype
            A function to call in order to extract categorical values.
        dtype : dtype, optional
            The categories dtype.

        Returns
        -------
        LazyProxyCategoricalDtype
        """
        proxy = cls()
        proxy._parent, proxy._column_name = parent, column_name
        proxy._materializer = materializer
        proxy._dtype = dtype
        return proxy

    def _get_dtype(self):
        """
        Get the dtype of the categories.

        Returns
        -------
        dtype
            The stored dtype if there's one, otherwise the dtype of ``self.categories``
            (which is then stored for further calls).
        """
        cached = self._dtype
        if cached is None:
            cached = self._dtype = self.categories.dtype
        return cached

    def __reduce__(self):
        """
        Serialize an object of this class.

        Returns
        -------
        tuple

        Notes
        -----
        A proxy holds references to its potentially distributed parent and so can't be
        serialized properly, that's why the object is serialized as a plain
        ``pandas.CategoricalDtype``.
        """
        constructor_args = (self.categories, self.ordered)
        return (pandas.CategoricalDtype, constructor_args)

    @property
    def _categories(self):
        """
        Get categorical values, materializing them first if required.

        Returns
        -------
        pandas.Index
        """
        if not self._is_materialized:
            self._materialize_categories()
        return self._categories_val

    @_categories.setter
    def _categories(self, categories):
        """
        Store new categorical values and drop everything related to lazy extraction.

        Parameters
        ----------
        categories : list-like
        """
        self._categories_val = categories
        # Neither the parent nor the materializer are needed once the values are set
        self._parent = None
        self._materializer = None
        self._dtype = None

    @property
    def _is_materialized(self) -> bool:
        """
        Tell whether the categorical values are already stored in this object.

        Returns
        -------
        bool
        """
        return self._categories_val is not None

    def _materialize_categories(self):
        """Extract actual categorical values using the stored materializer."""
        ErrorMessage.catch_bugs_and_request_email(
            failure_condition=self._parent is None,
            extra_log="attempted to materialize categories with parent being 'None'",
        )
        materialized = self._materializer(self._parent, self._column_name)
        # the assignment below goes through the setter that also resets the parent
        self._categories = materialized.categories
        self._ordered = materialized.ordered


def get_categories_dtype(
    cdt: Union[LazyProxyCategoricalDtype, pandas.CategoricalDtype],
) -> DtypeObj:
    """
    Get the dtype of the categories of a categorical dtype.

    Parameters
    ----------
    cdt : LazyProxyCategoricalDtype or pandas.CategoricalDtype

    Returns
    -------
    dtype
        For a lazy proxy it's the result of its ``._get_dtype()`` method,
        otherwise it's the dtype of ``cdt.categories``.
    """
    if isinstance(cdt, LazyProxyCategoricalDtype):
        return cdt._get_dtype()
    return cdt.categories.dtype


def extract_dtype(value) -> DtypeObj | pandas.Series:
    """
    Get a dtype that corresponds to the passed `value`.

    Parameters
    ----------
    value : object
        Either something ``pandas.api.types.pandas_dtype()`` can interpret as a dtype
        or data that can be passed to the ``pandas.Series`` constructor.

    Returns
    -------
    DtypeObj
    """
    try:
        return pandas.api.types.pandas_dtype(value)
    except (TypeError, ValueError):
        # `value` doesn't describe a dtype, so the dtype is inferred from the data
        return pandas.Series(value).dtype


================================================
FILE: modin/core/dataframe/pandas/metadata/index.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module contains class ModinIndex."""

import uuid
from typing import Optional

import pandas
from pandas.core.dtypes.common import is_list_like
from pandas.core.indexes.api import ensure_index


class ModinIndex:
    """
    A wrapper around an index that allows to postpone its computation.

    Parameters
    ----------
    value : sequence, PandasDataframe or callable() -> (pandas.Index, list of ints), optional
        A callable is stored as is and is expected to return a pandas Index and
        a list of partition lengths along the index axis.
        A ``PandasDataframe`` is used to lazily extract the labels when they're
        required, the `axis` parameter must be passed in this case.
        ``None`` makes the index incomplete: an attempt to materialize it raises
        a ``RuntimeError`` until a frame is specified via the
        ``.maybe_specify_new_frame_ref()`` method, the `axis` parameter must be
        passed in this case as well.
        Anything else is considered to be the index values.

    axis : int, optional
        Specifies an axis the object represents, serves as an optional hint. This parameter
        must be passed in case value is a ``PandasDataframe`` or ``None``.
    dtypes : pandas.Series, optional
        Materialized dtypes of index levels.
    """

    def __init__(self, value=None, axis=None, dtypes: Optional[pandas.Series] = None):
        from modin.core.dataframe.pandas.dataframe.dataframe import PandasDataframe

        self._axis = axis
        self._dtypes = dtypes
        self._is_default_callable = False
        self._lengths_cache = None
        # IDs of the index and of the lengths make comparison with other ModinIndex
        # objects faster, copies of the index are supposed to inherit them
        self._index_id = uuid.uuid4()
        self._lengths_id = uuid.uuid4()

        if value is None:
            assert axis is not None
            self._value = None
        elif callable(value):
            self._value = value
        elif isinstance(value, PandasDataframe):
            assert axis is not None
            self._value = self._get_default_callable(value, axis)
            self._is_default_callable = True
        else:
            self._value = ensure_index(value)

    def maybe_get_dtypes(self) -> Optional[pandas.Series]:
        """
        Get dtypes of the index levels without materializing the index.

        Returns
        -------
        pandas.Series or None
            ``None`` if dtypes were not provided and the index is not materialized.
        """
        if self._dtypes is None and self.is_materialized:
            index = self._value
            if isinstance(index, pandas.MultiIndex):
                self._dtypes = index.dtypes
            else:
                self._dtypes = pandas.Series([index.dtype], index=[index.name])
        return self._dtypes

    @staticmethod
    def _get_default_callable(dataframe_obj, axis):
        """
        Create a callable that extracts labels and partitions lengths from a frame.

        Parameters
        ----------
        dataframe_obj : PandasDataframe
        axis : int
            0 - extract indices, 1 - extract columns.

        Returns
        -------
        callable() -> tuple(pandas.Index, list[ints])
            The callable invokes ``dataframe_obj._compute_axis_labels_and_lengths(axis)``.
        """
        return lambda: dataframe_obj._compute_axis_labels_and_lengths(axis)

    def maybe_specify_new_frame_ref(self, value, axis) -> "ModinIndex":
        """
        Make the index lazily extract its labels from a new frame if it's needed.

        A new reference is set only if the index doesn't hold any value yet or if
        it holds the default callable (the one built from a ``PandasDataframe``).
        Updating the reference allows not to keep in memory the frames that are
        not needed anymore: once no references to an old frame are left, it can be
        garbage collected.

        Parameters
        ----------
        value : PandasDataframe
            New dataframe to reference.
        axis : int
            Axis to extract labels from.

        Returns
        -------
        ModinIndex
            `self` if the reference wasn't updated, a new ModinIndex otherwise.
        """
        needs_new_ref = self._value is None or (
            callable(self._value) and self._is_default_callable
        )
        if not needs_new_ref:
            return self

        updated = self.copy(copy_lengths=True)
        updated._axis = axis
        updated._value = self._get_default_callable(value, axis)
        # the copy inherits 'False' for the flag if '._value' was 'None' initially,
        # from now on the value is a default callable, so the flag must be 'True'
        updated._is_default_callable = True
        return updated

    @property
    def is_materialized(self) -> bool:
        """
        Tell whether the stored value is a materialized index.

        Returns
        -------
        bool
        """
        return self.is_materialized_index(self)

    @classmethod
    def is_materialized_index(cls, index) -> bool:
        """
        Tell whether the passed object represents a materialized index.

        Parameters
        ----------
        index : object
            An object to check, for an instance of this class its stored value is checked.

        Returns
        -------
        bool
        """
        # a top-level import would cause a circular import
        from modin.pandas.indexing import is_range_like

        candidate = index._value if isinstance(index, cls) else index
        return (
            is_list_like(candidate)
            or is_range_like(candidate)
            or isinstance(candidate, slice)
        )

    def get(self, return_lengths=False) -> pandas.Index:
        """
        Get the index values, computing them first if required.

        Parameters
        ----------
        return_lengths : bool, default: False
            Whether to also return the partitions lengths that might have been
            obtained during the index computation.

        Returns
        -------
        pandas.Index
            If `return_lengths` is True, a tuple of the index and the stored
            partitions lengths (may be ``None``) is returned instead.
        """
        if not self.is_materialized:
            producer = self._value
            if producer is None:
                raise RuntimeError(
                    "It's not allowed to call '.materialize()' before '._value' is specified."
                )
            if not callable(producer):
                raise NotImplementedError(type(producer))
            labels, self._lengths_cache = producer()
            self._value = ensure_index(labels)
        return (self._value, self._lengths_cache) if return_lengths else self._value

    def equals(self, other: "ModinIndex") -> bool:
        """
        Compare values of this index with values of another one.

        Parameters
        ----------
        other : ModinIndex

        Returns
        -------
        bool
            The result of the comparison.

        Notes
        -----
        Unless the index IDs match, both indices get materialized.
        """
        if self._index_id == other._index_id:
            return True

        for index in (self, other):
            if not index.is_materialized:
                index.get()

        return self._value.equals(other._value)

    def compare_partition_lengths_if_possible(self, other: "ModinIndex"):
        """
        Compare the stored partition lengths of two indices if they're available.

        A ``ModinIndex`` may hold the partition lengths along the axis it belongs to.
        The lengths are considered available if they're already stored or if the
        index value is a callable (so they can be obtained by materializing it).

        Parameters
        ----------
        other : ModinIndex

        Returns
        -------
        bool or None
            The result of the comparison if the lengths are available for both
            `self` and `other`, ``None`` otherwise.
        """
        if self._lengths_id == other._lengths_id:
            return True

        lengths_available = [
            index._lengths_cache is not None or callable(index._value)
            for index in (self, other)
        ]
        if not all(lengths_available):
            return None

        own_lengths = self.get(return_lengths=True)[1]
        other_lengths = other.get(return_lengths=True)[1]
        return own_lengths == other_lengths

    def __len__(self):
        """
        Get the length of the internal representation.

        Returns
        -------
        int

        Notes
        -----
        Executing this function materializes the data.
        """
        if not self.is_materialized:
            self.get()
        return len(self._value)

    def __reduce__(self):
        """
        Serialize an object of this class.

        Returns
        -------
        tuple

        Notes
        -----
        The default implementation generates a recursion error. In a short:
        during the construction of the object, `__getattr__` function is called, which
        is not intended to be used in situations where the object is not initialized.
        """
        constructor_args = (self._value, self._axis)
        state = {
            "_lengths_cache": self._lengths_cache,
            "_index_id": self._index_id,
            "_lengths_id": self._lengths_id,
            "_is_default_callable": self._is_default_callable,
        }
        return (self.__class__, constructor_args, state)

    def __getitem__(self, key):
        """
        Get an index value at the position of `key`.

        Parameters
        ----------
        key : int

        Returns
        -------
        label

        Notes
        -----
        Executing this function materializes the data.
        """
        if not self.is_materialized:
            self.get()
        return self._value[key]

    def __getattr__(self, name):
        """
        Look up attributes that this object doesn't have in the internal representation.

        This is necessary so that objects of this class in most cases mimic the behavior
        of the ``pandas.Index``. The main limitations of the current approach are type
        checking and the use of this object where pandas indexes are supposed to be used.

        Parameters
        ----------
        name : str
            Attribute name.

        Returns
        -------
        object
            Attribute.

        Notes
        -----
        Executing this function materializes the data.
        """
        if not self.is_materialized:
            self.get()
        return self._value.__getattribute__(name)

    def copy(self, copy_lengths=False) -> "ModinIndex":
        """
        Create a copy of this object without materializing the internal representation.

        Parameters
        ----------
        copy_lengths : bool, default: False
            Whether the new index object should inherit the stored partition
            lengths and their ID.

        Returns
        -------
        ModinIndex

        Notes
        -----
        The index ID is always inherited by the copy. A callable value is shared
        with the copy, a materialized one is copied via its ``.copy()`` method.
        """
        payload = self._value
        if not (payload is None or callable(payload)):
            payload = payload.copy()
        clone = ModinIndex(payload, axis=self._axis, dtypes=self._dtypes)
        clone._index_id = self._index_id
        clone._is_default_callable = self._is_default_callable
        if copy_lengths:
            clone._lengths_cache = self._lengths_cache
            clone._lengths_id = self._lengths_id
        return clone


================================================
FILE: modin/core/dataframe/pandas/partitioning/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Base Modin Dataframe classes related to its partitioning and optimized for pandas storage format."""


================================================
FILE: modin/core/dataframe/pandas/partitioning/axis_partition.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""The module defines base interface for an axis partition of a Modin DataFrame."""

import warnings

import numpy as np
import pandas

from modin.config import MinColumnPartitionSize, MinRowPartitionSize
from modin.core.dataframe.base.partitioning.axis_partition import (
    BaseDataframeAxisPartition,
)
from modin.core.storage_formats.pandas.utils import (
    generate_result_of_axis_func_pandas,
    split_result_of_axis_func_pandas,
)

from .partition import PandasDataframePartition


class PandasDataframeAxisPartition(BaseDataframeAxisPartition):
    """
    An abstract class created to simplify and consolidate the code for axis partitions that run pandas.

    Because much of the code is similar, this allows us to reuse this code.

    Parameters
    ----------
    list_of_partitions : Union[list, PandasDataframePartition]
        List of ``PandasDataframePartition`` and
        ``PandasDataframeAxisPartition`` objects, or a single
        ``PandasDataframePartition``.
    get_ip : bool, default: False
        Whether to get node IP addresses to conforming partitions or not.
    full_axis : bool, default: True
        Whether or not the axis partition encompasses the whole axis.
    call_queue : list, optional
        A list of tuples (callable, args, kwargs) that contains deferred calls.
    length : the future's type or int, optional
        Length, or reference to length, of wrapped ``pandas.DataFrame``.
    width : the future's type or int, optional
        Width, or reference to width, of wrapped ``pandas.DataFrame``.
    """

    def __init__(
        self,
        list_of_partitions,
        get_ip=False,
        full_axis=True,
        call_queue=None,
        length=None,
        width=None,
    ):
        # NOTE: `get_ip` is accepted for interface compatibility but is not read here.
        # A lone block partition is normalized to a one-element list.
        if isinstance(list_of_partitions, PandasDataframePartition):
            list_of_partitions = [list_of_partitions]

        self.full_axis = full_axis
        self.call_queue = call_queue if call_queue else []
        self._length_cache = length
        self._width_cache = width

        # Defensive check: every nested axis partition must share one axis.
        # Mixed axes should never occur in the current implementation, and
        # failing early here prevents undefined behavior later on.
        nested_axes = {
            part.axis
            for part in list_of_partitions
            if isinstance(part, PandasDataframeAxisPartition)
        }
        assert len(nested_axes) <= 1

        self._list_of_constituent_partitions = list_of_partitions
        # The flat list of block partitions is computed lazily, because
        # producing it may require draining call queues.
        self._list_of_block_partitions = None

    @property
    def list_of_blocks(self):
        """
        Get the list of physical partition objects that compose this partition.

        Returns
        -------
        list
            A list of physical partition objects (``ray.ObjectRef``, ``distributed.Future`` e.g.).
        """
        # Call queues are drained lazily: the draining is hidden inside each
        # `partition.list_of_blocks` access, so it only happens once the
        # physical objects are actually requested.
        # TODO Look into draining call queue at the same time as the task
        blocks = []
        for block_partition in self.list_of_block_partitions:
            blocks.append(block_partition.list_of_blocks[0])
        return blocks

    @property
    def list_of_block_partitions(self) -> list:
        """
        Get the list of block partitions that compose this partition.

        The list is computed on first access and cached afterwards. It is
        assembled in a local variable and stored on the instance only once
        every constituent partition has been processed, so an exception raised
        while draining or materializing a nested partition does not leave a
        partially filled list in the cache.

        Returns
        -------
        List
            A list of ``PandasDataframePartition``.
        """
        if self._list_of_block_partitions is not None:
            return self._list_of_block_partitions

        block_partitions = []
        # Extract block partitions from the block and axis partitions that
        # constitute this partition.
        for partition in self._list_of_constituent_partitions:
            if not isinstance(partition, PandasDataframeAxisPartition):
                # Already a block partition: take it as is.
                block_partitions.append(partition)
            elif partition.axis == self.axis:
                # We are building an axis partition out of another
                # axis partition `partition` that contains its own list
                # of block partitions, partition.list_of_block_partitions.
                # `partition` may have its own call queue, which has to be
                # applied to the entire `partition` before we execute any
                # further operations on its block partitions.
                partition.drain_call_queue()
                block_partitions.extend(partition.list_of_block_partitions)
            else:
                # If this axis partition is made of axis partitions
                # for the other axes, squeeze such partitions into a single
                # block so that this partition only holds a one-dimensional
                # list of blocks. We could change this implementation to
                # hold a 2-d list of blocks, but that would complicate the
                # code quite a bit.
                block_partitions.append(
                    partition.force_materialization().list_of_block_partitions[0]
                )

        # Publish the cache only after the whole list was built successfully.
        self._list_of_block_partitions = block_partitions
        return block_partitions

    @classmethod
    def _get_drain_func(cls):  # noqa: GL08
        # Supplies the callable that `drain_call_queue` hands to `apply` in order
        # to execute a deferred call queue; this implementation returns `drain`.
        return PandasDataframeAxisPartition.drain

    def drain_call_queue(self, num_splits=None):
        """
        Execute all operations stored in this partition's call queue.

        Parameters
        ----------
        num_splits : int, default: None
            The number of times to split the result object.

        Notes
        -----
        If executing the queue raises, the queue is put back on the partition
        before the exception is re-raised.
        """
        pending = self.call_queue
        if not pending:
            # Nothing is queued on the axis partition itself, but block
            # partitions might have deferred call queues; reading
            # `list_of_blocks` implicitly drains those.
            _ = self.list_of_blocks
            return

        # The queue is emptied before `.apply()` runs so that `.apply()`
        # does not try to drain it again.
        self.call_queue = []
        try:
            drained = self.apply(
                self._get_drain_func(), num_splits=num_splits, call_queue=pending
            )
        except Exception:
            # Most likely the queue was not executed, so restore it.
            self.call_queue = pending
            raise

        # `.apply()` may hand back a single partition instead of a list.
        self._list_of_block_partitions = (
            drained if isinstance(drained, list) else [drained]
        )

    def force_materialization(self, get_ip=False):
        """
        Materialize partitions into a single partition.

        Parameters
        ----------
        get_ip : bool, default: False
            Whether to get node ip address to a single partition or not.

        Returns
        -------
        PandasDataframeAxisPartition
            An axis partition containing only a single materialized partition.
        """
        single = super().force_materialization(get_ip=get_ip)
        # Adopt the block list of the materialized partition as this
        # partition's own cached block list.
        self._list_of_block_partitions = single.list_of_block_partitions
        return single

    def apply(
        self,
        func,
        *args,
        num_splits=None,
        other_axis_partition=None,
        maintain_partitioning=True,
        lengths=None,
        manual_partition=False,
        **kwargs,
    ):
        """
        Apply a function to this axis partition along full axis.

        Parameters
        ----------
        func : callable
            The function to apply.
        *args : iterable
            Positional arguments to pass to `func`.
        num_splits : int, default: None
            The number of times to split the result object. Forced to 1 when
            this partition does not span the full axis; defaults to the number
            of blocks of this partition otherwise.
        other_axis_partition : PandasDataframeAxisPartition, default: None
            Another `PandasDataframeAxisPartition` object (or a list of them)
            to be applied to func. This is for operations that are between
            two data sets.
        maintain_partitioning : bool, default: True
            Whether to keep the partitioning in the same
            orientation as it was previously or not. This is important because we may be
            operating on an individual AxisPartition and not touching the rest.
            In this case, we have to return the partitioning to its previous
            orientation (the lengths will remain the same). This is ignored between
            two axis partitions.
        lengths : iterable, default: None
            The list of lengths to shuffle the object.
        manual_partition : bool, default: False
            If True, partition the result with `lengths`.
        **kwargs : dict
            Additional keywords arguments to be passed in `func`.

        Returns
        -------
        list
            A list of `PandasDataframePartition` objects. When no
            `other_axis_partition` is given and this partition does not span
            the full axis, the single resulting partition is returned instead
            of a list.
        """
        if not self.full_axis:
            # A partial axis partition already holds a subset of the full
            # axis, so its result must not be split any further.
            num_splits = 1
        if self.call_queue:
            self.drain_call_queue()
        if num_splits is None:
            num_splits = len(self.list_of_blocks)

        # The minimum split size is taken from the row or the column setting
        # depending on this partition's axis.
        if self.axis == 0:
            min_block_size = MinRowPartitionSize.get()
        else:
            min_block_size = MinColumnPartitionSize.get()

        if other_axis_partition is not None:
            if not isinstance(other_axis_partition, list):
                other_axis_partition = [other_axis_partition]

            # (other_shape[i-1], other_shape[i]) will indicate slice
            # to restore i-1 axis partition
            other_shape = np.cumsum(
                [0] + [len(other.list_of_blocks) for other in other_axis_partition]
            )

            # Blocks of this partition go first, followed by the flattened
            # blocks of every right-hand axis partition.
            return self._wrap_partitions(
                self.deploy_func_between_two_axis_partitions(
                    self.axis,
                    func,
                    args,
                    kwargs,
                    num_splits,
                    len(self.list_of_blocks),
                    other_shape,
                    *self.list_of_blocks,
                    *(
                        block
                        for other in other_axis_partition
                        for block in other.list_of_blocks
                    ),
                    min_block_size=min_block_size,
                )
            )

        result = self._wrap_partitions(
            self.deploy_axis_func(
                self.axis,
                func,
                args,
                kwargs,
                num_splits,
                maintain_partitioning,
                *self.list_of_blocks,
                min_block_size=min_block_size,
                lengths=lengths,
                manual_partition=manual_partition,
            )
        )
        # For a partial axis partition, just take out the single split in the result.
        return result if self.full_axis else result[0]

    def split(
        self, split_func, num_splits, f_args=None, f_kwargs=None, extract_metadata=False
    ):
        """
        Split axis partition into multiple partitions using the `split_func`.

        Parameters
        ----------
        split_func : callable(pandas.DataFrame) -> list[pandas.DataFrame]
            A function that takes partition's content and split it into multiple chunks.
        num_splits : int
            The number of splits the `split_func` return.
        f_args : iterable, optional
            Positional arguments to pass to the `split_func`.
        f_kwargs : dict, optional
            Keyword arguments to pass to the `split_func`.
        extract_metadata : bool, default: False
            Whether to return metadata (length, width, ip) of the result. Passing `False` may relax
            the load on object storage as the remote function would return X times fewer futures
            (where X is the number of metadata values). Passing `False` makes sense for temporary
            results where you know for sure that the metadata will never be requested.

        Returns
        -------
        list
            List of wrapped remote partition objects.
        """
        if f_args is None:
            f_args = tuple()
        if f_kwargs is None:
            f_kwargs = {}

        raw_splits = self.deploy_splitting_func(
            self.axis,
            split_func,
            f_args,
            f_kwargs,
            num_splits,
            *self.list_of_blocks,
            extract_metadata=extract_metadata,
        )
        return self._wrap_partitions(raw_splits, extract_metadata=extract_metadata)

    @classmethod
    def deploy_splitting_func(
        cls,
        axis,
        split_func,
        f_args,
        f_kwargs,
        num_splits,
        *partitions,
        extract_metadata=False,
    ):
        """
        Deploy a splitting function along a full axis.

        The partitions are concatenated along `axis` and the combined frame is
        handed to `split_func`.

        Parameters
        ----------
        axis : {0, 1}
            The axis to perform the function along.
        split_func : callable(pandas.DataFrame) -> list[pandas.DataFrame]
            The function to perform.
        f_args : list or tuple
            Positional arguments to pass to `split_func`.
        f_kwargs : dict
            Keyword arguments to pass to `split_func`.
        num_splits : int
            The number of splits the `split_func` return. Not read by this
            implementation.
        *partitions : iterable
            All partitions that make up the full axis (row or column).
        extract_metadata : bool, default: False
            Whether to return metadata (length, width, ip) of the result. Note that `True` value
            is not supported in `PandasDataframeAxisPartition` class.

        Returns
        -------
        list
            A list of pandas DataFrames.
        """
        combined = pandas.concat([*partitions], axis=axis, copy=False)
        # Drop the local reference to the inputs to reduce peak memory consumption.
        del partitions
        return split_func(combined, *f_args, **f_kwargs)

    @classmethod
    def deploy_axis_func(
        cls,
        axis,
        func,
        f_args,
        f_kwargs,
        num_splits,
        maintain_partitioning,
        *partitions,
        min_block_size,
        lengths=None,
        manual_partition=False,
        return_generator=False,
    ):
        """
        Deploy a function along a full axis.

        The partitions are concatenated along `axis`, `func` is applied to the
        combined frame, and the result is split back into pieces.

        Parameters
        ----------
        axis : {0, 1}
            The axis to perform the function along.
        func : callable
            The function to perform.
        f_args : list or tuple
            Positional arguments to pass to ``func``.
        f_kwargs : dict
            Keyword arguments to pass to ``func``.
        num_splits : int
            The number of splits to return (see `split_result_of_axis_func_pandas`).
        maintain_partitioning : bool
            If True, keep the old partitioning if possible.
            If False, create a new partition layout.
        *partitions : iterable
            All partitions that make up the full axis (row or column).
        min_block_size : int
            Minimum number of rows/columns in a single split.
        lengths : list, optional
            The list of lengths to shuffle the object.
        manual_partition : bool, default: False
            If True, partition the result with `lengths`.
        return_generator : bool, default: False
            Return a generator from the function, set to `True` for Ray backend
            as Ray remote functions can return Generators.

        Returns
        -------
        list | Generator
            A list or generator of pandas DataFrames.
        """
        # Remember the incoming layout before the inputs are released.
        n_partitions = len(partitions)
        row_counts = [len(part) for part in partitions]
        col_counts = [len(part.columns) for part in partitions]

        combined = pandas.concat(list(partitions), axis=axis, copy=False)

        # to reduce peak memory consumption
        del partitions

        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=FutureWarning)
            try:
                result = func(combined, *f_args, **f_kwargs)
            except ValueError as err:
                if "assignment destination is read-only" not in str(err):
                    raise err
                # The combined frame could not be written to; retry on a copy.
                result = func(combined.copy(), *f_args, **f_kwargs)

        # to reduce peak memory consumption
        del combined

        if num_splits == 1:
            # A result that is not split needs no split lengths.
            lengths = None
        elif manual_partition:
            # The split function is expecting a list
            lengths = list(lengths)
        elif num_splits != n_partitions or not maintain_partitioning:
            # The number of splits is changing or we were told not to keep the
            # old partitioning, so the old lengths must not be reused.
            lengths = None
        else:
            # Reuse the old layout only if it still adds up to the result size.
            if axis == 0:
                lengths, result_size = row_counts, len(result)
            else:
                lengths, result_size = col_counts, len(result.columns)
            if sum(lengths) != result_size:
                lengths = None

        splitter = (
            generate_result_of_axis_func_pandas
            if return_generator
            else split_result_of_axis_func_pandas
        )
        return splitter(axis, num_splits, result, min_block_size, lengths)

    @classmethod
    def deploy_func_between_two_axis_partitions(
        cls,
        axis,
        func,
        f_args,
        f_kwargs,
        num_splits,
        len_of_left,
        other_shape,
        *partitions,
        min_block_size,
        return_generator=False,
    ):
        """
        Deploy a function along a full axis between two data sets.

        Parameters
        ----------
        axis : {0, 1}
            The axis to perform the function along.
        func : callable
            The function to perform.
        f_args : list or tuple
            Positional arguments to pass to ``func``.
        f_kwargs : dict
            Keyword arguments to pass to ``func``.
        num_splits : int
            The number of splits to return (see `split_result_of_axis_func_pandas`).
        len_of_left : int
            The number of values in `partitions` that belong to the left data set.
        other_shape : np.ndarray
            The shape of right frame in terms of partitions, i.e.
            (other_shape[i-1], other_shape[i]) will indicate slice to restore i-1 axis partition.
        *partitions : iterable
            All partitions that make up the full axis (row or column) for both data sets.
        min_block_size : int
            Minimum number of rows/columns in a single split.
        return_generator : bool, default: False
            Return a generator from the function, set to `True` for Ray backend
            as Ray remote functions can return Generators.

        Returns
        -------
        list | Generator
            A list or generator of pandas DataFrames.
        """
        # The first `len_of_left` inputs form the left frame; the rest are the
        # flattened blocks of the right frame.
        left_frame = pandas.concat(partitions[:len_of_left], axis=axis, copy=False)
        right_blocks = partitions[len_of_left:]

        # to reduce peak memory consumption
        del partitions

        # Rebuild every right-hand axis partition from its slice of the
        # flattened blocks; consecutive entries of `other_shape` are the
        # slice boundaries.
        right_groups = []
        for start, stop in zip(other_shape[:-1], other_shape[1:]):
            right_groups.append(
                pandas.concat(right_blocks[start:stop], axis=axis, copy=False)
            )

        # to reduce peak memory consumption
        del right_blocks

        # The rebuilt axis partitions are stitched together along the other axis.
        right_frame = pandas.concat(right_groups, axis=axis ^ 1, copy=False)

        # to reduce peak memory consumption
        del right_groups

        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=FutureWarning)
            result = func(left_frame, right_frame, *f_args, **f_kwargs)

        # to reduce peak memory consumption
        del left_frame, right_frame

        splitter = (
            generate_result_of_axis_func_pandas
            if return_generator
            else split_result_of_axis_func_pandas
        )
        return splitter(axis, num_splits, result, min_block_size)

    @classmethod
    def drain(cls, df: pandas.DataFrame, call_queue: list):
        """
        Execute all operations stored in the call queue on the pandas object (helper function).

        Parameters
        ----------
        df : pandas.DataFrame
            The object the queued calls are applied to, one after another.
        call_queue : list
            Call queue that needs to be executed on pandas DataFrame.

        Returns
        -------
        pandas.DataFrame
        """
        current = df
        # Each entry is a (func, args, kwargs) triple; the output of one call
        # is the input of the next.
        for call in call_queue:
            func, args, kwargs = call
            current = func(current, *args, **kwargs)
        return current

    def mask(self, row_indices, col_indices):
        """
        Create (synchronously) a mask that extracts the indices provided.

        Parameters
        ----------
        row_indices : list-like, slice or label
            The row labels for the rows to extract.
        col_indices : list-like, slice or label
            The column labels for the columns to extract.

        Returns
        -------
        object
            Whatever ``mask`` of the single materialized block partition returns
            (presumably a new partition object; confirm against the block
            partition's ``mask``).
        """
        # Collapse the axis partition into one block, then mask that block.
        single_block = self.force_materialization().list_of_block_partitions[0]
        return single_block.mask(row_indices, col_indices)

    def to_pandas(self):
        """
        Convert the data in this partition to a ``pandas.DataFrame``.

        Returns
        -------
        pandas DataFrame.
        """
        # Collapse the axis partition into one block and convert that block.
        single_block = self.force_materialization().list_of_block_partitions[0]
        return single_block.to_pandas()

    def to_numpy(self):
        """
        Convert the data in this partition to a ``numpy.array``.

        Returns
        -------
        NumPy array.
        """
        # Collapse the axis partition into one block and convert that block.
        single_block = self.force_materialization().list_of_block_partitions[0]
        return single_block.to_numpy()

    _length_cache = None

    def length(self, materialize=True):
        """
        Get the length of this partition.

        Parameters
        ----------
        materialize : bool, default: True
            Whether to forcibly materialize the result into an integer. If ``False``
            was specified, may return a future of the result if it hasn't been
            materialized yet.

        Returns
        -------
        int
            The length of the partition.

        Notes
        -----
        For ``axis == 0`` the lengths of all block partitions are summed, and
        each block's ``length()`` is called with its default arguments, i.e.
        `materialize` is not forwarded in that case.
        """
        if self._length_cache is not None:
            return self._length_cache

        if self.axis == 0:
            # Blocks are stacked along the rows: add their lengths up.
            computed = sum(block.length() for block in self.list_of_block_partitions)
        else:
            # The length of the first block is used for the whole partition.
            computed = self.list_of_block_partitions[0].length(materialize)
        self._length_cache = computed
        return computed

    _width_cache = None

    def width(self, materialize=True):
        """
        Get the width of this partition.

        Parameters
        ----------
        materialize : bool, default: True
            Whether to forcibly materialize the result into an integer. If ``False``
            was specified, may return a future of the result if it hasn't been
            materialized yet.

        Returns
        -------
        int
            The width of the partition.

        Notes
        -----
        For ``axis == 1`` the widths of all block partitions are summed, and
        each block's ``width()`` is called with its default arguments, i.e.
        `materialize` is not forwarded in that case.
        """
        if self._width_cache is not None:
            return self._width_cache

        if self.axis == 1:
            # Blocks are laid out along the columns: add their widths up.
            computed = sum(block.width() for block in self.list_of_block_partitions)
        else:
            # The width of the first block is used for the whole partition.
            computed = self.list_of_block_partitions[0].width(materialize)
        self._width_cache = computed
        return computed

    def wait(self):
        """
        Wait completing computations on the object wrapped by the partition.

        This implementation does nothing.
        """

    def add_to_apply_calls(self, func, *args, length=None, width=None, **kwargs):
        """
        Add a function to the call queue.

        This partition is left untouched; the extended queue is attached to a
        newly created partition of the same type.

        Parameters
        ----------
        func : callable or a future type
            Function to be added to the call queue.
        *args : iterable
            Additional positional arguments to be passed in `func`.
        length : A future type or int, optional
            Length, or reference to it, of wrapped ``pandas.DataFrame``.
        width : A future type or int, optional
            Width, or reference to it, of wrapped ``pandas.DataFrame``.
        **kwargs : dict
            Additional keyword arguments to be passed in `func`.

        Returns
        -------
        PandasDataframeAxisPartition
            A new ``PandasDataframeAxisPartition`` object.
        """
        block_partitions = self.list_of_block_partitions
        # A new list is built, so the queue of this partition is not mutated.
        queued_call = [func, args, kwargs]
        extended_queue = self.call_queue + [queued_call]
        return type(self)(
            block_partitions,
            full_axis=self.full_axis,
            call_queue=extended_queue,
            length=length,
            width=width,
        )


================================================
FILE: modin/core/dataframe/pandas/partitioning/partition.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""The module defines base interface for a partition of a Modin DataFrame."""

from __future__ import annotations

import logging
import uuid
from abc import ABC
from copy import copy
from functools import cached_property

import pandas
from pandas.api.types import is_scalar

from modin.core.storage_formats.pandas.utils import length_fn_pandas, width_fn_pandas
from modin.logging import ClassLogger, get_logger
from modin.logging.config import LogLevel
from modin.pandas.indexing import compute_sliced_len


class PandasDataframePartition(
    ABC, ClassLogger, modin_layer="BLOCK-PARTITION", log_level=LogLevel.DEBUG
):  # pragma: no cover
    """
    An abstract class that is base for any partition class of ``pandas`` storage format.

    The class providing an API that has to be overridden by child classes.
    """

    _length_cache = None
    _width_cache = None
    _identity_cache = None
    _data = None
    execution_wrapper = None

    # these variables are intentionally initialized at runtime
    # so as not to initialize the engine during import
    _iloc_func = None

    def __init__(self):
        if type(self)._iloc_func is None:
            # Places `_iloc` function into the storage to speed up
            # remote function calls and caches the result.
            # It also postpones engine initialization, which happens
            # implicitly when `execution_wrapper.put` is called.
            if self.execution_wrapper is not None:
                type(self)._iloc_func = staticmethod(
                    self.execution_wrapper.put(self._iloc)
                )
            else:
                type(self)._iloc_func = staticmethod(self._iloc)

    @cached_property
    def __constructor__(self) -> type[PandasDataframePartition]:
        """
        Get the class that should be used to build new partitions of the same kind.

        Returns
        -------
        type[PandasDataframePartition]
            The concrete class of this partition, i.e. ``type(self)``. Note that
            this is the class itself, not a new instance.
        """
        return type(self)

    def get(self):
        """
        Materialize and return the object wrapped by this partition.

        The call queue is drained first, then the wrapped data is materialized
        through ``execution_wrapper``.

        Returns
        -------
        object
            The object that was wrapped by this partition.

        Notes
        -----
        This is the opposite of the classmethod `put`.
        E.g. if you assign `x = PandasDataframePartition.put(1)`, `x.get()` should
        always return 1.
        """
        logger = get_logger()
        # The identity is only computed when debug logging is actually enabled.
        if self._is_debug(logger):
            logger.debug(f"ENTER::Partition.get::{self._identity}")
        self.drain_call_queue()
        materialized = self.execution_wrapper.materialize(self._data)
        if self._is_debug(logger):
            logger.debug(f"EXIT::Partition.get::{self._identity}")
        return materialized

    @property
    def list_of_blocks(self):
        """
        Get the list of physical partition objects that compose this partition.

        Returns
        -------
        list
            A list of physical partition objects (``ray.ObjectRef``, ``distributed.Future`` e.g.).
        """
        # Defer draining call queue until we get the partitions.
        # TODO Look into draining call queue at the same time as the task
        self.drain_call_queue()
        return [self._data]

    def apply(self, func, *args, **kwargs):
        """
        Apply a function to the object wrapped by this partition.

        Parameters
        ----------
        func : callable
            Function to apply.
        *args : iterable
            Additional positional arguments to be passed in `func`.
        **kwargs : dict
            Additional keyword arguments to be passed in `func`.

        Returns
        -------
        PandasDataframePartition
            New `PandasDataframePartition` object.

        Notes
        -----
        It is up to the implementation how `kwargs` are handled. They are
        an important part of many implementations. As of right now, they
        are not serialized.

        The implementation in this base class is an empty stub (it returns
        ``None``); child classes are expected to override it.
        """
        pass

    def add_to_apply_calls(self, func, *args, length=None, width=None, **kwargs):
        """
        Add a function to the call queue.

        Parameters
        ----------
        func : callable
            Function to be added to the call queue.
        *args : iterable
            Additional positional arguments to be passed in `func`.
        length : reference or int, optional
            Length, or reference to length, of wrapped ``pandas.DataFrame``.
        width : reference or int, optional
            Width, or reference to width, of wrapped ``pandas.DataFrame``.
        **kwargs : dict
            Additional keyword arguments to be passed in `func`.

        Returns
        -------
        PandasDataframePartition
            New `PandasDataframePartition` object with the function added to the call queue.

        Notes
        -----
        This function will be executed when `apply` is called. It will be executed
        in the order inserted; apply's func operates the last and return.
        """
        return self.__constructor__(
            self._data,
            call_queue=self.call_queue + [[func, args, kwargs]],
            length=length,
            width=width,
        )

    def drain_call_queue(self):
        """
        Execute all operations stored in the call queue on the object wrapped by this partition.

        Notes
        -----
        The implementation in this base class is an empty stub that does nothing;
        child classes are expected to override it.
        """
        pass

    def wait(self):
        """
        Wait for completion of computations on the object wrapped by the partition.

        Notes
        -----
        The implementation in this base class is an empty stub that does nothing;
        child classes are expected to override it.
        """
        pass

    def to_pandas(self):
        """
        Materialize the wrapped object and return it as a pandas object.

        Returns
        -------
        pandas.DataFrame

        Notes
        -----
        The result of `get` is returned as is; it is only asserted to be
        a ``pandas.DataFrame`` or a ``pandas.Series``.
        """
        materialized = self.get()
        assert isinstance(materialized, (pandas.DataFrame, pandas.Series))
        return materialized

    def to_numpy(self, **kwargs):
        """
        Convert the object wrapped by this partition to a NumPy array.

        The conversion is submitted through `apply` and the result is then
        fetched with `get`.

        Parameters
        ----------
        **kwargs : dict
            Additional keyword arguments to be passed in ``to_numpy``.

        Returns
        -------
        np.ndarray

        Notes
        -----
        If the underlying object is a pandas DataFrame, this will return
        a 2D NumPy array.
        """

        def convert(df):
            """Call ``to_numpy`` on the wrapped object with the captured `kwargs`."""
            return df.to_numpy(**kwargs)

        converted_partition = self.apply(convert)
        return converted_partition.get()

    @staticmethod
    def _iloc(df, row_labels, col_labels):  # noqa: RT01, PR01
        """
        Perform `iloc` on dataframes wrapped in partitions (helper function).

        Despite the parameter names, `row_labels` and `col_labels` are handed
        straight to ``df.iloc``, so they act as ``iloc`` indexers.
        """
        return df.iloc[row_labels, col_labels]

    def mask(self, row_labels, col_labels):
        """
        Lazily create a mask that extracts the indices provided.

        Parameters
        ----------
        row_labels : list-like, slice or label
            The row labels for the rows to extract.
        col_labels : list-like, slice or label
            The column labels for the columns to extract.

        Returns
        -------
        PandasDataframePartition
            New `PandasDataframePartition` object.
        """

        def is_full_axis_mask(index, axis_length):
            """Check whether `index` mask grabs `axis_length` amount of elements."""
            if isinstance(index, slice):
                return index == slice(None) or (
                    isinstance(axis_length, int)
                    and compute_sliced_len(index, axis_length) == axis_length
                )
            return (
                hasattr(index, "__len__")
                and isinstance(axis_length, int)
                and len(index) == axis_length
            )

        row_labels = [row_labels] if is_scalar(row_labels) else row_labels
        col_labels = [col_labels] if is_scalar(col_labels) else col_labels

        if is_full_axis_mask(row_labels, self._length_cache) and is_full_axis_mask(
            col_labels, self._width_cache
        ):
            return copy(self)

        new_obj = self.add_to_apply_calls(self._iloc_func, row_labels, col_labels)

        def try_recompute_cache(indices, previous_cache):
            """Compute new axis-length cache for the masked frame based on its previous cache."""
            if not isinstance(indices, slice):
                return len(indices)
            if not isinstance(previous_cache, int):
                return None
            return compute_sliced_len(indices, previous_cache)

        new_obj._length_cache = try_recompute_cache(row_labels, self._length_cache)
        new_obj._width_cache = try_recompute_cache(col_labels, self._width_cache)
        return new_obj

    @classmethod
    def put(cls, obj):
        """
        Put an object into a store and wrap it with partition object.

        Parameters
        ----------
        obj : object
            An object to be put.

        Returns
        -------
        PandasDataframePartition
            New `PandasDataframePartition` object.

        Notes
        -----
        The implementation in this base class is an empty stub (it returns
        ``None``); child classes are expected to override it.
        """
        pass

    @classmethod
    def preprocess_func(cls, func):
        """
        Preprocess a function before an `apply` call.

        Parameters
        ----------
        func : callable
            Function to preprocess.

        Returns
        -------
        callable
            An object that can be accepted by `apply`.

        Notes
        -----
        This is a classmethod because the definition of how to preprocess
        should be class-wide. Also, we may want to use this before we
        deploy a preprocessed function to multiple `PandasDataframePartition`
        objects.

        The implementation in this base class is an empty stub (it returns
        ``None``); child classes are expected to override it.
        """
        pass

    @classmethod
    def _length_extraction_fn(cls):
        """
        Return the function that computes the length of the object wrapped by this partition.

        Returns
        -------
        callable
            The function that computes the length of the object wrapped by this partition
            (``length_fn_pandas`` in this base class).
        """
        return length_fn_pandas

    @classmethod
    def _width_extraction_fn(cls):
        """
        Return the function that computes the width of the object wrapped by this partition.

        Returns
        -------
        callable
            The function that computes the width of the object wrapped by this partition
            (``width_fn_pandas`` in this base class).
        """
        return width_fn_pandas

    def length(self, materialize=True):
        """
        Get the length of the object wrapped by this partition.

        Parameters
        ----------
        materialize : bool, default: True
            Whether to forcibly materialize the result into an integer. If ``False``
            was specified, may return a future of the result if it hasn't been
            materialized yet.

        Returns
        -------
        int or its Future
            The length of the object.
        """
        if self._length_cache is None:
            self._length_cache = self.apply(self._length_extraction_fn()).get()
        return self._length_cache

    def width(self, materialize=True):
        """
        Get the width of the object wrapped by the partition.

        Parameters
        ----------
        materialize : bool, default: True
            Whether to forcibly materialize the result into an integer. If ``False``
            was specified, may return a future of the result if it hasn't been
            materialized yet.

        Returns
        -------
        int or its Future
            The width of the object.
        """
        if self._width_cache is None:
            self._width_cache = self.apply(self._width_extraction_fn()).get()
        return self._width_cache

    @property
    def _identity(self):
        """
        Calculate identifier on request for debug logging mode.

        Returns
        -------
        str
        """
        if self._identity_cache is None:
            self._identity_cache = uuid.uuid4().hex
        return self._identity_cache

    def split(self, split_func, num_splits, *args):
        """
        Split the object wrapped by the partition into multiple partitions.

        `split_func` is deployed through ``execution_wrapper`` with the wrapped data
        as its first argument, and every output is wrapped into a new partition.

        Parameters
        ----------
        split_func : Callable[pandas.DataFrame, List[Any]] -> List[pandas.DataFrame]
            The function that will split this partition into multiple partitions. The list contains
            pivots to split by, and will have the same dtype as the major column we are shuffling on.
        num_splits : int
            The number of resulting partitions (may be empty).
        *args : List[Any]
            Arguments to pass to ``split_func``.

        Returns
        -------
        list
            A list of partitions.
        """
        logger = get_logger()
        # The identity is only computed when debug logging is actually enabled.
        if self._is_debug(logger):
            logger.debug(f"ENTER::Partition.split::{self._identity}")
        if self._is_debug(logger):
            logger.debug(f"SUBMIT::_split_df::{self._identity}")

        deploy_args = [self._data] + list(args)
        outputs = self.execution_wrapper.deploy(
            split_func, deploy_args, num_returns=num_splits
        )
        if self._is_debug(logger):
            logger.debug(f"EXIT::Partition.split::{self._identity}")

        pieces = []
        for output in outputs:
            pieces.append(self.__constructor__(output))
        return pieces

    @classmethod
    def empty(cls):
        """
        Create a new partition that wraps an empty pandas DataFrame.

        Returns
        -------
        PandasDataframePartition
            New `PandasDataframePartition` object.
        """
        return cls.put(pandas.DataFrame(), 0, 0)

    def _is_debug(self, logger=None):
        """
        Check that the logger is set to debug mode.

        Parameters
        ----------
        logger : logging.logger, optional
            Logger obtained from Modin's `get_logger` utility.
            Explicit transmission of this parameter can be used in the case
            when within the context of `_is_debug` call there was already
            `get_logger` call. This is an optimization.

        Returns
        -------
        bool
        """
        if logger is None:
            logger = get_logger()
        return logger.isEnabledFor(logging.DEBUG)


================================================
FILE: modin/core/dataframe/pandas/partitioning/partition_manager.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
Module holding base PartitionManager class - the thing that tracks partitions across the distribution.

The manager also allows manipulating the data - running functions at each partition, shuffle over the distribution, etc.
"""

import os
import warnings
from abc import ABC
from functools import wraps
from typing import TYPE_CHECKING, Optional

import numpy as np
import pandas
from pandas._libs.lib import no_default

from modin.config import (
    BenchmarkMode,
    CpuCount,
    DynamicPartitioning,
    Engine,
    MinColumnPartitionSize,
    MinRowPartitionSize,
    NPartitions,
    PersistentPickle,
    ProgressBar,
)
from modin.core.dataframe.pandas.utils import create_pandas_df_from_partitions
from modin.core.storage_formats.pandas.utils import compute_chunksize
from modin.error_message import ErrorMessage
from modin.logging import ClassLogger
from modin.logging.config import LogLevel
from modin.pandas.utils import get_pandas_backend

if TYPE_CHECKING:
    from modin.core.dataframe.pandas.dataframe.utils import ShuffleFunctions


def wait_computations_if_benchmark_mode(func):
    """
    Make sure a `func` finished its computations in benchmark mode.

    Parameters
    ----------
    func : callable
        A function that should be performed in synchronous mode.

    Returns
    -------
    callable
        Wrapper around `func` that additionally waits for the resulting
        partitions when benchmark mode is enabled at call time.

    Notes
    -----
    `func` should return NumPy array with partitions, or a tuple whose first
    element is such an array.
    """

    @wraps(func)
    def wait(cls, *args, **kwargs):
        """Wait for computation results."""
        result = func(cls, *args, **kwargs)
        if not BenchmarkMode.get():
            return result

        partitions = result[0] if isinstance(result, tuple) else result
        # When partitions have a deferred call queue, calling
        # partition.wait() on each partition serially will serially kick
        # off each deferred computation and wait for each partition to
        # finish before kicking off the next one. Instead, we want to
        # serially kick off all the deferred computations so that they can
        # all run asynchronously, then wait on all the results.
        cls.finalize(partitions)
        # The partition manager invokes the relevant .wait() method under
        # the hood, which should wait in parallel for all computations to finish
        cls.wait_partitions(partitions.flatten())
        return result

    return wait


class PandasDataframePartitionManager(
    ClassLogger, ABC, modin_layer="PARTITION-MANAGER", log_level=LogLevel.DEBUG
):
    """
    Base class for managing the dataframe data layout and operators across the distribution of partitions.

    Partition class is the class to use for storing each partition.
    Each partition must extend the `PandasDataframePartition` class.
    """

    _partition_class = None
    # Column partitions class is the class to use to create the column partitions.
    _column_partitions_class = None
    # Row partitions class is the class to use to create the row partitions.
    _row_partition_class = None
    _execution_wrapper = None

    @classmethod
    def materialize_futures(cls, input_list):
        """
        Materialize all futures in the input list.

        Parameters
        ----------
        input_list : list
            The list that has to be manipulated.

        Returns
        -------
        list
           A new list with materialized objects.
        """
        # Do nothing if input_list is None or [].
        if input_list is None:
            return None
        filtered_list = []
        filtered_idx = []
        for idx, item in enumerate(input_list):
            if cls._execution_wrapper.is_future(item):
                filtered_idx.append(idx)
                filtered_list.append(item)
        filtered_list = cls._execution_wrapper.materialize(filtered_list)
        result = input_list.copy()
        for idx, item in zip(filtered_idx, filtered_list):
            result[idx] = item
        return result

    @classmethod
    def preprocess_func(cls, map_func):
        """
        Preprocess a function to be applied to `PandasDataframePartition` objects.

        Parameters
        ----------
        map_func : callable
            The function to be preprocessed.

        Returns
        -------
        callable
            The preprocessed version of the `map_func` provided.

        Notes
        -----
        Preprocessing does not require any specific format, only that the
        `PandasDataframePartition.apply` method will recognize it (for the subclass
        being used).

        If your `PandasDataframePartition` objects assume that a function provided
        is serialized or wrapped or in some other format, this is the place
        to add that logic. It is possible that this can also just return
        `map_func` if the `apply` method of the `PandasDataframePartition` object
        you are using does not require any modification to a given function.
        """
        if cls._execution_wrapper.is_future(map_func):
            return map_func  # Has already been preprocessed

        old_value = PersistentPickle.get()
        # When performing a function with Modin objects, it is more profitable to
        # do the conversion to pandas once on the main process than several times
        # on worker processes. Details: https://github.com/modin-project/modin/pull/6673/files#r1391086755
        # For Dask, otherwise there may be an error: `coroutine 'Client._gather' was never awaited`
        need_update = not PersistentPickle.get() and Engine.get() != "Dask"
        if need_update:
            PersistentPickle.put(True)
        try:
            result = cls._partition_class.preprocess_func(map_func)
        finally:
            if need_update:
                PersistentPickle.put(old_value)
        return result

    # END Abstract Methods

    @classmethod
    def create_partition_from_metadata(
        cls, dtypes: Optional[pandas.Series] = None, **metadata
    ):
        """
        Create NumPy array of partitions that holds an empty dataframe with given metadata.

        Parameters
        ----------
        dtypes : pandas.Series, optional
            Column dtypes.
            Upon creating a pandas DataFrame from `metadata` we call `astype` since
            pandas doesn't allow to pass a list of dtypes directly in the constructor.
        **metadata : dict
            Metadata that has to be wrapped in a partition.

        Returns
        -------
        np.ndarray
            A NumPy 2D array of a single partition which contains the data.
        """
        metadata_dataframe = pandas.DataFrame(**metadata)
        if dtypes is not None:
            metadata_dataframe = metadata_dataframe.astype(dtypes)
        return np.array([[cls._partition_class.put(metadata_dataframe)]])

    @classmethod
    def column_partitions(cls, partitions, full_axis=True):
        """
        Get the list of `BaseDataframeAxisPartition` objects representing column-wise partitions.

        Parameters
        ----------
        partitions : list-like
            List of (smaller) partitions to be combined to column-wise partitions.
        full_axis : bool, default: True
            Whether or not this partition contains the entire column axis.

        Returns
        -------
        list
            A list of `BaseDataframeAxisPartition` objects.

        Notes
        -----
        Each value in this list will be an `BaseDataframeAxisPartition` object.
        `BaseDataframeAxisPartition` is located in `axis_partition.py`.
        """
        if not isinstance(partitions, list):
            partitions = [partitions]
        return [
            cls._column_partitions_class(col, full_axis=full_axis)
            for frame in partitions
            for col in frame.T
        ]

    @classmethod
    def row_partitions(cls, partitions):
        """
        List of `BaseDataframeAxisPartition` objects representing row-wise partitions.

        Parameters
        ----------
        partitions : list-like
            List of (smaller) partitions to be combined to row-wise partitions.

        Returns
        -------
        list
            A list of `BaseDataframeAxisPartition` objects.

        Notes
        -----
        Each value in this list will an `BaseDataframeAxisPartition` object.
        `BaseDataframeAxisPartition` is located in `axis_partition.py`.
        """
        if not isinstance(partitions, list):
            partitions = [partitions]
        return [cls._row_partition_class(row) for frame in partitions for row in frame]

    @classmethod
    def axis_partition(cls, partitions, axis, full_axis: bool = True):
        """
        Logically partition along given axis (columns or rows).

        Parameters
        ----------
        partitions : list-like
            List of partitions to be combined.
        axis : {0, 1}
            0 for column partitions, 1 for row partitions.
        full_axis : bool, default: True
            Whether or not this partition contains the entire column axis.

        Returns
        -------
        list
            A list of `BaseDataframeAxisPartition` objects.
        """
        make_column_partitions = axis == 0
        if not full_axis and not make_column_partitions:
            raise NotImplementedError(
                (
                    "Row partitions must contain the entire axis. We don't "
                    + "support virtual partitioning for row partitions yet."
                )
            )
        return (
            cls.column_partitions(partitions)
            if make_column_partitions
            else cls.row_partitions(partitions)
        )

    @classmethod
    def groupby_reduce(
        cls, axis, partitions, by, map_func, reduce_func, apply_indices=None
    ):
        """
        Groupby data using the `map_func` provided along the `axis` over the `partitions` then reduce using `reduce_func`.

        Parameters
        ----------
        axis : {0, 1}
            Axis to groupby over.
        partitions : NumPy 2D array
            Partitions of the ModinFrame to groupby.
        by : NumPy 2D array
            Partitions of 'by' to broadcast. If ``None``, `map_func` is applied
            to `partitions` without broadcasting.
        map_func : callable
            Map function.
        reduce_func : callable
            Reduce function.
        apply_indices : list of ints, default: None
            Indices of `axis ^ 1` to apply function over.

        Returns
        -------
        NumPy array
            Partitions with applied groupby.
        """
        if apply_indices is not None:
            if axis:
                partitions = partitions[apply_indices]
            else:
                partitions = partitions[:, apply_indices]

        if by is None:
            mapped_partitions = cls.map_partitions(partitions, map_func)
        else:
            # need to make sure that the partitioning of the following objects
            # coincides in the required axis, because `partition_manager.broadcast_apply`
            # doesn't call `_copartition` unlike `modin_frame.broadcast_apply`
            assert partitions.shape[axis] == by.shape[axis], (
                f"the number of partitions along {axis=} is not equal: "
                + f"{partitions.shape[axis]} != {by.shape[axis]}"
            )
            mapped_partitions = cls.broadcast_apply(
                axis, map_func, left=partitions, right=by
            )

        # Assuming, that the output will not be larger than the input,
        # keep the current number of partitions.
        row_count = len(partitions)
        num_splits = min(row_count, NPartitions.get())
        return cls.map_axis_partitions(
            axis,
            mapped_partitions,
            reduce_func,
            enumerate_partitions=True,
            num_splits=num_splits,
        )

    @classmethod
    @wait_computations_if_benchmark_mode
    def broadcast_apply_select_indices(
        cls,
        axis,
        apply_func,
        left,
        right,
        left_indices,
        right_indices,
        keep_remaining=False,
    ):
        """
        Broadcast the `right` partitions to `left` and apply `apply_func` to selected indices.

        Parameters
        ----------
        axis : {0, 1}
            Axis to apply and broadcast over.
        apply_func : callable
            Function to apply.
        left : NumPy 2D array
            Left partitions.
        right : NumPy 2D array
            Right partitions.
        left_indices : list-like
            Indices to apply function to.
        right_indices : dictionary of indices of right partitions
            Indices that you want to bring at specified left partition, for example
            dict {key: {key1: [0, 1], key2: [5]}} means that in left[key] you want to
            broadcast [right[key1], right[key2]] partitions and internal indices
            for `right` must be [[0, 1], [5]].
        keep_remaining : bool, default: False
            Whether or not to keep the other partitions.
            Some operations may want to drop the remaining partitions and
            keep only the results.

        Returns
        -------
        NumPy array
            An array of partition objects.

        Notes
        -----
        Your internal function must take these kwargs:
        [`internal_indices`, `other`, `internal_other_indices`] to work correctly!
        """
        if axis:
            partitions_for_apply = left
        else:
            # For axis 0 both operands are processed in transposed form.
            partitions_for_apply = left.T
            right = right.T

        # Run every pending call of the right-hand partitions before broadcasting them.
        for right_row in right:
            for right_part in right_row:
                right_part.drain_call_queue()

        def get_partitions(index):
            """Grab required partitions and indices from `right` and `right_indices`."""
            must_grab = right_indices[index]
            selected = np.array([right[i] for i in must_grab.keys()])
            return {
                "other": selected,
                "internal_other_indices": list(must_grab.values()),
            }

        result_rows = []
        for i, part_row in enumerate(partitions_for_apply):
            if i in left_indices:
                result_rows.append(
                    cls._apply_func_to_list_of_partitions_broadcast(
                        apply_func,
                        part_row,
                        internal_indices=left_indices[i],
                        **get_partitions(i),
                    )
                )
            elif keep_remaining:
                # Untouched partitions are only carried over on request.
                result_rows.append(part_row)

        new_partitions = np.array(result_rows)
        return new_partitions if axis else new_partitions.T

    @classmethod
    @wait_computations_if_benchmark_mode
    def base_broadcast_apply(cls, axis, apply_func, left, right):
        """
        Broadcast the `right` partitions to `left` and apply `apply_func` function.

        Parameters
        ----------
        axis : {0, 1}
            Axis to apply and broadcast over.
        apply_func : callable
            Function to apply.
        left : np.ndarray
            NumPy array of left partitions.
        right : np.ndarray
            NumPy array of right partitions.

        Returns
        -------
        np.ndarray
            NumPy array of result partition objects.

        Notes
        -----
        This will often be overridden by implementations. It materializes the
        entire partitions of the right and applies them to the left through `apply`.
        """

        def map_func(df, *others):
            if len(others) > 1:
                other = pandas.concat(others, axis=axis ^ 1)
            else:
                other = others[0]
            # to reduce peak memory consumption
            del others
            return apply_func(df, other)

        map_func = cls.preprocess_func(map_func)
        rt_axis_parts = cls.axis_partition(right, axis ^ 1)

        result_rows = []
        for row_idx, left_row in enumerate(left):
            new_row = []
            for col_idx, part in enumerate(left_row):
                # The right axis partition is picked by column index for a truthy
                # `axis` and by row index otherwise.
                broadcast_idx = col_idx if axis else row_idx
                new_row.append(
                    part.apply(map_func, *rt_axis_parts[broadcast_idx].list_of_blocks)
                )
            result_rows.append(new_row)
        return np.array(result_rows)

    @classmethod
    @wait_computations_if_benchmark_mode
    def broadcast_axis_partitions(
        cls,
        axis,
        apply_func,
        left,
        right,
        keep_partitioning=False,
        num_splits=None,
        apply_indices=None,
        broadcast_all=True,
        enumerate_partitions=False,
        lengths=None,
        apply_func_args=None,
        **kwargs,
    ):
        """
        Apply `apply_func` along full `axis` of `left`, broadcasting `right` to it.

        Parameters
        ----------
        axis : {0, 1}
            Axis to apply and broadcast over.
        apply_func : callable
            Function to apply.
        left : NumPy 2D array
            Left partitions.
        right : NumPy 2D array or None
            Right partitions. When None, no right axis partitions are built.
        keep_partitioning : boolean, default: False
            The flag to keep partition boundaries for Modin Frame if possible.
            Setting it to True disables shuffling data from one partition to another
            in case the resulting number of splits is equal to the initial number of splits.
        num_splits : int, optional
            The number of partitions to split the result into across the `axis`.
            If None, the number of splits is inferred automatically. If `num_splits`
            is None and `keep_partitioning=True`, the number of splits is preserved.
        apply_indices : list of ints, default: None
            Indices of `axis ^ 1` to apply function over. All axis partitions
            of `left` are used when omitted.
        broadcast_all : bool, default: True
            Whether or not to pass all right axis partitions to each of the left axis partitions.
            When False, only the right axis partition at the same index is passed.
        enumerate_partitions : bool, default: False
            Whether or not to pass partition index into `apply_func`.
            Note that `apply_func` must be able to accept `partition_idx` kwarg.
        lengths : list of ints, default: None
            The list of lengths to shuffle the object. Note:
                1. Passing `lengths` omits the `num_splits` parameter as the number of splits
                will now be inferred from the number of integers present in `lengths`.
                2. When passing lengths you must explicitly specify `keep_partitioning=False`.
        apply_func_args : list-like, optional
            Positional arguments to pass to the `func`.
        **kwargs : dict
            Additional options that could be used by different engines.

        Returns
        -------
        NumPy array
            An array of partition objects.
        """
        ErrorMessage.catch_bugs_and_request_email(
            failure_condition=keep_partitioning and lengths is not None,
            extra_log=f"`keep_partitioning` must be set to `False` when passing `lengths`. Got: {keep_partitioning=} | {lengths=}",
        )

        # The result gets split back into blocks anyway, so this is the place
        # to settle on how many of them there should be.
        if keep_partitioning and num_splits is None:
            # preserve the current number of partitions along `axis`
            num_splits = len(left.T) if axis != 0 else len(left)
        elif lengths:
            # explicit lengths dictate the number of splits
            num_splits = len(lengths)
        elif num_splits is None:
            num_splits = NPartitions.get()
        else:
            ErrorMessage.catch_bugs_and_request_email(
                failure_condition=not isinstance(num_splits, int),
                extra_log=f"Expected `num_splits` to be an integer, got: {type(num_splits)} | {num_splits=}",
            )

        preprocessed_map_func = cls.preprocess_func(apply_func)
        left_partitions = cls.axis_partition(left, axis)
        if right is None:
            right_partitions = None
        else:
            right_partitions = cls.axis_partition(right, axis)

        # Options forwarded to every axis partition's ``apply``. Unless
        # `keep_partitioning` is set, the existing block boundaries are not kept.
        split_options = {
            "num_splits": num_splits,
            "maintain_partitioning": keep_partitioning,
        }
        if lengths:
            split_options["lengths"] = lengths
            split_options["manual_partition"] = True

        if apply_indices is None:
            apply_indices = np.arange(len(left_partitions))

        applied = []
        for position, part_idx in enumerate(apply_indices):
            # `position` is the ordinal among the selected indices and is what
            # `apply_func` receives as `partition_idx`.
            idx_kwarg = {"partition_idx": position} if enumerate_partitions else {}
            applied.append(
                left_partitions[part_idx].apply(
                    preprocessed_map_func,
                    *(apply_func_args or []),
                    other_axis_partition=(
                        right_partitions
                        if broadcast_all
                        else right_partitions[part_idx]
                    ),
                    **split_options,
                    **idx_kwarg,
                    **kwargs,
                )
            )
        result_blocks = np.array(applied)

        # Each entry of `result_blocks` corresponds to one axis partition, so
        # for `axis == 0` the array has to be transposed before returning.
        if axis:
            return result_blocks
        return result_blocks.T

    @classmethod
    @wait_computations_if_benchmark_mode
    def base_map_partitions(
        cls,
        partitions,
        map_func,
        func_args=None,
        func_kwargs=None,
    ):
        """
        Apply `map_func` to every partition in `partitions`, block by block.

        Parameters
        ----------
        partitions : NumPy 2D array
            Partitions housing the data of Modin Frame.
        map_func : callable
            Function to apply.
        func_args : iterable, optional
            Positional arguments for the 'map_func'.
        func_kwargs : dict, optional
            Keyword arguments for the 'map_func'.

        Returns
        -------
        NumPy array
            An array of partitions
        """
        preprocessed_map_func = cls.preprocess_func(map_func)
        positional = () if func_args is None else func_args
        keywords = {} if func_kwargs is None else func_kwargs

        mapped_rows = []
        for row_of_parts in partitions:
            mapped_rows.append(
                [
                    part.apply(preprocessed_map_func, *positional, **keywords)
                    for part in row_of_parts
                ]
            )
        return np.array(mapped_rows)

    @classmethod
    @wait_computations_if_benchmark_mode
    def broadcast_apply(
        cls,
        axis,
        apply_func,
        left,
        right,
    ):
        """
        Broadcast `right` to `left` and apply `apply_func`, picking the strategy from config.

        The block-wise implementation is used unless ``DynamicPartitioning``
        is enabled, in which case the work is done on axis partitions.

        Parameters
        ----------
        axis : {0, 1}
            Axis to apply and broadcast over.
        apply_func : callable
            Function to apply.
        left : np.ndarray
            NumPy array of left partitions.
        right : np.ndarray
            NumPy array of right partitions.

        Returns
        -------
        np.ndarray
            NumPy array of result partition objects.
        """
        if DynamicPartitioning.get():
            # Axis-wise broadcast. This deliberately differs from the dynamic
            # behavior of `map_partitions`: the columnar approach results in
            # a slowdown for `broadcast_apply`.
            return cls.broadcast_axis_partitions(
                axis=axis ^ 1,
                left=left,
                right=right,
                apply_func=apply_func,
                broadcast_all=False,
                keep_partitioning=True,
            )

        # block-wise broadcast
        return cls.base_broadcast_apply(axis, apply_func, left, right)

    @classmethod
    @wait_computations_if_benchmark_mode
    def map_partitions(
        cls,
        partitions,
        map_func,
        func_args=None,
        func_kwargs=None,
    ):
        """
        Apply `map_func` to `partitions` using different approaches to achieve the best performance.

        Without ``DynamicPartitioning`` the function is applied block by block.
        With it enabled, blocks are combined either along a full axis or into
        groups of column blocks before `map_func` is applied.

        Parameters
        ----------
        partitions : NumPy 2D array
            Partitions housing the data of Modin Frame.
        map_func : callable
            Function to apply.
        func_args : iterable, optional
            Positional arguments for the 'map_func'.
        func_kwargs : dict, optional
            Keyword arguments for the 'map_func'.

        Returns
        -------
        NumPy array
            An array of partitions
        """
        # Block-wise map. An array with no column partitions also takes this
        # path: the axis-wise strategy divides by the number of column
        # partitions and would otherwise raise ``ZeroDivisionError``.
        if not DynamicPartitioning.get() or partitions.shape[1] == 0:
            return cls.base_map_partitions(
                partitions, map_func, func_args, func_kwargs
            )

        # Axis-wise map: choose the axis whose number of combined partitions
        # is closer to the number of CPUs.
        cpu_count = CpuCount.get()
        n_row_parts = partitions.shape[0]
        n_col_parts = partitions.shape[1]
        if abs(n_row_parts - cpu_count) < abs(n_col_parts - cpu_count):
            axis = 1
        else:
            axis = 0

        column_splits = cpu_count // n_col_parts
        if axis == 0 and column_splits > 1:
            # splitting by parts of columnar partitions
            return cls.map_partitions_joined_by_column(
                partitions, column_splits, map_func, func_args, func_kwargs
            )

        # splitting by full axis partitions
        return cls.map_axis_partitions(
            axis,
            partitions,
            lambda df: map_func(
                df,
                *(func_args if func_args is not None else ()),
                **(func_kwargs if func_kwargs is not None else {}),
            ),
            keep_partitioning=True,
        )

    @classmethod
    @wait_computations_if_benchmark_mode
    def lazy_map_partitions(
        cls,
        partitions,
        map_func,
        func_args=None,
        func_kwargs=None,
        enumerate_partitions=False,
    ):
        """
        Queue `map_func` on every partition in `partitions` *lazily*.

        The function is registered through each partition's
        ``add_to_apply_calls`` rather than ``apply``.

        Parameters
        ----------
        partitions : NumPy 2D array
            Partitions of Modin Frame.
        map_func : callable
            Function to apply.
        func_args : iterable, optional
            Positional arguments for the 'map_func'.
        func_kwargs : dict, optional
            Keyword arguments for the 'map_func'.
        enumerate_partitions : bool, default: False
            Whether or not to pass the position of the partition's row in
            `partitions` to `map_func` as the `partition_idx` kwarg.

        Returns
        -------
        NumPy array
            An array of partitions
        """
        preprocessed_map_func = cls.preprocess_func(map_func)
        positional = tuple() if func_args is None else func_args
        keywords = {} if func_kwargs is None else func_kwargs

        lazy_rows = []
        for row_idx, row in enumerate(partitions):
            # every block of the same row receives the same `partition_idx`
            idx_kwarg = {"partition_idx": row_idx} if enumerate_partitions else {}
            lazy_rows.append(
                [
                    part.add_to_apply_calls(
                        preprocessed_map_func,
                        *positional,
                        **keywords,
                        **idx_kwarg,
                    )
                    for part in row
                ]
            )
        return np.array(lazy_rows)

    @classmethod
    def map_axis_partitions(
        cls,
        axis,
        partitions,
        map_func,
        keep_partitioning=False,
        num_splits=None,
        lengths=None,
        enumerate_partitions=False,
        **kwargs,
    ):
        """
        Apply `map_func` to every axis partition of `partitions` along `axis`.

        This delegates to ``broadcast_axis_partitions`` with no right-hand
        partitions.

        Parameters
        ----------
        axis : {0, 1}
            Axis to perform the map across (0 - index, 1 - columns).
        partitions : NumPy 2D array
            Partitions of Modin Frame.
        map_func : callable
            Function to apply.
        keep_partitioning : boolean, default: False
            The flag to keep partition boundaries for Modin Frame if possible.
            Setting it to True disables shuffling data from one partition to another
            in case the resulting number of splits is equal to the initial number of splits.
        num_splits : int, optional
            The number of partitions to split the result into across the `axis`.
            If None, the number of splits is inferred automatically. If `num_splits`
            is None and `keep_partitioning=True`, the number of splits is preserved.
        lengths : list of ints, default: None
            The list of lengths to shuffle the object. Note:
                1. Passing `lengths` omits the `num_splits` parameter as the number of splits
                will now be inferred from the number of integers present in `lengths`.
                2. When passing lengths you must explicitly specify `keep_partitioning=False`.
        enumerate_partitions : bool, default: False
            Whether or not to pass partition index into `map_func`.
            Note that `map_func` must be able to accept `partition_idx` kwarg.
        **kwargs : dict
            Additional options that could be used by different engines.

        Returns
        -------
        NumPy array
            An array of new partitions for Modin Frame.

        Notes
        -----
        This method should be used in the case when `map_func` relies on
        some global information about the axis.
        """
        new_partitions = cls.broadcast_axis_partitions(
            axis=axis,
            apply_func=map_func,
            left=partitions,
            # a plain map has nothing to broadcast
            right=None,
            keep_partitioning=keep_partitioning,
            num_splits=num_splits,
            enumerate_partitions=enumerate_partitions,
            lengths=lengths,
            **kwargs,
        )
        return new_partitions

    @classmethod
    def map_partitions_joined_by_column(
        cls,
        partitions,
        column_splits,
        map_func,
        map_func_args=None,
        map_func_kwargs=None,
    ):
        """
        Join groups of row blocks within each column and apply "map_func" to them.

        Consecutive rows of `partitions` are grouped, every group is turned
        into column partitions, and the function is applied to each of them.
        The result of every application is split back into as many blocks as
        the group had rows.

        Parameters
        ----------
        partitions : NumPy 2D array
            Partitions of Modin Frame.
        column_splits : int
            The number of splits by column.
        map_func : callable
            Function to apply.
        map_func_args : iterable, optional
            Positional arguments for the 'map_func'.
        map_func_kwargs : dict, optional
            Keyword arguments for the 'map_func'.

        Returns
        -------
        NumPy array
            An array of new partitions for Modin Frame.

        Raises
        ------
        ValueError
            If `column_splits` is less than 1.
        """
        if column_splits < 1:
            raise ValueError(
                "The value of columns_splits must be greater than or equal to 1."
            )

        row_count = partitions.shape[0]
        # a group always holds at least one row of blocks
        step = max(row_count // column_splits, 1)
        preprocessed_map_func = cls.preprocess_func(map_func)
        result = np.empty(partitions.shape, dtype=object)

        positional = () if map_func_args is None else map_func_args
        keywords = {} if map_func_kwargs is None else map_func_kwargs

        for start in range(0, row_count, step):
            row_group = partitions[start : start + step]
            # The last group is shorter when the number of rows is not
            # divisible by `step`.
            group_len = len(row_group)
            virtual_columns = cls.column_partitions(row_group)
            for col in range(partitions.shape[1]):
                result[start : start + group_len, col] = virtual_columns[col].apply(
                    preprocessed_map_func,
                    *positional,
                    num_splits=group_len,
                    **keywords,
                )

        return result

    @classmethod
    def concat(cls, axis, left_parts, right_parts):
        """
        Concatenate `left_parts` with another set (or list of sets) of blocks.

        Parameters
        ----------
        axis : int
            The axis to concatenate to.
        left_parts : np.ndarray
            NumPy array of partitions to concatenate with.
        right_parts : np.ndarray or list
            NumPy array of partitions to be concatenated.

        Returns
        -------
        np.ndarray
            A new NumPy array with concatenated partitions.
        list[int] or None
            Row lengths if possible to compute it.

        Notes
        -----
        Assumes that the blocks are already the same shape on the
        dimension being concatenated. A ValueError will be thrown if this
        condition is not met.
        """
        # TODO: Possible change is `isinstance(right_parts, list)`
        if type(right_parts) is not list:
            result = np.append(left_parts, right_parts, axis=axis)
        else:
            # An empty frame contributes an array that cannot be lined up
            # with the others by `np.concatenate`, so empty arrays are
            # dropped before concatenating.
            non_empty = [part for part in right_parts if part.size != 0]
            if left_parts.size == 0:
                to_concat = non_empty
            else:
                to_concat = [left_parts] + non_empty
            if to_concat:
                result = np.concatenate(to_concat, axis=axis)
            else:
                result = left_parts

        if axis == 0:
            return cls.rebalance_partitions(result)
        return result, None

    @classmethod
    def to_pandas(cls, partitions):
        """
        Build a pandas DataFrame out of a NumPy array of PandasDataframePartition.

        Parameters
        ----------
        partitions : np.ndarray
            NumPy array of PandasDataframePartition.

        Returns
        -------
        pandas.DataFrame
            A pandas DataFrame
        """
        # fetch the underlying objects of all blocks at once, as a flat list
        retrieved_objects = cls.get_objects_from_partitions(partitions.flatten())
        # the original shape is passed along with the flat list
        return create_pandas_df_from_partitions(retrieved_objects, partitions.shape)

    @classmethod
    def to_numpy(cls, partitions, **kwargs):
        """
        Assemble one NumPy array from the data stored within `partitions`.

        Parameters
        ----------
        partitions : np.ndarray
            NumPy array of PandasDataframePartition.
        **kwargs : dict
            Keyword arguments for PandasDataframePartition.to_numpy function.

        Returns
        -------
        np.ndarray
            A NumPy array.
        """
        grid = []
        for row in partitions:
            # convert every block of the row, keeping the 2D layout
            grid.append([block.to_numpy(**kwargs) for block in row])
        return np.block(grid)

    @classmethod
    def split_pandas_df_into_partitions(
        cls, df, row_chunksize, col_chunksize, update_bar
    ):
        """
        Cut a pandas DataFrame into row/column chunks and put each chunk into a partition.

        Parameters
        ----------
        df : pandas.DataFrame
            Frame to split.
        row_chunksize : int
            Number of rows per chunk.
        col_chunksize : int
            Number of columns per chunk.
        update_bar : callable(x) -> x
            Function that updates a progress bar.

        Returns
        -------
        2D np.ndarray[PandasDataframePartition]
        """
        put_func = cls._partition_class.put
        n_columns = len(df.columns)

        # even a full-axis slice can cost something (https://github.com/pandas-dev/pandas/issues/55202)
        # so the frame is used as is when one chunk covers all the columns.
        if col_chunksize >= n_columns:
            col_parts = [df]
        else:
            col_parts = []
            for col_start in range(0, n_columns, col_chunksize):
                col_parts.append(df.iloc[:, col_start : col_start + col_chunksize])

        parts = []
        for row_start in range(0, len(df), row_chunksize):
            row_stop = row_start + row_chunksize
            parts.append(
                [
                    update_bar(put_func(col_part.iloc[row_start:row_stop]))
                    for col_part in col_parts
                ]
            )
        return np.array(parts)

    @classmethod
    @wait_computations_if_benchmark_mode
    def from_pandas(cls, df, return_dims=False):
        """
        Return the partitions from pandas.DataFrame.

        Parameters
        ----------
        df : pandas.DataFrame
            A pandas.DataFrame.
        return_dims : bool, default: False
            If it's True, return as (np.ndarray, backend, row_lengths, col_widths),
            else (np.ndarray, backend).

        Returns
        -------
        (np.ndarray, backend) or (np.ndarray, backend, row_lengths, col_widths)
            A NumPy array with partitions (with dimensions or not).

        Raises
        ------
        ImportError
            If the progress bar is enabled but ``tqdm`` cannot be imported.
        """
        num_splits = NPartitions.get()
        min_row_block_size = MinRowPartitionSize.get()
        min_column_block_size = MinColumnPartitionSize.get()
        row_chunksize = compute_chunksize(df.shape[0], num_splits, min_row_block_size)
        col_chunksize = compute_chunksize(
            df.shape[1], num_splits, min_column_block_size
        )

        pbar = None
        if ProgressBar.get():
            bar_format = (
                "{l_bar}{bar}{r_bar}"
                if os.environ.get("DEBUG_PROGRESS_BAR", "False") == "True"
                else "{desc}: {percentage:3.0f}%{bar} Elapsed time: {elapsed}, estimated remaining time: {remaining}"
            )
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                try:
                    from tqdm.autonotebook import tqdm as tqdm_notebook
                except ImportError as err:
                    raise ImportError(
                        "Please pip install tqdm to use the progress bar"
                    ) from err

            # The bar is updated once per created block, so the total is the
            # number of row chunks times the number of column chunks. Ceiling
            # division is required here: rounding would under-count whenever
            # the trailing chunk is less than half full.
            rows = max(1, -(-len(df) // row_chunksize))
            cols = max(1, -(-len(df.columns) // col_chunksize))
            pbar = tqdm_notebook(
                total=rows * cols,
                desc="Distributing Dataframe",
                bar_format=bar_format,
            )

        def update_bar(f):
            # rely on the bar actually created above rather than re-reading
            # the config, which could have changed in the meantime
            if pbar is not None:
                pbar.update(1)
            return f

        try:
            parts = cls.split_pandas_df_into_partitions(
                df, row_chunksize, col_chunksize, update_bar
            )
        finally:
            # close the bar even if splitting fails
            if pbar is not None:
                pbar.close()
        backend = get_pandas_backend(df.dtypes)

        if not return_dims:
            return parts, backend

        # every chunk is full-sized except possibly the last one
        row_lengths = [
            min(row_chunksize, len(df) - i) for i in range(0, len(df), row_chunksize)
        ]
        col_widths = [
            min(col_chunksize, len(df.columns) - i)
            for i in range(0, len(df.columns), col_chunksize)
        ]
        return parts, backend, row_lengths, col_widths

    @classmethod
    def from_arrow(cls, at, return_dims=False):
        """
        Return the partitions from Apache Arrow (PyArrow).

        The table is converted with ``to_pandas`` and then handed to
        ``from_pandas``.

        Parameters
        ----------
        at : pyarrow.table
            Arrow Table.
        return_dims : bool, default: False
            If it's True, return as (np.ndarray, backend, row_lengths, col_widths),
            else (np.ndarray, backend).

        Returns
        -------
        (np.ndarray, backend) or (np.ndarray, backend, row_lengths, col_widths)
            A NumPy array with partitions (with dimensions or not).
        """
        pandas_df = at.to_pandas()
        return cls.from_pandas(pandas_df, return_dims=return_dims)

    @classmethod
    def get_objects_from_partitions(cls, partitions):
        """
        Retrieve the objects wrapped by `partitions` (in parallel if supported).

        Parameters
        ----------
        partitions : np.ndarray
            NumPy array with ``PandasDataframePartition``-s.

        Returns
        -------
        list
            The objects wrapped by `partitions`.

        Notes
        -----
        When the class defines ``_execution_wrapper``, elements of `partitions`
        exposing ``force_materialization`` are replaced in place by its result.
        """
        if not hasattr(cls, "_execution_wrapper"):
            # no engine-level helper: fetch the partitions one by one
            return [partition.get() for partition in partitions]

        # more efficient parallel implementation
        for position, part in enumerate(partitions):
            if hasattr(part, "force_materialization"):
                partitions[position] = part.force_materialization()
        every_part_is_single_block = all(
            [len(partition.list_of_blocks) == 1 for partition in partitions]
        )
        assert (
            every_part_is_single_block
        ), "Implementation assumes that each partition contains a single block."
        block_refs = [partition.list_of_blocks[0] for partition in partitions]
        return cls._execution_wrapper.materialize(block_refs)

    @classmethod
    def wait_partitions(cls, partitions):
        """
        Block until the computations of all `partitions` are done, without materializing them.

        Each partition's ``wait`` is called in turn.

        Parameters
        ----------
        partitions : np.ndarray
            NumPy array with ``PandasDataframePartition``-s.

        Notes
        -----
        This method should be implemented in a more efficient way for engines that supports
        waiting on objects in parallel.
        """
        # sequential wait: one partition after another
        for part in partitions:
            part.wait()

    @classmethod
    def get_indices(cls, axis, partitions, index_func=None):
        """
        Collect the internal indices stored in the partitions.

        Parameters
        ----------
        axis : {0, 1}
            Axis to extract the labels over.
        partitions : np.ndarray
            NumPy array with PandasDataframePartition's.
        index_func : callable, default: None
            The function to be used to extract the indices. By default the
            labels of `axis` are taken from each partition's data.

        Returns
        -------
        pandas.Index
            A pandas Index object.
        list of pandas.Index
            The list of internal indices for each partition.

        Notes
        -----
        These are the global indices of the object. This is mostly useful
        when you have deleted rows/columns internally, but do not know
        which ones were deleted.
        """
        if index_func is None:
            index_func = lambda df: df.axes[axis]  # noqa: E731
        ErrorMessage.catch_bugs_and_request_email(not callable(index_func))
        func = cls.preprocess_func(index_func)

        # Only the first row of `target` is inspected below.
        target = partitions if axis != 0 else partitions.T
        if len(target):
            remote_indices = [part.apply(func) for part in target[0]]
            new_idx = cls.get_objects_from_partitions(remote_indices)
        else:
            new_idx = [pandas.Index([])]

        # drop empty indexes in case there are multiple partitions
        non_empty = [idx for idx in new_idx if len(idx)]
        if non_empty:
            # TODO: fix the information leak (flagged in the original code)
            total_idx = non_empty[0].append(non_empty[1:])
        else:
            # All partitions returned a zero-length index, so the index of
            # the first one is returned to preserve the index's metadata.
            total_idx = new_idx[0]
        return total_idx, new_idx

    @classmethod
    def _apply_func_to_list_of_partitions_broadcast(
        cls, func, partitions, other, **kwargs
    ):
        """
        Apply a function to a list of remote partitions, broadcasting `other` to them.

        Every partition is paired with one row of ``other.T``; the objects of
        that row are fetched with ``get`` and passed to `func` as the
        ``other`` keyword argument.

        Parameters
        ----------
        func : callable
            The func to apply.
        partitions : np.ndarray
            The partitions to which the `func` will apply.
        other : np.ndarray
            The partitions to be broadcasted to `partitions`.
        **kwargs : dict
            Keyword arguments for PandasDataframePartition.apply function.

        Returns
        -------
        list
            A list of PandasDataframePartition objects.
        """
        preprocessed_func = cls.preprocess_func(func)
        applied = []
        for target_part, broadcasted in zip(partitions, other.T):
            applied.append(
                target_part.apply(
                    preprocessed_func,
                    other=[part.get() for part in broadcasted],
                    **kwargs,
                )
            )
        return applied

    @classmethod
    def _apply_func_to_list_of_partitions(cls, func, partitions, **kwargs):
        """
        Preprocess `func` once and apply it to each of the remote partitions.

        Parameters
        ----------
        func : callable
            The func to apply.
        partitions : np.ndarray
            The partitions to which the `func` will apply.
        **kwargs : dict
            Keyword arguments for PandasDataframePartition.apply function.

        Returns
        -------
        list
            A list of PandasDataframePartition objects.

        Notes
        -----
        This preprocesses the `func` first before applying it to the partitions.
        """
        preprocessed_func = cls.preprocess_func(func)
        applied = []
        for part in partitions:
            applied.append(part.apply(preprocessed_func, **kwargs))
        return applied

    @classmethod
    def combine(cls, partitions, new_index=None, new_columns=None):
        """
        Merge a NumPy 2D array of partitions into a NumPy 2D array of a single partition.

        Parameters
        ----------
        partitions : np.ndarray
            The partitions which have to be converted to a single partition.
        new_index : pandas.Index, optional
            Index for propagation into internal partitions.
            Optimization allowing to do this in one remote kernel.
        new_columns : pandas.Index, optional
            Columns for propagation into internal partitions.
            Optimization allowing to do this in one remote kernel.

        Returns
        -------
        np.ndarray
            A NumPy 2D array of a single partition. The input array itself is
            returned when it holds at most one partition and no new labels
            were passed.
        """
        labels_untouched = new_index is None and new_columns is None
        if labels_untouched and partitions.size <= 1:
            # nothing to merge and nothing to relabel
            return partitions

        def to_pandas_remote(df, partition_shape, *dfs):
            """Copy of ``cls.to_pandas()`` method adapted for a remote function."""
            return create_pandas_df_from_partitions(
                (df,) + dfs,
                partition_shape,
                called_from_remote=True,
                new_index=new_index,
                new_columns=new_columns,
            )

        preprocessed_func = cls.preprocess_func(to_pandas_remote)
        partition_shape = partitions.shape
        partitions_flattened = partitions.flatten()
        for position, part in enumerate(partitions_flattened):
            if hasattr(part, "force_materialization"):
                partitions_flattened[position] = part.force_materialization()

        # The first block's data reaches the remote function as `df`; the
        # remaining blocks are passed after the shape as extra arguments.
        partition_refs = []
        for partition in partitions_flattened[1:]:
            partition_refs.append(partition.list_of_blocks[0])

        # NOTE(review): `apply` is called on the first element of the original
        # array, not of the flattened copy processed above - confirm intended.
        combined_partition = partitions.flat[0].apply(
            preprocessed_func, partition_shape, *partition_refs
        )
        return np.array([combined_partition]).reshape(1, -1)

    @classmethod
    @wait_computations_if_benchmark_mode
    def apply_func_to_select_indices(
        cls, axis, partitions, func, indices, keep_remaining=False
    ):
        """
        Run `func` on the partitions addressed by `indices`.

        Parameters
        ----------
        axis : {0, 1}
            Axis to apply the `func` over. For ``axis=0`` the partition grid is
            transposed before it is addressed, and the result is transposed back.
        partitions : np.ndarray
            The partitions to which the `func` will apply.
        func : callable or dict
            The function to apply to these indices of partitions. A dict is
            forwarded unchanged, and the entries matching the internal indices
            are additionally passed as the `func_dict` keyword argument.
        indices : dict
            Maps a position in the (possibly transposed) partition grid to the
            internal indices to process inside the partitions at that position.
        keep_remaining : bool, default: False
            Whether or not to keep the other partitions. Some operations
            may want to drop the remaining partitions and keep
            only the results.

        Returns
        -------
        np.ndarray
            A NumPy array with partitions.

        Notes
        -----
        Your internal function must take a kwarg `internal_indices` for
        this to work correctly (or `func_dict` when a dict is passed as `func`).
        This prevents information leakage of the internal index to the
        external representation.
        """
        if partitions.size == 0:
            return np.array([[]])

        func_is_dict = isinstance(func, dict)
        # Work on a view in which the first dimension is the one `indices` refers to.
        oriented = partitions if axis else partitions.T

        def extra_kwargs(internal):
            """Build the keyword argument carrying the internal indices."""
            if func_is_dict:
                # Only non-negative internal indices have an entry to forward.
                return {
                    "func_dict": {key: func[key] for key in internal if key >= 0}
                }
            # The callee has to understand internal (positional) indices, e.g.
            # through an `iloc` in the case of pandas.
            return {"internal_indices": internal}

        def process(position, internal):
            """Apply `func` to every partition located at `position`."""
            return cls._apply_func_to_list_of_partitions(
                func, oriented[position], **extra_kwargs(internal)
            )

        if keep_remaining:
            # Untouched positions are passed through in their original place.
            rows = [
                process(position, indices[position])
                if position in indices
                else oriented[position]
                for position in range(len(oriented))
            ]
        else:
            # Only the processed positions survive, in the order of `indices`.
            rows = [process(position, internal) for position, internal in indices.items()]

        result = np.array(rows)
        return result if axis else result.T

    @classmethod
    @wait_computations_if_benchmark_mode
    def apply_func_to_select_indices_along_full_axis(
        cls, axis, partitions, func, indices, keep_remaining=False
    ):
        """
        Run `func` on selected full-axis (whole column/row) partitions.

        Parameters
        ----------
        axis : {0, 1}
            The axis to apply the function over. ``0`` builds column partitions,
            ``1`` builds row partitions.
        partitions : np.ndarray
            The partitions to which the `func` will apply.
        func : callable or dict
            The function to apply. When a dict is passed, its entries matching
            the internal indices are forwarded as the `func_dict` keyword argument.
        indices : dict
            The global indices to apply the func to. It is iterated for the
            positions of the axis partitions and indexed (``indices[i]``) for
            the internal indices of each of them.
        keep_remaining : bool, default: False
            Whether or not to keep the other partitions.
            Some operations may want to drop the remaining partitions and
            keep only the results.

        Returns
        -------
        np.ndarray
            A NumPy array with partitions.

        Notes
        -----
        This should be used when you need to apply a function that relies
        on some global information for the entire column/row, but only need
        to apply a function to a subset.
        For your func to operate directly on the indices provided,
        it must use `internal_indices` as a keyword argument.
        """
        if partitions.size == 0:
            return np.array([[]])

        func_is_dict = isinstance(func, dict)
        preprocessed_func = cls.preprocess_func(func)
        along_columns = not axis

        # When the untouched blocks are dropped, only the addressed slices of the
        # grid are turned into axis partitions; otherwise the whole grid is used.
        if keep_remaining:
            selected = partitions
        else:
            oriented = partitions.T if along_columns else partitions
            selected = np.array([oriented[i] for i in indices])
            if along_columns:
                selected = selected.T

        if along_columns:
            axis_parts = cls.column_partitions(selected)
            untouched = partitions.T
        else:
            axis_parts = cls.row_partitions(selected)
            untouched = partitions

        def extra_kwargs(position):
            """Build the keyword argument carrying the internal indices."""
            if func_is_dict:
                return {"func_dict": {key: func[key] for key in indices[position]}}
            return {"internal_indices": indices[position]}

        if not keep_remaining:
            applied = [
                part.apply(preprocessed_func, **extra_kwargs(position))
                for position, part in zip(indices, axis_parts)
            ]
        elif func_is_dict:
            # NOTE(review): unlike the three other paths this one goes through
            # `_apply_func_to_list_of_partitions` with a single axis partition
            # instead of calling `.apply` on it - confirm this is intended.
            applied = [
                cls._apply_func_to_list_of_partitions(
                    preprocessed_func,
                    axis_parts[position],
                    **extra_kwargs(position),
                )
                if position in indices
                else untouched[position]
                for position in range(len(axis_parts))
            ]
        else:
            applied = [
                axis_parts[position].apply(
                    preprocessed_func, **extra_kwargs(position)
                )
                if position in indices
                else untouched[position]
                for position in range(len(untouched))
            ]

        result = np.array(applied)
        return result.T if along_columns else result

    @classmethod
    @wait_computations_if_benchmark_mode
    def apply_func_to_indices_both_axis(
        cls,
        partitions,
        func,
        row_partitions_list,
        col_partitions_list,
        item_to_distribute=no_default,
        row_lengths=None,
        col_widths=None,
    ):
        """
        Apply a function along both axes.

        Parameters
        ----------
        partitions : np.ndarray
            The partitions to which the `func` will apply.
        func : callable
            The function to apply.
        row_partitions_list : iterable of tuples
            Iterable of tuples, containing 2 values:
                1. Integer row partition index.
                2. Internal row indexer of this partition.
        col_partitions_list : iterable of tuples
            Iterable of tuples, containing 2 values:
                1. Integer column partition index.
                2. Internal column indexer of this partition.
        item_to_distribute : np.ndarray or scalar, default: no_default
            The item to split up so it can be applied over both axes.
        row_lengths : list of ints, optional
            Lengths of partitions for every row of `partitions`, addressed by the
            row partition index. If not specified this information is extracted
            from partitions itself. Missing (``None``) entries that are needed
            are computed and stored back into the list.
        col_widths : list of ints, optional
            Widths of partitions for every column of `partitions`, addressed by
            the column partition index. If not specified this information is
            extracted from partitions itself. Missing (``None``) entries that
            are needed are computed and stored back into the list.

        Returns
        -------
        np.ndarray
            A NumPy array with partitions.

        Notes
        -----
        For your func to operate directly on the indices provided,
        it must use `row_internal_indices`, `col_internal_indices` as keyword
        arguments.
        """
        partition_copy = partitions.copy()
        row_position_counter = 0

        # Sizes are looked up by *block* index (not by the position inside
        # `row_partitions_list`/`col_partitions_list`), so the default containers
        # must span the whole grid. Padding the shape keeps degenerate (non 2-d)
        # inputs from failing before any block is actually addressed.
        n_row_blocks, n_col_blocks, *_ = tuple(partitions.shape) + (0, 0)
        if row_lengths is None:
            row_lengths = [None] * n_row_blocks
        if col_widths is None:
            col_widths = [None] * n_col_blocks

        def compute_part_size(indexer, remote_part, blk_idx, axis):
            """Compute indexer length along the specified axis for the passed partition."""
            if not isinstance(indexer, slice):
                return len(indexer)
            # A slice can only be measured against the real size of the block.
            shapes_container = row_lengths if axis == 0 else col_widths
            part_size = shapes_container[blk_idx]
            if part_size is None:
                part_size = remote_part.length() if axis == 0 else remote_part.width()
                shapes_container[blk_idx] = part_size
            return len(range(*indexer.indices(part_size)))

        for row_blk_idx, row_internal_idx in row_partitions_list:
            col_position_counter = 0
            row_offset = 0
            for col_blk_idx, col_internal_idx in col_partitions_list:
                remote_part = partition_copy[row_blk_idx, col_blk_idx]

                row_offset = compute_part_size(
                    row_internal_idx, remote_part, row_blk_idx, axis=0
                )
                col_offset = compute_part_size(
                    col_internal_idx, remote_part, col_blk_idx, axis=1
                )

                if item_to_distribute is not no_default:
                    if isinstance(item_to_distribute, np.ndarray):
                        # Hand every block only the window of the item it covers.
                        item = item_to_distribute[
                            row_position_counter : row_position_counter + row_offset,
                            col_position_counter : col_position_counter + col_offset,
                        ]
                    else:
                        item = item_to_distribute
                    item = {"item": item}
                else:
                    item = {}
                block_result = remote_part.add_to_apply_calls(
                    func,
                    row_internal_indices=row_internal_idx,
                    col_internal_indices=col_internal_idx,
                    **item,
                )
                partition_copy[row_blk_idx, col_blk_idx] = block_result
                col_position_counter += col_offset
            row_position_counter += row_offset
        return partition_copy

    @classmethod
    @wait_computations_if_benchmark_mode
    def n_ary_operation(cls, left, func, right: list):
        r"""
        Combine several ``PandasDataframe`` objects partition by partition.

        All the partition grids in `left` and `right` are assumed to have the
        same dimensions. The partition at position (i, j) of the result holds
        func(left_partitions[i,j], \*each_right_partitions[i,j]).

        Parameters
        ----------
        left : np.ndarray
            The partitions of left ``PandasDataframe``.
        func : callable
            The function to apply.
        right : list of np.ndarray
            The list of partitions of other ``PandasDataframe``.

        Returns
        -------
        np.ndarray
            A NumPy array with new partitions.
        """
        func = cls.preprocess_func(func)

        def single_right_block(right_partitions, row_idx, col_idx):
            """Return the only block of the right partition at the given position."""
            partition = right_partitions[row_idx][col_idx]
            # NOTE:
            # Currently we do one remote call per right virtual partition to
            # materialize the partitions' blocks, then another remote call to do
            # the n_ary operation. We could get better performance if we
            # assembled the other partition within the remote `apply` call, by
            # passing the partition in as `other_axis_partition`. However,
            # passing `other_axis_partition` requires some extra care that would
            # complicate the code quite a bit:
            # - block partitions don't know how to deal with `other_axis_partition`
            # - the right axis partition's axis could be different from the axis
            #   of the corresponding left partition
            # - there can be multiple other_axis_partition because this is an n-ary
            #   operation and n can be > 2.
            # So for now just do the materialization in a separate remote step.
            if len(partition.list_of_blocks) > 1:
                partition.force_materialization()
            assert len(partition.list_of_blocks) == 1
            return partition.list_of_blocks[0]

        result_rows = []
        for row_idx in range(len(left)):
            new_row = []
            for col_idx, left_part in enumerate(left[row_idx]):
                new_row.append(
                    left_part.apply(
                        func,
                        *[
                            single_right_block(other, row_idx, col_idx)
                            for other in right
                        ],
                    )
                )
            result_rows.append(new_row)
        return np.array(result_rows)

    @classmethod
    def finalize(cls, partitions):
        """
        Perform all deferred calls on partitions.

        Parameters
        ----------
        partitions : np.ndarray
            Partitions of Modin Dataframe on which all deferred calls should be performed.
        """
        # Draining is done purely for its side effect, so use plain loops instead
        # of building (and immediately discarding) a list of return values.
        for row in partitions:
            for part in row:
                part.drain_call_queue()

    @classmethod
    def rebalance_partitions(cls, partitions):
        """
        Rebalance a 2-d array of partitions along the row axis.

        The array is returned unchanged unless the number of row partitions exceeds
        1.5 times the ``NPartitions`` value. Otherwise a new array of partitions
        is built out of the original ones so that:

        - If all partitions have a length, each new partition has roughly the same number of rows.
        - Otherwise, each new partition spans roughly the same number of old partitions.

        Parameters
        ----------
        partitions : np.ndarray
            The 2-d array of partitions to rebalance.

        Returns
        -------
        np.ndarray
            A NumPy array with either the same partitions or new, rebalanced ones.
        list[int] or None
            Row lengths if possible to compute it. ``None`` is returned when the
            partitions are left untouched or when some lengths are unknown.

        Notes
        -----
        NOTE(review): the original summary said that rebalancing only happens for
        ``PandasOnRay`` or ``PandasOnDask`` executions, but this implementation
        contains no engine check itself - presumably other executions override
        or never call it; confirm against the callers.
        """
        # We rebalance when the ratio of the number of existing partitions to
        # the ideal number of partitions is larger than this threshold. The
        # threshold is a heuristic that may need to be tuned for performance.
        max_excess_of_num_partitions = 1.5
        num_existing_partitions = partitions.shape[0]
        ideal_num_new_partitions = NPartitions.get()
        if (
            num_existing_partitions
            <= ideal_num_new_partitions * max_excess_of_num_partitions
        ):
            # Few enough row partitions already: nothing to do, lengths not computed.
            return partitions, None
        # If any partition has an unknown length, give each axis partition
        # roughly the same number of row partitions. We use `_length_cache` here
        # to avoid materializing any unmaterialized lengths.
        if any(
            partition._length_cache is None for row in partitions for partition in row
        ):
            # We need each partition to go into an axis partition, but the
            # number of axis partitions may not evenly divide the number of
            # partitions.
            chunk_size = compute_chunksize(
                num_existing_partitions, ideal_num_new_partitions, min_block_size=1
            )
            # Every group of `chunk_size` consecutive rows of the grid becomes
            # one row of (non full-axis) column partitions.
            new_partitions = np.array(
                [
                    cls.column_partitions(
                        partitions[i : i + chunk_size],
                        full_axis=False,
                    )
                    for i in range(
                        0,
                        num_existing_partitions,
                        chunk_size,
                    )
                ]
            )
            return new_partitions, None

        # If we know the number of rows in every partition, then we should try
        # instead to give each new partition roughly the same number of rows.
        new_partitions = []
        # `start` is the index of the first existing partition that we want to
        # put into the current new partition.
        start = 0
        # Row counts are read from the first column of the grid only.
        total_rows = sum(part.length() for part in partitions[:, 0])
        ideal_partition_size = compute_chunksize(
            total_rows, ideal_num_new_partitions, min_block_size=1
        )
        for _ in range(ideal_num_new_partitions):
            # We might pick up old partitions too quickly and exhaust all of them.
            if start >= len(partitions):
                break
            # `stop` is the index of the last existing partition so far that we
            # want to put into the current new partition.
            stop = start
            partition_size = partitions[start][0].length()
            # Add existing partitions into the current new partition until the
            # number of rows in the new partition hits `ideal_partition_size`.
            while stop < len(partitions) and partition_size < ideal_partition_size:
                stop += 1
                if stop < len(partitions):
                    partition_size += partitions[stop][0].length()
            # If the new partition is larger than we want, split the last
            # current partition that it contains into two partitions, where
            # the first partition has just enough rows to make the current
            # new partition have length `ideal_partition_size`, and the second
            # partition has the remainder.
            if partition_size > ideal_partition_size * max_excess_of_num_partitions:
                prev_length = sum(row[0].length() for row in partitions[start:stop])
                new_last_partition_size = ideal_partition_size - prev_length
                # Insert a new grid row right after `stop`, built by masking
                # every block of row `stop` from `new_last_partition_size` on
                # (presumably `mask` takes a row indexer and a column indexer).
                # `np.insert` returns a new array, so `partitions` is rebound.
                partitions = np.insert(
                    partitions,
                    stop + 1,
                    [
                        obj.mask(slice(new_last_partition_size, None), slice(None))
                        for obj in partitions[stop]
                    ],
                    0,
                )
                # TODO: explicit `_length_cache` computing may be avoided after #4903 is merged
                for obj in partitions[stop + 1]:
                    obj._length_cache = partition_size - (
                        prev_length + new_last_partition_size
                    )

                # Row `stop` itself is replaced by blocks masked to the first
                # `new_last_partition_size` rows.
                partitions[stop, :] = [
                    obj.mask(slice(None, new_last_partition_size), slice(None))
                    for obj in partitions[stop]
                ]
                # TODO: explicit `_length_cache` computing may be avoided after #4903 is merged
                for obj in partitions[stop]:
                    obj._length_cache = new_last_partition_size

            # The new virtual partitions are not `full_axis`, even if they
            # happen to span all rows in the dataframe, because they are
            # meant to be the final partitions of the dataframe. They've
            # already been split up correctly along axis 0, but using the
            # default full_axis=True would cause partition.apply() to split
            # its result along axis 0.
            new_partitions.append(
                cls.column_partitions(partitions[start : stop + 1], full_axis=False)
            )
            # The next new partition starts right after the rows consumed here
            # (which is the inserted remainder row when a split happened).
            start = stop + 1
        new_partitions = np.array(new_partitions)
        lengths = [part.length() for part in new_partitions[:, 0]]
        return new_partitions, lengths

    @classmethod
    @wait_computations_if_benchmark_mode
    def shuffle_partitions(
        cls,
        partitions,
        index,
        shuffle_functions: "ShuffleFunctions",
        final_shuffle_func,
        right_partitions=None,
    ):
        """
        Return shuffled partitions.

        The column partitions at `index` are sampled, the samples are handed to
        ``shuffle_functions.pivot_fn`` to get the number of bins, and then every
        row partition is either split into that many bins and regrouped per bin,
        or (for a single bin) processed row-wise as is.

        Parameters
        ----------
        partitions : np.ndarray
            The 2-d array of partitions to shuffle.
        index : int or list of ints
            The index(es) of the column partitions corresponding to the partitions that contain the column to sample.
        shuffle_functions : ShuffleFunctions
            An object implementing the functions that we will be using to perform this shuffle.
        final_shuffle_func : Callable(pandas.DataFrame) -> pandas.DataFrame
            Function that shuffles the data within each new partition.
        right_partitions : np.ndarray, optional
            Partitions to broadcast to `self` partitions. If specified, the method builds range-partitioning
            for `right_partitions` basing on bins calculated for `partitions`, then performs broadcasting.

        Returns
        -------
        np.ndarray
            A list of row-partitions that have been shuffled.
        """
        # Only the partitions holding the column(s) to sample are looked at here.
        key_partitions = partitions[:, index]
        sampler = cls.preprocess_func(shuffle_functions.sample_fn)
        if key_partitions.ndim == 1:
            # A single column of blocks: sample each block directly.
            remote_samples = [block.apply(sampler) for block in key_partitions]
        else:
            # Several columns: sample each row of blocks as one row partition.
            remote_samples = [
                cls._row_partition_class(blocks, full_axis=False).apply(sampler)
                for blocks in key_partitions
            ]
        # The pivot function works on the fetched samples.
        num_bins = shuffle_functions.pivot_fn(
            cls.get_objects_from_partitions(remote_samples)
        )
        # Full-axis row partitions are required: the whole row has to be sent to
        # the split step, as otherwise we wouldn't know how to split the blocks
        # that don't contain the shuffling key.
        left_rows = cls.row_partitions(partitions)

        def split_into_bins(row_parts):
            """Split every row partition into `num_bins` pieces, grouped by bin."""
            pieces = [
                part.split(
                    shuffle_functions.split_fn,
                    num_splits=num_bins,
                    # The partition's metadata will never be accessed for the
                    # split partitions, thus no need to compute it.
                    extract_metadata=False,
                )
                for part in row_parts
            ]
            return np.array(pieces).T

        if num_bins > 1:
            left_bins = split_into_bins(left_rows)
            if right_partitions is None:
                # Every bin gathered from the splits becomes one column partition.
                return np.array(
                    [
                        [
                            cls._column_partitions_class(
                                bin_parts, full_axis=False
                            ).apply(final_shuffle_func)
                        ]
                        for bin_parts in left_bins
                    ]
                )

            right_bins = split_into_bins(cls.row_partitions(right_partitions))
            return np.array(
                [
                    cls._column_partitions_class(left_bin, full_axis=False).apply(
                        final_shuffle_func,
                        other_axis_partition=cls._column_partitions_class(right_bin),
                    )
                    for left_bin, right_bin in zip(left_bins, right_bins)
                ]
            )

        # Without pivots the function can simply be applied row-wise.
        if right_partitions is None:
            return np.array([row.apply(final_shuffle_func) for row in left_rows])
        right_rows = cls.row_partitions(right_partitions)
        return np.array(
            [
                left_row.apply(final_shuffle_func, other_axis_partition=right_row)
                for left_row, right_row in zip(left_rows, right_rows)
            ]
        )


================================================
FILE: modin/core/dataframe/pandas/utils.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.


"""Collection of utility functions for the PandasDataFrame."""

import pandas
from pandas.api.types import union_categoricals

from modin.error_message import ErrorMessage


def concatenate(dfs, copy=True):
    """
    Concatenate pandas DataFrames while preserving the 'category' dtype.

    Categorical columns (as found in the first frame) are re-encoded in place
    in every passed frame so that they share the union of all categories.

    All dataframes' columns must be equal to each other.

    Parameters
    ----------
    dfs : list
        List of pandas DataFrames to concatenate.
    copy : bool, default: True
        Make explicit copy when creating dataframe.

    Returns
    -------
    pandas.DataFrame
        A pandas DataFrame.
    """
    first = dfs[0]
    assert all(frame.columns.equals(first.columns) for frame in dfs)

    category_labels = first.select_dtypes("category").columns
    for position in first.columns.get_indexer_for(category_labels):
        pieces = [frame.iloc[:, position] for frame in dfs]
        categorical_sizes = [
            len(piece)
            for piece in pieces
            if isinstance(piece.dtype, pandas.CategoricalDtype)
        ]
        has_non_categorical_parts = len(categorical_sizes) != len(pieces)
        all_categorical_parts_are_empty = bool(categorical_sizes) and not any(
            categorical_sizes
        )
        # 'union_categoricals' raises an error if some of the passed values don't
        # have categorical dtype. In that case we only skip the column when every
        # part that does have a categorical dtype is empty. This can happen if an
        # aggregation discards categorical dtypes but doesn't properly do so for
        # empty partitions.
        if has_non_categorical_parts and all_categorical_parts_are_empty:
            continue
        merged = union_categoricals(pieces)
        for frame in dfs:
            recoded = pandas.Categorical(
                frame.iloc[:, position], categories=merged.categories
            )
            frame.isetitem(position, recoded)

    # `ValueError: buffer source array is read-only` if copy==False
    if len(dfs) == 1 and copy:
        # concat doesn't make a copy for a single frame, so do it explicitly
        return first.copy()
    return pandas.concat(dfs, copy=copy)


def create_pandas_df_from_partitions(
    partition_data,
    partition_shape,
    called_from_remote=False,
    new_index=None,
    new_columns=None,
):
    """
    Assemble one pandas DataFrame out of the data of many partitions.

    Parameters
    ----------
    partition_data : list
        List of pandas DataFrames or list of Object references holding pandas DataFrames.
    partition_shape : int or tuple
        Shape of the partitions NumPy array.
    called_from_remote : bool, default: False
        Flag used to check if explicit copy should be done in concat.
    new_index : pandas.Index, optional
        Index for propagation into internal partitions.
        Optimization allowing to do this in one remote kernel.
    new_columns : pandas.Index, optional
        Columns for propagation into internal partitions.
        Optimization allowing to do this in one remote kernel.

    Returns
    -------
    pandas.DataFrame
        A pandas DataFrame.
    """

    def every_part_is(rows, kind):
        return all(isinstance(part, kind) for row in rows for part in row)

    def is_part_empty(part):
        if not part.empty:
            return False
        if isinstance(part, pandas.DataFrame):
            # an empty frame only counts as empty when it has no columns either
            return len(part.columns) == 0
        return True

    holds_pandas_objects = all(
        isinstance(obj, (pandas.DataFrame, pandas.Series)) for obj in partition_data
    )
    if holds_pandas_objects:
        # The flat list is folded back into a 2D layout; the trailing `0` supplies
        # a width when the shape has a single dimension.
        n_rows, n_cols, *_ = tuple(partition_shape) + (0,)
        flat = iter(partition_data)
        partition_data = [[next(flat) for _ in range(n_cols)] for _ in range(n_rows)]
    else:
        # Partitions do not always contain pandas objects: calling
        # `partition.get` is not always equivalent to `partition.to_pandas`.
        partition_data = [[obj.to_pandas() for obj in part] for part in partition_data]

    if every_part_is(partition_data, pandas.Series):
        axis = 0
    elif every_part_is(partition_data, pandas.DataFrame):
        axis = 1
    else:
        ErrorMessage.catch_bugs_and_request_email(True)

    # Rows made up solely of empty parts are left out.
    df_rows = [
        pandas.concat(list(row), axis=axis, copy=False)
        for row in partition_data
        if not all(is_part_empty(part) for part in row)
    ]

    # to reduce peak memory consumption
    del partition_data

    if df_rows:
        res = concatenate(df_rows, copy=not called_from_remote)
    else:
        res = pandas.DataFrame()

    if new_index is not None:
        res.index = new_index
    if new_columns is not None:
        res.columns = new_columns

    return res


================================================
FILE: modin/core/execution/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Modin's functionality related to execution engines supported."""


================================================
FILE: modin/core/execution/dask/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Modin's functionality related to Dask execution engine."""


================================================
FILE: modin/core/execution/dask/common/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Common utilities for Dask execution engine."""

from .engine_wrapper import DaskWrapper
from .utils import initialize_dask

__all__ = [
    "initialize_dask",
    "DaskWrapper",
]


================================================
FILE: modin/core/execution/dask/common/engine_wrapper.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses class responsible for execution of remote operations."""

from collections import UserDict

import pandas
from dask.distributed import wait
from distributed import Future
from distributed.client import default_client
from distributed.worker import get_worker


def get_dask_client():
    """
    Return the Dask client, falling back to the worker's client on a Dask worker.

    Returns
    -------
    distributed.Client
        The Dask client.
    """
    try:
        return default_client()
    except ValueError:
        # No default client is available: we ought to be in a worker process,
        # so reuse the client attached to that worker.
        return get_worker().client


def _deploy_dask_func(func, *args, return_pandas_df=None, **kwargs):  # pragma: no cover
    """
    Call `func` with the given arguments, optionally coercing the result to a DataFrame.

    Parameters
    ----------
    func : callable
        A local function that we want to call remotely.
    *args : iterable
        Positional arguments to pass to `func` when calling remotely.
    return_pandas_df : bool, optional
        Whether to convert the result of `func` to a pandas DataFrame or not.
    **kwargs : dict
        Keyword arguments to pass to `func` when calling remotely.

    Returns
    -------
    object
        The value returned by `func`; wrapped in ``pandas.DataFrame`` when
        `return_pandas_df` is truthy and the value is not a DataFrame already.
    """
    outcome = func(*args, **kwargs)
    if not return_pandas_df or isinstance(outcome, pandas.DataFrame):
        return outcome
    return pandas.DataFrame(outcome)


class DaskWrapper:
    """The class responsible for execution of remote operations."""

    @classmethod
    def deploy(
        cls,
        func,
        f_args=None,
        f_kwargs=None,
        return_pandas_df=None,
        num_returns=1,
        pure=True,
    ):
        """
        Deploy a function in a worker process.

        Parameters
        ----------
        func : callable or distributed.Future
            Function to be deployed in a worker process.
        f_args : list or tuple, optional
            Positional arguments to pass to ``func``.
        f_kwargs : dict, optional
            Keyword arguments to pass to ``func``.
        return_pandas_df : bool, optional
            Whether to convert the result of `func` to a pandas DataFrame or not.
            It is only forwarded when ``func`` is not callable (the Future case).
        num_returns : int, default: 1
            The number of returned objects.
        pure : bool, default: True
            Whether or not `func` is pure. See `Client.submit` for details.

        Returns
        -------
        distributed.Future or list
            A single future when ``num_returns`` is 1; otherwise a list of
            ``num_returns`` futures, each selecting one element of the result.
        """
        client = get_dask_client()
        args = [] if f_args is None else f_args
        kwargs = {} if f_kwargs is None else f_kwargs
        if callable(func):
            remote_task_future = client.submit(func, *args, pure=pure, **kwargs)
        else:
            # for the case where type(func) is distributed.Future
            remote_task_future = client.submit(
                _deploy_dask_func,
                func,
                *args,
                pure=pure,
                return_pandas_df=return_pandas_df,
                **kwargs,
            )
        if num_returns != 1:
            # One extra task per requested output picks its element out of the result.
            return [
                client.submit(lambda tup, i: tup[i], remote_task_future, i)
                for i in range(num_returns)
            ]
        return remote_task_future

    @classmethod
    def is_future(cls, item):
        """
        Check if the item is a Future.

        Parameters
        ----------
        item : distributed.Future or object
            Future or object to check.

        Returns
        -------
        boolean
            If the value is a future.
        """
        return isinstance(item, Future)

    @classmethod
    def materialize(cls, future):
        """
        Materialize data matching `future` object.

        Parameters
        ----------
        future : distributed.Future or list
            Future object or list of future objects whereby data needs to be materialized.

        Returns
        -------
        Any
            An object(s) from the distributed memory.
        """
        client = get_dask_client()
        return client.gather(future)

    @classmethod
    def put(cls, data, **kwargs):
        """
        Put data into distributed memory.

        Parameters
        ----------
        data : list, dict, or object
            Data to scatter out to workers. Output type matches input type.
        **kwargs : dict
            Additional keyword arguments to be passed in `Client.scatter`.

        Returns
        -------
        List, dict, iterator, or queue of futures matching the type of input.
        """
        if isinstance(data, dict):
            # there is a bug that looks similar to https://github.com/dask/distributed/issues/3965;
            # to avoid this we could change behaviour for serialization:
            # <Future: finished, type: collections.UserDict, key: UserDict-b8a15c164319c1d32fd28481125de455>
            # vs
            # {'sep': <Future: finished, type: pandas._libs.lib._NoDefault, key: sep>, \
            #  'delimiter': <Future: finished, type: NoneType, key: delimiter> ...
            data = UserDict(data)
        client = get_dask_client()
        return client.scatter(data, **kwargs)

    @classmethod
    def wait(cls, obj_ids, num_returns=None):
        """
        Wait on the objects without materializing them (blocking operation).

        Parameters
        ----------
        obj_ids : list, scalar
            Object(s) to wait on. Anything that is not a list is treated
            as a single item.
        num_returns : int, optional
            How many of the objects have to be finished before returning.
            All of them when not specified.
        """
        if not isinstance(obj_ids, list):
            obj_ids = [obj_ids]
        if num_returns is None:
            num_returns = len(obj_ids)
        if num_returns == len(obj_ids):
            wait(obj_ids, return_when="ALL_COMPLETED")
        else:
            # Dask doesn't natively support `num_returns` as int.
            # `wait` function doesn't always return only one finished future,
            # so a simple loop is not enough here
            done, not_done = wait(obj_ids, return_when="FIRST_COMPLETED")
            # Stop as soon as enough futures are finished, or when nothing is
            # left pending (e.g. `num_returns` exceeds the number of objects),
            # so the loop can never spin forever.
            while len(done) < num_returns and not_done:
                extra_done, not_done = wait(not_done, return_when="FIRST_COMPLETED")
                done.update(extra_done)


================================================
FILE: modin/core/execution/dask/common/utils.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses utility function to initialize Dask environment."""

import os

from modin.config import (
    CIAWSAccessKeyID,
    CIAWSSecretAccessKey,
    CpuCount,
    DaskThreadsPerWorker,
    GithubCI,
    Memory,
    NPartitions,
)
from modin.core.execution.utils import set_env


def initialize_dask():
    """
    Initialize Dask environment.

    Nothing is done when running inside a Dask worker. Otherwise the default
    client is reused if there is one, or a new client is started from Modin's
    configuration. ``NPartitions`` and ``CpuCount`` are then set to the number
    of entries in ``client.ncores()``.
    """
    from distributed.client import default_client
    from distributed.worker import get_worker

    def _disable_warnings():
        import warnings

        warnings.simplefilter("ignore", category=FutureWarning)

    try:
        # Succeeds only when running within a Dask worker process
        get_worker()
    except ValueError:
        # Not in a Dask worker, proceed to check for or create a client
        pass
    else:
        # We are in a worker process and should not create a new client
        return

    try:
        client = default_client()
        client.run(_disable_warnings)
    except ValueError:
        from distributed import Client

        cpu_count = CpuCount.get()
        worker_threads = DaskThreadsPerWorker.get()
        total_memory = Memory.get()
        if total_memory:
            worker_memory_limit = total_memory // cpu_count
        else:
            worker_memory_limit = "auto"

        # when the client is initialized, environment variables are inherited
        with set_env(PYTHONWARNINGS="ignore::FutureWarning"):
            client = Client(
                n_workers=cpu_count,
                threads_per_worker=worker_threads,
                memory_limit=worker_memory_limit,
            )

        if GithubCI.get():
            # set these keys to run tests that write to the mock s3 service. this seems
            # to be the way to pass environment variables to the workers:
            # https://jacobtomlinson.dev/posts/2021/bio-for-2021/
            access_key = CIAWSAccessKeyID.get()
            aws_secret = CIAWSSecretAccessKey.get()

            def _export_aws_credentials():
                return os.environ.update(
                    {
                        "AWS_ACCESS_KEY_ID": access_key,
                        "AWS_SECRET_ACCESS_KEY": aws_secret,
                    }
                )

            client.run(_export_aws_credentials)

    worker_count = len(client.ncores())
    NPartitions._put(worker_count)
    CpuCount._put(worker_count)


================================================
FILE: modin/core/execution/dask/implementations/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Modin's functionality related to Dask execution engine and optimized for specific storage formats."""


================================================
FILE: modin/core/execution/dask/implementations/pandas_on_dask/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Modin's functionality related to Dask execution engine and optimized for pandas storage format."""


================================================
FILE: modin/core/execution/dask/implementations/pandas_on_dask/dataframe/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Base Modin Dataframe class optimized for pandas on Dask execution."""

from .dataframe import PandasOnDaskDataframe

__all__ = ["PandasOnDaskDataframe"]


================================================
FILE: modin/core/execution/dask/implementations/pandas_on_dask/dataframe/dataframe.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses class that implements ``PandasDataframe``."""

from modin.core.dataframe.pandas.dataframe.dataframe import PandasDataframe
from modin.utils import _inherit_docstrings

from ..partitioning.partition_manager import PandasOnDaskDataframePartitionManager


class PandasOnDaskDataframe(PandasDataframe):
    """
    Implementation of the ``PandasDataframe`` interface for the Dask engine.

    Parameters
    ----------
    partitions : np.ndarray
        A 2D NumPy array of partitions.
    index : sequence
        The index for the dataframe. Converted to a pandas.Index.
    columns : sequence
        The columns object for the dataframe. Converted to a pandas.Index.
    row_lengths : list, optional
        The length of each partition in the rows. The "height" of
        each of the block partitions. Is computed if not provided.
    column_widths : list, optional
        The width of each partition in the columns. The "width" of
        each of the block partitions. Is computed if not provided.
    dtypes : pandas.Series, optional
        The data types for the dataframe columns.
    pandas_backend : {"pyarrow", None}, optional
        Backend used by pandas. None - means default NumPy backend.
    """

    _partition_mgr_cls = PandasOnDaskDataframePartitionManager

    @classmethod
    def reconnect(cls, address, attributes):  # noqa: GL08
        # Counterpart of the custom `__reduce__`: make sure the process that
        # rebuilds the object has a default client, then restore the instance
        # from its attribute dictionary without running `__init__`.
        from distributed import default_client

        try:
            default_client()
        except ValueError:
            from distributed import Client

            # no default client yet: set one up for the worker process
            # using the address supplied by `__reduce__`
            worker_client = Client(address)  # noqa: F841
        restored = cls.__new__(cls)
        restored.__dict__.update(attributes)
        return restored

    def __reduce__(self):  # noqa: GL08
        # Ship the scheduler address along with the instance state so that
        # `reconnect` can configure a client on the receiving side.
        from distributed import default_client

        scheduler_address = default_client().scheduler_info()["address"]
        return (self.reconnect, (scheduler_address, self.__dict__))

    @property
    @_inherit_docstrings(PandasDataframe.engine)
    def engine(self) -> str:
        return "Dask"


================================================
FILE: modin/core/execution/dask/implementations/pandas_on_dask/io/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Base IO classes optimized for pandas on Dask execution."""

from .io import PandasOnDaskIO

__all__ = [
    "PandasOnDaskIO",
]


================================================
FILE: modin/core/execution/dask/implementations/pandas_on_dask/io/io.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses class that implements ``BaseIO`` using Dask as an execution engine."""

import numpy as np
from distributed.client import default_client

from modin.core.execution.dask.common import DaskWrapper
from modin.core.execution.dask.implementations.pandas_on_dask.dataframe import (
    PandasOnDaskDataframe,
)
from modin.core.execution.dask.implementations.pandas_on_dask.partitioning import (
    PandasOnDaskDataframePartition,
)
from modin.core.io import (
    BaseIO,
    CSVDispatcher,
    ExcelDispatcher,
    FeatherDispatcher,
    FWFDispatcher,
    JSONDispatcher,
    ParquetDispatcher,
    SQLDispatcher,
)
from modin.core.storage_formats.pandas.parsers import (
    PandasCSVParser,
    PandasExcelParser,
    PandasFeatherParser,
    PandasFWFParser,
    PandasJSONParser,
    PandasParquetParser,
    PandasSQLParser,
)
from modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler
from modin.distributed.dataframe.pandas.partitions import (
    from_partitions,
    unwrap_partitions,
)
from modin.experimental.core.io import (
    ExperimentalCSVGlobDispatcher,
    ExperimentalCustomTextDispatcher,
    ExperimentalGlobDispatcher,
    ExperimentalSQLDispatcher,
)
from modin.experimental.core.storage_formats.pandas.parsers import (
    ExperimentalCustomTextParser,
    ExperimentalPandasCSVGlobParser,
    ExperimentalPandasJsonParser,
    ExperimentalPandasParquetParser,
    ExperimentalPandasPickleParser,
    ExperimentalPandasXmlParser,
)
from modin.pandas.series import Series
from modin.utils import MODIN_UNNAMED_SERIES_LABEL


class PandasOnDaskIO(BaseIO):
    """Implementation of the ``BaseIO`` interface with Dask as the execution engine."""

    frame_cls = PandasOnDaskDataframe
    frame_partition_cls = PandasOnDaskDataframePartition
    query_compiler_cls = PandasQueryCompiler
    build_args = {
        "frame_cls": PandasOnDaskDataframe,
        "frame_partition_cls": PandasOnDaskDataframePartition,
        "query_compiler_cls": PandasQueryCompiler,
        "base_io": BaseIO,
    }

    def _reader(*classes, build_args=build_args):
        # Compose an anonymous class from the engine wrapper and the given
        # classes and return its `read` attribute (reduces code duplication).
        composed = type("", (DaskWrapper, *classes), build_args)
        return composed.read

    def _writer(*classes, build_args=build_args):
        # Same as `_reader`, but returns the `write` attribute.
        composed = type("", (DaskWrapper, *classes), build_args)
        return composed.write

    read_csv = _reader(PandasCSVParser, CSVDispatcher)
    read_fwf = _reader(PandasFWFParser, FWFDispatcher)
    read_json = _reader(PandasJSONParser, JSONDispatcher)
    read_parquet = _reader(PandasParquetParser, ParquetDispatcher)
    to_parquet = _writer(ParquetDispatcher)
    # Blocked on pandas-dev/pandas#12236. It is faster to default to pandas.
    # read_hdf = __make_read(PandasHDFParser, HDFReader)
    read_feather = _reader(PandasFeatherParser, FeatherDispatcher)
    read_sql = _reader(PandasSQLParser, SQLDispatcher)
    to_sql = _writer(SQLDispatcher)
    read_excel = _reader(PandasExcelParser, ExcelDispatcher)

    # experimental methods that don't exist in pandas
    read_csv_glob = _reader(
        ExperimentalPandasCSVGlobParser, ExperimentalCSVGlobDispatcher
    )
    read_parquet_glob = _reader(
        ExperimentalPandasParquetParser, ExperimentalGlobDispatcher
    )
    to_parquet_glob = _writer(
        ExperimentalGlobDispatcher,
        build_args={**build_args, "base_write": BaseIO.to_parquet},
    )
    read_json_glob = _reader(ExperimentalPandasJsonParser, ExperimentalGlobDispatcher)
    to_json_glob = _writer(
        ExperimentalGlobDispatcher,
        build_args={**build_args, "base_write": BaseIO.to_json},
    )
    read_xml_glob = _reader(ExperimentalPandasXmlParser, ExperimentalGlobDispatcher)
    to_xml_glob = _writer(
        ExperimentalGlobDispatcher,
        build_args={**build_args, "base_write": BaseIO.to_xml},
    )
    read_pickle_glob = _reader(
        ExperimentalPandasPickleParser, ExperimentalGlobDispatcher
    )
    to_pickle_glob = _writer(
        ExperimentalGlobDispatcher,
        build_args={**build_args, "base_write": BaseIO.to_pickle},
    )
    read_custom_text = _reader(
        ExperimentalCustomTextParser, ExperimentalCustomTextDispatcher
    )
    read_sql_distributed = _reader(
        ExperimentalSQLDispatcher, build_args={**build_args, "base_read": read_sql}
    )

    # the helpers are only needed while the class body runs;
    # drop them to not pollute class namespace
    del _reader
    del _writer

    @classmethod
    def from_dask(cls, dask_obj):
        """
        Build a Modin `query_compiler` out of a Dask DataFrame.

        Parameters
        ----------
        dask_obj : dask.dataframe.DataFrame
            The Dask DataFrame to convert from.

        Returns
        -------
        BaseQueryCompiler
            QueryCompiler containing data from the Dask DataFrame.
        """
        dask_futures = default_client().compute(dask_obj.to_delayed())
        return from_partitions(dask_futures, axis=0)._query_compiler

    @classmethod
    def to_dask(cls, modin_obj):
        """
        Turn a Modin DataFrame/Series into a Dask DataFrame/Series.

        Parameters
        ----------
        modin_obj : modin.pandas.DataFrame, modin.pandas.Series
            The Modin DataFrame/Series to convert.

        Returns
        -------
        dask.dataframe.DataFrame or dask.dataframe.Series
            Converted object with type depending on input.
        """
        from dask.dataframe import from_delayed

        partitions = unwrap_partitions(modin_obj, axis=0)
        if not isinstance(modin_obj, Series):
            return from_delayed(partitions)

        # partitions must be converted to pandas Series
        def df_to_series(df):
            label = df.columns[0]
            series = df[label]
            if label == MODIN_UNNAMED_SERIES_LABEL:
                series.name = None
            return series

        client = default_client()
        series_parts = [client.submit(df_to_series, part) for part in partitions]
        return from_delayed(series_parts)

    @classmethod
    def from_map(cls, func, iterable, *args, **kwargs):
        """
        Build a Modin `query_compiler` by mapping a function over an iterable.

        The resulting `query_compiler` is split by row partitions, one row
        partition per element of the iterable object.

        Parameters
        ----------
        func : callable
            Function to map across the iterable object.
        iterable : Iterable
            An iterable object.
        *args : tuple
            Positional arguments to pass in `func`.
        **kwargs : dict
            Keyword arguments to pass in `func`.

        Returns
        -------
        BaseQueryCompiler
            QueryCompiler containing data returned by map function.
        """
        remote_func = cls.frame_cls._partition_mgr_cls.preprocess_func(func)
        rows = []
        for obj in iterable:
            future = DaskWrapper.deploy(
                remote_func,
                f_args=(obj,) + args,
                f_kwargs=kwargs,
                return_pandas_df=True,
            )
            # every element of the iterable becomes a row holding one partition
            rows.append([cls.frame_partition_cls(future)])
        partitions = np.array(rows)
        return cls.query_compiler_cls(cls.frame_cls(partitions))


================================================
FILE: modin/core/execution/dask/implementations/pandas_on_dask/partitioning/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Base Modin Dataframe classes related to its partitioning and optimized for pandas on Dask execution."""

from .partition import PandasOnDaskDataframePartition
from .partition_manager import PandasOnDaskDataframePartitionManager
from .virtual_partition import (
    PandasOnDaskDataframeColumnPartition,
    PandasOnDaskDataframeRowPartition,
    PandasOnDaskDataframeVirtualPartition,
)

# Names exposed as the public interface of this package; this is the list that
# ``from <package> import *`` picks up.
__all__ = [
    "PandasOnDaskDataframePartition",
    "PandasOnDaskDataframePartitionManager",
    "PandasOnDaskDataframeVirtualPartition",
    "PandasOnDaskDataframeColumnPartition",
    "PandasOnDaskDataframeRowPartition",
]


================================================
FILE: modin/core/execution/dask/implementations/pandas_on_dask/partitioning/partition.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses class that wraps data (block partition) and its metadata."""

import pandas
from distributed import Future
from distributed.utils import get_ip

from modin.core.dataframe.pandas.partitioning.partition import PandasDataframePartition
from modin.core.execution.dask.common import DaskWrapper
from modin.logging import get_logger
from modin.pandas.indexing import compute_sliced_len


class PandasOnDaskDataframePartition(PandasDataframePartition):
    """
    The class implements the interface in ``PandasDataframePartition``.

    Parameters
    ----------
    data : distributed.Future
        A reference to pandas DataFrame that need to be wrapped with this class.
    length : distributed.Future or int, optional
        Length or reference to it of wrapped pandas DataFrame.
    width : distributed.Future or int, optional
        Width or reference to it of wrapped pandas DataFrame.
    ip : distributed.Future or str, optional
        Node IP address or reference to it that holds wrapped pandas DataFrame.
    call_queue : list, optional
        Call queue that needs to be executed on wrapped pandas DataFrame.
    """

    execution_wrapper = DaskWrapper

    def __init__(self, data, length=None, width=None, ip=None, call_queue=None):
        super().__init__()
        assert isinstance(data, Future)
        self._data = data
        if call_queue is None:
            call_queue = []
        self.call_queue = call_queue
        self._length_cache = length
        self._width_cache = width
        self._ip_cache = ip

        log = get_logger()
        # The message is only formatted when debug logging is enabled, because
        # ``and`` short-circuits on a falsy ``_is_debug`` result.
        self._is_debug(log) and log.debug(
            "Partition ID: {}, Height: {}, Width: {}, Node IP: {}".format(
                self._identity,
                str(self._length_cache),
                str(self._width_cache),
                str(self._ip_cache),
            )
        )

    def _submit_call_queue(self, call_queue, log):
        """
        Submit `call_queue` for execution on top of the wrapped data.

        Shared by ``apply`` and ``drain_call_queue`` so that both submit work
        (and emit the ``SUBMIT`` trace) in exactly the same way.

        Parameters
        ----------
        call_queue : list
            Non-empty list of ``[func, args, kwargs]`` entries to run in order.
        log : object
            Logger obtained from ``get_logger()``, used for debug tracing.

        Returns
        -------
        sequence
            The two results of ``execution_wrapper.deploy``: the first refers to
            the resulting data, the second to the IP of the node that ran the task.
        """
        if len(call_queue) > 1:
            self._is_debug(log) and log.debug(
                f"SUBMIT::_apply_list_of_funcs::{self._identity}"
            )
            return self.execution_wrapper.deploy(
                func=apply_list_of_funcs,
                f_args=(call_queue, self._data),
                num_returns=2,
                pure=False,
            )
        # We handle `len(call_queue) == 1` in a different way because
        # this improves performance a bit.
        func, f_args, f_kwargs = call_queue[0]
        self._is_debug(log) and log.debug(f"SUBMIT::_apply_func::{self._identity}")
        return self.execution_wrapper.deploy(
            func=apply_func,
            f_args=(self._data, func, *f_args),
            f_kwargs=f_kwargs,
            num_returns=2,
            pure=False,
        )

    def apply(self, func, *args, **kwargs):
        """
        Apply a function to the object wrapped by this partition.

        Any calls already stored in the call queue are executed first, followed
        by `func`. This partition itself is left untouched.

        Parameters
        ----------
        func : callable or distributed.Future
            A function to apply.
        *args : iterable
            Additional positional arguments to be passed in `func`.
        **kwargs : dict
            Additional keyword arguments to be passed in `func`.

        Returns
        -------
        PandasOnDaskDataframePartition
            A new ``PandasOnDaskDataframePartition`` object.

        Notes
        -----
        The keyword arguments are sent as a dictionary.
        """
        log = get_logger()
        self._is_debug(log) and log.debug(f"ENTER::Partition.apply::{self._identity}")
        # Concatenation builds a new list, so ``self.call_queue`` is not mutated.
        futures = self._submit_call_queue(
            self.call_queue + [[func, args, kwargs]], log
        )
        self._is_debug(log) and log.debug(f"EXIT::Partition.apply::{self._identity}")
        return self.__constructor__(futures[0], ip=futures[1])

    def drain_call_queue(self):
        """Execute all operations stored in the call queue on the object wrapped by this partition."""
        log = get_logger()
        self._is_debug(log) and log.debug(
            f"ENTER::Partition.drain_call_queue::{self._identity}"
        )
        if len(self.call_queue) == 0:
            # Nothing to run; still emit EXIT so ENTER/EXIT traces stay balanced.
            self._is_debug(log) and log.debug(
                f"EXIT::Partition.drain_call_queue::{self._identity}"
            )
            return
        futures = self._submit_call_queue(self.call_queue, log)
        self._data = futures[0]
        self._ip_cache = futures[1]
        self.call_queue = []
        self._is_debug(log) and log.debug(
            f"EXIT::Partition.drain_call_queue::{self._identity}"
        )

    def wait(self):
        """Wait completing computations on the object wrapped by the partition."""
        self.drain_call_queue()
        self.execution_wrapper.wait(self._data)

    def mask(self, row_labels, col_labels):
        """
        Lazily create a mask that extracts the indices provided.

        Parameters
        ----------
        row_labels : list-like, slice or label
            The row labels for the rows to extract.
        col_labels : list-like, slice or label
            The column labels for the columns to extract.

        Returns
        -------
        PandasOnDaskDataframePartition
            A new ``PandasOnDaskDataframePartition`` object.
        """
        log = get_logger()
        self._is_debug(log) and log.debug(f"ENTER::Partition.mask::{self._identity}")
        new_obj = super().mask(row_labels, col_labels)
        # When the cached length/width is still a future, derive the masked
        # value remotely instead of materializing it here.
        if isinstance(row_labels, slice) and isinstance(self._length_cache, Future):
            if row_labels == slice(None):
                # fast path - full axis take
                new_obj._length_cache = self._length_cache
            else:
                new_obj._length_cache = self.execution_wrapper.deploy(
                    func=compute_sliced_len, f_args=(row_labels, self._length_cache)
                )
        if isinstance(col_labels, slice) and isinstance(self._width_cache, Future):
            if col_labels == slice(None):
                # fast path - full axis take
                new_obj._width_cache = self._width_cache
            else:
                new_obj._width_cache = self.execution_wrapper.deploy(
                    func=compute_sliced_len, f_args=(col_labels, self._width_cache)
                )
        self._is_debug(log) and log.debug(f"EXIT::Partition.mask::{self._identity}")
        return new_obj

    def __copy__(self):
        """
        Create a copy of this partition.

        The copy refers to the same data, cached metadata and call queue list
        as this partition.

        Returns
        -------
        PandasOnDaskDataframePartition
            A copy of this partition.
        """
        return self.__constructor__(
            self._data,
            length=self._length_cache,
            width=self._width_cache,
            ip=self._ip_cache,
            call_queue=self.call_queue,
        )

    @classmethod
    def put(cls, obj):
        """
        Put an object into distributed memory and wrap it with partition object.

        Parameters
        ----------
        obj : any
            An object to be put. It has to expose ``index`` and ``columns``
            (as a pandas DataFrame does), since their lengths are recorded as
            the partition's length and width.

        Returns
        -------
        PandasOnDaskDataframePartition
            A new ``PandasOnDaskDataframePartition`` object.
        """
        return cls(
            cls.execution_wrapper.put(obj, hash=False),
            len(obj.index),
            len(obj.columns),
        )

    @classmethod
    def preprocess_func(cls, func):
        """
        Preprocess a function before an ``apply`` call.

        Parameters
        ----------
        func : callable
            The function to preprocess.

        Returns
        -------
        callable
            An object that can be accepted by ``apply``.
        """
        return cls.execution_wrapper.put(func, hash=False, broadcast=True)

    def length(self, materialize=True):
        """
        Get the length of the object wrapped by this partition.

        Parameters
        ----------
        materialize : bool, default: True
            Whether to forcibly materialize the result into an integer. If ``False``
            was specified, may return a future of the result if it hasn't been
            materialized yet.

        Returns
        -------
        int or distributed.Future
            The length of the object.
        """
        if self._length_cache is None:
            self._length_cache = self.apply(len)._data
        if isinstance(self._length_cache, Future) and materialize:
            self._length_cache = self.execution_wrapper.materialize(self._length_cache)
        return self._length_cache

    def width(self, materialize=True):
        """
        Get the width of the object wrapped by the partition.

        Parameters
        ----------
        materialize : bool, default: True
            Whether to forcibly materialize the result into an integer. If ``False``
            was specified, may return a future of the result if it hasn't been
            materialized yet.

        Returns
        -------
        int or distributed.Future
            The width of the object.
        """
        if self._width_cache is None:
            self._width_cache = self.apply(lambda df: len(df.columns))._data
        if isinstance(self._width_cache, Future) and materialize:
            self._width_cache = self.execution_wrapper.materialize(self._width_cache)
        return self._width_cache

    def ip(self, materialize=True):
        """
        Get the node IP address of the object wrapped by this partition.

        Parameters
        ----------
        materialize : bool, default: True
            Whether to forcibly materialize the result into a string. If ``False``
            was specified, may return a future of the result if it hasn't been
            materialized yet.

        Returns
        -------
        str or distributed.Future
            IP address of the node that holds the data.
        """
        if self._ip_cache is None:
            # Only the IP reported alongside the task result is of interest,
            # so the applied function returns an empty frame.
            self._ip_cache = self.apply(lambda df: pandas.DataFrame([]))._ip_cache
        if materialize and isinstance(self._ip_cache, Future):
            self._ip_cache = self.execution_wrapper.materialize(self._ip_cache)
        return self._ip_cache


def apply_func(partition, func, *args, **kwargs):
    """
    Run a single function against a partition inside a worker process.

    Parameters
    ----------
    partition : pandas.DataFrame
        The frame that `func` receives as its first argument.
    func : callable
        The function to run.
    *args : list
        Extra positional arguments forwarded to `func`.
    **kwargs : dict
        Extra keyword arguments forwarded to `func`.

    Returns
    -------
    pandas.DataFrame
        Whatever `func` returned.
    str
        The value of ``get_ip()`` in the process that executed the call.

    Notes
    -----
    Directly passing a call queue entry (i.e. a list of [func, args, kwargs]) instead of
    destructuring it causes a performance penalty.
    """
    outcome = func(partition, *args, **kwargs)
    # The address is looked up only after `func` has finished.
    node_ip = get_ip()
    return outcome, node_ip


def apply_list_of_funcs(call_queue, partition):
    """
    Run every call stored in a call queue against a partition inside a worker process.

    The calls are chained: each one receives the result of the previous one
    as its first argument.

    Parameters
    ----------
    call_queue : list
        ``[func, args, kwargs]`` triples, executed in list order.
    partition : pandas.DataFrame
        The frame handed to the first call of the queue.

    Returns
    -------
    pandas.DataFrame
        The result of the last call (or `partition` itself for an empty queue).
    str
        The value of ``get_ip()`` in the process that executed the calls.
    """
    for queued_call in call_queue:
        fn, positional, keywords = queued_call
        partition = fn(partition, *positional, **keywords)
    node_ip = get_ip()
    return partition, node_ip


================================================
FILE: modin/core/execution/dask/implementations/pandas_on_dask/partitioning/partition_manager.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses class that implements ``PandasDataframePartitionManager``."""

from modin.core.dataframe.pandas.partitioning.partition_manager import (
    PandasDataframePartitionManager,
)
from modin.core.execution.dask.common import DaskWrapper

from .partition import PandasOnDaskDataframePartition
from .virtual_partition import (
    PandasOnDaskDataframeColumnPartition,
    PandasOnDaskDataframeRowPartition,
)


class PandasOnDaskDataframePartitionManager(PandasDataframePartitionManager):
    """Partition manager implementing the `PandasDataframePartitionManager` interface on Dask."""

    # Block partitions are stored as PandasOnDaskDataframePartition objects;
    # the remaining attributes select the matching axis partitions and wrapper.
    _partition_class = PandasOnDaskDataframePartition
    _column_partitions_class = PandasOnDaskDataframeColumnPartition
    _row_partition_class = PandasOnDaskDataframeRowPartition
    _execution_wrapper = DaskWrapper

    @classmethod
    def wait_partitions(cls, partitions):
        """
        Block until the objects wrapped by `partitions` are computed, without materializing them.

        The blocks of all partitions are collected first and then waited on
        with a single call to the execution wrapper.

        Parameters
        ----------
        partitions : np.ndarray
            NumPy array with ``PandasDataframePartition``-s.
        """
        pending_blocks = []
        for partition in partitions:
            pending_blocks.extend(partition.list_of_blocks)
        cls._execution_wrapper.wait(pending_blocks)


================================================
FILE: modin/core/execution/dask/implementations/pandas_on_dask/partitioning/virtual_partition.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses classes responsible for storing a virtual partition and applying a function to it."""

import pandas
from distributed.utils import get_ip

from modin.core.dataframe.pandas.partitioning.axis_partition import (
    PandasDataframeAxisPartition,
)
from modin.core.execution.dask.common import DaskWrapper
from modin.utils import _inherit_docstrings

from .partition import PandasOnDaskDataframePartition


class PandasOnDaskDataframeVirtualPartition(PandasDataframeAxisPartition):
    """
    Dask implementation of the ``PandasDataframeAxisPartition`` interface.

    Parameters
    ----------
    list_of_partitions : Union[list, PandasOnDaskDataframePartition]
        List of ``PandasOnDaskDataframePartition`` and
        ``PandasOnDaskDataframeVirtualPartition`` objects, or a single
        ``PandasOnDaskDataframePartition``.
    get_ip : bool, default: False
        Whether to get node IP addresses of conforming partitions or not.
    full_axis : bool, default: True
        Whether or not the virtual partition encompasses the whole axis.
    call_queue : list, optional
        A list of tuples (callable, args, kwargs) that contains deferred calls.
    length : distributed.Future or int, optional
        Length, or reference to length, of wrapped ``pandas.DataFrame``.
    width : distributed.Future or int, optional
        Width, or reference to width, of wrapped ``pandas.DataFrame``.
    """

    axis = None
    # Number of metadata values accompanying every split: (length, width, ip).
    _PARTITIONS_METADATA_LEN = 3
    partition_type = PandasOnDaskDataframePartition

    @property
    def list_of_ips(self):
        """
        Collect the IPs of the nodes holding the blocks of this partition.

        Returns
        -------
        List
            A list of IPs as ``distributed.Future`` or str.
        """
        ips = []
        for block_partition in self.list_of_block_partitions:
            # Each block's call queue is drained right before its IP is requested.
            block_partition.drain_call_queue()
            ips.append(block_partition.ip(materialize=False))
        return ips

    @classmethod
    @_inherit_docstrings(PandasDataframeAxisPartition.deploy_splitting_func)
    def deploy_splitting_func(
        cls,
        axis,
        func,
        f_args,
        f_kwargs,
        num_splits,
        *partitions,
        extract_metadata=False,
    ):
        # With metadata every split is accompanied by `_PARTITIONS_METADATA_LEN`
        # extra return values; without it there is one value per split.
        if extract_metadata:
            total_returns = num_splits * (1 + cls._PARTITIONS_METADATA_LEN)
        else:
            total_returns = num_splits
        positional = (
            PandasDataframeAxisPartition.deploy_splitting_func,
            axis,
            func,
            f_args,
            f_kwargs,
            num_splits,
            *partitions,
        )
        return DaskWrapper.deploy(
            func=_deploy_dask_func,
            f_args=positional,
            f_kwargs={"extract_metadata": extract_metadata},
            num_returns=total_returns,
            pure=False,
        )

    @classmethod
    def deploy_axis_func(
        cls,
        axis,
        func,
        f_args,
        f_kwargs,
        num_splits,
        maintain_partitioning,
        *partitions,
        min_block_size,
        lengths=None,
        manual_partition=False,
    ):
        """
        Submit a function that operates along a full axis.

        Parameters
        ----------
        axis : {0, 1}
            The axis to perform the function along.
        func : callable
            The function to perform.
        f_args : list or tuple
            Positional arguments to pass to ``func``.
        f_kwargs : dict
            Keyword arguments to pass to ``func``.
        num_splits : int
            The number of splits to return (see `split_result_of_axis_func_pandas`).
        maintain_partitioning : bool
            If True, keep the old partitioning if possible.
            If False, create a new partition layout.
        *partitions : iterable
            All partitions that make up the full axis (row or column).
        min_block_size : int
            Minimum number of rows/columns in a single split.
        lengths : iterable, default: None
            The list of lengths to shuffle the partition into.
        manual_partition : bool, default: False
            If True, partition the result with `lengths`.

        Returns
        -------
        list
            A list of distributed.Future.
        """
        # A truthy `lengths` dictates the number of resulting splits.
        if lengths:
            split_count = len(lengths)
        else:
            split_count = num_splits
        positional = (
            PandasDataframeAxisPartition.deploy_axis_func,
            axis,
            func,
            f_args,
            f_kwargs,
            num_splits,
            maintain_partitioning,
            *partitions,
        )
        keywords = {
            "min_block_size": min_block_size,
            "lengths": lengths,
            "manual_partition": manual_partition,
        }
        return DaskWrapper.deploy(
            func=_deploy_dask_func,
            f_args=positional,
            f_kwargs=keywords,
            num_returns=split_count * (1 + cls._PARTITIONS_METADATA_LEN),
            pure=False,
        )

    @classmethod
    def deploy_func_between_two_axis_partitions(
        cls,
        axis,
        func,
        f_args,
        f_kwargs,
        num_splits,
        len_of_left,
        other_shape,
        *partitions,
        min_block_size,
    ):
        """
        Submit a function that operates along a full axis between two data sets.

        Parameters
        ----------
        axis : {0, 1}
            The axis to perform the function along.
        func : callable
            The function to perform.
        f_args : list or tuple
            Positional arguments to pass to ``func``.
        f_kwargs : dict
            Keyword arguments to pass to ``func``.
        num_splits : int
            The number of splits to return (see `split_result_of_axis_func_pandas`).
        len_of_left : int
            The number of values in `partitions` that belong to the left data set.
        other_shape : np.ndarray
            The shape of right frame in terms of partitions, i.e.
            (other_shape[i-1], other_shape[i]) will indicate slice to restore i-1 axis partition.
        *partitions : iterable
            All partitions that make up the full axis (row or column) for both data sets.
        min_block_size : int
            Minimum number of rows/columns in a single split.

        Returns
        -------
        list
            A list of distributed.Future.
        """
        positional = (
            PandasDataframeAxisPartition.deploy_func_between_two_axis_partitions,
            axis,
            func,
            f_args,
            f_kwargs,
            num_splits,
            len_of_left,
            other_shape,
            *partitions,
        )
        return DaskWrapper.deploy(
            func=_deploy_dask_func,
            f_args=positional,
            f_kwargs={"min_block_size": min_block_size},
            num_returns=num_splits * (1 + cls._PARTITIONS_METADATA_LEN),
            pure=False,
        )

    def wait(self):
        """Drain the call queue and wait until the wrapped blocks are computed."""
        self.drain_call_queue()
        DaskWrapper.wait(self.list_of_blocks)


@_inherit_docstrings(PandasOnDaskDataframeVirtualPartition)
class PandasOnDaskDataframeColumnPartition(PandasOnDaskDataframeVirtualPartition):
    # Column flavour of the virtual partition: the only override is the
    # class-level ``axis`` marker, which the parent class leaves as ``None``.
    axis = 0


@_inherit_docstrings(PandasOnDaskDataframeVirtualPartition)
class PandasOnDaskDataframeRowPartition(PandasOnDaskDataframeVirtualPartition):
    # Row flavour of the virtual partition: the only override is the
    # class-level ``axis`` marker, which the parent class leaves as ``None``.
    axis = 1


def _deploy_dask_func(
    deployer,
    axis,
    f_to_deploy,
    f_args,
    f_kwargs,
    *args,
    extract_metadata=True,
    **kwargs,
):
    """
    Execute a function on an axis partition in a worker process.

    This is ALWAYS called on either ``PandasDataframeAxisPartition.deploy_axis_func``
    or ``PandasDataframeAxisPartition.deploy_func_between_two_axis_partitions``, which both
    serve to deploy another dataframe function on a Dask worker process.

    Parameters
    ----------
    deployer : callable
        A `PandasDataFrameAxisPartition.deploy_*` method that will call `deploy_f`.
    axis : {0, 1}
        The axis to perform the function along.
    f_to_deploy : callable or RayObjectID
        The function to deploy.
    f_args : list or tuple
        Positional arguments to pass to ``f_to_deploy``.
    f_kwargs : dict
        Keyword arguments to pass to ``f_to_deploy``.
    *args : list
        Positional arguments to pass to ``func``.
    extract_metadata : bool, default: True
        Whether to return metadata (length, width, ip) of the result. Passing `False` may relax
        the load on object storage as the remote function would return 4 times fewer futures.
        Passing `False` makes sense for temporary results where you know for sure that the
        metadata will never be requested.
    **kwargs : dict
        Keyword arguments to pass to ``func``.

    Returns
    -------
    list
        The result of the function ``func`` and metadata for it.
    """
    result = deployer(axis, f_to_deploy, f_args, f_kwargs, *args, **kwargs)
    if not extract_metadata:
        return result
    ip = get_ip()
    if isinstance(result, pandas.DataFrame):
        return result, len(result), len(result.columns), ip
    elif all(isinstance(r, pandas.DataFrame) for r in result):
        return [i for r in result for i in [r, len(r), len(r.columns), ip]]
    else:
        return [i for r in result for i in [r, None, None, ip]]


================================================
FILE: modin/core/execution/dispatching/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Modin's functionality related to dispatching to specific execution."""


================================================
FILE: modin/core/execution/dispatching/factories/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Factories responsible for dispatching to specific execution."""


================================================
FILE: modin/core/execution/dispatching/factories/dispatcher.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
Contain IO dispatcher class.

Dispatcher routes the work to execution-specific functions.
"""

from typing import Union

from pandas._libs.lib import NoDefault, no_default

from modin.config import Backend, Engine, IsExperimental, StorageFormat
from modin.core.execution.dispatching.factories import factories
from modin.core.storage_formats.base import BaseQueryCompiler
from modin.utils import _inherit_docstrings


class FactoryNotFoundError(AttributeError):
    """
    Exception raised when no matching factory could be found.

    It derives from ``AttributeError``, so handlers written for that
    exception type catch it as well.
    """


class StubIoEngine:
    """
    IO engine whose every method raises ``NotImplementedError`` when called.

    Parameters
    ----------
    factory_name : str
        Factory name, which will be reflected in error messages.
        A falsy value is replaced by ``"Unknown"``.

    Notes
    -----
    Used for testing purposes.
    """

    def __init__(self, factory_name=""):
        if not factory_name:
            factory_name = "Unknown"
        self.factory_name = factory_name

    def __getattr__(self, name):
        """
        Build a function that raises `NotImplementedError` for the `name` method.

        Python calls this hook only for attributes that regular lookup did
        not find, so ``factory_name`` itself is unaffected once it is set.

        Parameters
        ----------
        name : str
            Method name to indicate in `NotImplementedError`.

        Returns
        -------
        callable
        """

        def stub(*args, **kw):
            # The message is built at call time, so it reflects the current
            # ``factory_name``.
            message = f"Method {self.factory_name}.{name} is not implemented"
            raise NotImplementedError(message)

        return stub


class StubFactory(factories.BaseFactory):
    """
    Factory whose IO engine raises NotImplementedError for any method that is called.

    Notes
    -----
    Used for testing purposes.
    """

    # Default engine; its error messages use the "Unknown" factory name.
    io_cls = StubIoEngine()

    @classmethod
    def set_failing_name(cls, factory_name):
        """
        Replace the `.io_cls` class attribute with a freshly named ``StubIoEngine``.

        Parameters
        ----------
        factory_name : str
            Name to pass to the ``StubIoEngine`` constructor.

        Returns
        -------
        type
            The class itself, after `.io_cls` has been replaced.
        """
        failing_engine = StubIoEngine(factory_name)
        cls.io_cls = failing_engine
        return cls


class FactoryDispatcher(object):
    """
    Class that routes IO-work to the factories.

    This class is responsible for keeping selected factory up-to-date and dispatching
    calls of IO-functions to its actual execution-specific implementations.

    Notes
    -----
    Every public ``from_*``/``read_*``/``to_*`` classmethod forwards its arguments to
    the underscore-prefixed method of the same name on the current factory
    (see ``get_factory``).
    """

    # Factory for the current global backend; filled in by `_update_factory`.
    __factory: factories.BaseFactory = None

    @classmethod
    def get_factory(cls) -> factories.BaseFactory:
        """
        Get current factory.

        While no factory has been set yet, the dispatcher subscribes to the ``Engine``
        and ``Backend`` config parameters so that the engine gets initialized and the
        factory gets rebuilt through ``_update_factory``.

        Returns
        -------
        factories.BaseFactory
            Factory stored for the current ``Backend``.

        Notes
        -----
        This relies on ``Backend.subscribe`` invoking ``_update_factory`` right away,
        so that the factory is populated before being returned (presumably guaranteed
        by ``modin.config``; confirm there).
        """
        if cls.__factory is None:
            from modin.pandas import _initialize_engine

            Engine.subscribe(
                lambda engine_parameter: _initialize_engine(engine_parameter.get())
            )
            Backend.subscribe(cls._update_factory)
        return cls.__factory

    @classmethod
    def _get_prepared_factory_for_backend(cls, backend) -> factories.BaseFactory:
        """
        Get factory for the specified backend.

        Parameters
        ----------
        backend : str
            Backend name.

        Returns
        -------
        factories.BaseFactory
            Factory for the specified backend.

        Raises
        ------
        FactoryNotFoundError
            If neither the regular nor the experimental factory exists and Modin
            is not in experimental mode.
        ModuleNotFoundError
            If preparing the factory fails because a required package is missing.
        """
        execution = Backend.get_execution_for_backend(backend)
        from modin.pandas import _initialize_engine

        _initialize_engine(execution.engine)
        factory_name = f"{execution.storage_format}On{execution.engine}Factory"
        experimental_factory_name = "Experimental" + factory_name
        try:
            # The regular factory wins; the experimental one is only a fallback.
            factory = getattr(factories, factory_name, None) or getattr(
                factories, experimental_factory_name
            )
        except AttributeError:
            if not IsExperimental.get():
                # allow missing factories in experimental mode only
                raise FactoryNotFoundError(
                    f"Cannot find neither factory {factory_name} nor experimental factory "
                    f"{experimental_factory_name}. Potential reason might be incorrect "
                    f"environment variable value for {StorageFormat.varname} or {Engine.varname}"
                )
            factory = StubFactory.set_failing_name(factory_name)
        else:
            try:
                factory.prepare()
            except ModuleNotFoundError as err:
                raise ModuleNotFoundError(
                    f"Make sure all required packages are installed: {str(err)}"
                ) from err
        return factory

    @classmethod
    def _update_factory(cls, *args):
        """
        Update and prepare factory with a new one specified via Modin config.

        Parameters
        ----------
        *args : iterable
            Ignored. Accepted only so that the method can be registered as a
            config subscription callback.
        """
        cls.__factory = cls._get_prepared_factory_for_backend(Backend.get())

    @classmethod
    def from_pandas(
        cls, df, backend: Union[str, NoDefault] = no_default
    ) -> BaseQueryCompiler:
        """
        Create a Modin query compiler from a pandas DataFrame.

        Parameters
        ----------
        df : pandas.DataFrame
            The pandas DataFrame to convert.
        backend : str or NoDefault, default: NoDefault
            The backend to use for the resulting query compiler. If NoDefault,
            use the current global default ``Backend`` from the Modin config.

        Returns
        -------
        BaseQueryCompiler
            A Modin query compiler that wraps the input pandas DataFrame.
        """
        if backend is no_default:
            factory = cls.get_factory()
        else:
            # An explicit backend does not touch the cached global factory.
            factory = cls._get_prepared_factory_for_backend(backend)
        return factory._from_pandas(df)

    @classmethod
    @_inherit_docstrings(factories.BaseFactory._from_arrow)
    def from_arrow(cls, at):
        return cls.get_factory()._from_arrow(at)

    @classmethod
    @_inherit_docstrings(factories.BaseFactory._from_non_pandas)
    def from_non_pandas(cls, *args, **kwargs):
        return cls.get_factory()._from_non_pandas(*args, **kwargs)

    @classmethod
    @_inherit_docstrings(factories.BaseFactory._from_interchange_dataframe)
    def from_interchange_dataframe(cls, *args, **kwargs):
        return cls.get_factory()._from_interchange_dataframe(*args, **kwargs)

    @classmethod
    @_inherit_docstrings(factories.BaseFactory._from_ray)
    def from_ray(cls, ray_obj):
        return cls.get_factory()._from_ray(ray_obj)

    @classmethod
    @_inherit_docstrings(factories.BaseFactory._from_dask)
    def from_dask(cls, dask_obj):
        return cls.get_factory()._from_dask(dask_obj)

    @classmethod
    @_inherit_docstrings(factories.BaseFactory._from_map)
    def from_map(cls, func, iterable, *args, **kwargs):
        return cls.get_factory()._from_map(func, iterable, *args, **kwargs)

    @classmethod
    @_inherit_docstrings(factories.BaseFactory._read_parquet)
    def read_parquet(cls, **kwargs):
        return cls.get_factory()._read_parquet(**kwargs)

    @classmethod
    @_inherit_docstrings(factories.BaseFactory._read_csv)
    def read_csv(cls, **kwargs):
        return cls.get_factory()._read_csv(**kwargs)

    @classmethod
    @_inherit_docstrings(factories.PandasOnRayFactory._read_csv_glob)
    def read_csv_glob(cls, **kwargs):
        return cls.get_factory()._read_csv_glob(**kwargs)

    @classmethod
    @_inherit_docstrings(factories.PandasOnRayFactory._read_pickle_glob)
    def read_pickle_glob(cls, **kwargs):
        return cls.get_factory()._read_pickle_glob(**kwargs)

    @classmethod
    @_inherit_docstrings(factories.BaseFactory._read_json)
    def read_json(cls, **kwargs):
        return cls.get_factory()._read_json(**kwargs)

    @classmethod
    @_inherit_docstrings(factories.BaseFactory._read_gbq)
    def read_gbq(cls, **kwargs):
        return cls.get_factory()._read_gbq(**kwargs)

    @classmethod
    @_inherit_docstrings(factories.BaseFactory._read_html)
    def read_html(cls, **kwargs):
        return cls.get_factory()._read_html(**kwargs)

    @classmethod
    @_inherit_docstrings(factories.BaseFactory._read_clipboard)
    def read_clipboard(cls, **kwargs):
        return cls.get_factory()._read_clipboard(**kwargs)

    @classmethod
    @_inherit_docstrings(factories.BaseFactory._read_excel)
    def read_excel(cls, **kwargs):
        return cls.get_factory()._read_excel(**kwargs)

    @classmethod
    @_inherit_docstrings(factories.BaseFactory._read_hdf)
    def read_hdf(cls, **kwargs):
        return cls.get_factory()._read_hdf(**kwargs)

    @classmethod
    @_inherit_docstrings(factories.BaseFactory._read_feather)
    def read_feather(cls, **kwargs):
        return cls.get_factory()._read_feather(**kwargs)

    @classmethod
    @_inherit_docstrings(factories.BaseFactory._read_stata)
    def read_stata(cls, **kwargs):
        return cls.get_factory()._read_stata(**kwargs)

    @classmethod
    @_inherit_docstrings(factories.BaseFactory._read_sas)
    def read_sas(cls, **kwargs):  # pragma: no cover
        return cls.get_factory()._read_sas(**kwargs)

    @classmethod
    @_inherit_docstrings(factories.BaseFactory._read_pickle)
    def read_pickle(cls, **kwargs):
        return cls.get_factory()._read_pickle(**kwargs)

    @classmethod
    @_inherit_docstrings(factories.BaseFactory._read_sql)
    def read_sql(cls, **kwargs):
        return cls.get_factory()._read_sql(**kwargs)

    @classmethod
    @_inherit_docstrings(factories.PandasOnRayFactory._read_sql_distributed)
    def read_sql_distributed(cls, **kwargs):
        return cls.get_factory()._read_sql_distributed(**kwargs)

    @classmethod
    @_inherit_docstrings(factories.BaseFactory._read_fwf)
    def read_fwf(cls, **kwargs):
        return cls.get_factory()._read_fwf(**kwargs)

    @classmethod
    @_inherit_docstrings(factories.BaseFactory._read_sql_table)
    def read_sql_table(cls, **kwargs):
        return cls.get_factory()._read_sql_table(**kwargs)

    @classmethod
    @_inherit_docstrings(factories.BaseFactory._read_sql_query)
    def read_sql_query(cls, **kwargs):
        return cls.get_factory()._read_sql_query(**kwargs)

    @classmethod
    @_inherit_docstrings(factories.BaseFactory._read_spss)
    def read_spss(cls, **kwargs):
        return cls.get_factory()._read_spss(**kwargs)

    @classmethod
    @_inherit_docstrings(factories.BaseFactory._to_sql)
    def to_sql(cls, *args, **kwargs):
        return cls.get_factory()._to_sql(*args, **kwargs)

    @classmethod
    @_inherit_docstrings(factories.BaseFactory._to_pickle)
    def to_pickle(cls, *args, **kwargs):
        return cls.get_factory()._to_pickle(*args, **kwargs)

    @classmethod
    @_inherit_docstrings(factories.PandasOnRayFactory._to_pickle_glob)
    def to_pickle_glob(cls, *args, **kwargs):
        return cls.get_factory()._to_pickle_glob(*args, **kwargs)

    @classmethod
    @_inherit_docstrings(factories.PandasOnRayFactory._read_parquet_glob)
    def read_parquet_glob(cls, *args, **kwargs):
        return cls.get_factory()._read_parquet_glob(*args, **kwargs)

    @classmethod
    @_inherit_docstrings(factories.PandasOnRayFactory._to_parquet_glob)
    def to_parquet_glob(cls, *args, **kwargs):
        return cls.get_factory()._to_parquet_glob(*args, **kwargs)

    @classmethod
    @_inherit_docstrings(factories.PandasOnRayFactory._read_json_glob)
    def read_json_glob(cls, *args, **kwargs):
        return cls.get_factory()._read_json_glob(*args, **kwargs)

    @classmethod
    @_inherit_docstrings(factories.PandasOnRayFactory._to_json_glob)
    def to_json_glob(cls, *args, **kwargs):
        return cls.get_factory()._to_json_glob(*args, **kwargs)

    @classmethod
    @_inherit_docstrings(factories.PandasOnRayFactory._read_xml_glob)
    def read_xml_glob(cls, *args, **kwargs):
        return cls.get_factory()._read_xml_glob(*args, **kwargs)

    @classmethod
    @_inherit_docstrings(factories.PandasOnRayFactory._to_xml_glob)
    def to_xml_glob(cls, *args, **kwargs):
        return cls.get_factory()._to_xml_glob(*args, **kwargs)

    @classmethod
    @_inherit_docstrings(factories.PandasOnRayFactory._read_custom_text)
    def read_custom_text(cls, **kwargs):
        return cls.get_factory()._read_custom_text(**kwargs)

    @classmethod
    @_inherit_docstrings(factories.BaseFactory._to_csv)
    def to_csv(cls, *args, **kwargs):
        return cls.get_factory()._to_csv(*args, **kwargs)

    @classmethod
    @_inherit_docstrings(factories.BaseFactory._to_json)
    def to_json(cls, *args, **kwargs):
        return cls.get_factory()._to_json(*args, **kwargs)

    @classmethod
    # The docstring comes from the Series-specific factory method this one
    # dispatches to, not from the DataFrame-oriented `_to_json`.
    @_inherit_docstrings(factories.BaseFactory._to_json_series)
    def to_json_series(cls, *args, **kwargs):
        return cls.get_factory()._to_json_series(*args, **kwargs)

    @classmethod
    @_inherit_docstrings(factories.BaseFactory._to_xml)
    def to_xml(cls, *args, **kwargs):
        return cls.get_factory()._to_xml(*args, **kwargs)

    @classmethod
    @_inherit_docstrings(factories.BaseFactory._to_parquet)
    def to_parquet(cls, *args, **kwargs):
        return cls.get_factory()._to_parquet(*args, **kwargs)

    @classmethod
    @_inherit_docstrings(factories.BaseFactory._to_ray)
    def to_ray(cls, modin_obj):
        return cls.get_factory()._to_ray(modin_obj)

    @classmethod
    @_inherit_docstrings(factories.BaseFactory._to_dask)
    def to_dask(cls, modin_obj):
        return cls.get_factory()._to_dask(modin_obj)


================================================
FILE: modin/core/execution/dispatching/factories/factories.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
Module contains Factories for all of the supported Modin executions.

A factory is a bridge between IO function calls made through the high-level API and
their actual implementation in the execution bound to that factory. Each execution
is represented by its own factory class.
"""

import re
import typing
import warnings

import pandas
from pandas.util._decorators import doc

from modin.core.io import BaseIO
from modin.core.storage_formats.pandas.native_query_compiler import NativeQueryCompiler
from modin.utils import get_current_execution

# Class docstring template for abstract factories; ``{role}`` is substituted by
# the ``@doc`` decorator (``BaseFactory`` passes an empty role).
_doc_abstract_factory_class = """
Abstract {role} factory which allows to override the IO module easily.

This class is responsible for dispatching calls of IO-functions to its
actual execution-specific implementations.

Attributes
----------
io_cls : BaseIO
    IO module class of the underlying execution. The place to dispatch calls to.
"""

# Class docstring template for concrete factories; ``{execution_name}`` is the
# execution label such as "PandasOnRay".
_doc_factory_class = """
Factory of {execution_name} execution.

This class is responsible for dispatching calls of IO-functions to its
actual execution-specific implementations.

Attributes
----------
io_cls : {execution_name}IO
    IO module class of the underlying execution. The place to dispatch calls to.
"""

# Docstring template for the ``prepare`` classmethods; ``{io_module_name}``
# names the IO class assigned to ``io_cls``.
_doc_factory_prepare_method = """
Initialize Factory.

Fills in `.io_cls` class attribute with {io_module_name} lazily.
"""

# Docstring template for IO methods without a pandas counterpart; expects the
# ``{source}`` and ``{params}`` fields.
_doc_io_method_raw_template = """
Build query compiler from {source}.

Parameters
----------
{params}

Returns
-------
QueryCompiler
    Query compiler of the selected storage format.
"""

# Same as the raw template plus a "See Also" section; additionally expects
# the ``{method}`` field.
_doc_io_method_template = (
    _doc_io_method_raw_template
    + """
See Also
--------
modin.pandas.{method}
"""
)

# Ready-made ``{params}`` value for methods taking ``*args`` and ``**kwargs``.
_doc_io_method_all_params = """*args : args
    Arguments to pass to the QueryCompiler builder method.
**kwargs : kwargs
    Arguments to pass to the QueryCompiler builder method."""

# Ready-made ``{params}`` value for methods taking only ``**kwargs``.
_doc_io_method_kwargs_params = """**kwargs : kwargs
    Arguments to pass to the QueryCompiler builder method."""


# Nested mapping "pandas" -> "category" -> ``pandas.CategoricalDtype``.
# NOTE(review): not referenced by the factories in this module; confirm where
# it is consumed before changing it.
types_dictionary = {"pandas": {"category": pandas.CategoricalDtype}}

# Executions for which ``BaseFactory`` dispatches the experimental glob, custom
# text and distributed SQL methods to the IO class; for any other execution
# those methods raise ``NotImplementedError`` or fall back to plain ``read_sql``.
supported_executions = (
    "PandasOnRay",
    "PandasOnUnidist",
    "PandasOnDask",
)


class FactoryInfo(typing.NamedTuple):
    """
    Structure that stores information about factory.

    Instances are produced by ``BaseFactory.get_info``, which derives every
    field from the name of the factory class.

    Parameters
    ----------
    engine : str
        Name of underlying execution engine.
    partition : str
        Name of the partition format.
    experimental : bool
        Whether underlying engine is experimental-only.
    """

    # Part of the class name between "On" and the "Factory" suffix.
    engine: str
    # Part of the class name before "On", without the "Experimental" prefix.
    partition: str
    # True when the class name starts with "Experimental".
    experimental: bool


class NotRealFactory(Exception):
    """
    ``NotRealFactory`` exception class.

    Raised by ``BaseFactory.get_info`` when the name of the factory class does not
    follow the ``[Experimental]<StorageFormat>On<Engine>Factory`` pattern, so no
    execution information can be parsed from it.
    """


@doc(_doc_abstract_factory_class, role="")
class BaseFactory(object):
    # NOTE: methods decorated with ``@doc`` get their docstring from the module-level
    # templates, so they deliberately carry no docstring of their own.
    io_cls: typing.Type[BaseIO] = None  # The module where the I/O functionality exists.

    @classmethod
    def get_info(cls) -> FactoryInfo:
        """
        Get information about current factory.

        Returns
        -------
        FactoryInfo
            Engine, partition format and experimental flag parsed from the class name.

        Raises
        ------
        NotRealFactory
            If the class name does not match the factory naming pattern.

        Notes
        -----
        It parses factory name, so it must be conformant with how ``FactoryDispatcher``
        class constructs factory names.
        """
        match = re.match(r"^(Experimental)?(.*)On(.*)Factory$", cls.__name__)
        if match is None:
            raise NotRealFactory(
                f"{cls.__name__!r} does not match the "
                + "'[Experimental]<StorageFormat>On<Engine>Factory' naming pattern"
            )
        experimental, partition, engine = match.groups()
        return FactoryInfo(
            engine=engine, partition=partition, experimental=bool(experimental)
        )

    @classmethod
    @doc(
        _doc_factory_prepare_method,
        io_module_name="an underlying execution's IO-module",
    )
    def prepare(cls):
        raise NotImplementedError("Subclasses of BaseFactory must implement prepare")

    @classmethod
    @doc(
        _doc_io_method_template,
        source="pandas DataFrame",
        params="df : pandas.DataFrame",
        method="io.from_pandas",
    )
    def _from_pandas(cls, df):
        return cls.io_cls.from_pandas(df)

    @classmethod
    @doc(
        _doc_io_method_template,
        source="Arrow Table",
        params="at : pyarrow.Table",
        method="io.from_arrow",
    )
    def _from_arrow(cls, at):
        return cls.io_cls.from_arrow(at)

    @classmethod
    @doc(
        _doc_io_method_template,
        source="a non-pandas object (dict, list, np.array etc...)",
        params=_doc_io_method_all_params,
        method="io.from_non_pandas",
    )
    def _from_non_pandas(cls, *args, **kwargs):
        return cls.io_cls.from_non_pandas(*args, **kwargs)

    @classmethod
    @doc(
        _doc_io_method_template,
        source="a DataFrame object supporting exchange protocol `__dataframe__()`",
        params=_doc_io_method_all_params,
        method="io.from_interchange_dataframe",
    )
    def _from_interchange_dataframe(cls, *args, **kwargs):
        return cls.io_cls.from_interchange_dataframe(*args, **kwargs)

    @classmethod
    @doc(
        _doc_io_method_template,
        source="a Ray Dataset",
        params="ray_obj : ray.data.Dataset",
        method="modin.core.execution.ray.implementations.pandas_on_ray.io.PandasOnRayIO.from_ray",
    )
    def _from_ray(cls, ray_obj):
        return cls.io_cls.from_ray(ray_obj)

    @classmethod
    @doc(
        _doc_io_method_template,
        source="a Dask DataFrame",
        params="dask_obj : dask.dataframe.DataFrame",
        method="modin.core.execution.dask.implementations.pandas_on_dask.io.PandasOnDaskIO.from_dask",
    )
    def _from_dask(cls, dask_obj):
        return cls.io_cls.from_dask(dask_obj)

    @classmethod
    def _from_map(cls, func, iterable, *args, **kwargs):
        """
        Create a Modin `query_compiler` from a map function.

        This method will construct a Modin `query_compiler` split by row partitions.
        The number of row partitions matches the number of elements in the iterable object.

        Parameters
        ----------
        func : callable
            Function to map across the iterable object.
        iterable : Iterable
            An iterable object.
        *args : tuple
            Positional arguments to pass in `func`.
        **kwargs : dict
            Keyword arguments to pass in `func`.

        Returns
        -------
        BaseQueryCompiler
            QueryCompiler containing data returned by map function.
        """
        return cls.io_cls.from_map(func, iterable, *args, **kwargs)

    @classmethod
    @doc(
        _doc_io_method_template,
        source="a Parquet file",
        params=_doc_io_method_kwargs_params,
        method="read_parquet",
    )
    def _read_parquet(cls, **kwargs):
        return cls.io_cls.read_parquet(**kwargs)

    @classmethod
    @doc(
        _doc_io_method_template,
        source="a CSV file",
        params=_doc_io_method_kwargs_params,
        method="read_csv",
    )
    def _read_csv(cls, **kwargs):
        return cls.io_cls.read_csv(**kwargs)

    @classmethod
    @doc(
        _doc_io_method_template,
        source="a JSON file",
        params=_doc_io_method_kwargs_params,
        method="read_json",
    )
    def _read_json(cls, **kwargs):
        return cls.io_cls.read_json(**kwargs)

    @classmethod
    @doc(
        _doc_io_method_template,
        source="a Google BigQuery",
        params=_doc_io_method_kwargs_params,
        method="read_gbq",
    )
    def _read_gbq(cls, **kwargs):
        return cls.io_cls.read_gbq(**kwargs)

    @classmethod
    @doc(
        _doc_io_method_template,
        source="an HTML document",
        params=_doc_io_method_kwargs_params,
        method="read_html",
    )
    def _read_html(cls, **kwargs):
        return cls.io_cls.read_html(**kwargs)

    @classmethod
    @doc(
        _doc_io_method_template,
        source="clipboard",
        params=_doc_io_method_kwargs_params,
        method="read_clipboard",
    )
    def _read_clipboard(cls, **kwargs):  # pragma: no cover
        return cls.io_cls.read_clipboard(**kwargs)

    @classmethod
    @doc(
        _doc_io_method_template,
        source="an Excel file",
        params=_doc_io_method_kwargs_params,
        method="read_excel",
    )
    def _read_excel(cls, **kwargs):
        return cls.io_cls.read_excel(**kwargs)

    @classmethod
    @doc(
        _doc_io_method_template,
        source="an HDFStore",
        params=_doc_io_method_kwargs_params,
        method="read_hdf",
    )
    def _read_hdf(cls, **kwargs):
        return cls.io_cls.read_hdf(**kwargs)

    @classmethod
    @doc(
        _doc_io_method_template,
        source="a feather-format object",
        params=_doc_io_method_kwargs_params,
        method="read_feather",
    )
    def _read_feather(cls, **kwargs):
        return cls.io_cls.read_feather(**kwargs)

    @classmethod
    @doc(
        _doc_io_method_template,
        source="a Stata file",
        params=_doc_io_method_kwargs_params,
        method="read_stata",
    )
    def _read_stata(cls, **kwargs):
        return cls.io_cls.read_stata(**kwargs)

    @classmethod
    @doc(
        _doc_io_method_template,
        source="a SAS file",
        params=_doc_io_method_kwargs_params,
        method="read_sas",
    )
    def _read_sas(cls, **kwargs):  # pragma: no cover
        return cls.io_cls.read_sas(**kwargs)

    @classmethod
    @doc(
        _doc_io_method_template,
        source="a pickled Modin or pandas DataFrame",
        params=_doc_io_method_kwargs_params,
        method="read_pickle",
    )
    def _read_pickle(cls, **kwargs):
        return cls.io_cls.read_pickle(**kwargs)

    @classmethod
    @doc(
        _doc_io_method_template,
        source="a SQL query or database table",
        params=_doc_io_method_kwargs_params,
        method="read_sql",
    )
    def _read_sql(cls, **kwargs):
        return cls.io_cls.read_sql(**kwargs)

    @classmethod
    @doc(
        _doc_io_method_template,
        source="a table of fixed-width formatted lines",
        params=_doc_io_method_kwargs_params,
        method="read_fwf",
    )
    def _read_fwf(cls, **kwargs):
        return cls.io_cls.read_fwf(**kwargs)

    @classmethod
    @doc(
        _doc_io_method_template,
        source="a SQL database table",
        params=_doc_io_method_kwargs_params,
        method="read_sql_table",
    )
    def _read_sql_table(cls, **kwargs):
        return cls.io_cls.read_sql_table(**kwargs)

    @classmethod
    @doc(
        _doc_io_method_template,
        source="a SQL query",
        params=_doc_io_method_kwargs_params,
        method="read_sql_query",
    )
    def _read_sql_query(cls, **kwargs):
        return cls.io_cls.read_sql_query(**kwargs)

    @classmethod
    @doc(
        _doc_io_method_template,
        source="an SPSS file",
        params=_doc_io_method_kwargs_params,
        method="read_spss",
    )
    def _read_spss(cls, **kwargs):
        return cls.io_cls.read_spss(**kwargs)

    @classmethod
    def _to_sql(cls, *args, **kwargs):
        """
        Write query compiler content to a SQL database.

        Parameters
        ----------
        *args : args
            Arguments to the writer method.
        **kwargs : kwargs
            Arguments to the writer method.
        """
        return cls.io_cls.to_sql(*args, **kwargs)

    @classmethod
    def _to_pickle(cls, *args, **kwargs):
        """
        Pickle query compiler object.

        Parameters
        ----------
        *args : args
            Arguments to the writer method.
        **kwargs : kwargs
            Arguments to the writer method.
        """
        return cls.io_cls.to_pickle(*args, **kwargs)

    @classmethod
    def _to_csv(cls, *args, **kwargs):
        """
        Write query compiler content to a CSV file.

        Parameters
        ----------
        *args : args
            Arguments to pass to the writer method.
        **kwargs : kwargs
            Arguments to pass to the writer method.
        """
        return cls.io_cls.to_csv(*args, **kwargs)

    @classmethod
    def _to_json(cls, *args, **kwargs):
        """
        Write query compiler content to a JSON file.

        Parameters
        ----------
        *args : args
            Arguments to pass to the writer method.
        **kwargs : kwargs
            Arguments to pass to the writer method.
        """
        return cls.io_cls.to_json(*args, **kwargs)

    @classmethod
    def _to_json_series(cls, *args, **kwargs):
        """
        Write query compiler content of a Series to a JSON file.

        Parameters
        ----------
        *args : args
            Arguments to pass to the writer method.
        **kwargs : kwargs
            Arguments to pass to the writer method.
        """
        return cls.io_cls.to_json_series(*args, **kwargs)

    @classmethod
    def _to_xml(cls, *args, **kwargs):
        """
        Write query compiler content to a XML file.

        Parameters
        ----------
        *args : args
            Arguments to pass to the writer method.
        **kwargs : kwargs
            Arguments to pass to the writer method.
        """
        return cls.io_cls.to_xml(*args, **kwargs)

    @classmethod
    def _to_parquet(cls, *args, **kwargs):
        """
        Write query compiler content to a parquet file.

        Parameters
        ----------
        *args : args
            Arguments to pass to the writer method.
        **kwargs : kwargs
            Arguments to pass to the writer method.
        """
        return cls.io_cls.to_parquet(*args, **kwargs)

    @classmethod
    def _to_ray(cls, modin_obj):
        """
        Write query compiler content to a Ray Dataset.

        Parameters
        ----------
        modin_obj : modin.pandas.DataFrame, modin.pandas.Series
            The Modin DataFrame/Series to write.

        Returns
        -------
        ray.data.Dataset
            A Ray Dataset object.

        Notes
        -----
        Modin DataFrame/Series can only be converted to a Ray Dataset if Modin uses a Ray engine.
        """
        return cls.io_cls.to_ray(modin_obj)

    @classmethod
    def _to_dask(cls, modin_obj):
        """
        Write query compiler content to a Dask DataFrame/Series.

        Parameters
        ----------
        modin_obj : modin.pandas.DataFrame, modin.pandas.Series
            The Modin DataFrame/Series to write.

        Returns
        -------
        dask.dataframe.DataFrame or dask.dataframe.Series
            A Dask DataFrame/Series object.

        Notes
        -----
        Modin DataFrame/Series can only be converted to a Dask DataFrame/Series if Modin uses a Dask engine.
        """
        return cls.io_cls.to_dask(modin_obj)

    # experimental methods that don't exist in pandas
    @classmethod
    def _raise_if_unsupported_execution(cls, method_name):
        """
        Raise if the current execution does not implement an experimental IO method.

        Parameters
        ----------
        method_name : str
            Name of the factory method to mention in the error message.

        Raises
        ------
        NotImplementedError
            If the current execution is not listed in ``supported_executions``.
        """
        # TODO(https://github.com/modin-project/modin/issues/7429): Use
        # frame-level execution instead of the global, default execution.
        current_execution = get_current_execution()
        if current_execution not in supported_executions:
            raise NotImplementedError(
                f"`{method_name}()` is not implemented for {current_execution} execution."
            )

    @classmethod
    @doc(
        _doc_io_method_raw_template,
        source="CSV files",
        params=_doc_io_method_kwargs_params,
    )
    def _read_csv_glob(cls, **kwargs):
        cls._raise_if_unsupported_execution("_read_csv_glob")
        return cls.io_cls.read_csv_glob(**kwargs)

    @classmethod
    @doc(
        _doc_io_method_raw_template,
        source="Pickle files",
        params=_doc_io_method_kwargs_params,
    )
    def _read_pickle_glob(cls, **kwargs):
        cls._raise_if_unsupported_execution("_read_pickle_glob")
        return cls.io_cls.read_pickle_glob(**kwargs)

    @classmethod
    @doc(
        _doc_io_method_raw_template,
        source="SQL files",
        params=_doc_io_method_kwargs_params,
    )
    def _read_sql_distributed(cls, **kwargs):
        current_execution = get_current_execution()
        if current_execution in supported_executions:
            return cls.io_cls.read_sql_distributed(**kwargs)

        # Unsupported execution: fall back to the regular `read_sql`, which does
        # not know about the distribution-specific parameters.
        extra_parameters = (
            "partition_column",
            "lower_bound",
            "upper_bound",
            "max_sessions",
        )
        if any(kwargs.get(param) is not None for param in extra_parameters):
            warnings.warn(
                f"Distributed read_sql() was only implemented for {', '.join(supported_executions)} executions."
            )
        for param in extra_parameters:
            # The parameters are optional, so tolerate the ones that were not passed.
            kwargs.pop(param, None)
        return cls.io_cls.read_sql(**kwargs)

    @classmethod
    @doc(
        _doc_io_method_raw_template,
        source="Custom text files",
        params=_doc_io_method_kwargs_params,
    )
    def _read_custom_text(cls, **kwargs):
        cls._raise_if_unsupported_execution("_read_custom_text")
        return cls.io_cls.read_custom_text(**kwargs)

    @classmethod
    def _to_pickle_glob(cls, *args, **kwargs):
        """
        Distributed pickle query compiler object.

        Parameters
        ----------
        *args : args
            Arguments to the writer method.
        **kwargs : kwargs
            Arguments to the writer method.
        """
        cls._raise_if_unsupported_execution("_to_pickle_glob")
        return cls.io_cls.to_pickle_glob(*args, **kwargs)

    @classmethod
    @doc(
        _doc_io_method_raw_template,
        source="Parquet files",
        params=_doc_io_method_kwargs_params,
    )
    def _read_parquet_glob(cls, **kwargs):
        cls._raise_if_unsupported_execution("_read_parquet_glob")
        return cls.io_cls.read_parquet_glob(**kwargs)

    @classmethod
    def _to_parquet_glob(cls, *args, **kwargs):
        """
        Write query compiler content to several parquet files.

        Parameters
        ----------
        *args : args
            Arguments to pass to the writer method.
        **kwargs : kwargs
            Arguments to pass to the writer method.
        """
        cls._raise_if_unsupported_execution("_to_parquet_glob")
        return cls.io_cls.to_parquet_glob(*args, **kwargs)

    @classmethod
    @doc(
        _doc_io_method_raw_template,
        source="Json files",
        params=_doc_io_method_kwargs_params,
    )
    def _read_json_glob(cls, **kwargs):
        cls._raise_if_unsupported_execution("_read_json_glob")
        return cls.io_cls.read_json_glob(**kwargs)

    @classmethod
    def _to_json_glob(cls, *args, **kwargs):
        """
        Write query compiler content to several json files.

        Parameters
        ----------
        *args : args
            Arguments to pass to the writer method.
        **kwargs : kwargs
            Arguments to pass to the writer method.
        """
        cls._raise_if_unsupported_execution("_to_json_glob")
        return cls.io_cls.to_json_glob(*args, **kwargs)

    @classmethod
    @doc(
        _doc_io_method_raw_template,
        source="XML files",
        params=_doc_io_method_kwargs_params,
    )
    def _read_xml_glob(cls, **kwargs):
        cls._raise_if_unsupported_execution("_read_xml_glob")
        return cls.io_cls.read_xml_glob(**kwargs)

    @classmethod
    def _to_xml_glob(cls, *args, **kwargs):
        """
        Write query compiler content to several XML files.

        Parameters
        ----------
        *args : args
            Arguments to pass to the writer method.
        **kwargs : kwargs
            Arguments to pass to the writer method.
        """
        cls._raise_if_unsupported_execution("_to_xml_glob")
        return cls.io_cls.to_xml_glob(*args, **kwargs)


@doc(_doc_factory_class, execution_name="PandasOnRay")
class PandasOnRayFactory(BaseFactory):
    @classmethod
    @doc(_doc_factory_prepare_method, io_module_name="``PandasOnRayIO``")
    def prepare(cls):
        # The IO package is imported inside the method, so it is only loaded
        # once this factory is actually prepared.
        import modin.core.execution.ray.implementations.pandas_on_ray.io as ray_io

        cls.io_cls = ray_io.PandasOnRayIO


@doc(_doc_factory_class, execution_name="PandasOnPython")
class PandasOnPythonFactory(BaseFactory):
    @classmethod
    @doc(_doc_factory_prepare_method, io_module_name="``PandasOnPythonIO``")
    def prepare(cls):
        # The IO package is imported inside the method, so it is only loaded
        # once this factory is actually prepared.
        import modin.core.execution.python.implementations.pandas_on_python.io as python_io

        cls.io_cls = python_io.PandasOnPythonIO


@doc(_doc_factory_class, execution_name="PandasOnDask")
class PandasOnDaskFactory(BaseFactory):
    @classmethod
    @doc(_doc_factory_prepare_method, io_module_name="``PandasOnDaskIO``")
    def prepare(cls):
        # The IO package is imported inside the method, so it is only loaded
        # once this factory is actually prepared.
        import modin.core.execution.dask.implementations.pandas_on_dask.io as dask_io

        cls.io_cls = dask_io.PandasOnDaskIO


@doc(_doc_factory_class, execution_name="PandasOnUnidist")
class PandasOnUnidistFactory(BaseFactory):
    @classmethod
    @doc(_doc_factory_prepare_method, io_module_name="``PandasOnUnidistIO``")
    def prepare(cls):
        # The IO package is imported inside the method, so it is only loaded
        # once this factory is actually prepared.
        import modin.core.execution.unidist.implementations.pandas_on_unidist.io as unidist_io

        cls.io_cls = unidist_io.PandasOnUnidistIO


class NativeIO(BaseIO):
    """
    IO class used by the native pandas execution.

    No reader or writer is overridden here: every function implementation
    comes from the ``BaseIO`` parent class.
    """

    query_compiler_cls = NativeQueryCompiler
    _should_warn_on_default_to_pandas: bool = False


@doc(_doc_factory_class, execution_name="NativeOnNative")
class NativeOnNativeFactory(BaseFactory):

    @classmethod
    @doc(_doc_factory_prepare_method, io_module_name="`NativeIO`")
    def prepare(cls):
        # ``NativeIO`` is defined in this module, so unlike the other
        # factories no import is needed before binding the IO class.
        cls.io_cls = NativeIO


================================================
FILE: modin/core/execution/modin_aqp.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
The module for working with displaying progress bars for Modin execution engines.

Modin Automatic Query Progress (AQP).
"""

import inspect
import os
import threading
import time
import warnings

from modin.config import Engine, ProgressBar

# Progress bars created so far, keyed by the "<cell number>-<line number>"
# identifier that ``call_progress_bar`` builds.
progress_bars = {}
# Lock taken by ``call_progress_bar`` while it looks up, extends or creates
# an entry of ``progress_bars``.
bar_lock = threading.Lock()


def call_progress_bar(result_parts, line_no):
    """
    Attach a progress bar to given `result_parts`.

    The progress bar is expected to be shown in a Jupyter Notebook cell.
    Bars are shared per (cell number, line number) pair: a second call for
    the same pair extends the existing bar instead of creating a new one.

    Parameters
    ----------
    result_parts : list of list of object refs (futures)
        Objects which are being computed for which progress is requested.
    line_no : int
        Line number in the call stack which we're displaying progress for.

    Raises
    ------
    ImportError
        If ``tqdm`` is not installed.
    NotImplementedError
        If the current Modin engine is neither "Ray" nor "Unidist".
    """
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        try:
            from tqdm.autonotebook import tqdm as tqdm_notebook
        except ImportError as err:
            # Keep the original failure attached as the cause.
            raise ImportError(
                "Please pip install tqdm to use the progress bar"
            ) from err
        from IPython import get_ipython

    try:
        cell_no = get_ipython().execution_count
    # This happens if we are not in ipython or jupyter.
    # No progress bar is supported in that case.
    except AttributeError:
        return

    # Resolve the engine wrapper before any bar or refresh thread is created:
    # for an unsupported engine nothing would ever advance the bar, so the
    # refresh thread would keep running forever.
    # TODO(https://github.com/modin-project/modin/issues/7429): Use
    # frame-level engine config.
    modin_engine = Engine.get()
    if modin_engine == "Ray":
        from modin.core.execution.ray.common.engine_wrapper import RayWrapper

        engine_wrapper = RayWrapper
    elif modin_engine == "Unidist":
        from modin.core.execution.unidist.common.engine_wrapper import UnidistWrapper

        engine_wrapper = UnidistWrapper
    else:
        raise NotImplementedError(
            f"ProgressBar feature is not supported for {modin_engine} engine."
        )

    pbar_id = f"{cell_no}-{line_no}"
    futures = [
        block
        for row in result_parts
        for partition in row
        for block in partition.list_of_blocks
    ]
    bar_format = (
        "{l_bar}{bar}{r_bar}"
        if os.environ.get("DEBUG_PROGRESS_BAR") == "True"
        else "{desc}: {percentage:3.0f}%{bar} Elapsed time: {elapsed}, estimated remaining time: {remaining}"
    )

    # The context manager guarantees the lock is released even if updating
    # the bar raises; a bare acquire()/release() pair would leave it held.
    with bar_lock:
        if pbar_id in progress_bars:
            pbar = progress_bars[pbar_id]
            if hasattr(pbar, "container"):
                # The widget exposing ``max`` is either the first or the
                # second child of the container.
                index = 0 if hasattr(pbar.container.children[0], "max") else 1
                bar_widget = pbar.container.children[index]
                bar_widget.max = bar_widget.max + len(futures)
            pbar.total = pbar.total + len(futures)
            pbar.refresh()
        else:
            pbar = tqdm_notebook(
                total=len(futures),
                desc="Estimated completion of line " + str(line_no),
                bar_format=bar_format,
            )
            progress_bars[pbar_id] = pbar

    # Refresh the displayed bar periodically in the background until done.
    threading.Thread(target=_show_time_updates, args=(pbar,)).start()

    # Wait for one more future at each step and advance the bar accordingly.
    for i in range(1, len(futures) + 1):
        engine_wrapper.wait(futures, num_returns=i)
        pbar.update(1)
        pbar.refresh()
    if pbar.n == pbar.total:
        pbar.close()


def display_time_updates(bar):
    """
    Launch a background thread that keeps the progress `bar` refreshed.

    Parameters
    ----------
    bar : tqdm.tqdm
        The progress bar wrapper to display in a notebook cell.
    """
    refresher = threading.Thread(target=_show_time_updates, args=(bar,))
    refresher.start()


def _show_time_updates(p_bar):
    """
    Refresh displayed progress bar `p_bar` periodically until it is complete.

    Parameters
    ----------
    p_bar : tqdm.tqdm
        The progress bar wrapper being displayed to refresh.
    """
    while p_bar.total > p_bar.n:
        time.sleep(1)
        if p_bar.total > p_bar.n:
            p_bar.refresh()


def progress_bar_wrapper(f):
    """
    Wrap computation function inside a progress bar.

    When ``ProgressBar`` is enabled, spawns another thread which displays a
    progress bar showing estimated completion time.

    Parameters
    ----------
    f : callable
        The function to be wrapped.

    Returns
    -------
    callable
        Decorated version of `f` which reports progress.
    """
    from functools import wraps

    @wraps(f)
    def magic(*args, **kwargs):
        result_parts = f(*args, **kwargs)
        if not ProgressBar.get():
            return result_parts

        # Walk up the call stack to the first "<module>" frame and report its
        # line number. If no such frame exists (e.g. the call originates from
        # a thread), stop at the outermost frame instead of passing ``None``
        # to ``inspect`` and failing with ``TypeError``.
        line_number = 0
        frame = inspect.currentframe()
        try:
            while frame is not None:
                line_number = frame.f_lineno
                if frame.f_code.co_name == "<module>":
                    break
                frame = frame.f_back
        finally:
            # Drop the frame reference so no reference cycle is kept alive.
            del frame

        t = threading.Thread(
            target=call_progress_bar,
            args=(result_parts, line_number),
        )
        t.start()
        # We need to know whether or not we are in a jupyter notebook
        from IPython import get_ipython

        try:
            ipy_str = str(type(get_ipython()))
            # Outside of a zmq-based shell, block until the bar thread ends.
            if "zmqshell" not in ipy_str:
                t.join()
        except Exception:
            # Best effort only: failing to detect the shell must not break
            # the wrapped computation, whose result is returned regardless.
            pass
        return result_parts

    return magic


================================================
FILE: modin/core/execution/python/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Modin's functionality related to Python execution engine."""


================================================
FILE: modin/core/execution/python/common/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Modin's functionality related to Python execution engine."""

from .engine_wrapper import PythonWrapper

# Names exported by ``from modin.core.execution.python.common import *``.
__all__ = ["PythonWrapper"]


================================================
FILE: modin/core/execution/python/common/engine_wrapper.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Modin's functionality related to Python execution engine."""


class PythonWrapper:
    """Engine wrapper for the Python engine, kept for compatibility with other engines."""

    @classmethod
    def deploy(cls, func, f_args=None, f_kwargs=None, num_returns=1):
        """
        Call the passed function right away and return its result.

        Parameters
        ----------
        func : callable
            Function to run.
        f_args : sequence, optional
            Positional arguments to pass to the `func`.
        f_kwargs : dict, optional
            Keyword arguments to pass to the `func`.
        num_returns : int, default: 1
            Number of return values from the `func`. Unused by this wrapper.

        Returns
        -------
        object
            Returns the result of the `func`.
        """
        positional = f_args if f_args is not None else []
        keyword = f_kwargs if f_kwargs is not None else {}
        return func(*positional, **keyword)

    @classmethod
    def is_future(cls, item):
        """
        Tell whether the item is a Future.

        Parameters
        ----------
        item : object
            Object to check. Not inspected by this wrapper.

        Returns
        -------
        boolean
            Always ``False``.
        """
        return False

    @classmethod
    def materialize(cls, obj_id):
        """
        Get the data from the data storage.

        Exists for compatibility only: the passed value is handed back
        without any processing.

        Parameters
        ----------
        obj_id : object
            Value to hand back.

        Returns
        -------
        object
            The passed `obj_id` itself.
        """
        return obj_id

    @classmethod
    def put(cls, data, **kwargs):
        """
        Put data into the data storage.

        Exists for compatibility only: the passed value is handed back
        without any processing.

        Parameters
        ----------
        data : object
            Value to hand back.
        **kwargs : dict
            Ignored by this wrapper.

        Returns
        -------
        object
            The passed `data` itself.
        """
        return data


================================================
FILE: modin/core/execution/python/implementations/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Modin's functionality related to Python execution engine and optimized for specific storage formats."""


================================================
FILE: modin/core/execution/python/implementations/pandas_on_python/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Modin's functionality related to Python execution engine and optimized for pandas storage format."""


================================================
FILE: modin/core/execution/python/implementations/pandas_on_python/dataframe/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Base Modin Dataframe class optimized for pandas on Python execution."""


================================================
FILE: modin/core/execution/python/implementations/pandas_on_python/dataframe/dataframe.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
Module contains class ``PandasOnPythonDataframe``.

``PandasOnPythonDataframe`` is dataframe class with pandas storage format and Python engine.
"""

from modin.core.dataframe.pandas.dataframe.dataframe import PandasDataframe
from modin.utils import _inherit_docstrings

from ..partitioning.partition_manager import PandasOnPythonDataframePartitionManager


class PandasOnPythonDataframe(PandasDataframe):
    """
    Dataframe class for pandas storage format running on the Python engine.

    The class only selects the Python partition manager and reports the
    engine name; all other functionality is inherited from the
    ``PandasDataframe`` class.

    Parameters
    ----------
    partitions : np.ndarray
        A 2D NumPy array of partitions.
    index : sequence
        The index for the dataframe. Converted to a ``pandas.Index``.
    columns : sequence
        The columns object for the dataframe. Converted to a ``pandas.Index``.
    row_lengths : list, optional
        The length of each partition in the rows, i.e. the "height" of
        each of the block partitions. Is computed if not provided.
    column_widths : list, optional
        The width of each partition in the columns, i.e. the "width" of
        each of the block partitions. Is computed if not provided.
    dtypes : pandas.Series, optional
        The data types for the dataframe columns.
    pandas_backend : {"pyarrow", None}, optional
        Backend used by pandas. None - means default NumPy backend.
    """

    # Partition manager used for dataframes of this class.
    _partition_mgr_cls = PandasOnPythonDataframePartitionManager

    @property
    @_inherit_docstrings(PandasDataframe.engine)
    def engine(self) -> str:
        engine_name = "Python"
        return engine_name


================================================
FILE: modin/core/execution/python/implementations/pandas_on_python/io/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Base IO classes optimized for pandas on Python execution."""

from .io import PandasOnPythonIO

# Names exported by a star-import of this package.
__all__ = [
    "PandasOnPythonIO",
]


================================================
FILE: modin/core/execution/python/implementations/pandas_on_python/io/io.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module for housing IO classes with pandas storage format and Python engine."""

from modin.core.execution.python.implementations.pandas_on_python.dataframe.dataframe import (
    PandasOnPythonDataframe,
)
from modin.core.io import BaseIO
from modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler


class PandasOnPythonIO(BaseIO):
    """
    IO class for pandas storage format and Python engine.

    Only the query compiler and frame classes are set here; the default
    function implementations come from the ``BaseIO`` parent class.
    """

    query_compiler_cls = PandasQueryCompiler
    frame_cls = PandasOnPythonDataframe


================================================
FILE: modin/core/execution/python/implementations/pandas_on_python/partitioning/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Base Modin Dataframe classes related to its partitioning and optimized for pandas on Python execution."""

from .partition import PandasOnPythonDataframePartition
from .partition_manager import PandasOnPythonDataframePartitionManager
from .virtual_partition import (
    PandasOnPythonDataframeAxisPartition,
    PandasOnPythonDataframeColumnPartition,
    PandasOnPythonDataframeRowPartition,
)

# Names exported by a star-import of this package: the block partition, its
# manager and the axis (virtual) partition classes imported above.
__all__ = [
    "PandasOnPythonDataframePartition",
    "PandasOnPythonDataframePartitionManager",
    "PandasOnPythonDataframeAxisPartition",
    "PandasOnPythonDataframeColumnPartition",
    "PandasOnPythonDataframeRowPartition",
]


================================================
FILE: modin/core/execution/python/implementations/pandas_on_python/partitioning/partition.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""The module defines interface for a partition with pandas storage format and Python engine."""

import warnings

from modin.core.dataframe.pandas.partitioning.partition import PandasDataframePartition
from modin.core.execution.python.common import PythonWrapper


class PandasOnPythonDataframePartition(PandasDataframePartition):
    """
    Partition class with interface for pandas storage format and Python engine.

    Class holds the data and metadata for a single partition and implements
    methods of parent abstract class ``PandasDataframePartition``.

    Parameters
    ----------
    data : pandas.DataFrame
        ``pandas.DataFrame`` that should be wrapped with this class.
    length : int, optional
        Length of `data` (number of rows in the input dataframe).
    width : int, optional
        Width of `data` (number of columns in the input dataframe).
    call_queue : list, optional
        Call queue of the partition (list with entities that should be called
        before partition materialization).

    Notes
    -----
    Objects of this class are treated as immutable by partition manager
    subclasses. There is no logic for updating in-place.
    """

    execution_wrapper = PythonWrapper

    def __init__(self, data, length=None, width=None, call_queue=None):
        super().__init__()
        # The partition keeps its own copy of the data (when it can be copied).
        self._data = self._copy_if_supported(data)
        self.call_queue = [] if call_queue is None else call_queue
        self._length_cache = length
        self._width_cache = width

    @staticmethod
    def _copy_if_supported(data):
        """
        Return a copy of `data` if it exposes ``copy``, otherwise `data` itself.

        Parameters
        ----------
        data : object
            Object wrapped (or about to be wrapped) by a partition.

        Returns
        -------
        object
            ``data.copy()`` when available, `data` unchanged otherwise.
        """
        return data.copy() if hasattr(data, "copy") else data

    def get(self):
        """
        Flush the `call_queue` and return copy of the data.

        Returns
        -------
        pandas.DataFrame
            Copy of DataFrame that was wrapped by this partition.

        Notes
        -----
        Since this object is a simple wrapper, just return the copy of data.
        """
        self.drain_call_queue()
        return self._copy_if_supported(self._data)

    def apply(self, func, *args, **kwargs):
        """
        Apply a function to the object wrapped by this partition.

        Calls pending in the call queue are executed first and their result
        replaces the wrapped data; `func` then runs on a copy of that data.

        Parameters
        ----------
        func : callable
            Function to apply.
        *args : iterable
            Additional positional arguments to be passed in `func`.
        **kwargs : dict
            Additional keyword arguments to be passed in `func`.

        Returns
        -------
        PandasOnPythonDataframePartition
            New ``PandasOnPythonDataframePartition`` object.
        """

        def call_queue_closure(data, call_queue):
            """
            Apply callables from `call_queue` on copy of the `data` and return the result.

            Parameters
            ----------
            data : pandas.DataFrame or pandas.Series
                Data to use for computations.
            call_queue : array-like
                Array with callables and their args and kwargs to be applied to the `data`.

            Returns
            -------
            pandas.DataFrame or pandas.Series
            """
            # Same copy rule as ``__init__``/``get``, so wrapped objects
            # without ``copy`` do not fail here with ``AttributeError``.
            result = self._copy_if_supported(data)
            for queued_func, f_args, f_kwargs in call_queue:
                try:
                    result = queued_func(result, *f_args, **f_kwargs)
                except Exception:
                    # Drop the queue so the failing call is not retried, then
                    # re-raise the original exception unchanged.
                    self.call_queue = []
                    raise
            return result

        self._data = call_queue_closure(self._data, self.call_queue)
        self.call_queue = []
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=FutureWarning)
            return self.__constructor__(
                func(self._copy_if_supported(self._data), *args, **kwargs)
            )

    def drain_call_queue(self):
        """Execute all operations stored in the call queue on the object wrapped by this partition."""
        if len(self.call_queue) == 0:
            return
        # Applying the identity function flushes the queue as a side effect.
        self.apply(lambda x: x)

    def wait(self):
        """
        Wait for completion of computations on the object wrapped by the partition.

        Internally will be done by flushing the call queue.
        """
        self.drain_call_queue()

    @classmethod
    def put(cls, obj):
        """
        Create partition containing `obj`.

        Parameters
        ----------
        obj : pandas.DataFrame
            DataFrame to be put into the new partition.

        Returns
        -------
        PandasOnPythonDataframePartition
            New ``PandasOnPythonDataframePartition`` object.
        """
        # ``__init__`` already copies the data, so copying here as well would
        # only duplicate the dataframe a second time.
        return cls(obj, len(obj.index), len(obj.columns))

    @classmethod
    def preprocess_func(cls, func):
        """
        Preprocess a function before an ``apply`` call.

        Parameters
        ----------
        func : callable
            Function to preprocess.

        Returns
        -------
        callable
            An object that can be accepted by ``apply``.

        Notes
        -----
        No special preprocessing action is required, so unmodified
        `func` will be returned.
        """
        return func


================================================
FILE: modin/core/execution/python/implementations/pandas_on_python/partitioning/partition_manager.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses class for managing partitions with pandas storage format and Python engine."""

from modin.core.dataframe.pandas.partitioning.partition_manager import (
    PandasDataframePartitionManager,
)
from modin.core.execution.python.common import PythonWrapper

from .partition import PandasOnPythonDataframePartition
from .virtual_partition import (
    PandasOnPythonDataframeColumnPartition,
    PandasOnPythonDataframeRowPartition,
)


class PandasOnPythonDataframePartitionManager(PandasDataframePartitionManager):
    """
    Partition manager for pandas storage format and Python engine.

    Only the partition classes and the execution wrapper are set here; all
    functionality comes from the ``PandasDataframePartitionManager`` base class.
    """

    _execution_wrapper = PythonWrapper
    _partition_class = PandasOnPythonDataframePartition
    _row_partition_class = PandasOnPythonDataframeRowPartition
    _column_partitions_class = PandasOnPythonDataframeColumnPartition


================================================
FILE: modin/core/execution/python/implementations/pandas_on_python/partitioning/virtual_partition.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""The module defines interface for a virtual partition with pandas storage format and python engine."""

from modin.core.dataframe.pandas.partitioning.axis_partition import (
    PandasDataframeAxisPartition,
)
from modin.utils import _inherit_docstrings

from .partition import PandasOnPythonDataframePartition


class PandasOnPythonDataframeAxisPartition(PandasDataframeAxisPartition):
    """
    Class defines axis partition interface with pandas storage format and Python engine.

    Inherits functionality from ``PandasDataframeAxisPartition`` class and only
    sets ``partition_type`` to ``PandasOnPythonDataframePartition``.

    Parameters
    ----------
    list_of_partitions : Union[list, PandasOnPythonDataframePartition]
        List of ``PandasOnPythonDataframePartition`` and
        ``PandasOnPythonDataframeVirtualPartition`` objects, or a single
        ``PandasOnPythonDataframePartition``.
    get_ip : bool, default: False
        Whether to get node IP addresses of conforming partitions or not.
    full_axis : bool, default: True
        Whether or not the virtual partition encompasses the whole axis.
    call_queue : list, optional
        A list of tuples (callable, args, kwargs) that contains deferred calls.
    length : int, optional
        Length, or reference to length, of wrapped ``pandas.DataFrame``.
    width : int, optional
        Width, or reference to width, of wrapped ``pandas.DataFrame``.
    """

    # Block partition class paired with this axis partition
    # (presumably consumed by the base class - verify there).
    partition_type = PandasOnPythonDataframePartition


@_inherit_docstrings(PandasOnPythonDataframeAxisPartition)
class PandasOnPythonDataframeColumnPartition(PandasOnPythonDataframeAxisPartition):
    # The docstring is taken from the parent class by the decorator above.
    # Column partitions are bound to axis 0.
    axis = 0


@_inherit_docstrings(PandasOnPythonDataframeAxisPartition)
class PandasOnPythonDataframeRowPartition(PandasOnPythonDataframeAxisPartition):
    # The docstring is taken from the parent class by the decorator above.
    # Row partitions are bound to axis 1.
    axis = 1


================================================
FILE: modin/core/execution/ray/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Modin's functionality related to Ray execution engine."""


================================================
FILE: modin/core/execution/ray/common/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Common utilities for Ray execution engine."""

from .engine_wrapper import MaterializationHook, RayWrapper, SignalActor
from .utils import initialize_ray

# Public names of this package: re-exports from ``.engine_wrapper`` and ``.utils``.
__all__ = [
    "initialize_ray",
    "RayWrapper",
    "MaterializationHook",
    "SignalActor",
]


================================================
FILE: modin/core/execution/ray/common/deferred_execution.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module with classes and utilities for deferred remote execution in Ray workers."""

from enum import Enum
from itertools import islice
from typing import (
    Any,
    Callable,
    Dict,
    Generator,
    Iterable,
    List,
    Optional,
    Tuple,
    Union,
)

import pandas
import ray
from ray._private.services import get_node_ip_address

from modin.config import RayTaskCustomResources
from modin.core.execution.ray.common import MaterializationHook, RayWrapper
from modin.logging import get_logger

# A Ray object reference or None.
ObjectRefType = Union[ray.ObjectRef, None]
# Either a single (optional) object reference or a list of them.
ObjectRefOrListType = Union[ObjectRefType, List[ObjectRefType]]
# Tuple of types for ``isinstance()`` checks that treat lists and tuples alike.
ListOrTuple = (list, tuple)


class DeferredExecution:
    """
    Deferred execution task.

    This class represents a single node in the execution tree. The input is either
    an object reference or another node on which this node depends.
    The output is calculated by the specified Callable.

    If the input is a DeferredExecution node, it is executed first and the execution
    output is used as the input for this one. All the executions are performed in a
    single batch (i.e. using a single remote call) and the results are saved in all
    the nodes that have multiple subscribers.

    Parameters
    ----------
    data : ObjectRefType or DeferredExecution
        The execution input.
    func : callable or ObjectRefType
        A function to be executed.
    args : list or tuple
        Additional positional arguments to be passed in `func`.
    kwargs : dict
        Additional keyword arguments to be passed in `func`.
    num_returns : int, optional
        The number of the return values.

    Attributes
    ----------
    data : ObjectRefType or DeferredExecution
        The execution input.
    func : callable or ObjectRefType
        A function to be executed.
    args : list or tuple
        Additional positional arguments to be passed in `func`.
    kwargs : dict
        Additional keyword arguments to be passed in `func`.
    num_returns : int
        The number of the return values.
    flat_args : bool
        True means that there are no lists or DeferredExecution objects in `args`.
        In this case, no arguments processing is performed and `args` is passed
        to the remote method as is.
    flat_kwargs : bool
        The same as `flat_args` but for the `kwargs` values.
    """

    def __init__(
        self,
        data: Union[
            ObjectRefType,
            "DeferredExecution",
            List[Union[ObjectRefType, "DeferredExecution"]],
        ],
        func: Union[Callable, ObjectRefType],
        args: Union[List[Any], Tuple[Any]],
        kwargs: Dict[str, Any],
        num_returns: int = 1,
    ):
        # This node consumes the output of `data`, so it registers itself as
        # a subscriber of the parent node.
        # NOTE(review): only a direct DeferredExecution input is subscribed here;
        # nodes nested in a list `data` are not - confirm the callers handle that.
        if isinstance(data, DeferredExecution):
            data.subscribe()
        self.data = data
        self.func = func
        self.args = args
        self.kwargs = kwargs
        self.num_returns = num_returns
        # `_flat_args()` also subscribes to every DeferredExecution object
        # found in the (possibly nested) arguments.
        self.flat_args = self._flat_args(args)
        self.flat_kwargs = self._flat_args(kwargs.values())
        # The counter is changed by `subscribe()` and `unsubscribe()`.
        self.subscribers = 0

    @classmethod
    def _flat_args(cls, args: Iterable):
        """
        Tell whether `args` is flat, subscribing to the nested ``DeferredExecution`` objects.

        The arguments are flat if they contain neither lists/tuples nor
        ``DeferredExecution`` objects. Nested lists and tuples are traversed
        recursively, and ``subscribe()`` is called on each ``DeferredExecution``
        object found on the way.

        Parameters
        ----------
        args : Iterable
            The arguments to be checked.

        Returns
        -------
        bool
            True if no list, tuple or ``DeferredExecution`` is found at the top level.
        """
        is_flat = True
        for item in args:
            if isinstance(item, ListOrTuple):
                # Descend only for the subscription side effect, the nested
                # result does not matter - the arguments are already not flat.
                is_flat = False
                cls._flat_args(item)
            elif isinstance(item, DeferredExecution):
                is_flat = False
                item.subscribe()
        return is_flat

    def exec(
        self,
    ) -> Tuple[ObjectRefOrListType, Union["MetaList", List], Union[int, List[int]]]:
        """
        Execute this task, if required.

        If the task has already been executed, the saved result is returned.
        Otherwise, all the pending nodes this task depends on are executed in
        a single remote call and the results are saved in every node of the
        tree that has been selected as a result consumer.

        Returns
        -------
        tuple
            The execution result, MetaList, containing the length, width and
            the worker's ip address (the last value in the list) and the values
            offset in the list. I.e. length = meta_list[offset],
            width = meta_list[offset + 1], ip = meta_list[-1].
            If ``num_returns`` is not 1, the result is a list of object references
            and the offset is a list containing one offset per returned value.
        """
        if self.has_result:
            return self.data, self.meta, self.meta_offset

        # Fast path: a single function with plain arguments and a single
        # return value does not require the tree de/construction.
        if (
            not isinstance(self.data, DeferredExecution)
            and self.flat_args
            and self.flat_kwargs
            and self.num_returns == 1
        ):
            result, length, width, ip = remote_exec_func.options(
                resources=RayTaskCustomResources.get()
            ).remote(self.func, self.data, *self.args, **self.kwargs)
            meta = MetaList([length, width, ip])
            self._set_result(result, meta, 0)
            return result, meta, 0

        # If there are no subscribers, we still need the result here. We don't need to decrement
        # it back. After the execution, the result is saved and the counter has no effect.
        self.subscribers += 2
        consumers, output = self._deconstruct()
        # The last result is the MetaList, so adding +1 here.
        num_returns = sum(c.num_returns for c in consumers) + 1
        results = self._remote_exec_chain(num_returns, *output)
        meta = MetaList(results.pop())
        meta_offset = 0
        results = iter(results)
        for de in consumers:
            # Each returned value occupies 2 slots (length and width) in the
            # meta list, in the same order the values are returned.
            nret = de.num_returns
            if nret == 1:
                de._set_result(next(results), meta, meta_offset)
            else:
                # Take only the values that belong to this consumer and compute
                # the offsets relative to the current position in the meta list.
                res = list(islice(results, nret))
                offsets = list(range(meta_offset, meta_offset + 2 * nret, 2))
                de._set_result(res, meta, offsets)
            meta_offset += 2 * nret
        return self.data, self.meta, self.meta_offset

    @property
    def has_result(self):
        """
        Return true if this task has already been executed and the result is set.

        The check relies on ``_set_result()``, which deletes the ``func``
        attribute when the result is saved.

        Returns
        -------
        bool
        """
        return not hasattr(self, "func")

    def subscribe(self):
        """
        Increment the `subscribers` counter.

        Subscriber is any instance that could trigger the execution of this task.
        In case of multiple subscribers, the execution could be triggered multiple
        times. To prevent the multiple executions, the execution result is returned
        from the worker and saved in this instance. Subsequent calls to `exec()`
        return the previously saved result.
        """
        self.subscribers += 1

    def unsubscribe(self):
        """Decrement the `subscribers` counter."""
        self.subscribers -= 1
        # A negative value means more unsubscriptions than subscriptions.
        assert self.subscribers >= 0

    def _deconstruct(self) -> Tuple[List["DeferredExecution"], List[Any]]:
        """
        Convert the specified execution tree to a flat list.

        This is required for the automatic Ray object references
        materialization before passing the list to a Ray worker.

        The format of the list is the following:
        <input object> sequence<<function> <n><args> <n><kwargs> <ref> <nret>>...
        If <n> before <args> is >= 0, then the next n objects are the function arguments.
        If it is -1, it means that the method arguments contain list and/or
        DeferredExecution (chain) objects. In this case the next values are read
        one by one until `_Tag.END` is encountered. If the value is `_Tag.LIST`,
        then the next sequence of values up to `_Tag.END` is converted to list.
        If the value is `_Tag.CHAIN`, then the next sequence of values up to
        `_Tag.END` has exactly the same format, as described here.
        If the value is `_Tag.REF`, then the next value is a reference id, i.e.
        the actual value should be retrieved by this id from the previously
        saved objects. The <input object> could also be `_Tag.REF` or `_Tag.LIST`.

        If <n> before <kwargs> is >=0, then the next 2*n values are the argument
        names and values in the following format - [name1, value1, name2, value2...].
        If it's -1, then the next values are converted to list in the same way as
        <args> and the argument names are the next values, one name per list item.

        <ref> is an integer reference id. If it's not 0, then there is another
        chain referring to the execution result of this method and, thus, it must
        be saved so that other chains could retrieve the object by the id.

        <nret> field contains either the `num_returns` value or 0. If it's 0, the
        execution result is not returned, but is just passed to the next task in the
        chain. If it's 1, the result is returned as is. Otherwise, it's expected that
        the result is iterable and the specified number of values is returned from
        the iterator. The values lengths and widths are added to the meta list.

        Returns
        -------
        tuple of list
            * The first list is the result consumers.
                If a DeferredExecution has multiple subscribers, the execution result
                should be returned and saved in order to avoid duplicate executions.
                These DeferredExecution tasks are added to this list and, after the
                execution, the results are passed to the ``_set_result()`` method of
                each task.
            * The second is a flat list of arguments that could be passed to the remote executor.
        """
        stack = []
        result_consumers = []
        output = []
        # Using stack and generators to avoid the ``RecursionError``s.
        stack.append(self._deconstruct_chain(self, output, stack, result_consumers))
        while stack:
            try:
                # Resume the generator on the top of the stack. Each value it yields
                # is a nested generator, that must be exhausted first, so the current
                # generator is pushed back and the nested one is put on top of it.
                gen = stack.pop()
                next_gen = next(gen)
                stack.append(gen)
                stack.append(next_gen)
            except StopIteration:
                # The generator is exhausted and is not pushed back to the stack.
                pass
        return result_consumers, output

    @classmethod
    def _deconstruct_chain(
        cls,
        de: "DeferredExecution",
        output: List,
        stack: List,
        result_consumers: List["DeferredExecution"],
    ):
        """
        Deconstruct the specified DeferredExecution chain.

        The chain is walked up from `de` through the ``data`` attributes until
        an input object (or an already deconstructed node) is found. Then the
        pending nodes are written to `output` in the execution order.

        Parameters
        ----------
        de : DeferredExecution
            The chain to be deconstructed.
        output : list
            Put the arguments to this list.
        stack : list
            Used to eliminate recursive calls, that may lead to the RecursionError.
            The pending nodes of this chain are also kept on top of this stack.
        result_consumers : list of DeferredExecution
            The result consumers.

        Yields
        ------
        Generator
            The ``_deconstruct_list()`` generator.
        """
        out_append = output.append
        out_extend = output.extend
        # Phase 1: find the chain input and push the pending nodes to the stack.
        while True:
            de.unsubscribe()
            if (out_pos := getattr(de, "out_pos", None)) and not de.has_result:
                # This node has already been written to the output,
                # referring to its result by the position of the <ref> field.
                out_append(_Tag.REF)
                out_append(out_pos)
                output[out_pos] = out_pos
                if de.subscribers == 0:
                    # We may have subscribed to the same node multiple times.
                    # It could happen, for example, if it's passed to the args
                    # multiple times, or it's one of the parent nodes and also
                    # passed to the args. In this case, there are no multiple
                    # subscribers, and we don't need to return the result.
                    output[out_pos + 1] = 0
                    result_consumers.remove(de)
                break
            elif not isinstance(data := de.data, DeferredExecution):
                if isinstance(data, ListOrTuple):
                    # The worker recognizes a list input by the leading `_Tag.LIST`
                    # and reads the items up to `_Tag.END`. Without the tag, the
                    # first item would be treated as the input object.
                    out_append(_Tag.LIST)
                    yield cls._deconstruct_list(
                        data, output, stack, result_consumers, out_append
                    )
                else:
                    out_append(data)
                if not de.has_result:
                    stack.append(de)
                break
            else:
                stack.append(de)
                de = data

        # Phase 2: write the pending nodes, starting from the closest to the input.
        while stack and isinstance(stack[-1], DeferredExecution):
            de: DeferredExecution = stack.pop()
            args = de.args
            kwargs = de.kwargs
            out_append(de.func)
            if de.flat_args:
                out_append(len(args))
                out_extend(args)
            else:
                out_append(-1)
                yield cls._deconstruct_list(
                    args, output, stack, result_consumers, out_append
                )
            if de.flat_kwargs:
                out_append(len(kwargs))
                for item in kwargs.items():
                    out_extend(item)
            else:
                # The values are written as a list, followed by the names.
                out_append(-1)
                yield cls._deconstruct_list(
                    kwargs.values(), output, stack, result_consumers, out_append
                )
                out_extend(kwargs)

            out_append(0)  # Placeholder for ref id
            if de.subscribers > 0:
                # Ref id. This is the index in the output list.
                de.out_pos = len(output) - 1
                result_consumers.append(de)
                out_append(de.num_returns)  # Return result for this node
            else:
                out_append(0)  # Do not return result for this node

    @classmethod
    def _deconstruct_list(
        cls,
        lst: Iterable,
        output: List,
        stack: List,
        result_consumers: List["DeferredExecution"],
        out_append: Callable,
    ):
        """
        Deconstruct the specified list.

        The items are written to `output` one by one and the sequence is
        terminated with `_Tag.END`. Nested lists and tuples are prefixed with
        `_Tag.LIST`, the DeferredExecution items are written either as a
        reference to the already deconstructed node, or as a nested chain
        enclosed in `_Tag.CHAIN` and `_Tag.END`.

        Parameters
        ----------
        lst : list
            The list to be deconstructed.
        output : list
            Put the items to this list.
        stack : list
            The stack passed through to the nested generators.
        result_consumers : list
            The result consumers.
        out_append : Callable
            The reference to the ``list.append()`` method.

        Yields
        ------
        Generator
            Either ``_deconstruct_list()`` or ``_deconstruct_chain()`` generator.
        """
        for obj in lst:
            if isinstance(obj, DeferredExecution):
                if out_pos := getattr(obj, "out_pos", None):
                    # The node has already been written to the output.
                    obj.unsubscribe()
                    if obj.has_result:
                        out_append(obj.data)
                    else:
                        out_append(_Tag.REF)
                        out_append(out_pos)
                        output[out_pos] = out_pos
                        if obj.subscribers == 0:
                            # No more subscribers - do not return the result.
                            output[out_pos + 1] = 0
                            result_consumers.remove(obj)
                else:
                    out_append(_Tag.CHAIN)
                    yield cls._deconstruct_chain(obj, output, stack, result_consumers)
                    out_append(_Tag.END)
            elif isinstance(obj, ListOrTuple):
                out_append(_Tag.LIST)
                yield cls._deconstruct_list(
                    obj, output, stack, result_consumers, out_append
                )
            else:
                out_append(obj)
        out_append(_Tag.END)

    @staticmethod
    def _remote_exec_chain(num_returns: int, *args: Tuple) -> List[Any]:
        """
        Submit the deconstructed chain for execution in a worker process.

        Parameters
        ----------
        num_returns : int
            The total number of the values returned by the remote function.
        *args : tuple
            A deconstructed chain to be executed.

        Returns
        -------
        list
            The execution results. The last element of this list is the ``MetaList``.
        """
        resources = RayTaskCustomResources.get()
        if num_returns != 2:
            remote_fn = _remote_exec_multi_chain.options(
                num_returns=num_returns, resources=resources
            )
            return remote_fn.remote(num_returns, *args)
        # _remote_exec_single_chain() is preferable, when applicable. It takes
        # fewer arguments and num_returns is not required in the options.
        remote_fn = _remote_exec_single_chain.options(resources=resources)
        return remote_fn.remote(*args)

    def _set_result(
        self,
        result: ObjectRefOrListType,
        meta: "MetaList",
        meta_offset: Union[int, List[int]],
    ):
        """
        Save the execution result in this node.

        The attributes describing the pending call are removed, that
        turns ``has_result`` to True.

        Parameters
        ----------
        result : ObjectRefOrListType
            The execution result, it replaces the input in ``data``.
        meta : MetaList
            The meta list, containing the result length and width.
        meta_offset : int or list of int
            The offset(s) of the result length(s) in `meta`.
        """
        for attr in ("func", "args", "kwargs", "flat_args", "flat_kwargs"):
            delattr(self, attr)
        self.meta_offset = meta_offset
        self.meta = meta
        self.data = result

    def __reduce__(self):
        """
        Not serializable.

        Raises
        ------
        NotImplementedError
            Always.
        """
        raise NotImplementedError("DeferredExecution is not serializable!")


class MetaList:
    """
    Wrapper around the meta information of the execution results.

    The meta information contains the result lengths and the worker address.
    It's stored either as a list or as a reference to the list, that is
    materialized lazily.

    Parameters
    ----------
    obj : ray.ObjectID or list
        The list or the reference to the list.
    """

    def __init__(self, obj: Union[ray.ObjectID, List]):
        self._obj = obj

    def __getitem__(self, index):
        """
        Get item at the specified index.

        Parameters
        ----------
        index : int

        Returns
        -------
        Any
            The value, if the list is available, otherwise a ``MetaListHook``
            that gets the value on materialization.
        """
        stored = self._obj
        if isinstance(stored, list):
            return stored[index]
        return MetaListHook(self, index)

    def __setitem__(self, index, value):
        """
        Set item at the specified index.

        The list is materialized first, if it's not available yet.

        Parameters
        ----------
        index : int
        value : Any
        """
        if not isinstance(self._obj, list):
            self._obj = RayWrapper.materialize(self._obj)
        self._obj[index] = value


class MetaListHook(MaterializationHook):
    """
    Lazily gets a single value from a ``MetaList``.

    Instances of this class are returned by ``MetaList.__getitem__()`` when
    the underlying list is not materialized yet.

    Parameters
    ----------
    meta : MetaList
        Non-materialized list to get the value from.
    idx : int
        The value index in the list.
    """

    def __init__(self, meta: MetaList, idx: int):
        self.idx = idx
        self.meta = meta

    def pre_materialize(self):
        """
        Get item at self.idx or object ref if not materialized.

        Returns
        -------
        object
        """
        current = self.meta._obj
        if isinstance(current, list):
            return current[self.idx]
        return current

    def post_materialize(self, materialized):
        """
        Save the materialized list in self.meta and get the item at self.idx.

        Parameters
        ----------
        materialized : list

        Returns
        -------
        object
        """
        item = materialized[self.idx]
        self.meta._obj = materialized
        return item


class _Tag(Enum):  # noqa: PR01
    """
    A set of special values used for the method arguments de/construction.

    The tags are written to the flat list by the ``DeferredExecution._deconstruct*()``
    methods and are interpreted by the ``_RemoteExecutor.construct*()`` methods.

    See ``DeferredExecution._deconstruct()`` for details.
    """

    # The next item is an execution chain
    CHAIN = 0
    # The next item is a reference
    REF = 1
    # The next item is a list
    LIST = 2
    # End of list or chain
    END = 3


class _RemoteExecutor:
    """Remote functions for DeferredExecution."""

    @staticmethod
    def exec_func(fn: Callable, obj: Any, args: Tuple, kwargs: Dict) -> Any:
        """
        Execute the specified function.

        Parameters
        ----------
        fn : Callable
        obj : Any
        args : Tuple
        kwargs : dict

        Returns
        -------
        Any
        """
        try:
            try:
                return fn(obj, *args, **kwargs)
                # Sometimes Arrow forces us to make a copy of an object before we operate on it. We
                # don't want the error to propagate to the user, and we want to avoid copying unless
                # we absolutely have to.
            except ValueError as err:
                if isinstance(obj, (pandas.DataFrame, pandas.Series)):
                    return fn(obj.copy(), *args, **kwargs)
                else:
                    raise err
        except Exception as err:
            get_logger().error(
                f"{err}. fn={fn}, obj={obj}, args={args}, kwargs={kwargs}"
            )
            raise err

    @classmethod
    def construct(cls, num_returns: int, args: Tuple):  # pragma: no cover
        """
        Construct and execute the specified chain.

        This function is called in a worker process. The last value, returned by
        this generator, is the meta list, containing the objects lengths and widths
        and the worker ip address, as the last value in the list.

        Parameters
        ----------
        num_returns : int
            The number of return values. It's not used in this method.
        args : tuple
            A flat list, produced by ``DeferredExecution._deconstruct()``.

        Yields
        ------
        Any
            The execution results and the MetaList as the last value.
        """
        # The list is reversed to read the values with ``list.pop()``.
        chain = list(reversed(args))
        meta = []
        try:
            # Using stack and generators instead of the recursive calls.
            stack = [cls.construct_chain(chain, {}, meta, None)]
            while stack:
                try:
                    gen = stack.pop()
                    obj = next(gen)
                    stack.append(gen)
                    if isinstance(obj, Generator):
                        # A nested generator - it must be exhausted first.
                        stack.append(obj)
                    else:
                        # An execution result.
                        yield obj
                except StopIteration:
                    # The generator is exhausted and is not pushed back to the stack.
                    pass
        except Exception as err:
            get_logger().error(f"{err}. args={args}, chain={list(reversed(chain))}")
            raise err
        meta.append(get_node_ip_address())
        yield meta

    @classmethod
    def construct_chain(
        cls,
        chain: List,
        refs: Dict[int, Any],
        meta: List,
        lst: Optional[List],
    ):  # pragma: no cover
        """
        Construct the chain and execute it one by one.

        Parameters
        ----------
        chain : list
            A flat list containing the execution tree, deconstructed by
            ``DeferredExecution._deconstruct()``. The list is reversed, i.e.
            the values are read with ``chain.pop()``.
        refs : dict
            If an execution result is required for multiple chains, the
            reference to this result is saved in this dict.
        meta : list
            The lengths of the returned objects are added to this list.
        lst : list
            If specified, the execution result is added to this list.
            This is used when a chain is passed as an argument to a
            DeferredExecution task.

        Yields
        ------
        Any
            Either the ``construct_list()`` generator or the execution results.
        """
        pop = chain.pop
        tg_e = _Tag.END

        # Reading the chain input: a reference, a list or a plain object.
        obj = pop()
        if obj is _Tag.REF:
            obj = refs[pop()]
        elif obj is _Tag.LIST:
            obj = []
            yield cls.construct_list(obj, chain, refs, meta)

        while chain:
            fn = pop()
            if fn == tg_e:
                # End of a nested chain - passing the result to the arguments list.
                lst.append(obj)
                break

            if (args_len := pop()) >= 0:
                if args_len == 0:
                    # chain[-0:] is the whole list, thus, handling 0 separately.
                    args = []
                else:
                    args = chain[-args_len:]
                    del chain[-args_len:]
                    # Restoring the original order, since the chain is reversed.
                    args.reverse()
            else:
                args = []
                yield cls.construct_list(args, chain, refs, meta)
            if (args_len := pop()) >= 0:
                kwargs = {pop(): pop() for _ in range(args_len)}
            else:
                # The values list is followed by the argument names.
                values = []
                yield cls.construct_list(values, chain, refs, meta)
                kwargs = {pop(): v for v in values}

            obj = cls.exec_func(fn, obj, args, kwargs)

            if ref := pop():  # <ref> is not 0 - adding the result to refs
                refs[ref] = obj
            if (num_returns := pop()) == 0:
                # The result is only passed to the next function in the chain.
                continue

            itr = iter([obj] if num_returns == 1 else obj)
            for _ in range(num_returns):
                obj = next(itr)
                # Two meta values are added per returned object.
                meta.append(len(obj) if hasattr(obj, "__len__") else 0)
                meta.append(len(obj.columns) if hasattr(obj, "columns") else 0)
                yield obj

    @classmethod
    def construct_list(
        cls,
        lst: List,
        chain: List,
        refs: Dict[int, Any],
        meta: List,
    ):  # pragma: no cover
        """
        Construct the list.

        The values are read from `chain` and appended to `lst` until
        `_Tag.END` is encountered.

        Parameters
        ----------
        lst : list
            The list to be filled in.
        chain : list
            The reversed flat list, the values are read with ``chain.pop()``.
        refs : dict
            The saved execution results, referred by `_Tag.REF`.
        meta : list
            Passed through to the nested generators.

        Yields
        ------
        Any
            Either ``construct_chain()`` or ``construct_list()`` generator.
        """
        pop = chain.pop
        lst_append = lst.append
        while True:
            obj = pop()
            if isinstance(obj, _Tag):
                if obj == _Tag.END:
                    break
                elif obj == _Tag.CHAIN:
                    # The chain appends its result to `lst` by itself.
                    yield cls.construct_chain(chain, refs, meta, lst)
                elif obj == _Tag.LIST:
                    # The nested list is appended first and filled in afterwards.
                    lst_append([])
                    yield cls.construct_list(lst[-1], chain, refs, meta)
                elif obj is _Tag.REF:
                    lst_append(refs[pop()])
                else:
                    raise ValueError(f"Unexpected tag {obj}")
            else:
                lst_append(obj)

    def __reduce__(self):
        """
        Use a single instance on deserialization.

        Returns
        -------
        str
            Returns the ``_REMOTE_EXEC`` attribute name.
        """
        # The returned string is the name of the module-level instance.
        return "_REMOTE_EXEC"


# The module-level instance. ``_RemoteExecutor.__reduce__()`` returns this name
# and the remote functions below use the instance as the default argument value.
_REMOTE_EXEC = _RemoteExecutor()


@ray.remote(num_returns=4)
def remote_exec_func(
    fn: Callable,
    obj: Any,
    *flat_args: Tuple,
    remote_executor=_REMOTE_EXEC,
    **flat_kwargs: Dict,
):  # pragma: no cover
    """
    Execute the specified function with the arguments in a worker process.

    The object `obj` is passed to the function as the first argument.
    Note: all the arguments must be flat, i.e. no lists, no chains.

    Parameters
    ----------
    fn : Callable
        The function to be executed.
    obj : Any
        The first positional argument of `fn`.
    *flat_args : list
        The remaining positional arguments.
    remote_executor : _RemoteExecutor, default: _REMOTE_EXEC
        Do not change, it's used to avoid excessive serializations.
    **flat_kwargs : dict
        The keyword arguments.

    Returns
    -------
    tuple[Any, int, int, str]
        The execution result, the result length and width, the worker address.
    """
    result = remote_executor.exec_func(fn, obj, flat_args, flat_kwargs)
    # Zero is reported for the objects that have no length or columns.
    length = len(result) if hasattr(result, "__len__") else 0
    width = len(result.columns) if hasattr(result, "columns") else 0
    return result, length, width, get_node_ip_address()


@ray.remote(num_returns=2)
def _remote_exec_single_chain(
    *args: Tuple, remote_executor=_REMOTE_EXEC
) -> Generator:  # pragma: no cover
    """
    Execute the deconstructed chain with a single return value in a worker process.

    The remote function has 2 return values - the execution result and the meta list.

    Parameters
    ----------
    *args : tuple
        A deconstructed chain to be executed.
    remote_executor : _RemoteExecutor, default: _REMOTE_EXEC
        Do not change, it's used to avoid excessive serializations.

    Returns
    -------
    Generator
        The ``_RemoteExecutor.construct()`` generator.
    """
    return remote_executor.construct(num_returns=2, args=args)


@ray.remote
def _remote_exec_multi_chain(
    num_returns: int, *args: Tuple, remote_executor=_REMOTE_EXEC
) -> Generator:  # pragma: no cover
    """
    Execute the deconstructed chain with multiple return values in a worker process.

    The actual number of the return values is not set in the decorator, it's
    specified by the caller with ``.options(num_returns=...)``.

    Parameters
    ----------
    num_returns : int
        The number of return values.
    *args : tuple
        A deconstructed chain to be executed.
    remote_executor : _RemoteExecutor, default: _REMOTE_EXEC
        Do not change, it's used to avoid excessive serializations.

    Returns
    -------
    Generator
        The ``_RemoteExecutor.construct()`` generator.
    """
    return remote_executor.construct(num_returns, args)


================================================
FILE: modin/core/execution/ray/common/engine_wrapper.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
The module with helper mixin for executing functions remotely.

To be used as a piece of building a Ray-based engine.
"""

import asyncio
import os
from types import FunctionType
from typing import Sequence

import pandas
import ray

from modin.config import RayTaskCustomResources
from modin.error_message import ErrorMessage


@ray.remote
def _deploy_ray_func(func, *args, return_pandas_df=None, **kwargs):  # pragma: no cover
    """
    Call `func` inside a Ray task, optionally coercing its result to a DataFrame.

    Parameters
    ----------
    func : callable
        A local function that we want to call remotely.
    *args : iterable
        Positional arguments to pass to `func` when calling remotely.
    return_pandas_df : bool, optional
        Whether to convert the result of `func` to a pandas DataFrame or not.
    **kwargs : dict
        Keyword arguments to pass to `func` when calling remotely.

    Returns
    -------
    object
        The value produced by `func`; wrapped in ``pandas.DataFrame`` when
        `return_pandas_df` is truthy and the value is not already a DataFrame.
    """
    output = func(*args, **kwargs)
    # Nothing to convert: either no conversion was requested or it is a frame already.
    if not return_pandas_df or isinstance(output, pandas.DataFrame):
        return output
    return pandas.DataFrame(output)


class RayWrapper:
    """Mixin that provides means of running functions remotely and getting local results."""

    # Maps module/class-level functions to the object reference returned by
    # ``ray.put`` for them, so that each such function is stored only once.
    _func_cache = {}

    @classmethod
    def deploy(
        cls, func, f_args=None, f_kwargs=None, return_pandas_df=None, num_returns=1
    ):
        """
        Run local `func` remotely.

        Parameters
        ----------
        func : callable or ray.ObjectRef
            The function to perform.
        f_args : list or tuple, optional
            Positional arguments to pass to ``func``.
        f_kwargs : dict, optional
            Keyword arguments to pass to ``func``.
        return_pandas_df : bool, optional
            Whether to convert the result of `func` to a pandas DataFrame or not.
        num_returns : int, default: 1
            Amount of return values expected from `func`.

        Returns
        -------
        ray.ObjectRef or list
            Ray identifier of the result being put to Plasma store.
        """
        args = [] if f_args is None else f_args
        kwargs = {} if f_kwargs is None else f_kwargs
        return _deploy_ray_func.options(
            num_returns=num_returns, resources=RayTaskCustomResources.get()
        ).remote(func, *args, return_pandas_df=return_pandas_df, **kwargs)

    @classmethod
    def is_future(cls, item):
        """
        Check if the item is a Future.

        Parameters
        ----------
        item : ray.ObjectRef, MaterializationHook or object
            Future or object to check.

        Returns
        -------
        boolean
            If the value is a future.
        """
        return isinstance(item, ObjectRefTypes)

    @classmethod
    def materialize(cls, obj_id):
        """
        Get the value of object from the Plasma store.

        Parameters
        ----------
        obj_id : ray.ObjectRef, MaterializationHook, sequence of them or object
            Ray object identifier(s) to get the value(s) by. Items that are
            not futures are returned unchanged.

        Returns
        -------
        object
            Whatever was identified by `obj_id`. For a sequence input, a list
            with every future replaced by its materialized value.
        """
        # A single hook: it may already hold a pre-computed value, in which
        # case nothing has to be fetched.
        if isinstance(obj_id, MaterializationHook):
            obj = obj_id.pre_materialize()
            return (
                obj_id.post_materialize(ray.get(obj))
                if isinstance(obj, ray.ObjectRef)
                else obj
            )

        if not isinstance(obj_id, Sequence):
            return ray.get(obj_id) if isinstance(obj_id, ray.ObjectRef) else obj_id

        # Fast path: a sequence consisting of plain object references only.
        if all(isinstance(obj, ray.ObjectRef) for obj in obj_id):
            return ray.get(obj_id)

        # Mixed sequence: collect the unique references to fetch (``ids`` maps
        # a reference to its position in the fetched list) while building
        # ``result`` with placeholders for the items that must be replaced.
        ids = {}
        result = []
        for obj in obj_id:
            if not isinstance(obj, ObjectRefTypes):
                result.append(obj)
                continue
            if isinstance(obj, MaterializationHook):
                oid = obj.pre_materialize()
                if isinstance(oid, ray.ObjectRef):
                    hook = obj
                    obj = oid
                else:
                    # The hook returned a ready value instead of a reference.
                    result.append(oid)
                    continue
            else:
                hook = None

            idx = ids.get(obj, None)
            if idx is None:
                ids[obj] = idx = len(ids)
            if hook is None:
                result.append(obj)
            else:
                # Remember which fetched value belongs to this hook.
                hook._materialized_idx = idx
                result.append(hook)

        if len(ids) == 0:
            return result

        # Fetch all unique references with a single ``ray.get`` call.
        materialized = ray.get(list(ids.keys()))
        for i, obj in enumerate(result):
            if isinstance(obj, ObjectRefTypes):
                if isinstance(obj, MaterializationHook):
                    result[i] = obj.post_materialize(
                        materialized[obj._materialized_idx]
                    )
                else:
                    result[i] = materialized[ids[obj]]
        return result

    @classmethod
    def put(cls, data, **kwargs):
        """
        Store an object in the object store.

        Functions that are neither lambdas nor defined inside another function
        are cached: the first call stores the function and later calls return
        the same reference. The cache holds at most 1024 functions; once it is
        full, further functions are stored without caching.

        Parameters
        ----------
        data : object
            The Python object to be stored.
        **kwargs : dict
            Additional keyword arguments. They are not used when a function
            is stored in (or served from) the cache.

        Returns
        -------
        ray.ObjectRef
            Ray object identifier to get the value by.
        """
        if isinstance(data, FunctionType):
            qname = data.__qualname__
            if "<locals>" not in qname and "<lambda>" not in qname:
                ref = cls._func_cache.get(data, None)
                if ref is not None:
                    return ref
                if len(cls._func_cache) < 1024:
                    ref = ray.put(data)
                    cls._func_cache[data] = ref
                    return ref
                msg = "To many functions in the RayWrapper cache!"
                assert "MODIN_GITHUB_CI" not in os.environ, msg
                ErrorMessage.warn(msg)
                # The cache is full: fall through and store the function
                # without caching, so that a valid reference is still returned
                # (previously ``None`` was returned here).
        return ray.put(data, **kwargs)

    @classmethod
    def wait(cls, obj_ids, num_returns=None):
        """
        Wait on the objects without materializing them (blocking operation).

        ``ray.wait`` assumes a list of unique object references: see
        https://github.com/modin-project/modin/issues/5045

        Parameters
        ----------
        obj_ids : list, scalar
            Object reference(s) or hook(s) to wait on. Items that do not
            resolve to a ``ray.ObjectRef`` are ignored.
        num_returns : int, optional
            Number of references to wait for. By default, all the unique
            references are awaited.
        """
        if isinstance(obj_ids, ObjectRefTypes):
            # A single future is not iterable, wrap it to handle it uniformly.
            obj_ids = [obj_ids]
        elif not isinstance(obj_ids, Sequence):
            obj_ids = list(obj_ids)

        # A set is used because ``ray.wait`` requires unique references.
        ids = set()
        for obj in obj_ids:
            if isinstance(obj, MaterializationHook):
                obj = obj.pre_materialize()
            if isinstance(obj, ray.ObjectRef):
                ids.add(obj)

        if num_ids := len(ids):
            ray.wait(list(ids), num_returns=num_returns or num_ids)


@ray.remote
class SignalActor:  # pragma: no cover
    """
    Actor holding a fixed number of events used to synchronize tasks and actors.

    For details see: https://docs.ray.io/en/latest/advanced.html?highlight=signalactor#multi-node-synchronization-using-an-actor

    Parameters
    ----------
    event_count : int
        Number of events required for synchronization.
    """

    def __init__(self, event_count: int):
        events = []
        for _ in range(event_count):
            events.append(asyncio.Event())
        self.events = events

    def send(self, event_idx: int):
        """
        Mark the event with index `event_idx` as occurred.

        Parameters
        ----------
        event_idx : int
            Index of the event to set.
        """
        event = self.events[event_idx]
        event.set()

    async def wait(self, event_idx: int):
        """
        Suspend until the event with index `event_idx` is set.

        Parameters
        ----------
        event_idx : int
            Index of the event to wait for.
        """
        event = self.events[event_idx]
        await event.wait()

    def is_set(self, event_idx: int) -> bool:
        """
        Tell whether the event with index `event_idx` has been set.

        Parameters
        ----------
        event_idx : int
            Index of the event to check.

        Returns
        -------
        bool
        """
        event = self.events[event_idx]
        return event.is_set()


class MaterializationHook:
    """Base hook invoked around materialization to run pre/post computations."""

    def pre_materialize(self):
        """
        Provide either a reference to materialize or an already computed value.

        Must be overridden by subclasses.

        Returns
        -------
        ray.ObjectRef or object
        """
        raise NotImplementedError

    def post_materialize(self, materialized):
        """
        Transform the materialized object into the final value.

        Must be overridden by subclasses.

        Parameters
        ----------
        materialized : object
            The materialized object to be post-computed.

        Returns
        -------
        object
            The post-computed object.
        """
        raise NotImplementedError

    def __reduce__(self):
        """
        Serialize the materialized value in place of the hook itself.

        The hook is materialized via ``RayWrapper.materialize`` and the
        result is pickled as a plain ``int``.

        Returns
        -------
        tuple
            ``(int, (value,))``.

        Raises
        ------
        NotImplementedError
            If the materialized value is not an integer.
        """
        value = RayWrapper.materialize(self)
        if isinstance(value, int):
            return int, (value,)
        raise NotImplementedError("Only integers are currently supported")


# The types that ``RayWrapper.is_future`` recognizes as futures; usable
# directly as the second argument of ``isinstance``.
ObjectRefTypes = (ray.ObjectRef, MaterializationHook)


================================================
FILE: modin/core/execution/ray/common/utils.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""The module holds utility and initialization routines for Modin on Ray."""

import os
import sys
import warnings
from typing import Optional

import psutil
import ray
from packaging import version

from modin.config import (
    CIAWSAccessKeyID,
    CIAWSSecretAccessKey,
    CpuCount,
    GithubCI,
    GpuCount,
    IsRayCluster,
    Memory,
    NPartitions,
    RayInitCustomResources,
    RayRedisAddress,
    RayRedisPassword,
    ValueSource,
)
from modin.core.execution.utils import set_env
from modin.error_message import ErrorMessage

from .engine_wrapper import ObjectRefTypes, RayWrapper

# Fraction of the detected system memory that ``_get_object_store_memory``
# assigns to the object store when ``Memory`` is not configured.
_OBJECT_STORE_TO_SYSTEM_MEMORY_RATIO = 0.6
# This constant should be in sync with the limit in ray, which is private,
# not exposed to users, and not documented:
# https://github.com/ray-project/ray/blob/4692e8d8023e789120d3f22b41ffb136b50f70ea/python/ray/_private/ray_constants.py#L57-L62
_MAC_OBJECT_STORE_LIMIT_BYTES = 2 * 2**30

# Name of the environment variable that ``initialize_ray`` sets to "1" unless
# it is already present in the environment.
_RAY_IGNORE_UNHANDLED_ERRORS_VAR = "RAY_IGNORE_UNHANDLED_ERRORS"

# Alias of the future types tuple; used by ``deserialize`` in ``isinstance`` checks.
ObjectIDType = ObjectRefTypes


def initialize_ray(
    override_is_cluster=False,
    override_redis_address: str = None,
    override_redis_password: str = None,
):
    """
    Start (or join) Ray using the arguments, ``modin.config`` values and internal defaults.

    ``ray.init`` is only called when Ray is not initialized yet or when
    `override_is_cluster` is truthy. Afterwards ``NPartitions`` and
    ``CpuCount`` are set to the number of CPUs reported by the cluster.

    Parameters
    ----------
    override_is_cluster : bool, default: False
        Whether to treat this run as a cluster run regardless of
        ``modin.config.IsRayCluster`` and to call ``ray.init`` even if Ray
        is already initialized.
        If not specified, ``modin.config.IsRayCluster`` variable is used.
    override_redis_address : str, optional
        What Redis address to connect to when running in Ray cluster.
        If not specified, ``modin.config.RayRedisAddress`` is used.
    override_redis_password : str, optional
        What password to use when connecting to Redis.
        If not specified, ``modin.config.RayRedisPassword`` is used.
    """
    # These variables have to be set for every Ray worker so that the `pandas`
    # module is fully imported inside of each process before any execution begins:
    # https://github.com/modin-project/modin/pull/4603
    env_vars = {
        "__MODIN_AUTOIMPORT_PANDAS__": "1",
        "PYTHONWARNINGS": "ignore::FutureWarning",
    }
    if GithubCI.get():
        # need these to write parquet to the moto service mocking s3.
        env_vars.update(
            AWS_ACCESS_KEY_ID=CIAWSAccessKeyID.get(),
            AWS_SECRET_ACCESS_KEY=CIAWSSecretAccessKey.get(),
        )
    is_cluster = override_is_cluster or IsRayCluster.get()
    if not ray.is_initialized() or override_is_cluster:
        redis_address = override_redis_address or RayRedisAddress.get()

        # The password is considered "not customized" when neither the argument
        # nor the config variable was explicitly provided.
        password_not_customized = (
            override_redis_password is None
            and RayRedisPassword.get_value_source() == ValueSource.DEFAULT
        )
        if not password_not_customized:
            redis_password = override_redis_password or RayRedisPassword.get()
        elif is_cluster:
            redis_password = ray.ray_constants.REDIS_DEFAULT_PASSWORD
        else:
            redis_password = RayRedisPassword.get()

        if is_cluster:
            # We only start ray in a cluster setting for the head node.
            # In a cluster the variables are delivered to workers via `runtime_env`.
            ray.init(
                address=redis_address or "auto",
                include_dashboard=False,
                ignore_reinit_error=True,
                _redis_password=redis_password,
                runtime_env={"env_vars": env_vars},
            )
        else:
            object_store_memory = _get_object_store_memory()
            local_init_kwargs = dict(
                num_cpus=CpuCount.get(),
                num_gpus=GpuCount.get(),
                include_dashboard=False,
                ignore_reinit_error=True,
                object_store_memory=object_store_memory,
                _redis_password=redis_password,
                _memory=object_store_memory,
                resources=RayInitCustomResources.get(),
            )
            # It should be enough to simply set the required variables for the main process
            # for Ray to automatically propagate them to each new worker on the same node.
            # Although Ray doesn't guarantee this behavior it works as expected most of the
            # time and doesn't enforce us with any overhead that Ray's native `runtime_env`
            # is usually causing. You can visit this gh-issue for more info:
            # https://github.com/modin-project/modin/issues/5157#issuecomment-1500225150
            with set_env(**env_vars):
                ray.init(**local_init_kwargs)

    # Now ray is initialized, check runtime env config - especially useful if we join
    # an externally pre-configured cluster
    runtime_env_vars = ray.get_runtime_context().runtime_env.get("env_vars", {})
    for varname, varvalue in env_vars.items():
        if str(runtime_env_vars.get(varname, "")) != str(varvalue) and is_cluster:
            ErrorMessage.single_warning(
                "When using a pre-initialized Ray cluster, please ensure that the runtime env "
                + f"sets environment variable {varname} to {varvalue}"
            )

    cluster_cpus = int(ray.cluster_resources()["CPU"])
    NPartitions._put(cluster_cpus)
    CpuCount._put(cluster_cpus)

    # TODO(https://github.com/ray-project/ray/issues/28216): remove this
    # workaround once Ray gives a better way to suppress task errors.
    # Ideally we would not set global environment variables.
    # If user has explicitly set _RAY_IGNORE_UNHANDLED_ERRORS_VAR,
    # don't override its value.
    os.environ.setdefault(_RAY_IGNORE_UNHANDLED_ERRORS_VAR, "1")


def _get_object_store_memory() -> Optional[int]:
    """
    Compute the object store size, in bytes, that Ray should be started with.

    - A configured ``Memory`` value is returned as is.
    - On Linux the available space of /dev/shm is used as the system memory
      (a warning is emitted when it is noticeably less than half of RAM);
      on other systems the total virtual memory is used.
    - The result is ``_OBJECT_STORE_TO_SYSTEM_MEMORY_RATIO`` of the system
      memory, rounded down to a whole number of gigabytes.
    - On Mac, never return more than Ray-specified upper limit.

    Returns
    -------
    Optional[int]
        The object store memory size in bytes, or None if we should use the Ray
        default.
    """
    configured = Memory.get()
    if configured is not None:
        return configured

    total_ram = psutil.virtual_memory().total
    if not sys.platform.startswith("linux"):
        system_memory = total_ram
    else:
        shm_fd = os.open("/dev/shm", os.O_RDONLY)
        try:
            vfs = os.fstatvfs(shm_fd)
            system_memory = vfs.f_bsize * vfs.f_bavail
            if system_memory / (total_ram / 2) < 0.99:
                warnings.warn(
                    f"The size of /dev/shm is too small ({system_memory} bytes). The required size "
                    + f"at least half of RAM ({total_ram // 2} bytes). Please, delete files in /dev/shm or "
                    + "increase size of /dev/shm with --shm-size in Docker. Also, you can can override the memory "
                    + "size for each Ray worker (in bytes) to the MODIN_MEMORY environment variable."
                )
        finally:
            os.close(shm_fd)

    # Round the share of the system memory down to a whole number of gigabytes.
    bytes_per_gb = 1e9
    whole_gb = _OBJECT_STORE_TO_SYSTEM_MEMORY_RATIO * system_memory // bytes_per_gb
    object_store_memory = int(whole_gb * bytes_per_gb)
    if object_store_memory == 0:
        return None

    # Newer versions of ray don't allow us to initialize ray with an object store
    # larger than _MAC_OBJECT_STORE_LIMIT_BYTES. It seems that an object store
    # above the limit is too slow even on ray 1.0.0. However, limiting the object
    # store to _MAC_OBJECT_STORE_LIMIT_BYTES only seems to start helping at ray
    # version 1.3.0. So if ray version is at least 1.3.0, cap the object store
    # at _MAC_OBJECT_STORE_LIMIT_BYTES.
    # For background on the ray bug see:
    # - https://github.com/ray-project/ray/issues/20388
    # - https://github.com/modin-project/modin/issues/4872
    if sys.platform != "darwin":
        return object_store_memory
    if version.parse(ray.__version__) < version.parse("1.3.0"):
        return object_store_memory
    return min(object_store_memory, _MAC_OBJECT_STORE_LIMIT_BYTES)


def deserialize(obj):  # pragma: no cover
    """
    Replace Ray futures found in `obj` with their materialized values.

    Parameters
    ----------
    obj : ObjectIDType, iterable of ObjectIDType, or mapping of keys to ObjectIDTypes
        Object(s) to deserialize.

    Returns
    -------
    obj
        The deserialized object. Tuples and lists are returned as a new list;
        a dict containing futures is returned as a new dict; anything else is
        returned unchanged.
    """
    if isinstance(obj, ObjectIDType):
        return RayWrapper.materialize(obj)

    if isinstance(obj, (tuple, list)):
        # Ray will error if any elements are not ObjectIDType, but we still want ray to
        # perform batch deserialization for us -- thus, only the futures are submitted
        # and their values are then written back to the positions they came from.
        futures = [
            (position, item)
            for position, item in enumerate(obj)
            if isinstance(item, ObjectIDType)
        ]
        values = RayWrapper.materialize([item for _, item in futures])
        restored = list(obj[:])
        for (position, _), value in zip(futures, values):
            restored[position] = value
        # Check that all objects have been deserialized
        assert not any([isinstance(item, ObjectIDType) for item in restored])
        return restored

    if isinstance(obj, dict) and any(
        isinstance(value, ObjectIDType) for value in obj.values()
    ):
        materialized_values = RayWrapper.materialize(list(obj.values()))
        return dict(zip(obj.keys(), materialized_values))

    return obj


================================================
FILE: modin/core/execution/ray/generic/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Generic functionality for Ray execution engine."""


================================================
FILE: modin/core/execution/ray/generic/io/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Generic IO functionality for Ray execution engine."""

from .io import RayIO

__all__ = ["RayIO"]


================================================
FILE: modin/core/execution/ray/generic/io/io.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""The module holds base class implementing required I/O over Ray."""

from modin.core.io import BaseIO


class RayIO(BaseIO):
    """Base class for I/O operations performed over Ray."""

    @classmethod
    def from_ray(cls, ray_obj):
        """
        Build a Modin `query_compiler` out of a Ray Dataset.

        Parameters
        ----------
        ray_obj : ray.data.Dataset
            The Ray Dataset to convert from.

        Returns
        -------
        BaseQueryCompiler
            QueryCompiler containing data from the Ray Dataset.

        Notes
        -----
        This base implementation always raises NotImplementedError;
        subclasses have to provide the actual conversion.
        """
        message = f"Modin dataset can't be created from `ray.data.Dataset` using {cls}."
        raise NotImplementedError(message)

    @classmethod
    def to_ray(cls, modin_obj):
        """
        Turn a Modin DataFrame/Series into a Ray Dataset.

        Parameters
        ----------
        modin_obj : modin.pandas.DataFrame, modin.pandas.Series
            The Modin DataFrame/Series to convert.

        Returns
        -------
        ray.data.Dataset
            Converted object with type depending on input.

        Notes
        -----
        This base implementation always raises NotImplementedError;
        subclasses have to provide the actual conversion.
        """
        message = f"`ray.data.Dataset` can't be created from Modin DataFrame/Series using {cls}."
        raise NotImplementedError(message)


================================================
FILE: modin/core/execution/ray/generic/partitioning/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Generic partitioning functionality for Ray execution engine."""

from .partition_manager import GenericRayDataframePartitionManager

__all__ = [
    "GenericRayDataframePartitionManager",
]


================================================
FILE: modin/core/execution/ray/generic/partitioning/partition_manager.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""The module holds Modin partition manager implemented for Ray."""

import numpy as np

from modin.core.dataframe.pandas.partitioning.partition_manager import (
    PandasDataframePartitionManager,
)
from modin.core.execution.ray.common import RayWrapper


class GenericRayDataframePartitionManager(PandasDataframePartitionManager):
    """The class implements the interface in `PandasDataframePartitionManager`."""

    @classmethod
    def to_numpy(cls, partitions, **kwargs):
        """
        Convert `partitions` into a NumPy array.

        Parameters
        ----------
        partitions : NumPy array
            A 2-D array of partitions to convert to local NumPy array.
        **kwargs : dict
            Keyword arguments to pass to each partition ``.to_numpy()`` call.

        Returns
        -------
        NumPy array
        """
        if partitions.shape[1] == 1:
            # Single column of partitions: fetch the underlying objects and
            # convert them locally.
            parts = cls.get_objects_from_partitions(partitions.flatten())
            parts = [part.to_numpy(**kwargs) for part in parts]
        else:
            # Convert every block remotely and fetch the resulting arrays.
            # The keyword arguments are captured by the lambda so that they
            # reach ``to_numpy`` here as well (they used to be dropped in
            # this branch, unlike in the single-column branch above).
            parts = RayWrapper.materialize(
                [
                    obj.apply(lambda df: df.to_numpy(**kwargs)).list_of_blocks[0]
                    for row in partitions
                    for obj in row
                ]
            )
        # Regroup the flat list into rows so that ``np.block`` can assemble
        # the 2-D result.
        rows, cols = partitions.shape
        parts = [parts[i * cols : (i + 1) * cols] for i in range(rows)]
        return np.block(parts)


================================================
FILE: modin/core/execution/ray/implementations/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Modin's functionality related to Ray execution engine and optimized for specific storage formats."""


================================================
FILE: modin/core/execution/ray/implementations/pandas_on_ray/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Modin's functionality related to Ray execution engine and optimized for pandas storage format."""


================================================
FILE: modin/core/execution/ray/implementations/pandas_on_ray/dataframe/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Base Modin Dataframe class optimized for pandas on Ray execution."""

from .dataframe import PandasOnRayDataframe

__all__ = ["PandasOnRayDataframe"]


================================================
FILE: modin/core/execution/ray/implementations/pandas_on_ray/dataframe/dataframe.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses class that implements ``PandasDataframe`` using Ray."""

from modin.core.dataframe.base.dataframe.utils import Axis
from modin.core.dataframe.pandas.dataframe.dataframe import PandasDataframe
from modin.utils import _inherit_docstrings

from ..partitioning.partition_manager import PandasOnRayDataframePartitionManager


class PandasOnRayDataframe(PandasDataframe):
    """
    Ray-backed implementation of the ``PandasDataframe`` interface.

    Parameters
    ----------
    partitions : np.ndarray
        A 2D NumPy array of partitions.
    index : sequence
        The index for the dataframe. Converted to a ``pandas.Index``.
    columns : sequence
        The columns object for the dataframe. Converted to a ``pandas.Index``.
    row_lengths : list, optional
        The length of each partition in the rows. The "height" of
        each of the block partitions. Is computed if not provided.
    column_widths : list, optional
        The width of each partition in the columns. The "width" of
        each of the block partitions. Is computed if not provided.
    dtypes : pandas.Series, optional
        The data types for the dataframe columns.
    pandas_backend : {"pyarrow", None}, optional
        Backend used by pandas. None - means default NumPy backend.
    """

    _partition_mgr_cls = PandasOnRayDataframePartitionManager

    def _get_lengths(self, parts, axis):
        """
        Get the list of dimensions for all the provided parts.

        Parameters
        ----------
        parts : list
            List of partitions.
        axis : {0, 1}
            The axis along which to get the lengths (0 - length across rows or, 1 - width across columns).

        Returns
        -------
        list
            The result of passing the per-partition dimensions through
            ``materialize_futures`` of the partition manager.
        """
        row_wise = axis == Axis.ROW_WISE
        # ``False`` is passed so the partitions are not forced to materialize
        # their dimensions one by one; everything is handed to the partition
        # manager in a single call instead.
        pending = [
            part.length(False) if row_wise else part.width(False) for part in parts
        ]
        return self._partition_mgr_cls.materialize_futures(pending)

    @property
    @_inherit_docstrings(PandasDataframe.engine)
    def engine(self) -> str:
        return "Ray"


================================================
FILE: modin/core/execution/ray/implementations/pandas_on_ray/io/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Base IO classes optimized for pandas on Ray execution."""

from .io import PandasOnRayIO

__all__ = ["PandasOnRayIO"]


================================================
FILE: modin/core/execution/ray/implementations/pandas_on_ray/io/io.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""The module holds the factory which performs I/O using pandas on Ray."""

import io

import numpy as np
import pandas
from pandas.io.common import get_handle, stringify_path
from ray.data import from_pandas_refs

from modin.config import RayTaskCustomResources
from modin.core.execution.ray.common import RayWrapper, SignalActor
from modin.core.execution.ray.generic.io import RayIO
from modin.core.io import (
    CSVDispatcher,
    ExcelDispatcher,
    FeatherDispatcher,
    FWFDispatcher,
    JSONDispatcher,
    ParquetDispatcher,
    SQLDispatcher,
)
from modin.core.storage_formats.pandas.parsers import (
    PandasCSVParser,
    PandasExcelParser,
    PandasFeatherParser,
    PandasFWFParser,
    PandasJSONParser,
    PandasParquetParser,
    PandasSQLParser,
)
from modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler
from modin.distributed.dataframe.pandas.partitions import (
    from_partitions,
    unwrap_partitions,
)
from modin.experimental.core.io import (
    ExperimentalCSVGlobDispatcher,
    ExperimentalCustomTextDispatcher,
    ExperimentalGlobDispatcher,
    ExperimentalSQLDispatcher,
)
from modin.experimental.core.storage_formats.pandas.parsers import (
    ExperimentalCustomTextParser,
    ExperimentalPandasCSVGlobParser,
    ExperimentalPandasJsonParser,
    ExperimentalPandasParquetParser,
    ExperimentalPandasPickleParser,
    ExperimentalPandasXmlParser,
)

from ..dataframe import PandasOnRayDataframe
from ..partitioning import PandasOnRayDataframePartition


class PandasOnRayIO(RayIO):
    """Factory providing methods for performing I/O operations using pandas as storage format on Ray as engine."""

    frame_cls = PandasOnRayDataframe
    frame_partition_cls = PandasOnRayDataframePartition
    query_compiler_cls = PandasQueryCompiler
    # Class attributes given to every reader/writer class generated by
    # ``__make_read`` / ``__make_write`` below.
    build_args = dict(
        frame_partition_cls=PandasOnRayDataframePartition,
        query_compiler_cls=PandasQueryCompiler,
        frame_cls=PandasOnRayDataframe,
        base_io=RayIO,
    )

    def __make_read(*classes, build_args=build_args):
        # Used to reduce code duplication: creates an anonymous class deriving from
        # ``RayWrapper`` and `classes`, with `build_args` as its class attributes,
        # and returns the ``read`` attribute of that class.
        # `build_args` is bound as a default value because names from the class body
        # are not visible from inside a function defined in it.
        return type("", (RayWrapper, *classes), build_args).read

    def __make_write(*classes, build_args=build_args):
        # Same as ``__make_read``, but returns the ``write`` attribute
        # of the generated class.
        return type("", (RayWrapper, *classes), build_args).write

    read_csv = __make_read(PandasCSVParser, CSVDispatcher)
    read_fwf = __make_read(PandasFWFParser, FWFDispatcher)
    read_json = __make_read(PandasJSONParser, JSONDispatcher)
    read_parquet = __make_read(PandasParquetParser, ParquetDispatcher)
    to_parquet = __make_write(ParquetDispatcher)
    # Blocked on pandas-dev/pandas#12236. It is faster to default to pandas.
    # read_hdf = __make_read(PandasHDFParser, HDFReader)
    read_feather = __make_read(PandasFeatherParser, FeatherDispatcher)
    read_sql = __make_read(PandasSQLParser, SQLDispatcher)
    to_sql = __make_write(SQLDispatcher)
    read_excel = __make_read(PandasExcelParser, ExcelDispatcher)

    # experimental methods that don't exist in pandas
    read_csv_glob = __make_read(
        ExperimentalPandasCSVGlobParser, ExperimentalCSVGlobDispatcher
    )
    read_parquet_glob = __make_read(
        ExperimentalPandasParquetParser, ExperimentalGlobDispatcher
    )
    # the glob writers additionally receive the ``RayIO`` writer as ``base_write``
    to_parquet_glob = __make_write(
        ExperimentalGlobDispatcher,
        build_args={**build_args, "base_write": RayIO.to_parquet},
    )
    read_json_glob = __make_read(
        ExperimentalPandasJsonParser, ExperimentalGlobDispatcher
    )
    to_json_glob = __make_write(
        ExperimentalGlobDispatcher,
        build_args={**build_args, "base_write": RayIO.to_json},
    )
    read_xml_glob = __make_read(ExperimentalPandasXmlParser, ExperimentalGlobDispatcher)
    to_xml_glob = __make_write(
        ExperimentalGlobDispatcher,
        build_args={**build_args, "base_write": RayIO.to_xml},
    )
    read_pickle_glob = __make_read(
        ExperimentalPandasPickleParser, ExperimentalGlobDispatcher
    )
    to_pickle_glob = __make_write(
        ExperimentalGlobDispatcher,
        build_args={**build_args, "base_write": RayIO.to_pickle},
    )
    read_custom_text = __make_read(
        ExperimentalCustomTextParser, ExperimentalCustomTextDispatcher
    )
    # the distributed SQL reader receives ``read_sql`` defined above as ``base_read``
    read_sql_distributed = __make_read(
        ExperimentalSQLDispatcher, build_args={**build_args, "base_read": read_sql}
    )

    del __make_read  # to not pollute class namespace
    del __make_write  # to not pollute class namespace

    @staticmethod
    def _to_csv_check_support(kwargs):
        """
        Check if parallel version of ``to_csv`` could be used.

        Parameters
        ----------
        kwargs : dict
            Keyword arguments passed to ``.to_csv()``.

        Returns
        -------
        bool
            Whether parallel version of ``to_csv`` is applicable.
        """
        path_or_buf = kwargs["path_or_buf"]
        compression = kwargs["compression"]
        if not isinstance(path_or_buf, str):
            return False
        # case when the pointer is placed at the beginning of the file.
        if "r" in kwargs["mode"] and "+" in kwargs["mode"]:
            return False
        # encodings with BOM don't support;
        # instead of one mark in result bytes we will have them by the number of partitions
        # so we should fallback in pandas for `utf-16`, `utf-32` with all aliases, in instance
        # (`utf_32_be`, `utf_16_le` and so on)
        if kwargs["encoding"] is not None:
            encoding = kwargs["encoding"].lower()
            if "u" in encoding or "utf" in encoding:
                if "16" in encoding or "32" in encoding:
                    return False
        if compression is None or not compression == "infer":
            return False
        if any((path_or_buf.endswith(ext) for ext in [".gz", ".bz2", ".zip", ".xz"])):
            return False
        return True

    @classmethod
    def to_csv(cls, qc, **kwargs):
        """
        Write records stored in the `qc` to a CSV file.

        When ``_to_csv_check_support`` rejects the arguments, the call is delegated
        to ``RayIO.to_csv``. Otherwise every row partition is dumped to an in-memory
        buffer in a remote task, and the buffers are written to the target one after
        another, ordered with the help of a ``SignalActor``.

        Parameters
        ----------
        qc : BaseQueryCompiler
            The query compiler of the Modin dataframe that we want to run ``to_csv`` on.
        **kwargs : dict
            Parameters for ``pandas.to_csv(**kwargs)``.

        Returns
        -------
        object or None
            Whatever ``RayIO.to_csv`` returns when falling back to it,
            ``None`` after the parallel write.
        """
        # NOTE: the caller's `kwargs` dict is updated with the stringified path
        kwargs["path_or_buf"] = stringify_path(kwargs["path_or_buf"])
        if not cls._to_csv_check_support(kwargs):
            return RayIO.to_csv(qc, **kwargs)

        # one signal per row partition plus one, since the last partition
        # also sends a signal with index ``partition_idx + 1``
        signals = SignalActor.options(resources=RayTaskCustomResources.get()).remote(
            len(qc._modin_frame._partitions) + 1
        )

        def func(df, **kw):  # pragma: no cover
            """
            Dump a chunk of rows as csv, then save them to target maintaining order.

            Parameters
            ----------
            df : pandas.DataFrame
                A chunk of rows to write to a CSV file.
            **kw : dict
                Arguments to pass to ``pandas.to_csv(**kw)`` plus an extra argument
                `partition_idx` serving as chunk index to maintain rows order.

            Returns
            -------
            pandas.DataFrame
                An empty frame; only used to track the completion of the task.
            """
            partition_idx = kw["partition_idx"]
            # the copy is made to not implicitly change the input parameters;
            # to write to an intermediate buffer, we need to change `path_or_buf` in kwargs
            csv_kwargs = kwargs.copy()
            if partition_idx != 0:
                # we need to create a new file only for first recording
                # all the rest should be recorded in appending mode
                if "w" in csv_kwargs["mode"]:
                    csv_kwargs["mode"] = csv_kwargs["mode"].replace("w", "a")
                # It is enough to write the header for the first partition
                csv_kwargs["header"] = False

            # for parallelization purposes, each partition is written to an intermediate buffer
            path_or_buf = csv_kwargs["path_or_buf"]
            is_binary = "b" in csv_kwargs["mode"]
            csv_kwargs["path_or_buf"] = io.BytesIO() if is_binary else io.StringIO()
            # `storage_options` is withheld from the in-memory dump and restored afterwards
            storage_options = csv_kwargs.pop("storage_options", None)
            df.to_csv(**csv_kwargs)
            csv_kwargs.update({"storage_options": storage_options})
            content = csv_kwargs["path_or_buf"].getvalue()
            csv_kwargs["path_or_buf"].close()

            # each process waits for its turn to write to a file
            # NOTE(review): if an earlier partition fails before sending its signal,
            # this wait presumably never completes - confirm against SignalActor.
            RayWrapper.materialize(signals.wait.remote(partition_idx))

            # preparing to write data from the buffer to a file
            with get_handle(
                path_or_buf,
                # in case when using URL in implicit text mode
                # pandas try to open `path_or_buf` in binary mode
                csv_kwargs["mode"] if is_binary else csv_kwargs["mode"] + "t",
                encoding=kwargs["encoding"],
                errors=kwargs["errors"],
                compression=kwargs["compression"],
                storage_options=kwargs.get("storage_options", None),
                is_text=not is_binary,
            ) as handles:
                handles.handle.write(content)

            # signal that the next process can start writing to the file
            RayWrapper.materialize(signals.send.remote(partition_idx + 1))
            # used for synchronization purposes
            return pandas.DataFrame()

        # signaling that the partition with id==0 can be written to the file
        RayWrapper.materialize(signals.send.remote(0))
        # Ensure that the metadata is synchronized
        qc._modin_frame._propagate_index_objs(axis=None)
        # `max_retries=0`: the tasks write to a file, so they are not retried
        result = qc._modin_frame._partition_mgr_cls.map_axis_partitions(
            axis=1,
            partitions=qc._modin_frame._partitions,
            map_func=func,
            keep_partitioning=True,
            lengths=None,
            enumerate_partitions=True,
            max_retries=0,
        )
        # pending completion
        RayWrapper.materialize(
            [part.list_of_blocks[0] for row in result for part in row]
        )

    @classmethod
    def from_ray(cls, ray_obj):
        """
        Build a Modin `query_compiler` out of a Ray Dataset.

        Parameters
        ----------
        ray_obj : ray.data.Dataset
            The Ray Dataset to convert from.

        Returns
        -------
        BaseQueryCompiler
            QueryCompiler containing data from the Ray Dataset.
        """
        # the references to pandas frames become row partitions (``axis=0``)
        modin_obj = from_partitions(ray_obj.to_pandas_refs(), axis=0)
        return modin_obj._query_compiler

    @classmethod
    def to_ray(cls, modin_obj):
        """
        Turn a Modin DataFrame/Series into a Ray Dataset.

        Parameters
        ----------
        modin_obj : modin.pandas.DataFrame, modin.pandas.Series
            The Modin DataFrame/Series to convert.

        Returns
        -------
        ray.data.Dataset
            Converted object with type depending on input.
        """
        # the dataset is assembled from the row partitions (``axis=0``) of the object
        row_parts = unwrap_partitions(modin_obj, axis=0)
        dataset = from_pandas_refs(row_parts)
        return dataset

    @classmethod
    def from_map(cls, func, iterable, *args, **kwargs):
        """
        Create a Modin `query_compiler` from a map function.

        The resulting `query_compiler` is split by row partitions only: every
        element of the iterable object produces exactly one row partition.

        Parameters
        ----------
        func : callable
            Function to map across the iterable object.
        iterable : Iterable
            An iterable object.
        *args : tuple
            Positional arguments to pass in `func`.
        **kwargs : dict
            Keyword arguments to pass in `func`.

        Returns
        -------
        BaseQueryCompiler
            QueryCompiler containing data returned by map function.
        """
        remote_func = cls.frame_cls._partition_mgr_cls.preprocess_func(func)
        rows = []
        for obj in iterable:
            # NOTE(review): `kwargs` are expanded into the keyword arguments of
            # ``RayWrapper.deploy`` itself - confirm that it relays them to `func`.
            future = RayWrapper.deploy(
                remote_func, f_args=(obj,) + args, return_pandas_df=True, **kwargs
            )
            # a single-element row: one block partition per item
            rows.append([cls.frame_partition_cls(future)])
        frame = cls.frame_cls(np.array(rows))
        return cls.query_compiler_cls(frame)


================================================
FILE: modin/core/execution/ray/implementations/pandas_on_ray/partitioning/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Base Modin Dataframe classes related to its partitioning and optimized for pandas on Ray execution."""

from .partition import PandasOnRayDataframePartition
from .partition_manager import PandasOnRayDataframePartitionManager
from .virtual_partition import (
    PandasOnRayDataframeColumnPartition,
    PandasOnRayDataframeRowPartition,
    PandasOnRayDataframeVirtualPartition,
)

__all__ = [
    "PandasOnRayDataframePartition",
    "PandasOnRayDataframePartitionManager",
    "PandasOnRayDataframeVirtualPartition",
    "PandasOnRayDataframeColumnPartition",
    "PandasOnRayDataframeRowPartition",
]


================================================
FILE: modin/core/execution/ray/implementations/pandas_on_ray/partitioning/partition.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses class that wraps data (block partition) and its metadata."""

from typing import Callable, Union

import pandas
import ray

from modin.config import LazyExecution, RayTaskCustomResources
from modin.core.dataframe.pandas.partitioning.partition import PandasDataframePartition
from modin.core.execution.ray.common import MaterializationHook, RayWrapper
from modin.core.execution.ray.common.deferred_execution import (
    DeferredExecution,
    MetaList,
    MetaListHook,
)
from modin.core.execution.ray.common.utils import ObjectIDType
from modin.logging import disable_logging, get_logger
from modin.pandas.indexing import compute_sliced_len
from modin.utils import _inherit_docstrings


class PandasOnRayDataframePartition(PandasDataframePartition):
    """
    The class implements the interface in ``PandasDataframePartition``.

    Parameters
    ----------
    data : ObjectIDType or DeferredExecution
        A reference to ``pandas.DataFrame`` that needs to be wrapped with this class
        or a reference to DeferredExecution that needs to be executed on demand.
    length : ObjectIDType or int, optional
        Length or reference to it of wrapped ``pandas.DataFrame``.
    width : ObjectIDType or int, optional
        Width or reference to it of wrapped ``pandas.DataFrame``.
    ip : ObjectIDType or str, optional
        Node IP address or reference to it that holds wrapped ``pandas.DataFrame``.
    meta : MetaList, optional
        Meta information, containing the lengths and the worker address (the last value).
        If specified, `length`, `width` and `ip` are not used.
    meta_offset : int, default: 0
        The lengths offset in the meta list. Only used if `meta` is specified.
    """

    # the wrapper used to put objects into the object store (see ``put``)
    execution_wrapper = RayWrapper

    def __init__(
        self,
        data: Union[ray.ObjectRef, DeferredExecution],
        length: int = None,
        width: int = None,
        ip: str = None,
        meta: MetaList = None,
        meta_offset: int = 0,
    ):
        super().__init__()
        if isinstance(data, DeferredExecution):
            # paired with the ``unsubscribe()`` call made in ``__del__``
            data.subscribe()
        self._data_ref = data
        # When no meta list is supplied, a fresh one holding
        # [length, width, ip] is created and addressed at offset 0.
        # For a DeferredExecution, _meta is later replaced with the list
        # returned by the remote function. That list may describe several
        # results, in which case _meta_offset points to the entries that
        # belong to this partition.
        if meta is not None:
            self._meta = meta
            self._meta_offset = meta_offset
        else:
            self._meta = MetaList([length, width, ip])
            self._meta_offset = 0

        log = get_logger()
        if self._is_debug(log):
            log.debug(
                "Partition ID: {}, Height: {}, Width: {}, Node IP: {}".format(
                    self._identity,
                    str(self._length_cache),
                    str(self._width_cache),
                    str(self._ip_cache),
                )
            )

    @disable_logging
    def __del__(self):
        """Unsubscribe from DeferredExecution."""
        # The finalizer also runs for objects whose ``__init__`` raised before
        # ``_data_ref`` was assigned, so the attribute may be missing.
        data = getattr(self, "_data_ref", None)
        if isinstance(data, DeferredExecution):
            data.unsubscribe()

    def apply(self, func: Union[Callable, ray.ObjectRef], *args, **kwargs):
        """
        Run a function on the object wrapped by this partition.

        Parameters
        ----------
        func : callable or ray.ObjectRef
            A function to apply.
        *args : iterable
            Additional positional arguments to be passed in `func`.
        **kwargs : dict
            Additional keyword arguments to be passed in `func`.

        Returns
        -------
        PandasOnRayDataframePartition
            A new ``PandasOnRayDataframePartition`` object.

        Notes
        -----
        It does not matter if `func` is callable or an ``ray.ObjectRef``. Ray will
        handle it correctly either way. The keyword arguments are sent as a dictionary.
        """
        log = get_logger()
        if self._is_debug(log):
            log.debug(f"ENTER::Partition.apply::{self._identity}")
        # the execution is built on top of the current data and started right away
        deferred = DeferredExecution(self._data_ref, func, args, kwargs)
        data, meta, meta_offset = deferred.exec()
        if self._is_debug(log):
            log.debug(f"EXIT::Partition.apply::{self._identity}")
        return self.__constructor__(data, meta=meta, meta_offset=meta_offset)

    @_inherit_docstrings(PandasDataframePartition.add_to_apply_calls)
    def add_to_apply_calls(
        self,
        func: Union[Callable, ray.ObjectRef],
        *args,
        length=None,
        width=None,
        **kwargs,
    ):
        # the call is only recorded here: ``exec()`` is not invoked on the
        # DeferredExecution, the new partition simply wraps it
        deferred = DeferredExecution(self._data_ref, func, args, kwargs)
        return self.__constructor__(data=deferred, length=length, width=width)

    @_inherit_docstrings(PandasDataframePartition.drain_call_queue)
    def drain_call_queue(self):
        pending = self._data_ref
        if isinstance(pending, DeferredExecution):
            log = get_logger()
            if self._is_debug(log):
                log.debug(f"ENTER::Partition.drain_call_queue::{self._identity}")
            # the data reference and the metadata are replaced with the results
            self._data_ref, self._meta, self._meta_offset = pending.exec()
            if self._is_debug(log):
                log.debug(f"EXIT::Partition.drain_call_queue::{self._identity}")
            return None
        # nothing is deferred - the data reference is handed back as is
        return pending

    @_inherit_docstrings(PandasDataframePartition.wait)
    def wait(self):
        # ``_data`` drains the call queue before returning the data reference
        data_ref = self._data
        RayWrapper.wait(data_ref)

    def __copy__(self):
        """
        Make a copy of this partition.

        The copy refers to the same data and the same meta list as the original.

        Returns
        -------
        PandasOnRayDataframePartition
            A copy of this partition.
        """
        data_ref = self._data_ref
        meta = self._meta
        offset = self._meta_offset
        return self.__constructor__(data_ref, meta=meta, meta_offset=offset)

    def mask(self, row_labels, col_labels):
        """
        Lazily create a mask that extracts the indices provided.

        If an axis is masked with a slice and the corresponding dimension of this
        partition is not materialized yet, the dimension of the new partition is
        set without materializing it.

        Parameters
        ----------
        row_labels : list-like, slice or label
            The row labels for the rows to extract.
        col_labels : list-like, slice or label
            The column labels for the columns to extract.

        Returns
        -------
        PandasOnRayDataframePartition
            A new ``PandasOnRayDataframePartition`` object.
        """
        log = get_logger()
        if self._is_debug(log):
            log.debug(f"ENTER::Partition.mask::{self._identity}")
        masked = super().mask(row_labels, col_labels)

        if isinstance(row_labels, slice):
            pending_length = self._length_cache
            if isinstance(pending_length, ObjectIDType):
                # a full axis take keeps the length, any other slice
                # is applied to the length by the hook
                masked._length_cache = (
                    pending_length
                    if row_labels == slice(None)
                    else SlicerHook(pending_length, row_labels)
                )

        if isinstance(col_labels, slice):
            pending_width = self._width_cache
            if isinstance(pending_width, ObjectIDType):
                # same as for the rows
                masked._width_cache = (
                    pending_width
                    if col_labels == slice(None)
                    else SlicerHook(pending_width, col_labels)
                )

        if self._is_debug(log):
            log.debug(f"EXIT::Partition.mask::{self._identity}")
        return masked

    @classmethod
    def put(cls, obj: pandas.DataFrame):
        """
        Store the data frame in the Plasma store and wrap it with a partition object.

        Parameters
        ----------
        obj : pandas.DataFrame
            A data frame to be put.

        Returns
        -------
        PandasOnRayDataframePartition
            A new ``PandasOnRayDataframePartition`` object.
        """
        data_ref = cls.execution_wrapper.put(obj)
        # the frame is available locally, so both dimensions are known upfront
        n_rows = len(obj.index)
        n_cols = len(obj.columns)
        return cls(data_ref, n_rows, n_cols)

    @classmethod
    def preprocess_func(cls, func):
        """
        Store a function in the Plasma store to use it in ``apply``.

        Parameters
        ----------
        func : callable
            A function to preprocess.

        Returns
        -------
        ray.ObjectRef
            A reference to `func`.
        """
        func_ref = cls.execution_wrapper.put(func)
        return func_ref

    def length(self, materialize=True):
        """
        Get the length of the object wrapped by this partition.

        Parameters
        ----------
        materialize : bool, default: True
            Whether to forcibly materialize the result into an integer. If ``False``
            was specified, may return a future of the result if it hasn't been
            materialized yet.

        Returns
        -------
        int or ray.ObjectRef
            The length of the object.
        """
        cached = self._length_cache
        if cached is None:
            # executing the pending calls may provide the metadata
            self.drain_call_queue()
            cached = self._length_cache
        if cached is None:
            # still unknown: request both dimensions with one remote call
            # and remember the width as well
            dims_task = _get_index_and_columns.options(
                resources=RayTaskCustomResources.get()
            )
            cached, self._width_cache = dims_task.remote(self._data_ref)
            self._length_cache = cached
        if materialize and isinstance(cached, ObjectIDType):
            cached = RayWrapper.materialize(cached)
            self._length_cache = cached
        return cached

    def width(self, materialize=True):
        """
        Get the width of the object wrapped by the partition.

        Parameters
        ----------
        materialize : bool, default: True
            Whether to forcibly materialize the result into an integer. If ``False``
            was specified, may return a future of the result if it hasn't been
            materialized yet.

        Returns
        -------
        int or ray.ObjectRef
            The width of the object.
        """
        cached = self._width_cache
        if cached is None:
            # executing the pending calls may provide the metadata
            self.drain_call_queue()
            cached = self._width_cache
        if cached is None:
            # still unknown: request both dimensions with one remote call
            # and remember the length as well
            dims_task = _get_index_and_columns.options(
                resources=RayTaskCustomResources.get()
            )
            self._length_cache, cached = dims_task.remote(self._data_ref)
            self._width_cache = cached
        if materialize and isinstance(cached, ObjectIDType):
            cached = RayWrapper.materialize(cached)
            self._width_cache = cached
        return cached

    def ip(self, materialize=True):
        """
        Get the node IP address of the object wrapped by this partition.

        Parameters
        ----------
        materialize : bool, default: True
            Whether to forcibly materialize the result into a string. If ``False``
            was specified, may return a future of the result if it hasn't been
            materialized yet.

        Returns
        -------
        str or ray.ObjectRef or None
            IP address of the node that holds the data. ``None`` if the address
            is still unknown after the call queue has been drained.
        """
        ip = self._ip_cache
        if ip is None:
            self.drain_call_queue()
            # Draining replaces the meta list of a deferred partition, so the
            # value has to be read again (as ``length`` and ``width`` do);
            # otherwise a stale ``None`` would be returned.
            ip = self._ip_cache
        if materialize and isinstance(ip, ObjectIDType):
            self._ip_cache = ip = RayWrapper.materialize(ip)
        return ip

    @property
    def _data(self) -> ray.ObjectRef:  # noqa: GL08
        # The call queue is drained first, so a DeferredExecution stored in
        # ``_data_ref`` is executed and replaced before the reference is returned.
        self.drain_call_queue()
        return self._data_ref

    # The length is stored in the meta list at position ``_meta_offset``.
    @property
    def _length_cache(self):  # noqa: GL08
        return self._meta[self._meta_offset]

    @_length_cache.setter
    def _length_cache(self, value):  # noqa: GL08
        self._meta[self._meta_offset] = value

    # The width is stored in the meta list right after the length,
    # i.e. at position ``_meta_offset + 1``.
    @property
    def _width_cache(self):  # noqa: GL08
        return self._meta[self._meta_offset + 1]

    @_width_cache.setter
    def _width_cache(self, value):  # noqa: GL08
        self._meta[self._meta_offset + 1] = value

    # The node IP address is always the last value of the meta list,
    # regardless of ``_meta_offset``.
    @property
    def _ip_cache(self):  # noqa: GL08
        return self._meta[-1]

    @_ip_cache.setter
    def _ip_cache(self, value):  # noqa: GL08
        self._meta[-1] = value


@ray.remote(num_returns=2)
def _get_index_and_columns(df):  # pragma: no cover
    """
    Compute the number of rows and columns of a pandas DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        A pandas DataFrame which dimensions are needed.

    Returns
    -------
    int
        The number of rows.
    int
        The number of columns.
    """
    n_rows = len(df.index)
    n_cols = len(df.columns)
    return n_rows, n_cols


# Keep references to the original implementations, so that ``_configure_lazy_exec``
# is able to re-bind ``apply`` and ``add_to_apply_calls`` for any execution mode.
PandasOnRayDataframePartition._eager_exec_func = PandasOnRayDataframePartition.apply
PandasOnRayDataframePartition._lazy_exec_func = (
    PandasOnRayDataframePartition.add_to_apply_calls
)


def _configure_lazy_exec(cls: LazyExecution):
    """
    Configure lazy execution mode for PandasOnRayDataframePartition.

    Re-binds ``PandasOnRayDataframePartition.apply`` and
    ``PandasOnRayDataframePartition.add_to_apply_calls`` depending on the mode:

    * ``"Auto"``: ``apply`` is the eager implementation and ``add_to_apply_calls``
      is the lazy one.
    * ``"On"``: both entry points go through the lazy implementation; ``apply``
      passes ``length=None`` and ``width=None`` to it.
    * ``"Off"``: both entry points go through the eager implementation;
      ``add_to_apply_calls`` drops its ``length`` and ``width`` arguments.

    Parameters
    ----------
    cls : LazyExecution
        The config object, its ``get()`` returns the mode to apply.

    Raises
    ------
    ValueError
        If the mode is not one of ``"Auto"``, ``"On"`` or ``"Off"``.
    """
    mode = cls.get()
    get_logger().debug(f"Ray lazy execution mode: {mode}")

    if mode not in ("Auto", "On", "Off"):
        raise ValueError(f"Invalid lazy execution mode: {mode}")

    partition_cls = PandasOnRayDataframePartition
    eager_impl = partition_cls._eager_exec_func
    lazy_impl = partition_cls._lazy_exec_func

    def lazy_exec(self, func, *args, **kwargs):
        # `apply` has no length/width hints, so pass explicit `None`-s
        return self._lazy_exec_func(func, *args, length=None, width=None, **kwargs)

    def eager_exec(self, func, *args, length=None, width=None, **kwargs):
        # the eager implementation is called without the length/width hints
        return self._eager_exec_func(func, *args, **kwargs)

    # mode -> (implementation of `apply`, implementation of `add_to_apply_calls`)
    bindings = {
        "Auto": (eager_impl, lazy_impl),
        "On": (lazy_exec, lazy_impl),
        "Off": (eager_impl, eager_exec),
    }
    apply_impl, add_to_apply_calls_impl = bindings[mode]
    partition_cls.apply = apply_impl
    partition_cls.add_to_apply_calls = add_to_apply_calls_impl


# Register `_configure_lazy_exec` as a `LazyExecution` subscriber.
# NOTE(review): presumably the callback is invoked on every change of the
# config value (and possibly right at subscription) -- confirm in `modin.config`.
LazyExecution.subscribe(_configure_lazy_exec)


class SlicerHook(MaterializationHook):
    """
    Used by mask() for the sliced length computation.

    Parameters
    ----------
    ref : ObjectIDType
        Non-materialized length to be sliced.
    slc : slice
        The slice to be applied.
    """

    def __init__(self, ref: ObjectIDType, slc: slice):
        self.ref = ref
        self.slc = slc

    def pre_materialize(self):
        """
        Get the sliced length or object ref if not materialized.

        Returns
        -------
        int or ObjectIDType
        """
        if not isinstance(self.ref, MetaListHook):
            return self.ref

        # the wrapped hook may already know the length, slice it right away then
        length_or_ref = self.ref.pre_materialize()
        if isinstance(length_or_ref, int):
            return compute_sliced_len(self.slc, length_or_ref)
        return length_or_ref

    def post_materialize(self, materialized):
        """
        Get the sliced length.

        Parameters
        ----------
        materialized : list or int

        Returns
        -------
        int
        """
        length = (
            self.ref.post_materialize(materialized)
            if isinstance(self.ref, MetaListHook)
            else materialized
        )
        return compute_sliced_len(self.slc, length)


================================================
FILE: modin/core/execution/ray/implementations/pandas_on_ray/partitioning/partition_manager.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses class that implements ``GenericRayDataframePartitionManager`` using Ray."""

import numpy as np
from pandas.core.dtypes.common import is_numeric_dtype

from modin.config import AsyncReadMode
from modin.core.execution.modin_aqp import progress_bar_wrapper
from modin.core.execution.ray.common import RayWrapper
from modin.core.execution.ray.generic.partitioning import (
    GenericRayDataframePartitionManager,
)
from modin.logging import get_logger
from modin.utils import _inherit_docstrings

from .partition import PandasOnRayDataframePartition
from .virtual_partition import (
    PandasOnRayDataframeColumnPartition,
    PandasOnRayDataframeRowPartition,
)


class PandasOnRayDataframePartitionManager(GenericRayDataframePartitionManager):
    """The class implements the interface in `PandasDataframePartitionManager`."""

    # This object uses RayRemotePartition objects as the underlying store.
    _partition_class = PandasOnRayDataframePartition
    _column_partitions_class = PandasOnRayDataframeColumnPartition
    _row_partition_class = PandasOnRayDataframeRowPartition
    _execution_wrapper = RayWrapper
    materialize_futures = RayWrapper.materialize

    @classmethod
    def wait_partitions(cls, partitions):
        """
        Block until the objects wrapped by `partitions` are computed, without materializing them.

        The blocks of all the partitions are collected into a single list that
        is passed to ``RayWrapper.wait``.

        Parameters
        ----------
        partitions : np.ndarray
            NumPy array with ``PandasDataframePartition``-s.
        """
        blocks = []
        for partition in partitions:
            blocks.extend(partition.list_of_blocks)
        RayWrapper.wait(blocks)

    @classmethod
    @_inherit_docstrings(
        GenericRayDataframePartitionManager.split_pandas_df_into_partitions
    )
    def split_pandas_df_into_partitions(
        cls, df, row_chunksize, col_chunksize, update_bar
    ):
        # The distributed splitting was found to be beneficial only when all of
        # the following holds:
        #   1. There are more than 6mln elements in the `df`; with fewer data the
        #   sequential splitting is faster.
        #   2. The data is numerical only; with other dtypes, putting the whole big
        #   dataframe into the storage takes too much time.
        #   3. 'AsyncReadMode' is set to True. The distributed splitting consumes more
        #   memory than the sequential one (estimated as ~2.5x of the dataframe size),
        #   and no good way was found to automatically fall back to the sequential
        #   implementation in case of not enough memory, so the distributed version
        #   has to be enabled explicitly. Follow this discussion for more info on
        #   why automatic dispatching is hard:
        #   https://github.com/modin-project/modin/pull/6640#issuecomment-1759932664
        n_rows, n_cols = len(df), len(df.columns)
        enough_elements = n_rows * n_cols > 6_000_000
        all_numeric_types = all(map(is_numeric_dtype, df.dtypes))
        async_mode_on = AsyncReadMode.get()

        log = get_logger()

        if not (enough_elements and all_numeric_types and async_mode_on):
            log.info(
                "Using sequential splitting in '.from_pandas()' because of some of the conditions are False: "
                + f"{enough_elements=}; {all_numeric_types=}; {async_mode_on=}"
            )
            return super().split_pandas_df_into_partitions(
                df, row_chunksize, col_chunksize, update_bar
            )

        log.info("Using distributed splitting in '.from_pandas()'")
        whole_frame = cls._partition_class.put(df)

        def take_block(row_start, col_start):
            rows = slice(row_start, row_start + row_chunksize)
            cols = slice(col_start, col_start + col_chunksize)
            # 2D iloc works surprisingly slow, so doing this chained iloc calls:
            # https://github.com/pandas-dev/pandas/issues/55202
            return whole_frame.apply(lambda frame: frame.iloc[rows, :].iloc[:, cols])

        grid = []
        for row_start in range(0, n_rows, row_chunksize):
            grid.append(
                [
                    update_bar(take_block(row_start, col_start))
                    for col_start in range(0, n_cols, col_chunksize)
                ]
            )
        return np.array(grid)


def _make_wrapped_method(name: str):
    """
    Define new attribute that should work with progress bar.

    The `GenericRayDataframePartitionManager` attribute is unwrapped to its
    underlying function, passed through `progress_bar_wrapper`, turned back
    into a classmethod and set on `PandasOnRayDataframePartitionManager`.

    Parameters
    ----------
    name : str
        Name of `GenericRayDataframePartitionManager` attribute that should be reused.

    Notes
    -----
    - `classmethod` decorator shouldn't be applied twice, so we refer to `__func__` attribute.
    - New attribute is defined for `PandasOnRayDataframePartitionManager`.
    """
    generic_impl = getattr(GenericRayDataframePartitionManager, name).__func__
    wrapped = classmethod(progress_bar_wrapper(generic_impl))
    setattr(PandasOnRayDataframePartitionManager, name, wrapped)


# Re-define each of the listed `GenericRayDataframePartitionManager` methods on
# `PandasOnRayDataframePartitionManager`, wrapped via `_make_wrapped_method`.
for method in (
    "map_partitions",
    "lazy_map_partitions",
    "map_axis_partitions",
    "_apply_func_to_list_of_partitions",
    "apply_func_to_select_indices",
    "apply_func_to_select_indices_along_full_axis",
    "apply_func_to_indices_both_axis",
    "n_ary_operation",
):
    _make_wrapped_method(method)


================================================
FILE: modin/core/execution/ray/implementations/pandas_on_ray/partitioning/virtual_partition.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses classes responsible for storing a virtual partition and applying a function to it."""

import pandas
import ray
from ray.util import get_node_ip_address

from modin.config import RayTaskCustomResources
from modin.core.dataframe.pandas.partitioning.axis_partition import (
    PandasDataframeAxisPartition,
)
from modin.core.execution.ray.common import RayWrapper
from modin.utils import _inherit_docstrings

from .partition import PandasOnRayDataframePartition


class PandasOnRayDataframeVirtualPartition(PandasDataframeAxisPartition):
    """
    The class implements the interface in ``PandasDataframeAxisPartition``.

    Parameters
    ----------
    list_of_partitions : Union[list, PandasOnRayDataframePartition]
        List of ``PandasOnRayDataframePartition`` and
        ``PandasOnRayDataframeVirtualPartition`` objects, or a single
        ``PandasOnRayDataframePartition``.
    get_ip : bool, default: False
        Whether to get node IP addresses to conforming partitions or not.
    full_axis : bool, default: True
        Whether or not the virtual partition encompasses the whole axis.
    call_queue : list, optional
        A list of tuples (callable, args, kwargs) that contains deferred calls.
    length : ray.ObjectRef or int, optional
        Length, or reference to length, of wrapped ``pandas.DataFrame``.
    width : ray.ObjectRef or int, optional
        Width, or reference to width, of wrapped ``pandas.DataFrame``.
    """

    _PARTITIONS_METADATA_LEN = 3  # (length, width, ip)
    partition_type = PandasOnRayDataframePartition
    axis = None

    # these variables are intentionally initialized at runtime (see #6023)
    _DEPLOY_AXIS_FUNC = None
    _DEPLOY_SPLIT_FUNC = None
    _DRAIN_FUNC = None

    @classmethod
    def _get_deploy_axis_func(cls):  # noqa: GL08
        cached = cls._DEPLOY_AXIS_FUNC
        if cached is None:
            # put the function into the object store once and reuse the reference
            cached = RayWrapper.put(PandasDataframeAxisPartition.deploy_axis_func)
            cls._DEPLOY_AXIS_FUNC = cached
        return cached

    @classmethod
    def _get_deploy_split_func(cls):  # noqa: GL08
        cached = cls._DEPLOY_SPLIT_FUNC
        if cached is None:
            # put the function into the object store once and reuse the reference
            cached = RayWrapper.put(PandasDataframeAxisPartition.deploy_splitting_func)
            cls._DEPLOY_SPLIT_FUNC = cached
        return cached

    @classmethod
    def _get_drain_func(cls):  # noqa: GL08
        cached = cls._DRAIN_FUNC
        if cached is None:
            # put the function into the object store once and reuse the reference
            cached = RayWrapper.put(PandasDataframeAxisPartition.drain)
            cls._DRAIN_FUNC = cached
        return cached

    @property
    def list_of_ips(self):
        """
        Get the IPs holding the physical objects composing this partition.

        Returns
        -------
        List
            A list of IPs as ``ray.ObjectRef`` or str.
        """
        ips = []
        for block_partition in self.list_of_block_partitions:
            # the call queue of each block is drained right before asking for its
            # IP; the IP itself is not forced to be materialized
            block_partition.drain_call_queue()
            ips.append(block_partition.ip(materialize=False))
        return ips

    @classmethod
    @_inherit_docstrings(PandasDataframeAxisPartition.deploy_splitting_func)
    def deploy_splitting_func(
        cls,
        axis,
        func,
        f_args,
        f_kwargs,
        num_splits,
        *partitions,
        extract_metadata=False,
    ):
        # every split comes with `_PARTITIONS_METADATA_LEN` extra outputs
        # when the metadata is requested
        if extract_metadata:
            num_returns = num_splits * (1 + cls._PARTITIONS_METADATA_LEN)
        else:
            num_returns = num_splits
        remote_func = _deploy_ray_func.options(
            num_returns=num_returns,
            resources=RayTaskCustomResources.get(),
        )
        return remote_func.remote(
            cls._get_deploy_split_func(),
            *f_args,
            num_splits,
            *partitions,
            axis=axis,
            f_to_deploy=func,
            f_len_args=len(f_args),
            f_kwargs=f_kwargs,
            extract_metadata=extract_metadata,
        )

    @classmethod
    def deploy_axis_func(
        cls,
        axis,
        func,
        f_args,
        f_kwargs,
        num_splits,
        maintain_partitioning,
        *partitions,
        min_block_size,
        lengths=None,
        manual_partition=False,
        max_retries=None,
    ):
        """
        Deploy a function along a full axis.

        Parameters
        ----------
        axis : {0, 1}
            The axis to perform the function along.
        func : callable
            The function to perform.
        f_args : list or tuple
            Positional arguments to pass to ``func``.
        f_kwargs : dict
            Keyword arguments to pass to ``func``.
        num_splits : int
            The number of splits to return (see ``split_result_of_axis_func_pandas``).
        maintain_partitioning : bool
            If True, keep the old partitioning if possible.
            If False, create a new partition layout.
        *partitions : iterable
            All partitions that make up the full axis (row or column).
        min_block_size : int
            Minimum number of rows/columns in a single split.
        lengths : list, optional
            The list of lengths to shuffle the object.
        manual_partition : bool, default: False
            If True, partition the result with `lengths`.
        max_retries : int, default: None
            The max number of times to retry the func.

        Returns
        -------
        list
            A list of ``ray.ObjectRef``-s.
        """
        # the amount of resulting splits is defined by `lengths` when it's given
        n_outputs = num_splits if lengths is None else len(lengths)
        task_options = {
            "num_returns": n_outputs * (1 + cls._PARTITIONS_METADATA_LEN)
        }
        if max_retries is not None:
            # `max_retries` is only passed on when explicitly specified
            task_options["max_retries"] = max_retries
        task_options["resources"] = RayTaskCustomResources.get()

        return _deploy_ray_func.options(**task_options).remote(
            cls._get_deploy_axis_func(),
            *f_args,
            num_splits,
            maintain_partitioning,
            *partitions,
            axis=axis,
            f_to_deploy=func,
            f_len_args=len(f_args),
            f_kwargs=f_kwargs,
            manual_partition=manual_partition,
            min_block_size=min_block_size,
            lengths=lengths,
            return_generator=True,
        )

    @classmethod
    def deploy_func_between_two_axis_partitions(
        cls,
        axis,
        func,
        f_args,
        f_kwargs,
        num_splits,
        len_of_left,
        other_shape,
        *partitions,
        min_block_size,
    ):
        """
        Deploy a function along a full axis between two data sets.

        Parameters
        ----------
        axis : {0, 1}
            The axis to perform the function along.
        func : callable
            The function to perform.
        f_args : list or tuple
            Positional arguments to pass to ``func``.
        f_kwargs : dict
            Keyword arguments to pass to ``func``.
        num_splits : int
            The number of splits to return (see ``split_result_of_axis_func_pandas``).
        len_of_left : int
            The number of values in `partitions` that belong to the left data set.
        other_shape : np.ndarray
            The shape of right frame in terms of partitions, i.e.
            (other_shape[i-1], other_shape[i]) will indicate slice to restore i-1 axis partition.
        *partitions : iterable
            All partitions that make up the full axis (row or column) for both data sets.
        min_block_size : int
            Minimum number of rows/columns in a single split.

        Returns
        -------
        list
            A list of ``ray.ObjectRef``-s.
        """
        remote_func = _deploy_ray_func.options(
            num_returns=num_splits * (1 + cls._PARTITIONS_METADATA_LEN),
            resources=RayTaskCustomResources.get(),
        )
        return remote_func.remote(
            PandasDataframeAxisPartition.deploy_func_between_two_axis_partitions,
            *f_args,
            num_splits,
            len_of_left,
            other_shape,
            *partitions,
            axis=axis,
            f_to_deploy=func,
            f_len_args=len(f_args),
            f_kwargs=f_kwargs,
            min_block_size=min_block_size,
            return_generator=True,
        )

    def wait(self):
        """Drain the call queue and wait for the blocks of this partition to be computed."""
        self.drain_call_queue()
        RayWrapper.wait(self.list_of_blocks)


@_inherit_docstrings(PandasOnRayDataframeVirtualPartition)
class PandasOnRayDataframeColumnPartition(PandasOnRayDataframeVirtualPartition):
    # Column flavor of the virtual partition: overrides the `axis = None`
    # placeholder of the base class with 0.
    axis = 0


@_inherit_docstrings(PandasOnRayDataframeVirtualPartition)
class PandasOnRayDataframeRowPartition(PandasOnRayDataframeVirtualPartition):
    # Row flavor of the virtual partition: overrides the `axis = None`
    # placeholder of the base class with 1.
    axis = 1


@ray.remote
def _deploy_ray_func(
    deployer,
    *positional_args,
    axis,
    f_to_deploy,
    f_len_args,
    f_kwargs,
    extract_metadata=True,
    **kwargs,
):  # pragma: no cover
    """
    Execute a function on an axis partition in a worker process.

    This is ALWAYS called on either ``PandasDataframeAxisPartition.deploy_axis_func``
    or ``PandasDataframeAxisPartition.deploy_func_between_two_axis_partitions``, which both
    serve to deploy another dataframe function on a Ray worker process. The provided `positional_args`
    contains positional arguments for both: `deployer` and for `f_to_deploy`, the parameters can be separated
    using the `f_len_args` value. The parameters are combined so they will be deserialized by Ray before the
    kernel is executed (`f_kwargs` will never contain more Ray objects, and thus does not require deserialization).

    Parameters
    ----------
    deployer : callable
        A `PandasDataFrameAxisPartition.deploy_*` method that will call ``f_to_deploy``.
    *positional_args : list
        The first `f_len_args` elements in this list represent positional arguments
        to pass to the `f_to_deploy`. The rest are positional arguments that will be
        passed to `deployer`.
    axis : {0, 1}
        The axis to perform the function along. This argument is keyword only.
    f_to_deploy : callable or RayObjectID
        The function to deploy. This argument is keyword only.
    f_len_args : int
        Number of positional arguments to pass to ``f_to_deploy``. This argument is keyword only.
    f_kwargs : dict
        Keyword arguments to pass to ``f_to_deploy``. This argument is keyword only.
    extract_metadata : bool, default: True
        Whether to return metadata (length, width, ip) of the result. Passing `False` may relax
        the load on object storage as the remote function would return 4 times fewer futures.
        Passing `False` makes sense for temporary results where you know for sure that the
        metadata will never be requested. This argument is keyword only.
    **kwargs : dict
        Keyword arguments to pass to ``deployer``.

    Yields
    ------
    object
        The items produced by ``deployer``. If `extract_metadata` is True, each item
        is followed by its length, its width and the IP of the current node; the
        length and the width are ``None`` for items that are not ``pandas.DataFrame``.

    Notes
    -----
    Ray functions are not detected by codecov (thus pragma: no cover).
    """
    f_args, deploy_args = positional_args[:f_len_args], positional_args[f_len_args:]
    result = deployer(axis, f_to_deploy, f_args, f_kwargs, *deploy_args, **kwargs)

    if not extract_metadata:
        yield from result
        return

    ip = get_node_ip_address()
    for part in result:
        if isinstance(part, pandas.DataFrame):
            dimensions = (len(part), len(part.columns))
        else:
            # no dimensions are reported for non-DataFrame results
            dimensions = (None, None)
        yield from (part, *dimensions, ip)


================================================
FILE: modin/core/execution/unidist/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Modin's functionality related to unidist execution engine."""


================================================
FILE: modin/core/execution/unidist/common/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Common utilities for unidist execution engine."""

from .engine_wrapper import SignalActor, UnidistWrapper
from .utils import initialize_unidist

# Names re-exported by this package (all of them are imported above).
__all__ = [
    "initialize_unidist",
    "UnidistWrapper",
    "SignalActor",
]


================================================
FILE: modin/core/execution/unidist/common/engine_wrapper.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
The module with helper mixin for executing functions remotely.

To be used as a piece of building a unidist-based engine.
"""

import asyncio

import pandas
import unidist


@unidist.remote
def _deploy_unidist_func(
    func, *args, return_pandas_df=None, **kwargs
):  # pragma: no cover
    """
    Call `func` with the given arguments, optionally converting the result to a pandas DataFrame.

    Parameters
    ----------
    func : callable
        A local function that we want to call remotely.
    *args : iterable
        Positional arguments to pass to `func` when calling remotely.
    return_pandas_df : bool, optional
        Whether to convert the result of `func` to a pandas DataFrame or not.
    **kwargs : dict
        Keyword arguments to pass to `func` when calling remotely.

    Returns
    -------
    unidist.ObjectRef or list[unidist.ObjectRef]
        Unidist identifier of the result being put to object store.
    """
    outcome = func(*args, **kwargs)
    # nothing to convert if no conversion was requested or it's a DataFrame already
    if not return_pandas_df or isinstance(outcome, pandas.DataFrame):
        return outcome
    return pandas.DataFrame(outcome)


class UnidistWrapper:
    """Mixin that provides means of running functions remotely and getting local results."""

    @classmethod
    def deploy(
        cls, func, f_args=None, f_kwargs=None, return_pandas_df=None, num_returns=1
    ):
        """
        Submit `func` for remote execution.

        Parameters
        ----------
        func : callable or unidist.ObjectRef
            The function to perform.
        f_args : list or tuple, optional
            Positional arguments to pass to ``func``.
        f_kwargs : dict, optional
            Keyword arguments to pass to ``func``.
        return_pandas_df : bool, optional
            Whether to convert the result of `func` to a pandas DataFrame or not.
        num_returns : int, default: 1
            Amount of return values expected from `func`.

        Returns
        -------
        unidist.ObjectRef or list
            Unidist identifier of the result being put to object store.
        """
        positional = f_args if f_args is not None else []
        keyword = f_kwargs if f_kwargs is not None else {}
        remote_func = _deploy_unidist_func.options(num_returns=num_returns)
        return remote_func.remote(
            func, *positional, return_pandas_df=return_pandas_df, **keyword
        )

    @classmethod
    def is_future(cls, item):
        """
        Tell whether `item` is a unidist object reference.

        Parameters
        ----------
        item : unidist.ObjectRef or object
            Future or object to check.

        Returns
        -------
        boolean
            If the value is a future.
        """
        return unidist.is_object_ref(item)

    @classmethod
    def materialize(cls, obj_id):
        """
        Fetch the value behind `obj_id` from the object store.

        Parameters
        ----------
        obj_id : unidist.ObjectRef
            Unidist object identifier to get the value by.

        Returns
        -------
        object
            Whatever was identified by `obj_id`.
        """
        return unidist.get(obj_id)

    @classmethod
    def put(cls, data, **kwargs):
        """
        Store `data` in the object store.

        Parameters
        ----------
        data : object
            Data to be put.
        **kwargs : dict
            Additional keyword arguments (mostly for compatibility), they are ignored.

        Returns
        -------
        unidist.ObjectRef
            A reference to `data`.
        """
        return unidist.put(data)

    @classmethod
    def wait(cls, obj_ids, num_returns=None):
        """
        Wait on the objects without materializing them (blocking operation).

        ``unidist.wait`` assumes a list of unique object references: see
        https://github.com/modin-project/modin/issues/5045

        Parameters
        ----------
        obj_ids : list, scalar
        num_returns : int, optional
        """
        # anything but a list is treated as a single object
        ids = obj_ids if isinstance(obj_ids, list) else [obj_ids]
        unique_ids = list(set(ids))
        # by default, wait for every unique object
        expected = len(unique_ids) if num_returns is None else num_returns
        if expected > 0:
            unidist.wait(unique_ids, num_returns=expected)


@unidist.remote
class SignalActor:  # pragma: no cover
    """
    Help synchronize across tasks and actors on cluster.

    Holds one ``asyncio.Event`` per synchronization point.

    Parameters
    ----------
    event_count : int
        Number of events required for synchronization.

    Notes
    -----
    For details see: https://docs.ray.io/en/latest/advanced.html?highlight=signalactor#multi-node-synchronization-using-an-actor.
    """

    def __init__(self, event_count: int):
        events = []
        for _ in range(event_count):
            events.append(asyncio.Event())
        self.events = events

    def send(self, event_idx: int):
        """
        Set the event with `event_idx`, i.e. signal that it has occurred.

        Parameters
        ----------
        event_idx : int
        """
        event = self.events[event_idx]
        event.set()

    async def wait(self, event_idx: int):
        """
        Suspend until the event with `event_idx` is set.

        Parameters
        ----------
        event_idx : int
        """
        event = self.events[event_idx]
        await event.wait()


================================================
FILE: modin/core/execution/unidist/common/utils.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""The module holds utility and initialization routines for Modin on unidist."""

import unidist
import unidist.config as unidist_cfg

import modin.config as modin_cfg

from .engine_wrapper import UnidistWrapper


def initialize_unidist():
    """
    Initialize unidist based on ``modin.config`` variables and internal defaults.

    Unidist is initialized only if it hasn't been yet. In any case, Modin's
    ``NPartitions`` and ``CpuCount`` are set to the total amount of CPUs
    reported by ``unidist.cluster_resources()``.

    Raises
    ------
    RuntimeError
        If the unidist backend is not "mpi".
    """
    backend = unidist_cfg.Backend.get()
    if backend != "mpi":
        raise RuntimeError(
            f"Modin only supports MPI through unidist for now, got unidist backend '{backend}'"
        )

    if not unidist.is_initialized():

        def _forward_cpu_count(cpu_count):
            # propagate Modin's CPU count to the unidist config
            return unidist_cfg.CpuCount.put(cpu_count.get())

        modin_cfg.CpuCount.subscribe(_forward_cpu_count)
        runtime_env = {"env_vars": {"PYTHONWARNINGS": "ignore::FutureWarning"}}
        unidist_cfg.MpiRuntimeEnv.put(runtime_env)
        unidist.init()

    num_cpus = 0
    for node_resources in unidist.cluster_resources().values():
        num_cpus += node_resources["CPU"]
    modin_cfg.NPartitions._put(num_cpus)
    modin_cfg.CpuCount._put(num_cpus)


def deserialize(obj):  # pragma: no cover
    """
    Replace unidist object references in `obj` with the values they point to.

    Parameters
    ----------
    obj : unidist.ObjectRef, iterable of unidist.ObjectRef, or mapping of keys to unidist.ObjectRef
        Object(s) to deserialize. Tuples and lists may mix references with plain
        values; anything else that is not a reference is passed through untouched.

    Returns
    -------
    obj
        The deserialized object(s). A tuple or list input always comes back as
        a ``list``; a dict holding at least one reference comes back as a new ``dict``.
    """
    if unidist.is_object_ref(obj):
        return UnidistWrapper.materialize(obj)

    if isinstance(obj, (tuple, list)):
        # ``materialize`` is given references only, so the non-reference elements
        # are left out of the batch; the positions of the references are remembered
        # so that the fetched values can be written back to the right slots.
        located = [
            (position, item)
            for position, item in enumerate(obj)
            if unidist.is_object_ref(item)
        ]
        positions = [position for position, _ in located]
        pending = [item for _, item in located]
        fetched = UnidistWrapper.materialize(pending)

        restored = list(obj)
        for position, value in zip(positions, fetched):
            restored[position] = value
        # Check that all objects have been deserialized
        assert not any(unidist.is_object_ref(item) for item in restored)
        return restored

    if isinstance(obj, dict) and any(
        unidist.is_object_ref(value) for value in obj.values()
    ):
        # reuse the sequence branch for the values, then pair them with the keys again
        fetched_values = deserialize(tuple(obj.values()))
        return dict(zip(obj.keys(), fetched_values))

    return obj


================================================
FILE: modin/core/execution/unidist/generic/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Generic functionality for unidist execution engine."""


================================================
FILE: modin/core/execution/unidist/generic/io/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Generic IO functionality for unidist execution engine."""

from .io import UnidistIO

__all__ = ["UnidistIO"]


================================================
FILE: modin/core/execution/unidist/generic/io/io.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""The module holds base class implementing required I/O over unidist."""

from modin.core.io import BaseIO


class UnidistIO(BaseIO):
    """
    Base class for doing I/O operations over unidist.

    The class body defines nothing of its own: every attribute and method comes from ``BaseIO``.
    """


================================================
FILE: modin/core/execution/unidist/generic/partitioning/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Generic partitioning functionality for unidist execution engine."""

from .partition_manager import GenericUnidistDataframePartitionManager

__all__ = [
    "GenericUnidistDataframePartitionManager",
]


================================================
FILE: modin/core/execution/unidist/generic/partitioning/partition_manager.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""The module holds Modin partition manager implemented for unidist."""

import numpy as np

from modin.core.dataframe.pandas.partitioning.partition_manager import (
    PandasDataframePartitionManager,
)
from modin.core.execution.unidist.common import UnidistWrapper


class GenericUnidistDataframePartitionManager(PandasDataframePartitionManager):
    """The class implements the interface in `PandasDataframePartitionManager`."""

    @classmethod
    def to_numpy(cls, partitions, **kwargs):
        """
        Convert `partitions` into a NumPy array.

        Parameters
        ----------
        partitions : NumPy array
            A 2-D array of partitions to convert to local NumPy array.
        **kwargs : dict
            Keyword arguments to pass to each partition ``.to_numpy()`` call.

        Returns
        -------
        NumPy array
            The blocks of all partitions assembled with ``np.block``
            following the 2-D layout of `partitions`.
        """
        if partitions.shape[1] == 1:
            # single column of partitions: fetch the objects first and convert them locally
            parts = cls.get_objects_from_partitions(partitions.flatten())
            parts = [part.to_numpy(**kwargs) for part in parts]
        else:
            # Convert remotely and fetch only the resulting arrays.
            # The caller's `kwargs` are captured by the remote function so that
            # both branches convert with the same options; nothing is passed
            # through ``apply`` itself.
            parts = UnidistWrapper.materialize(
                [
                    obj.apply(lambda df: df.to_numpy(**kwargs)).list_of_blocks[0]
                    for row in partitions
                    for obj in row
                ]
            )
        # regroup the flat, row-major list of arrays into the 2-D layout expected by ``np.block``
        rows, cols = partitions.shape
        parts = [parts[i * cols : (i + 1) * cols] for i in range(rows)]
        return np.block(parts)


================================================
FILE: modin/core/execution/unidist/implementations/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Experimental Modin's functionality related to unidist execution engine and optimized for specific storage formats."""


================================================
FILE: modin/core/execution/unidist/implementations/pandas_on_unidist/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Modin's functionality related to unidist execution engine and optimized for pandas storage format."""


================================================
FILE: modin/core/execution/unidist/implementations/pandas_on_unidist/dataframe/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Base Modin Dataframe class optimized for pandas on unidist execution."""

from .dataframe import PandasOnUnidistDataframe

__all__ = ["PandasOnUnidistDataframe"]


================================================
FILE: modin/core/execution/unidist/implementations/pandas_on_unidist/dataframe/dataframe.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses class that implements ``PandasDataframe`` using unidist."""

from modin.core.dataframe.pandas.dataframe.dataframe import PandasDataframe
from modin.utils import _inherit_docstrings

from ..partitioning.partition_manager import PandasOnUnidistDataframePartitionManager


class PandasOnUnidistDataframe(PandasDataframe):
    """
    The class implements the interface in ``PandasDataframe`` using unidist.

    Parameters
    ----------
    partitions : np.ndarray
        A 2D NumPy array of partitions.
    index : sequence
        The index for the dataframe. Converted to a ``pandas.Index``.
    columns : sequence
        The columns object for the dataframe. Converted to a ``pandas.Index``.
    row_lengths : list, optional
        The length of each partition in the rows. The "height" of
        each of the block partitions. Is computed if not provided.
    column_widths : list, optional
        The width of each partition in the columns. The "width" of
        each of the block partitions. Is computed if not provided.
    dtypes : pandas.Series, optional
        The data types for the dataframe columns.
    pandas_backend : {"pyarrow", None}, optional
        Backend used by pandas. None - means default NumPy backend.
    """

    # partition manager class bound to this dataframe implementation
    _partition_mgr_cls = PandasOnUnidistDataframePartitionManager

    def support_materialization_in_worker_process(self) -> bool:
        """
        Tell whether materialization in a worker process is supported.

        Returns
        -------
        bool
            Always ``False`` for this implementation.
        """
        # more details why this is not `True` in https://github.com/modin-project/modin/pull/6673
        return False

    @property
    @_inherit_docstrings(PandasDataframe.engine)
    def engine(self) -> str:
        # no docstring here: it is taken from ``PandasDataframe.engine`` by the decorator above
        return "Unidist"


================================================
FILE: modin/core/execution/unidist/implementations/pandas_on_unidist/io/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Base IO classes optimized for pandas on unidist execution."""

from .io import PandasOnUnidistIO

__all__ = ["PandasOnUnidistIO"]


================================================
FILE: modin/core/execution/unidist/implementations/pandas_on_unidist/io/io.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""The module holds the factory which performs I/O using pandas on unidist."""

import io

import numpy as np
import pandas
from pandas.io.common import get_handle, stringify_path

from modin.core.execution.unidist.common import SignalActor, UnidistWrapper
from modin.core.execution.unidist.generic.io import UnidistIO
from modin.core.io import (
    CSVDispatcher,
    ExcelDispatcher,
    FeatherDispatcher,
    FWFDispatcher,
    JSONDispatcher,
    ParquetDispatcher,
    SQLDispatcher,
)
from modin.core.storage_formats.pandas.parsers import (
    PandasCSVParser,
    PandasExcelParser,
    PandasFeatherParser,
    PandasFWFParser,
    PandasJSONParser,
    PandasParquetParser,
    PandasSQLParser,
)
from modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler
from modin.experimental.core.io import (
    ExperimentalCSVGlobDispatcher,
    ExperimentalCustomTextDispatcher,
    ExperimentalGlobDispatcher,
    ExperimentalSQLDispatcher,
)
from modin.experimental.core.storage_formats.pandas.parsers import (
    ExperimentalCustomTextParser,
    ExperimentalPandasCSVGlobParser,
    ExperimentalPandasJsonParser,
    ExperimentalPandasParquetParser,
    ExperimentalPandasPickleParser,
    ExperimentalPandasXmlParser,
)

from ..dataframe import PandasOnUnidistDataframe
from ..partitioning import PandasOnUnidistDataframePartition


class PandasOnUnidistIO(UnidistIO):
    """Factory providing methods for performing I/O operations using pandas as storage format on unidist as engine."""

    frame_cls = PandasOnUnidistDataframe
    frame_partition_cls = PandasOnUnidistDataframePartition
    query_compiler_cls = PandasQueryCompiler
    # class attributes injected into every reader/writer class built below
    build_args = dict(
        frame_partition_cls=PandasOnUnidistDataframePartition,
        query_compiler_cls=PandasQueryCompiler,
        frame_cls=PandasOnUnidistDataframe,
        base_io=UnidistIO,
    )

    def __make_read(*classes, build_args=build_args):
        # used to reduce code duplication:
        # builds an anonymous class out of `UnidistWrapper` and `classes`
        # with `build_args` as its attributes and hands back its `read`
        return type("", (UnidistWrapper, *classes), build_args).read

    def __make_write(*classes, build_args=build_args):
        # used to reduce code duplication:
        # same as `__make_read`, but hands back the `write` of the built class
        return type("", (UnidistWrapper, *classes), build_args).write

    read_csv = __make_read(PandasCSVParser, CSVDispatcher)
    read_fwf = __make_read(PandasFWFParser, FWFDispatcher)
    read_json = __make_read(PandasJSONParser, JSONDispatcher)
    read_parquet = __make_read(PandasParquetParser, ParquetDispatcher)
    to_parquet = __make_write(ParquetDispatcher)
    # Blocked on pandas-dev/pandas#12236. It is faster to default to pandas.
    # read_hdf = __make_read(PandasHDFParser, HDFReader)
    read_feather = __make_read(PandasFeatherParser, FeatherDispatcher)
    read_sql = __make_read(PandasSQLParser, SQLDispatcher)
    to_sql = __make_write(SQLDispatcher)
    read_excel = __make_read(PandasExcelParser, ExcelDispatcher)

    # experimental methods that don't exist in pandas
    read_csv_glob = __make_read(
        ExperimentalPandasCSVGlobParser, ExperimentalCSVGlobDispatcher
    )
    read_parquet_glob = __make_read(
        ExperimentalPandasParquetParser, ExperimentalGlobDispatcher
    )
    to_parquet_glob = __make_write(
        ExperimentalGlobDispatcher,
        build_args={**build_args, "base_write": UnidistIO.to_parquet},
    )
    read_json_glob = __make_read(
        ExperimentalPandasJsonParser, ExperimentalGlobDispatcher
    )
    to_json_glob = __make_write(
        ExperimentalGlobDispatcher,
        build_args={**build_args, "base_write": UnidistIO.to_json},
    )
    read_xml_glob = __make_read(ExperimentalPandasXmlParser, ExperimentalGlobDispatcher)
    to_xml_glob = __make_write(
        ExperimentalGlobDispatcher,
        build_args={**build_args, "base_write": UnidistIO.to_xml},
    )
    read_pickle_glob = __make_read(
        ExperimentalPandasPickleParser, ExperimentalGlobDispatcher
    )
    to_pickle_glob = __make_write(
        ExperimentalGlobDispatcher,
        build_args={**build_args, "base_write": UnidistIO.to_pickle},
    )
    read_custom_text = __make_read(
        ExperimentalCustomTextParser, ExperimentalCustomTextDispatcher
    )
    read_sql_distributed = __make_read(
        ExperimentalSQLDispatcher, build_args={**build_args, "base_read": read_sql}
    )

    del __make_read  # to not pollute class namespace
    del __make_write  # to not pollute class namespace

    @staticmethod
    def _to_csv_check_support(kwargs):
        """
        Check if parallel version of ``to_csv`` could be used.

        The parallel version is used only for a plain string path, a mode that
        is not a read/update one, an encoding that is not a UTF-16/UTF-32 flavour,
        and ``compression="infer"`` on a path whose extension does not make pandas
        infer an actual compression method.

        Parameters
        ----------
        kwargs : dict
            Keyword arguments passed to ``.to_csv()``. The keys ``"path_or_buf"``,
            ``"compression"``, ``"mode"`` and ``"encoding"`` must be present.

        Returns
        -------
        bool
            Whether parallel version of ``to_csv`` is applicable.
        """
        path_or_buf = kwargs["path_or_buf"]
        compression = kwargs["compression"]
        if not isinstance(path_or_buf, str):
            return False
        # case when the pointer is placed at the beginning of the file.
        if "r" in kwargs["mode"] and "+" in kwargs["mode"]:
            return False
        # Encodings with a BOM are not supported: instead of a single mark in the
        # resulting bytes there would be one per partition, so fall back to pandas
        # for `utf-16` and `utf-32` with all their aliases (`utf_32_be`, `utf_16_le`
        # and so on).
        encoding = kwargs["encoding"]
        if encoding is not None:
            encoding = encoding.lower()
            if "u" in encoding and ("16" in encoding or "32" in encoding):
                return False
        # anything but the literal "infer" (``None``, an explicit method, a dict)
        # is handled by the fallback implementation
        if compression != "infer":
            return False
        # With "infer", pandas picks a compression method from the file extension.
        # Every extension listed for `compression` in the pandas ``to_csv``
        # documentation must be rejected here, otherwise each partition would be
        # compressed on its own and appended to the same file. `.tar.gz`, `.tar.xz`
        # and `.tar.bz2` are covered by their last suffix. The comparison is
        # case-insensitive so that upper-case extensions are rejected too.
        inferred_compression_exts = (".gz", ".bz2", ".zip", ".xz", ".zst", ".tar")
        if path_or_buf.lower().endswith(inferred_compression_exts):
            return False
        return True

    @classmethod
    def to_csv(cls, qc, **kwargs):
        """
        Write records stored in the `qc` to a CSV file.

        When ``_to_csv_check_support`` rejects the arguments, the call is delegated
        to ``UnidistIO.to_csv``. Otherwise every chunk of rows is rendered to an
        in-memory buffer in parallel and the buffers are appended to the target
        one after another, the order being enforced through a ``SignalActor``.

        Parameters
        ----------
        qc : BaseQueryCompiler
            The query compiler of the Modin dataframe that we want to run ``to_csv`` on.
        **kwargs : dict
            Parameters for ``pandas.to_csv(**kwargs)``.
        """
        kwargs["path_or_buf"] = stringify_path(kwargs["path_or_buf"])
        if not cls._to_csv_check_support(kwargs):
            return UnidistIO.to_csv(qc, **kwargs)

        # one signal per chunk plus one more, because the last chunk
        # also sends a signal with index `partition_idx + 1`
        signals = SignalActor.remote(len(qc._modin_frame._partitions) + 1)

        def func(df, **kw):  # pragma: no cover
            """
            Dump a chunk of rows as csv, then save them to target maintaining order.

            Parameters
            ----------
            df : pandas.DataFrame
                A chunk of rows to write to a CSV file.
            **kw : dict
                Arguments to pass to ``pandas.to_csv(**kw)`` plus an extra argument
                `partition_idx` serving as chunk index to maintain rows order.

            Returns
            -------
            pandas.DataFrame
                An empty frame, used only to wait for the chunk to be written.
            """
            partition_idx = kw["partition_idx"]
            # the copy is made to not implicitly change the input parameters;
            # to write to an intermediate buffer, we need to change `path_or_buf` in kwargs
            csv_kwargs = kwargs.copy()
            if partition_idx != 0:
                # we need to create a new file only for first recording
                # all the rest should be recorded in appending mode
                if "w" in csv_kwargs["mode"]:
                    csv_kwargs["mode"] = csv_kwargs["mode"].replace("w", "a")
                # It is enough to write the header for the first partition
                csv_kwargs["header"] = False

            # for parallelization purposes, each partition is written to an intermediate buffer
            path_or_buf = csv_kwargs["path_or_buf"]
            is_binary = "b" in csv_kwargs["mode"]
            csv_kwargs["path_or_buf"] = io.BytesIO() if is_binary else io.StringIO()
            # `storage_options` is withheld while writing to the in-memory buffer
            storage_options = csv_kwargs.pop("storage_options", None)
            df.to_csv(**csv_kwargs)
            csv_kwargs.update({"storage_options": storage_options})
            content = csv_kwargs["path_or_buf"].getvalue()
            csv_kwargs["path_or_buf"].close()

            # each process waits for its turn to write to a file
            UnidistWrapper.materialize(signals.wait.remote(partition_idx))

            # preparing to write data from the buffer to a file
            with get_handle(
                path_or_buf,
                # in case when using URL in implicit text mode
                # pandas try to open `path_or_buf` in binary mode
                csv_kwargs["mode"] if is_binary else csv_kwargs["mode"] + "t",
                encoding=kwargs["encoding"],
                errors=kwargs["errors"],
                compression=kwargs["compression"],
                storage_options=kwargs.get("storage_options", None),
                is_text=not is_binary,
            ) as handles:
                handles.handle.write(content)

            # signal that the next process can start writing to the file
            UnidistWrapper.materialize(signals.send.remote(partition_idx + 1))
            # used for synchronization purposes
            return pandas.DataFrame()

        # signaling that the partition with id==0 can be written to the file
        UnidistWrapper.materialize(signals.send.remote(0))
        # Ensure that the metadata is synchronized
        qc._modin_frame._propagate_index_objs(axis=None)
        result = qc._modin_frame._partition_mgr_cls.map_axis_partitions(
            axis=1,
            partitions=qc._modin_frame._partitions,
            map_func=func,
            keep_partitioning=True,
            lengths=None,
            enumerate_partitions=True,
            max_retries=0,
        )
        # pending completion
        UnidistWrapper.materialize(
            [part.list_of_blocks[0] for row in result for part in row]
        )

    @classmethod
    def from_map(cls, func, iterable, *args, **kwargs):
        """
        Create a Modin `query_compiler` from a map function.

        This method will construct a Modin `query_compiler` split by row partitions.
        The number of row partitions matches the number of elements in the iterable object.

        Parameters
        ----------
        func : callable
            Function to map across the iterable object.
        iterable : Iterable
            An iterable object.
        *args : tuple
            Positional arguments to pass in `func`.
        **kwargs : dict
            Keyword arguments to pass in `func`.

        Returns
        -------
        BaseQueryCompiler
            QueryCompiler containing data returned by map function.
        """
        func = cls.frame_cls._partition_mgr_cls.preprocess_func(func)
        # one deployed task per element; each result becomes a one-partition row,
        # which gives a 2-D array with a single column of partitions
        partitions = np.array(
            [
                [
                    cls.frame_partition_cls(
                        UnidistWrapper.deploy(
                            func,
                            f_args=(obj,) + args,
                            f_kwargs=kwargs,
                            return_pandas_df=True,
                        )
                    )
                ]
                for obj in iterable
            ]
        )
        return cls.query_compiler_cls(cls.frame_cls(partitions))


================================================
FILE: modin/core/execution/unidist/implementations/pandas_on_unidist/partitioning/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Base Modin Dataframe classes related to its partitioning and optimized for pandas on unidist execution."""

from .partition import PandasOnUnidistDataframePartition
from .partition_manager import PandasOnUnidistDataframePartitionManager
from .virtual_partition import (
    PandasOnUnidistDataframeColumnPartition,
    PandasOnUnidistDataframeRowPartition,
    PandasOnUnidistDataframeVirtualPartition,
)

__all__ = [
    "PandasOnUnidistDataframePartitionManager",
    "PandasOnUnidistDataframePartition",
    "PandasOnUnidistDataframeVirtualPartition",
    "PandasOnUnidistDataframeColumnPartition",
    "PandasOnUnidistDataframeRowPartition",
]


================================================
FILE: modin/core/execution/unidist/implementations/pandas_on_unidist/partitioning/partition.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses class that wraps data (block partition) and its metadata."""

import warnings

import pandas
import unidist

from modin.core.dataframe.pandas.partitioning.partition import PandasDataframePartition
from modin.core.execution.unidist.common import UnidistWrapper
from modin.core.execution.unidist.common.utils import deserialize
from modin.logging import get_logger
from modin.pandas.indexing import compute_sliced_len

# Rebind the imported helper to the object returned by ``unidist.remote``;
# from here on the module-level name no longer refers to the plain function.
compute_sliced_len = unidist.remote(compute_sliced_len)


class PandasOnUnidistDataframePartition(PandasDataframePartition):
    """
    The class implements the interface in ``PandasDataframePartition``.

    Parameters
    ----------
    data : unidist.ObjectRef
        A reference to ``pandas.DataFrame`` that need to be wrapped with this class.
    length : unidist.ObjectRef or int, optional
        Length or reference to it of wrapped ``pandas.DataFrame``.
    width : unidist.ObjectRef or int, optional
        Width or reference to it of wrapped ``pandas.DataFrame``.
    ip : unidist.ObjectRef or str, optional
        Node IP address or reference to it that holds wrapped ``pandas.DataFrame``.
    call_queue : list, optional
        Call queue that needs to be executed on wrapped ``pandas.DataFrame``.
        An empty queue is used when nothing is passed.
    """

    execution_wrapper = UnidistWrapper

    def __init__(self, data, length=None, width=None, ip=None, call_queue=None):
        super().__init__()
        # Only object references are accepted as the wrapped data.
        assert unidist.is_object_ref(data)
        self._data = data
        self.call_queue = call_queue if call_queue is not None else []
        # Metadata caches; each may hold a concrete value, an object reference or ``None``.
        self._length_cache = length
        self._width_cache = width
        self._ip_cache = ip

        log = get_logger()
        # Short-circuit: the message is built and emitted only when
        # ``self._is_debug(log)`` is truthy.
        self._is_debug(log) and log.debug(
            "Partition ID: {}, Height: {}, Width: {}, Node IP: {}".format(
                self._identity,
                str(self._length_cache),
                str(self._width_cache),
                str(self._ip_cache),
            )
        )

    def apply(self, func, *args, **kwargs):
        """
        Apply a function to the object wrapped by this partition.

        Parameters
        ----------
        func : callable or unidist.ObjectRef
            A function to apply.
        *args : iterable
            Additional positional arguments to be passed in `func`.
        **kwargs : dict
            Additional keyword arguments to be passed in `func`.

        Returns
        -------
        PandasOnUnidistDataframePartition
            A new ``PandasOnUnidistDataframePartition`` object.

        Notes
        -----
        It does not matter if `func` is callable or an ``unidist.ObjectRef``. Unidist will
        handle it correctly either way. The keyword arguments are sent as a dictionary.

        Calls already pending in ``self.call_queue`` are submitted together with `func`,
        but the queue of this partition itself is left unchanged.
        """
        log = get_logger()
        self._is_debug(log) and log.debug(f"ENTER::Partition.apply::{self._identity}")
        data = self._data
        # Build a new list so that ``self.call_queue`` is not mutated.
        call_queue = self.call_queue + [[func, args, kwargs]]
        if len(call_queue) > 1:
            self._is_debug(log) and log.debug(
                f"SUBMIT::_apply_list_of_funcs::{self._identity}"
            )
            result, length, width, ip = _apply_list_of_funcs.remote(call_queue, data)
        else:
            # We handle `len(call_queue) == 1` in a different way because
            # this dramatically improves performance.
            result, length, width, ip = _apply_func.remote(data, func, *args, **kwargs)
            self._is_debug(log) and log.debug(f"SUBMIT::_apply_func::{self._identity}")
        self._is_debug(log) and log.debug(f"EXIT::Partition.apply::{self._identity}")
        return self.__constructor__(result, length, width, ip)

    def drain_call_queue(self):
        """Execute all operations stored in the call queue on the object wrapped by this partition."""
        log = get_logger()
        self._is_debug(log) and log.debug(
            f"ENTER::Partition.drain_call_queue::{self._identity}"
        )
        # Nothing is queued: return early (no EXIT message is logged on this path).
        if len(self.call_queue) == 0:
            return
        data = self._data
        call_queue = self.call_queue
        if len(call_queue) > 1:
            self._is_debug(log) and log.debug(
                f"SUBMIT::_apply_list_of_funcs::{self._identity}"
            )
            # The remote call is unpacked into four values: the new data, its
            # length, its width and the node IP.
            (
                self._data,
                new_length,
                new_width,
                self._ip_cache,
            ) = _apply_list_of_funcs.remote(call_queue, data)
        else:
            # We handle `len(call_queue) == 1` in a different way because
            # this dramatically improves performance.
            func, f_args, f_kwargs = call_queue[0]
            self._is_debug(log) and log.debug(f"SUBMIT::_apply_func::{self._identity}")
            (
                self._data,
                new_length,
                new_width,
                self._ip_cache,
            ) = _apply_func.remote(data, func, *f_args, **f_kwargs)
        self._is_debug(log) and log.debug(
            f"EXIT::Partition.drain_call_queue::{self._identity}"
        )
        # The queued calls were submitted, so the queue is reset.
        self.call_queue = []

        # GH#4732 if we already have evaluated width/length cached as ints,
        #  don't overwrite that cache with non-evaluated values.
        if not isinstance(self._length_cache, int):
            self._length_cache = new_length
        if not isinstance(self._width_cache, int):
            self._width_cache = new_width

    def wait(self):
        """Wait completing computations on the object wrapped by the partition."""
        # Submit any pending calls first so that ``self._data`` refers to their result.
        self.drain_call_queue()
        UnidistWrapper.wait(self._data)

    def mask(self, row_labels, col_labels):
        """
        Lazily create a mask that extracts the indices provided.

        Parameters
        ----------
        row_labels : list-like, slice or label
            The row labels for the rows to extract.
        col_labels : list-like, slice or label
            The column labels for the columns to extract.

        Returns
        -------
        PandasOnUnidistDataframePartition
            A new ``PandasOnUnidistDataframePartition`` object.
        """
        log = get_logger()
        self._is_debug(log) and log.debug(f"ENTER::Partition.mask::{self._identity}")
        new_obj = super().mask(row_labels, col_labels)
        # When the cached length is still an object reference and the rows are selected
        # with a slice, set the new partition's length cache without materializing it.
        if isinstance(row_labels, slice) and unidist.is_object_ref(self._length_cache):
            if row_labels == slice(None):
                # fast path - full axis take
                new_obj._length_cache = self._length_cache
            else:
                new_obj._length_cache = compute_sliced_len.remote(
                    row_labels, self._length_cache
                )
        # Same handling for the width cache and the column slice.
        if isinstance(col_labels, slice) and unidist.is_object_ref(self._width_cache):
            if col_labels == slice(None):
                # fast path - full axis take
                new_obj._width_cache = self._width_cache
            else:
                new_obj._width_cache = compute_sliced_len.remote(
                    col_labels, self._width_cache
                )
        self._is_debug(log) and log.debug(f"EXIT::Partition.mask::{self._identity}")
        return new_obj

    @classmethod
    def put(cls, obj):
        """
        Put an object into object store and wrap it with partition object.

        Parameters
        ----------
        obj : any
            An object to be put. Its ``index`` and ``columns`` attributes are
            used to fill the length and width of the new partition.

        Returns
        -------
        PandasOnUnidistDataframePartition
            A new ``PandasOnUnidistDataframePartition`` object.
        """
        return cls(cls.execution_wrapper.put(obj), len(obj.index), len(obj.columns))

    @classmethod
    def preprocess_func(cls, func):
        """
        Put a function into the object store to use in ``apply``.

        Parameters
        ----------
        func : callable
            A function to preprocess.

        Returns
        -------
        unidist.ObjectRef
            A reference to `func`.
        """
        return cls.execution_wrapper.put(func)

    def length(self, materialize=True):
        """
        Get the length of the object wrapped by this partition.

        Parameters
        ----------
        materialize : bool, default: True
            Whether to forcibly materialize the result into an integer. If ``False``
            was specified, may return a future of the result if it hasn't been
            materialized yet.

        Returns
        -------
        int or unidist.ObjectRef
            The length of the object.
        """
        if self._length_cache is None:
            # Nothing cached: either submit the pending calls (which also fills the
            # length/width caches) or request both dimensions with one remote call.
            if len(self.call_queue):
                self.drain_call_queue()
            else:
                (
                    self._length_cache,
                    self._width_cache,
                ) = _get_index_and_columns_size.remote(self._data)
        if unidist.is_object_ref(self._length_cache) and materialize:
            # Replace the cached reference with the materialized value.
            self._length_cache = UnidistWrapper.materialize(self._length_cache)
        return self._length_cache

    def width(self, materialize=True):
        """
        Get the width of the object wrapped by the partition.

        Parameters
        ----------
        materialize : bool, default: True
            Whether to forcibly materialize the result into an integer. If ``False``
            was specified, may return a future of the result if it hasn't been
            materialized yet.

        Returns
        -------
        int or unidist.ObjectRef
            The width of the object.
        """
        if self._width_cache is None:
            # Nothing cached: either submit the pending calls (which also fills the
            # length/width caches) or request both dimensions with one remote call.
            if len(self.call_queue):
                self.drain_call_queue()
            else:
                (
                    self._length_cache,
                    self._width_cache,
                ) = _get_index_and_columns_size.remote(self._data)
        if unidist.is_object_ref(self._width_cache) and materialize:
            # Replace the cached reference with the materialized value.
            self._width_cache = UnidistWrapper.materialize(self._width_cache)
        return self._width_cache

    def ip(self, materialize=True):
        """
        Get the node IP address of the object wrapped by this partition.

        Parameters
        ----------
        materialize : bool, default: True
            Whether to forcibly materialize the result into a string. If ``False``
            was specified, may return a future of the result if it hasn't been
            materialized yet.

        Returns
        -------
        str or unidist.ObjectRef
            IP address of the node that holds the data.
        """
        if self._ip_cache is None:
            if len(self.call_queue):
                # Draining the queue also stores the IP reported by the remote call.
                self.drain_call_queue()
            else:
                # Apply a function returning an empty frame only to obtain the IP
                # recorded on the resulting partition.
                self._ip_cache = self.apply(lambda df: pandas.DataFrame([]))._ip_cache
        if materialize and unidist.is_object_ref(self._ip_cache):
            self._ip_cache = UnidistWrapper.materialize(self._ip_cache)
        return self._ip_cache


@unidist.remote(num_returns=2)
def _get_index_and_columns_size(df):  # pragma: no cover
    """
    Compute both dimensions of a pandas DataFrame in a worker process.

    Parameters
    ----------
    df : pandas.DataFrame
        The frame whose dimensions are requested.

    Returns
    -------
    int
        Length of ``df.index`` (the number of rows).
    int
        Length of ``df.columns`` (the number of columns).
    """
    n_rows = len(df.index)
    n_cols = len(df.columns)
    return n_rows, n_cols


@unidist.remote(num_returns=4)
def _apply_func(partition, func, *args, **kwargs):  # pragma: no cover
    """
    Run a single function on the partition in a worker process.

    Parameters
    ----------
    partition : pandas.DataFrame
        The pandas DataFrame to run ``func`` on.
    func : callable
        The function to execute against the partition.
    *args : list
        Positional arguments forwarded to ``func``.
    **kwargs : dict
        Keyword arguments forwarded to ``func``.

    Returns
    -------
    pandas.DataFrame
        The object returned by ``func``.
    int
        ``len`` of the result, or 0 if the result has no ``__len__``.
    int
        Number of columns of the result, or 0 if it has no ``columns``.
    str
        The node IP address of the worker process.

    Notes
    -----
    Directly passing a call queue entry (i.e. a list of [func, args, kwargs]) instead of
    destructuring it causes a performance penalty.
    """

    def _invoke(copy_first):
        # ``FutureWarning``-s are silenced for the whole attempt, including the copy.
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=FutureWarning)
            if copy_first:
                return func(partition.copy(), *args, **kwargs)
            return func(partition, *args, **kwargs)

    try:
        result = _invoke(False)
    # Sometimes Arrow forces us to make a copy of an object before we operate on it. We
    # don't want the error to propagate to the user, and we want to avoid copying unless
    # we absolutely have to.
    except ValueError:
        result = _invoke(True)

    n_rows = len(result) if hasattr(result, "__len__") else 0
    columns = getattr(result, "columns", ())
    n_cols = len(columns)
    return result, n_rows, n_cols, unidist.get_ip()


@unidist.remote(num_returns=4)
def _apply_list_of_funcs(call_queue, partition):  # pragma: no cover
    """
    Run every queued call, in order, on the partition in a worker process.

    Parameters
    ----------
    call_queue : list
        Queue of ``(func, args, kwargs)`` entries to execute on the partition.
    partition : pandas.DataFrame
        The pandas DataFrame the queue starts from; the output of each call
        becomes the input of the next one.

    Returns
    -------
    pandas.DataFrame
        The object produced by the last call.
    int
        ``len`` of the result, or 0 if the result has no ``__len__``.
    int
        Number of columns of the result, or 0 if it has no ``columns``.
    str
        The node IP address of the worker process.
    """

    def _run_step(step, frame, step_args, step_kwargs, copy_first):
        # ``FutureWarning``-s are silenced for the whole attempt, including the copy.
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=FutureWarning)
            if copy_first:
                return step(frame.copy(), *step_args, **step_kwargs)
            return step(frame, *step_args, **step_kwargs)

    for raw_func, raw_args, raw_kwargs in call_queue:
        step = deserialize(raw_func)
        step_args = deserialize(raw_args)
        step_kwargs = deserialize(raw_kwargs)
        try:
            partition = _run_step(step, partition, step_args, step_kwargs, False)
        # Sometimes Arrow forces us to make a copy of an object before we operate on it. We
        # don't want the error to propagate to the user, and we want to avoid copying unless
        # we absolutely have to.
        except ValueError:
            partition = _run_step(step, partition, step_args, step_kwargs, True)

    if hasattr(partition, "__len__"):
        n_rows = len(partition)
    else:
        n_rows = 0
    if hasattr(partition, "columns"):
        n_cols = len(partition.columns)
    else:
        n_cols = 0
    return partition, n_rows, n_cols, unidist.get_ip()


================================================
FILE: modin/core/execution/unidist/implementations/pandas_on_unidist/partitioning/partition_manager.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses class that implements ``GenericUnidistDataframePartitionManager`` using Unidist."""

from modin.core.execution.modin_aqp import progress_bar_wrapper
from modin.core.execution.unidist.common import UnidistWrapper
from modin.core.execution.unidist.generic.partitioning import (
    GenericUnidistDataframePartitionManager,
)

from .partition import PandasOnUnidistDataframePartition
from .virtual_partition import (
    PandasOnUnidistDataframeColumnPartition,
    PandasOnUnidistDataframeRowPartition,
)


class PandasOnUnidistDataframePartitionManager(GenericUnidistDataframePartitionManager):
    """The class implements the interface in `PandasDataframePartitionManager`."""

    # Block partitions are stored as PandasOnUnidistDataframePartition objects;
    # the column/row classes are the matching axis partitions.
    _partition_class = PandasOnUnidistDataframePartition
    _column_partitions_class = PandasOnUnidistDataframeColumnPartition
    _row_partition_class = PandasOnUnidistDataframeRowPartition
    _execution_wrapper = UnidistWrapper

    @classmethod
    def wait_partitions(cls, partitions):
        """
        Block until the computations behind `partitions` finish, without materializing them.

        The blocks of all given partitions are gathered into one list and
        waited on with a single call.

        Parameters
        ----------
        partitions : np.ndarray
            NumPy array with ``PandasDataframePartition``-s.
        """
        pending_blocks = []
        for partition in partitions:
            pending_blocks.extend(partition.list_of_blocks)
        UnidistWrapper.wait(pending_blocks)


def _make_wrapped_method(name: str):
    """
    Rebind a generic manager method so that it goes through the progress bar wrapper.

    Parameters
    ----------
    name : str
        Name of the `GenericUnidistDataframePartitionManager` attribute to reuse.

    Notes
    -----
    - The plain function is taken from `__func__` so that `classmethod` is applied
      only once.
    - The wrapped method is set on `PandasOnUnidistDataframePartitionManager`.
    """
    generic_method = getattr(GenericUnidistDataframePartitionManager, name)
    wrapped = progress_bar_wrapper(generic_method.__func__)
    setattr(PandasOnUnidistDataframePartitionManager, name, classmethod(wrapped))


# At import time, rebind each listed ``GenericUnidistDataframePartitionManager``
# classmethod on ``PandasOnUnidistDataframePartitionManager`` with
# ``progress_bar_wrapper`` applied (see ``_make_wrapped_method``).
for method in (
    "map_partitions",
    "lazy_map_partitions",
    "map_axis_partitions",
    "_apply_func_to_list_of_partitions",
    "apply_func_to_select_indices",
    "apply_func_to_select_indices_along_full_axis",
    "apply_func_to_indices_both_axis",
    "n_ary_operation",
):
    _make_wrapped_method(method)


================================================
FILE: modin/core/execution/unidist/implementations/pandas_on_unidist/partitioning/virtual_partition.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses classes responsible for storing a virtual partition and applying a function to it."""

import warnings

import pandas
import unidist

from modin.core.dataframe.pandas.partitioning.axis_partition import (
    PandasDataframeAxisPartition,
)
from modin.core.execution.unidist.common import UnidistWrapper
from modin.core.execution.unidist.common.utils import deserialize
from modin.utils import _inherit_docstrings

from .partition import PandasOnUnidistDataframePartition


class PandasOnUnidistDataframeVirtualPartition(PandasDataframeAxisPartition):
    """
    The class implements the interface in ``PandasDataframeAxisPartition``.

    Parameters
    ----------
    list_of_partitions : Union[list, PandasOnUnidistDataframePartition]
        List of ``PandasOnUnidistDataframePartition`` and
        ``PandasOnUnidistDataframeVirtualPartition`` objects, or a single
        ``PandasOnUnidistDataframePartition``.
    get_ip : bool, default: False
        Whether to get node IP addresses to conforming partitions or not.
    full_axis : bool, default: True
        Whether or not the virtual partition encompasses the whole axis.
    call_queue : list, optional
        A list of tuples (callable, args, kwargs) that contains deferred calls.
    length : unidist.ObjectRef or int, optional
        Length, or reference to length, of wrapped ``pandas.DataFrame``.
    width : unidist.ObjectRef or int, optional
        Width, or reference to width, of wrapped ``pandas.DataFrame``.
    """

    # Number of metadata values returned alongside every resulting split.
    _PARTITIONS_METADATA_LEN = 3  # (length, width, ip)
    partition_type = PandasOnUnidistDataframePartition
    axis = None

    # these variables are intentionally initialized at runtime (see #6023)
    _DEPLOY_AXIS_FUNC = None
    _DEPLOY_SPLIT_FUNC = None
    _DRAIN_FUNC = None

    @classmethod
    def _get_deploy_axis_func(cls):  # noqa: GL08
        # Put ``PandasDataframeAxisPartition.deploy_axis_func`` into the object store on
        # first use and reuse the stored reference afterwards.
        if cls._DEPLOY_AXIS_FUNC is None:
            cls._DEPLOY_AXIS_FUNC = UnidistWrapper.put(
                PandasDataframeAxisPartition.deploy_axis_func
            )
        return cls._DEPLOY_AXIS_FUNC

    @classmethod
    def _get_deploy_split_func(cls):  # noqa: GL08
        # Same lazy caching for ``PandasDataframeAxisPartition.deploy_splitting_func``.
        if cls._DEPLOY_SPLIT_FUNC is None:
            cls._DEPLOY_SPLIT_FUNC = UnidistWrapper.put(
                PandasDataframeAxisPartition.deploy_splitting_func
            )
        return cls._DEPLOY_SPLIT_FUNC

    @classmethod
    def _get_drain_func(cls):  # noqa: GL08
        # Same lazy caching for ``PandasDataframeAxisPartition.drain``.
        if cls._DRAIN_FUNC is None:
            cls._DRAIN_FUNC = UnidistWrapper.put(PandasDataframeAxisPartition.drain)
        return cls._DRAIN_FUNC

    @property
    def list_of_ips(self):
        """
        Get the IPs holding the physical objects composing this partition.

        Returns
        -------
        List
            A list of IPs as ``unidist.ObjectRef`` or str.
        """
        # Drain each block partition's call queue before asking for its IP; the IP
        # itself is requested without forcing materialization.
        result = [None] * len(self.list_of_block_partitions)
        for idx, partition in enumerate(self.list_of_block_partitions):
            partition.drain_call_queue()
            result[idx] = partition.ip(materialize=False)
        return result

    @classmethod
    @_inherit_docstrings(PandasDataframeAxisPartition.deploy_splitting_func)
    def deploy_splitting_func(
        cls,
        axis,
        func,
        f_args,
        f_kwargs,
        num_splits,
        *partitions,
        extract_metadata=False,
    ):
        # With metadata, every split comes with ``_PARTITIONS_METADATA_LEN`` extra
        # return values; without it only the splits themselves are returned.
        return _deploy_unidist_func.options(
            num_returns=(
                num_splits * (1 + cls._PARTITIONS_METADATA_LEN)
                if extract_metadata
                else num_splits
            ),
        ).remote(
            cls._get_deploy_split_func(),
            axis,
            func,
            f_args,
            f_kwargs,
            num_splits,
            *partitions,
            extract_metadata=extract_metadata,
        )

    @classmethod
    def deploy_axis_func(
        cls,
        axis,
        func,
        f_args,
        f_kwargs,
        num_splits,
        maintain_partitioning,
        *partitions,
        min_block_size,
        lengths=None,
        manual_partition=False,
        max_retries=None,
    ):
        """
        Deploy a function along a full axis.

        Parameters
        ----------
        axis : {0, 1}
            The axis to perform the function along.
        func : callable
            The function to perform.
        f_args : list or tuple
            Positional arguments to pass to ``func``.
        f_kwargs : dict
            Keyword arguments to pass to ``func``.
        num_splits : int
            The number of splits to return (see ``split_result_of_axis_func_pandas``).
        maintain_partitioning : bool
            If True, keep the old partitioning if possible.
            If False, create a new partition layout.
        *partitions : iterable
            All partitions that make up the full axis (row or column).
        min_block_size : int
            Minimum number of rows/columns in a single split.
        lengths : list, optional
            The list of lengths to shuffle the object.
        manual_partition : bool, default: False
            If True, partition the result with `lengths`.
        max_retries : int, default: None
            The max number of times to retry the func. It is forwarded to the
            remote call options only when it is not ``None``.

        Returns
        -------
        list
            A list of ``unidist.ObjectRef``-s.
        """
        # The number of splits is ``num_splits`` unless ``lengths`` is given, in which
        # case it is ``len(lengths)``; each split is returned with its metadata.
        return _deploy_unidist_func.options(
            num_returns=(num_splits if lengths is None else len(lengths))
            * (1 + cls._PARTITIONS_METADATA_LEN),
            **({"max_retries": max_retries} if max_retries is not None else {}),
        ).remote(
            cls._get_deploy_axis_func(),
            axis,
            func,
            f_args,
            f_kwargs,
            num_splits,
            maintain_partitioning,
            *partitions,
            manual_partition=manual_partition,
            min_block_size=min_block_size,
            lengths=lengths,
        )

    @classmethod
    def deploy_func_between_two_axis_partitions(
        cls,
        axis,
        func,
        f_args,
        f_kwargs,
        num_splits,
        len_of_left,
        other_shape,
        *partitions,
        min_block_size,
    ):
        """
        Deploy a function along a full axis between two data sets.

        Parameters
        ----------
        axis : {0, 1}
            The axis to perform the function along.
        func : callable
            The function to perform.
        f_args : list or tuple
            Positional arguments to pass to ``func``.
        f_kwargs : dict
            Keyword arguments to pass to ``func``.
        num_splits : int
            The number of splits to return (see ``split_result_of_axis_func_pandas``).
        len_of_left : int
            The number of values in `partitions` that belong to the left data set.
        other_shape : np.ndarray
            The shape of right frame in terms of partitions, i.e.
            (other_shape[i-1], other_shape[i]) will indicate slice to restore i-1 axis partition.
        *partitions : iterable
            All partitions that make up the full axis (row or column) for both data sets.
        min_block_size : int
            Minimum number of rows/columns in a single split.

        Returns
        -------
        list
            A list of ``unidist.ObjectRef``-s.
        """
        # Unlike the other deploy methods, the deployer is passed directly here rather
        # than through a cached object-store reference.
        return _deploy_unidist_func.options(
            num_returns=num_splits * (1 + cls._PARTITIONS_METADATA_LEN)
        ).remote(
            PandasDataframeAxisPartition.deploy_func_between_two_axis_partitions,
            axis,
            func,
            f_args,
            f_kwargs,
            num_splits,
            len_of_left,
            other_shape,
            *partitions,
            min_block_size=min_block_size,
        )

    def wait(self):
        """Wait completing computations on the object wrapped by the partition."""
        # Drain pending calls first, then wait on the resulting blocks.
        self.drain_call_queue()
        futures = self.list_of_blocks
        UnidistWrapper.wait(futures)


@_inherit_docstrings(PandasOnUnidistDataframeVirtualPartition)
class PandasOnUnidistDataframeColumnPartition(PandasOnUnidistDataframeVirtualPartition):
    # Column partitions use axis 0; the only difference from the base class.
    axis = 0


@_inherit_docstrings(PandasOnUnidistDataframeVirtualPartition)
class PandasOnUnidistDataframeRowPartition(PandasOnUnidistDataframeVirtualPartition):
    # Row partitions use axis 1; the only difference from the base class.
    axis = 1


@unidist.remote
def _deploy_unidist_func(
    deployer,
    axis,
    f_to_deploy,
    f_args,
    f_kwargs,
    *args,
    extract_metadata=True,
    **kwargs,
):  # pragma: no cover
    """
    Execute a function on an axis partition in a worker process.

    The call is delegated to `deployer`, one of the ``PandasDataframeAxisPartition``
    deploy methods, which in turn runs ``f_to_deploy`` on a unidist worker process.
    The provided ``f_args`` are deserialized here (on the unidist worker) before
    `deployer` is called; ``f_kwargs`` are passed through as they are.

    Parameters
    ----------
    deployer : callable
        A `PandasDataFrameAxisPartition.deploy_*` method that will call ``f_to_deploy``.
    axis : {0, 1}
        The axis to perform the function along.
    f_to_deploy : callable or unidist.ObjectRef
        The function to deploy.
    f_args : list or tuple
        Positional arguments to pass to ``f_to_deploy``.
    f_kwargs : dict
        Keyword arguments to pass to ``f_to_deploy``.
    *args : list
        Positional arguments to pass to ``deployer``.
    extract_metadata : bool, default: True
        Whether to return metadata (length, width, ip) of the result. Passing `False` may relax
        the load on object storage as the remote function would return 4 times fewer futures.
        Passing `False` makes sense for temporary results where you know for sure that the
        metadata will never be requested.
    **kwargs : dict
        Keyword arguments to pass to ``deployer``.

    Returns
    -------
    list : Union[tuple, list]
        The result of the function call, and metadata for it.

    Notes
    -----
    Unidist functions are not detected by codecov (thus pragma: no cover).
    """
    f_args = deserialize(f_args)
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=FutureWarning)
        result = deployer(axis, f_to_deploy, f_args, f_kwargs, *args, **kwargs)

    if extract_metadata:
        node_ip = unidist.get_ip()
        # A single frame is returned as a flat ``(data, length, width, ip)`` tuple.
        if isinstance(result, pandas.DataFrame):
            return result, len(result), len(result.columns), node_ip

        # Otherwise interleave every piece with its metadata; the dimensions are
        # reported only when all the pieces are DataFrames.
        flattened = []
        if all(isinstance(piece, pandas.DataFrame) for piece in result):
            for piece in result:
                flattened.extend([piece, len(piece), len(piece.columns), node_ip])
        else:
            for piece in result:
                flattened.extend([piece, None, None, node_ip])
        return flattened

    return result


================================================
FILE: modin/core/execution/utils.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""General utils for execution module."""

import contextlib
import os

from modin.error_message import ErrorMessage


@contextlib.contextmanager
def set_env(**environ):
    """
    Temporarily set the process environment variables.

    Parameters
    ----------
    **environ : dict
        Names and values of the environment variables to set for the duration
        of the ``with`` block.

    Yields
    ------
    None
        Control is handed to the ``with`` body once the variables are set.

    Notes
    -----
    On exit the whole environment is restored to the snapshot taken on entry,
    so any other change made to ``os.environ`` inside the block is reverted too.
    """
    old_environ = os.environ.copy()
    try:
        # The update happens inside ``try`` so that a failure half-way through
        # (e.g. a non-string value) still rolls back the variables already set.
        os.environ.update(environ)
        yield
    finally:
        os.environ.clear()
        os.environ.update(old_environ)


# The ``remote_function`` implementation is selected once, when this module is imported.
if "_MODIN_DOC_CHECKER_" in os.environ:

    # The doc checker should get the non-processed functions
    def remote_function(func, ignore_defaults=False):
        return func


# Check if the function already exists to avoid circular imports
elif "remote_function" not in dir():
    # TODO(https://github.com/modin-project/modin/issues/7429): Use
    # frame-level engine config.

    from modin.config import Engine

    # For Ray and Unidist only the ``put`` callable is chosen here; the caching
    # ``remote_function`` defined at the end of this branch relies on it.
    if Engine.get() == "Ray":
        from modin.core.execution.ray.common import RayWrapper

        _preprocess_func = RayWrapper.put
    elif Engine.get() == "Unidist":
        from modin.core.execution.unidist.common import UnidistWrapper

        _preprocess_func = UnidistWrapper.put
    elif Engine.get() == "Dask":
        from modin.core.execution.dask.common import DaskWrapper

        # The function cache is not supported for Dask
        def remote_function(func, ignore_defaults=False):
            return DaskWrapper.put(func)

    else:

        # Any other engine: the function is returned unchanged.
        def remote_function(func, ignore_defaults=False):
            return func

    # Reached only when none of the branches above defined ``remote_function``,
    # i.e. for the engines that set ``_preprocess_func``.
    if "remote_function" not in dir():
        # Maps a cache key (see below) to the reference returned by ``_preprocess_func``.
        _remote_function_cache = {}

        def remote_function(func, ignore_defaults=False):  # noqa: F811
            """
            Preprocess `func` with ``_preprocess_func``, caching the resulting reference.

            Parameters
            ----------
            func : callable
                The function to preprocess.
            ignore_defaults : bool, default: False
                Whether to cache a nested function even if it has default
                argument values.

            Returns
            -------
            object
                The cached or newly created reference to `func`, or `func` itself
                when it is a nested function that can not be cached.
            """
            if "<locals>" in func.__qualname__:  # Nested function
                if func.__closure__:
                    ErrorMessage.single_warning(
                        f"The nested function {func} can not be cached, because "
                        + "it captures objects from the outer scope."
                    )
                    return func
                if not ignore_defaults and func.__defaults__:
                    ErrorMessage.single_warning(
                        f"The nested function {func} can not be cached, because it has "
                        + "default values. Use `ignore_defaults` to forcibly enable caching."
                    )
                    return func
                # For the nested functions, use __code__ as the key, because it's the same
                # object for each instance of the function.
                key = id(func.__code__)
            else:
                # Non-nested functions are keyed by the function object itself.
                key = func
            ref = _remote_function_cache.get(key, None)
            if ref is None:
                # First request for this key: preprocess the function and remember the result.
                ref = _preprocess_func(func)
                _remote_function_cache[key] = ref
            return ref


================================================
FILE: modin/core/io/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""IO functions implementations."""

from .column_stores.feather_dispatcher import FeatherDispatcher
from .column_stores.hdf_dispatcher import HDFDispatcher
from .column_stores.parquet_dispatcher import ParquetDispatcher
from .file_dispatcher import FileDispatcher
from .io import BaseIO
from .sql.sql_dispatcher import SQLDispatcher
from .text.csv_dispatcher import CSVDispatcher
from .text.excel_dispatcher import ExcelDispatcher
from .text.fwf_dispatcher import FWFDispatcher
from .text.json_dispatcher import JSONDispatcher
from .text.text_file_dispatcher import TextFileDispatcher

# Public API of the ``modin.core.io`` package: the base IO class together with
# the format-specific dispatchers imported above.
__all__ = [
    "BaseIO",
    "CSVDispatcher",
    "FWFDispatcher",
    "JSONDispatcher",
    "FileDispatcher",
    "TextFileDispatcher",
    "ParquetDispatcher",
    "HDFDispatcher",
    "FeatherDispatcher",
    "SQLDispatcher",
    "ExcelDispatcher",
]


================================================
FILE: modin/core/io/column_stores/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Columnar store format type IO functions implementations."""


================================================
FILE: modin/core/io/column_stores/column_store_dispatcher.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
Module houses `ColumnStoreDispatcher` class.

`ColumnStoreDispatcher` contains utils for handling columnar store format files,
inherits util functions for handling files from `FileDispatcher` class and can be
used as base class for dispatchers of specific columnar store formats.
"""

import numpy as np
import pandas

from modin.config import MinColumnPartitionSize, MinRowPartitionSize, NPartitions
from modin.core.io.file_dispatcher import FileDispatcher
from modin.core.storage_formats.pandas.utils import compute_chunksize


class ColumnStoreDispatcher(FileDispatcher):
    """
    Class handles utils for reading columnar store format files.

    Inherits some util functions for processing files from `FileDispatcher` class.
    """

    @classmethod
    def call_deploy(cls, fname, col_partitions, **kwargs):
        """
        Deploy remote tasks to the workers with passed parameters.

        One task is deployed per entry of `col_partitions`. The per-task results
        are stacked and transposed, so that every row of the returned array holds
        the same output slot of all the tasks.

        Parameters
        ----------
        fname : str, path object or file-like object
            Name of the file to read.
        col_partitions : list
            List of arrays with columns names that should be read
            by each partition.
        **kwargs : dict
            Parameters of deploying read_* function.

        Returns
        -------
        np.ndarray
            Array with references to the task deploy result for each partition.
        """
        # Query the configuration once so that every task is deployed with the
        # very same number of splits.
        num_splits = NPartitions.get()
        return np.array(
            [
                cls.deploy(
                    func=cls.parse,
                    f_kwargs={
                        "fname": fname,
                        "columns": cols,
                        "num_splits": num_splits,
                        **kwargs,
                    },
                    # `num_splits` data references plus two trailing references:
                    # `build_index` reads the second to last one and
                    # `build_dtypes` is fed with the last one.
                    num_returns=num_splits + 2,
                )
                for cols in col_partitions
            ]
        ).T

    @classmethod
    def build_partition(cls, partition_ids, row_lengths, column_widths):
        """
        Build array with partitions of `cls.frame_partition_cls` class.

        Parameters
        ----------
        partition_ids : list
            Array with references to the partitions data.
        row_lengths : list
            Partitions rows lengths.
        column_widths : list
            Number of columns in each partition.

        Returns
        -------
        np.ndarray
            Array with shape equal to the shape of `partition_ids` and
            filled with partition objects.
        """
        return np.array(
            [
                [
                    cls.frame_partition_cls(
                        ref,
                        length=row_lengths[i],
                        width=column_widths[j],
                    )
                    for j, ref in enumerate(row)
                ]
                for i, row in enumerate(partition_ids)
            ]
        )

    @classmethod
    def build_index(cls, partition_ids):
        """
        Compute index and its split sizes of resulting Modin DataFrame.

        Parameters
        ----------
        partition_ids : list
            Array with references to the partitions data.

        Returns
        -------
        index : pandas.Index
            Index of resulting Modin DataFrame.
        row_lengths : list
            List with lengths of index chunks.
        """
        # The second to last row of `partition_ids` carries the index information:
        # either the number of rows (an int) or the index object itself.
        index_len = (
            0 if len(partition_ids) == 0 else cls.materialize(partition_ids[-2][0])
        )
        if isinstance(index_len, int):
            index = pandas.RangeIndex(index_len)
        else:
            index = index_len
            index_len = len(index)
        num_partitions = NPartitions.get()
        min_block_size = MinRowPartitionSize.get()
        index_chunksize = compute_chunksize(index_len, num_partitions, min_block_size)
        if index_chunksize > index_len:
            # Everything fits into the first partition, the rest stay empty.
            row_lengths = [index_len] + [0 for _ in range(num_partitions - 1)]
        else:
            row_lengths = [
                (
                    index_chunksize
                    if (i + 1) * index_chunksize < index_len
                    # The last non-empty chunk takes the remainder; chunks past
                    # the end of the index are clamped to zero.
                    else max(0, index_len - (index_chunksize * i))
                )
                for i in range(num_partitions)
            ]
        return index, row_lengths

    @classmethod
    def build_columns(cls, columns, num_row_parts=None):
        """
        Split columns into chunks that should be read by workers.

        Parameters
        ----------
        columns : list
            List of columns that should be read from file.
        num_row_parts : int, optional
            Number of parts the dataset is split into. This parameter is used
            to align the column partitioning with it so we won't end up with an
            over partitioned frame. ``None`` and ``0`` both mean that there is
            nothing to align with.

        Returns
        -------
        col_partitions : list
            List of lists with columns for reading by workers.
        column_widths : list
            List with lengths of `col_partitions` subarrays
            (number of columns that should be read by workers).
        """
        columns_length = len(columns)
        if columns_length == 0:
            return [], []
        if not num_row_parts:
            # in column formats we mostly read columns in parallel rather than rows,
            # so we try to chunk columns as much as possible
            min_block_size = 1
        else:
            # `round()` yields 0 once `num_row_parts` is more than twice as large
            # as `NPartitions`; clamp to 1 so the floor division below can not
            # raise `ZeroDivisionError`.
            num_remaining_parts = max(1, round(NPartitions.get() / num_row_parts))
            min_block_size = min(
                columns_length // num_remaining_parts, MinColumnPartitionSize.get()
            )
        column_splits = compute_chunksize(
            columns_length, NPartitions.get(), max(1, min_block_size)
        )
        col_partitions = [
            columns[i : i + column_splits]
            for i in range(0, columns_length, column_splits)
        ]
        column_widths = [len(c) for c in col_partitions]
        return col_partitions, column_widths

    @classmethod
    def build_dtypes(cls, partition_ids, columns):
        """
        Compute common for all partitions `dtypes` for each of the DataFrame column.

        Parameters
        ----------
        partition_ids : list
            Array with references to the partitions data.
        columns : list
            List of columns that should be read from file.

        Returns
        -------
        dtypes : pandas.Series
            Series with dtypes for columns.
        """
        # Materialize the per-task dtypes and glue them together in task order,
        # then relabel the result with the requested column names.
        dtypes = pandas.concat(cls.materialize(list(partition_ids)), axis=0)
        dtypes.index = columns
        return dtypes

    @classmethod
    def build_query_compiler(cls, path, columns, **kwargs):
        """
        Build query compiler from deployed tasks outputs.

        Parameters
        ----------
        path : str, path object or file-like object
            Path to the file to read.
        columns : list
            List of columns that should be read from file.
        **kwargs : dict
            Parameters of deploying read_* function.

        Returns
        -------
        new_query_compiler : BaseQueryCompiler
            Query compiler with imported data for further processing.
        """
        col_partitions, column_widths = cls.build_columns(columns)
        partition_ids = cls.call_deploy(path, col_partitions, **kwargs)
        index, row_lens = cls.build_index(partition_ids)
        # The two trailing rows are not data partitions: they are consumed by
        # `build_index` (second to last) and `build_dtypes` (last).
        remote_parts = cls.build_partition(partition_ids[:-2], row_lens, column_widths)
        dtypes = (
            cls.build_dtypes(partition_ids[-1], columns)
            if len(partition_ids) > 0
            else None
        )
        new_query_compiler = cls.query_compiler_cls(
            cls.frame_cls(
                remote_parts,
                index,
                columns,
                row_lens,
                column_widths,
                dtypes=dtypes,
            )
        )
        return new_query_compiler


================================================
FILE: modin/core/io/column_stores/feather_dispatcher.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses `FeatherDispatcher` class, that is used for reading `.feather` files."""

from pandas.io.common import stringify_path

from modin.core.io.column_stores.column_store_dispatcher import ColumnStoreDispatcher
from modin.core.io.file_dispatcher import OpenFile
from modin.utils import import_optional_dependency


class FeatherDispatcher(ColumnStoreDispatcher):
    """Class handles utils for reading `.feather` files."""

    @classmethod
    def _read(cls, path, columns=None, **kwargs):
        """
        Read data from the file path, returning a query compiler.

        Parameters
        ----------
        path : str or file-like object
            The filepath of the feather file.
        columns : array-like, optional
            Columns to read from file. If not provided, all columns are read.
        **kwargs : dict
            `read_feather` function kwargs.

        Returns
        -------
        BaseQueryCompiler
            Query compiler with imported data for further processing.

        Notes
        -----
        `PyArrow` engine and local files only are supported for now,
        multi threading is set to False by default.
        PyArrow feather is used. Please refer to the documentation here
        https://arrow.apache.org/docs/python/api.html#feather-format
        """
        path = stringify_path(path)
        path = cls.get_path(path)
        # Look the option up once and tolerate its absence, so that opening the
        # file and deploying the read tasks agree on the very same value.
        storage_options = kwargs.get("storage_options")
        if columns is None:
            import_optional_dependency(
                "pyarrow", "pyarrow is required to read feather files."
            )
            from pyarrow import ipc

            with OpenFile(
                path,
                **(storage_options or {}),
            ) as file:
                # Opens the file to extract its metadata; the schema is grabbed
                # while the underlying file handle is still open
                schema = ipc.open_file(file).schema
            # TODO: pyarrow's schema contains much more metadata than just column names, it also
            # has dtypes and index information that we could use when building a dataframe
            #
            # `pandas_metadata` is None for files that were not written by pandas,
            # in this case there are no index columns to filter out
            pandas_metadata = schema.pandas_metadata or {}
            index_cols = frozenset(
                col
                for col in pandas_metadata.get("index_columns", [])
                # 'index_columns' field may also contain dictionary fields describing actual
                # RangeIndices, so we're only filtering here for string column names
                if isinstance(col, str)
            )
            # Filtering out the columns that describe the frame's index
            columns = [col for col in schema.names if col not in index_cols]
        return cls.build_query_compiler(
            path,
            columns,
            use_threads=False,
            storage_options=storage_options,
            dtype_backend=kwargs["dtype_backend"],
        )


================================================
FILE: modin/core/io/column_stores/hdf_dispatcher.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses `HDFDispatcher` class, that is used for reading hdf data."""

import pandas

from modin.core.io.column_stores.column_store_dispatcher import ColumnStoreDispatcher


class HDFDispatcher(ColumnStoreDispatcher):  # pragma: no cover
    """
    Class handles utils for reading hdf data.

    Inherits some common for columnar store files util functions from
    `ColumnStoreDispatcher` class.
    """

    @classmethod
    def _validate_hdf_format(cls, path_or_buf):
        """
        Validate `path_or_buf` and then return `table_type` parameter of store group attribute.

        Parameters
        ----------
        path_or_buf : str, buffer or path object
            Path to the file to open, or an open :class:`pandas.HDFStore` object.

        Returns
        -------
        str
            `table_type` parameter of store group attribute, or None if the
            first group of the store has no such attribute.

        Raises
        ------
        ValueError
            If the store contains no groups.
        """
        # The context manager closes the store on every exit path, including
        # the `ValueError` below.
        with pandas.HDFStore(path_or_buf) as store:
            groups = store.groups()
            if len(groups) == 0:
                raise ValueError("No dataset in HDF5 file.")
            # Only the first group is inspected.
            return getattr(groups[0]._v_attrs, "table_type", None)

    @classmethod
    def _read(cls, path_or_buf, **kwargs):
        """
        Load an h5 file from the file path or buffer, returning a query compiler.

        Parameters
        ----------
        path_or_buf : str, buffer or path object
            Path to the file to open, or an open :class:`pandas.HDFStore` object.
        **kwargs : dict
            Pass into pandas.read_hdf function.

        Returns
        -------
        BaseQueryCompiler
            Query compiler with imported data for further processing.
        """
        if cls._validate_hdf_format(path_or_buf=path_or_buf) is None:
            return cls.single_worker_read(
                path_or_buf,
                reason="File format seems to be `fixed`. For better distribution consider "
                + "saving the file in `table` format. df.to_hdf(format=`table`).",
                **kwargs,
            )

        columns = kwargs.pop("columns", None)
        # Have to do this because of Dask's keyword arguments
        kwargs["_key"] = kwargs.pop("key", None)
        if not columns:
            # Probe the file with an empty row range to discover the column
            # names; user-supplied `start`/`stop` are set aside for the probe
            # and put back afterwards (only when they were actually given).
            start = kwargs.pop("start", None)
            stop = kwargs.pop("stop", None)
            empty_pd_df = pandas.read_hdf(path_or_buf, start=0, stop=0, **kwargs)
            if start is not None:
                kwargs["start"] = start
            if stop is not None:
                kwargs["stop"] = stop
            columns = empty_pd_df.columns
        return cls.build_query_compiler(path_or_buf, columns, **kwargs)


================================================
FILE: modin/core/io/column_stores/parquet_dispatcher.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses `ParquetDispatcher` class, that is used for reading `.parquet` files."""

from __future__ import annotations

import functools
import json
import os
import re
from typing import TYPE_CHECKING

import fsspec
import numpy as np
import pandas
import pandas._libs.lib as lib
from fsspec.core import url_to_fs
from fsspec.spec import AbstractBufferedFile
from packaging import version
from pandas.io.common import stringify_path

from modin.config import MinColumnPartitionSize, MinRowPartitionSize, NPartitions
from modin.core.io.column_stores.column_store_dispatcher import ColumnStoreDispatcher
from modin.error_message import ErrorMessage
from modin.utils import _inherit_docstrings

if TYPE_CHECKING:
    from modin.core.storage_formats.pandas.parsers import ParquetFileToRead


class ColumnStoreDataset:
    """
    Base class that encapsulates Parquet engine-specific details.

    This class exposes a set of functions that are commonly used in the
    `read_parquet` implementation. Subclasses are expected to provide
    `_init_dataset()`, which is called by the constructor to create the
    engine-specific `dataset` object.

    Attributes
    ----------
    path : str, path object or file-like object
        The filepath of the parquet file in local filesystem or hdfs.
    storage_options : dict
        Parameters for specific storage engine.
    _fs_path : str, path object or file-like object
        The filepath or handle of the parquet dataset specific to the
        filesystem implementation. E.g. for `s3://test/example`, _fs
        would be set to S3FileSystem and _fs_path would be `test/example`.
    _fs : Filesystem
        Filesystem object specific to the given parquet file/dataset.
    dataset : ParquetDataset or ParquetFile
        Underlying dataset implementation for PyArrow and fastparquet
        respectively.
    """

    def __init__(self, path, storage_options):  # noqa : PR01
        self.path = path.__fspath__() if isinstance(path, os.PathLike) else path
        self.storage_options = storage_options
        # Both are resolved lazily on the first access to `fs` / `fs_path`.
        self._fs_path = None
        self._fs = None
        # Has to go last: `_init_dataset` may rely on the attributes set above.
        self.dataset = self._init_dataset()

    @property
    def pandas_metadata(self):
        """Return the pandas metadata of the dataset."""
        raise NotImplementedError

    @property
    def columns(self):
        """Return the list of columns in the dataset."""
        raise NotImplementedError

    @property
    def engine(self):
        """Return string representing what engine is being used."""
        raise NotImplementedError

    @functools.cached_property
    def files(self):
        """Return the list of formatted file paths of the dataset."""
        raise NotImplementedError

    @functools.cached_property
    def row_groups_per_file(self):
        """Return a list with the number of row groups per file."""
        raise NotImplementedError

    def _resolve_fs(self):
        """
        Resolve and cache both the filesystem object and the filesystem-specific path.

        An already opened fsspec file carries its own filesystem and serves as
        the path itself; anything else is resolved with `url_to_fs`.
        """
        if isinstance(self.path, AbstractBufferedFile):
            self._fs, self._fs_path = self.path.fs, self.path
        else:
            # `storage_options` may be None (the pandas default), which can not
            # be unpacked as keyword arguments.
            self._fs, self._fs_path = url_to_fs(
                self.path, **(self.storage_options or {})
            )

    @property
    def fs(self):
        """
        Return the filesystem object associated with the dataset path.

        Returns
        -------
        filesystem
            Filesystem object.
        """
        if self._fs is None:
            self._resolve_fs()
        return self._fs

    @property
    def fs_path(self):
        """
        Return the filesystem-specific path or file handle.

        Returns
        -------
        fs_path : str, path object or file-like object
            String path specific to filesystem or a file handle.
        """
        if self._fs_path is None:
            self._resolve_fs()
        return self._fs_path

    def to_pandas_dataframe(self, columns):
        """
        Read the given columns as a pandas dataframe.

        Parameters
        ----------
        columns : list
            List of columns that should be read from file.
        """
        raise NotImplementedError

    def _get_files(self, files):
        """
        Retrieve list of formatted file names in dataset path.

        Parameters
        ----------
        files : list
            List of files from path.

        Returns
        -------
        fs_files : list
            List of files from path with fs-protocol prepended. If the dataset
            path is an already opened fsspec file, a single-element list with
            that file object is returned instead.
        """
        # Older versions of fsspec don't support unstrip_protocol(). It
        # was only added relatively recently:
        # https://github.com/fsspec/filesystem_spec/pull/828

        def _unstrip_protocol(protocol, path):
            # A filesystem may advertise a single protocol or a sequence of them.
            protos = (protocol,) if isinstance(protocol, str) else protocol
            if any(path.startswith(f"{proto}://") for proto in protos):
                return path
            return f"{protos[0]}://{path}"

        if isinstance(self.path, AbstractBufferedFile):
            return [self.path]
        # version.parse() is expensive, so the version check is done once up
        # front instead of once per file.
        if version.parse(fsspec.__version__) < version.parse("2022.5.0"):
            fs_files = [_unstrip_protocol(self.fs.protocol, fpath) for fpath in files]
        else:
            fs_files = [self.fs.unstrip_protocol(fpath) for fpath in files]

        return fs_files


@_inherit_docstrings(ColumnStoreDataset)
class PyArrowDataset(ColumnStoreDataset):
    # PyArrow flavour of the dataset wrapper. Docstrings are taken over from
    # `ColumnStoreDataset` by the decorator, hence none are spelled out here.

    def _init_dataset(self):  # noqa: GL08
        # pyarrow is imported lazily, only when this engine is actually used.
        from pyarrow.parquet import ParquetDataset

        return ParquetDataset(self.fs_path, filesystem=self.fs)

    @property
    def pandas_metadata(self):
        schema = self.dataset.schema
        return schema.pandas_metadata

    @property
    def columns(self):
        schema = self.dataset.schema
        return schema.names

    @property
    def engine(self):
        return "pyarrow"

    @functools.cached_property
    def row_groups_per_file(self):
        from pyarrow.parquet import ParquetFile

        def count_row_groups(file_path):
            # Every file is opened through the dataset's filesystem just to
            # read the number of row groups it holds.
            with self.fs.open(file_path) as handle:
                return ParquetFile(handle).num_row_groups

        # One entry per file, in the order of `self.files`.
        return [count_row_groups(file_path) for file_path in self.files]

    @functools.cached_property
    def files(self):
        return self._get_files(self.dataset.files)

    def to_pandas_dataframe(
        self,
        columns,
    ):
        from pyarrow.parquet import read_table

        table = read_table(self._fs_path, columns=columns, filesystem=self.fs)
        return table.to_pandas()


@_inherit_docstrings(ColumnStoreDataset)
class FastParquetDataset(ColumnStoreDataset):
    # fastparquet flavour of the dataset wrapper. Docstrings are taken over from
    # `ColumnStoreDataset` by the decorator, hence none are spelled out here.

    def _init_dataset(self):  # noqa: GL08
        # fastparquet is imported lazily, only when this engine is actually used.
        from fastparquet import ParquetFile

        return ParquetFile(self.fs_path, fs=self.fs)

    @property
    def pandas_metadata(self):
        metadata = self.dataset.key_value_metadata
        # The pandas metadata is kept as a JSON string under the "pandas" key.
        return json.loads(metadata["pandas"]) if "pandas" in metadata else {}

    @property
    def columns(self):
        return self.dataset.columns

    @property
    def engine(self):
        return "fastparquet"

    @functools.cached_property
    def row_groups_per_file(self):
        from fastparquet import ParquetFile

        def count_row_groups(file_path):
            # Every file is opened through the dataset's filesystem just to
            # read the number of row groups it holds.
            with self.fs.open(file_path) as handle:
                return ParquetFile(handle).info["row_groups"]

        # One entry per file, in the order of `self.files`.
        return [count_row_groups(file_path) for file_path in self.files]

    @functools.cached_property
    def files(self):
        discovered = self._get_fastparquet_files()
        return self._get_files(discovered)

    def to_pandas_dataframe(self, columns):
        return self.dataset.to_pandas(columns=columns)

    # Karthik Velayutham writes:
    #
    # fastparquet doesn't have a nice method like PyArrow, so we
    # have to copy some of their logic here while we work on getting
    # an easier method to get a list of valid files.
    # See: https://github.com/dask/fastparquet/issues/795
    def _get_fastparquet_files(self):  # noqa: GL08
        # A glob pattern is expanded by the filesystem itself.
        if "*" in self.path:
            return self.fs.glob(self.path)
        # (Resolving issue #6778)
        #
        # Users will pass in a directory to a delta table, which stores parquet
        # files in various directories along with other, non-parquet files. We
        # need to identify those parquet files and not the non-parquet files.
        #
        # However, we also need to support users passing in explicit files that
        # don't necessarily have the `.parq` or `.parquet` extension -- if a user
        # says that a file is parquet, then we should probably give it a shot.
        if self.fs.isfile(self.path):
            return self.fs.find(self.path)
        return [
            found
            for found in self.fs.find(self.path)
            if found.endswith((".parquet", ".parq"))
        ]


class ParquetDispatcher(ColumnStoreDispatcher):
    """Class handles utils for reading `.parquet` files."""

    index_regex = re.compile(r"__index_level_\d+__")

    @classmethod
    def get_dataset(cls, path, engine, storage_options):
        """
        Build the engine-specific dataset wrapper for a parquet source.

        Parameters
        ----------
        path : str, path object or file-like object
            The filepath of the parquet file in local filesystem or hdfs.
        engine : {"auto", "pyarrow", "fastparquet"}
            Parquet library to use. With ``"auto"`` pyarrow is tried first and
            fastparquet second, falling through on ``ImportError``.
        storage_options : dict
            Parameters for specific storage engine.

        Returns
        -------
        Dataset
            Either a PyArrowDataset or FastParquetDataset object.

        Raises
        ------
        ImportError
            If `engine` is ``"auto"`` and neither engine could be constructed.
        ValueError
            If `engine` is not one of the accepted names.
        """
        if engine == "pyarrow":
            return PyArrowDataset(path, storage_options)
        if engine == "fastparquet":
            return FastParquetDataset(path, storage_options)
        if engine != "auto":
            raise ValueError("engine must be one of 'pyarrow', 'fastparquet'")

        # We follow in concordance with pandas: try the engines in a fixed
        # order and remember why each of them could not be used.
        failures = []
        for candidate in (PyArrowDataset, FastParquetDataset):
            try:
                return candidate(path, storage_options)
            except ImportError as err:
                failures.append("\n - " + str(err))

        raise ImportError(
            "Unable to find a usable engine; "
            "tried using: 'pyarrow', 'fastparquet'.\n"
            "A suitable version of "
            "pyarrow or fastparquet is required for parquet "
            "support.\n"
            "Trying to import the above resulted in these errors:"
            + "".join(failures)
        )

    @classmethod
    def _determine_partitioning(
        cls, dataset: ColumnStoreDataset
    ) -> "list[list[ParquetFileToRead]]":
        """
        Assign the dataset's row groups to row partitions.

        Row groups are handed out in file order. The number of partitions is
        the smaller of ``NPartitions.get()`` and the total number of row groups;
        when the total does not divide evenly, the trailing partitions receive
        one extra row group each.

        Parameters
        ----------
        dataset : ColumnStoreDataset

        Returns
        -------
        list[list[ParquetFileToRead]]
            Each element in the returned list describes a list of files (with
            row group ranges) that a partition has to read. Empty if the dataset
            holds no row groups.
        """
        from modin.core.storage_formats.pandas.parsers import ParquetFileToRead

        file_paths = dataset.files
        groups_in_file = dataset.row_groups_per_file
        total_groups = sum(groups_in_file)
        if total_groups == 0:
            return []

        n_parts = min(NPartitions.get(), total_groups)
        # 'extra' partitions must read 'base_size + 1' row groups so that every
        # row group is covered when 'n_parts' does not divide 'total_groups'.
        base_size, extra = divmod(total_groups, n_parts)
        quotas = [base_size] * (n_parts - extra) + [base_size + 1] * extra

        assignments = []
        current_file = 0
        next_group = 0
        remaining_in_file = groups_in_file[current_file]
        # kept only for the sanity check after the loop
        assigned_total = 0
        for quota in quotas:
            chunk = []
            taken = 0
            while taken != quota:
                if remaining_in_file < 1:
                    # current file is exhausted, continue with the next one
                    current_file += 1
                    next_group = 0
                    remaining_in_file = groups_in_file[current_file]

                grab = min(quota - taken, remaining_in_file)
                chunk.append(
                    ParquetFileToRead(
                        file_paths[current_file],
                        row_group_start=next_group,
                        row_group_end=next_group + grab,
                    )
                )
                remaining_in_file -= grab
                taken += grab
                next_group += grab

            assigned_total += taken
            assignments.append(chunk)

        ErrorMessage.catch_bugs_and_request_email(
            failure_condition=(
                len(assignments) != n_parts or assigned_total != total_groups
            ),
            extra_log="row groups added does not match total num of row groups across parquet files",
        )
        return assignments

    @classmethod
    def call_deploy(
        cls,
        partition_files: "list[list[ParquetFileToRead]]",
        col_partitions: "list[list[str]]",
        storage_options: dict,
        engine: str,
        **kwargs,
    ):
        """
        Submit one ``cls.parse`` task per (row partition, column partition) pair.

        Parameters
        ----------
        partition_files : list[list[ParquetFileToRead]]
            List of arrays with files that should be read by each partition.
        col_partitions : list[list[str]]
            List of arrays with columns names that should be read
            by each partition.
        storage_options : dict
            Parameters for specific storage engine.
        engine : {"auto", "pyarrow", "fastparquet"}
            Parquet library to use for reading.
        **kwargs : dict
            Parameters of deploying read_* function.

        Returns
        -------
        List
            One inner list per element of `partition_files`; each inner list
            holds the ``cls.deploy`` result for every column partition. Empty
            when there are no column partitions.
        """
        # Nothing to read column-wise means there is nothing to deploy at all.
        if not len(col_partitions):
            return []

        def submit(files_to_read, cols):
            # A fresh kwargs dict is built for every task; entries of
            # ``kwargs`` take precedence over the explicitly named ones.
            task_kwargs = dict(
                files_for_parser=files_to_read,
                columns=cols,
                engine=engine,
                storage_options=storage_options,
            )
            task_kwargs.update(kwargs)
            return cls.deploy(func=cls.parse, f_kwargs=task_kwargs, num_returns=3)

        return [
            [submit(files_to_read, cols) for cols in col_partitions]
            for files_to_read in partition_files
        ]

    @classmethod
    def build_partition(cls, partition_ids, column_widths):
        """
        Wrap deployed task results into `cls.frame_partition_cls` objects.

        Parameters
        ----------
        partition_ids : list
            Array with references to the partitions data.
        column_widths : list
            Number of columns in each partition.

        Returns
        -------
        np.ndarray
            Array built from one row of partition objects per element of
            `partition_ids`.

        Notes
        -----
        The second level of partitions_ids contains a list of object references
        for each read call:
        partition_ids[i][j] -> [ObjectRef(df), ObjectRef(df.index), ObjectRef(len(df))].
        Only the first (data) and third (length) references are used here.
        """
        grid = []
        for row_refs in partition_ids:
            row = [
                cls.frame_partition_cls(refs[0], length=refs[2], width=width)
                for refs, width in zip(row_refs, column_widths)
            ]
            grid.append(row)
        return np.array(grid)

    @classmethod
    def build_index(cls, dataset, partition_ids, index_columns, filters):
        """
        Compute the index of the resulting Modin DataFrame.

        Parameters
        ----------
        dataset : Dataset
            Dataset object of Parquet file/files.
        partition_ids : list
            Array with references to the partitions data.
        index_columns : list
            List of index columns specified by pandas metadata.
        filters : list
            List of filters to be used in reading the Parquet file/files.

        Returns
        -------
        index : pandas.Index
            Index of resulting Modin DataFrame (an empty list when there are
            neither partitions nor stored index columns).
        needs_index_sync : bool
            Whether the partition indices need to be synced with frame
            index because there's no index column, or at least one
            index column is a RangeIndex.

        Notes
        -----
        See `build_partition` for more detail on the contents of partitions_ids.
        """
        range_index = True
        range_index_metadata = None
        column_names_to_read = []
        for column in index_columns:
            # https://pandas.pydata.org/docs/development/developer.html#storing-pandas-dataframe-objects-in-apache-parquet-format
            # describes the format of the index column metadata.
            # It is a list, where each entry is either a string or a dictionary.
            # A string means that a column stored in the dataset is (part of) the index.
            # A dictionary is metadata about a RangeIndex, which is metadata-only and not stored
            # in the dataset as a column.
            # There cannot be both for a single dataframe, because a MultiIndex can only contain
            # "actual data" columns and not RangeIndex objects.
            # See similar code in pyarrow: https://github.com/apache/arrow/blob/44811ba18477560711d512939535c8389dd7787b/python/pyarrow/pandas_compat.py#L912-L926
            # and in fastparquet, here is where RangeIndex is handled: https://github.com/dask/fastparquet/blob/df1219300a96bc1baf9ebad85f4f5676a130c9e8/fastparquet/api.py#L809-L815
            if isinstance(column, str):
                column_names_to_read.append(column)
                range_index = False
            elif column["kind"] == "range":
                range_index_metadata = column

        # When the index has meaningful values, stored in a column, we will replicate those
        # exactly in the Modin dataframe's index. This index may have repeated values, be unsorted,
        # etc. This is all fine.
        # A range index is the special case: we want the Modin dataframe to have a single range,
        # not a range that keeps restarting. i.e. if the partitions have index 0-9, 0-19, 0-29,
        # we want our Modin dataframe to have 0-59.
        # When there are no filters, it is relatively cheap to construct the index by
        # actually reading in the necessary data, here in the main process.
        # When there are filters, we let the workers materialize the indices before combining to
        # get a single range.

        # For the second check, let us consider the case where we have an empty dataframe,
        # that has a valid index.
        if (range_index and filters is None) or (
            len(partition_ids) == 0 and len(column_names_to_read) != 0
        ):
            complete_index = dataset.to_pandas_dataframe(
                columns=column_names_to_read
            ).index
        # Empty DataFrame case
        elif len(partition_ids) == 0:
            return [], False
        else:
            # The index reference of the first column partition of every row
            # partition is enough: all column partitions share the same rows.
            index_ids = [part_id[0][1] for part_id in partition_ids if len(part_id) > 0]
            index_objs = cls.materialize(index_ids)
            if range_index:
                # There are filters, so we had to materialize in order to
                # determine how many items there actually are
                total_filtered_length = sum(
                    len(index_part) for index_part in index_objs
                )

                metadata_length_mismatch = False
                if range_index_metadata is not None:
                    # Use the real number of elements of the described range.
                    # Plain division gives a fractional (or negative) value
                    # whenever 'step' does not evenly divide 'stop - start',
                    # which would flag a perfectly consistent RangeIndex as
                    # mismatching.
                    metadata_implied_length = len(
                        range(
                            range_index_metadata["start"],
                            range_index_metadata["stop"],
                            range_index_metadata["step"],
                        )
                    )
                    metadata_length_mismatch = (
                        total_filtered_length != metadata_implied_length
                    )

                # pyarrow ignores the RangeIndex metadata if it is not consistent with data length.
                # https://github.com/apache/arrow/blob/44811ba18477560711d512939535c8389dd7787b/python/pyarrow/pandas_compat.py#L924-L926
                # fastparquet keeps the start and step from the metadata and just adjusts to the length.
                # https://github.com/dask/fastparquet/blob/df1219300a96bc1baf9ebad85f4f5676a130c9e8/fastparquet/api.py#L815
                if range_index_metadata is None or (
                    isinstance(dataset, PyArrowDataset) and metadata_length_mismatch
                ):
                    complete_index = pandas.RangeIndex(total_filtered_length)
                else:
                    complete_index = pandas.RangeIndex(
                        start=range_index_metadata["start"],
                        step=range_index_metadata["step"],
                        stop=(
                            range_index_metadata["start"]
                            + (total_filtered_length * range_index_metadata["step"])
                        ),
                        name=range_index_metadata["name"],
                    )
            else:
                # Stored index columns: stitch the per-partition indices
                # together in partition order.
                complete_index = index_objs[0].append(index_objs[1:])
        return complete_index, range_index or (len(index_columns) == 0)

    @classmethod
    def _normalize_partitioning(cls, remote_parts, row_lengths, column_widths):
        """
        Normalize partitioning according to the default partitioning scheme in Modin.

        The result of 'read_parquet()' is often under partitioned over rows and over partitioned
        over columns, so this method expands the number of row splits and shrinks the number of
        column splits.

        Parameters
        ----------
        remote_parts : np.ndarray
            2D array of partitions; the first axis enumerates row partitions and
            the second axis enumerates column partitions.
        row_lengths : list of ints or None
            Row lengths, if 'None', won't repartition across rows.
        column_widths : list of ints
            Number of columns held by each column partition.

        Returns
        -------
        remote_parts : np.ndarray
            Possibly re-split partitions (the input object if it was empty).
        row_lengths : list of ints or None
            Row lengths matching the returned partitions.
        column_widths : list of ints
            Column widths matching the returned partitions.
        """
        # Nothing to normalize for an empty set of partitions.
        if len(remote_parts) == 0:
            return remote_parts, row_lengths, column_widths

        from modin.core.storage_formats.pandas.utils import get_length_list

        # The code in this function is actually a duplication of what 'BaseQueryCompiler.repartition()' does,
        # however this implementation works much faster for some reason

        actual_row_nparts = remote_parts.shape[0]

        if row_lengths is not None:
            # At least one partition, at most 'NPartitions', and no more than
            # the total row count allows given the minimal row partition size.
            desired_row_nparts = max(
                1, min(sum(row_lengths) // MinRowPartitionSize.get(), NPartitions.get())
            )
        else:
            # Unknown lengths: keep the current number of row partitions so
            # that the row repartitioning branch below is never taken.
            desired_row_nparts = actual_row_nparts

        # Only repartition along rows if the actual number of row splits is more than
        # 1.5 times SMALLER than desired
        if 1.5 * actual_row_nparts < desired_row_nparts:
            # assuming that the sizes of parquet's row groups are more or less equal,
            # so trying to use the same number of splits for each partition
            splits_per_partition = desired_row_nparts // actual_row_nparts
            remainder = desired_row_nparts % actual_row_nparts

            new_parts = []
            new_row_lengths = []

            for row_idx, (part_len, row_parts) in enumerate(
                zip(row_lengths, remote_parts)
            ):
                num_splits = splits_per_partition
                # 'remainder' indicates how many partitions have to be split into 'num_splits + 1' splits
                # to have exactly 'desired_row_nparts' in the end
                if row_idx < remainder:
                    num_splits += 1

                # A single split means the row partition is kept as it is.
                if num_splits == 1:
                    new_parts.append(row_parts)
                    new_row_lengths.append(part_len)
                    continue

                # Index in 'new_parts' of the first row produced from this row partition.
                offset = len(new_parts)
                # adding empty row parts according to the number of splits
                new_parts.extend([[] for _ in range(num_splits)])
                for part in row_parts:
                    # Split every block of the row partition into 'num_splits'
                    # pieces using an identity function, then distribute the
                    # pieces over the freshly added rows.
                    split = cls.frame_cls._partition_mgr_cls._column_partitions_class(
                        [part]
                    ).apply(
                        lambda df: df,
                        num_splits=num_splits,
                        maintain_partitioning=False,
                    )
                    for i in range(num_splits):
                        new_parts[offset + i].append(split[i])

                # Lengths of the new row partitions are computed locally by
                # 'get_length_list' rather than queried from the split results.
                new_row_lengths.extend(
                    get_length_list(part_len, num_splits, MinRowPartitionSize.get())
                )

            remote_parts = np.array(new_parts)
            row_lengths = new_row_lengths

        # Same bounds as for rows: between 1 and 'NPartitions', limited by the
        # total column count and the minimal column partition size.
        desired_col_nparts = max(
            1,
            min(sum(column_widths) // MinColumnPartitionSize.get(), NPartitions.get()),
        )
        # Only repartition along cols if the actual number of col splits is more than
        # 1.5 times BIGGER than desired
        if 1.5 * desired_col_nparts < remote_parts.shape[1]:
            # Re-split every row of partitions into 'desired_col_nparts' blocks
            # using an identity function.
            remote_parts = np.array(
                [
                    (
                        cls.frame_cls._partition_mgr_cls._row_partition_class(
                            row_parts
                        ).apply(
                            lambda df: df,
                            num_splits=desired_col_nparts,
                            maintain_partitioning=False,
                        )
                    )
                    for row_parts in remote_parts
                ]
            )
            column_widths = get_length_list(
                sum(column_widths), desired_col_nparts, MinColumnPartitionSize.get()
            )

        return remote_parts, row_lengths, column_widths

    @classmethod
    def build_query_compiler(cls, dataset, columns, index_columns, **kwargs):
        """
        Deploy the parquet read and assemble the result into a query compiler.

        Parameters
        ----------
        dataset : Dataset
            Dataset object of Parquet file/files.
        columns : list
            List of columns that should be read from file.
        index_columns : list
            List of index columns specified by pandas metadata.
        **kwargs : dict
            Parameters of deploying read_* function.

        Returns
        -------
        new_query_compiler : BaseQueryCompiler
            Query compiler with imported data for further processing.
        """
        # 'storage_options' is passed to the workers explicitly, so it must
        # not also travel inside the remaining kwargs.
        storage_options = kwargs.pop("storage_options", None)
        if not storage_options:
            storage_options = {}
        filters = kwargs.get("filters")

        files_per_partition = cls._determine_partitioning(dataset)
        col_partitions, column_widths = cls.build_columns(
            columns,
            num_row_parts=len(files_per_partition),
        )
        partition_ids = cls.call_deploy(
            files_per_partition,
            col_partitions,
            storage_options,
            dataset.engine,
            **kwargs,
        )
        index, needs_index_sync = cls.build_index(
            dataset, partition_ids, index_columns, filters
        )
        remote_parts = cls.build_partition(partition_ids, column_widths)

        # Row lengths are taken from the first column of partitions; without
        # any partitions they stay unknown.
        row_lengths = (
            [part.length() for part in remote_parts.T[0]]
            if len(partition_ids) > 0
            else None
        )

        remote_parts, row_lengths, column_widths = cls._normalize_partitioning(
            remote_parts, row_lengths, column_widths
        )

        # Column labels recorded as a single int64 level are converted back
        # from the names used for reading into integers.
        pandas_meta = dataset.pandas_metadata
        if pandas_meta and "column_indexes" in pandas_meta:
            column_indexes = pandas_meta["column_indexes"]
            if (
                len(column_indexes) == 1
                and column_indexes[0]["numpy_type"] == "int64"
            ):
                columns = pandas.Index(columns).astype("int64").to_list()

        frame = cls.frame_cls(
            remote_parts,
            index,
            columns,
            row_lengths=row_lengths,
            column_widths=column_widths,
            dtypes=None,
        )
        if needs_index_sync:
            frame.synchronize_labels(axis=0)
        return cls.query_compiler_cls(frame)

    @classmethod
    def _read(cls, path, engine, columns, use_nullable_dtypes, dtype_backend, **kwargs):
        """
        Load a parquet object from the file path, returning a query compiler.

        Parameters
        ----------
        path : str, path object or file-like object
            The filepath of the parquet file in local filesystem or hdfs.
        engine : {"auto", "pyarrow", "fastparquet"}
            Parquet library to use.
        columns : list
            If not None, only these columns will be read from the file.
        use_nullable_dtypes : Union[bool, lib.NoDefault]
        dtype_backend : {"numpy_nullable", "pyarrow", lib.no_default}
        **kwargs : dict
            Keyword arguments.

        Returns
        -------
        BaseQueryCompiler
            A new Query Compiler.

        Notes
        -----
        ParquetFile API is used. Please refer to the documentation here
        https://arrow.apache.org/docs/python/parquet.html

        The read is delegated to ``cls.single_worker_read`` when unsupported
        keyword arguments are given, when `use_nullable_dtypes` is set, when a
        ``filesystem`` is supplied, or when sub-directories are found while
        walking `path`.
        """

        def read_on_single_worker(reason):
            # 'path' is looked up at call time, so after ``stringify_path``
            # ran the normalized value is forwarded.
            return cls.single_worker_read(
                path,
                engine=engine,
                columns=columns,
                use_nullable_dtypes=use_nullable_dtypes,
                dtype_backend=dtype_backend,
                reason=reason,
                **kwargs,
            )

        parallel_read_kwargs = {"storage_options", "filters", "filesystem"}
        unsupported_request = (
            bool(set(kwargs) - parallel_read_kwargs)
            or use_nullable_dtypes != lib.no_default
            or kwargs.get("filesystem") is not None
        )
        if unsupported_request:
            return read_on_single_worker(
                "Parquet options that are not currently supported"
            )

        path = stringify_path(path)
        if isinstance(path, list):
            # TODO(https://github.com/modin-project/modin/issues/5723): read all
            # files in parallel.
            compilers = [
                cls._read(
                    single_path,
                    engine,
                    columns,
                    use_nullable_dtypes,
                    dtype_backend,
                    **kwargs,
                )
                for single_path in path
            ]
            return compilers[0].concat(axis=0, other=compilers[1:], ignore_index=True)

        if isinstance(path, str):
            if os.path.isdir(path):
                directory_walk = os.walk(path)
            else:
                fs_options = kwargs.get("storage_options")
                fs, fs_path = url_to_fs(
                    path, **(fs_options if fs_options is not None else {})
                )
                directory_walk = fs.walk(fs_path)

            # Partitioned parquet directories have a unique column at each
            # directory level, so a depth-first walk visits the partitioning
            # columns one after another until the first level holding files.
            partition_keys = set()
            for _, subdirs, file_names in directory_walk:
                if subdirs:
                    partition_keys.add(subdirs[0].split("=")[0])
                # Levels whose first file is hidden (metadata files, git files,
                # .DSStore) do not end the walk.
                # TODO: fix conditional for column partitioning, see issue #4637
                if file_names and not file_names[0].startswith("."):
                    break
            if partition_keys:
                return read_on_single_worker("Mixed partitioning columns in Parquet")

        dataset = cls.get_dataset(path, engine, kwargs.get("storage_options") or {})
        pandas_meta = dataset.pandas_metadata
        index_columns = pandas_meta.get("index_columns", []) if pandas_meta else []
        # If we have columns as None, then we default to reading in all the columns
        requested_columns = columns or dataset.columns
        # Index columns are handled separately and must not be read as data.
        columns = [
            name
            for name in requested_columns
            if not (name in index_columns or cls.index_regex.match(name))
        ]

        return cls.build_query_compiler(
            dataset, columns, index_columns, dtype_backend=dtype_backend, **kwargs
        )

    @classmethod
    def write(cls, qc, **kwargs):
        """
        Write a ``DataFrame`` to the binary parquet format.

        When the target path is a string, it is treated as a directory: it is
        created if necessary and every row partition is written to its own
        ``part-<idx>.<compression>.parquet`` file inside it. Any other kind of
        path is delegated to ``cls.base_io.to_parquet``.

        Parameters
        ----------
        qc : BaseQueryCompiler
            The query compiler of the Modin dataframe that we want to run `to_parquet` on.
        **kwargs : dict
            Parameters for `pandas.to_parquet(**kwargs)`. Must contain ``path``
            and, for string paths, ``compression``.

        Returns
        -------
        object or None
            The result of ``cls.base_io.to_parquet`` for non-string paths,
            otherwise None.
        """
        kwargs["path"] = stringify_path(kwargs["path"])
        output_path = kwargs["path"]
        if not isinstance(output_path, str):
            return cls.base_io.to_parquet(qc, **kwargs)
        client_kwargs = (kwargs.get("storage_options") or {}).get("client_kwargs", {})
        fs, url = fsspec.core.url_to_fs(output_path, client_kwargs=client_kwargs)
        fs.mkdirs(url, exist_ok=True)

        def func(df, **kw):  # pragma: no cover
            """
            Dump a chunk of rows as parquet, then save them to target maintaining order.

            Parameters
            ----------
            df : pandas.DataFrame
                A chunk of rows to write to a parquet file.
            **kw : dict
                Arguments to pass to ``pandas.to_parquet(**kwargs)`` plus an extra argument
                `partition_idx` serving as chunk index to maintain rows order.

            Returns
            -------
            pandas.DataFrame
                An empty frame; only the side effect of writing matters.
            """
            compression = kwargs["compression"]
            partition_idx = kw["partition_idx"]
            # Build per-partition arguments instead of writing the path into
            # the shared 'kwargs': when partitions are processed in the same
            # process (or in threads) they would otherwise overwrite each
            # other's target path.
            partition_kwargs = {
                **kwargs,
                "path": f"{output_path}/part-{partition_idx:04d}.{compression}.parquet",
            }
            df.to_parquet(**partition_kwargs)
            return pandas.DataFrame()

        # Ensure that the metadata is synchronized
        qc._modin_frame._propagate_index_objs(axis=None)
        result = qc._modin_frame._partition_mgr_cls.map_axis_partitions(
            axis=1,
            partitions=qc._modin_frame._partitions,
            map_func=func,
            keep_partitioning=True,
            lengths=None,
            enumerate_partitions=True,
        )
        # pending completion
        cls.materialize([part.list_of_blocks[0] for row in result for part in row])


================================================
FILE: modin/core/io/file_dispatcher.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
Module houses `FileDispatcher` class.

`FileDispatcher` can be used as abstract base class for dispatchers of specific file formats or
for direct files processing.
"""

import os

import fsspec
import numpy as np
from pandas.io.common import is_fsspec_url, is_url

from modin.config import AsyncReadMode
from modin.logging import ClassLogger
from modin.logging.config import LogLevel
from modin.utils import ModinAssumptionError

NOT_IMPLEMENTED_MESSAGE = "Implement in children classes!"


class OpenFile:
    """
    Context manager that opens an input file through fsspec.

    The file is opened on ``__enter__`` and the fsspec file is closed on
    ``__exit__``. The class exists to encapsulate the retry with ``anon=True``
    that is performed when opening fails with a credential-related error
    (relevant for s3 buckets).

    Parameters
    ----------
    file_path : str
        String that represents the path to the file (paths to S3 buckets
        are also acceptable).
    mode : str, default: "rb"
        String, which defines which mode file should be open.
    compression : str, default: "infer"
        File compression name.
    **kwargs : dict
        Keywords arguments to be passed into ``fsspec.open`` function.

    Attributes
    ----------
    file_path : str
        String that represents the path to the file
    mode : str
        String that defines which mode the file should be opened in.
    compression : str
        File compression name.
    file : fsspec.core.OpenFile
        The fsspec file object; only set once ``__enter__`` has been called.
    kwargs : dict
        Keywords arguments to be passed into ``fsspec.open`` function.
    """

    def __init__(self, file_path, mode="rb", compression="infer", **kwargs):
        self.file_path, self.mode, self.compression = file_path, mode, compression
        self.kwargs = kwargs

    def __enter__(self):
        """
        Open the file with fsspec and return the opened file.

        Returns
        -------
        object
            The object returned by ``self.file.open()``.
        """
        # botocore is optional: without it only PermissionError triggers the
        # anonymous retry.
        try:
            from botocore.exceptions import NoCredentialsError
        except ModuleNotFoundError:
            retry_anonymously_on = (PermissionError,)
        else:
            retry_anonymously_on = (NoCredentialsError, PermissionError)

        open_args = (self.file_path, self.mode, self.compression)

        self.file = fsspec.open(*open_args, **self.kwargs)
        try:
            opened = self.file.open()
        except retry_anonymously_on:
            # Remember the anonymous mode in 'self.kwargs' and rebuild the
            # fsspec file; the actual retry happens after the handler.
            self.kwargs["anon"] = True
            self.file = fsspec.open(*open_args, **self.kwargs)
        else:
            return opened
        return self.file.open()

    def __exit__(self, *args):
        """
        Close the file.

        Parameters
        ----------
        *args : any type
            Variable positional arguments, all unused.
        """
        self.file.close()


class FileDispatcher(ClassLogger, modin_layer="CORE-IO", log_level=LogLevel.DEBUG):
    """
    Base class with utilities shared by the readers of different file kinds.

    Notes
    -----
    `_read`, `deploy`, `parse` and `materialize` are abstract here and have to be
    provided by the child classes (their signatures may differ between child
    classes).
    """

    BUFFER_UNSUPPORTED_MSG = (
        "Reading from buffers or other non-path-like objects is not supported"
    )

    # Concrete dispatchers are expected to fill these in.
    frame_cls = None
    frame_partition_cls = None
    query_compiler_cls = None

    @classmethod
    def read(cls, *args, **kwargs):
        """
        Read data according to the passed `args` and `kwargs`.

        Parameters
        ----------
        *args : iterable
            Positional arguments to be passed into `_read` function.
        **kwargs : dict
            Keywords arguments to be passed into `_read` function.

        Returns
        -------
        query_compiler : BaseQueryCompiler
            Query compiler with imported data for further processing.

        Notes
        -----
        `read` delegates to the `_read` implementation of the concrete dispatcher.
        If `_read` raises ``ModinAssumptionError``, the call is redirected to
        `single_worker_read`. Otherwise, unless ``AsyncReadMode`` is enabled, the
        `dtypes` of the result are accessed before it is returned.
        """
        try:
            query_compiler = cls._read(*args, **kwargs)
        except ModinAssumptionError as err:
            # The path is looked up under one of two keyword names and removed
            # from ``kwargs`` so it can be passed positionally to the fallback.
            path_key = "fname" if "path_or_buf" not in kwargs else "path_or_buf"
            source = kwargs.pop(path_key)
            return cls.single_worker_read(source, *args, reason=str(err), **kwargs)

        # TextFileReader can also be returned from `_read`; it has no `dtypes`.
        if AsyncReadMode.get() or not hasattr(query_compiler, "dtypes"):
            return query_compiler

        # at the moment it is not possible to use `wait_partitions` function;
        # in a situation where the reading function is called in a row with the
        # same parameters, `wait_partitions` considers that we have waited for
        # the end of remote calculations, however, when trying to materialize the
        # received data, it is clear that the calculations have not yet ended.
        # for example, `test_io_exp.py::test_read_evaluated_dict` is failed because of that.
        # see #5944 for details
        _ = query_compiler.dtypes
        return query_compiler

    @classmethod
    def _read(cls, *args, **kwargs):
        """
        Read the data from the file.

        Abstract here; the child class has to implement it.

        Parameters
        ----------
        *args : iterable
            Positional arguments of the function.
        **kwargs : dict
            Keywords arguments of the function.
        """
        raise NotImplementedError(NOT_IMPLEMENTED_MESSAGE)

    @classmethod
    def get_path(cls, file_path):
        """
        Normalize `file_path` depending on whether it is a URL.

        Parameters
        ----------
        file_path : str, os.PathLike[str] object or file-like object
            The file, or a path to the file. Paths to S3 buckets are also
            acceptable.

        Returns
        -------
        str
            Updated or verified `file_path` parameter.

        Notes
        -----
        A URL (fsspec or regular) is returned unchanged; anything else is passed
        through ``os.path.abspath``.
        """
        is_remote = is_fsspec_url(file_path) or is_url(file_path)
        return file_path if is_remote else os.path.abspath(file_path)

    @classmethod
    def file_size(cls, f):
        """
        Measure the size of the file behind the handle `f`.

        Parameters
        ----------
        f : file-like object
            File-like object, that should be used to get file size.

        Returns
        -------
        int
            File size in bytes.
        """
        # Jump to the end to read the size, then put the cursor back.
        restore_to = f.tell()
        f.seek(0, os.SEEK_END)
        total_bytes = f.tell()
        f.seek(restore_to, os.SEEK_SET)
        return total_bytes

    @classmethod
    def file_exists(cls, file_path, storage_options=None):
        """
        Tell whether `file_path` exists.

        Parameters
        ----------
        file_path : str
            String that represents the path to the file (paths to S3 buckets
            are also acceptable).
        storage_options : dict, optional
            Keyword from `read_*` functions.

        Returns
        -------
        bool
            Whether file exists or not.
        """
        if not (is_fsspec_url(file_path) or is_url(file_path)):
            return os.path.exists(file_path)

        # botocore is optional: its connection/credential errors are added to the
        # retry triggers only when the package can be imported.
        retry_on = (PermissionError,)
        try:
            from botocore.exceptions import (
                ConnectTimeoutError,
                EndpointConnectionError,
                NoCredentialsError,
            )
        except ModuleNotFoundError:
            pass
        else:
            retry_on = (
                NoCredentialsError,
                PermissionError,
                EndpointConnectionError,
                ConnectTimeoutError,
            )

        # Work on a copy so the caller's dict is untouched; "anon" is dropped
        # because it is passed explicitly on the retry below.
        fs_options = {} if storage_options is None else dict(storage_options)
        fs_options.pop("anon", None)

        fs, _ = fsspec.core.url_to_fs(file_path, **fs_options)
        try:
            return fs.exists(file_path)
        except retry_on:
            fs, _ = fsspec.core.url_to_fs(file_path, anon=True, **fs_options)
            return fs.exists(file_path)

    @classmethod
    def deploy(cls, func, *args, num_returns=1, **kwargs):  # noqa: PR01
        """
        Deploy remote task.

        Abstract here; to be implemented in the task class (for example in the `RayWrapper`).
        """
        raise NotImplementedError(NOT_IMPLEMENTED_MESSAGE)

    def parse(self, func, args, num_returns):  # noqa: PR01
        """
        Parse file's data in the worker process.

        Abstract here; to be implemented in the parser class (for example in the `PandasCSVParser`).
        """
        raise NotImplementedError(NOT_IMPLEMENTED_MESSAGE)

    @classmethod
    def materialize(cls, obj_id):  # noqa: PR01
        """
        Get results from worker.

        Abstract here; to be implemented in the task class (for example in the `RayWrapper`).
        """
        raise NotImplementedError(NOT_IMPLEMENTED_MESSAGE)

    @classmethod
    def build_partition(cls, partition_ids, row_lengths, column_widths):
        """
        Wrap every partition reference into a `cls.frame_partition_cls` object.

        Parameters
        ----------
        partition_ids : list
            Array with references to the partitions data.
        row_lengths : list
            Partitions rows lengths.
        column_widths : list
            Number of columns in each partition.

        Returns
        -------
        np.ndarray
            Array with the same shape as `partition_ids`, filled with
            partition objects.
        """
        grid = [
            [
                cls.frame_partition_cls(
                    ref,
                    length=row_lengths[i],
                    width=column_widths[j],
                )
                for j, ref in enumerate(row)
            ]
            for i, row in enumerate(partition_ids)
        ]
        return np.array(grid)

    @classmethod
    def _file_not_found_msg(cls, filename: str):  # noqa: GL08
        # Message text for a missing file, with the name wrapped in single quotes.
        return f"No such file: '{filename}'"


================================================
FILE: modin/core/io/io.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
Module houses `BaseIO` class.

`BaseIO` is base class for IO classes, that stores IO functions.
"""

from typing import Any

import pandas
from pandas._libs.lib import no_default
from pandas.util._decorators import doc

from modin.core.storage_formats import BaseQueryCompiler
from modin.db_conn import ModinDatabaseConnection
from modin.error_message import ErrorMessage
from modin.pandas.io import ExcelFile
from modin.utils import _inherit_docstrings

# Docstring template for the pandas-backed methods of `BaseIO`: the `{summary}`
# and `{returns}` placeholders are filled in through the `@doc(...)` decorator.
_doc_default_io_method = """
{summary} using pandas.
For parameters description please refer to pandas API.

Returns
-------
{returns}
"""

# Value for `{returns}` when a method returns a query compiler.
_doc_returns_qc = """BaseQueryCompiler
    QueryCompiler with read data."""

# Value for `{returns}` when a method may return a text parser instead.
_doc_returns_qc_or_parser = """BaseQueryCompiler or TextParser
    QueryCompiler or TextParser with read data."""


class BaseIO:
    """Class for basic utils and default implementation of IO functions."""

    query_compiler_cls: BaseQueryCompiler = None
    frame_cls = None
    _should_warn_on_default_to_pandas: bool = True

    @classmethod
    def _maybe_warn_on_default(cls, *, message: str = "", reason: str = "") -> None:
        """
        If this class is configured to warn on default to pandas, warn.

        Parameters
        ----------
        message : str, default: ""
            Method that is causing a default to pandas.
        reason : str, default: ""
            Reason for default.
        """
        if cls._should_warn_on_default_to_pandas:
            ErrorMessage.default_to_pandas(message=message, reason=reason)

    @classmethod
    def from_non_pandas(cls, *args, **kwargs):
        """
        Create a Modin `query_compiler` from a non-pandas `object`.

        Parameters
        ----------
        *args : iterable
            Positional arguments to be passed into `func`.
        **kwargs : dict
            Keyword arguments to be passed into `func`.
        """
        return None

    @classmethod
    def from_pandas(cls, df) -> BaseQueryCompiler:
        """
        Build a Modin `query_compiler` out of a `pandas.DataFrame`.

        Parameters
        ----------
        df : pandas.DataFrame
            The pandas DataFrame to convert from.

        Returns
        -------
        BaseQueryCompiler
            QueryCompiler containing data from the `pandas.DataFrame`.
        """
        # The configured query compiler class does the actual conversion.
        qc_cls = cls.query_compiler_cls
        return qc_cls.from_pandas(df, cls.frame_cls)

    @classmethod
    def from_arrow(cls, at):
        """
        Build a Modin `query_compiler` out of a `pyarrow.Table`.

        Parameters
        ----------
        at : Arrow Table
            The Arrow Table to convert from.

        Returns
        -------
        BaseQueryCompiler
            QueryCompiler containing data from the Arrow Table.
        """
        # The configured query compiler class does the actual conversion.
        qc_cls = cls.query_compiler_cls
        return qc_cls.from_arrow(at, cls.frame_cls)

    @classmethod
    def from_interchange_dataframe(cls, df):
        """
        Build a Modin QueryCompiler out of an object supporting the DataFrame exchange protocol `__dataframe__()`.

        Parameters
        ----------
        df : DataFrame
            The DataFrame object supporting the DataFrame exchange protocol.

        Returns
        -------
        BaseQueryCompiler
            QueryCompiler containing data from the DataFrame.
        """
        # The configured query compiler class does the actual conversion.
        qc_cls = cls.query_compiler_cls
        return qc_cls.from_interchange_dataframe(df, cls.frame_cls)

    @classmethod
    def from_ray(cls, ray_obj):
        """
        Create a Modin `query_compiler` from a Ray Dataset.

        Parameters
        ----------
        ray_obj : ray.data.Dataset
            The Ray Dataset to convert from.

        Returns
        -------
        BaseQueryCompiler
            QueryCompiler containing data from the Ray Dataset.

        Notes
        -----
        Ray Dataset can only be converted to a Modin Dataframe if Modin uses a Ray engine.
        If another engine is used, the runtime exception will be raised.
        """
        raise RuntimeError(
            "Modin Dataframe can only be converted to a Ray Dataset if Modin uses a Ray engine."
        )

    @classmethod
    def from_dask(cls, dask_obj):
        """
        Create a Modin `query_compiler` from a Dask DataFrame.

        Parameters
        ----------
        dask_obj : dask.dataframe.DataFrame
            The Dask DataFrame to convert from.

        Returns
        -------
        BaseQueryCompiler
            QueryCompiler containing data from the Dask DataFrame.

        Notes
        -----
        Dask DataFrame can only be converted to a Modin DataFrame if Modin uses a Dask engine.
        If another engine is used, the runtime exception will be raised.
        """
        raise RuntimeError(
            "Modin DataFrame can only be converted to a Dask DataFrame if Modin uses a Dask engine."
        )

    @classmethod
    def from_map(cls, func, iterable, *args, **kwargs):
        """
        Create a Modin `query_compiler` from a map function.

        This method will construct a Modin `query_compiler` split by row partitions.
        The number of row partitions matches the number of elements in the iterable object.

        Parameters
        ----------
        func : callable
            Function to map across the iterable object.
        iterable : Iterable
            An iterable object.
        *args : tuple
            Positional arguments to pass in `func`.
        **kwargs : dict
            Keyword arguments to pass in `func`.

        Returns
        -------
        BaseQueryCompiler
            QueryCompiler containing data returned by map function.
        """
        raise RuntimeError(
            "Modin DataFrame can only be created if Modin uses Ray, Dask or MPI engine."
        )

    @classmethod
    @_inherit_docstrings(pandas.read_parquet, apilink="pandas.read_parquet")
    @doc(
        _doc_default_io_method,
        summary="Load a parquet object from the file path, returning a query compiler",
        returns=_doc_returns_qc,
    )
    def read_parquet(cls, **kwargs):  # noqa: PR01
        # Default implementation: warn (if configured), read with pandas, then
        # convert the pandas result.
        cls._maybe_warn_on_default(message="`read_parquet`")
        pandas_df = pandas.read_parquet(**kwargs)
        return cls.from_pandas(pandas_df)

    @classmethod
    @_inherit_docstrings(pandas.read_csv, apilink="pandas.read_csv")
    @doc(
        _doc_default_io_method,
        summary="Read a comma-separated values (CSV) file into query compiler",
        returns=_doc_returns_qc_or_parser,
    )
    def read_csv(
        cls,
        filepath_or_buffer,
        **kwargs,
    ):  # noqa: PR01
        cls._maybe_warn_on_default(message="`read_csv`")
        parsed = pandas.read_csv(filepath_or_buffer, **kwargs)
        if isinstance(parsed, pandas.DataFrame):
            return cls.from_pandas(parsed)
        if not isinstance(parsed, pandas.io.parsers.TextFileReader):
            return parsed

        # Overwriting the read method should return a Modin DataFrame for calls
        # to __next__ and get_chunk
        pandas_read = parsed.read

        def read_as_modin(*args, **kw):
            return cls.from_pandas(pandas_read(*args, **kw))

        parsed.read = read_as_modin
        return parsed

    @classmethod
    @_inherit_docstrings(pandas.read_json, apilink="pandas.read_json")
    @doc(
        _doc_default_io_method,
        summary="Convert a JSON string to query compiler",
        returns=_doc_returns_qc,
    )
    def read_json(
        cls,
        **kwargs,
    ):  # noqa: PR01
        # Default implementation: warn (if configured), read with pandas, then
        # convert the pandas result.
        cls._maybe_warn_on_default(message="`read_json`")
        pandas_df = pandas.read_json(**kwargs)
        return cls.from_pandas(pandas_df)

    @classmethod
    @_inherit_docstrings(pandas.read_gbq, apilink="pandas.read_gbq")
    @doc(
        _doc_default_io_method,
        summary="Load data from Google BigQuery into query compiler",
        returns=_doc_returns_qc,
    )
    def read_gbq(
        cls,
        query: str,
        project_id=None,
        index_col=None,
        col_order=None,
        reauth=False,
        auth_local_webserver=False,
        dialect=None,
        location=None,
        configuration=None,
        credentials=None,
        use_bqstorage_api=None,
        private_key=None,
        verbose=None,
        progress_bar_type=None,
        max_results=None,
    ):  # noqa: PR01
        cls._maybe_warn_on_default(message="`read_gbq`")
        # `private_key` and `verbose` are accepted by this signature but are not
        # forwarded to pandas.
        gbq_options = dict(
            project_id=project_id,
            index_col=index_col,
            col_order=col_order,
            reauth=reauth,
            auth_local_webserver=auth_local_webserver,
            dialect=dialect,
            location=location,
            configuration=configuration,
            credentials=credentials,
            use_bqstorage_api=use_bqstorage_api,
            progress_bar_type=progress_bar_type,
            max_results=max_results,
        )
        pandas_df = pandas.read_gbq(query, **gbq_options)
        return cls.from_pandas(pandas_df)

    @classmethod
    @_inherit_docstrings(pandas.read_html, apilink="pandas.read_html")
    @doc(
        _doc_default_io_method,
        summary="Read HTML tables into query compiler",
        returns=_doc_returns_qc,
    )
    def read_html(
        cls,
        io,
        *,
        match=".+",
        flavor=None,
        header=None,
        index_col=None,
        skiprows=None,
        attrs=None,
        parse_dates=False,
        thousands=",",
        encoding=None,
        decimal=".",
        converters=None,
        na_values=None,
        keep_default_na=True,
        displayed_only=True,
        **kwargs,
    ):  # noqa: PR01
        cls._maybe_warn_on_default(message="`read_html`")
        named_options = dict(
            match=match,
            flavor=flavor,
            header=header,
            index_col=index_col,
            skiprows=skiprows,
            attrs=attrs,
            parse_dates=parse_dates,
            thousands=thousands,
            encoding=encoding,
            decimal=decimal,
            converters=converters,
            na_values=na_values,
            keep_default_na=keep_default_na,
            displayed_only=displayed_only,
        )
        tables = pandas.read_html(io=io, **named_options, **kwargs)
        # Conversion is lazy: each table is converted only when the returned
        # generator is consumed.
        return (cls.from_pandas(table) for table in tables)

    @classmethod
    @_inherit_docstrings(pandas.read_clipboard, apilink="pandas.read_clipboard")
    @doc(
        _doc_default_io_method,
        summary="Read text from clipboard into query compiler",
        returns=_doc_returns_qc,
    )
    def read_clipboard(cls, sep=r"\s+", **kwargs):  # pragma: no cover # noqa: PR01
        # Default implementation: warn (if configured), read with pandas, then
        # convert the pandas result.
        cls._maybe_warn_on_default(message="`read_clipboard`")
        pandas_df = pandas.read_clipboard(sep=sep, **kwargs)
        return cls.from_pandas(pandas_df)

    @classmethod
    @_inherit_docstrings(pandas.read_excel, apilink="pandas.read_excel")
    @doc(
        _doc_default_io_method,
        summary="Read an Excel file into query compiler",
        returns="""BaseQueryCompiler or dict :
    QueryCompiler or dict with read data.""",
    )
    def read_excel(cls, **kwargs):  # noqa: PR01
        cls._maybe_warn_on_default(message="`read_excel`")
        source = kwargs["io"]
        if isinstance(source, ExcelFile):
            # otherwise, Modin objects may be passed to the pandas context, resulting
            # in undefined behavior
            # for example in the case: pd.read_excel(pd.ExcelFile), since reading from
            # pd.ExcelFile in `read_excel` isn't supported
            source._set_pandas_mode()
        intermediate = pandas.read_excel(**kwargs)
        if not isinstance(intermediate, dict):
            return cls.from_pandas(intermediate)
        # A dict result is rebuilt as the same mapping type, key by key, with
        # every value converted.
        parsed = type(intermediate)()
        for key, frame in intermediate.items():
            parsed[key] = cls.from_pandas(frame)
        return parsed

    @classmethod
    @_inherit_docstrings(pandas.read_hdf, apilink="pandas.read_hdf")
    @doc(
        _doc_default_io_method,
        summary="Read data from hdf store into query compiler",
        returns=_doc_returns_qc,
    )
    def read_hdf(
        cls,
        path_or_buf,
        key=None,
        mode: str = "r",
        errors: str = "strict",
        where=None,
        start=None,
        stop=None,
        columns=None,
        iterator=False,
        chunksize=None,
        **kwargs,
    ):  # noqa: PR01
        from modin.pandas.io import HDFStore

        cls._maybe_warn_on_default(message="`read_hdf`")
        modin_store = isinstance(path_or_buf, HDFStore)
        if modin_store:
            # Switch the Modin store off while pandas reads from it.
            path_or_buf._return_modin_dataframe = False
        try:
            df = pandas.read_hdf(
                path_or_buf,
                key=key,
                mode=mode,
                columns=columns,
                errors=errors,
                where=where,
                start=start,
                stop=stop,
                iterator=iterator,
                chunksize=chunksize,
                **kwargs,
            )
        finally:
            # Restore the flag even when pandas raises, so a failed read does
            # not leave the caller's store with the flag switched off.
            if modin_store:
                path_or_buf._return_modin_dataframe = True

        return cls.from_pandas(df)

    @classmethod
    @_inherit_docstrings(pandas.read_feather, apilink="pandas.read_feather")
    @doc(
        _doc_default_io_method,
        summary="Load a feather-format object from the file path into query compiler",
        returns=_doc_returns_qc,
    )
    def read_feather(
        cls,
        path,
        **kwargs,
    ):  # noqa: PR01
        # Default implementation: warn (if configured), read with pandas, then
        # convert the pandas result.
        cls._maybe_warn_on_default(message="`read_feather`")
        pandas_df = pandas.read_feather(path, **kwargs)
        return cls.from_pandas(pandas_df)

    @classmethod
    @_inherit_docstrings(pandas.read_stata, apilink="pandas.read_stata")
    @doc(
        _doc_default_io_method,
        summary="Read Stata file into query compiler",
        returns=_doc_returns_qc,
    )
    def read_stata(
        cls,
        filepath_or_buffer,
        **kwargs,
    ):  # noqa: PR01
        # Default implementation: warn (if configured), read with pandas, then
        # convert the pandas result.
        cls._maybe_warn_on_default(message="`read_stata`")
        pandas_df = pandas.read_stata(filepath_or_buffer, **kwargs)
        return cls.from_pandas(pandas_df)

    @classmethod
    @_inherit_docstrings(pandas.read_sas, apilink="pandas.read_sas")
    @doc(
        _doc_default_io_method,
        summary="Read SAS files stored as either XPORT or SAS7BDAT format files\ninto query compiler",
        returns=_doc_returns_qc,
    )
    def read_sas(
        cls,
        filepath_or_buffer,
        *,
        format=None,
        index=None,
        encoding=None,
        chunksize=None,
        iterator=False,
        **kwargs,
    ):  # pragma: no cover # noqa: PR01
        cls._maybe_warn_on_default(message="`read_sas`")
        sas_options = dict(
            format=format,
            index=index,
            encoding=encoding,
            chunksize=chunksize,
            iterator=iterator,
        )
        pandas_result = pandas.read_sas(filepath_or_buffer, **sas_options, **kwargs)
        return cls.from_pandas(pandas_result)

    @classmethod
    @_inherit_docstrings(pandas.read_pickle, apilink="pandas.read_pickle")
    @doc(
        _doc_default_io_method,
        summary="Load pickled pandas object (or any object) from file into query compiler",
        returns=_doc_returns_qc,
    )
    def read_pickle(
        cls,
        filepath_or_buffer,
        **kwargs,
    ):  # noqa: PR01
        # Default implementation: warn (if configured), unpickle with pandas, then
        # convert the result.
        cls._maybe_warn_on_default(message="`read_pickle`")
        unpickled = pandas.read_pickle(filepath_or_buffer, **kwargs)
        return cls.from_pandas(unpickled)

    @classmethod
    @_inherit_docstrings(pandas.read_sql, apilink="pandas.read_sql")
    @doc(
        _doc_default_io_method,
        summary="Read SQL query or database table into query compiler",
        returns=_doc_returns_qc,
    )
    def read_sql(
        cls,
        sql,
        con,
        index_col=None,
        coerce_float=True,
        params=None,
        parse_dates=None,
        columns=None,
        chunksize=None,
        dtype_backend=no_default,
        dtype=None,
    ):  # noqa: PR01
        cls._maybe_warn_on_default(message="`read_sql`")
        # A Modin connection wrapper is unwrapped before being given to pandas.
        connection = (
            con.get_connection() if isinstance(con, ModinDatabaseConnection) else con
        )
        sql_options = dict(
            index_col=index_col,
            coerce_float=coerce_float,
            params=params,
            parse_dates=parse_dates,
            columns=columns,
            chunksize=chunksize,
            dtype_backend=dtype_backend,
            dtype=dtype,
        )
        outcome = pandas.read_sql(sql, connection, **sql_options)

        if not isinstance(outcome, (pandas.DataFrame, pandas.Series)):
            # Anything else is iterated, converting each item lazily.
            return (cls.from_pandas(chunk) for chunk in outcome)
        return cls.from_pandas(outcome)

    @classmethod
    @_inherit_docstrings(pandas.read_fwf, apilink="pandas.read_fwf")
    @doc(
        _doc_default_io_method,
        summary="Read a table of fixed-width formatted lines into query compiler",
        returns=_doc_returns_qc_or_parser,
    )
    def read_fwf(
        cls,
        filepath_or_buffer,
        *,
        colspecs="infer",
        widths=None,
        infer_nrows=100,
        dtype_backend=no_default,
        iterator=False,
        chunksize=None,
        **kwds,
    ):  # noqa: PR01
        cls._maybe_warn_on_default(message="`read_fwf`")
        fwf_options = dict(
            colspecs=colspecs,
            widths=widths,
            infer_nrows=infer_nrows,
            dtype_backend=dtype_backend,
            iterator=iterator,
            chunksize=chunksize,
        )
        parsed = pandas.read_fwf(filepath_or_buffer, **fwf_options, **kwds)
        if isinstance(parsed, pandas.DataFrame):
            return cls.from_pandas(parsed)
        if not isinstance(parsed, pandas.io.parsers.TextFileReader):
            return parsed

        # Overwriting the read method should return a Modin DataFrame for calls
        # to __next__ and get_chunk
        pandas_read = parsed.read

        def read_as_modin(*args, **kwargs):
            return cls.from_pandas(pandas_read(*args, **kwargs))

        parsed.read = read_as_modin
        return parsed

    @classmethod
    @_inherit_docstrings(pandas.read_sql_table, apilink="pandas.read_sql_table")
    @doc(
        _doc_default_io_method,
        summary="Read SQL database table into query compiler",
        returns=_doc_returns_qc,
    )
    def read_sql_table(
        cls,
        table_name,
        con,
        schema=None,
        index_col=None,
        coerce_float=True,
        parse_dates=None,
        columns=None,
        chunksize=None,
        dtype_backend=no_default,
    ):  # noqa: PR01
        cls._maybe_warn_on_default(message="`read_sql_table`")
        table_options = dict(
            schema=schema,
            index_col=index_col,
            coerce_float=coerce_float,
            parse_dates=parse_dates,
            columns=columns,
            chunksize=chunksize,
            dtype_backend=dtype_backend,
        )
        pandas_result = pandas.read_sql_table(table_name, con, **table_options)
        return cls.from_pandas(pandas_result)

    @classmethod
    @_inherit_docstrings(pandas.read_sql_query, apilink="pandas.read_sql_query")
    @doc(
        _doc_default_io_method,
        summary="Read SQL query into query compiler",
        returns=_doc_returns_qc,
    )
    def read_sql_query(
        cls,
        sql,
        con,
        **kwargs,
    ):  # noqa: PR01
        # Default implementation: warn (if configured), run the query through
        # pandas, then convert the result.
        cls._maybe_warn_on_default(message="`read_sql_query`")
        pandas_result = pandas.read_sql_query(sql, con, **kwargs)
        return cls.from_pandas(pandas_result)

    @classmethod
    @_inherit_docstrings(pandas.read_spss, apilink="pandas.read_spss")
    @doc(
        _doc_default_io_method,
        summary="Load an SPSS file from the file path, returning a query compiler",
        returns=_doc_returns_qc,
    )
    def read_spss(
        cls, path, usecols, convert_categoricals, dtype_backend
    ):  # noqa: PR01
        cls._maybe_warn_on_default(message="`read_spss`")
        spss_options = dict(
            usecols=usecols,
            convert_categoricals=convert_categoricals,
            dtype_backend=dtype_backend,
        )
        pandas_df = pandas.read_spss(path, **spss_options)
        return cls.from_pandas(pandas_df)

    @classmethod
    @_inherit_docstrings(pandas.DataFrame.to_sql, apilink="pandas.DataFrame.to_sql")
    def to_sql(
        cls,
        qc,
        name,
        con,
        schema=None,
        if_exists="fail",
        index=True,
        index_label=None,
        chunksize=None,
        dtype=None,
        method=None,
    ):  # noqa: PR01
        """
        Write the records held by a query compiler to a SQL database using pandas.

        For parameters description please refer to pandas API.
        """
        cls._maybe_warn_on_default(message="`to_sql`")
        write_options = dict(
            name=name,
            con=con,
            schema=schema,
            if_exists=if_exists,
            index=index,
            index_label=index_label,
            chunksize=chunksize,
            dtype=dtype,
            method=method,
        )
        # The value returned by pandas is discarded; this method returns None.
        qc.to_pandas().to_sql(**write_options)

    @classmethod
    @_inherit_docstrings(
        pandas.DataFrame.to_pickle, apilink="pandas.DataFrame.to_pickle"
    )
    def to_pickle(
        cls,
        obj: Any,
        filepath_or_buffer,
        **kwargs,
    ):  # noqa: PR01, D200
        """
        Pickle (serialize) object to file using pandas.
        """
        cls._maybe_warn_on_default(message="`to_pickle`")
        # Only query compilers are converted; any other object is pickled as is.
        payload = obj.to_pandas() if isinstance(obj, BaseQueryCompiler) else obj
        return pandas.to_pickle(
            payload,
            filepath_or_buffer=filepath_or_buffer,
            **kwargs,
        )

    @classmethod
    @_inherit_docstrings(pandas.DataFrame.to_csv, apilink="pandas.DataFrame.to_csv")
    def to_csv(cls, obj, **kwargs):  # noqa: PR01
        """
        Write the object to a comma-separated values (CSV) file using pandas.

        For parameters description please refer to pandas API.
        """
        cls._maybe_warn_on_default(message="`to_csv`")
        # Only query compilers are converted; other objects are used as is.
        pandas_obj = obj.to_pandas() if isinstance(obj, BaseQueryCompiler) else obj
        return pandas_obj.to_csv(**kwargs)

    @classmethod
    @_inherit_docstrings(pandas.DataFrame.to_json, apilink="pandas.DataFrame.to_json")
    def to_json(cls, obj, path, **kwargs):  # noqa: PR01
        """
        Serialize the object to JSON using pandas.

        For parameters description please refer to pandas API.
        """
        cls._maybe_warn_on_default(message="`to_json`")
        # Only query compilers are converted; other objects are used as is.
        pandas_obj = obj.to_pandas() if isinstance(obj, BaseQueryCompiler) else obj
        return pandas_obj.to_json(path, **kwargs)

    @classmethod
    @_inherit_docstrings(pandas.Series.to_json, apilink="pandas.Series.to_json")
    def to_json_series(cls, obj, path, **kwargs):  # noqa: PR01
        """
        Serialize the object to JSON using pandas.

        For parameters description please refer to pandas API.
        """
        cls._maybe_warn_on_default(message="`to_json`")
        if isinstance(obj, BaseQueryCompiler):
            # The pandas frame is squeezed along axis 1 before serialization.
            pandas_obj = obj.to_pandas().squeeze(axis=1)
        else:
            pandas_obj = obj
        return pandas_obj.to_json(path, **kwargs)

    @classmethod
    @_inherit_docstrings(pandas.DataFrame.to_xml, apilink="pandas.DataFrame.to_xml")
    def to_xml(cls, obj, path_or_buffer, **kwargs):  # noqa: PR01
        """
        Serialize the object to XML using pandas.

        For parameters description please refer to pandas API.
        """
        cls._maybe_warn_on_default(message="`to_xml`")
        # Only query compilers are converted; other objects are used as is.
        pandas_obj = obj.to_pandas() if isinstance(obj, BaseQueryCompiler) else obj
        return pandas_obj.to_xml(path_or_buffer, **kwargs)

    @classmethod
    @_inherit_docstrings(
        pandas.DataFrame.to_parquet, apilink="pandas.DataFrame.to_parquet"
    )
    def to_parquet(cls, obj, path, **kwargs):  # noqa: PR01
        """
        Serialize `obj` to the binary parquet format via pandas.

        A query compiler is materialized into a pandas object first; any other
        object is written as is.

        For parameters description please refer to pandas API.
        """
        cls._maybe_warn_on_default(message="`to_parquet`")
        pandas_obj = obj.to_pandas() if isinstance(obj, BaseQueryCompiler) else obj
        return pandas_obj.to_parquet(path, **kwargs)

    @classmethod
    def to_ray(cls, modin_obj):
        """
        Export a Modin DataFrame/Series as a Ray Dataset.

        Parameters
        ----------
        modin_obj : modin.pandas.DataFrame, modin.pandas.Series
            The Modin DataFrame/Series to convert.

        Returns
        -------
        ray.data.Dataset
            Converted object with type depending on input.

        Raises
        ------
        RuntimeError
            Always, since this implementation performs no conversion.

        Notes
        -----
        Modin DataFrame/Series can only be converted to a Ray Dataset if Modin uses a Ray engine.
        If another engine is used, the runtime exception will be raised.
        """
        error_message = "Modin Dataframe can only be converted to a Ray Dataset if Modin uses a Ray engine."
        raise RuntimeError(error_message)

    @classmethod
    def to_dask(cls, modin_obj):
        """
        Export a Modin DataFrame/Series as a Dask DataFrame/Series.

        Parameters
        ----------
        modin_obj : modin.pandas.DataFrame, modin.pandas.Series
            The Modin DataFrame/Series to convert.

        Returns
        -------
        dask.dataframe.DataFrame or dask.dataframe.Series
            Converted object with type depending on input.

        Raises
        ------
        RuntimeError
            Always, since this implementation performs no conversion.

        Notes
        -----
        Modin DataFrame/Series can only be converted to a Dask DataFrame/Series if Modin uses a Dask engine.
        If another engine is used, the runtime exception will be raised.
        """
        error_message = "Modin DataFrame can only be converted to a Dask DataFrame if Modin uses a Dask engine."
        raise RuntimeError(error_message)


================================================
FILE: modin/core/io/sql/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""SQL format type IO functions implementations."""


================================================
FILE: modin/core/io/sql/sql_dispatcher.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
Module houses `SQLDispatcher` class.

`SQLDispatcher` contains utils for handling SQL queries or database tables,
inherits util functions for handling files from `FileDispatcher` class and can be
used as base class for dispatchers of SQL queries.
"""

import math

import numpy as np
import pandas

from modin.config import NPartitions, ReadSqlEngine
from modin.core.io.file_dispatcher import FileDispatcher
from modin.db_conn import ModinDatabaseConnection


class SQLDispatcher(FileDispatcher):
    """Class handles utils for reading SQL queries or database tables."""

    @classmethod
    def _is_supported_sqlalchemy_object(cls, obj):  # noqa: GL08
        """
        Check whether `obj` is a SQLAlchemy ``Engine`` or ``Connection``.

        Parameters
        ----------
        obj : object
            Object to check.

        Returns
        -------
        bool
            ``True`` if SQLAlchemy is importable and `obj` is an instance of
            ``sqlalchemy.engine.Engine`` or ``sqlalchemy.engine.Connection``,
            ``False`` otherwise (including when SQLAlchemy is not installed).
        """
        try:
            import sqlalchemy as sa
        except ImportError:
            # Without SQLAlchemy installed nothing can be a SQLAlchemy object.
            return False
        return isinstance(obj, (sa.engine.Engine, sa.engine.Connection))

    @classmethod
    def _read(cls, sql, con, index_col=None, **kwargs):
        """
        Read a SQL query or database table into a query compiler.

        Parameters
        ----------
        sql : str or SQLAlchemy Selectable (select or text object)
            SQL query to be executed or a table name.
        con : SQLAlchemy connectable, str, sqlite3 connection, or ModinDatabaseConnection
            Connection object to database.
        index_col : str or list of str, optional
            Column(s) to set as index(MultiIndex).
        **kwargs : dict
            Parameters to pass into `pandas.read_sql` function.

        Returns
        -------
        BaseQueryCompiler
            Query compiler with imported data for further processing.
        """
        if isinstance(con, str):
            con = ModinDatabaseConnection("sqlalchemy", con)

        # SQLAlchemy objects are replaced by a ModinDatabaseConnection built
        # from their URL so that every worker can open its own connection.
        if cls._is_supported_sqlalchemy_object(con):
            con = ModinDatabaseConnection(
                "sqlalchemy", con.engine.url.render_as_string(hide_password=False)
            )

        if not isinstance(con, ModinDatabaseConnection):
            return cls.single_worker_read(
                sql,
                con=con,
                index_col=index_col,
                read_sql_engine=ReadSqlEngine.get(),
                reason="To use the parallel implementation of `read_sql`, pass either "
                + "a SQLAlchemy connectable, the SQL connection string, or a ModinDatabaseConnection "
                + "with the arguments required to make a connection, instead "
                + f"of {type(con)}. For documentation on the ModinDatabaseConnection, see "
                + "https://modin.readthedocs.io/en/latest/supported_apis/io_supported.html#connecting-to-a-database-for-read-sql",
                **kwargs,
            )
        row_count_query = con.row_count_query(sql)
        connection_for_pandas = con.get_connection()
        column_names_query = con.column_names_query(sql)
        row_cnt = pandas.read_sql(row_count_query, connection_for_pandas).squeeze()
        cols_names_df = pandas.read_sql(
            column_names_query, connection_for_pandas, index_col=index_col
        )
        cols_names = cols_names_df.columns
        num_partitions = NPartitions.get()
        partition_ids = [None] * num_partitions
        index_ids = [None] * num_partitions
        # Every partition reads at most `limit` rows starting at its own offset.
        limit = math.ceil(row_cnt / num_partitions)
        # The engine setting does not depend on the partition: read it once.
        read_sql_engine = ReadSqlEngine.get()
        for part in range(num_partitions):
            offset = part * limit
            query = con.partition_query(sql, limit, offset)
            # `deploy` yields `num_partitions` data blocks followed by the index
            # information and the dtypes; the dtypes are not used here.
            *blocks, index_ids[part], _ = cls.deploy(
                func=cls.parse,
                f_kwargs={
                    "num_splits": num_partitions,
                    "sql": query,
                    "con": con,
                    "index_col": index_col,
                    "read_sql_engine": read_sql_engine,
                    **kwargs,
                },
                num_returns=num_partitions + 2,
            )
            partition_ids[part] = [cls.frame_partition_cls(obj) for obj in blocks]
        if index_col is None:  # sum all lens returned from partitions
            index_lens = cls.materialize(index_ids)
            new_index = pandas.RangeIndex(sum(index_lens))
        else:  # concat index returned from partitions
            index_lst = [
                x for part_index in cls.materialize(index_ids) for x in part_index
            ]
            new_index = pandas.Index(index_lst).set_names(index_col)
        new_frame = cls.frame_cls(np.array(partition_ids), new_index, cols_names)
        new_frame.synchronize_labels(axis=0)
        return cls.query_compiler_cls(new_frame)

    @classmethod
    def write(cls, qc, **kwargs):
        """
        Write records stored in the `qc` to a SQL database.

        Parameters
        ----------
        qc : BaseQueryCompiler
            The query compiler of the Modin dataframe that we want to run ``to_sql`` on.
        **kwargs : dict
            Parameters for ``pandas.to_sql(**kwargs)``.
        """
        # we first insert an empty DF in order to create the full table in the database
        # This also helps to validate the input against pandas
        # we would like to_sql() to complete only when all rows have been inserted into the database
        # since the mapping operation is non-blocking, each partition will return an empty DF
        # so at the end, the blocking operation will be this empty DF to_pandas

        con = kwargs["con"]
        is_sqlalchemy_con = cls._is_supported_sqlalchemy_object(con)
        if not isinstance(con, str) and not is_sqlalchemy_con:
            return cls.base_io.to_sql(qc, **kwargs)

        # In the case that we are given a SQLAlchemy Connection or Engine, the objects
        # are not pickleable. We have to convert it to the URL string and connect from
        # each of the workers.
        if is_sqlalchemy_con:
            kwargs["con"] = con.engine.url.render_as_string(hide_password=False)

        empty_df = qc.getitem_row_array([0]).to_pandas().head(0)
        empty_df.to_sql(**kwargs)
        # so each partition will append its respective DF
        kwargs["if_exists"] = "append"
        columns = qc.columns

        def func(df):  # pragma: no cover
            """
            Override column names in the wrapped dataframe and convert it to SQL.

            Notes
            -----
            This function returns an empty ``pandas.DataFrame`` because ``apply_full_axis``
            expects a Frame object as a result of operation (and ``to_sql`` has no dataframe result).
            """
            df.columns = columns
            df.to_sql(**kwargs)
            return pandas.DataFrame()

        # Ensure that the metadata is synchronized
        qc._modin_frame._propagate_index_objs(axis=None)
        result = qc._modin_frame.apply_full_axis(1, func, new_index=[], new_columns=[])
        # Block until every partition has finished its insert.
        cls.materialize(
            [part.list_of_blocks[0] for row in result._partitions for part in row]
        )


================================================
FILE: modin/core/io/text/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Text format type IO functions implementations."""


================================================
FILE: modin/core/io/text/csv_dispatcher.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses `CSVDispatcher` class, that is used for reading `.csv` files."""

from modin.core.io.text.text_file_dispatcher import TextFileDispatcher


class CSVDispatcher(TextFileDispatcher):
    """
    Class handles utils for reading `.csv` files.

    Notes
    -----
    The class defines no members of its own: all of its behavior is inherited
    from `TextFileDispatcher`.
    """


================================================
FILE: modin/core/io/text/excel_dispatcher.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses `ExcelDispatcher` class, that is used for reading excel files."""

import os
import re
import warnings
from io import BytesIO

import pandas
from pandas.io.common import stringify_path

from modin.config import NPartitions
from modin.core.io.text.text_file_dispatcher import TextFileDispatcher
from modin.pandas.io import ExcelFile

EXCEL_READ_BLOCK_SIZE = 4096


class ExcelDispatcher(TextFileDispatcher):
    """Class handles utils for reading excel files."""

    @classmethod
    def _read(cls, io, **kwargs):
        """
        Read data from `io` according to the passed `read_excel` `kwargs` parameters.

        Parameters
        ----------
        io : str, bytes, ExcelFile, xlrd.Book, path object, or file-like object
            `io` parameter of `read_excel` function.
        **kwargs : dict
            Parameters of `read_excel` function.

        Returns
        -------
        new_query_compiler : BaseQueryCompiler
            Query compiler with imported data for further processing.

        Notes
        -----
        The read is delegated to ``single_worker_read`` when a non-``openpyxl``
        engine is requested, when `skiprows` is passed, when `io` is not a
        ``str``, ``os.PathLike`` or ``io.BytesIO`` object, when several sheets
        are requested, or when the sheet contains no complete row.
        """
        io = stringify_path(io)
        if (
            kwargs.get("engine", None) is not None
            and kwargs.get("engine") != "openpyxl"
        ):
            return cls.single_worker_read(
                io,
                reason="Modin only implements parallel `read_excel` with `openpyxl` engine, "
                + 'please specify `engine=None` or `engine="openpyxl"` to '
                + "use Modin's parallel implementation.",
                **kwargs
            )

        if kwargs.get("skiprows") is not None:
            return cls.single_worker_read(
                io,
                reason="Modin doesn't support 'skiprows' parameter of `read_excel`",
                **kwargs
            )

        if isinstance(io, bytes):
            io = BytesIO(io)

        # isinstance(ExcelFile, os.PathLike) == True
        if not isinstance(io, (str, os.PathLike, BytesIO)) or isinstance(
            io, (ExcelFile, pandas.ExcelFile)
        ):
            if isinstance(io, ExcelFile):
                io._set_pandas_mode()
            return cls.single_worker_read(
                io,
                reason="Modin only implements parallel `read_excel` the following types of `io`: "
                + "str, os.PathLike, io.BytesIO.",
                **kwargs
            )

        from zipfile import ZipFile

        from openpyxl.reader.excel import ExcelReader
        from openpyxl.worksheet._reader import WorksheetReader
        from openpyxl.worksheet.worksheet import Worksheet

        from modin.core.storage_formats.pandas.parsers import PandasExcelParser

        sheet_name = kwargs.get("sheet_name", 0)
        if sheet_name is None or isinstance(sheet_name, list):
            return cls.single_worker_read(
                io,
                reason="`read_excel` functionality is only implemented for a single sheet at a "
                + "time. Multiple sheet reading coming soon!",
                **kwargs
            )

        warnings.warn(
            "Parallel `read_excel` is a new feature! If you run into any "
            + "problems, please visit https://github.com/modin-project/modin/issues. "
            + "If you find a new issue and can't file it on GitHub, please "
            + "email bug_reports@modin.org."
        )

        # NOTE: ExcelReader() in read-only mode does not close file handle by itself
        # work around that by passing file object if we received some path
        io_file = open(io, "rb") if isinstance(io, (str, os.PathLike)) else io
        try:
            ex = ExcelReader(io_file, read_only=True)
            ex.read()
            wb = ex.wb

            # Get shared strings
            ex.read_manifest()
            ex.read_strings()
            ws = Worksheet(wb)
        finally:
            if isinstance(io, (str, os.PathLike)):
                # close only if it were us who opened the object
                io_file.close()

        pandas_kw = dict(kwargs)  # preserve original kwargs
        with ZipFile(io) as z:
            # Convert index to sheet name in file
            if isinstance(sheet_name, int):
                sheet_name = "sheet{}".format(sheet_name + 1)
            else:
                sheet_name = "sheet{}".format(wb.sheetnames.index(sheet_name) + 1)
            # The archive member may be spelled either `sheetN` or `SheetN`.
            if any(sheet_name.lower() in name for name in z.namelist()):
                sheet_name = sheet_name.lower()
            elif any(sheet_name.title() in name for name in z.namelist()):
                sheet_name = sheet_name.title()
            else:
                raise ValueError("Sheet {} not found".format(sheet_name.lower()))
            # Pass this value to the workers
            kwargs["sheet_name"] = sheet_name

            # Load the whole sheet XML into memory and release the archive
            # member handle right away.
            with z.open("xl/worksheets/{}.xml".format(sheet_name)) as sheet_file:
                f = BytesIO(sheet_file.read())
            total_bytes = cls.file_size(f)

            # Read some bytes from the sheet so we can extract the XML header and first
            # line. We need to make sure we get the first line of the data as well
            # because that is where the column names are. The header information will
            # be extracted and sent to all of the nodes.
            sheet_block = f.read(EXCEL_READ_BLOCK_SIZE)
            end_of_row_tag = b"</row>"
            while end_of_row_tag not in sheet_block:
                next_block = f.read(EXCEL_READ_BLOCK_SIZE)
                if not next_block:
                    # The end of the sheet was reached without finding a
                    # complete row (e.g. an empty sheet). Without this check
                    # the loop would never terminate, so let pandas read the
                    # sheet using the caller's original arguments.
                    return cls.single_worker_read(
                        io,
                        reason="Modin could not find a complete row in the requested "
                        + "sheet, so there is nothing to read in parallel.",
                        **pandas_kw
                    )
                sheet_block += next_block
            idx_of_header_end = sheet_block.index(end_of_row_tag) + len(end_of_row_tag)
            sheet_header_with_first_row = sheet_block[:idx_of_header_end]

            if kwargs["header"] is not None:
                # Reset the file pointer to begin at the end of the header information.
                f.seek(idx_of_header_end)
                sheet_header = sheet_header_with_first_row
            else:
                start_of_row_tag = b"<row"
                idx_of_header_start = sheet_block.index(start_of_row_tag)
                sheet_header = sheet_block[:idx_of_header_start]
                # Reset the file pointer to begin at the end of the header information.
                f.seek(idx_of_header_start)

            kwargs["_header"] = sheet_header
            footer = b"</sheetData></worksheet>"
            # Use openpyxml to parse the data
            common_args = (
                ws,
                BytesIO(sheet_header_with_first_row + footer),
                ex.shared_strings,
                False,
            )
            if cls.need_rich_text_param():
                reader = WorksheetReader(*common_args, rich_text=False)
            else:
                reader = WorksheetReader(*common_args)
            # Attach cells to the worksheet
            reader.bind_cells()
            data = PandasExcelParser.get_sheet_data(
                ws, kwargs.get("convert_float", True)
            )
            # Extract column names from parsed data.
            if kwargs["header"] is None:
                column_names = pandas.RangeIndex(len(data[0]))
            else:
                column_names = pandas.Index(data[0])
            index_col = kwargs.get("index_col", None)
            # Remove column names that are specified as `index_col`
            if index_col is not None:
                column_names = column_names.drop(column_names[index_col])

            if not all(column_names) or kwargs.get("usecols"):
                # some column names are empty, use pandas reader to take the names from it
                pandas_kw["nrows"] = 1
                df = pandas.read_excel(io, **pandas_kw)
                column_names = df.columns

            # Compute partition metadata upfront so it is uniform for all partitions
            chunk_size = max(1, (total_bytes - f.tell()) // NPartitions.get())
            column_widths, num_splits = cls._define_metadata(
                pandas.DataFrame(columns=column_names), column_names
            )
            kwargs["fname"] = io
            # Skiprows will be used to inform a partition how many rows come before it.
            kwargs["skiprows"] = 0
            row_count = 0
            data_ids = []
            index_ids = []
            dtypes_ids = []

            kwargs["num_splits"] = num_splits

            while f.tell() < total_bytes:
                args = kwargs
                # Accumulate the number of rows handed to the previous partitions.
                args["skiprows"] = row_count + args["skiprows"]
                args["start"] = f.tell()
                chunk = f.read(chunk_size)
                # This edge case can happen when we have reached the end of the data
                # but not the end of the file.
                if b"<row" not in chunk:
                    break
                row_close_tag = b"</row>"
                row_count = re.subn(row_close_tag, b"", chunk)[1]

                # Make sure we are reading at least one row.
                while row_count == 0:
                    chunk += f.read(chunk_size)
                    row_count += re.subn(row_close_tag, b"", chunk)[1]

                # Move the file pointer back so that the partition ends right
                # after the last complete row contained in the chunk.
                last_index = chunk.rindex(row_close_tag)
                f.seek(-(len(chunk) - last_index) + len(row_close_tag), 1)
                args["end"] = f.tell()

                # If there is no data, exit before triggering computation.
                if b"</row>" not in chunk and b"</sheetData>" in chunk:
                    break
                remote_results_list = cls.deploy(
                    func=cls.parse,
                    f_kwargs=args,
                    num_returns=num_splits + 2,
                )
                # The last two results are the index information and the dtypes,
                # everything before them is partition data.
                data_ids.append(remote_results_list[:-2])
                index_ids.append(remote_results_list[-2])
                dtypes_ids.append(remote_results_list[-1])

                # The end of the spreadsheet
                if b"</sheetData>" in chunk:
                    break

        # Compute the index based on a sum of the lengths of each partition (by default)
        # or based on the column(s) that were requested.
        if index_col is None:
            row_lengths = cls.materialize(index_ids)
            new_index = pandas.RangeIndex(sum(row_lengths))
        else:
            index_objs = cls.materialize(index_ids)
            row_lengths = [len(o) for o in index_objs]
            new_index = index_objs[0].append(index_objs[1:])

        data_ids = cls.build_partition(data_ids, row_lengths, column_widths)

        # Compute dtypes by collecting and combining all of the partitions. The
        # reported dtypes from differing rows can be different based on the inference in
        # the limited data seen by each worker. We use pandas to compute the exact dtype
        # over the whole column for each column. The index is set below.
        dtypes = cls.get_dtypes(dtypes_ids, column_names)

        new_frame = cls.frame_cls(
            data_ids,
            new_index,
            column_names,
            row_lengths,
            column_widths,
            dtypes=dtypes,
        )
        new_query_compiler = cls.query_compiler_cls(new_frame)
        if index_col is None:
            new_query_compiler._modin_frame.synchronize_labels(axis=0)
        return new_query_compiler


================================================
FILE: modin/core/io/text/fwf_dispatcher.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses `FWFDispatcher` class, that is used for reading of tables with fixed-width formatted lines."""

from typing import Optional, Sequence, Tuple, Union

from modin.core.io.text.text_file_dispatcher import TextFileDispatcher


class FWFDispatcher(TextFileDispatcher):
    """Class handles utils for reading of tables with fixed-width formatted lines."""

    @classmethod
    def check_parameters_support(
        cls,
        filepath_or_buffer,
        read_kwargs: dict,
        skiprows_md: Union[Sequence, callable, int],
        header_size: int,
    ) -> Tuple[bool, Optional[str]]:
        """
        Tell whether the given `read_fwf` parameters are supported.

        Parameters
        ----------
        filepath_or_buffer : str, path object or file-like object
            `filepath_or_buffer` parameter of `read_fwf` function.
        read_kwargs : dict
            Parameters of `read_fwf` function.
        skiprows_md : int, array or callable
            `skiprows` parameter modified for easier handling by Modin.
        header_size : int
            Number of rows that are used by header.

        Returns
        -------
        bool
            Whether passed parameters are supported or not.
        Optional[str]
            `None` if parameters are supported, otherwise an error
            message describing why parameters are not supported.
        """
        infer_nrows_too_large = read_kwargs["infer_nrows"] > 100
        if not infer_nrows_too_large:
            # Nothing specific to fixed-width files rules the read out, so
            # the decision is left to the parent class.
            return super().check_parameters_support(
                filepath_or_buffer, read_kwargs, skiprows_md, header_size
            )

        unsupported_reason = "`infer_nrows` is a significant portion of the number of rows, so Pandas may be faster"
        return (False, unsupported_reason)


================================================
FILE: modin/core/io/text/json_dispatcher.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses `JSONDispatcher` class, that is used for reading `.json` files."""

from io import BytesIO

import numpy as np
import pandas
from pandas.io.common import stringify_path

from modin.config import NPartitions
from modin.core.io.file_dispatcher import OpenFile
from modin.core.io.text.text_file_dispatcher import TextFileDispatcher


class JSONDispatcher(TextFileDispatcher):
    """Class handles utils for reading `.json` files."""

    @classmethod
    def _read(cls, path_or_buf, **kwargs):
        """
        Read data from `path_or_buf` according to the passed `read_json` `kwargs` parameters.

        Parameters
        ----------
        path_or_buf : str, path object or file-like object
            `path_or_buf` parameter of `read_json` function.
        **kwargs : dict
            Parameters of `read_json` function.

        Returns
        -------
        BaseQueryCompiler
            Query compiler with imported data for further processing.

        Notes
        -----
        The read is delegated to ``single_worker_read`` when the file does not
        exist, when a buffer that is not a path object is passed, or when
        `lines` is not set to a truthy value.
        """
        path_or_buf = stringify_path(path_or_buf)
        path_or_buf = cls.get_path_or_buffer(path_or_buf)
        if isinstance(path_or_buf, str):
            if not cls.file_exists(
                path_or_buf, storage_options=kwargs.get("storage_options")
            ):
                return cls.single_worker_read(
                    path_or_buf, reason=cls._file_not_found_msg(path_or_buf), **kwargs
                )
            path_or_buf = cls.get_path(path_or_buf)
        elif not cls.pathlib_or_pypath(path_or_buf):
            return cls.single_worker_read(
                path_or_buf, reason=cls.BUFFER_UNSUPPORTED_MSG, **kwargs
            )
        if not kwargs.get("lines", False):
            return cls.single_worker_read(
                path_or_buf, reason="`lines` argument not supported", **kwargs
            )

        # Both passes over the file must open it the same way, otherwise an
        # explicitly requested compression would be ignored while the column
        # names are being detected.
        compression = kwargs.get("compression", "infer")
        storage_options = kwargs.get("storage_options", None) or {}

        # First pass: parse only the first line to learn the column names.
        with OpenFile(
            path_or_buf,
            "rb",
            compression,
            **storage_options,
        ) as f:
            columns = pandas.read_json(BytesIO(b"" + f.readline()), lines=True).columns
        kwargs["columns"] = columns
        empty_pd_df = pandas.DataFrame(columns=columns)

        # Second pass: split the file into byte ranges and parse them remotely.
        with OpenFile(
            path_or_buf,
            "rb",
            compression,
            **storage_options,
        ) as f:
            column_widths, num_splits = cls._define_metadata(empty_pd_df, columns)
            args = {"fname": path_or_buf, "num_splits": num_splits, **kwargs}
            splits, _ = cls.partitioned_file(
                f,
                num_partitions=NPartitions.get(),
            )
            partition_ids = [None] * len(splits)
            index_ids = [None] * len(splits)
            dtypes_ids = [None] * len(splits)
            for idx, (start, end) in enumerate(splits):
                args.update({"start": start, "end": end})
                # Each call yields `num_splits` data blocks followed by three
                # more results: the row count, the dtypes, and a last one that
                # is discarded here.
                *partition_ids[idx], index_ids[idx], dtypes_ids[idx], _ = cls.deploy(
                    func=cls.parse,
                    f_kwargs=args,
                    num_returns=num_splits + 3,
                )
        # partition_id[-1] contains the columns for each partition, which will be useful
        # for implementing when `lines=False`.
        row_lengths = cls.materialize(index_ids)
        new_index = pandas.RangeIndex(sum(row_lengths))

        partition_ids = cls.build_partition(partition_ids, row_lengths, column_widths)

        # Compute dtypes by collecting and combining all of the partitions. The
        # reported dtypes from differing rows can be different based on the inference in
        # the limited data seen by each worker. We use pandas to compute the exact dtype
        # over the whole column for each column. The index is set below.
        dtypes = cls.get_dtypes(dtypes_ids, columns)

        new_frame = cls.frame_cls(
            np.array(partition_ids),
            new_index,
            columns,
            row_lengths,
            column_widths,
            dtypes=dtypes,
        )
        new_frame.synchronize_labels(axis=0)
        return cls.query_compiler_cls(new_frame)


================================================
FILE: modin/core/io/text/text_file_dispatcher.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
Module houses `TextFileDispatcher` class.

`TextFileDispatcher` contains utils for text formats files, inherits util functions for
files from `FileDispatcher` class and can be used as base class for dispatchers of specific text file formats.
"""
import codecs
import io
import os
import warnings
from csv import QUOTE_NONE
from typing import Callable, Optional, Sequence, Tuple, Union

import numpy as np
import pandas
import pandas._libs.lib as lib
from pandas.core.dtypes.common import is_list_like
from pandas.io.common import stringify_path

from modin.config import MinColumnPartitionSize, NPartitions
from modin.core.io.file_dispatcher import FileDispatcher, OpenFile
from modin.core.io.text.utils import CustomNewlineIterator
from modin.core.storage_formats.pandas.utils import compute_chunksize
from modin.utils import _inherit_docstrings

ColumnNamesTypes = Tuple[Union[pandas.Index, pandas.MultiIndex]]
IndexColType = Union[int, str, bool, Sequence[int], Sequence[str], None]


class TextFileDispatcher(FileDispatcher):
    """Class handles utils for reading text formats files."""

    @classmethod
    def get_path_or_buffer(cls, filepath_or_buffer):
        """
        Extract path from `filepath_or_buffer`.

        Parameters
        ----------
        filepath_or_buffer : str, path object or file-like object
            `filepath_or_buffer` parameter of `read_csv` function.

        Returns
        -------
        str or path object
            verified `filepath_or_buffer` parameter.

        Notes
        -----
        Given a buffer, try and extract the filepath from it so that we can
        use it without having to fall back to pandas and share file objects between
        workers. Given a filepath, return it immediately.
        """
        if (
            hasattr(filepath_or_buffer, "name")
            and hasattr(filepath_or_buffer, "seekable")
            and filepath_or_buffer.seekable()
            and filepath_or_buffer.tell() == 0
        ):
            buffer_filepath = filepath_or_buffer.name
            if cls.file_exists(buffer_filepath):
                warnings.warn(
                    "For performance reasons, the filepath will be "
                    + "used in place of the file handle passed in "
                    + "to load the data"
                )
                return cls.get_path(buffer_filepath)
        return filepath_or_buffer

    @classmethod
    def build_partition(cls, partition_ids, row_lengths, column_widths):
        """
        Build array with partitions of `cls.frame_partition_cls` class.

        Parameters
        ----------
        partition_ids : list
                Array with references to the partitions data.
        row_lengths : list
                Partitions rows lengths.
        column_widths : list
                Number of columns in each partition.

        Returns
        -------
        np.ndarray
            array with shape equals to the shape of `partition_ids` and
            filed with partitions objects.
        """
        return np.array(
            [
                [
                    cls.frame_partition_cls(
                        partition_ids[i][j],
                        length=row_lengths[i],
                        width=column_widths[j],
                    )
                    for j in range(len(partition_ids[i]))
                ]
                for i in range(len(partition_ids))
            ]
        )

    @classmethod
    def pathlib_or_pypath(cls, filepath_or_buffer):
        """
        Check if `filepath_or_buffer` is instance of `py.path.local` or `pathlib.Path`.

        Parameters
        ----------
        filepath_or_buffer : str, path object or file-like object
            `filepath_or_buffer` parameter of `read_csv` function.

        Returns
        -------
        bool
            Whether or not `filepath_or_buffer` is instance of `py.path.local`
            or `pathlib.Path`.
        """
        try:
            import py

            if isinstance(filepath_or_buffer, py.path.local):
                return True
        except ImportError:  # pragma: no cover
            pass
        try:
            import pathlib

            if isinstance(filepath_or_buffer, pathlib.Path):
                return True
        except ImportError:  # pragma: no cover
            pass
        return False

    @classmethod
    def offset(
        cls,
        f,
        offset_size: int,
        quotechar: bytes = b'"',
        is_quoting: bool = True,
        encoding: str = None,
        newline: bytes = None,
    ):
        """
        Move the file offset at the specified amount of bytes.

        Parameters
        ----------
        f : file-like object
            File handle that should be used for offset movement.
        offset_size : int
            Number of bytes to read and ignore.
        quotechar : bytes, default: b'"'
            Indicate quote in a file.
        is_quoting : bool, default: True
            Whether or not to consider quotes.
        encoding : str, optional
            Encoding of `f`.
        newline : bytes, optional
            Byte or sequence of bytes indicating line endings.

        Returns
        -------
        bool
            If file pointer reached the end of the file, but did not find
            closing quote returns `False`. `True` in any other case.
        """
        if is_quoting:
            chunk = f.read(offset_size)
            outside_quotes = not chunk.count(quotechar) % 2
        else:
            f.seek(offset_size, os.SEEK_CUR)
            outside_quotes = True

        # after we read `offset_size` bytes, we most likely break the line but
        # the modin implementation doesn't work correctly in the case, so we must
        # make sure that the line is read completely to the lineterminator,
        # which is what the `_read_rows` does
        outside_quotes, _ = cls._read_rows(
            f,
            nrows=1,
            quotechar=quotechar,
            is_quoting=is_quoting,
            outside_quotes=outside_quotes,
            encoding=encoding,
            newline=newline,
        )

        return outside_quotes

    @classmethod
    def partitioned_file(
        cls,
        f,
        num_partitions: int = None,
        nrows: int = None,
        skiprows: int = None,
        quotechar: bytes = b'"',
        is_quoting: bool = True,
        encoding: str = None,
        newline: bytes = None,
        header_size: int = 0,
        pre_reading: int = 0,
        get_metadata_kw: dict = None,
    ):
        """
        Compute chunk sizes in bytes for every partition.

        Parameters
        ----------
        f : file-like object
            File handle of file to be partitioned.
        num_partitions : int, optional
            For what number of partitions split a file.
            If not specified grabs the value from `modin.config.NPartitions.get()`.
        nrows : int, optional
            Number of rows of file to read.
        skiprows : int, optional
            Specifies rows to skip.
        quotechar : bytes, default: b'"'
            Indicate quote in a file.
        is_quoting : bool, default: True
            Whether or not to consider quotes.
        encoding : str, optional
            Encoding of `f`.
        newline : bytes, optional
            Byte or sequence of bytes indicating line endings.
        header_size : int, default: 0
            Number of rows, that occupied by header.
        pre_reading : int, default: 0
            Number of rows between header and skipped rows, that should be read.
        get_metadata_kw : dict, optional
            Keyword arguments for `cls.read_callback` to compute metadata if needed.
            This option is not compatible with `pre_reading!=0`.

        Returns
        -------
        list
            List with the next elements:
                int : partition start read byte
                int : partition end read byte
        pandas.DataFrame or None
            Dataframe from which metadata can be retrieved. Can be None if `get_metadata_kw=None`.
        """
        if get_metadata_kw is not None and pre_reading != 0:
            raise ValueError(
                f"Incompatible combination of parameters: {get_metadata_kw=}, {pre_reading=}"
            )
        read_rows_counter = 0
        outside_quotes = True

        if num_partitions is None:
            num_partitions = NPartitions.get() - 1 if pre_reading else NPartitions.get()

        rows_skipper = cls.rows_skipper_builder(
            f, quotechar, is_quoting=is_quoting, encoding=encoding, newline=newline
        )
        result = []

        file_size = cls.file_size(f)

        pd_df_metadata = None
        if pre_reading:
            rows_skipper(header_size)
            pre_reading_start = f.tell()
            outside_quotes, read_rows = cls._read_rows(
                f,
                nrows=pre_reading,
                quotechar=quotechar,
                is_quoting=is_quoting,
                outside_quotes=outside_quotes,
                encoding=encoding,
                newline=newline,
            )
            read_rows_counter += read_rows

            result.append((pre_reading_start, f.tell()))

            # add outside_quotes
            if is_quoting and not outside_quotes:
                warnings.warn("File has mismatched quotes")
            rows_skipper(skiprows)
        else:
            rows_skipper(skiprows)
            if get_metadata_kw:
                start = f.tell()
                # For correct behavior, if we want to avoid double skipping rows,
                # we need to get metadata after skipping.
                pd_df_metadata = cls.read_callback(f, **get_metadata_kw)
                f.seek(start)
            rows_skipper(header_size)

        start = f.tell()
        if nrows:
            partition_size = max(1, num_partitions, nrows // num_partitions)
            while f.tell() < file_size and read_rows_counter < nrows:
                if read_rows_counter + partition_size > nrows:
                    # it's possible only if is_quoting==True
                    partition_size = nrows - read_rows_counter
                outside_quotes, read_rows = cls._read_rows(
                    f,
                    nrows=partition_size,
                    quotechar=quotechar,
                    is_quoting=is_quoting,
                    encoding=encoding,
                    newline=newline,
                )
                result.append((start, f.tell()))
                start = f.tell()
                read_rows_counter += read_rows

                # add outside_quotes
                if is_quoting and not outside_quotes:
                    warnings.warn("File has mismatched quotes")
        else:
            partition_size = max(1, num_partitions, file_size // num_partitions)
            while f.tell() < file_size:
                outside_quotes = cls.offset(
                    f,
                    offset_size=partition_size,
                    quotechar=quotechar,
                    is_quoting=is_quoting,
                    encoding=encoding,
                    newline=newline,
                )

                result.append((start, f.tell()))
                start = f.tell()

                # add outside_quotes
                if is_quoting and not outside_quotes:
                    warnings.warn("File has mismatched quotes")
        return result, pd_df_metadata

    @classmethod
    def _read_rows(
        cls,
        f,
        nrows: int,
        quotechar: bytes = b'"',
        is_quoting: bool = True,
        outside_quotes: bool = True,
        encoding: str = None,
        newline: bytes = None,
    ):
        """
        Move the file offset at the specified amount of rows.

        Parameters
        ----------
        f : file-like object
            File handle that should be used for offset movement.
        nrows : int
            Number of rows to read.
        quotechar : bytes, default: b'"'
            Indicate quote in a file.
        is_quoting : bool, default: True
            Whether or not to consider quotes.
        outside_quotes : bool, default: True
            Whether the file pointer is within quotes or not at the time this function is called.
        encoding : str, optional
            Encoding of `f`.
        newline : bytes, optional
            Byte or sequence of bytes indicating line endings.

        Returns
        -------
        bool
            If file pointer reached the end of the file, but did not find closing quote
            returns `False`. `True` in any other case.
        int
            Number of rows that were read.
        """
        if nrows is not None and nrows <= 0:
            return True, 0

        rows_read = 0

        if encoding and (
            "utf" in encoding
            and "8" not in encoding
            or encoding == "unicode_escape"
            or encoding.replace("-", "_") == "utf_8_sig"
        ):
            iterator = CustomNewlineIterator(f, newline)
        else:
            iterator = f

        for line in iterator:
            if is_quoting and line.count(quotechar) % 2:
                outside_quotes = not outside_quotes
            if outside_quotes:
                rows_read += 1
                if rows_read >= nrows:
                    break

        if isinstance(iterator, CustomNewlineIterator):
            iterator.seek()

        # case when EOF
        if not outside_quotes:
            rows_read += 1

        return outside_quotes, rows_read

    @classmethod
    def compute_newline(cls, file_like, encoding, quotechar):
        """
        Compute byte or sequence of bytes indicating line endings.

        Parameters
        ----------
        file_like : file-like object
            File handle that should be used for line endings computing.
        encoding : str
            Encoding of `file_like`.
        quotechar : str
            Quotechar used for parsing `file-like`.

        Returns
        -------
        bytes
            line endings
        """
        newline = None

        if encoding is None:
            return newline, quotechar.encode("UTF-8")

        quotechar = quotechar.encode(encoding)
        encoding = encoding.replace("-", "_")

        if (
            "utf" in encoding
            and "8" not in encoding
            or encoding == "unicode_escape"
            or encoding == "utf_8_sig"
        ):
            # trigger for computing f.newlines
            file_like.readline()
            # in bytes
            newline = file_like.newlines.encode(encoding)
            boms = ()
            if encoding == "utf_8_sig":
                boms = (codecs.BOM_UTF8,)
            elif "16" in encoding:
                boms = (codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)
            elif "32" in encoding:
                boms = (codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)

            for bom in boms:
                if newline.startswith(bom):
                    bom_len = len(bom)
                    newline = newline[bom_len:]
                    quotechar = quotechar[bom_len:]
                    break

        return newline, quotechar

    # _read helper functions
    @classmethod
    def rows_skipper_builder(
        cls, f, quotechar, is_quoting, encoding=None, newline=None
    ):
        """
        Build object for skipping passed number of lines.

        Parameters
        ----------
        f : file-like object
            File handle that should be used for offset movement.
        quotechar : bytes
            Indicate quote in a file.
        is_quoting : bool
            Whether or not to consider quotes.
        encoding : str, optional
            Encoding of `f`.
        newline : bytes, optional
            Byte or sequence of bytes indicating line endings.

        Returns
        -------
        object
            skipper object.
        """

        def skipper(n):
            if n == 0 or n is None:
                return 0
            else:
                return cls._read_rows(
                    f,
                    quotechar=quotechar,
                    is_quoting=is_quoting,
                    nrows=n,
                    encoding=encoding,
                    newline=newline,
                )[1]

        return skipper

    @classmethod
    def _define_header_size(
        cls,
        header: Union[int, Sequence[int], str, None] = "infer",
        names: Optional[Sequence] = lib.no_default,
    ) -> int:
        """
        Define the number of rows that are used by header.

        Parameters
        ----------
        header : int, list of int or str, default: "infer"
            Original `header` parameter of `read_csv` function.
        names :  array-like, optional
            Original names parameter of `read_csv` function.

        Returns
        -------
        header_size : int
            The number of rows that are used by header.
        """
        header_size = 0
        if header == "infer" and names in [lib.no_default, None]:
            header_size += 1
        elif isinstance(header, int):
            header_size += header + 1
        elif hasattr(header, "__iter__") and not isinstance(header, str):
            header_size += max(header) + 1

        return header_size

    @classmethod
    def _define_metadata(
        cls,
        df: pandas.DataFrame,
        column_names: ColumnNamesTypes,
    ) -> Tuple[list, int]:
        """
        Work out how the columns are distributed between column partitions.

        Parameters
        ----------
        df : pandas.DataFrame
            The DataFrame to split.
        column_names : ColumnNamesTypes
            Column names of df.

        Returns
        -------
        column_widths : list
            Column width to use during new frame creation (number of
            columns for each partition).
        num_splits : int
            The maximum number of splits to separate the DataFrame into.
        """
        total_columns = len(column_names)
        # never ask for more column splits than there are columns or partitions
        num_splits = min(total_columns or 1, NPartitions.get())
        chunk = compute_chunksize(
            df.shape[1], num_splits, MinColumnPartitionSize.get()
        )

        if chunk > total_columns:
            # A single partition holds every column; this prevents us from
            # unnecessarily serializing a bunch of empty objects.
            return [total_columns], 1

        def width_of(position):
            # Partitions are filled with `chunk` columns from left to right: for
            # example, 80 columns with `chunk == 32` and 4 splits give the widths
            # [32, 32, 16, 0].
            if total_columns > chunk * (position + 1):
                return chunk
            if total_columns < chunk * position:
                return 0
            return total_columns - chunk * position

        column_widths = [width_of(position) for position in range(num_splits)]
        return column_widths, num_splits

    _parse_func = None

    @classmethod
    def preprocess_func(cls):  # noqa: RT01
        """Prepare a function for transmission to remote workers."""
        if cls._parse_func is None:
            cls._parse_func = cls.put(cls.parse)
        return cls._parse_func

    @classmethod
    def _launch_tasks(
        cls, splits: list, *partition_args, **partition_kwargs
    ) -> Tuple[list, list, list]:
        """
        Deploy one parsing task per file split.

        Parameters
        ----------
        splits : list
            List of tuples with partitions data, which defines
            parser task (start/end read bytes and etc.).
        *partition_args : tuple
            Positional arguments to be passed to the parser function.
        **partition_kwargs : dict
            `kwargs` that should be passed to the parser function. The "start" and
            "end" keys are overwritten for every split, and "num_splits" defines
            how many data references every task returns.

        Returns
        -------
        partition_ids : list
            array with references to the partitions data.
        index_ids : list
            array with references to the partitions index objects.
        dtypes_ids : list
            array with references to the partitions dtypes objects.
        """
        partition_ids = []
        index_ids = []
        dtypes_ids = []
        # this is done mostly for performance; see PR#5678 for details
        func = cls.preprocess_func()
        for start, end in splits:
            partition_kwargs["start"] = start
            partition_kwargs["end"] = end
            # every task yields `num_splits` data references followed by one
            # index reference and one dtypes reference
            *data_refs, index_ref, dtypes_ref = cls.deploy(
                func=func,
                f_args=partition_args,
                f_kwargs=partition_kwargs,
                num_returns=partition_kwargs.get("num_splits") + 2,
            )
            partition_ids.append(data_refs)
            index_ids.append(index_ref)
            dtypes_ids.append(dtypes_ref)
        return partition_ids, index_ids, dtypes_ids

    @classmethod
    def check_parameters_support(
        cls,
        filepath_or_buffer,
        read_kwargs: dict,
        skiprows_md: Union[Sequence, callable, int],
        header_size: int,
    ) -> Tuple[bool, Optional[str]]:
        """
        Check support of only general parameters of `read_*` function.

        Parameters
        ----------
        filepath_or_buffer : str, path object or file-like object
            `filepath_or_buffer` parameter of `read_*` function.
        read_kwargs : dict
            Parameters of `read_*` function.
        skiprows_md : int, array or callable
            `skiprows` parameter modified for easier handling by Modin.
        header_size : int
            Number of rows that are used by header.

        Returns
        -------
        bool
            Whether passed parameters are supported or not.
        Optional[str]
            `None` if parameters are supported, otherwise an error
            message describing why parameters are not supported.
        """
        skiprows = read_kwargs.get("skiprows")
        if isinstance(filepath_or_buffer, str):
            if not cls.file_exists(
                filepath_or_buffer, read_kwargs.get("storage_options")
            ):
                return (False, cls._file_not_found_msg(filepath_or_buffer))
        elif not cls.pathlib_or_pypath(filepath_or_buffer):
            return (False, cls.BUFFER_UNSUPPORTED_MSG)

        if read_kwargs["chunksize"] is not None:
            return (False, "`chunksize` parameter is not supported")

        if read_kwargs.get("iterator"):
            return (False, "`iterator==True` parameter is not supported")

        if read_kwargs.get("dialect") is not None:
            return (False, "`dialect` parameter is not supported")

        if read_kwargs["lineterminator"] is not None:
            return (False, "`lineterminator` parameter is not supported")

        if read_kwargs["escapechar"] is not None:
            return (False, "`escapechar` parameter is not supported")

        if read_kwargs.get("skipfooter"):
            if read_kwargs.get("nrows") or read_kwargs.get("engine") == "c":
                return (False, "Exception is raised by pandas itself")

        skiprows_supported = True
        if is_list_like(skiprows_md) and skiprows_md[0] < header_size:
            skiprows_supported = False
        elif callable(skiprows):
            # check if `skiprows` callable gives True for any of header indices
            is_intersection = any(
                cls._get_skip_mask(pandas.RangeIndex(header_size), skiprows)
            )
            if is_intersection:
                skiprows_supported = False

        if not skiprows_supported:
            return (
                False,
                "Values of `header` and `skiprows` parameters have intersections; "
                + "this case is unsupported by Modin",
            )

        return (True, None)

    @classmethod
    @_inherit_docstrings(pandas.io.parsers.base_parser.ParserBase._validate_usecols_arg)
    def _validate_usecols_arg(cls, usecols):
        msg = (
            "'usecols' must either be list-like of all strings, all unicode, "
            + "all integers or a callable."
        )
        if usecols is not None:
            if callable(usecols):
                return usecols, None

            if not is_list_like(usecols):
                raise ValueError(msg)

            usecols_dtype = lib.infer_dtype(usecols, skipna=False)

            if usecols_dtype not in ("empty", "integer", "string"):
                raise ValueError(msg)

            usecols = set(usecols)

            return usecols, usecols_dtype
        return usecols, None

    @classmethod
    def _manage_skiprows_parameter(
        cls,
        skiprows: Union[int, Sequence[int], Callable, None] = None,
        header_size: int = 0,
    ) -> Tuple[Union[int, Sequence, Callable], bool, int]:
        """
        Manage `skiprows` parameter of read_csv and read_fwf functions.

        Change `skiprows` parameter in the way Modin could more optimally
        process it. `csv_dispatcher` and `fwf_dispatcher` have two mechanisms of rows skipping:

        1) During file partitioning (setting of file limits that should be read
        by each partition) exact rows can be excluded from partitioning scope,
        thus they won't be read at all and can be considered as skipped. This is
        the most effective way of rows skipping (since it doesn't require any
        actual data reading and postprocessing), but in this case `skiprows`
        parameter can be an integer only. When it possible Modin always uses
        this approach by setting of `skiprows_partitioning` return value.

        2) Rows for skipping can be dropped after full dataset import. This is
        more expensive way since it requires extra IO work and postprocessing
        afterwards, but `skiprows` parameter can be of any non-integer type
        supported by any pandas read function. These rows is
        specified by setting of `skiprows_md` return value.

        In some cases, if `skiprows` is uniformly distributed array (e.g. [1,2,3]),
        `skiprows` can be "squashed" and represented as integer to make a fastpath.
        If there is a gap between the first row for skipping and the last line of
        the header (that will be skipped too), then assign to read this gap first
        (assign the first partition to read these rows be setting of `pre_reading`
        return value). See `Examples` section for details.

        Parameters
        ----------
        skiprows : int, array or callable, optional
            Original `skiprows` parameter of any pandas read function.
        header_size : int, default: 0
            Number of rows that are used by header.

        Returns
        -------
        skiprows_md : int, array or callable
            Updated skiprows parameter. If `skiprows` is an array, this
            array will be sorted. Also parameter will be aligned to
            actual data in the `query_compiler` (which, for example,
            doesn't contain header rows)
        pre_reading : int
            The number of rows that should be read before data file
            splitting for further reading (the number of rows for
            the first partition).
        skiprows_partitioning : int
            The number of rows that should be skipped virtually (skipped during
            data file partitioning).

        Examples
        --------
        Let's consider case when `header`="infer" and `skiprows`=[3,4,5]. In
        this specific case fastpath can be done since `skiprows` is uniformly
        distributed array, so we can "squash" it to integer and set
        `skiprows_partitioning`=3. But if no additional action will be done,
        these three rows will be skipped right after header line, that corresponds
        to `skiprows`=[1,2,3]. Now, to avoid this discrepancy, we need to assign
        the first partition to read data between header line and the first
        row for skipping by setting of `pre_reading` parameter, so setting
        `pre_reading`=2. During data file partitiong, these lines will be assigned
        for reading for the first partition, and then file position will be set at
        the beginning of rows that should be skipped by `skiprows_partitioning`.
        After skipping of these rows, the rest data will be divided between the
        rest of partitions, see rows assignement below:

        0 - header line (skip during partitioning)
        1 - pre_reading (assign to read by the first partition)
        2 - pre_reading (assign to read by the first partition)
        3 - skiprows_partitioning (skip during partitioning)
        4 - skiprows_partitioning (skip during partitioning)
        5 - skiprows_partitioning (skip during partitioning)
        6 - data to partition (divide between the rest of partitions)
        7 - data to partition (divide between the rest of partitions)
        """
        pre_reading = skiprows_partitioning = skiprows_md = 0
        if isinstance(skiprows, int):
            skiprows_partitioning = skiprows
        elif is_list_like(skiprows) and len(skiprows) > 0:
            skiprows_md = np.sort(skiprows)
            if np.all(np.diff(skiprows_md) == 1):
                # `skiprows` is uniformly distributed array.
                pre_reading = (
                    skiprows_md[0] - header_size if skiprows_md[0] > header_size else 0
                )
                skiprows_partitioning = len(skiprows_md)
                skiprows_md = 0
            elif skiprows_md[0] > header_size:
                skiprows_md = skiprows_md - header_size
        elif callable(skiprows):

            def skiprows_func(x):
                return skiprows(x + header_size)

            skiprows_md = skiprows_func

        return skiprows_md, pre_reading, skiprows_partitioning

    @classmethod
    def _define_index(
        cls,
        index_ids: list,
        index_name: str,
    ) -> Tuple[IndexColType, list]:
        """
        Compute the resulting DataFrame index and index lengths for each of partitions.

        Parameters
        ----------
        index_ids : list
            Array with references to the partitions index objects.
        index_name : str
            Name that should be assigned to the index if `index_col`
            is not provided.

        Returns
        -------
        new_index : IndexColType
            Index that should be passed to the new_frame constructor.
        row_lengths : list
            Partitions rows lengths.
        """
        index_objs = cls.materialize(index_ids)
        if len(index_objs) == 0 or isinstance(index_objs[0], int):
            row_lengths = index_objs
            new_index = pandas.RangeIndex(sum(index_objs))
        else:
            row_lengths = [len(o) for o in index_objs]
            new_index = index_objs[0].append(index_objs[1:])
            new_index.name = index_name

        return new_index, row_lengths

    @classmethod
    def _get_new_qc(
        cls,
        partition_ids: list,
        index_ids: list,
        dtypes_ids: list,
        index_col: IndexColType,
        index_name: str,
        column_widths: list,
        column_names: ColumnNamesTypes,
        skiprows_md: Union[Sequence, callable, None] = None,
        header_size: int = None,
        **kwargs,
    ):
        """
        Build a query compiler out of the pieces produced by the workers.

        After the frame is assembled, the ``skipfooter``, `skiprows_md` and
        ``nrows`` post-processing steps are applied (in that order).

        Parameters
        ----------
        partition_ids : list
            Array with references to the partitions data.
        index_ids : list
            Array with references to the partitions index objects.
        dtypes_ids : list
            Array with references to the partitions dtypes objects.
        index_col : IndexColType
            `index_col` parameter of `read_csv` function.
        index_name : str
            Name that should be assigned to the index if `index_col`
            is not provided.
        column_widths : list
            Number of columns in each partition.
        column_names : ColumnNamesTypes
            Array with columns names.
        skiprows_md : array-like or callable, optional
            Specifies rows to skip.
        header_size : int, optional
            Number of rows, that occupied by header. Not used by this method.
        **kwargs : dict
            Parameters of `read_csv` function needed for postprocessing
            (``skipfooter`` and ``nrows`` are the ones read here).

        Returns
        -------
        new_query_compiler : BaseQueryCompiler
            New query compiler, created from `new_frame`.

        Raises
        ------
        TypeError
            If `skiprows_md` is neither ``None``, list-like nor callable.
        """

        # index and dtypes are handed over as callables so the frame can
        # evaluate them lazily
        def lazy_index():
            return cls._define_index(index_ids, index_name)

        def lazy_dtypes():
            return cls.get_dtypes(dtypes_ids, column_names)

        partitions = cls.build_partition(
            partition_ids, [None] * len(index_ids), column_widths
        )
        frame = cls.frame_cls(
            partitions,
            lazy_index,
            column_names,
            None,
            column_widths,
            dtypes=lazy_dtypes,
        )
        qc = cls.query_compiler_cls(frame)

        footer_rows = kwargs.get("skipfooter", None)
        if footer_rows:
            # cut the requested number of rows off the bottom
            qc = qc.drop(qc.index[-footer_rows:])

        if skiprows_md is not None:
            # skip rows that passed as array or callable
            nrows = kwargs.get("nrows", None)
            positions = pandas.RangeIndex(len(qc.index))
            if is_list_like(skiprows_md):
                kept_positions = positions.delete(skiprows_md)
            elif callable(skiprows_md):
                skip_mask = cls._get_skip_mask(positions, skiprows_md)
                if not isinstance(skip_mask, np.ndarray):
                    skip_mask = skip_mask.to_numpy("bool")
                kept_positions = positions[~skip_mask]
            else:
                raise TypeError(
                    f"Not acceptable type of `skiprows` parameter: {type(skiprows_md)}"
                )
            qc = qc.take_2d_positional(index=kept_positions)

            # a MultiIndex is kept as is, any other index is renumbered
            if not isinstance(qc.index, pandas.MultiIndex):
                qc = qc.reset_index(drop=True)

            if nrows:
                head_positions = pandas.RangeIndex(len(qc.index))[:nrows]
                qc = qc.take_2d_positional(head_positions)

        if index_col is None or index_col is False:
            qc._modin_frame.synchronize_labels(axis=0)

        return qc

    @classmethod
    def _read(cls, filepath_or_buffer, **kwargs):
        """
        Read data from `filepath_or_buffer` according to `kwargs` parameters.

        Used in `read_csv` and `read_fwf` Modin implementations.

        The method falls back to ``single_worker_read`` when
        ``check_parameters_support`` reports that the parameters are not supported.
        Otherwise the file is split into byte ranges, the ranges are handed to the
        workers and the results are assembled by ``_get_new_qc``.

        Parameters
        ----------
        filepath_or_buffer : str, path object or file-like object
            `filepath_or_buffer` parameter of read functions.
        **kwargs : dict
            Parameters of read functions.

        Returns
        -------
        new_query_compiler : BaseQueryCompiler
            Query compiler with imported data for further processing.
        """
        filepath_or_buffer = stringify_path(filepath_or_buffer)
        # strings are resolved with `get_path`, anything else goes through
        # `get_path_or_buffer`
        filepath_or_buffer_md = (
            cls.get_path(filepath_or_buffer)
            if isinstance(filepath_or_buffer, str)
            else cls.get_path_or_buffer(filepath_or_buffer)
        )
        compression_infered = cls.infer_compression(
            filepath_or_buffer, kwargs["compression"]
        )
        # Getting frequently used kwargs;
        # They should be defined in higher level
        names = kwargs["names"]
        index_col = kwargs["index_col"]
        encoding = kwargs["encoding"]
        skiprows = kwargs["skiprows"]
        header = kwargs["header"]
        # Define header size for further skipping (Header can be skipped because header
        # information will be obtained further from empty_df, so no need to handle it
        # by workers)
        header_size = cls._define_header_size(
            header,
            names,
        )
        (
            skiprows_md,
            pre_reading,
            skiprows_partitioning,
        ) = cls._manage_skiprows_parameter(skiprows, header_size)
        # a non-integer `skiprows_md` is applied after the read, in `_get_new_qc`
        # (together with `nrows`, see the arguments passed at the end of this method)
        should_handle_skiprows = skiprows_md is not None and not isinstance(
            skiprows_md, int
        )

        (use_modin_impl, fallback_reason) = cls.check_parameters_support(
            filepath_or_buffer_md,
            kwargs,
            skiprows_md,
            header_size,
        )
        if not use_modin_impl:
            # unsupported parameters: read everything in a single worker instead
            return cls.single_worker_read(
                filepath_or_buffer,
                kwargs,
                reason=fallback_reason,
            )

        is_quoting = kwargs["quoting"] != QUOTE_NONE
        usecols = kwargs["usecols"]
        use_inferred_column_names = cls._uses_inferred_column_names(
            names, skiprows, kwargs["skipfooter"], usecols
        )

        # Computing metadata simultaneously with skipping rows allows us to not
        # do extra work and improve performance for certain cases, as otherwise,
        # it would require double re-reading of skipped rows in order to retrieve metadata.
        # NOTE(review): `skiprows is None` below can never hold once
        # `isinstance(skiprows, int)` is true, so the middle condition reduces
        # to `usecols is None`.
        can_compute_metadata_while_skipping_rows = (
            # basic supported case: isinstance(skiprows, int) without any additional params
            isinstance(skiprows, int)
            and (usecols is None or skiprows is None)
            and pre_reading == 0
        )
        # metadata is taken from a one-row read with the footer handling disabled
        get_metadata_kw = dict(kwargs, nrows=1, skipfooter=0, index_col=index_col)
        if get_metadata_kw.get("engine", None) == "pyarrow":
            # pyarrow engine doesn't support `nrows` option;
            # https://github.com/pandas-dev/pandas/issues/38872 can be used to track pyarrow engine features
            get_metadata_kw["engine"] = "c"
        if not can_compute_metadata_while_skipping_rows:
            pd_df_metadata = cls.read_callback(
                filepath_or_buffer_md,
                **get_metadata_kw,
            )
            # NOTE(review): `column_names`, `column_widths` and `num_splits` are
            # computed again from `pd_df_metadata` after the file is partitioned.
            column_names = pd_df_metadata.columns
            column_widths, num_splits = cls._define_metadata(
                pd_df_metadata, column_names
            )
            # metadata is already known, so `partitioned_file` receives no
            # metadata kwargs
            get_metadata_kw = None
        else:
            get_metadata_kw = dict(get_metadata_kw, skiprows=None)
            # `memory_map` doesn't work with file-like object so we can't use it here.
            # We can definitely skip it without violating the reading logic
            # since this parameter is intended to optimize reading.
            # For reading a couple of lines, this is not essential.
            get_metadata_kw.pop("memory_map", None)
            # These parameters are already used when opening file `f`,
            # they do not need to be used again.
            get_metadata_kw.pop("storage_options", None)
            get_metadata_kw.pop("compression", None)

        with OpenFile(
            filepath_or_buffer_md,
            "rb",
            compression_infered,
            **(kwargs.get("storage_options", None) or {}),
        ) as f:
            # remember the position so the stream can be rewound after the
            # newline/quotechar detection below
            old_pos = f.tell()
            fio = io.TextIOWrapper(f, encoding=encoding, newline="")
            newline, quotechar = cls.compute_newline(
                fio, encoding, kwargs.get("quotechar", '"')
            )
            f.seek(old_pos)

            # `nrows` is only applied during partitioning when `skiprows` does not
            # have to be handled afterwards; otherwise it is applied in `_get_new_qc`
            splits, pd_df_metadata_temp = cls.partitioned_file(
                f,
                num_partitions=NPartitions.get(),
                nrows=kwargs["nrows"] if not should_handle_skiprows else None,
                skiprows=skiprows_partitioning,
                quotechar=quotechar,
                is_quoting=is_quoting,
                encoding=encoding,
                newline=newline,
                header_size=header_size,
                pre_reading=pre_reading,
                get_metadata_kw=get_metadata_kw,
            )
            if can_compute_metadata_while_skipping_rows:
                pd_df_metadata = pd_df_metadata_temp

        # compute dtypes if possible
        common_dtypes = None
        if kwargs["dtype"] is None:
            # only columns whose dtype in the metadata frame is `object` are recorded
            most_common_dtype = (object,)
            common_dtypes = {}
            for col, dtype in pd_df_metadata.dtypes.to_dict().items():
                if dtype in most_common_dtype:
                    common_dtypes[col] = dtype
        column_names = pd_df_metadata.columns
        column_widths, num_splits = cls._define_metadata(pd_df_metadata, column_names)
        # kwargs that will be passed to the workers
        # (`skipfooter`, `skiprows` and `nrows` are neutralized here: they are
        # handled by the partitioning above and by `_get_new_qc` below)
        partition_kwargs = dict(
            kwargs,
            header_size=0 if use_inferred_column_names else header_size,
            names=column_names if use_inferred_column_names else names,
            header="infer" if use_inferred_column_names else header,
            skipfooter=0,
            skiprows=None,
            nrows=None,
            compression=compression_infered,
            common_dtypes=common_dtypes,
        )
        # this is done mostly for performance; see PR#5678 for details
        filepath_or_buffer_md_ref = cls.put(filepath_or_buffer_md)
        kwargs_ref = cls.put(partition_kwargs)
        partition_ids, index_ids, dtypes_ids = cls._launch_tasks(
            splits,
            filepath_or_buffer_md_ref,
            kwargs_ref,
            num_splits=num_splits,
        )

        new_query_compiler = cls._get_new_qc(
            partition_ids=partition_ids,
            index_ids=index_ids,
            dtypes_ids=dtypes_ids,
            index_col=index_col,
            index_name=pd_df_metadata.index.name,
            column_widths=column_widths,
            column_names=column_names,
            skiprows_md=skiprows_md if should_handle_skiprows else None,
            header_size=header_size,
            skipfooter=kwargs["skipfooter"],
            parse_dates=kwargs["parse_dates"],
            nrows=kwargs["nrows"] if should_handle_skiprows else None,
        )
        return new_query_compiler

    @classmethod
    def _get_skip_mask(cls, rows_index: pandas.Index, skiprows: Callable):
        """
        Get mask of skipped by callable `skiprows` rows.

        Parameters
        ----------
        rows_index : pandas.Index
            Rows index to get mask for.
        skiprows : Callable
            Callable to check whether row index should be skipped.

        Returns
        -------
        pandas.Index
        """
        try:
            # direct `skiprows` call is more efficient than using of
            # map method, but in some cases it can work incorrectly, e.g.
            # when `skiprows` contains `in` operator
            mask = skiprows(rows_index)
            assert is_list_like(mask)
        except (ValueError, TypeError, AssertionError):
            # ValueError can be raised if `skiprows` callable contains membership operator
            # TypeError is raised if `skiprows` callable contains bitwise operator
            # AssertionError is raised if unexpected behavior was detected
            mask = rows_index.map(skiprows)

        return mask

    @staticmethod
    def _uses_inferred_column_names(names, skiprows, skipfooter, usecols):
        """
        Tell whether need to use inferred column names in workers or not.

        1) ``False`` is returned in 2 cases and means next:
            1.a) `names` parameter was provided from the API layer. In this case parameter
            `names` must be provided as `names` parameter for ``read_csv`` in the workers.
            1.b) `names` parameter wasn't provided from the API layer. In this case column names
            inference must happen in each partition.
        2) ``True`` is returned in case when inferred column names from pre-reading stage must be
            provided as `names` parameter for ``read_csv`` in the workers.

        In case `names` was provided, the other parameters aren't checked. Otherwise, inferred column
        names should be used in a case of not full data reading which is defined by `skipfooter` parameter,
        when need to skip lines at the bottom of file or by `skiprows` parameter, when need to skip lines at
        the top of file (but if `usecols` was provided, column names inference must happen in the workers).

        Parameters
        ----------
        names : array-like
            List of column names to use.
        skiprows : list-like, int or callable
            Line numbers to skip (0-indexed) or number of lines to skip (int) at
            the start of the file. If callable, the callable function will be
            evaluated against the row indices, returning ``True`` if the row should
            be skipped and ``False`` otherwise.
        skipfooter : int
            Number of lines at bottom of the file to skip.
        usecols : list-like or callable
            Subset of the columns.

        Returns
        -------
        bool
            Whether to use inferred column names in ``read_csv`` of the workers or not.
        """
        if names not in [None, lib.no_default]:
            return False
        if skipfooter != 0:
            return True
        if isinstance(skiprows, int) and skiprows == 0:
            return False
        if is_list_like(skiprows):
            return usecols is None
        return skiprows is not None


================================================
FILE: modin/core/io/text/utils.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Contains utility functions for dispatchers."""

import io


class CustomNewlineIterator:
    r"""
    Used to iterate through files in binary mode line by line where newline != b'\n'.

    Lines are yielded without the newline bytes. A last line that is not
    terminated by `newline` is yielded as well once the end of the file is reached.

    Parameters
    ----------
    _file : file-like object
        File-like object to iterate over.
    newline : bytes
        Byte or sequence of bytes indicating line endings.
    """

    def __init__(self, _file, newline):
        self.file = _file
        self.newline = newline
        # `chunk_size` is the size of the chunk being processed, `bytes_read`
        # is how many bytes of that chunk were consumed by the yielded lines;
        # `seek` uses both of them.
        self.bytes_read = self.chunk_size = 0

    def __iter__(self):
        """
        Iterate over lines.

        Yields
        ------
        bytes
            Data from file.
        """
        buffer_size = io.DEFAULT_BUFFER_SIZE
        newline_size = len(self.newline)
        chunk = self.file.read(buffer_size)
        self.chunk_size = 0
        while chunk:
            self.bytes_read = 0
            self.chunk_size = len(chunk)
            # split remove newline bytes from line; the last item is whatever
            # follows the last newline in the chunk (possibly empty)
            *complete_lines, tail = chunk.split(self.newline)
            for line in complete_lines:
                self.bytes_read += len(line) + newline_size
                yield line
            chunk = self.file.read(buffer_size)
            if not tail:
                continue
            if not chunk:
                # End of file with an unterminated last line: yield it and stop.
                # Feeding it back into the loop would never make progress.
                self.bytes_read += len(tail)
                yield tail
                break
            # the incomplete line is completed by the data of the next chunk
            chunk = tail + chunk

    def seek(self):
        """Change the stream position to where the last returned line ends."""
        # the stream is positioned at the end of the current chunk, so step
        # back over the part of the chunk that was not consumed yet
        self.file.seek(self.bytes_read - self.chunk_size, 1)


================================================
FILE: modin/core/storage_formats/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Modin's functionality related to storage formats supported."""

from .base import BaseQueryCompiler
from .pandas import PandasQueryCompiler

__all__ = ["BaseQueryCompiler", "PandasQueryCompiler"]


================================================
FILE: modin/core/storage_formats/base/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""The module represents the base query compiler that defines the common query compiler API."""

from .query_compiler import BaseQueryCompiler

__all__ = ["BaseQueryCompiler"]


================================================
FILE: modin/core/storage_formats/base/doc_utils.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module contains decorators for documentation of the query compiler methods."""

from functools import partial

from modin.utils import align_indents, append_to_docstring, format_string

# Warning block that is appended to docstrings of one-column-only methods.
_one_column_warning = """
.. warning::
    This method is supported only by one-column query compilers.
"""

# Template of the deprecation warning; ``{0}`` is substituted with the name
# of the method to use instead.
_deprecation_warning = """
.. warning::
    This method duplicates logic of ``{0}`` and will be removed soon.
"""

# Template of the ``Notes`` section pointing to the high-level API; ``{0}`` is
# substituted with the name to refer to under ``modin.pandas``.
_refer_to_note = """
Notes
-----
Please refer to ``modin.pandas.{0}`` for more information
about parameters and output format.
"""

# Decorator appending the one-column warning to a function's docstring.
add_one_column_warning = append_to_docstring(_one_column_warning)


def add_deprecation_warning(replacement_method):
    """
    Create a decorator that adds a deprecation warning to a docstring.

    The warning tells the reader that the decorated method duplicates the logic
    of `replacement_method` and is going to be removed.

    Parameters
    ----------
    replacement_method : str
        Name of the method to use instead of deprecated.

    Returns
    -------
    callable
    """
    return append_to_docstring(_deprecation_warning.format(replacement_method))


def add_refer_to(method):
    """
    Create a decorator that adds a reference to the high-level method to a docstring.

    The reference is appended as a ``Notes`` section.

    Parameters
    ----------
    method : str
        Method name in ``modin.pandas`` module to refer to.

    Returns
    -------
    callable
    """
    # FIXME: this would break numpydoc if there already is a `Notes` section
    return append_to_docstring(_refer_to_note.format(method))


def doc_qc_method(
    template,
    params=None,
    refer_to=None,
    refer_to_module_name=None,
    one_column_method=False,
    **kwargs,
):
    """
    Build decorator which adds docstring for query compiler method.

    Parameters
    ----------
    template : str
        Method docstring in the NumPy docstyle format. Must contain {params}
        placeholder.
    params : str, optional
        Method parameters in the NumPy docstyle format to substitute
        in the `template`. `params` string should not include the "Parameters"
        header.
    refer_to : str, optional
        Method name in `refer_to_module_name` module to refer to for more information
        about parameters and output format.
    refer_to_module_name : str, optional
        Name that `refer_to` is prefixed with (separated by a dot) in the appended
        reference note. Ignored when `refer_to` is not specified.
    one_column_method : bool, default: False
        Whether to append note that this method is for one-column
        query compilers only.
    **kwargs : dict
        Values to substitute in the `template`.

    Returns
    -------
    callable
    """
    params_template = """

        Parameters
        ----------
        {params}
        """

    params = format_string(params_template, params=params) if params else ""
    # the template receives the unqualified `refer_to` value
    substituted = format_string(template, params=params, refer_to=refer_to, **kwargs)
    # Qualify the reference only when there is one: otherwise a missing `refer_to`
    # would turn into a bogus "<module>.None" reference in the appended note.
    if refer_to and refer_to_module_name:
        refer_to = f"{refer_to_module_name}.{refer_to}"

    def decorator(func):
        func.__doc__ = substituted
        appendix = ""
        if refer_to:
            appendix += _refer_to_note.format(refer_to)
        if one_column_method:
            appendix += _one_column_warning
        if appendix:
            func = append_to_docstring(appendix)(func)
        return func

    return decorator


def doc_binary_method(operation, sign, self_on_right=False, op_type="arithmetic"):
    """
    Create a decorator that sets the docstring of a binary method.

    Parameters
    ----------
    operation : str
        Name of the binary operation.
    sign : str
        Sign which represents specified binary operation.
    self_on_right : bool, default: False
        Whether `self` is the right operand.
    op_type : {"arithmetic", "logical", "series_comparison"}, default: "arithmetic"
        Type of the binary operation. It selects the set of extra parameters
        described in the docstring; any other value (e.g. "comparison")
        adds no extra parameters.

    Returns
    -------
    callable
    """
    # extra parameters description for every known type of operation
    extra_params = {
        "logical": """
        level : int or label
            In case of MultiIndex match index values on the passed level.
        axis : {{0, 1}}
            Axis to match indices along for 1D `other` (list or QueryCompiler that represents Series).
            0 is for index, when 1 is for columns.
        """,
        "arithmetic": """
        level : int or label
            In case of MultiIndex match index values on the passed level.
        axis : {{0, 1}}
            Axis to match indices along for 1D `other` (list or QueryCompiler that represents Series).
            0 is for index, when 1 is for columns.
        fill_value : float or None
            Value to fill missing elements during frame alignment.
        """,
        "series_comparison": """
        level : int or label
            In case of MultiIndex match index values on the passed level.
        fill_value : float or None
            Value to fill missing elements during frame alignment.
        axis : {{0, 1}}
            Unused. Parameter needed for compatibility with DataFrame.
        """,
    }

    template = """
    Perform element-wise {operation} (``{verbose}``).

    If axes are not equal, perform frames alignment first.

    Parameters
    ----------
    other : BaseQueryCompiler, scalar or array-like
        Other operand of the binary operation.
    broadcast : bool, default: False
        If `other` is a one-column query compiler, indicates whether it is a Series or not.
        Frames and Series have to be processed differently, however we can't distinguish them
        at the query compiler level, so this parameter is a hint that is passed from a high-level API.
    {extra_params}**kwargs : dict
        Serves the compatibility purpose. Does not affect the result.

    Returns
    -------
    BaseQueryCompiler
        Result of binary operation.
    """

    # textual form of the expression, e.g. "self + other"
    if self_on_right:
        expression = f"other {sign} self"
    else:
        expression = f"self {sign} other"

    return doc_qc_method(
        template,
        extra_params=extra_params.get(op_type, ""),
        operation=operation,
        verbose=expression,
    )


def doc_reduce_agg(method, refer_to, params=None, extra_params=None):
    """
    Create a decorator that sets the docstring of a reduce method.

    Parameters
    ----------
    method : str
        The result of the method.
    refer_to : str
        Method name in ``modin.pandas.DataFrame`` module to refer to for
        more information about parameters and output format.
    params : str, optional
        Method parameters in the NumPy docstyle format to substitute
        to the docstring template. If not specified, `axis` and
        `numeric_only` parameters are documented.
    extra_params : sequence of str, optional
        Method parameter names to append to the docstring template. Parameter
        type and description will be grabbed from ``extra_params_map`` (Please
        refer to the source code of this function to explore the map).
        Names missing in the map are documented as ``<name> : object``.

    Returns
    -------
    callable
    """
    extra_params_map = {
        "skipna": """
        skipna : bool, default: True""",
        "min_count": """
        min_count : int""",
        "ddof": """
        ddof : int""",
        "*args": """
        *args : iterable
            Serves the compatibility purpose. Does not affect the result.""",
        "**kwargs": """
        **kwargs : dict
            Serves the compatibility purpose. Does not affect the result.""",
    }

    base_params = params
    if base_params is None:
        base_params = """
        axis : {{0, 1}}
        numeric_only : bool, optional"""

    # every extra parameter is aligned against the base parameters block
    aligned_extras = []
    for name in extra_params or []:
        description = extra_params_map.get(name, f"\n{name} : object")
        aligned_extras.append(align_indents(source=base_params, target=description))

    template = """
        Get the {method} for each column or row.
        {params}
        Returns
        -------
        BaseQueryCompiler
            One-column QueryCompiler with index labels of the specified axis,
            where each row contains the {method} for the corresponding
            row or column.
        """

    return doc_qc_method(
        template,
        params=base_params + "".join(aligned_extras),
        method=method,
        refer_to=f"DataFrame.{refer_to}",
    )


# Decorator factory for cumulative aggregations: `doc_qc_method` with the
# template below pre-filled. ``{method}`` has to be supplied by the caller,
# a passed `refer_to` is qualified with ``DataFrame``.
doc_cum_agg = partial(
    doc_qc_method,
    template="""
    Get cumulative {method} for every row or column.

    Parameters
    ----------
    fold_axis : {{0, 1}}
    skipna : bool
    **kwargs : dict
        Serves the compatibility purpose. Does not affect the result.

    Returns
    -------
    BaseQueryCompiler
        QueryCompiler of the same shape as `self`, where each element is the {method}
        of all the previous values in this row or column.
    """,
    refer_to_module_name="DataFrame",
)

# Decorator factory shared by the resample docstring builders in this module:
# `doc_qc_method` with the template below pre-filled. ``{action}``,
# ``{extra_params}`` and ``{build_rules}`` have to be supplied by the caller,
# a passed `refer_to` is qualified with ``resample.Resampler``.
doc_resample = partial(
    doc_qc_method,
    template="""
    Resample time-series data and apply aggregation on it.

    Group data into intervals by time-series row/column with
    a specified frequency and {action}.

    Parameters
    ----------
    resample_kwargs : dict
        Resample parameters as expected by ``modin.pandas.DataFrame.resample`` signature.
    {extra_params}
    Returns
    -------
    BaseQueryCompiler
        New QueryCompiler containing the result of resample aggregation built by the
        following rules:

        {build_rules}
    """,
    refer_to_module_name="resample.Resampler",
)


def doc_resample_reduce(result, refer_to, params=None, compatibility_params=True):
    """
    Create a decorator that sets the docstring of a resample reduce method.

    Parameters
    ----------
    result : str
        The result of the method.
    refer_to : str
        Method name in ``modin.pandas.resample.Resampler`` module to refer to for
        more information about parameters and output format.
    params : str, optional
        Method parameters in the NumPy docstyle format to substitute
        to the docstring template. They are placed before the
        compatibility parameters.
    compatibility_params : bool, default: True
        Whether method takes `*args` and `**kwargs` that do not affect
        the result.

    Returns
    -------
    callable
    """
    if compatibility_params:
        extra_params = """
        *args : iterable
            Serves the compatibility purpose. Does not affect the result.
        **kwargs : dict
            Serves the compatibility purpose. Does not affect the result.
        """
    else:
        extra_params = ""

    if params:
        # method specific parameters go first
        extra_params = format_string(
            "{params}\n{params_substitution}",
            params=params,
            params_substitution=extra_params,
        )

    rules = f"""
            - Labels on the specified axis are the group names (time-stamps)
            - Labels on the opposite of specified axis are preserved.
            - Each element of QueryCompiler is the {result} for the
              corresponding group and column/row."""

    return doc_resample(
        action=f"compute {result} for each group",
        extra_params=extra_params,
        build_rules=rules,
        refer_to=refer_to,
    )


def doc_resample_agg(action, output, refer_to, params=None):
    """
    Create a decorator that sets the docstring of a resample aggregation method.

    Parameters
    ----------
    action : str
        What method does with the resampled data.
    output : str
        What is the content of column names in the result.
    refer_to : str
        Method name in ``modin.pandas.resample.Resampler`` module to refer to for
        more information about parameters and output format.
    params : str, optional
        Method parameters in the NumPy docstyle format to substitute
        to the docstring template. They are placed before the `*args`
        and `**kwargs` description.

    Returns
    -------
    callable
    """
    extra_params = """
        *args : iterable
            Positional arguments to pass to the aggregation function.
        **kwargs : dict
            Keyword arguments to pass to the aggregation function.
        """
    if params:
        # method specific parameters go first
        extra_params = format_string(
            "{params}\n{params_substitution}",
            params=params,
            params_substitution=extra_params,
        )

    rules = f"""
            - Labels on the specified axis are the group names (time-stamps)
            - Labels on the opposite of specified axis are a MultiIndex, where first level
              contains preserved labels of this axis and the second level is the {output}.
            - Each element of QueryCompiler is the result of corresponding function for the
              corresponding group and column/row."""

    return doc_resample(
        action=f"{action} for each group over the specified axis",
        extra_params=extra_params,
        build_rules=rules,
        refer_to=refer_to,
    )


def doc_resample_fillna(method, refer_to, params=None, overwrite_template_params=False):
    """
    Build decorator which adds docstring for the resample fillna query compiler method.

    Parameters
    ----------
    method : str
        Fillna method name.
    refer_to : str
        Method name in ``modin.pandas.resample.Resampler`` module to refer to for
        more information about parameters and output format.
    params : str, optional
        Method parameters in the NumPy docstyle format to substitute
        to the docstring template.
    overwrite_template_params : bool, default: False
        If `params` is specified indicates whether to overwrite method parameters in
        the docstring template or append them at the end.

    Returns
    -------
    callable
    """
    template_params = "limit : int\n"

    if not params:
        # Nothing method-specific: document the template parameters only.
        extra_params = template_params
    elif overwrite_template_params:
        extra_params = params
    else:
        extra_params = format_string(
            "{params}\n{params_substitution}",
            params=params,
            params_substitution=template_params,
        )

    output_rules = "- QueryCompiler contains unsampled data with missing values filled."

    return doc_resample(
        action=f"fill missing values in each group independently using {method} method",
        extra_params=extra_params,
        build_rules=output_rules,
        refer_to=refer_to,
    )


# Docstring builder for ``Series.dt`` property-like methods. The template still
# expects `prop`, `dt_type` and `params` to be supplied by the caller (or by one
# of the specializations below, which pre-fill `dt_type`).
doc_dt = partial(
    doc_qc_method,
    template="""
    Get {prop} for each {dt_type} value.
    {params}
    Returns
    -------
    BaseQueryCompiler
        New QueryCompiler with the same shape as `self`, where each element is
        {prop} for the corresponding {dt_type} value.
    """,
    one_column_method=True,
    refer_to_module_name="Series.dt",
)

# `doc_dt` variants with the `dt_type` placeholder already filled in.
doc_dt_timestamp = partial(doc_dt, dt_type="datetime")
doc_dt_interval = partial(doc_dt, dt_type="interval")
doc_dt_period = partial(doc_dt, dt_type="period")

# Docstring builder for the ``Series.dt`` methods that take `freq`, `ambiguous`
# and `nonexistent`; only `refer_to` is left to substitute. The doubled braces
# are escapes, so the rendered docstring contains literal ``{...}`` sets.
doc_dt_round = partial(
    doc_qc_method,
    template="""
    Perform {refer_to} operation on the underlying time-series data to the specified `freq`.

    Parameters
    ----------
    freq : str
    ambiguous : {{"raise", "infer", "NaT"}} or bool mask, default: "raise"
    nonexistent : {{"raise", "shift_forward", "shift_backward", "NaT"}} or timedelta, default: "raise"

    Returns
    -------
    BaseQueryCompiler
        New QueryCompiler with performed {refer_to} operation on every element.
    """,
    one_column_method=True,
    refer_to_module_name="Series.dt",
)

# Docstring builder for ``Series.str`` methods; `refer_to` and `params` are
# substituted per method.
doc_str_method = partial(
    doc_qc_method,
    template="""
    Apply "{refer_to}" function to each string value in QueryCompiler.
    {params}
    Returns
    -------
    BaseQueryCompiler
        New QueryCompiler containing the result of execution of the "{refer_to}" function
        against each string element.
    """,
    one_column_method=True,
    refer_to_module_name="Series.str",
)


def doc_window_method(
    window_cls_name,
    result,
    refer_to,
    action=None,
    win_type="rolling window",
    params=None,
    build_rules="aggregation",
):
    """
    Build decorator which adds docstring for a window method.

    Parameters
    ----------
    window_cls_name : str
        The Window class the method is on.
    result : str
        The result of the method.
    refer_to : str
        Method name in ``modin.pandas.window.Window`` module to refer to
        for more information about parameters and output format.
    action : str, optional
        What method does with the created window. Defaults to
        ``"compute {result}"``.
    win_type : str, default: "rolling window"
        Type of window that the method creates. It also selects the name of
        the window-arguments parameter shown in the docstring.
    params : str, optional
        Method parameters in the NumPy docstyle format to substitute
        to the docstring template.
    build_rules : str, default: "aggregation"
        Description of the data output format. Either the key of a predefined
        description ("aggregation" or "udf_aggregation") or the description itself.

    Returns
    -------
    callable
    """
    template = """
        Create {win_type} and {action} for each window over the given axis.

        Parameters
        ----------
        fold_axis : {{0, 1}}
        {window_args_name} : list
            Rolling windows arguments with the same signature as ``modin.pandas.DataFrame.rolling``.
        {extra_params}
        Returns
        -------
        BaseQueryCompiler
            New QueryCompiler containing {result} for each window, built by the following
            rules:

            {build_rules}
        """
    predefined_rules = {
        "aggregation": f"""
            - Output QueryCompiler has the same shape and axes labels as the source.
            - Each element is the {result} for the corresponding window.""",
        "udf_aggregation": """
            - Labels on the specified axis are preserved.
            - Labels on the opposite of specified axis are MultiIndex, where first level
              contains preserved labels of this axis and the second level has the function names.
            - Each element of QueryCompiler is the result of corresponding function for the
              corresponding window and column/row.""",
    }
    # Name of the window-arguments parameter for the known window types;
    # anything else falls back to the generic name.
    args_name_by_win_type = {
        "rolling window": "rolling_kwargs",
        "expanding window": "expanding_args",
    }

    if params is None:
        params = ""
    elif params and params[-1] != "\n":
        # `params` has to end with a new line so that an empty line separates
        # the "parameters" and "return" sections.
        params += "\n"

    return doc_qc_method(
        template,
        result=result,
        action=f"compute {result}" if action is None else action,
        win_type=win_type,
        extra_params=params,
        build_rules=predefined_rules.get(build_rules, build_rules),
        refer_to=f"{window_cls_name}.{refer_to}",
        window_args_name=args_name_by_win_type.get(win_type, "window_kwargs"),
    )


def doc_groupby_method(result, refer_to, action=None):
    """
    Build decorator which adds docstring for the groupby reduce method.

    Parameters
    ----------
    result : str
        The result of reduce.
    refer_to : str
        Method name in ``modin.pandas.groupby`` module to refer to
        for more information about parameters and output format.
    action : str, optional
        What method does with groups. Defaults to ``"compute {result}"``.

    Returns
    -------
    callable
    """
    # The output rules refer to the `groupby_kwargs` parameter documented in this
    # very template, so the generated docstring is self-consistent.
    template = """
    Group QueryCompiler data and {action} for every group.

    Parameters
    ----------
    by : BaseQueryCompiler, column or index label, Grouper or list of such
        Object that determine groups.
    axis : {{0, 1}}
        Axis to group and apply aggregation function along.
        0 is for index, when 1 is for columns.
    groupby_kwargs : dict
        GroupBy parameters as expected by ``modin.pandas.DataFrame.groupby`` signature.
    agg_args : list-like
        Positional arguments to pass to the `agg_func`.
    agg_kwargs : dict
        Key arguments to pass to the `agg_func`.
    drop : bool, default: False
        If `by` is a QueryCompiler indicates whether or not by-data came
        from the `self`.

    Returns
    -------
    BaseQueryCompiler
        QueryCompiler containing the result of groupby reduce built by the
        following rules:

        - Labels on the opposite of specified axis are preserved.
        - If groupby_kwargs["as_index"] is True then labels on the specified axis
          are the group names, otherwise labels would be default: 0, 1 ... n.
        - If groupby_kwargs["as_index"] is False, then first N columns/rows of the frame
          contain group names, where N is the columns/rows to group on.
        - Each element of QueryCompiler is the {result} for the
          corresponding group and column/row.

    .. warning
        `map_args` and `reduce_args` parameters are deprecated. They're leaked here from
        ``PandasQueryCompiler.groupby_*``, pandas storage format implements groupby via TreeReduce
        approach, but for other storage formats these parameters make no sense, and so they'll be removed in the future.
    """
    if action is None:
        action = f"compute {result}"

    return doc_qc_method(
        template, result=result, action=action, refer_to=f"GroupBy.{refer_to}"
    )


================================================
FILE: modin/core/storage_formats/base/query_compiler.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
Module contains class ``BaseQueryCompiler``.

``BaseQueryCompiler`` is a parent abstract class for any other query compiler class.
"""

from __future__ import annotations

import abc
import warnings
from enum import IntEnum
from functools import cached_property
from types import MappingProxyType
from typing import TYPE_CHECKING, Any, Hashable, List, Literal, Optional, Union

import numpy as np
import pandas
import pandas.core.resample
from pandas._typing import DtypeBackend, IndexLabel, Suffixes
from pandas.core.dtypes.common import is_number, is_scalar

from modin.config.envvars import Backend, Execution
from modin.core.dataframe.algebra.default2pandas import (
    BinaryDefault,
    CatDefault,
    DataFrameDefault,
    DateTimeDefault,
    ExpandingDefault,
    GroupByDefault,
    ListDefault,
    ResampleDefault,
    RollingDefault,
    SeriesDefault,
    SeriesGroupByDefault,
    StrDefault,
    StructDefault,
)
from modin.core.dataframe.base.interchange.dataframe_protocol.dataframe import (
    ProtocolDataframe,
)
from modin.error_message import ErrorMessage
from modin.logging import ClassLogger
from modin.logging.config import LogLevel
from modin.logging.logger_decorator import disable_logging
from modin.utils import MODIN_UNNAMED_SERIES_LABEL, try_cast_to_pandas

from . import doc_utils

if TYPE_CHECKING:
    from typing_extensions import Self

    # TODO: should be ModinDataframe
    # https://github.com/modin-project/modin/issues/7244
    from modin.core.dataframe.pandas.dataframe.dataframe import PandasDataframe
    from modin.pandas import DataFrame, Series
    from modin.pandas.base import BasePandasDataset


def _get_axis(axis):
    """
    Build index labels getter of the specified axis.

    The returned getter materializes the whole frame via ``to_pandas()`` and
    notifies about defaulting to pandas before doing so.

    Parameters
    ----------
    axis : {0, 1}
        Axis to get labels from.

    Returns
    -------
    callable(BaseQueryCompiler) -> pandas.Index
    """

    def axis_getter(self: "BaseQueryCompiler") -> pandas.Index:
        self._maybe_warn_on_default(message=f"DataFrame.get_axis({axis})")
        materialized = self.to_pandas()
        return materialized.axes[axis]

    return axis_getter


def _set_axis(axis):
    """
    Build index labels setter of the specified axis.

    The returned setter works in place: it computes a relabeled query compiler
    through the pandas fallback and copies its state into `self`.

    Parameters
    ----------
    axis : {0, 1}
        Axis to set labels on.

    Returns
    -------
    callable(BaseQueryCompiler)
    """

    def axis_setter(self, labels):
        set_axis_default = DataFrameDefault.register(pandas.DataFrame.set_axis)
        relabeled_qc = set_axis_default(self, axis=axis, labels=labels)
        # Adopt the state of the relabeled compiler so the change is visible on `self`.
        self.__dict__.update(relabeled_qc.__dict__)

    return axis_setter


class QCCoercionCost(IntEnum):  # noqa: PR01
    """
    Coercion costs between different Query Compiler backends.

    Coercion costs between query compilers can be expressed
    as integers in the range 0 to 1000, where 1000 is
    considered impossible. Since coercion costs can be a
    function of many variables (dataset size, partitioning,
    network throughput, and query time) we define a set range
    of cost values to simplify comparisons between two query
    compilers / engines in a unified way.

    COST_ZERO means there is no cost associated, or that the query compilers
    are the same.

    COST_IMPOSSIBLE means the coercion is effectively impossible, which can
    occur if the target system is unable to store the data as a result
    of the coercion. Currently this does not prevent coercion.
    """

    COST_ZERO = 0
    COST_LOW = 250
    COST_MEDIUM = 500
    COST_HIGH = 750
    COST_IMPOSSIBLE = 1000

    @classmethod
    def validate_coersion_cost(cls, cost: QCCoercionCost):
        """
        Validate that the coercion cost is within range.

        The method name keeps its historical spelling for backward
        compatibility; ``validate_coercion_cost`` is an alias for it.

        Parameters
        ----------
        cost : QCCoercionCost
            Cost to check; any value convertible with ``int()`` is accepted.

        Raises
        ------
        ValueError
            If `cost` is below ``COST_ZERO`` or above ``COST_IMPOSSIBLE``.
        """
        if not int(cls.COST_ZERO) <= int(cost) <= int(cls.COST_IMPOSSIBLE):
            raise ValueError("Query compiler coercion cost out of range")

    # Correctly spelled alias. A classmethod is a descriptor, so the enum
    # machinery does not turn this assignment into an enum member.
    validate_coercion_cost = validate_coersion_cost


# FIXME: many of the BaseQueryCompiler methods hide their actual arguments
# behind *args and **kwargs. They should be spelled out as explicit parameters.
# Currently the actual arguments are listed in the methods' docstrings, but since
# they're not present in the function signatures the linter raises a
# `PR02: unknown parameters` warning. For now, those warnings are silenced
# with `noqa` (Modin issue #3108).
class BaseQueryCompiler(
    ClassLogger, abc.ABC, modin_layer="QUERY-COMPILER", log_level=LogLevel.DEBUG
):
    """
    Abstract class that handles the queries to Modin dataframes.

    This class defines common query compilers API, most of the methods
    are already implemented and defaulting to pandas.

    Attributes
    ----------
    lazy_row_labels : bool, default False
        True if the backend defers computations of the row labels (`df.index` for a frame).
        Used by the frontend to avoid unnecessary execution or defer error validation.
    lazy_row_count : bool, default False
        True if the backend defers computations of the number of rows (`len(df.index)`).
        Used by the frontend to avoid unnecessary execution or defer error validation.
    lazy_column_types : bool, default False
        True if the backend defers computations of the column types (`df.dtypes`).
        Used by the frontend to avoid unnecessary execution or defer error validation.
    lazy_column_labels : bool, default False
        True if the backend defers computations of the column labels (`df.columns`).
        Used by the frontend to avoid unnecessary execution or defer error validation.
    lazy_column_count : bool, default False
        True if the backend defers computations of the number of columns (`len(df.columns)`).
        Used by the frontend to avoid unnecessary execution or defer error validation.
    _shape_hint : {"row", "column", None}, default: None
        Shape hint for frames known to be a column or a row, otherwise None.

    Notes
    -----
    See the Abstract Methods and Fields section immediately below this
    for a list of requirements for subclassing this object.
    """

    # Cost-model knobs read by ``stay_cost``, ``move_to_cost`` and ``move_to_me_cost``.
    # Four variables can handle reasonably complex automatic engine-switching
    # behavior, though the operation overhead (both initial and per-row)
    # values may vary by engine.
    # Row limit reported by ``_engine_max_size()``.
    _MAX_SIZE_THIS_ENGINE_CAN_HANDLE: int = 1
    # Fixed cost added once per operation by ``_stay_cost_rows``.
    _OPERATION_INITIALIZATION_OVERHEAD: int = 0
    # Cost multiplied by the number of rows in ``_stay_cost_rows``.
    _OPERATION_PER_ROW_OVERHEAD: int = 0
    # Row count reported by ``_transfer_threshold()``; ``move_to_cost`` treats
    # values <= 0 as "moving is free".
    _TRANSFER_THRESHOLD: int = 0

    _modin_frame: PandasDataframe
    _shape_hint: Optional[str]
    # Checked by ``_maybe_warn_on_default`` before warning about a pandas fallback.
    _should_warn_on_default_to_pandas: bool = True

    @classmethod
    def _maybe_warn_on_default(cls, *, message: str = "", reason: str = "") -> None:
        """
        Warn about defaulting to pandas, unless warnings are disabled for this class.

        Parameters
        ----------
        message : str, default: ""
            Method that is defaulting to pandas.
        reason : str, default: ""
            Reason for default.
        """
        if not cls._should_warn_on_default_to_pandas:
            return
        ErrorMessage.default_to_pandas(message=message, reason=reason)

    @disable_logging
    def get_backend(self) -> str:
        """
        Get the backend for this query compiler.

        The backend is looked up from the execution described by this
        compiler's `engine` and `storage_format`.

        Returns
        -------
        str
            The backend for this query compiler.
        """
        execution = Execution(engine=self.engine, storage_format=self.storage_format)
        return Backend.get_backend_for_execution(execution)

    @property
    @abc.abstractmethod
    def storage_format(self) -> str:
        """
        The storage format for this query compiler.

        Abstract property: subclasses have to define it. ``get_backend`` combines
        it with `engine` to look up the backend.

        Returns
        -------
        str
            The storage format.
        """
        pass

    @property
    @abc.abstractmethod
    def engine(self) -> str:
        """
        The engine for this query compiler.

        Abstract property: subclasses have to define it. ``get_backend`` combines
        it with `storage_format` to look up the backend.

        Returns
        -------
        str
            The engine.
        """
        pass

    def __wrap_in_qc(self, obj):
        """
        Wrap `obj` in query compiler.

        Parameters
        ----------
        obj : any
            Object to wrap.

        Returns
        -------
        BaseQueryCompiler or any
            Query compiler wrapping the object if it is a pandas Series or
            DataFrame, otherwise `obj` itself.
        """
        if isinstance(obj, pandas.Series):
            # An unnamed series gets the Modin placeholder label before it
            # becomes a single-column frame.
            if obj.name is None:
                obj.name = MODIN_UNNAMED_SERIES_LABEL
            obj = obj.to_frame()
        if not isinstance(obj, pandas.DataFrame):
            return obj
        return self.from_pandas(obj, type(self._modin_frame))

    def default_to_pandas(self, pandas_op, *args, **kwargs) -> Self:
        """
        Do fallback to pandas for the passed function.

        Parameters
        ----------
        pandas_op : callable(pandas.DataFrame) -> object
            Function to apply to the casted to pandas frame.
        *args : iterable
            Positional arguments to pass to `pandas_op`.
        **kwargs : dict
            Key-value arguments to pass to `pandas_op`.

        Returns
        -------
        BaseQueryCompiler
            The result of the `pandas_op`, converted back to ``BaseQueryCompiler``.
            Results that are neither a pandas Series nor a DataFrame are returned
            as is; tuples and lists are converted element-wise into a list.
        """
        # Not every callable has a `__name__` (e.g. ``functools.partial`` objects),
        # so fall back to its string representation.
        op_name = getattr(pandas_op, "__name__", str(pandas_op))
        self._maybe_warn_on_default(message=op_name)
        args = try_cast_to_pandas(args)
        kwargs = try_cast_to_pandas(kwargs)

        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=FutureWarning)
            result = pandas_op(try_cast_to_pandas(self), *args, **kwargs)
        if isinstance(result, (tuple, list)):
            # Use the safely resolved `op_name` here: reading `pandas_op.__name__`
            # directly raises AttributeError for callables without that attribute.
            if "Series.tolist" in op_name:
                # fast path: no need to iterate over the result from `tolist` function
                return result
            return [self.__wrap_in_qc(obj) for obj in result]
        return self.__wrap_in_qc(result)

    @disable_logging
    def move_to_cost(
        self,
        other_qc_type: type,
        api_cls_name: Optional[str],
        operation: str,
        arguments: MappingProxyType[str, Any],
    ) -> Optional[int]:
        """
        Return the coercion costs of this qc to other_qc type.

        This is called for forced casting and opportunistic switching
        decision points. Values returned must be within the acceptable
        range of QCCoercionCost.

        The question is: What are the transfer costs associated with
        moving this data to the other_qc_type?

        Parameters
        ----------
        other_qc_type : QueryCompiler Class
            The query compiler class to which we should return the cost of switching.
        api_cls_name : Optional[str]
            The name of the class performing the operation which can be used as a
            consideration for the costing analysis. `None` means the function does not belong to a class.
        operation : str
            The operation being performed which can be used as a consideration
            for the costing analysis.
        arguments : MappingProxyType[str, Any]
            The arguments to the operation.

        Returns
        -------
        Optional[int]
            Cost of migrating the data from this qc to the other_qc or
            None if the cost cannot be determined.
        """
        # Nothing to transfer when the target type already matches.
        if isinstance(self, other_qc_type):
            return QCCoercionCost.COST_ZERO

        threshold = self.__class__._transfer_threshold()
        # A non-positive threshold means moving is considered free.
        if threshold <= 0:
            return QCCoercionCost.COST_ZERO

        row_count = self._max_shape()[0]
        # Scale the row count so that `threshold` rows map to COST_IMPOSSIBLE.
        cost = int(QCCoercionCost.COST_IMPOSSIBLE * row_count / threshold)
        if cost > QCCoercionCost.COST_IMPOSSIBLE:
            return QCCoercionCost.COST_IMPOSSIBLE
        return cost

    @classmethod
    def _stay_cost_rows(
        cls, rows: int, per_row_overhead: int, max_size: int, op_init_overhead: int
    ) -> int:
        """
        Get the cost of staying on this query compiler for an operation.

        Parameters
        ----------
        rows : int
            The number of input rows.
        per_row_overhead : int
            Per-row cost of this operation.
        max_size : int
            Max rows for this query compiler.
        op_init_overhead : int
            Overhead cost of this operation.

        Returns
        -------
        int
            Cost of staying on this query compiler, capped at
            ``QCCoercionCost.COST_IMPOSSIBLE``.
        """
        if rows > max_size:
            return QCCoercionCost.COST_IMPOSSIBLE
        cost_all_rows = rows * per_row_overhead
        if cost_all_rows:
            # Express the per-row cost as a fraction of the engine capacity,
            # scaled to the QCCoercionCost range.
            normalized_cost_all_rows = (
                cost_all_rows / max_size * QCCoercionCost.COST_IMPOSSIBLE
            )
        else:
            # No per-row work (empty frame or zero per-row overhead). Skipping the
            # normalization avoids a ZeroDivisionError when `rows` and `max_size`
            # are both 0, and gives the same result in every other case.
            normalized_cost_all_rows = 0
        total_cost = normalized_cost_all_rows + op_init_overhead
        if total_cost > QCCoercionCost.COST_IMPOSSIBLE:
            return QCCoercionCost.COST_IMPOSSIBLE
        return int(total_cost)

    @disable_logging
    def stay_cost(
        self,
        api_cls_name: Optional[str],
        operation: str,
        arguments: MappingProxyType[str, Any],
    ) -> Optional[int]:
        """
        Return the "opportunity cost" of not moving the data.

        This is called for opportunistic decision points where we
        have a single data frame which may be moved to another engine.
        This can often be the inverse of the move_to_cost, but it can
        be independently calculated and different. For instance, the
        move_to_cost may include the cost of network transmission to
        the other engine, whereas the cost returned by 'stay_cost'
        may be simply the cost of running the operation locally.

        The question is: What is the cost of running this operation on
        the current dataframe?

        Values returned must be within the acceptable range of
        QCCoercionCost.

        Parameters
        ----------
        api_cls_name : Optional[str]
            The class name performing the operation which can be used as a
            consideration for the costing analysis. `None` means the function is
            not associated with a class.
        operation : str
            The operation being performed which can be used as a consideration
            for the costing analysis.
        arguments : MappingProxyType[str, Any]
            The arguments to the operation.

        Returns
        -------
        Optional[int]
            Cost of doing this operation on the current backend.
        """
        row_count = self._max_shape()[0]
        return self._stay_cost_rows(
            row_count,
            self._OPERATION_PER_ROW_OVERHEAD,
            self.__class__._engine_max_size(),
            self._OPERATION_INITIALIZATION_OVERHEAD,
        )

    @disable_logging
    @classmethod
    def move_to_me_cost(
        cls,
        other_qc: BaseQueryCompiler,
        api_cls_name: Optional[str],
        operation: str,
        arguments: MappingProxyType[str, Any],
    ) -> Optional[int]:
        """
        Return the execution and hidden coercion costs from other_qc.

        This can be implemented as a class method version of stay_cost, though
        since this class is not yet instantiated it may have a different
        implementation. It may also include hidden transport or serialization
        costs.

        Values returned must be within the acceptable range of QCCoercionCost.

        The question is: What is the cost of executing this operation if it
        were to move to this query compiler?

        Parameters
        ----------
        other_qc : BaseQueryCompiler
            The query compiler from which we should return the cost of switching.
        api_cls_name : Optional[str]
            The class name performing the operation which can be used as a
            consideration for the costing analysis. `None` means the function
            is not associated with a class.
        operation : str
            The operation being performed which can be used as a consideration
            for the costing analysis.
        arguments : MappingProxyType[str, Any]
            The arguments to the operation.

        Returns
        -------
        Optional[int]
            Cost of migrating the data from other_qc to this qc or
            None if the cost cannot be determined.
        """
        # Same formula as `stay_cost`, fed with the other compiler's row count
        # and this class's overheads and capacity.
        return cls._stay_cost_rows(
            other_qc._max_shape()[0],
            cls._OPERATION_PER_ROW_OVERHEAD,
            cls._engine_max_size(),
            cls._OPERATION_INITIALIZATION_OVERHEAD,
        )

    @classmethod
    def _engine_max_size(cls) -> int:
        """
        Maximum number of rows this engine can handle.

        Returns
        -------
        int
            Value of the ``_MAX_SIZE_THIS_ENGINE_CAN_HANDLE`` class attribute.
        """
        return cls._MAX_SIZE_THIS_ENGINE_CAN_HANDLE

    @classmethod
    def _transfer_threshold(cls) -> int:
        """
        Maximum number of rows this backend can handle before transferring data to another backend.

        Returns
        -------
        int
            Value of the ``_TRANSFER_THRESHOLD`` class attribute.
        """
        return cls._TRANSFER_THRESHOLD

    @disable_logging
    @classmethod
    def max_cost(cls) -> int:
        """
        Return the max cost allowed by this engine.

        The base implementation allows the whole cost range, i.e. it returns
        ``QCCoercionCost.COST_IMPOSSIBLE``.

        Returns
        -------
        int
            Max cost allowed for migrating the data to this qc.
        """
        return QCCoercionCost.COST_IMPOSSIBLE

    # Abstract Methods and Fields: Must implement in children classes
    # In some cases, you may be able to use the same implementation for
    # some of these abstract methods, but for the sake of generality they are
    # treated differently.

    # Laziness flags (see the class docstring); `lazy_shape` is derived from
    # `lazy_row_count` and `lazy_column_count`.
    lazy_row_labels = False
    lazy_row_count = False
    lazy_column_types = False
    lazy_column_labels = False
    lazy_column_count = False

    def _max_shape(self) -> tuple[int, int]:
        """
        Return the maximum dimensions of the frame.

        For lazily evaluated engines the shape of the dataset may be expensive to
        determine (see lazy_shape), but the maximum shape can be calculated
        inexpensively.

        Returns
        -------
        Tuple
            Maximum shape of the dataframe (height, width).
        """
        height = self.get_axis_len(axis=0)
        width = self.get_axis_len(axis=1)
        return height, width

    @property
    def lazy_shape(self):
        """
        Whether either of the underlying dataframe's dimensions (row count/column count) are computed lazily.

        If True, the frontend should avoid length/shape checks as much as possible.

        Returns
        -------
        bool
        """
        rows_are_lazy = self.lazy_row_count
        return rows_are_lazy if rows_are_lazy else self.lazy_column_count

    # Default shape hint: None, i.e. the frame is not known to be a single row or column.
    _shape_hint = None

    # Metadata modification abstract methods
    def add_prefix(self, prefix, axis=1):
        """
        Add string prefix to the index labels along specified axis.

        Parameters
        ----------
        prefix : str
            The string to add before each label.
        axis : {0, 1}, default: 1
            Axis to add prefix along. 0 is for index and 1 is for columns.

        Returns
        -------
        BaseQueryCompiler
            New query compiler with updated labels.
        """
        add_prefix_default = DataFrameDefault.register(pandas.DataFrame.add_prefix)
        return add_prefix_default(self, prefix=prefix, axis=axis)

    def add_suffix(self, suffix, axis=1):
        """
        Add string suffix to the index labels along specified axis.

        Parameters
        ----------
        suffix : str
            The string to add after each label.
        axis : {0, 1}, default: 1
            Axis to add suffix along. 0 is for index and 1 is for columns.

        Returns
        -------
        BaseQueryCompiler
            New query compiler with updated labels.
        """
        add_suffix_default = DataFrameDefault.register(pandas.DataFrame.add_suffix)
        return add_suffix_default(self, suffix=suffix, axis=axis)

    # END Metadata modification abstract methods

    # Abstract copy

    def copy(self):
        """
        Make a copy of this object.

        Returns
        -------
        BaseQueryCompiler
            Copy of self.

        Notes
        -----
        For copy, we don't want a situation where we modify the metadata of the
        copies if we end up modifying something here. We copy all of the metadata
        to prevent that.
        """
        copy_default = DataFrameDefault.register(pandas.DataFrame.copy)
        return copy_default(self)

    # END Abstract copy

    # Abstract join and append helper functions

    def concat(self, axis, other, **kwargs):  # noqa: PR02
        """
        Concatenate `self` with passed query compilers along specified axis.

        Parameters
        ----------
        axis : {0, 1}
            Axis to concatenate along. 0 is for index and 1 is for columns.
        other : BaseQueryCompiler or list of such
            Objects to concatenate with `self`.
        join : {'outer', 'inner', 'right', 'left'}, default: 'outer'
            Type of join that will be used if indices on the other axis are different.
            (note: if specified, has to be passed as ``join=value``).
        ignore_index : bool, default: False
            If True, do not use the index values along the concatenation axis.
            The resulting axis will be labeled 0, …, n - 1.
            (note: if specified, has to be passed as ``ignore_index=value``).
        sort : bool, default: False
            Whether or not to sort non-concatenation axis.
            (note: if specified, has to be passed as ``sort=value``).
        **kwargs : dict
            Serves the compatibility purpose. Does not affect the result.

        Returns
        -------
        BaseQueryCompiler
            Concatenated objects.
        """
        # Join types handled by ``pandas.concat``; the others go through ``DataFrame.join``.
        concat_join = ("inner", "outer")

        def concat(df, axis, other, **kwargs):
            kwargs.pop("join_axes", None)
            if kwargs.get("join", "outer") in concat_join:
                ignore_index = kwargs.get("ignore_index", False)
                frames = [df] + (other if isinstance(other, list) else [other])
                result = pandas.concat(frames, axis=axis, **kwargs)
            else:
                # Unwrap a single-element container so it is joined as a plain object.
                if isinstance(other, (list, np.ndarray)) and len(other) == 1:
                    other = other[0]
                # ``DataFrame.join`` has no `ignore_index`/`join` parameters:
                # the former is applied below, the latter is spelled `how`.
                ignore_index = kwargs.pop("ignore_index", None)
                kwargs["how"] = kwargs.pop("join", None)
                joins_single_object = (
                    isinstance(other, (pandas.DataFrame, pandas.Series))
                    or len(other) <= 1
                )
                if joins_single_object:
                    kwargs["rsuffix"] = "r_"
                result = df.join(other, **kwargs)

            if not ignore_index:
                return result
            if axis == 0:
                return result.reset_index(drop=True)
            result.columns = pandas.RangeIndex(len(result.columns))
            return result

        concat_default = DataFrameDefault.register(concat)
        return concat_default(self, axis=axis, other=other, **kwargs)

    # END Abstract join and append helper functions

    # Data Management Methods
    @abc.abstractmethod
    def free(self):
        """
        Trigger a cleanup of this object.

        This is an abstract method: the base class provides no behavior here,
        so every concrete query compiler has to supply its own implementation.
        """
        pass

    @abc.abstractmethod
    def finalize(self):
        """
        Finalize constructing the dataframe calling all deferred functions which were used to build it.

        This is an abstract method: the base class provides no behavior here,
        so every concrete query compiler has to supply its own implementation.
        """
        pass

    @abc.abstractmethod
    def execute(self):
        """
        Wait for all computations to complete without materializing data.

        This is an abstract method: the base class provides no behavior here,
        so every concrete query compiler has to supply its own implementation.
        """
        pass

    def support_materialization_in_worker_process(self) -> bool:
        """
        Tell whether `to_pandas` may be called during pickling, at the moment the object is recreated.

        The answer is not computed here; it is delegated to the underlying Modin frame.

        Returns
        -------
        bool
        """
        modin_frame = self._modin_frame
        return modin_frame.support_materialization_in_worker_process()

    # END Data Management Methods

    # Data Movement Methods
    def move_to(self, target_backend: str) -> Union[BaseQueryCompiler, Any]:
        """
        Move this query compiler to the specified backend.

        The base implementation performs no transfer and always returns the
        ``NotImplemented`` sentinel; a sub-class may override this method to
        provide an actual transfer.

        Parameters
        ----------
        target_backend : str
            The backend to move to.

        Returns
        -------
        BaseQueryCompiler or Any
            The new query compiler with the source data, or a sentinel `NotImplemented`
            value if transfer is not implemented.
        """
        return NotImplemented

    @classmethod
    def move_from(cls, source_qc: BaseQueryCompiler) -> Union[BaseQueryCompiler, Any]:
        """
        Move the source query compiler to the current backend.

        The base implementation performs no transfer and always returns the
        ``NotImplemented`` sentinel; a sub-class may override this method to
        provide an actual transfer.

        Parameters
        ----------
        source_qc : BaseQueryCompiler
            The source query compiler to move data from.

        Returns
        -------
        BaseQueryCompiler or Any
            A new query compiler with the source data, or a sentinel `NotImplemented`
            value if transfer is not implemented.
        """
        return NotImplemented

    # END Data Movement Methods

    # To/From Pandas
    @abc.abstractmethod
    def to_pandas(self):
        """
        Convert the data held by this query compiler to a ``pandas.DataFrame``.

        This is an abstract method; concrete query compilers must implement it.

        Returns
        -------
        pandas.DataFrame
            The QueryCompiler converted to pandas.
        """
        pass

    @classmethod
    @abc.abstractmethod
    def from_pandas(cls, df, data_cls):
        """
        Build QueryCompiler from pandas DataFrame.

        This is an abstract class method; concrete query compilers must implement it.

        Parameters
        ----------
        df : pandas.DataFrame
            The pandas DataFrame to convert from.
        data_cls : type
            :py:class:`~modin.core.dataframe.pandas.dataframe.dataframe.PandasDataframe` class
            (or its descendant) to convert to.

        Returns
        -------
        BaseQueryCompiler
            QueryCompiler containing data from the pandas DataFrame.
        """
        pass

    # END To/From Pandas

    # From Arrow
    @classmethod
    @abc.abstractmethod
    def from_arrow(cls, at, data_cls):
        """
        Build QueryCompiler from Arrow Table.

        This is an abstract class method; concrete query compilers must implement it.

        Parameters
        ----------
        at : Arrow Table
            The Arrow Table to convert from.
        data_cls : type
            :py:class:`~modin.core.dataframe.pandas.dataframe.dataframe.PandasDataframe` class
            (or its descendant) to convert to.

        Returns
        -------
        BaseQueryCompiler
            QueryCompiler containing data from the Arrow Table.
        """
        pass

    # END From Arrow

    # To NumPy

    def to_numpy(self, **kwargs):  # noqa: PR02
        """
        Convert the data held by this query compiler to a NumPy array.

        Parameters
        ----------
        dtype : dtype
            The dtype of the resulted array.
        copy : bool
            Whether to ensure that the returned value is not a view on another array.
        na_value : object
            The value to replace missing values with.
        **kwargs : dict
            Serves the compatibility purpose. Does not affect the result.

        Returns
        -------
        np.ndarray
            The QueryCompiler converted to NumPy array.
        """
        # All keyword arguments are forwarded untouched to the registered
        # ``pandas.DataFrame.to_numpy`` default.
        default_to_numpy = DataFrameDefault.register(pandas.DataFrame.to_numpy)
        return default_to_numpy(self, **kwargs)

    # END To NumPy

    def do_array_ufunc_implementation(
        self,
        frame: BasePandasDataset,
        ufunc: np.ufunc,
        method: str,
        *inputs: Any,
        **kwargs: Any,
    ) -> Union["DataFrame", "Series", Any]:
        """
        Apply the provided NumPy ufunc to the underlying data.

        The ``__array_ufunc__`` dispatcher on BasePandasDataset calls this method.

        In contrast to most query compiler methods, the input DataFrame/Series itself is
        passed in, which makes argument processing simpler. This base implementation
        converts everything to pandas and lets pandas apply the ufunc; a query compiler
        sub-class may override the method to provide a distributed implementation.

        See NumPy docs: https://numpy.org/doc/stable/user/basics.subclassing.html#array-ufunc-for-ufuncs

        Parameters
        ----------
        frame : BasePandasDataset
            The DataFrame or Series on which the ufunc was called. Its query compiler must match ``self``.

        ufunc : np.ufunc
            The function to apply.

        method : str
            The name of the function to apply.

        *inputs : Any
            Positional arguments to pass to ``ufunc``.

        **kwargs : Any
            Keyword arguments to pass to ``ufunc``.

        Returns
        -------
        DataFrame, Series, or Any
            The result of applying the ufunc to ``frame``.
        """
        assert (
            frame._query_compiler is self
        ), "array ufunc called with mismatched query compiler and input frame"
        # The regular default_to_pandas() path cannot be used: ``frame`` is itself one
        # of the `inputs` to __array_ufunc__, and pandas checks the identity of the
        # inputs [1]. The usual default to pandas would convert the inputs and self
        # separately, giving inputs[0] a different identity from the converted self.
        # So ``frame`` is converted once and that very object is reused in the inputs.
        #
        # [1] https://github.com/pandas-dev/pandas/blob/2c4c072ade78b96a9eb05097a5fcf4347a3768f3/pandas/_libs/ops_dispatch.pyx#L99-L109
        self._maybe_warn_on_default(message="__array_ufunc__")
        pandas_frame = frame._to_pandas()
        converted_inputs = []
        for item in inputs:
            if item is frame:
                converted_inputs.append(pandas_frame)
            else:
                converted_inputs.append(try_cast_to_pandas(item))
        converted_kwargs = try_cast_to_pandas(kwargs)
        outcome = pandas_frame.__array_ufunc__(
            ufunc, method, *converted_inputs, **converted_kwargs
        )
        # Wrap pandas results back into the matching Modin object.
        if isinstance(outcome, pandas.DataFrame):
            from modin.pandas import DataFrame

            return DataFrame(outcome)
        if isinstance(outcome, pandas.Series):
            from modin.pandas import Series

            return Series(outcome)
        # ufuncs are required to be one-to-one mappings, so this branch should never be hit
        return outcome  # pragma: no cover

    def do_array_function_implementation(
        self,
        frame: BasePandasDataset,
        func: callable,
        types: tuple,
        args: tuple,
        kwargs: dict,
    ) -> Union["DataFrame", "Series", Any]:
        """
        Apply the provided NumPy array function to the underlying data.

        This method is called by the ``__array_function__`` dispatcher on BasePandasDataset.

        Unlike other query compiler methods, this function directly operates on the input DataFrame/Series
        to allow for easier argument processing. The default implementation converts ``frame`` to a
        NumPy array and dispatches to ``ndarray.__array_function__``, but a query compiler sub-class
        may override this method to provide a distributed implementation.

        See NumPy docs: https://numpy.org/neps/nep-0018-array-function-protocol.html#nep18

        Parameters
        ----------
        frame : BasePandasDataset
            The DataFrame or Series on which the array function was called.
            Its query compiler must match ``self``.
        func : np.func
            The NumPy func to apply.
        types : tuple
            The types of the args.
        args : tuple
            The args to the func.
        kwargs : dict
            Additional keyword arguments.

        Returns
        -------
        DataFrame | Series | Any
            The result of applying the function to this dataset. By default, it is whatever
            ``ndarray.__array_function__`` returns for the array obtained from ``frame``.
        """
        from modin.pandas.base import BasePandasDataset

        assert (
            self is frame._query_compiler
        ), "__array_function__ called with mismatched query compiler and input frame"
        # Replace each modin type with numpy ndarray, since we convert modin frames to np ndarrays.
        # The result is materialized as a tuple because the protocol describes `types` as a
        # collection; a generator could only be iterated once by the receiver.
        new_types = tuple(
            np.ndarray if issubclass(tpe, BasePandasDataset) else tpe for tpe in types
        )
        return frame.__array__().__array_function__(func, new_types, args, kwargs)

    # Dataframe exchange protocol

    @abc.abstractmethod
    def to_interchange_dataframe(
        self, nan_as_null: bool = False, allow_copy: bool = True
    ) -> ProtocolDataframe:
        """
        Get a DataFrame exchange protocol object representing data of the Modin DataFrame.

        See more about the protocol in https://data-apis.org/dataframe-protocol/latest/index.html.

        This is an abstract method; concrete query compilers must implement it.

        Parameters
        ----------
        nan_as_null : bool, default: False
            A keyword intended for the consumer to tell the producer
            to overwrite null values in the data with ``NaN`` (or ``NaT``).
            This currently has no effect; once support for nullable extension
            dtypes is added, this value should be propagated to columns.
        allow_copy : bool, default: True
            A keyword that defines whether or not the library is allowed
            to make a copy of the data. For example, copying data would be necessary
            if a library supports strided buffers, given that this protocol
            specifies contiguous buffers. Currently, if the flag is set to ``False``
            and a copy is needed, a ``RuntimeError`` will be raised.

        Returns
        -------
        ProtocolDataframe
            A dataframe object following the DataFrame protocol specification.
        """
        pass

    @classmethod
    @abc.abstractmethod
    def from_interchange_dataframe(cls, df: ProtocolDataframe, data_cls):
        """
        Build QueryCompiler from a DataFrame object supporting the dataframe exchange protocol `__dataframe__()`.

        This is an abstract class method; concrete query compilers must implement it.

        Parameters
        ----------
        df : ProtocolDataframe
            The DataFrame object supporting the dataframe exchange protocol.
        data_cls : type
            :py:class:`~modin.core.dataframe.pandas.dataframe.dataframe.PandasDataframe` class
            (or its descendant) to convert to.

        Returns
        -------
        BaseQueryCompiler
            QueryCompiler containing data from the DataFrame.
        """
        pass

    # END Dataframe exchange protocol

    def to_list(self):
        """
        Return the values as a list.

        Each element is a scalar type: a Python scalar (for str, int, float) or a pandas scalar (for Timestamp/Timedelta/Interval/Period).

        Returns
        -------
        list
        """
        default_to_list = SeriesDefault.register(pandas.Series.to_list)
        return default_to_list(self)

    @doc_utils.add_refer_to("DataFrame.to_dict")
    def dataframe_to_dict(self, orient="dict", into=dict, index=True):  # noqa: PR01
        """
        Convert the DataFrame to a dictionary.

        The data is first materialized with ``to_pandas()`` and then converted by
        ``pandas.DataFrame.to_dict``.

        Returns
        -------
        dict or `into` instance
        """
        # Arguments are forwarded by keyword: passing `into`/`index` positionally to
        # ``pandas.DataFrame.to_dict`` is deprecated in pandas 2.x (FutureWarning)
        # and the parameters are keyword-only in pandas 3.0.
        return self.to_pandas().to_dict(orient=orient, into=into, index=index)

    @doc_utils.add_refer_to("Series.to_dict")
    def series_to_dict(self, into=dict):  # noqa: PR01
        """
        Convert the Series to a dictionary.

        Returns
        -------
        dict or `into` instance
        """
        # `into` is forwarded by keyword: passing it positionally to
        # ``pandas.Series.to_dict`` is deprecated in pandas 2.x and the parameter
        # is keyword-only in pandas 3.0.
        return SeriesDefault.register(pandas.Series.to_dict)(self, into=into)

    # Abstract inter-data operations (e.g. add, sub)
    # These operations require two DataFrames and will change the shape of the
    # data if the index objects don't match. An outer join + op is performed,
    # such that columns/rows that don't have an index on the other DataFrame
    # result in NaN values.

    @doc_utils.add_refer_to("DataFrame.align")
    def align(self, other, **kwargs):
        """
        Align two objects on their axes using the specified join method.

        The join method is specified for each axis Index.

        Parameters
        ----------
        other : BaseQueryCompiler
        **kwargs : dict
            Other arguments for aligning.

        Returns
        -------
        BaseQueryCompiler
            Aligned `self`.
        BaseQueryCompiler
            Aligned `other`.
        """
        default_align = DataFrameDefault.register(pandas.DataFrame.align)
        return default_align(self, other=other, **kwargs)

    @doc_utils.doc_binary_method(operation="addition", sign="+")
    def add(self, other, **kwargs):  # noqa: PR02
        # NOTE(review): no literal docstring here; presumably it is generated by
        # the ``doc_binary_method`` decorator applied to this method.
        default_add = BinaryDefault.register(pandas.DataFrame.add)
        return default_add(self, other=other, **kwargs)

    @doc_utils.add_refer_to("DataFrame.combine")
    def combine(self, other, **kwargs):  # noqa: PR02
        """
        Combine `self` with another QueryCompiler column-wise using the passed `func`.

        If axes are not equal, perform frames alignment first.

        Parameters
        ----------
        other : BaseQueryCompiler
            The query compiler to combine with `self`.
        func : callable(pandas.Series, pandas.Series) -> pandas.Series
            Function that takes two ``pandas.Series`` with aligned axes
            and returns one ``pandas.Series`` as resulting combination.
        fill_value : float or None
            Value to fill missing values with after frame alignment occurred.
        overwrite : bool
            If True, columns in `self` that do not exist in `other`
            will be overwritten with NaNs.
        **kwargs : dict
            Serves the compatibility purpose. Does not affect the result.

        Returns
        -------
        BaseQueryCompiler
            Result of combine.
        """
        default_combine = BinaryDefault.register(pandas.DataFrame.combine)
        return default_combine(self, other=other, **kwargs)

    @doc_utils.add_refer_to("DataFrame.combine_first")
    def combine_first(self, other, **kwargs):  # noqa: PR02
        """
        Fill null elements of `self` with the value found at the same location in `other`.

        If axes are not equal, perform frames alignment first.

        Parameters
        ----------
        other : BaseQueryCompiler
            Provided frame to use to fill null values from.
        **kwargs : dict
            Serves the compatibility purpose. Does not affect the result.

        Returns
        -------
        BaseQueryCompiler
        """
        default_combine_first = BinaryDefault.register(pandas.DataFrame.combine_first)
        return default_combine_first(self, other=other, **kwargs)

    @doc_utils.doc_binary_method(operation="equality comparison", sign="==")
    def eq(self, other, **kwargs):  # noqa: PR02
        # NOTE(review): no literal docstring here; presumably it is generated by
        # the ``doc_binary_method`` decorator applied to this method.
        default_eq = BinaryDefault.register(pandas.DataFrame.eq)
        return default_eq(self, other=other, **kwargs)

    @doc_utils.doc_binary_method(
        operation="equality comparison", sign="==", op_type="series_comparison"
    )
    def series_eq(self, other, **kwargs):  # noqa: PR02
        # NOTE(review): no literal docstring here; presumably it is generated by
        # the ``doc_binary_method`` decorator applied to this method.
        series_op = BinaryDefault.register(pandas.Series.eq)
        # ``squeeze_other`` is taken out of kwargs so it is passed exactly once.
        squeeze_other = kwargs.pop("squeeze_other", False)
        return series_op(
            self,
            other=other,
            squeeze_self=True,
            squeeze_other=squeeze_other,
            **kwargs,
        )

    @doc_utils.add_refer_to("DataFrame.equals")
    def equals(self, other):  # noqa: PR01, RT01
        """Test whether `self` and `other` contain the same elements."""
        default_equals = BinaryDefault.register(pandas.DataFrame.equals)
        return default_equals(self, other=other)

    @doc_utils.doc_binary_method(operation="integer division", sign="//")
    def floordiv(self, other, **kwargs):  # noqa: PR02
        # NOTE(review): no literal docstring here; presumably it is generated by
        # the ``doc_binary_method`` decorator applied to this method.
        default_floordiv = BinaryDefault.register(pandas.DataFrame.floordiv)
        return default_floordiv(self, other=other, **kwargs)

    @doc_utils.add_refer_to("Series.divmod")
    def divmod(self, other, **kwargs):
        """
        Return integer division and modulo of `self` and `other`, element-wise (binary operator divmod).

        Equivalent to divmod(`self`, `other`), but with support to substitute a fill_value for missing data in either one of the inputs.

        Parameters
        ----------
        other : BaseQueryCompiler or scalar value
        **kwargs : dict
            Other arguments for division.

        Returns
        -------
        BaseQueryCompiler
            Compiler representing Series with divisor part of division.
        BaseQueryCompiler
            Compiler representing Series with modulo part of division.
        """
        default_divmod = SeriesDefault.register(pandas.Series.divmod)
        return default_divmod(self, other=other, **kwargs)

    @doc_utils.doc_binary_method(
        operation="greater than or equal comparison", sign=">=", op_type="comparison"
    )
    def ge(self, other, **kwargs):  # noqa: PR02
        # NOTE(review): no literal docstring here; presumably it is generated by
        # the ``doc_binary_method`` decorator applied to this method.
        default_ge = BinaryDefault.register(pandas.DataFrame.ge)
        return default_ge(self, other=other, **kwargs)

    @doc_utils.doc_binary_method(
        operation="greater than or equal comparison",
        sign=">=",
        op_type="series_comparison",
    )
    def series_ge(self, other, **kwargs):  # noqa: PR02
        # NOTE(review): no literal docstring here; presumably it is generated by
        # the ``doc_binary_method`` decorator applied to this method.
        series_op = BinaryDefault.register(pandas.Series.ge)
        # ``squeeze_other`` is taken out of kwargs so it is passed exactly once.
        squeeze_other = kwargs.pop("squeeze_other", False)
        return series_op(
            self,
            other=other,
            squeeze_self=True,
            squeeze_other=squeeze_other,
            **kwargs,
        )

    @doc_utils.doc_binary_method(
        operation="greater than comparison", sign=">", op_type="comparison"
    )
    def gt(self, other, **kwargs):  # noqa: PR02
        # NOTE(review): no literal docstring here; presumably it is generated by
        # the ``doc_binary_method`` decorator applied to this method.
        default_gt = BinaryDefault.register(pandas.DataFrame.gt)
        return default_gt(self, other=other, **kwargs)

    @doc_utils.doc_binary_method(
        operation="greater than comparison", sign=">", op_type="series_comparison"
    )
    def series_gt(self, other, **kwargs):  # noqa: PR02
        # NOTE(review): no literal docstring here; presumably it is generated by
        # the ``doc_binary_method`` decorator applied to this method.
        series_op = BinaryDefault.register(pandas.Series.gt)
        # ``squeeze_other`` is taken out of kwargs so it is passed exactly once.
        squeeze_other = kwargs.pop("squeeze_other", False)
        return series_op(
            self,
            other=other,
            squeeze_self=True,
            squeeze_other=squeeze_other,
            **kwargs,
        )

    @doc_utils.doc_binary_method(
        operation="less than or equal comparison", sign="<=", op_type="comparison"
    )
    def le(self, other, **kwargs):  # noqa: PR02
        # NOTE(review): no literal docstring here; presumably it is generated by
        # the ``doc_binary_method`` decorator applied to this method.
        default_le = BinaryDefault.register(pandas.DataFrame.le)
        return default_le(self, other=other, **kwargs)

    @doc_utils.doc_binary_method(
        operation="less than or equal comparison",
        sign="<=",
        op_type="series_comparison",
    )
    def series_le(self, other, **kwargs):  # noqa: PR02
        # NOTE(review): no literal docstring here; presumably it is generated by
        # the ``doc_binary_method`` decorator applied to this method.
        series_op = BinaryDefault.register(pandas.Series.le)
        # ``squeeze_other`` is taken out of kwargs so it is passed exactly once.
        squeeze_other = kwargs.pop("squeeze_other", False)
        return series_op(
            self,
            other=other,
            squeeze_self=True,
            squeeze_other=squeeze_other,
            **kwargs,
        )

    @doc_utils.doc_binary_method(
        operation="less than comparison", sign="<", op_type="comparison"
    )
    def lt(self, other, **kwargs):  # noqa: PR02
        # NOTE(review): no literal docstring here; presumably it is generated by
        # the ``doc_binary_method`` decorator applied to this method.
        default_lt = BinaryDefault.register(pandas.DataFrame.lt)
        return default_lt(self, other=other, **kwargs)

    @doc_utils.doc_binary_method(
        operation="less than", sign="<", op_type="series_comparison"
    )
    def series_lt(self, other, **kwargs):  # noqa: PR02
        # NOTE(review): no literal docstring here; presumably it is generated by
        # the ``doc_binary_method`` decorator applied to this method.
        series_op = BinaryDefault.register(pandas.Series.lt)
        # ``squeeze_other`` is taken out of kwargs so it is passed exactly once.
        squeeze_other = kwargs.pop("squeeze_other", False)
        return series_op(
            self,
            other=other,
            squeeze_self=True,
            squeeze_other=squeeze_other,
            **kwargs,
        )

    @doc_utils.doc_binary_method(operation="modulo", sign="%")
    def mod(self, other, **kwargs):  # noqa: PR02
        # NOTE(review): no literal docstring here; presumably it is generated by
        # the ``doc_binary_method`` decorator applied to this method.
        default_mod = BinaryDefault.register(pandas.DataFrame.mod)
        return default_mod(self, other=other, **kwargs)

    @doc_utils.doc_binary_method(operation="multiplication", sign="*")
    def mul(self, other, **kwargs):  # noqa: PR02
        # NOTE(review): no literal docstring here; presumably it is generated by
        # the ``doc_binary_method`` decorator applied to this method.
        default_mul = BinaryDefault.register(pandas.DataFrame.mul)
        return default_mul(self, other=other, **kwargs)

    @doc_utils.doc_binary_method(
        operation="multiplication", sign="*", self_on_right=True
    )
    def rmul(self, other, **kwargs):  # noqa: PR02
        # NOTE(review): no literal docstring here; presumably it is generated by
        # the ``doc_binary_method`` decorator applied to this method.
        default_rmul = BinaryDefault.register(pandas.DataFrame.rmul)
        return default_rmul(self, other=other, **kwargs)

    @doc_utils.add_refer_to("DataFrame.corr")
    def corr(self, **kwargs):  # noqa: PR02
        """
        Compute the pairwise correlation of columns, excluding NA/null values.

        Parameters
        ----------
        method : {'pearson', 'kendall', 'spearman'} or callable(pandas.Series, pandas.Series) -> pandas.Series
            Correlation method.
        min_periods : int
            Minimum number of observations required per pair of columns
            to have a valid result. If fewer than `min_periods` non-NA values
            are present the result will be NA.
        **kwargs : dict
            Serves the compatibility purpose. Does not affect the result.

        Returns
        -------
        BaseQueryCompiler
            Correlation matrix.
        """
        default_corr = DataFrameDefault.register(pandas.DataFrame.corr)
        return default_corr(self, **kwargs)

    @doc_utils.add_refer_to("Series.corr")
    def series_corr(self, **kwargs):  # noqa: PR01
        """
        Compute the correlation with `other` Series, excluding missing values.

        The two `Series` objects do not have to be of the same length; they are
        aligned internally before the correlation function is applied.

        Returns
        -------
        float
            Correlation with other.
        """
        default_series_corr = SeriesDefault.register(pandas.Series.corr)
        return default_series_corr(self, **kwargs)

    @doc_utils.add_refer_to("DataFrame.corrwith")
    def corrwith(self, **kwargs):  # noqa: PR01
        """
        Compute the pairwise correlation.

        Returns
        -------
        BaseQueryCompiler
        """
        default_corrwith = DataFrameDefault.register(pandas.DataFrame.corrwith)
        return default_corrwith(self, **kwargs)

    @doc_utils.add_refer_to("DataFrame.cov")
    def cov(self, **kwargs):  # noqa: PR02
        """
        Compute the pairwise covariance of columns, excluding NA/null values.

        Parameters
        ----------
        min_periods : int
        **kwargs : dict
            Serves the compatibility purpose. Does not affect the result.

        Returns
        -------
        BaseQueryCompiler
            Covariance matrix.
        """
        default_cov = DataFrameDefault.register(pandas.DataFrame.cov)
        return default_cov(self, **kwargs)

    def dot(self, other, **kwargs):  # noqa: PR02
        """
        Compute the matrix multiplication of `self` and `other`.

        Parameters
        ----------
        other : BaseQueryCompiler or NumPy array
            The other query compiler or NumPy array to matrix multiply with `self`.
        squeeze_self : boolean
            If `self` is a one-column query compiler, indicates whether it represents Series object.
        squeeze_other : boolean
            If `other` is a one-column query compiler, indicates whether it represents Series object.
        **kwargs : dict
            Serves the compatibility purpose. Does not affect the result.

        Returns
        -------
        BaseQueryCompiler
            A new query compiler that contains result of the matrix multiply.
        """
        # ``squeeze_self`` is only inspected here (not removed), so it is still
        # forwarded to the registered default together with the other kwargs.
        treat_self_as_series = kwargs.get("squeeze_self", False)
        pandas_dot = (
            pandas.Series.dot if treat_self_as_series else pandas.DataFrame.dot
        )
        return BinaryDefault.register(pandas_dot)(self, other=other, **kwargs)

    @doc_utils.doc_binary_method(
        operation="not equal comparison", sign="!=", op_type="comparison"
    )
    def ne(self, other, **kwargs):  # noqa: PR02
        # NOTE(review): no literal docstring here; presumably it is generated by
        # the ``doc_binary_method`` decorator applied to this method.
        default_ne = BinaryDefault.register(pandas.DataFrame.ne)
        return default_ne(self, other=other, **kwargs)

    @doc_utils.doc_binary_method(
        operation="not equal comparison", sign="!=", op_type="series_comparison"
    )
    def series_ne(self, other, **kwargs):  # noqa: PR02
        # NOTE(review): no literal docstring here; presumably it is generated by
        # the ``doc_binary_method`` decorator applied to this method.
        series_op = BinaryDefault.register(pandas.Series.ne)
        # ``squeeze_other`` is taken out of kwargs so it is passed exactly once.
        squeeze_other = kwargs.pop("squeeze_other", False)
        return series_op(
            self,
            other=other,
            squeeze_self=True,
            squeeze_other=squeeze_other,
            **kwargs,
        )

    @doc_utils.doc_binary_method(operation="exponential power", sign="**")
    def pow(self, other, **kwargs):  # noqa: PR02
        # NOTE(review): no literal docstring here; presumably it is generated by
        # the ``doc_binary_method`` decorator applied to this method.
        default_pow = BinaryDefault.register(pandas.DataFrame.pow)
        return default_pow(self, other=other, **kwargs)

    @doc_utils.doc_binary_method(operation="addition", sign="+", self_on_right=True)
    def radd(self, other, **kwargs):  # noqa: PR02
        # NOTE(review): no literal docstring here; presumably it is generated by
        # the ``doc_binary_method`` decorator applied to this method.
        default_radd = BinaryDefault.register(pandas.DataFrame.radd)
        return default_radd(self, other=other, **kwargs)

    @doc_utils.add_refer_to("Series.rdivmod")
    def rdivmod(self, other, **kwargs):
        """
        Return integer division and modulo of `self` and `other`, element-wise (binary operator rdivmod).

        Equivalent to `other` divmod `self`, but with support to substitute a fill_value for missing data in either one of the inputs.

        Parameters
        ----------
        other : BaseQueryCompiler or scalar value
        **kwargs : dict
            Other arguments for division.

        Returns
        -------
        BaseQueryCompiler
            Compiler representing Series with divisor part of division.
        BaseQueryCompiler
            Compiler representing Series with modulo part of division.
        """
        default_rdivmod = SeriesDefault.register(pandas.Series.rdivmod)
        return default_rdivmod(self, other=other, **kwargs)

    @doc_utils.doc_binary_method(
        operation="integer division", sign="//", self_on_right=True
    )
    def rfloordiv(self, other, **kwargs):  # noqa: PR02
        # NOTE(review): no literal docstring here; presumably it is generated by
        # the ``doc_binary_method`` decorator applied to this method.
        default_rfloordiv = BinaryDefault.register(pandas.DataFrame.rfloordiv)
        return default_rfloordiv(self, other=other, **kwargs)

    @doc_utils.doc_binary_method(operation="modulo", sign="%", self_on_right=True)
    def rmod(self, other, **kwargs):  # noqa: PR02
        # NOTE(review): no literal docstring here; presumably it is generated by
        # the ``doc_binary_method`` decorator applied to this method.
        default_rmod = BinaryDefault.register(pandas.DataFrame.rmod)
        return default_rmod(self, other=other, **kwargs)

    @doc_utils.doc_binary_method(
        operation="exponential power", sign="**", self_on_right=True
    )
    def rpow(self, other, **kwargs):  # noqa: PR02
        # NOTE(review): no literal docstring here; presumably it is generated by
        # the ``doc_binary_method`` decorator applied to this method.
        default_rpow = BinaryDefault.register(pandas.DataFrame.rpow)
        return default_rpow(self, other=other, **kwargs)

    @doc_utils.doc_binary_method(operation="subtraction", sign="-", self_on_right=True)
    def rsub(self, other, **kwargs):  # noqa: PR02
        # NOTE(review): no literal docstring here; presumably it is generated by
        # the ``doc_binary_method`` decorator applied to this method.
        default_rsub = BinaryDefault.register(pandas.DataFrame.rsub)
        return default_rsub(self, other=other, **kwargs)

    @doc_utils.doc_binary_method(operation="division", sign="/", self_on_right=True)
    def rtruediv(self, other, **kwargs):  # noqa: PR02
        # NOTE(review): no literal docstring here; presumably it is generated by
        # the ``doc_binary_method`` decorator applied to this method.
        default_rtruediv = BinaryDefault.register(pandas.DataFrame.rtruediv)
        return default_rtruediv(self, other=other, **kwargs)

    @doc_utils.doc_binary_method(operation="subtraction", sign="-")
    def sub(self, other, **kwargs):  # noqa: PR02
        # NOTE(review): no literal docstring here; presumably it is generated by
        # the ``doc_binary_method`` decorator applied to this method.
        default_sub = BinaryDefault.register(pandas.DataFrame.sub)
        return default_sub(self, other=other, **kwargs)

    @doc_utils.doc_binary_method(operation="division", sign="/")
    def truediv(self, other, **kwargs):  # noqa: PR02
        # NOTE(review): no literal docstring here; presumably it is generated by
        # the ``doc_binary_method`` decorator applied to this method.
        default_truediv = BinaryDefault.register(pandas.DataFrame.truediv)
        return default_truediv(self, other=other, **kwargs)

    @doc_utils.doc_binary_method(operation="conjunction", sign="&", op_type="logical")
    def __and__(self, other, **kwargs):  # noqa: PR02
        # NOTE(review): no literal docstring here; presumably it is generated by
        # the ``doc_binary_method`` decorator applied to this method.
        default_and = BinaryDefault.register(pandas.DataFrame.__and__)
        return default_and(self, other=other, **kwargs)

    @doc_utils.doc_binary_method(operation="disjunction", sign="|", op_type="logical")
    def __or__(self, other, **kwargs):  # noqa: PR02
        # NOTE(review): no literal docstring here; presumably it is generated by
        # the ``doc_binary_method`` decorator applied to this method.
        default_or = BinaryDefault.register(pandas.DataFrame.__or__)
        return default_or(self, other=other, **kwargs)

    @doc_utils.doc_binary_method(
        operation="conjunction", sign="&", op_type="logical", self_on_right=True
    )
    def __rand__(self, other, **kwargs):  # noqa: PR02
        # NOTE(review): no literal docstring here; presumably it is generated by
        # the ``doc_binary_method`` decorator applied to this method.
        default_rand = BinaryDefault.register(pandas.DataFrame.__rand__)
        return default_rand(self, other=other, **kwargs)

    @doc_utils.doc_binary_method(
        operation="disjunction", sign="|", op_type="logical", self_on_right=True
    )
    def __ror__(self, other, **kwargs):  # noqa: PR02
        # NOTE(review): no literal docstring here; presumably it is generated by
        # the ``doc_binary_method`` decorator applied to this method.
        default_ror = BinaryDefault.register(pandas.DataFrame.__ror__)
        return default_ror(self, other=other, **kwargs)

    @doc_utils.doc_binary_method(
        operation="exclusive or", sign="^", op_type="logical", self_on_right=True
    )
    def __rxor__(self, other, **kwargs):  # noqa: PR02
        # NOTE(review): no literal docstring here; presumably it is generated by
        # the ``doc_binary_method`` decorator applied to this method.
        default_rxor = BinaryDefault.register(pandas.DataFrame.__rxor__)
        return default_rxor(self, other=other, **kwargs)

    @doc_utils.doc_binary_method(operation="exclusive or", sign="^", op_type="logical")
    def __xor__(self, other, **kwargs):  # noqa: PR02
        # NOTE(review): no literal docstring here; presumably it is generated by
        # the ``doc_binary_method`` decorator applied to this method.
        default_xor = BinaryDefault.register(pandas.DataFrame.__xor__)
        return default_xor(self, other=other, **kwargs)

    # FIXME: query compiler shouldn't care about differences between Frame and Series.
    # We should combine `df_update` and `series_update` into one method (Modin issue #3101).
    @doc_utils.add_refer_to("DataFrame.update")
    def df_update(self, other, **kwargs):  # noqa: PR02
        """
        Update values of `self` with the non-NA values of `other` at the corresponding positions.

        If axes are not equal, perform frames alignment first.

        Parameters
        ----------
        other : BaseQueryCompiler
            Frame to grab replacement values from.
        join : {"left"}
            Specify type of join to align frames if axes are not equal
            (note: currently only one type of join is implemented).
        overwrite : bool
            Whether to overwrite every corresponding value of self, or only if it's NAN.
        filter_func : callable(pandas.Series, pandas.Series) -> numpy.ndarray<bool>
            Function that takes column of the self and return bool mask for values, that
            should be overwritten in the self frame.
        errors : {"raise", "ignore"}
            If "raise", will raise a ``ValueError`` if `self` and `other` both contain
            non-NA data in the same place.
        **kwargs : dict
            Serves the compatibility purpose. Does not affect the result.

        Returns
        -------
        BaseQueryCompiler
            New QueryCompiler with updated values.
        """
        # ``pandas.DataFrame.update`` is registered with ``inplace=True``.
        default_update = BinaryDefault.register(pandas.DataFrame.update, inplace=True)
        return default_update(self, other=other, **kwargs)

    @doc_utils.add_refer_to("Series.update")
    def series_update(self, other, **kwargs):  # noqa: PR02
        """
        Update values of `self` with the values of `other` at the corresponding indices.

        Parameters
        ----------
        other : BaseQueryCompiler
            One-column query compiler with updated values.
        **kwargs : dict
            Serves the compatibility purpose. Does not affect the result.

        Returns
        -------
        BaseQueryCompiler
            New QueryCompiler with updated values.
        """
        # ``pandas.Series.update`` is registered with ``inplace=True``; both
        # operands are flagged to be squeezed.
        default_update = BinaryDefault.register(pandas.Series.update, inplace=True)
        return default_update(
            self, other=other, squeeze_self=True, squeeze_other=True, **kwargs
        )

    @doc_utils.add_refer_to("DataFrame.asfreq")
    def asfreq(self, **kwargs):  # noqa: PR01
        """
        Convert a time series to the specified frequency.

        Returns the original data conformed to a new index with the specified frequency.

        Returns
        -------
        BaseQueryCompiler
            New QueryCompiler reindexed to the specified frequency.
        """
        default_asfreq = DataFrameDefault.register(pandas.DataFrame.asfreq)
        return default_asfreq(self, **kwargs)

    @doc_utils.add_refer_to("DataFrame.clip")
    def clip(self, lower, upper, **kwargs):  # noqa: PR02
        """
        Trim values at the input thresholds.

        Parameters
        ----------
        lower : float or list-like
        upper : float or list-like
        axis : {0, 1}
        **kwargs : dict
            Serves the compatibility purpose. Does not affect the result.

        Returns
        -------
        BaseQueryCompiler
            QueryCompiler with values limited by the specified thresholds.
        """
        # A threshold given as a query compiler is materialized to pandas and
        # squeezed along axis 1; any other value is passed through unchanged.
        lower, upper = (
            bound.to_pandas().squeeze(1)
            if isinstance(bound, BaseQueryCompiler)
            else bound
            for bound in (lower, upper)
        )
        default_clip = DataFrameDefault.register(pandas.DataFrame.clip)
        return default_clip(self, lower=lower, upper=upper, **kwargs)

    @doc_utils.add_refer_to("DataFrame.where")
    def where(self, cond, other, **kwargs):  # noqa: PR02
        """
        Replace values of `self` by values from `other` at positions where `cond` is False.

        Parameters
        ----------
        cond : BaseQueryCompiler
            Boolean mask. True - keep the self value, False - replace by `other` value.
        other : BaseQueryCompiler or pandas.Series
            Source of the replacement values.
        axis : {0, 1}
            Axis to align frames along if axes of self, `cond` and `other` are not equal.
            0 is for index, when 1 is for columns.
        level : int or label, optional
            Level of MultiIndex to align frames along if axes of self, `cond`
            and `other` are not equal. Currently `level` parameter is not implemented,
            so only None value is acceptable.
        **kwargs : dict
            Serves the compatibility purpose. Does not affect the result.

        Returns
        -------
        BaseQueryCompiler
            QueryCompiler with updated data.
        """
        where_default = DataFrameDefault.register(pandas.DataFrame.where)
        return where_default(self, cond=cond, other=other, **kwargs)

    @doc_utils.add_refer_to("DataFrame.merge")
    def merge(self, right, **kwargs):  # noqa: PR02
        """
        Combine this QueryCompiler with `right` using a database-style join.

        Parameters
        ----------
        right : BaseQueryCompiler
            QueryCompiler of the right frame to merge with.
        how : {"left", "right", "outer", "inner", "cross"}
        on : label or list of such
        left_on : label or list of such
        right_on : label or list of such
        left_index : bool
        right_index : bool
        sort : bool
        suffixes : list-like
        copy : bool
        indicator : bool or str
        validate : str
        **kwargs : dict
            Serves the compatibility purpose. Does not affect the result.

        Returns
        -------
        BaseQueryCompiler
            QueryCompiler that contains result of the merge.
        """
        merge_default = DataFrameDefault.register(pandas.DataFrame.merge)
        return merge_default(self, right=right, **kwargs)

    @doc_utils.add_refer_to("merge_ordered")
    def merge_ordered(self, right, **kwargs):  # noqa: PR01
        """
        Merge ordered data, optionally filling or interpolating the result.

        Returns
        -------
        BaseQueryCompiler
        """
        merge_ordered_default = DataFrameDefault.register(pandas.merge_ordered)
        return merge_ordered_default(self, right, **kwargs)

    def _get_column_as_pandas_series(self, key):
        """
        Materialize a single column, selected by label, as a pandas.Series.

        Parameters
        ----------
        key : Any
            Column label.

        Returns
        -------
        pandas.Series

        Raises
        ------
        RuntimeError
            If selecting `key` and squeezing the columns axis does not
            produce a pandas.Series.
        """
        single_column_qc = self.getitem_array([key])
        result = single_column_qc.to_pandas().squeeze(axis=1)
        if isinstance(result, pandas.Series):
            return result
        raise RuntimeError(
            f"Expected getting column {key} to give pandas.Series, "
            f"but instead got {type(result)}"
        )

    def merge_asof(
        self,
        right: "BaseQueryCompiler",
        left_on: Optional[IndexLabel] = None,
        right_on: Optional[IndexLabel] = None,
        left_index: bool = False,
        right_index: bool = False,
        left_by=None,
        right_by=None,
        suffixes: Suffixes = ("_x", "_y"),
        tolerance=None,
        allow_exact_matches: bool = True,
        direction: str = "backward",
    ):  # noqa: GL08
        """
        Perform an asof merge of `self` (left) with `right`.

        A few argument combinations are delegated entirely to ``pandas.merge_asof``
        via ``default_to_pandas``. Otherwise only the "on" (and optional "by")
        columns are materialized in pandas to compute, with ``pandas.merge_asof``,
        which row label of `right` matches each row of `self`; `right` is then
        reindexed with those labels and merged with `self` on the index.

        Parameters
        ----------
        right : BaseQueryCompiler
            QueryCompiler of the right frame.
        left_on : IndexLabel, optional
            Label of the left column to match on. If None, the index of `self` is used.
        right_on : IndexLabel, optional
            Label of the right column to match on. If None, the index of `right` is used.
        left_index : bool, default: False
            Combined with a non-None `right_on` it triggers the pandas fallback.
        right_index : bool, default: False
            If False, the `right_on` column is dropped from the right part of the result.
        left_by : label or list of labels, optional
            Non-scalar values trigger the pandas fallback.
        right_by : label or list of labels, optional
            Non-scalar values trigger the pandas fallback.
        suffixes : tuple of str, default: ("_x", "_y")
            Forwarded to the final ``merge`` call (or to ``pandas.merge_asof``
            on the fallback path).
        tolerance : int or Timedelta, optional
            Forwarded to ``pandas.merge_asof``.
        allow_exact_matches : bool, default: True
            Forwarded to ``pandas.merge_asof``.
        direction : str, default: "backward"
            Forwarded to ``pandas.merge_asof``.

        Returns
        -------
        BaseQueryCompiler
            Result of the merge.
        """
        self._maybe_warn_on_default(message="`merge_asof`")
        # Pandas fallbacks for tricky cases:
        if (
            # No idea how this works or why it does what it does; and in fact
            # there's a Pandas bug suggesting it's wrong:
            # https://github.com/pandas-dev/pandas/issues/33463
            (left_index and right_on is not None)
            # This is the case where by is a list of columns. If we're copying lots
            # of columns out of Pandas, maybe not worth trying our path, it's not
            # clear it's any better:
            or not (left_by is None or is_scalar(left_by))
            or not (right_by is None or is_scalar(right_by))
            # The implementation below assumes that the right index is unique
            # because it uses merge_asof to map each position in the merged
            # index to the label of the one right row that should be merged
            # at that row position.
            or not right.index.is_unique
        ):
            return self.default_to_pandas(
                pandas.merge_asof,
                right,
                left_on=left_on,
                right_on=right_on,
                left_index=left_index,
                right_index=right_index,
                left_by=left_by,
                right_by=right_by,
                suffixes=suffixes,
                tolerance=tolerance,
                allow_exact_matches=allow_exact_matches,
                direction=direction,
            )

        # Pick the "on" values for each side: an explicit column if a label was
        # given, otherwise the frame's index.
        if left_on is None:
            left_column = self.index
        else:
            left_column = self._get_column_as_pandas_series(left_on)

        if right_on is None:
            right_column = right.index
        else:
            right_column = right._get_column_as_pandas_series(right_on)

        left_pandas_limited = {"on": left_column}
        right_pandas_limited = {"on": right_column, "right_labels": right.index}
        extra_kwargs = {}  # extra arguments to Pandas merge_asof

        # NOTE(review): both "by" columns are fetched as soon as either argument is
        # set; presumably callers always pass both or neither - confirm.
        if left_by is not None or right_by is not None:
            extra_kwargs["by"] = "by"
            left_pandas_limited["by"] = self._get_column_as_pandas_series(left_by)
            right_pandas_limited["by"] = right._get_column_as_pandas_series(right_by)

        # 1. Construct Pandas DataFrames with just the 'on' and optional 'by'
        # columns, and the index as another column.
        left_pandas_limited = pandas.DataFrame(left_pandas_limited, index=self.index)
        right_pandas_limited = pandas.DataFrame(right_pandas_limited)

        # 2. Use Pandas' merge_asof to figure out how to map labels on left to
        # labels on the right.
        merged = pandas.merge_asof(
            left_pandas_limited,
            right_pandas_limited,
            on="on",
            direction=direction,
            allow_exact_matches=allow_exact_matches,
            tolerance=tolerance,
            **extra_kwargs,
        )
        # Now merged["right_labels"] shows which labels from right map to left's index.

        # 3. Re-index right using the merged["right_labels"]; at this point right
        # should be same length and (semantically) same order as left:
        right_subset = right.reindex(
            axis=0, labels=pandas.Index(merged["right_labels"])
        )
        # NOTE(review): this assumes `right_on` is a real column label whenever
        # `right_index` is False - confirm against callers.
        if not right_index:
            right_subset = right_subset.drop(columns=[right_on])
        # A "by" column shared by both sides is kept only once (from the left).
        if right_by is not None and left_by == right_by:
            right_subset = right_subset.drop(columns=[right_by])
        # Give the reindexed right frame the left labels so that the index-based
        # merge below pairs rows one-to-one.
        right_subset.index = self.index

        # 4. Merge left and the new shrunken right:
        result = self.merge(
            right_subset,
            left_index=True,
            right_index=True,
            suffixes=suffixes,
            how="left",
        )

        # 5. Clean up to match Pandas output:
        if left_on is not None and right_index:
            # Insert a copy of the suffixed left "on" column under its original
            # label, right before the suffixed column.
            result = result.insert(
                # In theory this could use get_indexer_for(), but that causes an error:
                list(result.columns).index(left_on + suffixes[0]),
                left_on,
                result.getitem_array([left_on + suffixes[0]]),
            )
        if not left_index and not right_index:
            result = result.reset_index(drop=True)

        return result

    @doc_utils.add_refer_to("DataFrame.join")
    def join(self, right, **kwargs):  # noqa: PR02
        """
        Attach columns of another QueryCompiler to this one.

        Parameters
        ----------
        right : BaseQueryCompiler
            QueryCompiler of the right frame to join with.
        on : label or list of such
        how : {"left", "right", "outer", "inner"}
        lsuffix : str
        rsuffix : str
        sort : bool
        **kwargs : dict
            Serves the compatibility purpose. Does not affect the result.

        Returns
        -------
        BaseQueryCompiler
            QueryCompiler that contains result of the join.
        """
        join_default = DataFrameDefault.register(pandas.DataFrame.join)
        return join_default(self, right, **kwargs)

    # END Abstract inter-data operations

    # Abstract Transpose
    def transpose(self, *args, **kwargs):  # noqa: PR02
        """
        Swap rows and columns of this QueryCompiler.

        Parameters
        ----------
        copy : bool
            Whether to copy the data after transposing.
        *args : iterable
            Serves the compatibility purpose. Does not affect the result.
        **kwargs : dict
            Serves the compatibility purpose. Does not affect the result.

        Returns
        -------
        BaseQueryCompiler
            Transposed new QueryCompiler.
        """
        transpose_default = DataFrameDefault.register(pandas.DataFrame.transpose)
        return transpose_default(self, *args, **kwargs)

    def columnarize(self):
        """
        Bring a Series-like QueryCompiler to the single-column layout.

        The data is transposed unless it already has exactly one column; a
        one-column frame is still transposed when its only row is labeled with
        ``MODIN_UNNAMED_SERIES_LABEL``. Nothing is done if the shape hint
        already says "column".

        This method should be called for QueryCompilers representing a Series object,
        i.e. ``self.is_series_like()`` should be True.

        Returns
        -------
        BaseQueryCompiler
            Transposed new QueryCompiler or self.
        """
        if self._shape_hint == "column":
            return self

        needs_transpose = len(self.columns) != 1 or (
            len(self.index) == 1 and self.index[0] == MODIN_UNNAMED_SERIES_LABEL
        )
        result = self.transpose() if needs_transpose else self
        # Record the layout so that subsequent calls can return early.
        result._shape_hint = "column"
        return result

    def is_series_like(self):
        """
        Tell whether this QueryCompiler can represent ``modin.pandas.Series`` object.

        Returns
        -------
        bool
            Return True if QueryCompiler has a single column or row, False otherwise.
        """
        # Columns are checked first; the index is only consulted when needed.
        if len(self.columns) == 1:
            return True
        return len(self.index) == 1

    # END Abstract Transpose

    # Abstract reindex/reset_index (may shuffle data)
    @doc_utils.add_refer_to("DataFrame.reindex")
    def reindex(self, axis, labels, **kwargs):  # noqa: PR02
        """
        Conform QueryCompiler data to new labels along the specified axis.

        Parameters
        ----------
        axis : {0, 1}
            Axis to align labels along. 0 is for index, 1 is for columns.
        labels : list-like
            Index-labels to align with.
        method : {None, "backfill"/"bfill", "pad"/"ffill", "nearest"}
            Method to use for filling holes in reindexed frame.
        fill_value : scalar
            Value to use for missing values in the resulted frame.
        limit : int
        tolerance : int
        **kwargs : dict
            Serves the compatibility purpose. Does not affect the result.

        Returns
        -------
        BaseQueryCompiler
            QueryCompiler with aligned axis.
        """
        reindex_default = DataFrameDefault.register(pandas.DataFrame.reindex)
        return reindex_default(self, axis=axis, labels=labels, **kwargs)

    @doc_utils.add_refer_to("DataFrame.reset_index")
    def reset_index(self, **kwargs):  # noqa: PR02
        """
        Reset the whole index or some of its levels.

        Parameters
        ----------
        drop : bool
            Whether to drop the reset index or insert it at the beginning of the frame.
        level : int or label, optional
            Level to remove from index. Removes all levels by default.
        col_level : int or label
            If the columns have multiple levels, determines which level the labels
            are inserted into.
        col_fill : label
            If the columns have multiple levels, determines how the other levels
            are named.
        **kwargs : dict
            Serves the compatibility purpose. Does not affect the result.

        Returns
        -------
        BaseQueryCompiler
            QueryCompiler with reset index.
        """
        reset_index_default = DataFrameDefault.register(pandas.DataFrame.reset_index)
        return reset_index_default(self, **kwargs)

    def set_index_from_columns(
        self, keys: List[Hashable], drop: bool = True, append: bool = False
    ):
        """
        Create new row labels from a list of columns.

        Parameters
        ----------
        keys : list of hashable
            The list of column names that will become the new index.
        drop : bool, default: True
            Whether or not to drop the columns provided in the `keys` argument.
        append : bool, default: False
            Whether or not to add the columns in `keys` as new levels appended to the
            existing index.

        Returns
        -------
        BaseQueryCompiler
            A new QueryCompiler with updated index.
        """
        set_index_default = DataFrameDefault.register(pandas.DataFrame.set_index)
        return set_index_default(self, keys=keys, drop=drop, append=append)

    # END Abstract reindex/reset_index

    # Full Reduce operations
    #
    # These operations result in a reduced dimensionality of data.
    # Currently, this means a Pandas Series will be returned, but in the future
    # we will implement a Distributed Series, and this will be returned
    # instead.

    def is_monotonic_increasing(self):
        """
        Check whether values in the object are monotonically increasing.

        Returns
        -------
        bool
        """
        monotonic_default = SeriesDefault.register(
            pandas.Series.is_monotonic_increasing
        )
        return monotonic_default(self)

    def is_monotonic_decreasing(self):
        """
        Check whether values in the object are monotonically decreasing.

        Returns
        -------
        bool
        """
        monotonic_default = SeriesDefault.register(
            pandas.Series.is_monotonic_decreasing
        )
        return monotonic_default(self)

    @doc_utils.doc_reduce_agg(
        method="number of non-NaN values", refer_to="count", extra_params=["**kwargs"]
    )
    def count(self, **kwargs):  # noqa: PR02
        # No inline docstring: it is expected to be supplied by ``doc_reduce_agg``.
        count_default = DataFrameDefault.register(pandas.DataFrame.count)
        return count_default(self, **kwargs)

    @doc_utils.doc_reduce_agg(
        method="maximum value", refer_to="max", extra_params=["skipna", "**kwargs"]
    )
    def max(self, **kwargs):  # noqa: PR02
        # No inline docstring: it is expected to be supplied by ``doc_reduce_agg``.
        max_default = DataFrameDefault.register(pandas.DataFrame.max)
        return max_default(self, **kwargs)

    @doc_utils.doc_reduce_agg(
        method="mean value", refer_to="mean", extra_params=["skipna", "**kwargs"]
    )
    def mean(self, **kwargs):  # noqa: PR02
        # No inline docstring: it is expected to be supplied by ``doc_reduce_agg``.
        mean_default = DataFrameDefault.register(pandas.DataFrame.mean)
        return mean_default(self, **kwargs)

    @doc_utils.doc_reduce_agg(
        method="minimum value", refer_to="min", extra_params=["skipna", "**kwargs"]
    )
    def min(self, **kwargs):  # noqa: PR02
        # No inline docstring: it is expected to be supplied by ``doc_reduce_agg``.
        min_default = DataFrameDefault.register(pandas.DataFrame.min)
        return min_default(self, **kwargs)

    @doc_utils.doc_reduce_agg(
        method="production",
        refer_to="prod",
        extra_params=["**kwargs"],
        params="axis : {0, 1}",
    )
    def prod(self, **kwargs):  # noqa: PR02
        # No inline docstring: it is expected to be supplied by ``doc_reduce_agg``.
        prod_default = DataFrameDefault.register(pandas.DataFrame.prod)
        return prod_default(self, **kwargs)

    @doc_utils.doc_reduce_agg(
        method="sum",
        refer_to="sum",
        extra_params=["**kwargs"],
        params="axis : {0, 1}",
    )
    def sum(self, **kwargs):  # noqa: PR02
        # No inline docstring: it is expected to be supplied by ``doc_reduce_agg``.
        sum_default = DataFrameDefault.register(pandas.DataFrame.sum)
        return sum_default(self, **kwargs)

    @doc_utils.add_refer_to("DataFrame.mask")
    def mask(self, cond, other, **kwargs):  # noqa: PR01
        """
        Substitute values at positions where the condition `cond` is True.

        Returns
        -------
        BaseQueryCompiler
            New QueryCompiler with elements replaced with ones from `other` where `cond` is True.
        """
        mask_default = DataFrameDefault.register(pandas.DataFrame.mask)
        return mask_default(self, cond, other, **kwargs)

    @doc_utils.add_refer_to("DataFrame.pct_change")
    def pct_change(self, **kwargs):  # noqa: PR01
        """
        Compute percentage change between the current and a prior element.

        Returns
        -------
        BaseQueryCompiler
        """
        pct_change_default = DataFrameDefault.register(pandas.DataFrame.pct_change)
        return pct_change_default(self, **kwargs)

    @doc_utils.add_refer_to("to_datetime")
    def to_datetime(self, *args, **kwargs):
        """
        Cast columns of the QueryCompiler to the datetime dtype.

        Parameters
        ----------
        *args : iterable
        **kwargs : dict

        Returns
        -------
        BaseQueryCompiler
            QueryCompiler with all columns converted to datetime dtype.
        """
        to_datetime_default = SeriesDefault.register(pandas.to_datetime)
        return to_datetime_default(self, *args, **kwargs)

    # END Abstract full Reduce operations

    # Abstract map partitions operations
    # These operations are operations that apply a function to every partition.
    def abs(self):
        """
        Compute absolute numeric value of each element.

        Returns
        -------
        BaseQueryCompiler
            QueryCompiler with absolute numeric value of each element.
        """
        abs_default = DataFrameDefault.register(pandas.DataFrame.abs)
        return abs_default(self)

    def map(self, func, *args, **kwargs):
        """
        Transform every element with the passed function.

        Parameters
        ----------
        func : callable(scalar) -> scalar
            Function to apply to each element of the QueryCompiler.
        *args : iterable
        **kwargs : dict

        Returns
        -------
        BaseQueryCompiler
            Transformed QueryCompiler.
        """
        map_default = DataFrameDefault.register(pandas.DataFrame.map)
        return map_default(self, func, *args, **kwargs)

    # FIXME: `**kwargs` which follows `numpy.conj` signature was inherited
    # from ``PandasQueryCompiler``, we should get rid of this dependency.
    # (Modin issue #3108)
    def conj(self, **kwargs):
        """
        Compute the complex conjugate for every element of self.

        Parameters
        ----------
        **kwargs : dict

        Returns
        -------
        BaseQueryCompiler
            QueryCompiler with conjugate applied element-wise.

        Notes
        -----
        Please refer to ``numpy.conj`` for parameters description.
        """

        # The helper keeps the name ``conj`` on purpose: the registered default
        # may derive its reported name from the function.
        def conj(df, *args, **kwargs):
            conjugated = np.conj(df)
            return pandas.DataFrame(conjugated)

        conj_default = DataFrameDefault.register(conj)
        return conj_default(self, **kwargs)

    @doc_utils.add_refer_to("DataFrame.interpolate")
    def interpolate(self, **kwargs):  # noqa: PR01
        """
        Fill NaN values with values computed by an interpolation method.

        Returns
        -------
        BaseQueryCompiler
            Returns the same object type as the caller, interpolated at some or all NaN values.
        """
        interpolate_default = DataFrameDefault.register(pandas.DataFrame.interpolate)
        return interpolate_default(self, **kwargs)

    # FIXME:
    #   1. This function takes Modin Series and DataFrames via `values` parameter,
    #      we should avoid leaking of the high-level objects to the query compiler level.
    #      (Modin issue #3106)
    #   2. Spread **kwargs into actual arguments (Modin issue #3108).
    def isin(self, values, ignore_indices=False, **kwargs):  # noqa: PR02
        """
        Test each element of `self` for membership in the passed `values`.

        Parameters
        ----------
        values : list-like, modin.pandas.Series, modin.pandas.DataFrame or dict
            Values to check elements of self in.
        ignore_indices : bool, default: False
            Whether to execute ``isin()`` only on an intersection of indices.
        **kwargs : dict
            Serves the compatibility purpose. Does not affect the result.

        Returns
        -------
        BaseQueryCompiler
            Boolean mask for self of whether an element at the corresponding
            position is contained in `values`.
        """
        if ignore_indices and isinstance(values, type(self)):
            # Pandas logic is that it ignores indexing if 'values' is a 1D object
            values = values.to_pandas().squeeze(axis=1)

        # Column-shaped data goes through the Series implementation,
        # anything else through the DataFrame one.
        isin_default = (
            SeriesDefault.register(pandas.Series.isin)
            if self._shape_hint == "column"
            else DataFrameDefault.register(pandas.DataFrame.isin)
        )
        return isin_default(self, values, **kwargs)

    def isna(self):
        """
        Detect NaN values element-wise.

        Returns
        -------
        BaseQueryCompiler
            Boolean mask for self of whether an element at the corresponding
            position is NaN.
        """
        isna_default = DataFrameDefault.register(pandas.DataFrame.isna)
        return isna_default(self)

    # FIXME: this method is not supposed to take any parameters (Modin issue #3108).
    def negative(self, **kwargs):
        """
        Flip the sign of every value of self.

        Parameters
        ----------
        **kwargs : dict
            Serves the compatibility purpose. Does not affect the result.

        Returns
        -------
        BaseQueryCompiler

        Notes
        -----
        Be aware, that all QueryCompiler values have to be numeric.
        """
        negate_default = DataFrameDefault.register(pandas.DataFrame.__neg__)
        return negate_default(self, **kwargs)

    def notna(self):
        """
        Detect existing (non-missing) values element-wise.

        Returns
        -------
        BaseQueryCompiler
            Boolean mask for `self` of whether an element at the corresponding
            position is not NaN.
        """
        notna_default = DataFrameDefault.register(pandas.DataFrame.notna)
        return notna_default(self)

    @doc_utils.add_refer_to("DataFrame.round")
    def round(self, **kwargs):  # noqa: PR02
        """
        Round every numeric value to the specified number of decimals.

        Parameters
        ----------
        decimals : int or list-like
            Number of decimals to round each column to.
        **kwargs : dict
            Serves the compatibility purpose. Does not affect the result.

        Returns
        -------
        BaseQueryCompiler
            QueryCompiler with rounded values.
        """
        round_default = DataFrameDefault.register(pandas.DataFrame.round)
        return round_default(self, **kwargs)

    # FIXME:
    #   1. high-level objects leaks to the query compiler (Modin issue #3106).
    #   2. remove `inplace` parameter.
    @doc_utils.add_refer_to("DataFrame.replace")
    def replace(self, **kwargs):  # noqa: PR02
        """
        Substitute values given in `to_replace` by `value`.

        Parameters
        ----------
        to_replace : scalar, list-like, regex, modin.pandas.Series, or None
        value : scalar, list-like, regex or dict
        inplace : {False}
            This parameter serves the compatibility purpose. Always has to be False.
        limit : int or None
        regex : bool or same types as `to_replace`
        method : {"pad", "ffill", "bfill", None}
        **kwargs : dict
            Serves the compatibility purpose. Does not affect the result.

        Returns
        -------
        BaseQueryCompiler
            QueryCompiler with all `to_replace` values replaced by `value`.
        """
        replace_default = DataFrameDefault.register(pandas.DataFrame.replace)
        return replace_default(self, **kwargs)

    @doc_utils.add_refer_to("Series.argsort")
    def argsort(self, **kwargs):  # noqa: PR02
        """
        Compute the integer indices that would sort the Series values.

        Override ndarray.argsort. Argsorts the value, omitting NA/null values,
        and places the result in the same locations as the non-NA values.

        Parameters
        ----------
        axis : {0 or 'index'}
            Unused. Parameter needed for compatibility with DataFrame.
        kind : {'mergesort', 'quicksort', 'heapsort', 'stable'}, default 'quicksort'
            Choice of sorting algorithm. See :func:`numpy.sort` for more
            information. 'mergesort' and 'stable' are the only stable algorithms.
        order : None
            Has no effect but is accepted for compatibility with NumPy.
        **kwargs : dict
            Serves compatibility purposes.

        Returns
        -------
        BaseQueryCompiler
            One-column QueryCompiler with positions of values within the
            sort order with -1 indicating nan values.
        """
        argsort_default = SeriesDefault.register(pandas.Series.argsort)
        return argsort_default(self, **kwargs)

    @doc_utils.add_one_column_warning
    # FIXME: adding refer-to note will create two instances of the "Notes" section,
    # this breaks numpydoc style rules and also crashes the doc-style checker script.
    # For now manually added the refer-to message.
    # @doc_utils.add_refer_to("Series.view")
    def series_view(self, **kwargs):  # noqa: PR02
        """
        Reinterpret the underlying data using a new dtype.

        Parameters
        ----------
        dtype : dtype
            Data type to reinterpret underlying data with.
        **kwargs : dict
            Serves the compatibility purpose. Does not affect the result.

        Returns
        -------
        BaseQueryCompiler
            New QueryCompiler of the same data in memory, with reinterpreted values.

        Notes
        -----
            - Be aware, that if this method falls back to pandas, then the newly created
              QueryCompiler will be a copy of the original data.
            - Please refer to ``modin.pandas.Series.view`` for more information
              about parameters and output format.
        """
        view_default = SeriesDefault.register(pandas.Series.view)
        return view_default(self, **kwargs)

    @doc_utils.add_one_column_warning
    @doc_utils.add_refer_to("to_numeric")
    def to_numeric(self, *args, **kwargs):  # noqa: PR02
        """
        Cast underlying data to a numeric dtype.

        Parameters
        ----------
        errors : {"ignore", "raise", "coerce"}
        downcast : {"integer", "signed", "unsigned", "float", None}
        *args : iterable
            Serves the compatibility purpose. Does not affect the result.
        **kwargs : dict
            Serves the compatibility purpose. Does not affect the result.

        Returns
        -------
        BaseQueryCompiler
            New QueryCompiler with converted to numeric values.
        """
        to_numeric_default = SeriesDefault.register(pandas.to_numeric)
        return to_numeric_default(self, *args, **kwargs)

    @doc_utils.add_one_column_warning
    @doc_utils.add_refer_to("to_timedelta")
    def to_timedelta(self, unit="ns", errors="raise"):  # noqa: PR02
        """
        Cast underlying data to timedelta.

        Parameters
        ----------
        unit : str, default: "ns"
            Denotes the unit of the arg for numeric arg. Defaults to "ns".
        errors : {"ignore", "raise", "coerce"}, default: "raise"

        Returns
        -------
        BaseQueryCompiler
            New QueryCompiler with converted to timedelta values.
        """
        to_timedelta_default = SeriesDefault.register(pandas.to_timedelta)
        return to_timedelta_default(self, unit=unit, errors=errors)

    # 'qc.unique()' uses most of the arguments from 'df.drop_duplicates()', so referring to that method
    @doc_utils.add_refer_to("DataFrame.drop_duplicates")
    def unique(self, keep="first", ignore_index=True, subset=None):
        """
        Drop duplicated rows of `self`.

        Parameters
        ----------
        keep : {"first", "last", False}, default: "first"
            Which duplicates to keep.
        ignore_index : bool, default: True
            If ``True``, the resulting axis will be labeled ``0, 1, …, n - 1``.
        subset : list, optional
            Only consider certain columns for identifying duplicates, if `None`, use all of the columns.

        Returns
        -------
        BaseQueryCompiler
            New QueryCompiler with unique values.
        """
        # Duplicates are looked up either in the whole frame or in `subset` columns only.
        lookup_frame = (
            self
            if subset is None
            else self.getitem_column_array(subset, ignore_order=True)
        )
        rows_to_keep = lookup_frame.duplicated(keep=keep).invert()
        deduplicated = self.getitem_array(rows_to_keep)
        return deduplicated.reset_index(drop=True) if ignore_index else deduplicated

    @doc_utils.add_one_column_warning
    @doc_utils.add_refer_to("Series.searchsorted")
    def searchsorted(self, **kwargs):  # noqa: PR02
        """
        Locate positions in a sorted `self` where `value` should be inserted to maintain order.

        Parameters
        ----------
        value : list-like
        side : {"left", "right"}
        sorter : list-like, optional
        **kwargs : dict
            Serves the compatibility purpose. Does not affect the result.

        Returns
        -------
        BaseQueryCompiler
            One-column QueryCompiler which contains indices to insert.
        """
        searchsorted_default = SeriesDefault.register(pandas.Series.searchsorted)
        return searchsorted_default(self, **kwargs)

    # END Abstract map partitions operations

    @doc_utils.add_refer_to("DataFrame.stack")
    def stack(self, level, dropna, sort):
        """
        Move the prescribed level(s) from columns to index.

        Parameters
        ----------
        level : int or label
        dropna : bool
        sort : bool

        Returns
        -------
        BaseQueryCompiler
        """
        stack_default = DataFrameDefault.register(pandas.DataFrame.stack)
        return stack_default(self, level=level, dropna=dropna, sort=sort)

    # Abstract map partitions across select indices
    def astype(self, col_dtypes, errors: str = "raise"):  # noqa: PR02
        """
        Cast columns to the given dtypes.

        Parameters
        ----------
        col_dtypes : dict or str
            Map for column names and new dtypes.
        errors : {'raise', 'ignore'}, default: 'raise'
            Control raising of exceptions on invalid data for provided dtype.
            - raise : allow exceptions to be raised
            - ignore : suppress exceptions. On error return original object.

        Returns
        -------
        BaseQueryCompiler
            New QueryCompiler with updated dtypes.
        """
        # The query compiler calls the argument `col_dtypes`, pandas calls it `dtype`.
        astype_default = DataFrameDefault.register(pandas.DataFrame.astype)
        return astype_default(self, dtype=col_dtypes, errors=errors)

    def infer_objects(self):
        """
        Try to infer better dtypes for object columns.

        Attempts soft conversion of object-dtyped columns, leaving non-object
        and unconvertible columns unchanged. The inference rules are the same
        as during normal Series/DataFrame construction.

        Returns
        -------
        BaseQueryCompiler
            New query compiler with updated dtypes.
        """
        infer_objects_default = DataFrameDefault.register(
            pandas.DataFrame.infer_objects
        )
        return infer_objects_default(self)

    def convert_dtypes(
        self,
        infer_objects: bool = True,
        convert_string: bool = True,
        convert_integer: bool = True,
        convert_boolean: bool = True,
        convert_floating: bool = True,
        dtype_backend: DtypeBackend = "numpy_nullable",
    ):
        """
        Convert columns to best possible dtypes using dtypes supporting ``pd.NA``.

        Parameters
        ----------
        infer_objects : bool, default: True
            Whether object dtypes should be converted to the best possible types.
        convert_string : bool, default: True
            Whether object dtypes should be converted to ``pd.StringDtype()``.
        convert_integer : bool, default: True
            Whether, if possible, conversion should be done to integer extension types.
        convert_boolean : bool, default: True
            Whether object dtypes should be converted to ``pd.BooleanDtype()``.
        convert_floating : bool, default: True
            Whether, if possible, conversion can be done to floating extension types.
            If `convert_integer` is also True, preference will be given to integer dtypes
            if the floats can be faithfully casted to integers.
        dtype_backend : {"numpy_nullable", "pyarrow"}, default: "numpy_nullable"
            Which dtype_backend to use, e.g. whether a DataFrame should use nullable
            dtypes for all dtypes that have a nullable
            implementation when "numpy_nullable" is set, PyArrow is used for all
            dtypes if "pyarrow" is set.

        Returns
        -------
        BaseQueryCompiler
            New QueryCompiler with updated dtypes.
        """
        # Every option is forwarded by keyword, unchanged, to the pandas method.
        conversion_options = {
            "infer_objects": infer_objects,
            "convert_string": convert_string,
            "convert_integer": convert_integer,
            "convert_boolean": convert_boolean,
            "convert_floating": convert_floating,
            "dtype_backend": dtype_backend,
        }
        convert_default = DataFrameDefault.register(pandas.DataFrame.convert_dtypes)
        return convert_default(self, **conversion_options)

    @property
    def dtypes(self):
        """
        Get the dtype of every column.

        Returns
        -------
        pandas.Series
            Series holding the dtype of each column.
        """
        # Convert to a pandas object first and read the dtypes from it.
        pandas_frame = self.to_pandas()
        return pandas_frame.dtypes

    # END Abstract map partitions across select indices

    # Abstract column/row partitions reduce operations
    #
    # These operations result in a reduced dimensionality of data.
    # Currently, this means a Pandas Series will be returned, but in the future
    # we will implement a Distributed Series, and this will be returned
    # instead.

    # FIXME: we're handling level parameter at front-end, it shouldn't
    # propagate to the query compiler (Modin issue #3102)
    @doc_utils.add_refer_to("DataFrame.all")
    def all(self, **kwargs):  # noqa: PR02
        """
        Check whether all elements are true, potentially over an axis.

        Parameters
        ----------
        axis : {0, 1}, optional
        bool_only : bool, optional
        skipna : bool
        level : int or label
        **kwargs : dict
            Serves the compatibility purpose. Does not affect the result.

        Returns
        -------
        BaseQueryCompiler
            If axis was specified return one-column QueryCompiler with index labels
            of the specified axis, where each row holds a boolean telling whether all
            elements of the corresponding row or column are True. Otherwise return
            QueryCompiler with a single bool of whether all elements are True.
        """
        # Delegate to the ``DataFrameDefault`` wrapper around ``pandas.DataFrame.all``.
        pandas_all = DataFrameDefault.register(pandas.DataFrame.all)
        return pandas_all(self, **kwargs)

    @doc_utils.add_refer_to("DataFrame.any")
    def any(self, **kwargs):  # noqa: PR02
        """
        Check whether any element is true, potentially over an axis.

        Parameters
        ----------
        axis : {0, 1}, optional
        bool_only : bool, optional
        skipna : bool
        level : int or label
        **kwargs : dict
            Serves the compatibility purpose. Does not affect the result.

        Returns
        -------
        BaseQueryCompiler
            If axis was specified return one-column QueryCompiler with index labels
            of the specified axis, where each row holds a boolean telling whether any
            element of the corresponding row or column is True. Otherwise return
            QueryCompiler with a single bool of whether any element is True.
        """
        # Delegate to the ``DataFrameDefault`` wrapper around ``pandas.DataFrame.any``.
        pandas_any = DataFrameDefault.register(pandas.DataFrame.any)
        return pandas_any(self, **kwargs)

    def first_valid_index(self):
        """
        Get the index label of the first non-NaN/NULL value.

        Returns
        -------
        scalar
        """
        pandas_first_valid_index = DataFrameDefault.register(
            pandas.DataFrame.first_valid_index
        )
        result_qc = pandas_first_valid_index(self)
        # The registered call's result is converted to pandas and squeezed
        # so that the caller receives the bare value.
        return result_qc.to_pandas().squeeze()

    @doc_utils.add_refer_to("DataFrame.idxmax")
    def idxmax(self, **kwargs):  # noqa: PR02
        """
        Find the position of the first occurrence of the maximum for each row or column.

        Parameters
        ----------
        axis : {0, 1}
        skipna : bool
        **kwargs : dict
            Serves the compatibility purpose. Does not affect the result.

        Returns
        -------
        BaseQueryCompiler
            One-column QueryCompiler with index labels of the specified axis,
            where each row holds the position of the maximum element of the
            corresponding row or column.
        """
        # Delegate to the ``DataFrameDefault`` wrapper around ``pandas.DataFrame.idxmax``.
        pandas_idxmax = DataFrameDefault.register(pandas.DataFrame.idxmax)
        return pandas_idxmax(self, **kwargs)

    @doc_utils.add_refer_to("DataFrame.idxmin")
    def idxmin(self, **kwargs):  # noqa: PR02
        """
        Find the position of the first occurrence of the minimum for each row or column.

        Parameters
        ----------
        axis : {0, 1}
        skipna : bool
        **kwargs : dict
            Serves the compatibility purpose. Does not affect the result.

        Returns
        -------
        BaseQueryCompiler
            One-column QueryCompiler with index labels of the specified axis,
            where each row holds the position of the minimum element of the
            corresponding row or column.
        """
        # Delegate to the ``DataFrameDefault`` wrapper around ``pandas.DataFrame.idxmin``.
        pandas_idxmin = DataFrameDefault.register(pandas.DataFrame.idxmin)
        return pandas_idxmin(self, **kwargs)

    def last_valid_index(self):
        """
        Get the index label of the last non-NaN/NULL value.

        Returns
        -------
        scalar
        """
        pandas_last_valid_index = DataFrameDefault.register(
            pandas.DataFrame.last_valid_index
        )
        result_qc = pandas_last_valid_index(self)
        # The registered call's result is converted to pandas and squeezed
        # so that the caller receives the bare value.
        return result_qc.to_pandas().squeeze()

    @doc_utils.doc_reduce_agg(
        method="median value", refer_to="median", extra_params=["skipna", "**kwargs"]
    )
    def median(self, **kwargs):  # noqa: PR02
        # The docstring is supplied by the decorator above.
        # Delegate to the ``DataFrameDefault`` wrapper around ``pandas.DataFrame.median``.
        pandas_median = DataFrameDefault.register(pandas.DataFrame.median)
        return pandas_median(self, **kwargs)

    @doc_utils.add_refer_to("DataFrame.memory_usage")
    def memory_usage(self, **kwargs):  # noqa: PR02
        """
        Compute the memory usage of each column in bytes.

        Parameters
        ----------
        index : bool
        deep : bool
        **kwargs : dict
            Serves the compatibility purpose. Does not affect the result.

        Returns
        -------
        BaseQueryCompiler
            One-column QueryCompiler with index labels of `self`, where each row
            holds the memory usage of the corresponding column.
        """
        # Delegate to the ``DataFrameDefault`` wrapper around ``pandas.DataFrame.memory_usage``.
        pandas_memory_usage = DataFrameDefault.register(pandas.DataFrame.memory_usage)
        return pandas_memory_usage(self, **kwargs)

    @doc_utils.add_refer_to("DataFrame.sizeof")
    def sizeof(self):
        """
        Calculate the total memory usage of `self`.

        Returns
        -------
        BaseQueryCompiler
            Result that holds either a value or Series of values.
        """
        # The registered pandas callable is the ``__sizeof__`` dunder method.
        pandas_sizeof = DataFrameDefault.register(pandas.DataFrame.__sizeof__)
        return pandas_sizeof(self)

    @doc_utils.doc_reduce_agg(
        method="number of unique values",
        refer_to="nunique",
        params="""
        axis : {0, 1}
        dropna : bool""",
        extra_params=["**kwargs"],
    )
    def nunique(self, **kwargs):  # noqa: PR02
        # The docstring is supplied by the decorator above.
        # Delegate to the ``DataFrameDefault`` wrapper around ``pandas.DataFrame.nunique``.
        pandas_nunique = DataFrameDefault.register(pandas.DataFrame.nunique)
        return pandas_nunique(self, **kwargs)

    @doc_utils.doc_reduce_agg(
        method="value at the given quantile",
        refer_to="quantile",
        params="""
        q : float
        axis : {0, 1}
        numeric_only : bool
        interpolation : {"linear", "lower", "higher", "midpoint", "nearest"}""",
        extra_params=["**kwargs"],
    )
    def quantile_for_single_value(self, **kwargs):  # noqa: PR02
        # The docstring is supplied by the decorator above.
        # Delegate to the ``DataFrameDefault`` wrapper around ``pandas.DataFrame.quantile``.
        pandas_quantile = DataFrameDefault.register(pandas.DataFrame.quantile)
        return pandas_quantile(self, **kwargs)

    @doc_utils.doc_reduce_agg(
        method="unbiased skew", refer_to="skew", extra_params=["skipna", "**kwargs"]
    )
    def skew(self, **kwargs):  # noqa: PR02
        # The docstring is supplied by the decorator above.
        # Delegate to the ``DataFrameDefault`` wrapper around ``pandas.DataFrame.skew``.
        pandas_skew = DataFrameDefault.register(pandas.DataFrame.skew)
        return pandas_skew(self, **kwargs)

    @doc_utils.doc_reduce_agg(
        method="standard deviation of the mean",
        refer_to="sem",
        extra_params=["skipna", "ddof", "**kwargs"],
    )
    def sem(self, **kwargs):  # noqa: PR02
        # The docstring is supplied by the decorator above.
        # Delegate to the ``DataFrameDefault`` wrapper around ``pandas.DataFrame.sem``.
        pandas_sem = DataFrameDefault.register(pandas.DataFrame.sem)
        return pandas_sem(self, **kwargs)

    @doc_utils.doc_reduce_agg(
        method="standard deviation",
        refer_to="std",
        extra_params=["skipna", "ddof", "**kwargs"],
    )
    def std(self, **kwargs):  # noqa: PR02
        # The docstring is supplied by the decorator above.
        # Delegate to the ``DataFrameDefault`` wrapper around ``pandas.DataFrame.std``.
        pandas_std = DataFrameDefault.register(pandas.DataFrame.std)
        return pandas_std(self, **kwargs)

    @doc_utils.doc_reduce_agg(
        method="variance", refer_to="var", extra_params=["skipna", "ddof", "**kwargs"]
    )
    def var(self, **kwargs):  # noqa: PR02
        # The docstring is supplied by the decorator above.
        # Delegate to the ``DataFrameDefault`` wrapper around ``pandas.DataFrame.var``.
        pandas_var = DataFrameDefault.register(pandas.DataFrame.var)
        return pandas_var(self, **kwargs)

    # END Abstract column/row partitions reduce operations

    @doc_utils.add_refer_to("DataFrame.describe")
    def describe(self, percentiles: np.ndarray):
        """
        Produce descriptive statistics.

        Parameters
        ----------
        percentiles : list-like

        Returns
        -------
        BaseQueryCompiler
            QueryCompiler object holding the descriptive statistics
            of the underlying data.
        """
        # ``include`` is always "all" here; only `percentiles` comes from the caller.
        pandas_describe = DataFrameDefault.register(pandas.DataFrame.describe)
        return pandas_describe(self, percentiles=percentiles, include="all")

    # Map across rows/columns
    # These operations require some global knowledge of the full column/row
    # that is being operated on. This means that we have to put all of that
    # data in the same place.

    @doc_utils.doc_cum_agg(method="sum", refer_to="cumsum")
    def cumsum(self, fold_axis, **kwargs):  # noqa: PR02
        # The docstring is supplied by the decorator above.
        # `fold_axis` is not used by this implementation; only `**kwargs` are forwarded.
        pandas_cumsum = DataFrameDefault.register(pandas.DataFrame.cumsum)
        return pandas_cumsum(self, **kwargs)

    @doc_utils.doc_cum_agg(method="maximum", refer_to="cummax")
    def cummax(self, fold_axis, **kwargs):  # noqa: PR02
        # The docstring is supplied by the decorator above.
        # `fold_axis` is not used by this implementation; only `**kwargs` are forwarded.
        pandas_cummax = DataFrameDefault.register(pandas.DataFrame.cummax)
        return pandas_cummax(self, **kwargs)

    @doc_utils.doc_cum_agg(method="minimum", refer_to="cummin")
    def cummin(self, fold_axis, **kwargs):  # noqa: PR02
        # The docstring is supplied by the decorator above.
        # `fold_axis` is not used by this implementation; only `**kwargs` are forwarded.
        pandas_cummin = DataFrameDefault.register(pandas.DataFrame.cummin)
        return pandas_cummin(self, **kwargs)

    @doc_utils.doc_cum_agg(method="product", refer_to="cumprod")
    def cumprod(self, fold_axis, **kwargs):  # noqa: PR02
        # The docstring is supplied by the decorator above.
        # `fold_axis` is not used by this implementation; only `**kwargs` are forwarded.
        pandas_cumprod = DataFrameDefault.register(pandas.DataFrame.cumprod)
        return pandas_cumprod(self, **kwargs)

    @doc_utils.add_refer_to("DataFrame.diff")
    def diff(self, **kwargs):  # noqa: PR02
        """
        Compute the first discrete difference of elements.

        Parameters
        ----------
        periods : int
        **kwargs : dict
            Serves the compatibility purpose. Does not affect the result.

        Returns
        -------
        BaseQueryCompiler
            QueryCompiler of the same shape as `self`, where each element is the difference
            between the corresponding value and the previous value in this row or column.
        """
        # Delegate to the ``DataFrameDefault`` wrapper around ``pandas.DataFrame.diff``.
        pandas_diff = DataFrameDefault.register(pandas.DataFrame.diff)
        return pandas_diff(self, **kwargs)

    @doc_utils.add_refer_to("DataFrame.dropna")
    def dropna(self, **kwargs):  # noqa: PR02
        """
        Drop missing values.

        Parameters
        ----------
        axis : {0, 1}
        how : {"any", "all"}
        thresh : int, optional
        subset : list of labels
        **kwargs : dict
            Serves the compatibility purpose. Does not affect the result.

        Returns
        -------
        BaseQueryCompiler
            New QueryCompiler with null values dropped along the given axis.
        """
        # Delegate to the ``DataFrameDefault`` wrapper around ``pandas.DataFrame.dropna``.
        pandas_dropna = DataFrameDefault.register(pandas.DataFrame.dropna)
        return pandas_dropna(self, **kwargs)

    @doc_utils.add_refer_to("DataFrame.duplicated")
    def duplicated(self, **kwargs):
        """
        Build a boolean Series that marks duplicate rows.

        Parameters
        ----------
        **kwargs : dict
            Additional keyword arguments to be passed in to `pandas.DataFrame.duplicated`.

        Returns
        -------
        BaseQueryCompiler
            New QueryCompiler containing boolean Series denoting duplicate rows.
        """
        # Delegate to the ``DataFrameDefault`` wrapper around ``pandas.DataFrame.duplicated``.
        pandas_duplicated = DataFrameDefault.register(pandas.DataFrame.duplicated)
        return pandas_duplicated(self, **kwargs)

    @doc_utils.add_refer_to("DataFrame.nlargest")
    def nlargest(self, n=5, columns=None, keep="first"):
        """
        Get the first `n` rows ordered by `columns` in descending order.

        Parameters
        ----------
        n : int, default: 5
        columns : list of labels, optional
            Column labels to order by.
            (note: this parameter can be omitted only for a single-column query compilers
            representing Series object, otherwise `columns` has to be specified).
        keep : {"first", "last", "all"}, default: "first"

        Returns
        -------
        BaseQueryCompiler
        """
        if columns is not None:
            # Explicit columns: use the DataFrame flavour of the operation.
            frame_nlargest = DataFrameDefault.register(pandas.DataFrame.nlargest)
            return frame_nlargest(self, n=n, columns=columns, keep=keep)
        # No columns given: use the Series flavour, which takes no `columns`.
        series_nlargest = SeriesDefault.register(pandas.Series.nlargest)
        return series_nlargest(self, n=n, keep=keep)

    @doc_utils.add_refer_to("DataFrame.nsmallest")
    def nsmallest(self, n=5, columns=None, keep="first"):
        """
        Get the first `n` rows ordered by `columns` in ascending order.

        Parameters
        ----------
        n : int, default: 5
        columns : list of labels, optional
            Column labels to order by.
            (note: this parameter can be omitted only for a single-column query compilers
            representing Series object, otherwise `columns` has to be specified).
        keep : {"first", "last", "all"}, default: "first"

        Returns
        -------
        BaseQueryCompiler
        """
        if columns is not None:
            # Explicit columns: use the DataFrame flavour of the operation.
            frame_nsmallest = DataFrameDefault.register(pandas.DataFrame.nsmallest)
            return frame_nsmallest(self, n=n, columns=columns, keep=keep)
        # No columns given: use the Series flavour, which takes no `columns`.
        series_nsmallest = SeriesDefault.register(pandas.Series.nsmallest)
        return series_nsmallest(self, n=n, keep=keep)

    @doc_utils.add_refer_to("DataFrame.query")
    def rowwise_query(self, expr, **kwargs):
        """
        Filter rows of the QueryCompiler with a boolean expression evaluated row-wise.

        Parameters
        ----------
        expr : str
        **kwargs : dict

        Returns
        -------
        BaseQueryCompiler
            New QueryCompiler containing the rows where the boolean expression is satisfied.
        """
        # This implementation always raises; it never evaluates `expr`.
        message = (
            "Row-wise queries execution is not implemented for the selected backend."
        )
        raise NotImplementedError(message)

    @doc_utils.add_refer_to("DataFrame.eval")
    def eval(self, expr, **kwargs):
        """
        Evaluate a string expression against the QueryCompiler columns.

        Parameters
        ----------
        expr : str
        **kwargs : dict

        Returns
        -------
        BaseQueryCompiler
            QueryCompiler containing the result of evaluation.
        """
        # Delegate to the ``DataFrameDefault`` wrapper around ``pandas.DataFrame.eval``.
        pandas_eval = DataFrameDefault.register(pandas.DataFrame.eval)
        return pandas_eval(self, expr=expr, **kwargs)

    @doc_utils.add_refer_to("DataFrame.mode")
    def mode(self, **kwargs):  # noqa: PR02
        """
        Compute the modes of every column or row.

        Parameters
        ----------
        axis : {0, 1}
        numeric_only : bool
        dropna : bool
        **kwargs : dict
            Serves the compatibility purpose. Does not affect the result.

        Returns
        -------
        BaseQueryCompiler
            New QueryCompiler with modes calculated along the given axis.
        """
        # Delegate to the ``DataFrameDefault`` wrapper around ``pandas.DataFrame.mode``.
        pandas_mode = DataFrameDefault.register(pandas.DataFrame.mode)
        return pandas_mode(self, **kwargs)

    @doc_utils.add_refer_to("DataFrame.fillna")
    def fillna(self, **kwargs):  # noqa: PR02
        """
        Fill NaN values using the provided method.

        Parameters
        ----------
        value : scalar or dict
        method : {"backfill", "bfill", "pad", "ffill", None}
        axis : {0, 1}
        inplace : {False}
            This parameter serves the compatibility purpose. Always has to be False.
        limit : int, optional
        downcast : dict, optional
        **kwargs : dict
            Serves the compatibility purpose. Does not affect the result.

        Returns
        -------
        BaseQueryCompiler
            New QueryCompiler with all null values filled.
        """
        # These two flags are consumed here and never reach ``fillna`` itself.
        squeeze_self = kwargs.pop("squeeze_self", False)
        squeeze_value = kwargs.pop("squeeze_value", False)

        def fillna(df, value, **kwargs):
            # Optionally squeeze the frame and/or the fill value along axis 1
            # before filling.
            target = df.squeeze(axis=1) if squeeze_self else df
            fill_value = value.squeeze(axis=1) if squeeze_value else value
            return target.fillna(fill_value, **kwargs)

        registered_fillna = DataFrameDefault.register(fillna)
        return registered_fillna(self, **kwargs)

    @doc_utils.add_refer_to("DataFrame.rank")
    def rank(self, **kwargs):  # noqa: PR02
        """
        Calculate numerical rank along the specified axis.

        Equal values get, by default, a rank equal to the average of the ranks
        of those values; the `method` parameter changes this behavior.

        Parameters
        ----------
        axis : {0, 1}
        method : {"average", "min", "max", "first", "dense"}
        numeric_only : bool
        na_option : {"keep", "top", "bottom"}
        ascending : bool
        pct : bool
        **kwargs : dict
            Serves the compatibility purpose. Does not affect the result.

        Returns
        -------
        BaseQueryCompiler
            QueryCompiler of the same shape as `self`, where each element is the
            numerical rank of the corresponding value along row or column.
        """
        # Delegate to the ``DataFrameDefault`` wrapper around ``pandas.DataFrame.rank``.
        pandas_rank = DataFrameDefault.register(pandas.DataFrame.rank)
        return pandas_rank(self, **kwargs)

    @doc_utils.add_refer_to("DataFrame.sort_index")
    def sort_index(self, **kwargs):  # noqa: PR02
        """
        Sort the data by index or column labels.

        Parameters
        ----------
        axis : {0, 1}
        level : int, label or list of such
        ascending : bool
        inplace : bool
        kind : {"quicksort", "mergesort", "heapsort"}
        na_position : {"first", "last"}
        sort_remaining : bool
        ignore_index : bool
        key : callable(pandas.Index) -> pandas.Index, optional
        **kwargs : dict
            Serves the compatibility purpose. Does not affect the result.

        Returns
        -------
        BaseQueryCompiler
            New QueryCompiler containing the data sorted by columns or indices.
        """
        # Delegate to the ``DataFrameDefault`` wrapper around ``pandas.DataFrame.sort_index``.
        pandas_sort_index = DataFrameDefault.register(pandas.DataFrame.sort_index)
        return pandas_sort_index(self, **kwargs)

    @doc_utils.add_refer_to("DataFrame.melt")
    def melt(self, *args, **kwargs):  # noqa: PR02
        """
        Reshape QueryCompiler data from wide to long format.

        Parameters
        ----------
        id_vars : list of labels, optional
        value_vars : list of labels, optional
        var_name : label
        value_name : label
        col_level : int or label
        ignore_index : bool
        *args : iterable
            Serves the compatibility purpose. Does not affect the result.
        **kwargs : dict
            Serves the compatibility purpose. Does not affect the result.

        Returns
        -------
        BaseQueryCompiler
            New QueryCompiler with unpivoted data.
        """
        # Positional and keyword arguments are both forwarded untouched.
        pandas_melt = DataFrameDefault.register(pandas.DataFrame.melt)
        return pandas_melt(self, *args, **kwargs)

    @doc_utils.add_refer_to("DataFrame.sort_values")
    def sort_columns_by_row_values(self, rows, ascending=True, **kwargs):  # noqa: PR02
        """
        Rearrange the columns according to the lexicographic order of the given rows.

        Parameters
        ----------
        rows : label or list of labels
            The row or rows to sort by.
        ascending : bool, default: True
            Sort in ascending order (True) or descending order (False).
        kind : {"quicksort", "mergesort", "heapsort"}
        na_position : {"first", "last"}
        ignore_index : bool
        key : callable(pandas.Index) -> pandas.Index, optional
        **kwargs : dict
            Serves the compatibility purpose. Does not affect the result.

        Returns
        -------
        BaseQueryCompiler
            New QueryCompiler that contains result of the sort.
        """
        # Sorting columns by row values is ``sort_values`` along axis 1 with
        # `rows` passed as ``by``.
        pandas_sort_values = DataFrameDefault.register(pandas.DataFrame.sort_values)
        return pandas_sort_values(
            self, by=rows, axis=1, ascending=ascending, **kwargs
        )

    @doc_utils.add_refer_to("DataFrame.sort_values")
    def sort_rows_by_column_values(
        self, columns, ascending=True, **kwargs
    ):  # noqa: PR02
        """
        Reorder the rows based on the lexicographic order of the given columns.

        Parameters
        ----------
        columns : label or list of labels
            The column or columns to sort by.
        ascending : bool, default: True
            Sort in ascending order (True) or descending order (False).
        kind : {"quicksort", "mergesort", "heapsort"}
        na_position : {"first", "last"}
        ignore_index : bool
        key : callable(pandas.Index) -> pandas.Index, optional
        **kwargs : dict
            Serves the compatibility purpose. Does not affect the result.

        Returns
        -------
        BaseQueryCompiler
            New QueryCompiler that contains result of the sort.

        Notes
        -----
        When an index level shares its name with one of `columns`, the index names
        of `self` are temporarily cleared for the duration of the sort. They are
        restored on `self` afterwards (even if the sort raises) and are also
        applied to the returned QueryCompiler.
        """
        if is_scalar(columns):
            columns = [columns]
        # Avoid index/column name collisions by renaming and restoring after sorting
        original_names = self.index.names
        names_collide = any(name in columns for name in original_names)
        if names_collide:
            self.index = self.index.set_names([None] * len(original_names))
        try:
            new_query_compiler = DataFrameDefault.register(
                pandas.DataFrame.sort_values
            )(self, by=columns, axis=0, ascending=ascending, **kwargs)
        finally:
            # Undo the temporary renaming so that `self` is left unmodified
            # regardless of whether the sort succeeded.
            if names_collide:
                self.index = self.index.set_names(original_names)
        if names_collide:
            new_query_compiler.index = new_query_compiler.index.set_names(
                original_names
            )
        return new_query_compiler

    # END Abstract map across rows/columns

    # Map across rows/columns
    # These operations require some global knowledge of the full column/row
    # that is being operated on. This means that we have to put all of that
    # data in the same place.
    @doc_utils.doc_reduce_agg(
        method="value at the given quantile",
        refer_to="quantile",
        params="""
        q : list-like
        axis : {0, 1}
        numeric_only : bool
        interpolation : {"linear", "lower", "higher", "midpoint", "nearest"}""",
        extra_params=["**kwargs"],
    )
    def quantile_for_list_of_values(self, **kwargs):  # noqa: PR02
        # The docstring is supplied by the decorator above.
        # Delegate to the ``DataFrameDefault`` wrapper around ``pandas.DataFrame.quantile``.
        pandas_quantile = DataFrameDefault.register(pandas.DataFrame.quantile)
        return pandas_quantile(self, **kwargs)

    # END Abstract map across rows/columns

    # Abstract __getitem__ methods
    def getitem_array(self, key):
        """
        Apply `key` as a mask to the QueryCompiler.

        Parameters
        ----------
        key : BaseQueryCompiler, np.ndarray or list of column labels
            Boolean mask represented by QueryCompiler or ``np.ndarray`` of the same
            shape as `self`, or enumerable of columns to pick.

        Returns
        -------
        BaseQueryCompiler
            New masked QueryCompiler.
        """
        mask = key
        if isinstance(key, type(self)):
            # A key of the same type as `self` is converted to pandas and
            # squeezed along axis 1 before indexing.
            mask = key.to_pandas().squeeze(axis=1)

        def getitem_array(df, key):
            return df[key]

        registered_getitem = DataFrameDefault.register(getitem_array)
        return registered_getitem(self, mask)

    def getitem_column_array(self, key, numeric=False, ignore_order=False):
        """
        Retrieve column data for the target labels.

        Parameters
        ----------
        key : list-like
            Target labels by which to retrieve data.
        numeric : bool, default: False
            Whether or not the key passed in represents the numeric index
            or the named index.
        ignore_order : bool, default: False
            Allow returning columns in an arbitrary order for the sake of performance.

        Returns
        -------
        BaseQueryCompiler
            New QueryCompiler that contains specified columns.
        """
        # `ignore_order` is not used by this implementation.

        def get_column(df, key):
            # Positional selection for numeric keys, label selection otherwise.
            return df.iloc[:, key] if numeric else df[key]

        registered_get_column = DataFrameDefault.register(get_column)
        return registered_get_column(self, key=key)

    def getitem_row_array(self, key):
        """
        Retrieve row data for the target indices.

        Parameters
        ----------
        key : list-like
            Numeric indices of the rows to pick.

        Returns
        -------
        BaseQueryCompiler
            New QueryCompiler that contains specified rows.
        """

        def get_row(df, key):
            # Rows are selected positionally.
            return df.iloc[key]

        registered_get_row = DataFrameDefault.register(get_row)
        return registered_get_row(self, key=key)

    def lookup(self, row_labels, col_labels):  # noqa: PR01, RT01, D200
        """
        Perform label-based "fancy indexing" on a ``DataFrame``.
        """
        # NOTE(review): ``pandas.DataFrame.lookup`` appears to have been removed in
        # pandas 2.0 - confirm this method is still reachable with the supported
        # pandas version.
        fallback = self.default_to_pandas
        return fallback(pandas.DataFrame.lookup, row_labels, col_labels)

    # END Abstract __getitem__ methods

    # Abstract insert
    # This method changes the shape of the resulting data. In Pandas, this
    # operation is always inplace, but this object is immutable, so we just
    # return a new one from here and let the front end handle the inplace
    # update.
    def insert(self, loc, column, value):
        """
        Add a new column at the given position.

        Parameters
        ----------
        loc : int
            Insertion position.
        column : label
            Label of the new column.
        value : One-column BaseQueryCompiler, 1D array or scalar
            Data to fill new column with.

        Returns
        -------
        BaseQueryCompiler
            QueryCompiler with new column inserted.
        """

        def inserter(df, loc, column, value):
            # A DataFrame value is squeezed along axis 1 before insertion;
            # any other value is inserted as is.
            column_data = (
                value.squeeze(axis=1) if isinstance(value, pandas.DataFrame) else value
            )
            df.insert(loc, column, column_data)
            return df

        registered_inserter = DataFrameDefault.register(inserter, inplace=True)
        return registered_inserter(self, loc=loc, column=column, value=value)

    # END Abstract insert

    # __setitem__ methods
    def setitem_bool(self, row_loc, col_loc, item):
        """
        Write `item` to the location described by `row_loc` and `col_loc`.

        Parameters
        ----------
        row_loc : BaseQueryCompiler
            Query Compiler holding a Series of booleans.
        col_loc : label
            Column label in `self`.
        item : scalar
            An item to be set.

        Returns
        -------
        BaseQueryCompiler
            New QueryCompiler with the inserted item.

        Notes
        -----
        Currently, this method is only used to set a scalar to the given location.
        """

        def _set_item(df, row_loc, col_loc, item):
            # The row locator is squeezed along axis 1 before being used in ``.loc``.
            row_mask = row_loc.squeeze(axis=1)
            df.loc[row_mask, col_loc] = item
            return df

        registered_set_item = DataFrameDefault.register(_set_item)
        return registered_set_item(self, row_loc=row_loc, col_loc=col_loc, item=item)

    # END __setitem__ methods

    # Abstract drop
    def drop(self, index=None, columns=None, errors: str = "raise"):
        """
        Remove the specified rows or columns.

        Parameters
        ----------
        index : list of labels, optional
            Labels of rows to drop.
        columns : list of labels, optional
            Labels of columns to drop.
        errors : str, default: "raise"
            If 'ignore', suppress error and only existing labels are dropped.

        Returns
        -------
        BaseQueryCompiler
            New QueryCompiler with removed data.
        """
        if index is not None or columns is not None:
            pandas_drop = DataFrameDefault.register(pandas.DataFrame.drop)
            return pandas_drop(self, index=index, columns=columns, errors=errors)
        # Nothing to drop: hand back `self` unchanged.
        return self

    # END drop

    # UDF (apply and agg) methods
    # There is a wide range of behaviors that are supported, so a lot of the
    # logic can get a bit convoluted.
    def apply(self, func, axis, raw=False, result_type=None, *args, **kwargs):
        """
        Apply passed function across given axis.

        Parameters
        ----------
        func : callable(pandas.Series) -> scalar, str, list or dict of such
            The function to apply to each column or row.
        axis : {0, 1}
            Target axis to apply the function along.
            0 is for index, 1 is for columns.
        raw : bool, default: False
            Whether to pass a high-level Series object (False) or a raw representation
            of the data (True).
        result_type : {"expand", "reduce", "broadcast", None}, default: None
            Determines how to treat list-like return type of the `func` (works only if
            a single function was passed):

            - "expand": expand list-like result into columns.
            - "reduce": keep result into a single cell (opposite of "expand").
            - "broadcast": broadcast result to original data shape (overwrite the existing column/row with the function result).
            - None: use "expand" strategy if Series is returned, "reduce" otherwise.
        *args : iterable
            Positional arguments to pass to `func`.
        **kwargs : dict
            Keyword arguments to pass to `func`.

        Returns
        -------
        BaseQueryCompiler
            QueryCompiler that contains the results of execution and is built by
            the following rules:

            - Index of the specified axis contains: the names of the passed functions if multiple
              functions are passed, otherwise: indices of the `func` result if "expand" strategy
              is used, indices of the original frame if "broadcast" strategy is used, a single
              label `MODIN_UNNAMED_SERIES_LABEL` if "reduce" strategy is used.
            - Labels of the opposite axis are preserved.
            - Each element is the result of execution of `func` against
              corresponding row/column.

        Notes
        -----
        Positional `*args` are handed to pandas through its ``args`` keyword. If
        ``args`` is also present in `**kwargs`, the positional ones come first.
        """
        if args:
            # Unpacking ``*args`` into the call would place them before the keyword
            # arguments, i.e. in the positional slot of ``func``, and fail with
            # "multiple values for argument 'func'". Route them through the
            # ``args`` keyword of ``pandas.DataFrame.apply`` instead.
            kwargs["args"] = (*args, *kwargs.get("args", ()))
        return DataFrameDefault.register(pandas.DataFrame.apply)(
            self,
            func=func,
            axis=axis,
            raw=raw,
            result_type=result_type,
            **kwargs,
        )

    def apply_on_series(self, func, *args, **kwargs):
        """
        Apply passed function on underlying Series.

        Parameters
        ----------
        func : callable(pandas.Series) -> scalar, str, list or dict of such
            The function to apply to each row.
        *args : iterable
            Positional arguments to pass to `func`.
        **kwargs : dict
            Keyword arguments to pass to `func`.

        Returns
        -------
        BaseQueryCompiler

        Notes
        -----
        Positional `*args` are handed to pandas through its ``args`` keyword. If
        ``args`` is also present in `**kwargs`, the positional ones come first.
        """
        # This method is only meaningful for query compilers representing a Series.
        assert self.is_series_like()

        if args:
            # Unpacking ``*args`` into the call would place them before the keyword
            # arguments, i.e. in the positional slot of ``func``, and fail with
            # "multiple values for argument 'func'". Route them through the
            # ``args`` keyword of ``pandas.Series.apply`` instead.
            kwargs["args"] = (*args, *kwargs.get("args", ()))
        return SeriesDefault.register(pandas.Series.apply)(
            self,
            func=func,
            **kwargs,
        )

    def explode(self, column):
        """
        Explode the requested columns.

        Parameters
        ----------
        column : Union[Hashable, Sequence[Hashable]]
            The columns to explode.

        Returns
        -------
        BaseQueryCompiler
            QueryCompiler that contains the results of execution. For each row
            in the input QueryCompiler, if the selected columns each contain M
            items, there will be M rows created by exploding the columns.
        """
        # `column` is forwarded positionally to ``pandas.DataFrame.explode``.
        pandas_explode = DataFrameDefault.register(pandas.DataFrame.explode)
        return pandas_explode(self, column)

    # END UDF

    # Manual Partitioning methods (e.g. merge, groupby)
    # These methods require some sort of manual partitioning due to their
    # nature. They require certain data to exist on the same partition, and
    # after the shuffle, there should be only a local map required.

    # FIXME: `map_args` and `reduce_args` leaked here from `PandasQueryCompiler.groupby_*`.
    # The pandas storage format implements groupby via the TreeReduce approach, but for other
    # storage formats these parameters make no sense, so they shouldn't be present in a base class.

    @doc_utils.doc_groupby_method(
        action="count non-null values",
        result="number of non-null values",
        refer_to="count",
    )
    def groupby_count(
        self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False
    ):
        # Wrap ``DataFrameGroupBy.count`` with ``GroupByDefault.register`` and
        # forward every groupby parameter unchanged.
        pandas_method = pandas.core.groupby.DataFrameGroupBy.count
        return GroupByDefault.register(pandas_method)(
            self,
            by=by,
            axis=axis,
            groupby_kwargs=groupby_kwargs,
            agg_args=agg_args,
            agg_kwargs=agg_kwargs,
            drop=drop,
        )

    @doc_utils.doc_groupby_method(
        action="check whether any element is True",
        result="boolean of whether there is any element which is True",
        refer_to="any",
    )
    def groupby_any(
        self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False
    ):
        # Wrap ``DataFrameGroupBy.any`` with ``GroupByDefault.register`` and
        # forward every groupby parameter unchanged.
        pandas_method = pandas.core.groupby.DataFrameGroupBy.any
        return GroupByDefault.register(pandas_method)(
            self,
            by=by,
            axis=axis,
            groupby_kwargs=groupby_kwargs,
            agg_args=agg_args,
            agg_kwargs=agg_kwargs,
            drop=drop,
        )

    @doc_utils.doc_groupby_method(
        action="get the index of the minimum value",
        result="index of minimum value",
        refer_to="idxmin",
    )
    def groupby_idxmin(
        self,
        by,
        axis,
        groupby_kwargs,
        agg_args,
        agg_kwargs,
        drop=False,
    ):
        # Wrap ``DataFrameGroupBy.idxmin`` with ``GroupByDefault.register`` and
        # forward every groupby parameter unchanged.
        pandas_method = pandas.core.groupby.DataFrameGroupBy.idxmin
        return GroupByDefault.register(pandas_method)(
            self,
            by=by,
            axis=axis,
            groupby_kwargs=groupby_kwargs,
            agg_args=agg_args,
            agg_kwargs=agg_kwargs,
            drop=drop,
        )

    @doc_utils.doc_groupby_method(
        action="get the index of the maximum value",
        result="index of maximum value",
        refer_to="idxmax",
    )
    def groupby_idxmax(
        self,
        by,
        axis,
        groupby_kwargs,
        agg_args,
        agg_kwargs,
        drop=False,
    ):
        # Wrap ``DataFrameGroupBy.idxmax`` with ``GroupByDefault.register`` and
        # forward every groupby parameter unchanged.
        pandas_method = pandas.core.groupby.DataFrameGroupBy.idxmax
        return GroupByDefault.register(pandas_method)(
            self,
            by=by,
            axis=axis,
            groupby_kwargs=groupby_kwargs,
            agg_args=agg_args,
            agg_kwargs=agg_kwargs,
            drop=drop,
        )

    @doc_utils.doc_groupby_method(
        action="get the minimum value", result="minimum value", refer_to="min"
    )
    def groupby_min(
        self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False
    ):
        # Wrap ``DataFrameGroupBy.min`` with ``GroupByDefault.register`` and
        # forward every groupby parameter unchanged.
        pandas_method = pandas.core.groupby.DataFrameGroupBy.min
        return GroupByDefault.register(pandas_method)(
            self,
            by=by,
            axis=axis,
            groupby_kwargs=groupby_kwargs,
            agg_args=agg_args,
            agg_kwargs=agg_kwargs,
            drop=drop,
        )

    @doc_utils.doc_groupby_method(result="product", refer_to="prod")
    def groupby_prod(
        self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False
    ):
        # Wrap ``DataFrameGroupBy.prod`` with ``GroupByDefault.register`` and
        # forward every groupby parameter unchanged.
        pandas_method = pandas.core.groupby.DataFrameGroupBy.prod
        return GroupByDefault.register(pandas_method)(
            self,
            by=by,
            axis=axis,
            groupby_kwargs=groupby_kwargs,
            agg_args=agg_args,
            agg_kwargs=agg_kwargs,
            drop=drop,
        )

    @doc_utils.doc_groupby_method(
        action="get the maximum value", result="maximum value", refer_to="max"
    )
    def groupby_max(
        self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False
    ):
        # Wrap ``DataFrameGroupBy.max`` with ``GroupByDefault.register`` and
        # forward every groupby parameter unchanged.
        pandas_method = pandas.core.groupby.DataFrameGroupBy.max
        return GroupByDefault.register(pandas_method)(
            self,
            by=by,
            axis=axis,
            groupby_kwargs=groupby_kwargs,
            agg_args=agg_args,
            agg_kwargs=agg_kwargs,
            drop=drop,
        )

    @doc_utils.doc_groupby_method(
        action="check whether all elements are True",
        result="boolean of whether all elements are True",
        refer_to="all",
    )
    def groupby_all(
        self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False
    ):
        # Wrap ``DataFrameGroupBy.all`` with ``GroupByDefault.register`` and
        # forward every groupby parameter unchanged.
        pandas_method = pandas.core.groupby.DataFrameGroupBy.all
        return GroupByDefault.register(pandas_method)(
            self,
            by=by,
            axis=axis,
            groupby_kwargs=groupby_kwargs,
            agg_args=agg_args,
            agg_kwargs=agg_kwargs,
            drop=drop,
        )

    @doc_utils.doc_groupby_method(result="sum", refer_to="sum")
    def groupby_sum(
        self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False
    ):
        # Wrap ``DataFrameGroupBy.sum`` with ``GroupByDefault.register`` and
        # forward every groupby parameter unchanged.
        pandas_method = pandas.core.groupby.DataFrameGroupBy.sum
        return GroupByDefault.register(pandas_method)(
            self,
            by=by,
            axis=axis,
            groupby_kwargs=groupby_kwargs,
            agg_args=agg_args,
            agg_kwargs=agg_kwargs,
            drop=drop,
        )

    @doc_utils.doc_groupby_method(
        action="get the number of elements",
        result="number of elements",
        refer_to="size",
    )
    def groupby_size(
        self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False
    ):
        # Wrap ``DataFrameGroupBy.size`` with ``GroupByDefault.register``; the extra
        # ``method="size"`` keyword is forwarded together with the groupby parameters.
        pandas_method = pandas.core.groupby.DataFrameGroupBy.size
        result = GroupByDefault.register(pandas_method)(
            self,
            by=by,
            axis=axis,
            groupby_kwargs=groupby_kwargs,
            agg_args=agg_args,
            agg_kwargs=agg_kwargs,
            drop=drop,
            method="size",
        )
        # NOTE(review): a missing "as_index" key is treated as False here, whereas
        # pandas' own ``groupby`` default is True - confirm callers always pass it.
        if groupby_kwargs.get("as_index", False):
            return result
        # Renaming 'MODIN_UNNAMED_SERIES_LABEL' (the last column) to a proper name
        leading_labels = result.columns[:-1]
        result.columns = leading_labels.append(pandas.Index(["size"]))
        return result

    @doc_utils.add_refer_to("GroupBy.rolling")
    def groupby_rolling(
        self,
        by,
        agg_func,
        axis,
        groupby_kwargs,
        rolling_kwargs,
        agg_args,
        agg_kwargs,
        drop=False,
    ):
        """
        Group QueryCompiler data and apply passed aggregation function to a rolling window in each group.

        Parameters
        ----------
        by : BaseQueryCompiler, column or index label, Grouper or list of such
            Object that determine groups.
        agg_func : str, dict or callable(Series | DataFrame) -> scalar | Series | DataFrame
            Function to apply to the rolling window. A string is treated as the name
            of a method to call on the window object.
        axis : {0, 1}
            Axis to group and apply aggregation function along.
            0 is for index, when 1 is for columns.
        groupby_kwargs : dict
            GroupBy parameters as expected by ``modin.pandas.DataFrame.groupby`` signature.
        rolling_kwargs : dict
            Parameters to build a rolling window as expected by ``modin.pandas.window.RollingGroupby`` signature.
        agg_args : list-like
            Positional arguments to pass to the `agg_func`.
        agg_kwargs : dict
            Key arguments to pass to the `agg_func`.
        drop : bool, default: False
            If `by` is a QueryCompiler indicates whether or not by-data came
            from the `self`.

        Returns
        -------
        BaseQueryCompiler
            QueryCompiler containing the result of groupby aggregation.
        """
        if not isinstance(agg_func, str):
            assert callable(agg_func)
            window_func = agg_func
        else:
            method_name = agg_func

            # Resolve the named method on the window object at call time.
            def window_func(window, *args, **kwargs):
                return getattr(window, method_name)(*args, **kwargs)

        # Build the rolling window from each GroupBy object, then run the
        # aggregation on that window.
        def apply_to_rolling(grp, *args, **kwargs):
            return window_func(grp.rolling(**rolling_kwargs), *args, **kwargs)

        return self.groupby_agg(
            by=by,
            agg_func=apply_to_rolling,
            axis=axis,
            groupby_kwargs=groupby_kwargs,
            agg_args=agg_args,
            agg_kwargs=agg_kwargs,
            how="direct",
            drop=drop,
        )

    @doc_utils.add_refer_to("GroupBy.aggregate")
    def groupby_agg(
        self,
        by,
        agg_func,
        axis,
        groupby_kwargs,
        agg_args,
        agg_kwargs,
        how="axis_wise",
        drop=False,
        series_groupby=False,
    ):
        """
        Group QueryCompiler data and apply passed aggregation function.

        Parameters
        ----------
        by : BaseQueryCompiler, column or index label, Grouper or list of such
            Object that determine groups.
        agg_func : str, dict or callable(Series | DataFrame) -> scalar | Series | DataFrame
            Function to apply to the GroupBy object.
        axis : {0, 1}
            Axis to group and apply aggregation function along.
            0 is for index, when 1 is for columns.
        groupby_kwargs : dict
            GroupBy parameters as expected by ``modin.pandas.DataFrame.groupby`` signature.
        agg_args : list-like
            Positional arguments to pass to the `agg_func`.
        agg_kwargs : dict
            Key arguments to pass to the `agg_func`.
        how : {'axis_wise', 'group_wise', 'transform'}, default: 'axis_wise'
            How to apply passed `agg_func`:
                - 'axis_wise': apply the function against each row/column.
                - 'group_wise': apply the function against every group.
                - 'transform': apply the function against every group and broadcast
                  the result to the original Query Compiler shape.
            ``groupby_rolling`` additionally passes 'direct'; its exact meaning is
            defined by ``get_aggregation_method`` of the defaulter (presumably the
            function is called on the GroupBy object itself - TODO confirm).
        drop : bool, default: False
            If `by` is a QueryCompiler indicates whether or not by-data came
            from the `self`.
        series_groupby : bool, default: False
            Whether we should treat `self` as Series when performing groupby.

        Returns
        -------
        BaseQueryCompiler
            QueryCompiler containing the result of groupby aggregation.
        """
        if isinstance(by, type(self)):
            if len(by.columns) == 1:
                # Single-column 'by': use its label when the data came from `self`,
                # otherwise materialize it with ``to_pandas().squeeze()``.
                by = by.columns[0] if drop else by.to_pandas().squeeze()
            elif drop:
                # converting QC 'by' to a list of column labels only if this 'by'
                # comes from the self (if drop is True)
                by = list(by.columns)

        if series_groupby:
            defaulter = SeriesGroupByDefault
        else:
            defaulter = GroupByDefault
        aggregation_method = defaulter.get_aggregation_method(how)
        return defaulter.register(aggregation_method)(
            self,
            by=by,
            agg_func=agg_func,
            axis=axis,
            groupby_kwargs=groupby_kwargs,
            agg_args=agg_args,
            agg_kwargs=agg_kwargs,
            drop=drop,
        )

    @doc_utils.doc_groupby_method(
        action="compute the mean value", result="mean value", refer_to="mean"
    )
    def groupby_mean(
        self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False
    ):
        # Delegate to ``groupby_agg`` with the "mean" aggregation; every other
        # argument is forwarded unchanged.
        forwarded = {
            "by": by,
            "agg_func": "mean",
            "axis": axis,
            "groupby_kwargs": groupby_kwargs,
            "agg_args": agg_args,
            "agg_kwargs": agg_kwargs,
            "drop": drop,
        }
        return self.groupby_agg(**forwarded)

    @doc_utils.doc_groupby_method(
        action="compute unbiased skew", result="unbiased skew", refer_to="skew"
    )
    def groupby_skew(
        self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False
    ):
        if axis != 1:
            # Regular path: delegate to ``groupby_agg`` with the "skew" aggregation.
            return self.groupby_agg(
                by=by,
                agg_func="skew",
                axis=axis,
                groupby_kwargs=groupby_kwargs,
                agg_args=agg_args,
                agg_kwargs=agg_kwargs,
                drop=drop,
            )
        # To avoid `ValueError: Operation skew does not support axis=1` due to the
        # difference in the behavior of `groupby(...).skew(axis=1)` and
        # `groupby(...).agg("skew", axis=1)`.
        pandas_method = pandas.core.groupby.DataFrameGroupBy.skew
        return GroupByDefault.register(pandas_method)(
            self,
            by=by,
            axis=axis,
            groupby_kwargs=groupby_kwargs,
            agg_args=agg_args,
            agg_kwargs=agg_kwargs,
            drop=drop,
        )

    @doc_utils.doc_groupby_method(
        action="compute cumulative count",
        result="count of all the previous values",
        refer_to="cumcount",
    )
    def groupby_cumcount(
        self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False
    ):
        # Delegate to ``groupby_agg`` with the "cumcount" aggregation; every other
        # argument is forwarded unchanged.
        forwarded = {
            "by": by,
            "agg_func": "cumcount",
            "axis": axis,
            "groupby_kwargs": groupby_kwargs,
            "agg_args": agg_args,
            "agg_kwargs": agg_kwargs,
            "drop": drop,
        }
        return self.groupby_agg(**forwarded)

    @doc_utils.doc_groupby_method(
        action="compute cumulative sum",
        result="sum of all the previous values",
        refer_to="cumsum",
    )
    def groupby_cumsum(
        self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False
    ):
        # Delegate to ``groupby_agg`` with the "cumsum" aggregation; every other
        # argument is forwarded unchanged.
        forwarded = {
            "by": by,
            "agg_func": "cumsum",
            "axis": axis,
            "groupby_kwargs": groupby_kwargs,
            "agg_args": agg_args,
            "agg_kwargs": agg_kwargs,
            "drop": drop,
        }
        return self.groupby_agg(**forwarded)

    @doc_utils.doc_groupby_method(
        action="get cumulative maximum",
        result="maximum of all the previous values",
        refer_to="cummax",
    )
    def groupby_cummax(
        self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False
    ):
        # Delegate to ``groupby_agg`` with the "cummax" aggregation; every other
        # argument is forwarded unchanged.
        forwarded = {
            "by": by,
            "agg_func": "cummax",
            "axis": axis,
            "groupby_kwargs": groupby_kwargs,
            "agg_args": agg_args,
            "agg_kwargs": agg_kwargs,
            "drop": drop,
        }
        return self.groupby_agg(**forwarded)

    @doc_utils.doc_groupby_method(
        action="get cumulative minimum",
        result="minimum of all the previous values",
        refer_to="cummin",
    )
    def groupby_cummin(
        self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False
    ):
        # Delegate to ``groupby_agg`` with the "cummin" aggregation; every other
        # argument is forwarded unchanged.
        forwarded = {
            "by": by,
            "agg_func": "cummin",
            "axis": axis,
            "groupby_kwargs": groupby_kwargs,
            "agg_args": agg_args,
            "agg_kwargs": agg_kwargs,
            "drop": drop,
        }
        return self.groupby_agg(**forwarded)

    @doc_utils.doc_groupby_method(
        action="get cumulative production",
        result="production of all the previous values",
        refer_to="cumprod",
    )
    def groupby_cumprod(
        self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False
    ):
        # Delegate to ``groupby_agg`` with the "cumprod" aggregation; every other
        # argument is forwarded unchanged.
        forwarded = {
            "by": by,
            "agg_func": "cumprod",
            "axis": axis,
            "groupby_kwargs": groupby_kwargs,
            "agg_args": agg_args,
            "agg_kwargs": agg_kwargs,
            "drop": drop,
        }
        return self.groupby_agg(**forwarded)

    @doc_utils.doc_groupby_method(
        action="compute standard deviation", result="standard deviation", refer_to="std"
    )
    def groupby_std(
        self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False
    ):
        # Delegate to ``groupby_agg`` with the "std" aggregation; every other
        # argument is forwarded unchanged.
        forwarded = {
            "by": by,
            "agg_func": "std",
            "axis": axis,
            "groupby_kwargs": groupby_kwargs,
            "agg_args": agg_args,
            "agg_kwargs": agg_kwargs,
            "drop": drop,
        }
        return self.groupby_agg(**forwarded)

    @doc_utils.doc_groupby_method(
        action="compute standard error", result="standard error", refer_to="sem"
    )
    def groupby_sem(
        self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False
    ):
        # Delegate to ``groupby_agg`` with the "sem" aggregation; every other
        # argument is forwarded unchanged.
        forwarded = {
            "by": by,
            "agg_func": "sem",
            "axis": axis,
            "groupby_kwargs": groupby_kwargs,
            "agg_args": agg_args,
            "agg_kwargs": agg_kwargs,
            "drop": drop,
        }
        return self.groupby_agg(**forwarded)

    @doc_utils.doc_groupby_method(
        action="compute numerical rank", result="numerical rank", refer_to="rank"
    )
    def groupby_rank(
        self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False
    ):
        # Delegate to ``groupby_agg`` with the "rank" aggregation; every other
        # argument is forwarded unchanged.
        forwarded = {
            "by": by,
            "agg_func": "rank",
            "axis": axis,
            "groupby_kwargs": groupby_kwargs,
            "agg_args": agg_args,
            "agg_kwargs": agg_kwargs,
            "drop": drop,
        }
        return self.groupby_agg(**forwarded)

    @doc_utils.doc_groupby_method(
        action="compute variance", result="variance", refer_to="var"
    )
    def groupby_var(
        self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False
    ):
        # Delegate to ``groupby_agg`` with the "var" aggregation; every other
        # argument is forwarded unchanged.
        forwarded = {
            "by": by,
            "agg_func": "var",
            "axis": axis,
            "groupby_kwargs": groupby_kwargs,
            "agg_args": agg_args,
            "agg_kwargs": agg_kwargs,
            "drop": drop,
        }
        return self.groupby_agg(**forwarded)

    @doc_utils.doc_groupby_method(
        action="compute correlation", result="correlation", refer_to="corr"
    )
    def groupby_corr(
        self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False
    ):
        # Delegate to ``groupby_agg`` with the "corr" aggregation; every other
        # argument is forwarded unchanged.
        forwarded = {
            "by": by,
            "agg_func": "corr",
            "axis": axis,
            "groupby_kwargs": groupby_kwargs,
            "agg_args": agg_args,
            "agg_kwargs": agg_kwargs,
            "drop": drop,
        }
        return self.groupby_agg(**forwarded)

    @doc_utils.doc_groupby_method(
        action="compute covariance", result="covariance", refer_to="cov"
    )
    def groupby_cov(
        self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False
    ):
        # Delegate to ``groupby_agg`` with the "cov" aggregation; every other
        # argument is forwarded unchanged.
        forwarded = {
            "by": by,
            "agg_func": "cov",
            "axis": axis,
            "groupby_kwargs": groupby_kwargs,
            "agg_args": agg_args,
            "agg_kwargs": agg_kwargs,
            "drop": drop,
        }
        return self.groupby_agg(**forwarded)

    @doc_utils.doc_groupby_method(
        action="get the number of unique values",
        result="number of unique values",
        refer_to="nunique",
    )
    def groupby_nunique(
        self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False
    ):
        # Delegate to ``groupby_agg`` with the "nunique" aggregation; every other
        # argument is forwarded unchanged.
        forwarded = {
            "by": by,
            "agg_func": "nunique",
            "axis": axis,
            "groupby_kwargs": groupby_kwargs,
            "agg_args": agg_args,
            "agg_kwargs": agg_kwargs,
            "drop": drop,
        }
        return self.groupby_agg(**forwarded)

    @doc_utils.doc_groupby_method(
        action="get the median value", result="median value", refer_to="median"
    )
    def groupby_median(
        self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False
    ):
        # Delegate to ``groupby_agg`` with the "median" aggregation; every other
        # argument is forwarded unchanged.
        forwarded = {
            "by": by,
            "agg_func": "median",
            "axis": axis,
            "groupby_kwargs": groupby_kwargs,
            "agg_args": agg_args,
            "agg_kwargs": agg_kwargs,
            "drop": drop,
        }
        return self.groupby_agg(**forwarded)

    @doc_utils.doc_groupby_method(
        action="compute specified quantile",
        result="quantile value",
        refer_to="quantile",
    )
    def groupby_quantile(
        self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False
    ):
        # Delegate to ``groupby_agg`` with the "quantile" aggregation; every other
        # argument is forwarded unchanged.
        forwarded = {
            "by": by,
            "agg_func": "quantile",
            "axis": axis,
            "groupby_kwargs": groupby_kwargs,
            "agg_args": agg_args,
            "agg_kwargs": agg_kwargs,
            "drop": drop,
        }
        return self.groupby_agg(**forwarded)

    @doc_utils.doc_groupby_method(
        action="fill NaN values",
        result="`fill_value` if it was NaN, original value otherwise",
        refer_to="fillna",
    )
    def groupby_fillna(
        self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False
    ):
        # Delegate to ``groupby_agg`` with the "fillna" aggregation; every other
        # argument is forwarded unchanged.
        forwarded = {
            "by": by,
            "agg_func": "fillna",
            "axis": axis,
            "groupby_kwargs": groupby_kwargs,
            "agg_args": agg_args,
            "agg_kwargs": agg_kwargs,
            "drop": drop,
        }
        return self.groupby_agg(**forwarded)

    def groupby_diff(
        self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False
    ):  # noqa: GL08
        # Delegate to ``groupby_agg`` with the "diff" aggregation; every other
        # argument is forwarded unchanged.
        forwarded = {
            "by": by,
            "agg_func": "diff",
            "axis": axis,
            "groupby_kwargs": groupby_kwargs,
            "agg_args": agg_args,
            "agg_kwargs": agg_kwargs,
            "drop": drop,
        }
        return self.groupby_agg(**forwarded)

    def groupby_pct_change(
        self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False
    ):  # noqa: GL08
        # Delegate to ``groupby_agg`` with the "pct_change" aggregation; every other
        # argument is forwarded unchanged.
        forwarded = {
            "by": by,
            "agg_func": "pct_change",
            "axis": axis,
            "groupby_kwargs": groupby_kwargs,
            "agg_args": agg_args,
            "agg_kwargs": agg_kwargs,
            "drop": drop,
        }
        return self.groupby_agg(**forwarded)

    @doc_utils.doc_groupby_method(
        action="get data types", result="data type", refer_to="dtypes"
    )
    def groupby_dtypes(
        self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False
    ):
        # Delegate to ``groupby_agg`` with the "dtypes" aggregation; every other
        # argument is forwarded unchanged.
        forwarded = {
            "by": by,
            "agg_func": "dtypes",
            "axis": axis,
            "groupby_kwargs": groupby_kwargs,
            "agg_args": agg_args,
            "agg_kwargs": agg_kwargs,
            "drop": drop,
        }
        return self.groupby_agg(**forwarded)

    @doc_utils.doc_groupby_method(
        action="construct DataFrame from group with provided name",
        result="DataFrame for given group",
        refer_to="get_group",
    )
    def groupby_get_group(
        self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False
    ):
        # Delegate to ``groupby_agg`` with the "get_group" aggregation; every other
        # argument is forwarded unchanged.
        forwarded = {
            "by": by,
            "agg_func": "get_group",
            "axis": axis,
            "groupby_kwargs": groupby_kwargs,
            "agg_args": agg_args,
            "agg_kwargs": agg_kwargs,
            "drop": drop,
        }
        return self.groupby_agg(**forwarded)

    @doc_utils.doc_groupby_method(
        action="shift data with the specified settings",
        result="shifted value",
        refer_to="shift",
    )
    def groupby_shift(
        self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False
    ):
        # Delegate to ``groupby_agg`` with the "shift" aggregation; every other
        # argument is forwarded unchanged.
        forwarded = {
            "by": by,
            "agg_func": "shift",
            "axis": axis,
            "groupby_kwargs": groupby_kwargs,
            "agg_args": agg_args,
            "agg_kwargs": agg_kwargs,
            "drop": drop,
        }
        return self.groupby_agg(**forwarded)

    @doc_utils.doc_groupby_method(
        action="get first value in group",
        result="first value",
        refer_to="first",
    )
    def groupby_first(
        self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False
    ):
        # Delegate to ``groupby_agg`` with the "first" aggregation; every other
        # argument is forwarded unchanged.
        forwarded = {
            "by": by,
            "agg_func": "first",
            "axis": axis,
            "groupby_kwargs": groupby_kwargs,
            "agg_args": agg_args,
            "agg_kwargs": agg_kwargs,
            "drop": drop,
        }
        return self.groupby_agg(**forwarded)

    @doc_utils.doc_groupby_method(
        action="get last value in group",
        result="last value",
        refer_to="last",
    )
    def groupby_last(
        self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False
    ):
        # Delegate to ``groupby_agg`` with the "last" aggregation; every other
        # argument is forwarded unchanged.
        forwarded = {
            "by": by,
            "agg_func": "last",
            "axis": axis,
            "groupby_kwargs": groupby_kwargs,
            "agg_args": agg_args,
            "agg_kwargs": agg_kwargs,
            "drop": drop,
        }
        return self.groupby_agg(**forwarded)

    @doc_utils.doc_groupby_method(
        action="get first n values of a group",
        result="first n values of a group",
        refer_to="head",
    )
    def groupby_head(
        self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False
    ):
        # Delegate to ``groupby_agg`` with the "head" aggregation; every other
        # argument is forwarded unchanged.
        forwarded = {
            "by": by,
            "agg_func": "head",
            "axis": axis,
            "groupby_kwargs": groupby_kwargs,
            "agg_args": agg_args,
            "agg_kwargs": agg_kwargs,
            "drop": drop,
        }
        return self.groupby_agg(**forwarded)

    @doc_utils.doc_groupby_method(
        action="get last n values in group",
        result="last n values",
        refer_to="tail",
    )
    def groupby_tail(
        self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False
    ):
        # Delegate to ``groupby_agg`` with the "tail" aggregation; every other
        # argument is forwarded unchanged.
        forwarded = {
            "by": by,
            "agg_func": "tail",
            "axis": axis,
            "groupby_kwargs": groupby_kwargs,
            "agg_args": agg_args,
            "agg_kwargs": agg_kwargs,
            "drop": drop,
        }
        return self.groupby_agg(**forwarded)

    @doc_utils.doc_groupby_method(
        action="get nth value in group",
        result="nth value",
        refer_to="nth",
    )
    def groupby_nth(
        self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False
    ):
        # Delegate to ``groupby_agg`` with the "nth" aggregation; every other
        # argument is forwarded unchanged.
        forwarded = {
            "by": by,
            "agg_func": "nth",
            "axis": axis,
            "groupby_kwargs": groupby_kwargs,
            "agg_args": agg_args,
            "agg_kwargs": agg_kwargs,
            "drop": drop,
        }
        return self.groupby_agg(**forwarded)

    @doc_utils.doc_groupby_method(
        action="get group number of each value",
        result="group number of each value",
        refer_to="ngroup",
    )
    def groupby_ngroup(
        self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False
    ):
        # Delegate to ``groupby_agg`` with the "ngroup" aggregation; every other
        # argument is forwarded unchanged.
        forwarded = {
            "by": by,
            "agg_func": "ngroup",
            "axis": axis,
            "groupby_kwargs": groupby_kwargs,
            "agg_args": agg_args,
            "agg_kwargs": agg_kwargs,
            "drop": drop,
        }
        return self.groupby_agg(**forwarded)

    @doc_utils.doc_groupby_method(
        action="get n largest values in group",
        result="n largest values",
        refer_to="nlargest",
    )
    def groupby_nlargest(
        self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False
    ):
        # Delegate to ``groupby_agg`` with the "nlargest" aggregation, asking it to
        # treat `self` as a Series (``series_groupby=True``).
        forwarded = {
            "by": by,
            "agg_func": "nlargest",
            "axis": axis,
            "groupby_kwargs": groupby_kwargs,
            "agg_args": agg_args,
            "agg_kwargs": agg_kwargs,
            "drop": drop,
            "series_groupby": True,
        }
        return self.groupby_agg(**forwarded)

    @doc_utils.doc_groupby_method(
        action="get n nsmallest values in group",
        result="n nsmallest values",
        refer_to="nsmallest",
    )
    def groupby_nsmallest(
        self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False
    ):
        # Delegate to ``groupby_agg`` with the "nsmallest" aggregation, asking it to
        # treat `self` as a Series (``series_groupby=True``).
        forwarded = {
            "by": by,
            "agg_func": "nsmallest",
            "axis": axis,
            "groupby_kwargs": groupby_kwargs,
            "agg_args": agg_args,
            "agg_kwargs": agg_kwargs,
            "drop": drop,
            "series_groupby": True,
        }
        return self.groupby_agg(**forwarded)

    @doc_utils.doc_groupby_method(
        action="get unique values in group",
        result="unique values",
        refer_to="unique",
    )
    def groupby_unique(
        self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False
    ):
        # Delegate to ``groupby_agg`` with the "unique" aggregation, asking it to
        # treat `self` as a Series (``series_groupby=True``).
        forwarded = {
            "by": by,
            "agg_func": "unique",
            "axis": axis,
            "groupby_kwargs": groupby_kwargs,
            "agg_args": agg_args,
            "agg_kwargs": agg_kwargs,
            "drop": drop,
            "series_groupby": True,
        }
        return self.groupby_agg(**forwarded)

    def groupby_ohlc(
        self,
        by,
        axis,
        groupby_kwargs,
        agg_args,
        agg_kwargs,
        is_df,
    ):  # noqa: GL08
        if is_df:
            # DataFrame case: wrap ``DataFrameGroupBy.ohlc`` with
            # ``GroupByDefault.register``; ``drop`` is always passed as True.
            pandas_method = pandas.core.groupby.DataFrameGroupBy.ohlc
            return GroupByDefault.register(pandas_method)(
                self,
                by=by,
                axis=axis,
                groupby_kwargs=groupby_kwargs,
                agg_args=agg_args,
                agg_kwargs=agg_kwargs,
                drop=True,
            )
        # Otherwise go through ``groupby_agg`` with ``series_groupby=True``;
        # ``drop`` is not passed, so ``groupby_agg`` uses its own default.
        forwarded = {
            "by": by,
            "agg_func": "ohlc",
            "axis": axis,
            "groupby_kwargs": groupby_kwargs,
            "agg_args": agg_args,
            "agg_kwargs": agg_kwargs,
            "series_groupby": True,
        }
        return self.groupby_agg(**forwarded)

    # END Manual Partitioning methods

    @doc_utils.add_refer_to("DataFrame.unstack")
    def unstack(self, level, fill_value):
        """
        Pivot a level of the (necessarily hierarchical) index labels.

        Parameters
        ----------
        level : int or label
        fill_value : scalar or dict

        Returns
        -------
        BaseQueryCompiler
        """
        # Wrap ``pandas.DataFrame.unstack`` and forward both arguments as keywords.
        pandas_unstack = DataFrameDefault.register(pandas.DataFrame.unstack)
        return pandas_unstack(self, level=level, fill_value=fill_value)

    @doc_utils.add_refer_to("wide_to_long")
    def wide_to_long(self, **kwargs):  # noqa: PR01
        """
        Unpivot a DataFrame from wide to long format.

        Returns
        -------
        BaseQueryCompiler
        """
        # Wrap the module-level ``pandas.wide_to_long`` and forward all keyword
        # arguments untouched.
        pandas_wide_to_long = DataFrameDefault.register(pandas.wide_to_long)
        return pandas_wide_to_long(self, **kwargs)

    @doc_utils.add_refer_to("DataFrame.pivot")
    def pivot(self, index, columns, values):
        """
        Produce pivot table based on column values.

        Parameters
        ----------
        index : label or list of such, pandas.Index, optional
        columns : label or list of such
        values : label or list of such, optional

        Returns
        -------
        BaseQueryCompiler
            New QueryCompiler containing pivot table.
        """
        # Wrap ``pandas.DataFrame.pivot`` and forward the three selectors as keywords.
        pandas_pivot = DataFrameDefault.register(pandas.DataFrame.pivot)
        return pandas_pivot(self, index=index, columns=columns, values=values)

    @doc_utils.add_refer_to("DataFrame.pivot_table")
    def pivot_table(
        self,
        index,
        values,
        columns,
        aggfunc,
        fill_value,
        margins,
        dropna,
        margins_name,
        observed,
        sort,
    ):
        """
        Create a spreadsheet-style pivot table from underlying data.

        Parameters
        ----------
        index : label, pandas.Grouper, array or list of such
        values : label, optional
        columns : column, pandas.Grouper, array or list of such
        aggfunc : callable(pandas.Series) -> scalar, dict of list of such
        fill_value : scalar, optional
        margins : bool
        dropna : bool
        margins_name : str
        observed : bool
        sort : bool

        Returns
        -------
        BaseQueryCompiler
        """
        # Wrap ``pandas.DataFrame.pivot_table``; every parameter is forwarded
        # under its own name.
        pandas_pivot_table = DataFrameDefault.register(pandas.DataFrame.pivot_table)
        return pandas_pivot_table(
            self,
            index=index,
            values=values,
            columns=columns,
            aggfunc=aggfunc,
            fill_value=fill_value,
            margins=margins,
            dropna=dropna,
            margins_name=margins_name,
            observed=observed,
            sort=sort,
        )

    @doc_utils.add_refer_to("get_dummies")
    def get_dummies(self, columns, **kwargs):  # noqa: PR02
        """
        Turn categorical variables of the given columns into dummy variables.

        Parameters
        ----------
        columns : label or list of such
            Columns to convert.
        prefix : str or list of such
        prefix_sep : str
        dummy_na : bool
        drop_first : bool
        dtype : dtype
        **kwargs : dict
            Serves the compatibility purpose. Does not affect the result.

        Returns
        -------
        BaseQueryCompiler
            New QueryCompiler with categorical variables converted to dummy.
        """

        def get_dummies(df, columns, **kwargs):
            # Adapter around the module-level pandas function: the frame goes
            # first and `columns` is always passed by keyword.
            return pandas.get_dummies(df, columns=columns, **kwargs)

        default_get_dummies = DataFrameDefault.register(get_dummies)
        return default_get_dummies(self, columns=columns, **kwargs)

    @doc_utils.add_one_column_warning
    @doc_utils.add_refer_to("Series.repeat")
    def repeat(self, repeats):
        """
        Repeat every element of a one-column QueryCompiler.

        Parameters
        ----------
        repeats : int or array of ints
            The number of repetitions for each element. This should be a
            non-negative integer. Repeating 0 times will return an empty
            QueryCompiler.

        Returns
        -------
        BaseQueryCompiler
            New QueryCompiler with repeated elements.
        """
        default_repeat = SeriesDefault.register(pandas.Series.repeat)
        return default_repeat(self, repeats=repeats)

    @doc_utils.add_refer_to("cut")
    def cut(
        self,
        bins,
        **kwargs,
    ):
        """
        Segment the values into discrete intervals.

        Parameters
        ----------
        bins : int, array of ints, or IntervalIndex
            The criteria to bin by.
        **kwargs : dict
            The keyword arguments to pass through.

        Returns
        -------
        BaseQueryCompiler or np.ndarray or list[np.ndarray]
            Returns the result of pd.cut.
        """

        def squeeze_and_cut(df, *args, **kwargs):
            # The internal representation is a dataframe, so it is squeezed
            # along the columns into a Series before `pandas.cut` sees it.
            as_series = df.squeeze(axis=1)
            return pandas.cut(as_series, *args, **kwargs)

        # `default_to_pandas` is used because both the type and the number of
        # results depend on the arguments given.
        return self.default_to_pandas(squeeze_and_cut, bins, **kwargs)

    # Indexing

    # Axis labels are exposed as read/write properties whose accessors are
    # produced by the `_get_axis`/`_set_axis` helpers: axis 0 backs `index`
    # and axis 1 backs `columns`.
    index = property(_get_axis(0), _set_axis(0))
    columns = property(_get_axis(1), _set_axis(1))

    def get_axis(self, axis):
        """
        Fetch the labels of the requested axis.

        Parameters
        ----------
        axis : {0, 1}
            Axis to return labels on.
            0 selects the index, any other value selects the columns.

        Returns
        -------
        pandas.Index
        """
        if axis == 0:
            return self.index
        return self.columns

    def get_axis_len(self, axis: Literal[0, 1]) -> int:
        """
        Compute how many labels the specified axis holds.

        A query compiler may choose to override this method if it has a more efficient way
        of computing the length of an axis without materializing it.

        Parameters
        ----------
        axis : {0, 1}
            Axis to measure.

        Returns
        -------
        int
        """
        labels = self.get_axis(axis)
        return len(labels)

    def take_2d_labels(
        self,
        index,
        columns,
    ):
        """
        Select the rows and columns identified by the given labels.

        Labels are first translated into positions, then the positional
        selection is delegated to ``take_2d_positional``.

        Parameters
        ----------
        index : slice, scalar, list-like, or BaseQueryCompiler
            Labels of rows to grab.
        columns : slice, scalar, list-like, or BaseQueryCompiler
            Labels of columns to grab.

        Returns
        -------
        BaseQueryCompiler
            Subset of this QueryCompiler.
        """

        def _full_slice_to_none(lookup):
            # A slice lookup is only expected to be the "take everything"
            # marker; it is replaced by None for the positional API.
            if not isinstance(lookup, slice):
                return lookup
            ErrorMessage.catch_bugs_and_request_email(
                failure_condition=lookup != slice(None),
                extra_log=f"Only None-slices are acceptable as a slice argument in masking, got: {lookup}",
            )
            return None

        row_lookup, col_lookup = self.get_positions_from_labels(index, columns)
        return self.take_2d_positional(
            _full_slice_to_none(row_lookup), _full_slice_to_none(col_lookup)
        )

    def get_positions_from_labels(self, row_loc, col_loc):
        """
        Compute index and column positions from their respective locators.

        Inputs to this method are arguments the pandas user could pass to loc.
        This function will compute the corresponding index and column positions
        that the user could equivalently pass to iloc.

        Parameters
        ----------
        row_loc : scalar, slice, list, array or tuple
            Row locator.
        col_loc : scalar, slice, list, array or tuple
            Columns locator.

        Returns
        -------
        row_lookup : slice(None) if full axis grab, pandas.RangeIndex if repetition is detected, numpy.ndarray otherwise
            Positional lookup for the rows.
        col_lookup : slice(None) if full axis grab, pandas.RangeIndex if repetition is detected, numpy.ndarray otherwise
            Positional lookup for the columns.

        Raises
        ------
        KeyError
            If a non-MultiIndex axis is looked up with labels that are missing from it.

        Notes
        -----
        Usage of `slice(None)` as a resulting lookup is a hack to pass information about
        full-axis grab without computing actual indices that triggers lazy computations.
        Ideally, this API should get rid of using slices as indexers and either use a
        common ``Indexer`` object or range and ``np.ndarray`` only.

        The two lookups are returned together as a two-element list.
        """
        from modin.pandas.indexing import (
            boolean_mask_to_numeric,
            is_boolean_array,
            is_list_like,
            is_range_like,
        )

        lookups = []
        # Axis 0 is resolved from `row_loc`, axis 1 from `col_loc`.
        for axis, axis_loc in enumerate((row_loc, col_loc)):
            if is_scalar(axis_loc):
                # Wrap a scalar so the branches below only deal with array-likes.
                axis_loc = np.array([axis_loc])
            if isinstance(axis_loc, pandas.RangeIndex):
                # A RangeIndex locator is used as the lookup as-is.
                axis_lookup = axis_loc
            elif isinstance(axis_loc, slice) or is_range_like(axis_loc):
                if isinstance(axis_loc, slice) and axis_loc == slice(None):
                    # Full-axis grab: keep the slice and avoid touching the labels.
                    axis_lookup = axis_loc
                else:
                    axis_labels = self.get_axis(axis)
                    # `slice_indexer` returns a fully-defined numeric slice for a non-fully-defined labels-based slice
                    # RangeIndex and range use a semi-open interval, while
                    # slice_indexer uses a closed interval. Subtract 1 step from the
                    # end of the interval to get the equivalent closed interval.
                    if axis_loc.stop is None or not is_number(axis_loc.stop):
                        slice_stop = axis_loc.stop
                    else:
                        slice_stop = axis_loc.stop - (
                            0 if axis_loc.step is None else axis_loc.step
                        )
                    axis_lookup = axis_labels.slice_indexer(
                        axis_loc.start,
                        slice_stop,
                        axis_loc.step,
                    )
                    # Converting negative indices to their actual positions:
                    axis_lookup = pandas.RangeIndex(
                        start=(
                            axis_lookup.start
                            if axis_lookup.start >= 0
                            else axis_lookup.start + len(axis_labels)
                        ),
                        stop=(
                            axis_lookup.stop
                            if axis_lookup.stop >= 0
                            else axis_lookup.stop + len(axis_labels)
                        ),
                        step=axis_lookup.step,
                    )
            elif self.has_multiindex(axis):
                # `Index.get_locs` raises an IndexError by itself if missing labels were provided,
                # we don't have to do missing-check for the received `axis_lookup`.
                if isinstance(axis_loc, pandas.MultiIndex):
                    axis_lookup = self.get_axis(axis).get_indexer_for(axis_loc)
                else:
                    axis_lookup = self.get_axis(axis).get_locs(axis_loc)
            elif is_boolean_array(axis_loc):
                axis_lookup = boolean_mask_to_numeric(axis_loc)
            else:
                axis_labels = self.get_axis(axis)
                if is_list_like(axis_loc) and not isinstance(
                    axis_loc, (np.ndarray, pandas.Index)
                ):
                    # `Index.get_indexer_for` works much faster with numpy arrays than with python lists,
                    # so although some time is spent here on converting to numpy, the
                    # `Index.get_indexer_for` speedup outweighs that conversion cost.
                    axis_loc = np.array(axis_loc, dtype=axis_labels.dtype)
                axis_lookup = axis_labels.get_indexer_for(axis_loc)
                # `Index.get_indexer_for` sets -1 value for missing labels, we have to verify whether
                # there are any -1 in the received indexer to raise a KeyError here.
                missing_mask = axis_lookup == -1
                if missing_mask.any():
                    missing_labels = (
                        axis_loc[missing_mask]
                        if is_list_like(axis_loc)
                        # If `axis_loc` is not a list-like then we can't select certain
                        # labels that are missing and so printing the whole indexer
                        else axis_loc
                    )
                    raise KeyError(missing_labels)

            # Any non-range pandas.Index lookup is unwrapped to its underlying values.
            if isinstance(axis_lookup, pandas.Index) and not is_range_like(axis_lookup):
                axis_lookup = axis_lookup.values

            lookups.append(axis_lookup)
        return lookups

    def take_2d_positional(self, index=None, columns=None):
        """
        Select rows and columns by their positions.

        Parameters
        ----------
        index : list-like of ints, optional
            Positional indices of rows to grab.
        columns : list-like of ints, optional
            Positional indices of columns to grab.

        Returns
        -------
        BaseQueryCompiler
            New masked QueryCompiler.
        """
        # A missing indexer means "take the whole axis".
        if index is None:
            index = slice(None)
        if columns is None:
            columns = slice(None)

        def applyer(df):
            return df.iloc[index, columns]

        default_take = DataFrameDefault.register(applyer)
        return default_take(self)

    def insert_item(self, axis, loc, value, how="inner", replace=False):
        """
        Insert rows/columns defined by `value` at the specified position.

        If frames are not aligned along specified axis, perform frames alignment first.

        Parameters
        ----------
        axis : {0, 1}
            Axis to insert along. 0 means insert rows, when 1 means insert columns.
        loc : int
            Position to insert `value`. A position outside of
            ``[0, len(axis))`` makes `value` be appended at the end.
        value : BaseQueryCompiler
            Rows/columns to insert.
        how : {"inner", "outer", "left", "right"}, default: "inner"
            Type of join that will be used if frames are not aligned.
        replace : bool, default: False
            If True, the row/column at position `loc` is dropped and `value`
            takes its place; otherwise `value` is placed in front of it.

        Returns
        -------
        BaseQueryCompiler
            New QueryCompiler with inserted values.
        """
        assert isinstance(value, type(self))

        def mask(idx):
            # Nothing to cut when every position along the axis is requested.
            if len(idx) == len(self.get_axis(axis)):
                return self
            if axis:
                return self.getitem_column_array(idx, numeric=True)
            return self.getitem_row_array(idx)

        if not 0 <= loc < len(self.get_axis(axis)):
            # Out-of-range position: glue `value` to the end.
            return self.concat(axis, [value], join=how, sort=False)

        # Split `self` around `loc` and put `value` between the two parts.
        head = mask(list(range(loc)))
        tail_start = loc + 1 if replace else loc
        tail = mask(list(range(tail_start, len(self.get_axis(axis)))))
        return head.concat(axis, [value, tail], join=how, sort=False)

    def setitem(self, axis, key, value):
        """
        Set the row/column defined by `key` to the `value` provided.

        Parameters
        ----------
        axis : {0, 1}
            Selects the kind of assignment. With 0 the column labelled `key`
            is assigned (``df[key] = value``), with 1 the row labelled `key`
            is assigned (``df.loc[key] = value``).
        key : label
            Row/column label to set `value` in.
        value : BaseQueryCompiler, list-like or scalar
            Define new row/column value.

        Returns
        -------
        BaseQueryCompiler
            New QueryCompiler with updated `key` value.
        """

        def setitem(df, axis, key, value):
            # A frame assigned under a scalar key is squeezed first.
            needs_squeeze = is_scalar(key) and isinstance(value, pandas.DataFrame)
            if needs_squeeze:
                value = value.squeeze()
            if axis:
                df.loc[key] = value
            else:
                df[key] = value
            return df

        default_setitem = DataFrameDefault.register(setitem)
        return default_setitem(self, axis=axis, key=key, value=value)

    def write_items(
        self, row_numeric_index, col_numeric_index, item, need_columns_reindex=True
    ):
        """
        Write the passed values into the elements at the given positions.

        In contrast to ``setitem`` this method allows to do 2D assignments.

        Parameters
        ----------
        row_numeric_index : list of ints
            Row positions to write value.
        col_numeric_index : list of ints
            Column positions to write value.
        item : Any
            Values to write. If not a scalar will be broadcasted according to
            `row_numeric_index` and `col_numeric_index`.
        need_columns_reindex : bool, default: True
            In the case of assigning columns to a dataframe (broadcasting is
            part of the flow), reindexing is not needed.

        Returns
        -------
        BaseQueryCompiler
            New QueryCompiler with updated values.
        """
        # The import lives here rather than at module level to avoid a circular import.
        from modin.pandas.utils import broadcast_item, is_scalar

        # Slices are kept as they are, anything else is materialized into a list.
        row_numeric_index = (
            row_numeric_index
            if isinstance(row_numeric_index, slice)
            else list(row_numeric_index)
        )
        col_numeric_index = (
            col_numeric_index
            if isinstance(col_numeric_index, slice)
            else list(col_numeric_index)
        )

        def write_items(df, broadcasted_items):
            target = df.iloc[row_numeric_index, col_numeric_index]
            if isinstance(target, pandas.Series):
                broadcasted_items = broadcasted_items.squeeze()
            df.iloc[row_numeric_index, col_numeric_index] = broadcasted_items
            return df

        if is_scalar(item):
            # Scalars are written as-is, no broadcasting is involved.
            broadcasted_item = item
        else:
            broadcasted_item, _, _, _ = broadcast_item(
                self,
                row_numeric_index,
                col_numeric_index,
                item,
                need_columns_reindex=need_columns_reindex,
                sort_lookups_and_item=False,
            )

        default_write = DataFrameDefault.register(write_items)
        return default_write(self, broadcasted_items=broadcasted_item)

    # END Abstract methods for QueryCompiler

    @cached_property
    def __constructor__(self) -> type[Self]:
        """
        Return the constructor of this query compiler.

        By default, it is the class of the instance, so calling the result
        invokes an init.

        Returns
        -------
        callable
        """
        own_class = type(self)
        return own_class

    # __delitem__
    # This will change the shape of the resulting data.
    def delitem(self, key):
        """
        Remove the `key` column.

        Parameters
        ----------
        key : label
            Column name to drop.

        Returns
        -------
        BaseQueryCompiler
            New QueryCompiler without `key` column.
        """
        # `drop` receives the label wrapped into a single-element list.
        labels_to_drop = [key]
        return self.drop(columns=labels_to_drop)

    # END __delitem__

    def has_multiindex(self, axis=0):
        """
        Tell whether the labels of the specified axis are a MultiIndex.

        Parameters
        ----------
        axis : {0, 1}, default: 0
            The axis to check (0 - index, 1 - columns).

        Returns
        -------
        bool
            True if index at specified axis is MultiIndex and False otherwise.
        """
        if axis == 0:
            labels = self.index
        else:
            # Only two axes exist; anything but 0 has to be 1.
            assert axis == 1
            labels = self.columns
        return isinstance(labels, pandas.MultiIndex)

    @property
    def frame_has_materialized_dtypes(self) -> bool:
        """
        Report the ``has_materialized_dtypes`` flag of the underlying dataframe.

        Returns
        -------
        bool
        """
        frame = self._modin_frame
        return frame.has_materialized_dtypes

    @property
    def frame_has_materialized_columns(self) -> bool:
        """
        Report the ``has_materialized_columns`` flag of the underlying dataframe.

        Returns
        -------
        bool
        """
        frame = self._modin_frame
        return frame.has_materialized_columns

    @property
    def frame_has_materialized_index(self) -> bool:
        """
        Report the ``has_materialized_index`` flag of the underlying dataframe.

        Returns
        -------
        bool
        """
        frame = self._modin_frame
        return frame.has_materialized_index

    def set_frame_dtypes_cache(self, dtypes):
        """
        Hand the given dtypes cache over to the underlying dataframe.

        Parameters
        ----------
        dtypes : pandas.Series, ModinDtypes, callable or None
        """
        frame = self._modin_frame
        frame.set_dtypes_cache(dtypes)

    def set_frame_index_cache(self, index):
        """
        Hand the given index cache over to the underlying dataframe.

        Parameters
        ----------
        index : sequence, callable or None
        """
        frame = self._modin_frame
        frame.set_index_cache(index)

    def set_frame_columns_cache(self, index):
        """
        Hand the given columns cache over to the underlying dataframe.

        Parameters
        ----------
        index : sequence, callable or None
        """
        frame = self._modin_frame
        frame.set_columns_cache(index)

    @property
    def frame_has_index_cache(self):
        """
        Report the ``has_index_cache`` flag of the underlying dataframe.

        Returns
        -------
        bool
        """
        frame = self._modin_frame
        return frame.has_index_cache

    @property
    def frame_has_columns_cache(self):
        """
        Report the ``has_columns_cache`` flag of the underlying dataframe.

        Returns
        -------
        bool
        """
        frame = self._modin_frame
        return frame.has_columns_cache

    @property
    def frame_has_dtypes_cache(self) -> bool:
        """
        Report the ``has_dtypes_cache`` flag of the underlying dataframe.

        Returns
        -------
        bool
        """
        frame = self._modin_frame
        return frame.has_dtypes_cache

    def get_index_name(self, axis=0):
        """
        Read the ``name`` of the labels of the specified axis.

        Parameters
        ----------
        axis : {0, 1}, default: 0
            Axis to get index name on.

        Returns
        -------
        hashable
            Index name, None for MultiIndex.
        """
        labels = self.get_axis(axis)
        return labels.name

    def set_index_name(self, name, axis=0):
        """
        Assign a new ``name`` to the labels of the specified axis.

        Parameters
        ----------
        name : hashable
            New index name.
        axis : {0, 1}, default: 0
            Axis to set name along.
        """
        # The name is assigned on the object returned by `get_axis`.
        labels = self.get_axis(axis)
        labels.name = name

    def get_index_names(self, axis=0):
        """
        Read the ``names`` of the labels of the specified axis.

        Parameters
        ----------
        axis : {0, 1}, default: 0
            Axis to get index names on.

        Returns
        -------
        list
            Index names.
        """
        labels = self.get_axis(axis)
        return labels.names

    def set_index_names(self, names, axis=0):
        """
        Assign new ``names`` to the labels of the specified axis.

        Parameters
        ----------
        names : list
            New index names.
        axis : {0, 1}, default: 0
            Axis to set names along.
        """
        # The names are assigned on the object returned by `get_axis`.
        labels = self.get_axis(axis)
        labels.names = names

    def get_dtypes_set(self):
        """
        Collect the distinct dtypes present in this query compiler.

        Returns
        -------
        set
        """
        dtype_values = self.dtypes.values
        return set(dtype_values)

    # DateTime methods
    def between_time(self, **kwargs):  # noqa: PR01
        """
        Keep the values that fall between particular times of the day (e.g., 9:00-9:30 AM).

        By setting start_time to be later than end_time, you can get the times that are not between the two times.

        Returns
        -------
        BaseQueryCompiler
        """
        default_between_time = DataFrameDefault.register(pandas.DataFrame.between_time)
        return default_between_time(self, **kwargs)

    def shift(
        self,
        periods,
        freq,
        axis,
        fill_value,
    ):  # noqa: GL08
        # All four arguments are forwarded positionally, in declaration order,
        # to the default implementation registered for `pandas.DataFrame.shift`.
        default_shift = DataFrameDefault.register(pandas.DataFrame.shift)
        return default_shift(self, periods, freq, axis, fill_value)

    def tz_convert(
        self,
        tz,
        axis=0,
        level=None,
        copy=True,
    ):
        """
        Convert the tz-aware labels of an axis to the target time zone.

        Parameters
        ----------
        tz : str or tzinfo object or None
            Target time zone. Passing None will convert to UTC
            and remove the timezone information.
        axis : int, default: 0
            The axis to convert.
        level : int, str, default: None
            If axis is a MultiIndex, convert a specific level. Otherwise must be None.
        copy : bool, default: True
            Whether to assign the new labels to a copy of this query compiler
            rather than to this query compiler itself.

        Returns
        -------
        BaseQueryCompiler
            A new query compiler with the converted axis.
        """
        axis_labels = self.get_axis(axis)
        if level is None:
            new_labels = axis_labels.tz_convert(tz)
        else:
            # Level-aware conversion goes through a temporary Series that
            # carries the labels as its index.
            carrier = pandas.Series(index=axis_labels)
            new_labels = carrier.tz_convert(tz, level=level).index

        result = self.copy() if copy else self
        if axis == 0:
            result.index = new_labels
        else:
            result.columns = new_labels
        return result

    def tz_localize(
        self, tz, axis=0, level=None, copy=True, ambiguous="raise", nonexistent="raise"
    ):
        """
        Localize tz-naive index of a Series or DataFrame to target time zone.

        Parameters
        ----------
        tz : tzstr or tzinfo or None
            Time zone to localize. Passing None will remove the time zone
            information and preserve local time.
        axis : int, default: 0
            The axis to localize.
        level : int, str, default: None
            If axis is a MultiIndex, localize a specific level. Otherwise must be None.
        copy : bool, default: True
            Whether to assign the new labels to a copy of this query compiler
            rather than to this query compiler itself.
        ambiguous : str, bool-ndarray, NaT, default: "raise"
            Behaviour on ambiguous times.
        nonexistent : str, default: "raise"
            What to do with nonexistent times.

        Returns
        -------
        BaseQueryCompiler
            A new query compiler with the localized axis.
        """
        # The labels of the requested axis become the index of a temporary
        # Series. That Series only has axis 0, so `axis` must not be forwarded
        # to `Series.tz_localize`: forwarding `axis=1` raises a ValueError.
        carrier = pandas.Series(index=self.get_axis(axis))
        new_labels = carrier.tz_localize(
            tz,
            level=level,
            copy=False,
            ambiguous=ambiguous,
            nonexistent=nonexistent,
        ).index
        obj = self.copy() if copy else self
        if axis == 0:
            obj.index = new_labels
        else:
            obj.columns = new_labels
        return obj

    @doc_utils.doc_dt_round(refer_to="ceil")
    def dt_ceil(self, freq, ambiguous="raise", nonexistent="raise"):
        # Arguments are forwarded positionally, in the order declared above.
        ceil_impl = DateTimeDefault.register(pandas.Series.dt.ceil)
        return ceil_impl(self, freq, ambiguous, nonexistent)

    @doc_utils.add_one_column_warning
    @doc_utils.add_refer_to("Series.dt.components")
    def dt_components(self):
        """
        Split each date-time value into its components (days, hours, minutes...).

        Returns
        -------
        BaseQueryCompiler
        """
        components_impl = DateTimeDefault.register(pandas.Series.dt.components)
        return components_impl(self)

    @doc_utils.doc_dt_timestamp(
        prop="the date without timezone information", refer_to="date"
    )
    def dt_date(self):
        # Build the default implementation for ``Series.dt.date`` and run it
        # on this query compiler.
        date_impl = DateTimeDefault.register(pandas.Series.dt.date)
        return date_impl(self)

    @doc_utils.doc_dt_timestamp(prop="day component", refer_to="day")
    def dt_day(self):
        # Build the default implementation for ``Series.dt.day`` and run it
        # on this query compiler.
        day_impl = DateTimeDefault.register(pandas.Series.dt.day)
        return day_impl(self)

    @doc_utils.doc_dt_timestamp(
        prop="day name", refer_to="day_name", params="locale : str, optional"
    )
    def dt_day_name(self, locale=None):
        # `locale` is forwarded positionally to the default implementation
        # registered for ``Series.dt.day_name``.
        day_name_impl = DateTimeDefault.register(pandas.Series.dt.day_name)
        return day_name_impl(self, locale)

    @doc_utils.doc_dt_timestamp(prop="integer day of week", refer_to="dayofweek")
    # FIXME: `dt_dayofweek` is an alias for `dt_weekday`, one of them should
    # be removed (Modin issue #3107).
    def dt_dayofweek(self):
        # Build the default implementation for ``Series.dt.dayofweek`` and run
        # it on this query compiler.
        dayofweek_impl = DateTimeDefault.register(pandas.Series.dt.dayofweek)
        return dayofweek_impl(self)

    @doc_utils.doc_dt_timestamp(prop="day of year", refer_to="dayofyear")
    def dt_dayofyear(self):
        # Build the default implementation for ``Series.dt.dayofyear`` and run
        # it on this query compiler.
        dayofyear_impl = DateTimeDefault.register(pandas.Series.dt.dayofyear)
        return dayofyear_impl(self)

    @doc_utils.doc_dt_interval(prop="days", refer_to="days")
    def dt_days(self):
        # Build the default implementation for ``Series.dt.days`` and run it
        # on this query compiler.
        days_impl = DateTimeDefault.register(pandas.Series.dt.days)
        return days_impl(self)

    @doc_utils.doc_dt_timestamp(
        prop="number of days in month", refer_to="days_in_month"
    )
    # FIXME: `dt_days_in_month` is an alias for `dt_daysinmonth`, one of them should
    # be removed (Modin issue #3107).
    def dt_days_in_month(self):
        # Build the default implementation for ``Series.dt.days_in_month`` and
        # run it on this query compiler.
        days_in_month_impl = DateTimeDefault.register(pandas.Series.dt.days_in_month)
        return days_in_month_impl(self)

    @doc_utils.doc_dt_timestamp(prop="number of days in month", refer_to="daysinmonth")
    def dt_daysinmonth(self):
        # Build the default implementation for ``Series.dt.daysinmonth`` and
        # run it on this query compiler.
        daysinmonth_impl = DateTimeDefault.register(pandas.Series.dt.daysinmonth)
        return daysinmonth_impl(self)

    @doc_utils.doc_dt_period(prop="the timestamp of end time", refer_to="end_time")
    def dt_end_time(self):
        # Build the default implementation for ``Series.dt.end_time`` and run
        # it on this query compiler.
        end_time_impl = DateTimeDefault.register(pandas.Series.dt.end_time)
        return end_time_impl(self)

    @doc_utils.doc_dt_round(refer_to="floor")
    def dt_floor(self, freq, ambiguous="raise", nonexistent="raise"):
        # Arguments are forwarded positionally, in the order declared above.
        floor_impl = DateTimeDefault.register(pandas.Series.dt.floor)
        return floor_impl(self, freq, ambiguous, nonexistent)

    @doc_utils.add_one_column_warning
    @doc_utils.add_refer_to("Series.dt.freq")
    def dt_freq(self):
        """
        Retrieve the time frequency of the underlying time-series data.

        Returns
        -------
        BaseQueryCompiler
            QueryCompiler containing a single value, the frequency of the data.
        """
        freq_impl = DateTimeDefault.register(pandas.Series.dt.freq)
        return freq_impl(self)

    @doc_utils.add_refer_to("Series.dt.unit")
    def dt_unit(self):  # noqa: RT01
        # Build the default implementation for ``Series.dt.unit`` and run it
        # on this query compiler.
        unit_impl = DateTimeDefault.register(pandas.Series.dt.unit)
        return unit_impl(self)

    @doc_utils.add_refer_to("Series.dt.as_unit")
    def dt_as_unit(self, *args, **kwargs):  # noqa: PR01, RT01
        # Positional and keyword arguments are passed through untouched to the
        # default implementation registered for ``Series.dt.as_unit``.
        as_unit_impl = DateTimeDefault.register(pandas.Series.dt.as_unit)
        return as_unit_impl(self, *args, **kwargs)

    @doc_utils.doc_dt_timestamp(
        prop="Calculate year, week, and day according to the ISO 8601 standard.",
        refer_to="isocalendar",
    )
    def dt_isocalendar(self):
        # Build the default implementation for ``Series.dt.isocalendar`` and
        # run it on this query compiler.
        isocalendar_impl = DateTimeDefault.register(pandas.Series.dt.isocalendar)
        return isocalendar_impl(self)

    @doc_utils.doc_dt_timestamp(prop="hour", refer_to="hour")
    def dt_hour(self):
        # Build the default implementation for ``Series.dt.hour`` and run it
        # on this query compiler.
        hour_impl = DateTimeDefault.register(pandas.Series.dt.hour)
        return hour_impl(self)

    @doc_utils.doc_dt_timestamp(
        prop="the boolean of whether corresponding year is leap",
        refer_to="is_leap_year",
    )
    def dt_is_leap_year(self):
        # Build the default implementation for ``Series.dt.is_leap_year`` and
        # run it on this query compiler.
        is_leap_year_impl = DateTimeDefault.register(pandas.Series.dt.is_leap_year)
        return is_leap_year_impl(self)

    @doc_utils.doc_dt_timestamp(
        prop="the boolean of whether the date is the last day of the month",
        refer_to="is_month_end",
    )
    def dt_is_month_end(self):
        # Build the default implementation for ``Series.dt.is_month_end`` and
        # run it on this query compiler.
        is_month_end_impl = DateTimeDefault.register(pandas.Series.dt.is_month_end)
        return is_month_end_impl(self)

    @doc_utils.doc_dt_timestamp(
        prop="the boolean of whether the date is the first day of the month",
        refer_to="is_month_start",
    )
    def dt_is_month_start(self):
        # Build the default implementation for ``Series.dt.is_month_start`` and
        # run it on this query compiler.
        is_month_start_impl = DateTimeDefault.register(pandas.Series.dt.is_month_start)
        return is_month_start_impl(self)

    @doc_utils.doc_dt_timestamp(
        prop="the boolean of whether the date is the last day of the quarter",
        refer_to="is_quarter_end",
    )
    def dt_is_quarter_end(self):
        # Build the default implementation for ``Series.dt.is_quarter_end`` and
        # run it on this query compiler.
        is_quarter_end_impl = DateTimeDefault.register(pandas.Series.dt.is_quarter_end)
        return is_quarter_end_impl(self)

    @doc_utils.doc_dt_timestamp(
        prop="the boolean of whether the date is the first day of the quarter",
        refer_to="is_quarter_start",
    )
    def dt_is_quarter_start(self):
        # Build the default implementation for ``Series.dt.is_quarter_start``
        # and run it on this query compiler.
        is_quarter_start_impl = DateTimeDefault.register(
            pandas.Series.dt.is_quarter_start
        )
        return is_quarter_start_impl(self)

    @doc_utils.doc_dt_timestamp(
        prop="the boolean of whether the date is the last day of the year",
        refer_to="is_year_end",
    )
    def dt_is_year_end(self):
        # Build the default implementation for ``Series.dt.is_year_end`` and
        # run it on this query compiler.
        is_year_end_impl = DateTimeDefault.register(pandas.Series.dt.is_year_end)
        return is_year_end_impl(self)

    @doc_utils.doc_dt_timestamp(
        prop="the boolean of whether the date is the first day of the year",
        refer_to="is_year_start",
    )
    def dt_is_year_start(self):
        # Build the default implementation for ``Series.dt.is_year_start`` and
        # run it on this query compiler.
        is_year_start_impl = DateTimeDefault.register(pandas.Series.dt.is_year_start)
        return is_year_start_impl(self)

    @doc_utils.doc_dt_timestamp(prop="microseconds component", refer_to="microsecond")
    def dt_microsecond(self):
        # Build the default implementation for ``Series.dt.microsecond`` and
        # run it on this query compiler.
        microsecond_impl = DateTimeDefault.register(pandas.Series.dt.microsecond)
        return microsecond_impl(self)

    @doc_utils.doc_dt_interval(prop="microseconds component", refer_to="microseconds")
    def dt_microseconds(self):
        # Build the default implementation for ``Series.dt.microseconds`` and
        # run it on this query compiler.
        microseconds_impl = DateTimeDefault.register(pandas.Series.dt.microseconds)
        return microseconds_impl(self)

    @doc_utils.doc_dt_timestamp(prop="minute component", refer_to="minute")
    def dt_minute(self):
        # Build the default implementation for ``Series.dt.minute`` and run it
        # on this query compiler.
        minute_impl = DateTimeDefault.register(pandas.Series.dt.minute)
        return minute_impl(self)

    @doc_utils.doc_dt_timestamp(prop="month component", refer_to="month")
    def dt_month(self):
        # Build the default implementation for ``Series.dt.month`` and run it
        # on this query compiler.
        month_impl = DateTimeDefault.register(pandas.Series.dt.month)
        return month_impl(self)

    @doc_utils.doc_dt_timestamp(
        prop="the month name", refer_to="month_name", params="locale : str, optional"
    )
    def dt_month_name(self, locale=None):
        # `refer_to` has to spell the pandas accessor name (``month_name``, with
        # an underscore), the same way ``dt_day_name`` refers to ``day_name``.
        # `locale` is forwarded positionally.
        return DateTimeDefault.register(pandas.Series.dt.month_name)(self, locale)

    @doc_utils.doc_dt_timestamp(prop="nanoseconds component", refer_to="nanosecond")
    def dt_nanosecond(self):
        # Build the default implementation for ``Series.dt.nanosecond`` and run
        # it on this query compiler.
        nanosecond_impl = DateTimeDefault.register(pandas.Series.dt.nanosecond)
        return nanosecond_impl(self)

    @doc_utils.doc_dt_interval(prop="nanoseconds component", refer_to="nanoseconds")
    def dt_nanoseconds(self):
        # Build the default implementation for ``Series.dt.nanoseconds`` and
        # run it on this query compiler.
        nanoseconds_impl = DateTimeDefault.register(pandas.Series.dt.nanoseconds)
        return nanoseconds_impl(self)

    @doc_utils.add_one_column_warning
    @doc_utils.add_refer_to("Series.dt.normalize")
    def dt_normalize(self):
        """
        Reset the time component of each date-time value to midnight.

        Returns
        -------
        BaseQueryCompiler
            New QueryCompiler containing date-time values with midnight time.
        """
        normalize_impl = DateTimeDefault.register(pandas.Series.dt.normalize)
        return normalize_impl(self)

    @doc_utils.doc_dt_timestamp(prop="quarter component", refer_to="quarter")
    def dt_quarter(self):
        # Register the pandas ``dt.quarter`` accessor and invoke it on ``self``.
        handler = DateTimeDefault.register(pandas.Series.dt.quarter)
        return handler(self)

    @doc_utils.doc_dt_period(prop="the fiscal year", refer_to="qyear")
    def dt_qyear(self):
        # Register the pandas ``dt.qyear`` accessor and invoke it on ``self``.
        handler = DateTimeDefault.register(pandas.Series.dt.qyear)
        return handler(self)

    @doc_utils.doc_dt_round(refer_to="round")
    def dt_round(self, freq, ambiguous="raise", nonexistent="raise"):
        # All three options are forwarded positionally, in signature order.
        handler = DateTimeDefault.register(pandas.Series.dt.round)
        return handler(self, freq, ambiguous, nonexistent)

    @doc_utils.doc_dt_timestamp(prop="seconds component", refer_to="second")
    def dt_second(self):
        # Register the pandas ``dt.second`` accessor and invoke it on ``self``.
        handler = DateTimeDefault.register(pandas.Series.dt.second)
        return handler(self)

    @doc_utils.doc_dt_interval(prop="seconds component", refer_to="seconds")
    def dt_seconds(self):
        # Register the pandas ``dt.seconds`` accessor and invoke it on ``self``.
        handler = DateTimeDefault.register(pandas.Series.dt.seconds)
        return handler(self)

    @doc_utils.doc_dt_period(prop="the timestamp of start time", refer_to="start_time")
    def dt_start_time(self):
        # Register the pandas ``dt.start_time`` accessor and invoke it on ``self``.
        handler = DateTimeDefault.register(pandas.Series.dt.start_time)
        return handler(self)

    @doc_utils.add_refer_to("Series.dt.strftime")
    def dt_strftime(self, date_format):
        """
        Render the underlying date-time data with the given format.

        Parameters
        ----------
        date_format : str
            Format forwarded to ``Series.dt.strftime``.

        Returns
        -------
        BaseQueryCompiler
            New QueryCompiler containing formatted date-time values.
        """
        handler = DateTimeDefault.register(pandas.Series.dt.strftime)
        return handler(self, date_format)

    @doc_utils.doc_dt_timestamp(prop="time component", refer_to="time")
    def dt_time(self):
        # Register the pandas ``dt.time`` accessor and invoke it on ``self``.
        handler = DateTimeDefault.register(pandas.Series.dt.time)
        return handler(self)

    @doc_utils.doc_dt_timestamp(
        prop="time component with timezone information", refer_to="timetz"
    )
    def dt_timetz(self):
        # Register the pandas ``dt.timetz`` accessor and invoke it on ``self``.
        handler = DateTimeDefault.register(pandas.Series.dt.timetz)
        return handler(self)

    @doc_utils.add_refer_to("Series.dt.asfreq")
    def dt_asfreq(self, freq=None, how: str = "E"):
        """
        Convert the PeriodArray to the specified frequency `freq`.

        Equivalent to applying pandas.Period.asfreq() with the given arguments to each Period in this PeriodArray.

        Parameters
        ----------
        freq : str, optional
            A frequency.
        how : str {'E', 'S'}, default: 'E'
            Whether the elements should be aligned to the end or start within a period.
            * 'E', "END", or "FINISH" for end,
            * 'S', "START", or "BEGIN" for start.
            January 31st ("END") vs. January 1st ("START") for example.

        Returns
        -------
        BaseQueryCompiler
            New QueryCompiler containing period data.
        """
        handler = DateTimeDefault.register(pandas.Series.dt.asfreq)
        return handler(self, freq, how)

    @doc_utils.add_one_column_warning
    @doc_utils.add_refer_to("Series.dt.to_period")
    def dt_to_period(self, freq=None):
        """
        Turn the underlying data into periods at a particular frequency.

        Parameters
        ----------
        freq : str, optional
            Frequency forwarded to ``Series.dt.to_period``.

        Returns
        -------
        BaseQueryCompiler
            New QueryCompiler containing period data.
        """
        handler = DateTimeDefault.register(pandas.Series.dt.to_period)
        return handler(self, freq)

    @doc_utils.add_one_column_warning
    @doc_utils.add_refer_to("Series.dt.to_pydatetime")
    def dt_to_pydatetime(self):
        """
        Turn the underlying data into an array of Python native ``datetime``.

        Returns
        -------
        BaseQueryCompiler
            New QueryCompiler containing 1D array of ``datetime`` objects.
        """
        handler = DateTimeDefault.register(pandas.Series.dt.to_pydatetime)
        return handler(self)

    # FIXME: there are no references to this method, we should either remove it
    # or add a call reference at the DataFrame level (Modin issue #3103).
    @doc_utils.add_one_column_warning
    @doc_utils.add_refer_to("Series.dt.to_pytimedelta")
    def dt_to_pytimedelta(self):
        """
        Turn the underlying data into an array of Python native ``datetime.timedelta``.

        Returns
        -------
        BaseQueryCompiler
            New QueryCompiler containing 1D array of ``datetime.timedelta``.
        """
        handler = DateTimeDefault.register(pandas.Series.dt.to_pytimedelta)
        return handler(self)

    @doc_utils.doc_dt_period(
        prop="the timestamp representation", refer_to="to_timestamp"
    )
    def dt_to_timestamp(self):
        # Register the pandas ``dt.to_timestamp`` method and invoke it on ``self``.
        handler = DateTimeDefault.register(pandas.Series.dt.to_timestamp)
        return handler(self)

    @doc_utils.doc_dt_interval(prop="duration in seconds", refer_to="total_seconds")
    def dt_total_seconds(self):
        # Register the pandas ``dt.total_seconds`` method and invoke it on ``self``.
        handler = DateTimeDefault.register(pandas.Series.dt.total_seconds)
        return handler(self)

    @doc_utils.add_one_column_warning
    @doc_utils.add_refer_to("Series.dt.tz")
    def dt_tz(self):
        """
        Retrieve the time-zone of the underlying time-series data.

        Returns
        -------
        BaseQueryCompiler
            QueryCompiler containing a single value, time-zone of the data.
        """
        handler = DateTimeDefault.register(pandas.Series.dt.tz)
        return handler(self)

    @doc_utils.add_one_column_warning
    @doc_utils.add_refer_to("Series.dt.tz_convert")
    def dt_tz_convert(self, tz):
        """
        Move time-series data to the specified time zone.

        Parameters
        ----------
        tz : str, pytz.timezone
            Target time zone, forwarded to ``Series.dt.tz_convert``.

        Returns
        -------
        BaseQueryCompiler
            New QueryCompiler containing values with converted time zone.
        """
        handler = DateTimeDefault.register(pandas.Series.dt.tz_convert)
        return handler(self, tz)

    @doc_utils.add_one_column_warning
    @doc_utils.add_refer_to("Series.dt.tz_localize")
    def dt_tz_localize(self, tz, ambiguous="raise", nonexistent="raise"):
        """
        Localize tz-naive to tz-aware.

        Parameters
        ----------
        tz : str, pytz.timezone, optional
            Time zone forwarded to ``Series.dt.tz_localize``.
        ambiguous : {"raise", "infer", "NaT"} or bool mask, default: "raise"
            Forwarded to ``Series.dt.tz_localize``.
        nonexistent : {"raise", "shift_forward", "shift_backward", "NaT"} or pandas.timedelta, default: "raise"
            Forwarded to ``Series.dt.tz_localize``.

        Returns
        -------
        BaseQueryCompiler
            New QueryCompiler containing values with localized time zone.
        """
        handler = DateTimeDefault.register(pandas.Series.dt.tz_localize)
        # Arguments are forwarded positionally, in signature order.
        return handler(self, tz, ambiguous, nonexistent)

    @doc_utils.doc_dt_timestamp(prop="integer day of week", refer_to="weekday")
    def dt_weekday(self):
        # Register the pandas ``dt.weekday`` accessor and invoke it on ``self``.
        handler = DateTimeDefault.register(pandas.Series.dt.weekday)
        return handler(self)

    @doc_utils.doc_dt_timestamp(prop="year component", refer_to="year")
    def dt_year(self):
        # Register the pandas ``dt.year`` accessor and invoke it on ``self``.
        handler = DateTimeDefault.register(pandas.Series.dt.year)
        return handler(self)

    # End of DateTime methods

    def first(self, offset: pandas.DateOffset):
        """
        Select initial periods of time series data based on a date offset.

        For a query compiler indexed by dates, this picks the first
        few rows that fall within the given date offset.

        Parameters
        ----------
        offset : pandas.DateOffset
            The offset length of the data to select.

        Returns
        -------
        BaseQueryCompiler
            New compiler containing the selected data.
        """
        handler = DataFrameDefault.register(pandas.DataFrame.first)
        return handler(self, offset)

    def last(self, offset: pandas.DateOffset):
        """
        Select final periods of time series data based on a date offset.

        For a query compiler with a sorted DatetimeIndex, this picks
        the last few rows that fall within the given date offset.

        Parameters
        ----------
        offset : pandas.DateOffset
            The offset length of the data to select.

        Returns
        -------
        BaseQueryCompiler
            New compiler containing the selected data.
        """
        handler = DataFrameDefault.register(pandas.DataFrame.last)
        return handler(self, offset)

    # Resample methods

    # FIXME:
    #   1. Query Compiler shouldn't care about differences between Series and DataFrame
    #      so `resample_agg_df` and `resample_agg_ser` should be combined (Modin issue #3104).
    #   2. In DataFrame API `Resampler.aggregate` is an alias for `Resampler.apply`
    #      we should remove one of these methods: `resample_agg_*` or `resample_app_*` (Modin issue #3107).
    @doc_utils.doc_resample_agg(
        action="apply passed aggregation function",
        params="func : str, dict, callable(pandas.Series) -> scalar, or list of such",
        output="function names",
        refer_to="agg",
    )
    def resample_agg_df(self, resample_kwargs, func, *args, **kwargs):
        # Register ``Resampler.aggregate`` and forward every argument unchanged.
        handler = ResampleDefault.register(pandas.core.resample.Resampler.aggregate)
        return handler(self, resample_kwargs, func, *args, **kwargs)

    @doc_utils.add_deprecation_warning(replacement_method="resample_agg_df")
    @doc_utils.doc_resample_agg(
        action="apply passed aggregation function in a one-column query compiler",
        params="func : str, dict, callable(pandas.Series) -> scalar, or list of such",
        output="function names",
        refer_to="agg",
    )
    def resample_agg_ser(self, resample_kwargs, func, *args, **kwargs):
        # Same as the frame variant, but registered with ``squeeze_self=True``.
        handler = ResampleDefault.register(
            pandas.core.resample.Resampler.aggregate, squeeze_self=True
        )
        return handler(self, resample_kwargs, func, *args, **kwargs)

    @doc_utils.add_deprecation_warning(replacement_method="resample_agg_df")
    @doc_utils.doc_resample_agg(
        action="apply passed aggregation function",
        params="func : str, dict, callable(pandas.Series) -> scalar, or list of such",
        output="function names",
        refer_to="apply",
    )
    def resample_app_df(self, resample_kwargs, func, *args, **kwargs):
        # Register ``Resampler.apply`` and forward every argument unchanged.
        handler = ResampleDefault.register(pandas.core.resample.Resampler.apply)
        return handler(self, resample_kwargs, func, *args, **kwargs)

    @doc_utils.add_deprecation_warning(replacement_method="resample_agg_df")
    @doc_utils.doc_resample_agg(
        action="apply passed aggregation function in a one-column query compiler",
        params="func : str, dict, callable(pandas.Series) -> scalar, or list of such",
        output="function names",
        refer_to="apply",
    )
    def resample_app_ser(self, resample_kwargs, func, *args, **kwargs):
        # Same as the frame variant, but registered with ``squeeze_self=True``.
        handler = ResampleDefault.register(
            pandas.core.resample.Resampler.apply, squeeze_self=True
        )
        return handler(self, resample_kwargs, func, *args, **kwargs)

    def resample_asfreq(self, resample_kwargs, fill_value):
        """
        Resample time-series data and get the values at the new frequency.

        Data is grouped into intervals by time-series row/column with
        a specified frequency, and the values at the new frequency are returned.

        Parameters
        ----------
        resample_kwargs : dict
            Resample parameters as expected by ``modin.pandas.DataFrame.resample`` signature.
        fill_value : scalar
            Forwarded to ``Resampler.asfreq``.

        Returns
        -------
        BaseQueryCompiler
            New QueryCompiler containing values at the specified frequency.
        """
        handler = ResampleDefault.register(pandas.core.resample.Resampler.asfreq)
        return handler(self, resample_kwargs, fill_value)

    @doc_utils.doc_resample_fillna(method="back-fill", refer_to="bfill")
    def resample_bfill(self, resample_kwargs, limit):
        # Register ``Resampler.bfill`` and call it with the resample options and limit.
        handler = ResampleDefault.register(pandas.core.resample.Resampler.bfill)
        return handler(self, resample_kwargs, limit)

    @doc_utils.doc_resample_reduce(
        result="number of non-NA values", refer_to="count", compatibility_params=False
    )
    def resample_count(self, resample_kwargs):
        # Register ``Resampler.count`` and call it with the resample options.
        handler = ResampleDefault.register(pandas.core.resample.Resampler.count)
        return handler(self, resample_kwargs)

    @doc_utils.doc_resample_fillna(method="forward-fill", refer_to="ffill")
    def resample_ffill(self, resample_kwargs, limit):
        # Register ``Resampler.ffill`` and call it with the resample options and limit.
        handler = ResampleDefault.register(pandas.core.resample.Resampler.ffill)
        return handler(self, resample_kwargs, limit)

    # FIXME: we should combine all resample fillna methods into `resample_fillna`
    # (Modin issue #3107)
    @doc_utils.doc_resample_fillna(
        method="specified", refer_to="fillna", params="method : str"
    )
    def resample_fillna(self, resample_kwargs, method, limit):
        # ``method`` and ``limit`` are forwarded positionally, in that order.
        handler = ResampleDefault.register(pandas.core.resample.Resampler.fillna)
        return handler(self, resample_kwargs, method, limit)

    @doc_utils.doc_resample_reduce(result="first element", refer_to="first")
    def resample_first(self, resample_kwargs, *args, **kwargs):
        # Register ``Resampler.first`` and forward every argument unchanged.
        handler = ResampleDefault.register(pandas.core.resample.Resampler.first)
        return handler(self, resample_kwargs, *args, **kwargs)

    # FIXME: This function takes a Modin DataFrame via the `obj` parameter;
    # we should avoid leaking high-level objects to the query compiler level
    # (Modin issue #3106).
    def resample_get_group(self, resample_kwargs, name, obj):
        """
        Resample time-series data and get the specified group.

        Data is grouped into intervals by time-series row/column with
        a specified frequency, and the values of the requested group are returned.

        Parameters
        ----------
        resample_kwargs : dict
            Resample parameters as expected by ``modin.pandas.DataFrame.resample`` signature.
        name : object
            Forwarded to ``Resampler.get_group``.
        obj : modin.pandas.DataFrame, optional
            Forwarded to ``Resampler.get_group``.

        Returns
        -------
        BaseQueryCompiler
            New QueryCompiler containing the values from the specified group.
        """
        handler = ResampleDefault.register(pandas.core.resample.Resampler.get_group)
        return handler(self, resample_kwargs, name, obj)

    @doc_utils.doc_resample_fillna(
        method="specified interpolation",
        refer_to="interpolate",
        params="""
        method : str
        axis : {0, 1}
        limit : int
        inplace : {False}
            This parameter serves the compatibility purpose. Always has to be False.
        limit_direction : {"forward", "backward", "both"}
        limit_area : {None, "inside", "outside"}
        downcast : str, optional
        **kwargs : dict
        """,
        overwrite_template_params=True,
    )
    def resample_interpolate(
        self,
        resample_kwargs,
        method,
        axis,
        limit,
        inplace,
        limit_direction,
        limit_area,
        downcast,
        **kwargs,
    ):
        handler = ResampleDefault.register(pandas.core.resample.Resampler.interpolate)
        # ``method`` goes positionally; the remaining options are passed by keyword.
        return handler(
            self,
            resample_kwargs,
            method,
            axis=axis,
            limit=limit,
            inplace=inplace,
            limit_direction=limit_direction,
            limit_area=limit_area,
            downcast=downcast,
            **kwargs,
        )

    @doc_utils.doc_resample_reduce(result="last element", refer_to="last")
    def resample_last(self, resample_kwargs, *args, **kwargs):
        # Register ``Resampler.last`` and forward every argument unchanged.
        handler = ResampleDefault.register(pandas.core.resample.Resampler.last)
        return handler(self, resample_kwargs, *args, **kwargs)

    @doc_utils.doc_resample_reduce(result="maximum value", refer_to="max")
    def resample_max(self, resample_kwargs, *args, **kwargs):
        # Register ``Resampler.max`` and forward every argument unchanged.
        handler = ResampleDefault.register(pandas.core.resample.Resampler.max)
        return handler(self, resample_kwargs, *args, **kwargs)

    @doc_utils.doc_resample_reduce(result="mean value", refer_to="mean")
    def resample_mean(self, resample_kwargs, *args, **kwargs):
        # Register ``Resampler.mean`` and forward every argument unchanged.
        handler = ResampleDefault.register(pandas.core.resample.Resampler.mean)
        return handler(self, resample_kwargs, *args, **kwargs)

    @doc_utils.doc_resample_reduce(result="median value", refer_to="median")
    def resample_median(self, resample_kwargs, *args, **kwargs):
        # Register ``Resampler.median`` and forward every argument unchanged.
        handler = ResampleDefault.register(pandas.core.resample.Resampler.median)
        return handler(self, resample_kwargs, *args, **kwargs)

    @doc_utils.doc_resample_reduce(result="minimum value", refer_to="min")
    def resample_min(self, resample_kwargs, *args, **kwargs):
        # Register ``Resampler.min`` and forward every argument unchanged.
        handler = ResampleDefault.register(pandas.core.resample.Resampler.min)
        return handler(self, resample_kwargs, *args, **kwargs)

    @doc_utils.doc_resample_fillna(method="'nearest'", refer_to="nearest")
    def resample_nearest(self, resample_kwargs, limit):
        # Register ``Resampler.nearest`` and call it with the resample options and limit.
        handler = ResampleDefault.register(pandas.core.resample.Resampler.nearest)
        return handler(self, resample_kwargs, limit)

    @doc_utils.doc_resample_reduce(result="number of unique values", refer_to="nunique")
    def resample_nunique(self, resample_kwargs, *args, **kwargs):
        # Register ``Resampler.nunique`` and forward every argument unchanged.
        handler = ResampleDefault.register(pandas.core.resample.Resampler.nunique)
        return handler(self, resample_kwargs, *args, **kwargs)

    # FIXME: Query Compiler shouldn't care about differences between Series and DataFrame
    # so `resample_ohlc_df` and `resample_ohlc_ser` should be combined (Modin issue #3104).
    @doc_utils.doc_resample_agg(
        action="compute open, high, low and close values",
        output="labels of columns containing computed values",
        refer_to="ohlc",
    )
    def resample_ohlc_df(self, resample_kwargs, *args, **kwargs):
        # Register ``Resampler.ohlc`` and forward every argument unchanged.
        handler = ResampleDefault.register(pandas.core.resample.Resampler.ohlc)
        return handler(self, resample_kwargs, *args, **kwargs)

    @doc_utils.doc_resample_agg(
        action="compute open, high, low and close values",
        output="labels of columns containing computed values",
        refer_to="ohlc",
    )
    def resample_ohlc_ser(self, resample_kwargs, *args, **kwargs):
        # Same as the frame variant, but registered with ``squeeze_self=True``.
        handler = ResampleDefault.register(
            pandas.core.resample.Resampler.ohlc, squeeze_self=True
        )
        return handler(self, resample_kwargs, *args, **kwargs)

    # FIXME: This method requires us to build a high-level resampler object,
    # which we shouldn't do at the query compiler level. We need to move this
    # to the front end (Modin issue #3105).
    @doc_utils.add_refer_to("Resampler.pipe")
    def resample_pipe(self, resample_kwargs, func, *args, **kwargs):
        """
        Resample time-series data and apply aggregation on it.

        Data is grouped into intervals by time-series row/column with
        a specified frequency, an equivalent ``pandas.Resampler`` object
        is built and the passed function is applied to it.

        Parameters
        ----------
        resample_kwargs : dict
            Resample parameters as expected by ``modin.pandas.DataFrame.resample`` signature.
        func : callable(pandas.Resampler) -> object or tuple(callable, str)
            Forwarded to ``Resampler.pipe``.
        *args : iterable
            Positional arguments to pass to function.
        **kwargs : dict
            Keyword arguments to pass to function.

        Returns
        -------
        BaseQueryCompiler
            New QueryCompiler containing the result of passed function.
        """
        handler = ResampleDefault.register(pandas.core.resample.Resampler.pipe)
        return handler(self, resample_kwargs, func, *args, **kwargs)

    @doc_utils.doc_resample_reduce(
        result="product",
        params="min_count : int",
        refer_to="prod",
    )
    def resample_prod(self, resample_kwargs, min_count, *args, **kwargs):
        # ``min_count`` is forwarded positionally ahead of the extra arguments.
        handler = ResampleDefault.register(pandas.core.resample.Resampler.prod)
        return handler(self, resample_kwargs, min_count, *args, **kwargs)

    @doc_utils.doc_resample_reduce(
        result="quantile", params="q : float", refer_to="quantile"
    )
    def resample_quantile(self, resample_kwargs, q, *args, **kwargs):
        # ``q`` is forwarded positionally ahead of the extra arguments.
        handler = ResampleDefault.register(pandas.core.resample.Resampler.quantile)
        return handler(self, resample_kwargs, q, *args, **kwargs)

    @doc_utils.doc_resample_reduce(
        result="standard error of the mean",
        refer_to="sem",
    )
    def resample_sem(self, resample_kwargs, *args, **kwargs):
        # Register ``Resampler.sem`` and forward every argument unchanged.
        handler = ResampleDefault.register(pandas.core.resample.Resampler.sem)
        return handler(self, resample_kwargs, *args, **kwargs)

    @doc_utils.doc_resample_reduce(
        result="number of elements in a group", refer_to="size"
    )
    def resample_size(self, resample_kwargs, *args, **kwargs):
        # Register ``Resampler.size`` and forward every argument unchanged.
        handler = ResampleDefault.register(pandas.core.resample.Resampler.size)
        return handler(self, resample_kwargs, *args, **kwargs)

    @doc_utils.doc_resample_reduce(
        result="standard deviation", params="ddof : int", refer_to="std"
    )
    def resample_std(self, resample_kwargs, ddof, *args, **kwargs):
        # ``ddof`` is forwarded positionally ahead of the extra arguments.
        handler = ResampleDefault.register(pandas.core.resample.Resampler.std)
        return handler(self, resample_kwargs, ddof, *args, **kwargs)

    @doc_utils.doc_resample_reduce(
        result="sum",
        params="min_count : int",
        refer_to="sum",
    )
    def resample_sum(self, resample_kwargs, min_count, *args, **kwargs):
        # ``min_count`` is forwarded positionally ahead of the extra arguments.
        handler = ResampleDefault.register(pandas.core.resample.Resampler.sum)
        return handler(self, resample_kwargs, min_count, *args, **kwargs)

    def resample_transform(self, resample_kwargs, arg, *args, **kwargs):
        """
        Resample time-series data and apply aggregation on it.

        Data is grouped into intervals by time-series row/column with
        a specified frequency and the passed function is called on each group.
        In contrast to ``resample_app_df`` apply function to the whole group,
        instead of a single axis.

        Parameters
        ----------
        resample_kwargs : dict
            Resample parameters as expected by ``modin.pandas.DataFrame.resample`` signature.
        arg : callable(pandas.DataFrame) -> pandas.Series
            Forwarded to ``Resampler.transform``.
        *args : iterable
            Positional arguments to pass to function.
        **kwargs : dict
            Keyword arguments to pass to function.

        Returns
        -------
        BaseQueryCompiler
            New QueryCompiler containing the result of passed function.
        """
        handler = ResampleDefault.register(pandas.core.resample.Resampler.transform)
        return handler(self, resample_kwargs, arg, *args, **kwargs)

    @doc_utils.doc_resample_reduce(
        result="variance", params="ddof : int", refer_to="var"
    )
    def resample_var(self, resample_kwargs, ddof, *args, **kwargs):
        # ``ddof`` is forwarded positionally ahead of the extra arguments.
        handler = ResampleDefault.register(pandas.core.resample.Resampler.var)
        return handler(self, resample_kwargs, ddof, *args, **kwargs)

    # End of Resample methods

    # Str methods

    @doc_utils.doc_str_method(refer_to="capitalize", params="")
    def str_capitalize(self):
        # Wrap the pandas string-accessor method with StrDefault, then apply it.
        handler = StrDefault.register(pandas.Series.str.capitalize)
        return handler(self)

    @doc_utils.doc_str_method(
        refer_to="center",
        params="""
        width : int
        fillchar : str, default: ' '""",
    )
    def str_center(self, width, fillchar=" "):
        # Arguments are forwarded positionally, in signature order.
        handler = StrDefault.register(pandas.Series.str.center)
        return handler(self, width, fillchar)

    @doc_utils.doc_str_method(
        refer_to="contains",
        params="""
        pat : str
        case : bool, default: True
        flags : int, default: 0
        na : object, default: None
        regex : bool, default: True""",
    )
    def str_contains(self, pat, case=True, flags=0, na=None, regex=True):
        # Arguments are forwarded positionally, in signature order.
        handler = StrDefault.register(pandas.Series.str.contains)
        return handler(self, pat, case, flags, na, regex)

    @doc_utils.doc_str_method(
        refer_to="count",
        params="""
        pat : str
        flags : int, default: 0""",
    )
    def str_count(self, pat, flags=0):
        # Arguments are forwarded positionally, in signature order.
        handler = StrDefault.register(pandas.Series.str.count)
        return handler(self, pat, flags)

    @doc_utils.doc_str_method(
        refer_to="endswith",
        params="""
        pat : str
        na : object, default: None""",
    )
    def str_endswith(self, pat, na=None):
        # Arguments are forwarded positionally, in signature order.
        handler = StrDefault.register(pandas.Series.str.endswith)
        return handler(self, pat, na)

    @doc_utils.doc_str_method(
        refer_to="find",
        params="""
        sub : str
        start : int, default: 0
        end : int, optional""",
    )
    def str_find(self, sub, start=0, end=None):
        # Arguments are forwarded positionally, in signature order.
        handler = StrDefault.register(pandas.Series.str.find)
        return handler(self, sub, start, end)

    @doc_utils.doc_str_method(
        refer_to="findall",
        params="""
        pat : str
        flags : int, default: 0""",
    )
    def str_findall(self, pat, flags=0):
        # Arguments are forwarded positionally, in signature order.
        handler = StrDefault.register(pandas.Series.str.findall)
        return handler(self, pat, flags)

    @doc_utils.doc_str_method(
        refer_to="fullmatch",
        params="""
        pat : str
        case : bool, default: True
        flags : int, default: 0
        na : object, default: None""",
    )
    def str_fullmatch(self, pat, case=True, flags=0, na=None):
        # Arguments are forwarded positionally, in signature order.
        handler = StrDefault.register(pandas.Series.str.fullmatch)
        return handler(self, pat, case, flags, na)

    @doc_utils.doc_str_method(refer_to="get", params="i : int")
    def str_get(self, i):
        # Register ``Series.str.get`` and call it with the requested position.
        handler = StrDefault.register(pandas.Series.str.get)
        return handler(self, i)

    @doc_utils.doc_str_method(refer_to="get_dummies", params="sep : str")
    def str_get_dummies(self, sep):
        # Register ``Series.str.get_dummies`` and call it with the separator.
        handler = StrDefault.register(pandas.Series.str.get_dummies)
        return handler(self, sep)

    @doc_utils.doc_str_method(
        refer_to="index",
        params="""
        sub : str
        start : int, default: 0
        end : int, optional""",
    )
    def str_index(self, sub, start=0, end=None):
        # Arguments are forwarded positionally, in signature order.
        handler = StrDefault.register(pandas.Series.str.index)
        return handler(self, sub, start, end)

    @doc_utils.doc_str_method(refer_to="isalnum", params="")
    def str_isalnum(self):
        # Register ``Series.str.isalnum`` and invoke it on ``self``.
        handler = StrDefault.register(pandas.Series.str.isalnum)
        return handler(self)

    @doc_utils.doc_str_method(refer_to="isalpha", params="")
    def str_isalpha(self):
        # Register ``Series.str.isalpha`` and invoke it on ``self``.
        handler = StrDefault.register(pandas.Series.str.isalpha)
        return handler(self)

    @doc_utils.doc_str_method(refer_to="isdecimal", params="")
    def str_isdecimal(self):
        # Register ``Series.str.isdecimal`` and invoke it on ``self``.
        handler = StrDefault.register(pandas.Series.str.isdecimal)
        return handler(self)

    @doc_utils.doc_str_method(refer_to="isdigit", params="")
    def str_isdigit(self):
        # Register ``Series.str.isdigit`` and invoke it on ``self``.
        handler = StrDefault.register(pandas.Series.str.isdigit)
        return handler(self)

    @doc_utils.doc_str_method(refer_to="islower", params="")
    def str_islower(self):
        # Register ``Series.str.islower`` and invoke it on ``self``.
        handler = StrDefault.register(pandas.Series.str.islower)
        return handler(self)

    @doc_utils.doc_str_method(refer_to="isnumeric", params="")
    def str_isnumeric(self):
        # Register ``Series.str.isnumeric`` and invoke it on ``self``.
        handler = StrDefault.register(pandas.Series.str.isnumeric)
        return handler(self)

    @doc_utils.doc_str_method(refer_to="isspace", params="")
    def str_isspace(self):
        # Register ``Series.str.isspace`` and invoke it on ``self``.
        handler = StrDefault.register(pandas.Series.str.isspace)
        return handler(self)

    @doc_utils.doc_str_method(refer_to="istitle", params="")
    def str_istitle(self):
        # Register ``Series.str.istitle`` and invoke it on ``self``.
        handler = StrDefault.register(pandas.Series.str.istitle)
        return handler(self)

    @doc_utils.doc_str_method(refer_to="isupper", params="")
    def str_isupper(self):
        # Register ``Series.str.isupper`` and invoke it on ``self``.
        handler = StrDefault.register(pandas.Series.str.isupper)
        return handler(self)

    @doc_utils.doc_str_method(refer_to="join", params="sep : str")
    def str_join(self, sep):
        # Register ``Series.str.join`` and call it with the separator.
        handler = StrDefault.register(pandas.Series.str.join)
        return handler(self, sep)

    @doc_utils.doc_str_method(refer_to="len", params="")
    def str_len(self):
        # Register ``Series.str.len`` and invoke it on ``self``.
        handler = StrDefault.register(pandas.Series.str.len)
        return handler(self)

    @doc_utils.doc_str_method(
        refer_to="ljust",
        params="""
        width : int
        fillchar : str, default: ' '""",
    )
    def str_ljust(self, width, fillchar=" "):
        # Arguments are forwarded positionally, in signature order.
        handler = StrDefault.register(pandas.Series.str.ljust)
        return handler(self, width, fillchar)

    @doc_utils.doc_str_method(refer_to="lower", params="")
    def str_lower(self):
        # Register ``Series.str.lower`` and invoke it on ``self``.
        handler = StrDefault.register(pandas.Series.str.lower)
        return handler(self)

    @doc_utils.doc_str_method(refer_to="lstrip", params="to_strip : str, optional")
    def str_lstrip(self, to_strip=None):
        # Register ``Series.str.lstrip`` and call it with ``to_strip``.
        handler = StrDefault.register(pandas.Series.str.lstrip)
        return handler(self, to_strip)

    @doc_utils.doc_str_method(
        refer_to="match",
        params="""
        pat : str
        case : bool, default: True
        flags : int, default: 0
        na : object, default: None""",
    )
    def str_match(self, pat, case=True, flags=0, na=None):
        # Arguments are forwarded positionally, in signature order.
        handler = StrDefault.register(pandas.Series.str.match)
        return handler(self, pat, case, flags, na)

    @doc_utils.doc_str_method(
        refer_to="extract",
        params="""
        pat : str
        flags : int, default: 0
        expand : bool, default: True""",
    )
    def str_extract(self, pat, flags=0, expand=True):
        # Arguments are forwarded positionally, in signature order.
        handler = StrDefault.register(pandas.Series.str.extract)
        return handler(self, pat, flags, expand)

    @doc_utils.doc_str_method(
        refer_to="extractall",
        params="""
        pat : str
        flags : int, default: 0""",
    )
    def str_extractall(self, pat, flags=0):
        # Arguments are forwarded positionally, in signature order.
        handler = StrDefault.register(pandas.Series.str.extractall)
        return handler(self, pat, flags)

    @doc_utils.doc_str_method(
        refer_to="normalize", params="form : {'NFC', 'NFKC', 'NFD', 'NFKD'}"
    )
    def str_normalize(self, form):
        # Register ``Series.str.normalize`` and call it with the requested form.
        handler = StrDefault.register(pandas.Series.str.normalize)
        return handler(self, form)

    @doc_utils.doc_str_method(
        refer_to="pad",
        params="""
        width : int
        side : {'left', 'right', 'both'}, default: 'left'
        fillchar : str, default: ' '""",
    )
    def str_pad(self, width, side="left", fillchar=" "):
        # Arguments are forwarded positionally, in signature order.
        handler = StrDefault.register(pandas.Series.str.pad)
        return handler(self, width, side, fillchar)

    @doc_utils.doc_str_method(
        refer_to="partition",
        params="""
        sep : str, default: ' '
        expand : bool, default: True""",
    )
    def str_partition(self, sep=" ", expand=True):
        # Arguments are forwarded positionally, in signature order.
        handler = StrDefault.register(pandas.Series.str.partition)
        return handler(self, sep, expand)

    @doc_utils.doc_str_method(refer_to="removeprefix", params="prefix : str")
    def str_removeprefix(self, prefix):
        # Register ``Series.str.removeprefix`` and call it with the prefix.
        handler = StrDefault.register(pandas.Series.str.removeprefix)
        return handler(self, prefix)

    @doc_utils.doc_str_method(refer_to="removesuffix", params="suffix : str")
    def str_removesuffix(self, suffix):
        # Register ``Series.str.removesuffix`` and call it with the suffix.
        handler = StrDefault.register(pandas.Series.str.removesuffix)
        return handler(self, suffix)

    @doc_utils.doc_str_method(refer_to="repeat", params="repeats : int")
    def str_repeat(self, repeats):
        # Register ``Series.str.repeat`` and call it with the repeat count.
        handler = StrDefault.register(pandas.Series.str.repeat)
        return handler(self, repeats)

    @doc_utils.doc_str_method(
        refer_to="replace",
        params="""
        pat : str
        repl : str or callable
        n : int, default: -1
        case : bool, optional
        flags : int, default: 0
        regex : bool, default: None""",
    )
    def str_replace(self, pat, repl, n=-1, case=None, flags=0, regex=None):
        # Arguments are forwarded positionally, in signature order.
        handler = StrDefault.register(pandas.Series.str.replace)
        return handler(self, pat, repl, n, case, flags, regex)

    @doc_utils.doc_str_method(
        refer_to="rfind",
        params="""
        sub : str
        start : int, default: 0
        end : int, optional""",
    )
    def str_rfind(self, sub, start=0, end=None):
        # Arguments are forwarded positionally, in signature order.
        handler = StrDefault.register(pandas.Series.str.rfind)
        return handler(self, sub, start, end)

    @doc_utils.doc_str_method(
        refer_to="rindex",
        params="""
        sub : str
        start : int, default: 0
        end : int, optional""",
    )
    def str_rindex(self, sub, start=0, end=None):
        # Arguments are forwarded positionally, in signature order.
        handler = StrDefault.register(pandas.Series.str.rindex)
        return handler(self, sub, start, end)

    @doc_utils.doc_str_method(
        refer_to="rjust",
        params="""
        width : int
        fillchar : str, default: ' '""",
    )
    def str_rjust(self, width, fillchar=" "):
        # Arguments are forwarded positionally, in signature order.
        handler = StrDefault.register(pandas.Series.str.rjust)
        return handler(self, width, fillchar)

    @doc_utils.doc_str_method(
        refer_to="rpartition",
        params="""
        sep : str, default: ' '
        expand : bool, default: True""",
    )
    def str_rpartition(self, sep=" ", expand=True):
        # Arguments are forwarded positionally, in signature order.
        handler = StrDefault.register(pandas.Series.str.rpartition)
        return handler(self, sep, expand)

    @doc_utils.doc_str_method(
        refer_to="rsplit",
        params="""
        pat : str, optional
        n : int, default: -1
        expand : bool, default: False""",
    )
    def str_rsplit(self, pat=None, *, n=-1, expand=False):
        # ``pat`` is forwarded positionally; ``n`` and ``expand`` are forwarded
        # by keyword, mirroring the keyword-only signature of this method.
        rsplit_impl = StrDefault.register(pandas.Series.str.rsplit)
        return rsplit_impl(self, pat, n=n, expand=expand)

    @doc_utils.doc_str_method(refer_to="rstrip", params="to_strip : str, optional")
    def str_rstrip(self, to_strip=None):
        # Register pandas' ``Series.str.rstrip`` with ``StrDefault`` and call
        # the result with this query compiler and ``to_strip``.
        rstrip_impl = StrDefault.register(pandas.Series.str.rstrip)
        return rstrip_impl(self, to_strip)

    @doc_utils.doc_str_method(
        refer_to="slice",
        params="""
        start : int, optional
        stop : int, optional
        step : int, optional""",
    )
    def str_slice(self, start=None, stop=None, step=None):
        # Register pandas' ``Series.str.slice`` with ``StrDefault`` and forward
        # ``start``, ``stop`` and ``step`` positionally.
        slice_impl = StrDefault.register(pandas.Series.str.slice)
        return slice_impl(self, start, stop, step)

    @doc_utils.doc_str_method(
        refer_to="slice_replace",
        params="""
        start : int, optional
        stop : int, optional
        repl : str or callable, optional""",
    )
    def str_slice_replace(self, start=None, stop=None, repl=None):
        # Register pandas' ``Series.str.slice_replace`` with ``StrDefault`` and
        # forward ``start``, ``stop`` and ``repl`` positionally.
        slice_replace_impl = StrDefault.register(pandas.Series.str.slice_replace)
        return slice_replace_impl(self, start, stop, repl)

    @doc_utils.doc_str_method(
        refer_to="split",
        params="""
        pat : str, optional
        n : int, default: -1
        expand : bool, default: False
        regex : bool, default: None""",
    )
    def str_split(self, pat=None, *, n=-1, expand=False, regex=None):
        # ``pat`` is forwarded positionally; ``n``, ``expand`` and ``regex`` are
        # forwarded by keyword.
        split_impl = StrDefault.register(pandas.Series.str.split)
        return split_impl(self, pat, n=n, expand=expand, regex=regex)

    @doc_utils.doc_str_method(
        refer_to="startswith",
        params="""
        pat : str
        na : object, default: None""",
    )
    def str_startswith(self, pat, na=None):
        # Register pandas' ``Series.str.startswith`` with ``StrDefault`` and
        # forward ``pat`` and ``na`` positionally.
        startswith_impl = StrDefault.register(pandas.Series.str.startswith)
        return startswith_impl(self, pat, na)

    @doc_utils.doc_str_method(refer_to="strip", params="to_strip : str, optional")
    def str_strip(self, to_strip=None):
        # Register pandas' ``Series.str.strip`` with ``StrDefault`` and call the
        # result with this query compiler and ``to_strip``.
        strip_impl = StrDefault.register(pandas.Series.str.strip)
        return strip_impl(self, to_strip)

    @doc_utils.doc_str_method(refer_to="swapcase", params="")
    def str_swapcase(self):
        # Register pandas' ``Series.str.swapcase`` with ``StrDefault`` and call
        # the result on this query compiler.
        swapcase_impl = StrDefault.register(pandas.Series.str.swapcase)
        return swapcase_impl(self)

    @doc_utils.doc_str_method(refer_to="title", params="")
    def str_title(self):
        # Register pandas' ``Series.str.title`` with ``StrDefault`` and call the
        # result on this query compiler.
        title_impl = StrDefault.register(pandas.Series.str.title)
        return title_impl(self)

    @doc_utils.doc_str_method(refer_to="translate", params="table : dict")
    def str_translate(self, table):
        # Register pandas' ``Series.str.translate`` with ``StrDefault`` and call
        # the result with this query compiler and ``table``.
        translate_impl = StrDefault.register(pandas.Series.str.translate)
        return translate_impl(self, table)

    @doc_utils.doc_str_method(refer_to="upper", params="")
    def str_upper(self):
        # Register pandas' ``Series.str.upper`` with ``StrDefault`` and call the
        # result on this query compiler.
        upper_impl = StrDefault.register(pandas.Series.str.upper)
        return upper_impl(self)

    @doc_utils.doc_str_method(
        refer_to="wrap",
        params="""
        width : int
        **kwargs : dict""",
    )
    def str_wrap(self, width, **kwargs):
        # ``width`` is forwarded positionally and any extra keyword arguments
        # are passed through unchanged.
        wrap_impl = StrDefault.register(pandas.Series.str.wrap)
        return wrap_impl(self, width, **kwargs)

    @doc_utils.doc_str_method(refer_to="zfill", params="width : int")
    def str_zfill(self, width):
        # Register pandas' ``Series.str.zfill`` with ``StrDefault`` and call the
        # result with this query compiler and ``width``.
        zfill_impl = StrDefault.register(pandas.Series.str.zfill)
        return zfill_impl(self, width)

    @doc_utils.doc_str_method(refer_to="__getitem__", params="key : object")
    def str___getitem__(self, key):
        # Register pandas' ``Series.str.__getitem__`` with ``StrDefault`` and
        # call the result with this query compiler and ``key``.
        getitem_impl = StrDefault.register(pandas.Series.str.__getitem__)
        return getitem_impl(self, key)

    @doc_utils.doc_str_method(
        refer_to="encode",
        params="""
            encoding : str,
            errors : str, default = 'strict'""",
    )
    def str_encode(self, encoding, errors="strict"):
        # ``errors`` defaults to "strict": this is the default documented for
        # this method and the default of ``pandas.Series.str.encode``. It used
        # to be a required parameter, so existing callers are unaffected.
        encode_impl = StrDefault.register(pandas.Series.str.encode)
        return encode_impl(self, encoding, errors)

    @doc_utils.doc_str_method(
        refer_to="decode",
        params="""
                encoding : str,
                errors : str, default = 'strict'
                dtype : str or dtype, optional""",
    )
    def str_decode(self, encoding, errors="strict", dtype=None):
        # ``errors`` defaults to "strict" and ``dtype`` to None, matching the
        # defaults documented for this method. Both used to be required
        # parameters, so existing callers are unaffected.
        decode_impl = StrDefault.register(pandas.Series.str.decode)
        return decode_impl(self, encoding, errors, dtype)

    @doc_utils.doc_str_method(
        refer_to="cat",
        params="""
            others : Series, Index, DataFrame, np.ndarray or list-like,
            sep : str, default: '',
            na_rep : str or None, default: None,
            join : {'left', 'right', 'outer', 'inner'}, default: 'left'""",
    )
    def str_cat(self, others, sep=None, na_rep=None, join="left"):
        # Register pandas' ``Series.str.cat`` with ``StrDefault`` and forward
        # ``others``, ``sep``, ``na_rep`` and ``join`` positionally.
        cat_impl = StrDefault.register(pandas.Series.str.cat)
        return cat_impl(self, others, sep, na_rep, join)

    @doc_utils.doc_str_method(
        refer_to="casefold",
        params="",
    )
    def str_casefold(self):
        # Register pandas' ``Series.str.casefold`` with ``StrDefault`` and call
        # the result on this query compiler.
        casefold_impl = StrDefault.register(pandas.Series.str.casefold)
        return casefold_impl(self)

    # End of Str methods

    # Rolling methods

    # FIXME: most of the rolling/window methods take *args and **kwargs parameters
    # that are only needed for compatibility with numpy. This behavior is inherited
    # from the API level; we should get rid of it (Modin issue #3108).

    @doc_utils.doc_window_method(
        window_cls_name="Rolling",
        result="the result of passed functions",
        action="apply specified functions",
        refer_to="aggregate",
        params="""
        func : str, dict, callable(pandas.Series) -> scalar, or list of such
        *args : iterable
        **kwargs : dict""",
        build_rules="udf_aggregation",
    )
    def rolling_aggregate(self, fold_axis, rolling_kwargs, func, *args, **kwargs):
        # ``fold_axis`` is accepted but not passed on; everything else is
        # forwarded to the callable registered for ``Rolling.aggregate``.
        rolling_cls = pandas.core.window.rolling.Rolling
        aggregate_impl = RollingDefault.register(rolling_cls.aggregate)
        return aggregate_impl(self, rolling_kwargs, func, *args, **kwargs)

    # FIXME: at the query compiler level, `rolling_apply` is an alias for `rolling_aggregate`;
    # one of these should be removed (Modin issue #3107).
    @doc_utils.add_deprecation_warning(replacement_method="rolling_aggregate")
    @doc_utils.doc_window_method(
        window_cls_name="Rolling",
        result="the result of passed function",
        action="apply specified function",
        refer_to="apply",
        params="""
        func : callable(pandas.Series) -> scalar
        raw : bool, default: False
        engine : None, default: None
            This parameters serves the compatibility purpose. Always has to be None.
        engine_kwargs : None, default: None
            This parameters serves the compatibility purpose. Always has to be None.
        args : tuple, optional
        kwargs : dict, optional""",
        build_rules="udf_aggregation",
    )
    def rolling_apply(
        self,
        fold_axis,
        rolling_kwargs,
        func,
        raw=False,
        engine=None,
        engine_kwargs=None,
        args=None,
        kwargs=None,
    ):
        # ``fold_axis`` is accepted but not passed on. ``args`` and ``kwargs``
        # are forwarded as plain positional values, not unpacked.
        rolling_cls = pandas.core.window.rolling.Rolling
        apply_impl = RollingDefault.register(rolling_cls.apply)
        return apply_impl(
            self, rolling_kwargs, func, raw, engine, engine_kwargs, args, kwargs
        )

    @doc_utils.doc_window_method(
        window_cls_name="Rolling",
        result="correlation",
        refer_to="corr",
        params="""
        other : modin.pandas.Series, modin.pandas.DataFrame, list-like, optional
        pairwise : bool, optional
        *args : iterable
        **kwargs : dict""",
    )
    def rolling_corr(
        self, fold_axis, rolling_kwargs, other=None, pairwise=None, *args, **kwargs
    ):
        # ``fold_axis`` is accepted but not passed on; ``other`` and
        # ``pairwise`` are forwarded positionally ahead of ``*args``.
        rolling_cls = pandas.core.window.rolling.Rolling
        corr_impl = RollingDefault.register(rolling_cls.corr)
        return corr_impl(self, rolling_kwargs, other, pairwise, *args, **kwargs)

    @doc_utils.doc_window_method(
        window_cls_name="Rolling", result="number of non-NA values", refer_to="count"
    )
    def rolling_count(self, fold_axis, rolling_kwargs):
        # ``fold_axis`` is accepted but not passed on; only ``rolling_kwargs``
        # reaches the callable registered for ``Rolling.count``.
        rolling_cls = pandas.core.window.rolling.Rolling
        count_impl = RollingDefault.register(rolling_cls.count)
        return count_impl(self, rolling_kwargs)

    @doc_utils.doc_window_method(
        window_cls_name="Rolling",
        result="covariance",
        refer_to="cov",
        params="""
        other : modin.pandas.Series, modin.pandas.DataFrame, list-like, optional
        pairwise : bool, optional
        ddof : int, default:  1
        **kwargs : dict""",
    )
    def rolling_cov(
        self, fold_axis, rolling_kwargs, other=None, pairwise=None, ddof=1, **kwargs
    ):
        # ``fold_axis`` is accepted but not passed on; ``other``, ``pairwise``
        # and ``ddof`` are forwarded positionally.
        rolling_cls = pandas.core.window.rolling.Rolling
        cov_impl = RollingDefault.register(rolling_cls.cov)
        return cov_impl(self, rolling_kwargs, other, pairwise, ddof, **kwargs)

    @doc_utils.doc_window_method(
        window_cls_name="Rolling",
        result="unbiased kurtosis",
        refer_to="kurt",
        params="**kwargs : dict",
    )
    def rolling_kurt(self, fold_axis, rolling_kwargs, **kwargs):
        # ``fold_axis`` is accepted but not passed on.
        rolling_cls = pandas.core.window.rolling.Rolling
        kurt_impl = RollingDefault.register(rolling_cls.kurt)
        return kurt_impl(self, rolling_kwargs, **kwargs)

    @doc_utils.doc_window_method(
        window_cls_name="Rolling",
        result="maximum value",
        refer_to="max",
        params="""
        *args : iterable
        **kwargs : dict""",
    )
    def rolling_max(self, fold_axis, rolling_kwargs, *args, **kwargs):
        # ``fold_axis`` is accepted but not passed on.
        rolling_cls = pandas.core.window.rolling.Rolling
        max_impl = RollingDefault.register(rolling_cls.max)
        return max_impl(self, rolling_kwargs, *args, **kwargs)

    @doc_utils.doc_window_method(
        window_cls_name="Rolling",
        result="mean value",
        refer_to="mean",
        params="""
        *args : iterable
        **kwargs : dict""",
    )
    def rolling_mean(self, fold_axis, rolling_kwargs, *args, **kwargs):
        # ``fold_axis`` is accepted but not passed on.
        rolling_cls = pandas.core.window.rolling.Rolling
        mean_impl = RollingDefault.register(rolling_cls.mean)
        return mean_impl(self, rolling_kwargs, *args, **kwargs)

    @doc_utils.doc_window_method(
        window_cls_name="Rolling",
        result="median value",
        refer_to="median",
        params="**kwargs : dict",
    )
    def rolling_median(self, fold_axis, rolling_kwargs, **kwargs):
        # ``fold_axis`` is accepted but not passed on.
        rolling_cls = pandas.core.window.rolling.Rolling
        median_impl = RollingDefault.register(rolling_cls.median)
        return median_impl(self, rolling_kwargs, **kwargs)

    @doc_utils.doc_window_method(
        window_cls_name="Rolling",
        result="minimum value",
        refer_to="min",
        params="""
        *args : iterable
        **kwargs : dict""",
    )
    def rolling_min(self, fold_axis, rolling_kwargs, *args, **kwargs):
        # ``fold_axis`` is accepted but not passed on.
        rolling_cls = pandas.core.window.rolling.Rolling
        min_impl = RollingDefault.register(rolling_cls.min)
        return min_impl(self, rolling_kwargs, *args, **kwargs)

    @doc_utils.doc_window_method(
        window_cls_name="Rolling",
        result="quantile",
        refer_to="quantile",
        params="""
        quantile : float
        interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}, default: 'linear'
        **kwargs : dict""",
    )
    def rolling_quantile(
        self, fold_axis, rolling_kwargs, quantile, interpolation="linear", **kwargs
    ):
        # ``fold_axis`` is accepted but not passed on; ``quantile`` and
        # ``interpolation`` are forwarded positionally.
        rolling_cls = pandas.core.window.rolling.Rolling
        quantile_impl = RollingDefault.register(rolling_cls.quantile)
        return quantile_impl(self, rolling_kwargs, quantile, interpolation, **kwargs)

    @doc_utils.doc_window_method(
        window_cls_name="Rolling",
        result="unbiased skewness",
        refer_to="skew",
        params="**kwargs : dict",
    )
    def rolling_skew(self, fold_axis, rolling_kwargs, **kwargs):
        # ``fold_axis`` is accepted but not passed on.
        rolling_cls = pandas.core.window.rolling.Rolling
        skew_impl = RollingDefault.register(rolling_cls.skew)
        return skew_impl(self, rolling_kwargs, **kwargs)

    @doc_utils.doc_window_method(
        window_cls_name="Rolling",
        result="standard deviation",
        refer_to="std",
        params="""
        ddof : int, default: 1
        *args : iterable
        **kwargs : dict""",
    )
    def rolling_std(self, fold_axis, rolling_kwargs, ddof=1, *args, **kwargs):
        # ``fold_axis`` is accepted but not passed on; ``ddof`` is forwarded
        # positionally ahead of ``*args``.
        rolling_cls = pandas.core.window.rolling.Rolling
        std_impl = RollingDefault.register(rolling_cls.std)
        return std_impl(self, rolling_kwargs, ddof, *args, **kwargs)

    @doc_utils.doc_window_method(
        window_cls_name="Rolling",
        result="sum",
        refer_to="sum",
        params="""
        *args : iterable
        **kwargs : dict""",
    )
    def rolling_sum(self, fold_axis, rolling_kwargs, *args, **kwargs):
        # ``fold_axis`` is accepted but not passed on.
        rolling_cls = pandas.core.window.rolling.Rolling
        sum_impl = RollingDefault.register(rolling_cls.sum)
        return sum_impl(self, rolling_kwargs, *args, **kwargs)

    @doc_utils.doc_window_method(
        window_cls_name="Rolling",
        result="sem",
        refer_to="sem",
        params="""
        *args : iterable
        **kwargs : dict""",
    )
    def rolling_sem(self, fold_axis, rolling_kwargs, *args, **kwargs):
        # ``fold_axis`` is accepted but not passed on.
        rolling_cls = pandas.core.window.rolling.Rolling
        sem_impl = RollingDefault.register(rolling_cls.sem)
        return sem_impl(self, rolling_kwargs, *args, **kwargs)

    @doc_utils.doc_window_method(
        window_cls_name="Rolling",
        result="variance",
        refer_to="var",
        params="""
        ddof : int, default: 1
        *args : iterable
        **kwargs : dict""",
    )
    def rolling_var(self, fold_axis, rolling_kwargs, ddof=1, *args, **kwargs):
        # ``fold_axis`` is accepted but not passed on; ``ddof`` is forwarded
        # positionally ahead of ``*args``.
        rolling_cls = pandas.core.window.rolling.Rolling
        var_impl = RollingDefault.register(rolling_cls.var)
        return var_impl(self, rolling_kwargs, ddof, *args, **kwargs)

    @doc_utils.doc_window_method(
        window_cls_name="Rolling",
        result="rank",
        refer_to="rank",
        params="""
        method : {'average', 'min', 'max'}, default: 'average'
        ascending : bool, default: True
        pct : bool, default: False
        numeric_only : bool, default: False
        *args : iterable
        **kwargs : dict""",
    )
    def rolling_rank(
        self,
        fold_axis,
        rolling_kwargs,
        method="average",
        ascending=True,
        pct=False,
        numeric_only=False,
        *args,
        **kwargs,
    ):
        # ``fold_axis`` is accepted but not passed on. The named options are
        # forwarded by keyword; ``*args`` is written first because Python binds
        # unpacked positionals before keyword arguments in either spelling.
        rolling_cls = pandas.core.window.rolling.Rolling
        rank_impl = RollingDefault.register(rolling_cls.rank)
        return rank_impl(
            self,
            rolling_kwargs,
            *args,
            method=method,
            ascending=ascending,
            pct=pct,
            numeric_only=numeric_only,
            **kwargs,
        )

    # End of Rolling methods

    # Begin Expanding methods

    @doc_utils.doc_window_method(
        window_cls_name="Expanding",
        result="the result of passed functions",
        action="apply specified functions",
        refer_to="aggregate",
        win_type="expanding window",
        params="""
        func : str, dict, callable(pandas.Series) -> scalar, or list of such
        *args : iterable
        **kwargs : dict""",
        build_rules="udf_aggregation",
    )
    def expanding_aggregate(self, fold_axis, expanding_args, func, *args, **kwargs):
        # ``fold_axis`` is accepted but not passed on; everything else is
        # forwarded to the callable registered for ``Expanding.aggregate``.
        expanding_cls = pandas.core.window.expanding.Expanding
        aggregate_impl = ExpandingDefault.register(expanding_cls.aggregate)
        return aggregate_impl(self, expanding_args, func, *args, **kwargs)

    @doc_utils.doc_window_method(
        window_cls_name="Expanding",
        result="sum",
        refer_to="sum",
        win_type="expanding window",
        params="""
        *args : iterable
        **kwargs : dict""",
    )
    def expanding_sum(self, fold_axis, expanding_args, *args, **kwargs):
        # ``fold_axis`` is accepted but not passed on.
        expanding_cls = pandas.core.window.expanding.Expanding
        sum_impl = ExpandingDefault.register(expanding_cls.sum)
        return sum_impl(self, expanding_args, *args, **kwargs)

    @doc_utils.doc_window_method(
        window_cls_name="Expanding",
        result="minimum value",
        refer_to="min",
        win_type="expanding window",
        params="""
        *args : iterable
        **kwargs : dict""",
    )
    def expanding_min(self, fold_axis, expanding_args, *args, **kwargs):
        # ``fold_axis`` is accepted but not passed on.
        expanding_cls = pandas.core.window.expanding.Expanding
        min_impl = ExpandingDefault.register(expanding_cls.min)
        return min_impl(self, expanding_args, *args, **kwargs)

    @doc_utils.doc_window_method(
        window_cls_name="Expanding",
        result="maximum value",
        refer_to="max",
        win_type="expanding window",
        params="""
        *args : iterable
        **kwargs : dict""",
    )
    def expanding_max(self, fold_axis, expanding_args, *args, **kwargs):
        # ``fold_axis`` is accepted but not passed on.
        expanding_cls = pandas.core.window.expanding.Expanding
        max_impl = ExpandingDefault.register(expanding_cls.max)
        return max_impl(self, expanding_args, *args, **kwargs)

    @doc_utils.doc_window_method(
        window_cls_name="Expanding",
        result="mean value",
        refer_to="mean",
        win_type="expanding window",
        params="""
        *args : iterable
        **kwargs : dict""",
    )
    def expanding_mean(self, fold_axis, expanding_args, *args, **kwargs):
        # ``fold_axis`` is accepted but not passed on.
        expanding_cls = pandas.core.window.expanding.Expanding
        mean_impl = ExpandingDefault.register(expanding_cls.mean)
        return mean_impl(self, expanding_args, *args, **kwargs)

    @doc_utils.doc_window_method(
        window_cls_name="Expanding",
        result="median",
        refer_to="median",
        win_type="expanding window",
        params="""
        numeric_only : bool, default: False
        engine : Optional[str], default: None
        engine_kwargs : Optional[dict], default: None
        **kwargs : dict""",
    )
    def expanding_median(
        self,
        fold_axis,
        expanding_args,
        numeric_only=False,
        engine=None,
        engine_kwargs=None,
        **kwargs,
    ):
        # ``fold_axis`` is accepted but not passed on; the named options are
        # forwarded by keyword.
        expanding_cls = pandas.core.window.expanding.Expanding
        median_impl = ExpandingDefault.register(expanding_cls.median)
        return median_impl(
            self,
            expanding_args,
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
            **kwargs,
        )

    @doc_utils.doc_window_method(
        window_cls_name="Expanding",
        result="variance",
        refer_to="var",
        win_type="expanding window",
        params="""
        ddof : int, default: 1
        *args : iterable
        **kwargs : dict""",
    )
    def expanding_var(self, fold_axis, expanding_args, ddof=1, *args, **kwargs):
        # ``fold_axis`` is accepted but not passed on. ``ddof`` goes by keyword;
        # ``*args`` is written first because Python binds unpacked positionals
        # before keyword arguments in either spelling.
        expanding_cls = pandas.core.window.expanding.Expanding
        var_impl = ExpandingDefault.register(expanding_cls.var)
        return var_impl(self, expanding_args, *args, ddof=ddof, **kwargs)

    @doc_utils.doc_window_method(
        window_cls_name="Expanding",
        result="standard deviation",
        refer_to="std",
        win_type="expanding window",
        params="""
        ddof : int, default: 1
        *args : iterable
        **kwargs : dict""",
    )
    def expanding_std(self, fold_axis, expanding_args, ddof=1, *args, **kwargs):
        # ``fold_axis`` is accepted but not passed on. ``ddof`` goes by keyword;
        # ``*args`` is written first because Python binds unpacked positionals
        # before keyword arguments in either spelling.
        expanding_cls = pandas.core.window.expanding.Expanding
        std_impl = ExpandingDefault.register(expanding_cls.std)
        return std_impl(self, expanding_args, *args, ddof=ddof, **kwargs)

    @doc_utils.doc_window_method(
        window_cls_name="Expanding",
        result="correlation",
        refer_to="corr",
        win_type="expanding window",
        params="""
        squeeze_self : bool
        squeeze_other : bool
        other : pandas.Series or pandas.DataFrame, default: None
        pairwise : bool | None, default: None
        ddof : int, default: 1
        numeric_only : bool, default: False
        **kwargs : dict""",
    )
    def expanding_corr(
        self,
        fold_axis,
        expanding_args,
        squeeze_self,
        squeeze_other,
        other=None,
        pairwise=None,
        ddof=1,
        numeric_only=False,
        **kwargs,
    ):
        # Convert ``other`` (when given) to a pandas object, squeezing it along
        # axis 1 if ``squeeze_other`` is set; ``None`` is passed through as is.
        if other is None:
            pandas_other = None
        else:
            pandas_other = other.to_pandas()
            if squeeze_other:
                pandas_other = pandas_other.squeeze(axis=1)

        # ``fold_axis`` is accepted but not passed on; ``squeeze_self`` is
        # handed to ``register`` rather than to the registered callable.
        corr_impl = ExpandingDefault.register(
            pandas.core.window.expanding.Expanding.corr,
            squeeze_self=squeeze_self,
        )
        return corr_impl(
            self,
            expanding_args,
            other=pandas_other,
            pairwise=pairwise,
            ddof=ddof,
            numeric_only=numeric_only,
            **kwargs,
        )

    @doc_utils.doc_window_method(
        window_cls_name="Expanding",
        result="sample covariance",
        refer_to="cov",
        win_type="expanding window",
        params="""
        squeeze_self : bool
        squeeze_other : bool
        other : pandas.Series or pandas.DataFrame, default: None
        pairwise : bool | None, default: None
        ddof : int, default: 1
        numeric_only : bool, default: False
        **kwargs : dict""",
    )
    def expanding_cov(
        self,
        fold_axis,
        expanding_args,
        squeeze_self,
        squeeze_other,
        other=None,
        pairwise=None,
        ddof=1,
        numeric_only=False,
        **kwargs,
    ):
        # Convert ``other`` (when given) to a pandas object, squeezing it along
        # axis 1 if ``squeeze_other`` is set; ``None`` is passed through as is.
        if other is None:
            pandas_other = None
        else:
            pandas_other = other.to_pandas()
            if squeeze_other:
                pandas_other = pandas_other.squeeze(axis=1)

        # ``fold_axis`` is accepted but not passed on; ``squeeze_self`` is
        # handed to ``register`` rather than to the registered callable.
        cov_impl = ExpandingDefault.register(
            pandas.core.window.expanding.Expanding.cov,
            squeeze_self=squeeze_self,
        )
        return cov_impl(
            self,
            expanding_args,
            other=pandas_other,
            pairwise=pairwise,
            ddof=ddof,
            numeric_only=numeric_only,
            **kwargs,
        )

    @doc_utils.doc_window_method(
        window_cls_name="Expanding",
        result="number of non-NA values",
        refer_to="count",
        win_type="expanding window",
        params="""
        ddof : int, default: 1
            This parameter is not forwarded to the pandas implementation.
        *args : iterable
        **kwargs : dict""",
    )
    def expanding_count(self, fold_axis, expanding_args, ddof=1, *args, **kwargs):
        # The doc metadata previously described ``std`` (copy-paste); it now
        # describes ``count``, consistent with ``rolling_count``.
        # NOTE(review): ``fold_axis`` and ``ddof`` are accepted but not passed
        # on. A third positional argument from a caller binds to ``ddof`` and is
        # dropped — confirm that this is intended.
        expanding_cls = pandas.core.window.expanding.Expanding
        count_impl = ExpandingDefault.register(expanding_cls.count)
        return count_impl(self, expanding_args, *args, **kwargs)

    @doc_utils.doc_window_method(
        window_cls_name="Expanding",
        result="quantile",
        refer_to="quantile",
        win_type="expanding window",
        params="""
        quantile : float
        interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}, default: 'linear'
        **kwargs : dict""",
    )
    def expanding_quantile(
        self, fold_axis, expanding_args, quantile, interpolation="linear", **kwargs
    ):
        # ``interpolation`` defaults to "linear": this is the default documented
        # for this method and the one ``rolling_quantile`` already uses. It used
        # to be a required parameter, so existing callers are unaffected.
        # ``fold_axis`` is accepted but not passed on.
        expanding_cls = pandas.core.window.expanding.Expanding
        quantile_impl = ExpandingDefault.register(expanding_cls.quantile)
        return quantile_impl(self, expanding_args, quantile, interpolation, **kwargs)

    @doc_utils.doc_window_method(
        window_cls_name="Expanding",
        result="unbiased standard error mean",
        refer_to="sem",
        win_type="expanding window",
        params="""
        ddof : int, default: 1
        numeric_only : bool, default: False
        *args : iterable
        **kwargs : dict""",
    )
    def expanding_sem(
        self, fold_axis, expanding_args, ddof=1, numeric_only=False, *args, **kwargs
    ):
        # ``refer_to`` previously pointed at ``std`` (copy-paste); the method
        # wraps ``Expanding.sem``, so the reference now names ``sem``.
        # ``fold_axis`` is accepted but not passed on.
        expanding_cls = pandas.core.window.expanding.Expanding
        sem_impl = ExpandingDefault.register(expanding_cls.sem)
        return sem_impl(
            self, expanding_args, ddof=ddof, numeric_only=numeric_only, *args, **kwargs
        )

    @doc_utils.doc_window_method(
        window_cls_name="Expanding",
        result="unbiased skewness",
        refer_to="skew",
        win_type="expanding window",
        params="""
        numeric_only : bool, default: False
        **kwargs : dict""",
    )
    def expanding_skew(self, fold_axis, expanding_args, numeric_only=False, **kwargs):
        # ``fold_axis`` is accepted but not passed on; ``numeric_only`` is
        # forwarded by keyword.
        expanding_cls = pandas.core.window.expanding.Expanding
        skew_impl = ExpandingDefault.register(expanding_cls.skew)
        return skew_impl(self, expanding_args, numeric_only=numeric_only, **kwargs)

    @doc_utils.doc_window_method(
        window_cls_name="Expanding",
        result="Fisher’s definition of kurtosis without bias",
        refer_to="kurt",
        win_type="expanding window",
        params="""
        numeric_only : bool, default: False
        **kwargs : dict""",
    )
    def expanding_kurt(self, fold_axis, expanding_args, numeric_only=False, **kwargs):
        # ``fold_axis`` is accepted but not passed on; ``numeric_only`` is
        # forwarded by keyword.
        expanding_cls = pandas.core.window.expanding.Expanding
        kurt_impl = ExpandingDefault.register(expanding_cls.kurt)
        return kurt_impl(self, expanding_args, numeric_only=numeric_only, **kwargs)

    @doc_utils.doc_window_method(
        window_cls_name="Expanding",
        result="rank",
        refer_to="rank",
        win_type="expanding window",
        params="""
        method : {'average', 'min', 'max'}, default: 'average'
        ascending : bool, default: True
        pct : bool, default: False
        numeric_only : bool, default: False
        *args : iterable
        **kwargs : dict""",
    )
    def expanding_rank(
        self,
        fold_axis,
        expanding_args,
        method="average",
        ascending=True,
        pct=False,
        numeric_only=False,
        *args,
        **kwargs,
    ):
        # ``fold_axis`` is accepted but not passed on. The named options are
        # forwarded by keyword; ``*args`` is written first because Python binds
        # unpacked positionals before keyword arguments in either spelling.
        expanding_cls = pandas.core.window.expanding.Expanding
        rank_impl = ExpandingDefault.register(expanding_cls.rank)
        return rank_impl(
            self,
            expanding_args,
            *args,
            method=method,
            ascending=ascending,
            pct=pct,
            numeric_only=numeric_only,
            **kwargs,
        )

    # End of Expanding methods

    # Window methods

    @doc_utils.doc_window_method(
        window_cls_name="Rolling",
        win_type="window of the specified type",
        result="mean",
        refer_to="mean",
        params="""
        *args : iterable
        **kwargs : dict""",
    )
    def window_mean(self, fold_axis, window_kwargs, *args, **kwargs):
        # ``fold_axis`` is accepted but not passed on. ``Window.mean`` is
        # registered through ``RollingDefault``.
        mean_impl = RollingDefault.register(pandas.core.window.Window.mean)
        return mean_impl(self, window_kwargs, *args, **kwargs)

    @doc_utils.doc_window_method(
        window_cls_name="Rolling",
        win_type="window of the specified type",
        result="standard deviation",
        refer_to="std",
        params="""
        ddof : int, default: 1
        *args : iterable
        **kwargs : dict""",
    )
    def window_std(self, fold_axis, window_kwargs, ddof=1, *args, **kwargs):
        # ``fold_axis`` is accepted but not passed on; ``ddof`` is forwarded
        # positionally ahead of ``*args``.
        std_impl = RollingDefault.register(pandas.core.window.Window.std)
        return std_impl(self, window_kwargs, ddof, *args, **kwargs)

    @doc_utils.doc_window_method(
        window_cls_name="Rolling",
        win_type="window of the specified type",
        result="sum",
        refer_to="sum",
        params="""
        *args : iterable
        **kwargs : dict""",
    )
    def window_sum(self, fold_axis, window_kwargs, *args, **kwargs):
        # ``fold_axis`` is accepted but not passed on. ``Window.sum`` is
        # registered through ``RollingDefault``.
        sum_impl = RollingDefault.register(pandas.core.window.Window.sum)
        return sum_impl(self, window_kwargs, *args, **kwargs)

    @doc_utils.doc_window_method(
        window_cls_name="Rolling",
        win_type="window of the specified type",
        result="variance",
        refer_to="var",
        params="""
        ddof : int, default: 1
        *args : iterable
        **kwargs : dict""",
    )
    def window_var(self, fold_axis, window_kwargs, ddof=1, *args, **kwargs):
        # ``fold_axis`` is accepted but not passed on; ``ddof`` is forwarded
        # positionally ahead of ``*args``.
        var_impl = RollingDefault.register(pandas.core.window.Window.var)
        return var_impl(self, window_kwargs, ddof, *args, **kwargs)

    # End of Window methods

    # Categories methods

    @doc_utils.add_one_column_warning
    @doc_utils.add_refer_to("Series.cat.codes")
    def cat_codes(self):
        """
        Get the category codes of the underlying categorical data.

        Returns
        -------
        BaseQueryCompiler
            New QueryCompiler holding the integer codes of the underlying
            categories.
        """
        codes_impl = CatDefault.register(pandas.Series.cat.codes)
        return codes_impl(self)

    # End of Categories methods

    # List accessor's methods

    @doc_utils.add_one_column_warning
    @doc_utils.add_refer_to("Series.list.flatten")
    def list_flatten(self):
        """
        Flatten the list values of the underlying data.

        Returns
        -------
        BaseQueryCompiler
        """
        flatten_impl = ListDefault.register(pandas.Series.list.flatten)
        return flatten_impl(self)

    @doc_utils.add_one_column_warning
    @doc_utils.add_refer_to("Series.list.len")
    def list_len(self):
        """
        Compute the length of every list in the Series.

        Returns
        -------
        BaseQueryCompiler
        """
        len_impl = ListDefault.register(pandas.Series.list.len)
        return len_impl(self)

    @doc_utils.add_one_column_warning
    @doc_utils.add_refer_to("Series.list.__getitem__")
    def list__getitem__(self, key):  # noqa: PR01
        """
        Index into or slice the lists in the Series.

        Returns
        -------
        BaseQueryCompiler
        """
        # ``key`` is forwarded by keyword.
        getitem_impl = ListDefault.register(pandas.Series.list.__getitem__)
        return getitem_impl(self, key=key)

    # End of List accessor's methods

    # Struct accessor's methods

    @doc_utils.add_one_column_warning
    @doc_utils.add_refer_to("Series.struct.dtypes")
    def struct_dtypes(self):
        """
        Get the dtype object of every child field of the struct.

        Returns
        -------
        BaseQueryCompiler
        """
        dtypes_impl = StructDefault.register(pandas.Series.struct.dtypes)
        return dtypes_impl(self)

    @doc_utils.add_one_column_warning
    @doc_utils.add_refer_to("Series.struct.field")
    def struct_field(self, name_or_index):  # noqa: PR01
        """
        Extract one child field of a struct as a Series.

        Returns
        -------
        BaseQueryCompiler
        """
        # ``name_or_index`` is forwarded by keyword.
        field_impl = StructDefault.register(pandas.Series.struct.field)
        return field_impl(self, name_or_index=name_or_index)

    @doc_utils.add_one_column_warning
    @doc_utils.add_refer_to("Series.struct.explode")
    def struct_explode(self):
        """
        Extract every child field of a struct as a DataFrame.

        Returns
        -------
        BaseQueryCompiler
        """
        explode_impl = StructDefault.register(pandas.Series.struct.explode)
        return explode_impl(self)

    # End of Struct accessor's methods

    # DataFrame methods

    def invert(self):
        """
        Bitwise-invert every element of the QueryCompiler.

        Returns
        -------
        BaseQueryCompiler
            New QueryCompiler holding the bitwise inversion of each value.
        """
        invert_impl = DataFrameDefault.register(pandas.DataFrame.__invert__)
        return invert_impl(self)

    @doc_utils.doc_reduce_agg(
        method="unbiased kurtosis", refer_to="kurt", extra_params=["skipna", "**kwargs"]
    )
    def kurt(self, axis, numeric_only=False, skipna=True, **kwargs):
        # All named options are forwarded by keyword to the callable registered
        # for ``pandas.DataFrame.kurt``.
        kurt_impl = DataFrameDefault.register(pandas.DataFrame.kurt)
        return kurt_impl(
            self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs
        )

    # Class-level aliases: each name below is bound to whatever `sum` / `prod`
    # resolve to at this point in the class body (presumably the reduction
    # methods defined earlier in the class — confirm against the full file).
    sum_min_count = sum
    prod_min_count = prod

    @doc_utils.add_refer_to("DataFrame.compare")
    def compare(self, other, align_axis, keep_shape, keep_equal, result_names):
        """
        Compare the data of two QueryCompilers and show where they differ.

        Parameters
        ----------
        other : BaseQueryCompiler
            Query compiler to compare with. Must have the same shape and the
            same labeling as `self`.
        align_axis : {0, 1}
        keep_shape : bool
        keep_equal : bool
        result_names : tuple

        Returns
        -------
        BaseQueryCompiler
            New QueryCompiler holding the differences between `self` and the
            passed query compiler.
        """
        # Every option is forwarded by keyword.
        compare_impl = DataFrameDefault.register(pandas.DataFrame.compare)
        return compare_impl(
            self,
            other=other,
            align_axis=align_axis,
            keep_shape=keep_shape,
            keep_equal=keep_equal,
            result_names=result_names,
        )

    @doc_utils.add_refer_to("Series.case_when")
    def case_when(self, caselist):  # noqa: PR01, RT01, D200
        """
        Replace values where the conditions are True.
        """
        # A workaround for https://github.com/modin-project/modin/issues/7041
        compiler_cls = type(self)

        def _to_pandas_if_compiler(item):
            # Instances of this query compiler's type are converted to a pandas
            # object squeezed along axis 1; anything else is left untouched.
            if isinstance(item, compiler_cls):
                return item.to_pandas().squeeze(axis=1)
            return item

        pandas_caselist = [
            tuple(map(_to_pandas_if_compiler, case_tuple)) for case_tuple in caselist
        ]
        case_when_impl = SeriesDefault.register(pandas.Series.case_when)
        return case_when_impl(self, caselist=pandas_caselist)

    def get_pandas_backend(self) -> Optional[str]:
        """
        Return the `_pandas_backend` value stored on `_modin_frame`.

        Returns
        -------
        str | None
            Backend name.
        """
        frame = self._modin_frame
        return frame._pandas_backend

    def repartition(self, axis=None):
        """
        Repartition the QueryCompiler to get ideal partitions inside.

        Useful for improving performance in cases where the query compiler
        does not yet repartition implicitly.

        Parameters
        ----------
        axis : {0, 1, None}, optional
            The axis along which the repartitioning occurs.
            `None` is used for repartitioning along both axes.

        Returns
        -------
        BaseQueryCompiler
            The repartitioned BaseQueryCompiler.
        """
        target_axes = [0, 1] if axis is None else [axis]
        # Label and dtype caches are always copied from the original frame,
        # not from the intermediate results built inside the loop.
        source_frame = self._modin_frame

        result = self
        for ax in target_axes:
            # Identity function applied over the full axis with
            # ``keep_partitioning=False``. Lengths are copied only for the
            # labels of the axis other than ``ax``.
            repartitioned_frame = result._modin_frame.apply_full_axis(
                ax,
                lambda df: df,
                new_index=source_frame.copy_index_cache(copy_lengths=ax == 1),
                new_columns=source_frame.copy_columns_cache(copy_lengths=ax == 0),
                dtypes=source_frame.copy_dtypes_cache(),
                keep_partitioning=False,
                sync_labels=False,
            )
            result = result.__constructor__(repartitioned_frame)
        return result

    # End of DataFrame methods


================================================
FILE: modin/core/storage_formats/base/query_compiler_calculator.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
Module contains ``BackendCostCalculator`` class.

``BackendCostCalculator`` is used to determine the casting cost
between a set of different backends. It aggregates the cost across
all query compilers to determine the best query compiler to use.
"""

import random
from types import MappingProxyType
from typing import Any, Optional

from modin.config import Backend, BackendJoinConsiderAllBackends
from modin.core.storage_formats.base.query_compiler import (
    BaseQueryCompiler,
    QCCoercionCost,
)
from modin.logging import get_logger
from modin.logging.metrics import emit_metric


def all_switchable_backends() -> list[str]:
    """
    List the active backends that automatic switching is allowed to target.

    Returns
    -------
    list of str
        Every backend reported by ``Backend.get_active_backends()`` except
        "Ray", "Unidist" and "Dask", in the order they are reported.
    """
    # Automatic switching to these engines is disabled for now, because
    # 1) _get_prepared_factory_for_backend() currently calls
    #    _initialize_engine(), which starts up the ray/dask/unidist processes
    # 2) we can't decide to switch to unidist in the middle of execution.
    non_switchable = ("Ray", "Unidist", "Dask")
    return [
        backend
        for backend in Backend.get_active_backends()
        if backend not in non_switchable
    ]


class AggregatedBackendData:
    """
    Running cost record for one backend considered for a computation.

    Parameters
    ----------
    backend : str
        Name of the backend.
    qc_cls : type[BaseQueryCompiler]
        The query compiler sub-class for this backend. Its ``max_cost()``
        is queried once, at construction time.

    Attributes
    ----------
    backend : str
        The backend name passed in.
    qc_cls : type[BaseQueryCompiler]
        The query compiler class passed in.
    cost : int
        Aggregated cost for this backend; starts at 0.
    max_cost
        Value returned by ``qc_cls.max_cost()``.
    """

    def __init__(self, backend: str, qc_cls: type[BaseQueryCompiler]):
        self.backend, self.qc_cls = backend, qc_cls
        # Nothing has been charged to this backend yet.
        self.cost = 0
        # Snapshot the ceiling once so later comparisons do not re-query the class.
        self.max_cost = qc_cls.max_cost()


class BackendCostCalculator:
    """
    Calculate which Backend should be used for an operation.

    Given a set of QueryCompilers containing various data, determine
    which query compiler's backend would minimize the cost of casting
    or coercion. Use the aggregate sum of coercion to determine overall
    cost.

    Parameters
    ----------
    operation_arguments : MappingProxyType[str, Any]
        Mapping from operation argument names to their values.
    api_cls_name : str or None
        Representing the class name of the function being called.
    operation : str
        Name of the operation being performed.
    query_compilers : list of BaseQueryCompiler
        The query compiler arguments of the operation.
    preop_switch : bool
        True if the operation is a pre-operation switch point.
    """

    def __init__(
        self,
        *,
        operation_arguments: MappingProxyType[str, Any],
        api_cls_name: Optional[str],
        operation: str,
        query_compilers: list[BaseQueryCompiler],
        preop_switch: bool,
    ):
        """
        Record the operation details and register every candidate backend.

        All parameters are keyword-only; a shallow copy of `query_compilers`
        is stored, so later changes to the caller's list are not observed.
        """
        from modin.core.execution.dispatching.factories.dispatcher import (
            FactoryDispatcher,
        )

        self._api_cls_name = api_cls_name
        self._op = operation
        self._operation_arguments = operation_arguments
        self._result_backend = None
        self._qc_list: list[BaseQueryCompiler] = query_compilers[:]
        self._backend_data = {}

        def register(backend):
            # Create the aggregate entry for `backend` unless it already exists.
            if backend in self._backend_data:
                return
            factory = FactoryDispatcher._get_prepared_factory_for_backend(
                backend=backend
            )
            self._backend_data[backend] = AggregatedBackendData(
                backend, factory.io_cls.query_compiler_cls
            )

        # If a QC's backend was not configured as active, it still needs an entry,
        # so the argument backends are always registered.
        for query_compiler in query_compilers:
            register(query_compiler.get_backend())

        if preop_switch and BackendJoinConsiderAllBackends.get():
            # Backends not found among the query compiler arguments are added last.
            # Because we default to the first query compiler's backend when no cost
            # information is available, the argument backends must come first in the
            # (insertion-ordered) dictionary.
            for backend in all_switchable_backends():
                register(backend)

    def calculate(self) -> str:
        """
        Calculate which query compiler we should cast to.

        Switching calculation is performed as follows:
        - For every registered query compiler in qc_list, with backend `backend_from`, compute
          `self_cost = qc_from.stay_cost(...)` and add it to the total cost for `backend_from`.
          - For every valid target `backend_to`, compute `qc_from.move_to_cost(qc_cls_to, ...)`. If it
            returns None, instead compute `qc_cls_to.move_to_me_cost(qc_from, ...)`. Add the result
            to the cost for `backend_to`.
        At a high level, the cost for choosing a particular backend is the sum of
            (all stay costs for data already on that backend)
            + (cost of moving all other query compilers to this backend)

        If the operation is a registered pre-operation switch point, then the list of target backends
        is ALL active backends. Otherwise, only backends found among the arguments are considered.
        Post-operation switch points are not yet supported.

        If the arguments contain no query compilers for a particular backend, then there are no stay
        costs. In this scenario, we expect the move_to cost for this backend to outweigh the corresponding
        stay costs for each query compiler's original backend.

        If no argument QCs have cost information for each other (that is, move_to_cost and move_to_me_cost
        returns None), then we attempt to move all data to the backend of the first QC.

        We considered a few alternative algorithms for switching calculation:

        1. Instead of considering all active backends, consider only backends found among input QCs.
        This was used in the calculator's original implementation, as we figured transfer cost to
        unrelated backends would outweigh any possible gains in computation speed. However, certain
        pathological cases that significantly changed the size of input or output data (e.g. cross join)
        would create situations where transferring data after the computation became prohibitively
        expensive, so we chose to allow switching to unrelated backends.
        Additionally, the original implementation had a bug where stay_cost was only computed for the
        _first_ query compiler of each backend, thus under-reporting the cost of computation for any
        backend with multiple QCs present. In practice this very rarely affected the chosen result.
        2. Compute stay/move costs only once for each backend pair, but force QCs to consider other
        arguments when calculating.
        This approach is the most robust and accurate for cases like cross join, where a product of
        transfer costs between backends is more reflective of cost than size. This approach requires
        more work in the query compiler, as each QC must be aware of when multiple QC arguments are
        passed and adjust the cost computation accordingly. It is also unclear how often this would
        make a meaningful difference compared to the summation approach.

        Returns
        -------
        str
            A string representing a backend.

        Raises
        ------
        ValueError
            Raises ValueError when the reported transfer cost for every backend exceeds its maximum cost.
        """
        if self._result_backend is not None:
            return self._result_backend
        if len(self._qc_list) == 1:
            return self._qc_list[0].get_backend()
        if len(self._qc_list) == 0:
            raise ValueError("No query compilers registered")
        # See docstring for explanation of switching decision algorithm.
        for qc_from in self._qc_list:
            # Add self cost for the current query compiler
            self_cost = qc_from.stay_cost(
                self._api_cls_name, self._op, self._operation_arguments
            )
            backend_from = qc_from.get_backend()
            if self_cost is not None:
                self._add_cost_data(backend_from, self_cost)

            for backend_to, agg_data_to in self._backend_data.items():
                if backend_to == backend_from:
                    continue
                qc_cls_to = agg_data_to.qc_cls
                cost = qc_from.move_to_cost(
                    qc_cls_to,
                    self._api_cls_name,
                    self._op,
                    self._operation_arguments,
                )
                if cost is not None:
                    self._add_cost_data(backend_to, cost)
                else:
                    # We have some information asymmetry in query compilers,
                    # qc_from does not know about qc_to types so we instead
                    # ask the same question but of qc_to.
                    cost = qc_cls_to.move_to_me_cost(
                        qc_from,
                        self._api_cls_name,
                        self._op,
                        self._operation_arguments,
                    )
                    if cost is not None:
                        self._add_cost_data(backend_to, cost)

        self._result_backend = None

        def get_min_cost_backend(skip_exceeds_max_cost=True) -> str:
            result = None
            min_value = None
            for k, v in self._backend_data.items():
                if skip_exceeds_max_cost and v.cost > v.max_cost:
                    continue
                if min_value is None or min_value > v.cost:
                    min_value = v.cost
                    result = k
            return result

        # Get the best backend, skipping backends where we may exceed
        # the total cost
        self._result_backend = get_min_cost_backend(skip_exceeds_max_cost=True)

        # If we still do not have a backend, pick the best backend while
        # ignoring max_cost
        if self._result_backend is None:
            self._result_backend = get_min_cost_backend(skip_exceeds_max_cost=False)

        # This should not happen
        if self._result_backend is None:
            raise ValueError("No backends are available to calculate costs.")

        if len(self._backend_data) > 1:
            get_logger().info(
                f"BackendCostCalculator results for {'pd' if self._api_cls_name is None else self._api_cls_name}.{self._op}: {self._calc_result_log(self._result_backend)}"
            )
            # Does not need to be secure, should not use system entropy
            metrics_group = "%04x" % random.randrange(16**4)
            for qc in self._qc_list:
                max_shape = qc._max_shape()
                backend = qc.get_backend()
                emit_metric(
                    f"hybrid.merge.candidate.{backend}.group.{metrics_group}.rows",
                    max_shape[0],
                )
                emit_metric(
                    f"hybrid.merge.candidate.{backend}.group.{metrics_group}.cols",
                    max_shape[1],
                )
            for k, v in self._backend_data.items():
                emit_metric(
                    f"hybrid.merge.candidate.{k}.group.{metrics_group}.cost", v.cost
                )
            emit_metric(
                f"hybrid.merge.decision.{self._result_backend}.group.{metrics_group}",
                1,
            )

        return self._result_backend

    def _add_cost_data(self, backend, cost):
        """
        Validate a cost and add it to a backend's aggregated total.

        Parameters
        ----------
        backend : str
            String representing the backend whose total is increased.
        cost : int
            Cost to accumulate; it is checked with
            ``QCCoercionCost.validate_coersion_cost`` and then added with ``+=``.
            NOTE(review): presumably an int or ``QCCoercionCost`` value - confirm
            against the validator.
        """
        # Validation happens before the aggregate is looked up or modified.
        QCCoercionCost.validate_coersion_cost(cost)
        # We can assume that if this method is called, the backend already
        # has an entry in the backend_data map.
        aggregate = self._backend_data[backend]
        aggregate.cost += cost

    def _calc_result_log(self, selected_backend: str) -> str:
        """
        Create a string summary of the backend costs.

        The format is
            [*|][backend name]:[cost]/[max_cost],...
        where '*' indicates this was the selected backend
        and [cost]/[max_cost] represents the aggregated
        cost of moving to that backend over the maximum
        cost allowed on that backend.

        Parameters
        ----------
        selected_backend : str
            String representing the backend selected by
            the calculator.

        Returns
        -------
        str
            String representation of calculator state.
        """
        return ", ".join(
            f"{'*'+k if k is selected_backend else k}:{v.cost}/{v.max_cost}"
            for k, v in self._backend_data.items()
        )


================================================
FILE: modin/core/storage_formats/pandas/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""The module represents the query compiler level for the pandas storage format."""

from .query_compiler import PandasQueryCompiler

__all__ = ["PandasQueryCompiler"]


================================================
FILE: modin/core/storage_formats/pandas/aggregations.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Contains implementations for aggregation functions."""

from __future__ import annotations

from enum import Enum
from typing import TYPE_CHECKING, Callable, Tuple

import numpy as np
import pandas
from pandas.core.dtypes.common import is_numeric_dtype

if TYPE_CHECKING:
    from .query_compiler import PandasQueryCompiler

from modin.utils import MODIN_UNNAMED_SERIES_LABEL


class CorrCovBuilder:
    """Responsible for building pandas query compiler's methods computing correlation and covariance matrices."""

    class Method(Enum):
        """Enum specifying what method to use (either CORR for correlation or COV for covariance)."""

        # Compute a correlation matrix.
        CORR = 1
        # Compute a covariance matrix; ``_build_map_reduce_methods`` raises
        # NotImplementedError when asked for this method.
        COV = 2

    @classmethod
    def build_corr_method(
        cls,
    ) -> Callable[[PandasQueryCompiler, str, int, bool], PandasQueryCompiler]:
        """
        Build a query compiler method computing the correlation matrix.

        Returns
        -------
        callable(qc: PandasQueryCompiler, method: str, min_periods: int, numeric_only: bool) -> PandasQueryCompiler
            A callable matching the ``BaseQueryCompiler.corr`` signature and computing the correlation matrix.
        """

        def corr_method(
            qc: PandasQueryCompiler,
            method: str,
            min_periods: int = 1,
            numeric_only: bool = True,
        ) -> PandasQueryCompiler:
            # Further implementation is designed for the default pandas backend (numpy)
            if method != "pearson" or qc.get_pandas_backend() == "pyarrow":
                return super(type(qc), qc).corr(
                    method=method, min_periods=min_periods, numeric_only=numeric_only
                )

            if not numeric_only and qc.frame_has_materialized_columns:
                new_index, new_columns = (
                    qc._modin_frame.copy_columns_cache(),
                    qc._modin_frame.copy_columns_cache(),
                )
                new_dtypes = pandas.Series(
                    np.repeat(pandas.api.types.pandas_dtype("float"), len(new_columns)),
                    index=new_columns,
                )
            elif numeric_only and qc.frame_has_materialized_dtypes:
                old_dtypes = qc.dtypes

                new_columns = old_dtypes[old_dtypes.map(is_numeric_dtype)].index
                new_index = new_columns.copy()
                new_dtypes = pandas.Series(
                    np.repeat(pandas.api.types.pandas_dtype("float"), len(new_columns)),
                    index=new_columns,
                )
            else:
                new_index, new_columns, new_dtypes = None, None, None

            map, reduce = cls._build_map_reduce_methods(
                min_periods, method=cls.Method.CORR, numeric_only=numeric_only
            )

            reduced = qc._modin_frame.apply_full_axis(axis=1, func=map)
            # The 'reduced' dataset has the shape either (num_cols, num_cols + 3) for a non-NaN case
            # or (num_cols, num_cols * 4) for a NaN case, so it's acceptable to call `.combine_and_apply()`
            # here as the number of cols is usually quite small
            result = reduced.combine_and_apply(
                func=reduce,
                new_index=new_index,
                new_columns=new_columns,
                new_dtypes=new_dtypes,
            )
            return qc.__constructor__(result)

        return corr_method

    @classmethod
    def build_cov_method(
        cls,
    ) -> Callable[[PandasQueryCompiler, int, int], PandasQueryCompiler]:
        """
        Build a query compiler method computing the covariance matrix.

        Returns
        -------
        callable(qc: PandasQueryCompiler, min_periods: int, ddof: int) -> PandasQueryCompiler
            A callable matching the ``BaseQueryCompiler.cov`` signature and computing the covariance matrix.
        """
        raise NotImplementedError("Computing covariance is not yet implemented.")

    @classmethod
    def _build_map_reduce_methods(
        cls, min_periods: int, method: Method, numeric_only: bool
    ) -> Tuple[
        Callable[[pandas.DataFrame], pandas.DataFrame],
        Callable[[pandas.DataFrame], pandas.DataFrame],
    ]:
        """
        Build MapReduce kernels for the specified corr/cov method.

        Parameters
        ----------
        min_periods : int
            The parameter to pass to the reduce method.
        method : CorrCovBuilder.Method
            Whether the kernels compute correlation or covariance.
        numeric_only : bool
            Whether to only include numeric types.

        Returns
        -------
        Tuple[Callable(pandas.DataFrame) -> pandas.DataFrame, Callable(pandas.DataFrame) -> pandas.DataFrame]
            A tuple holding the Map (at the first position) and the Reduce (at the second position) kernels
            computing correlation/covariance matrix.
        """
        if method == cls.Method.COV:
            raise NotImplementedError("Computing covariance is not yet implemented.")

        return lambda df: _CorrCovKernels.map(
            df, numeric_only
        ), lambda df: _CorrCovKernels.reduce(df, min_periods, method)


class _CorrCovKernels:
    """Holds kernel functions computing correlation/covariance matrices in a MapReduce manner."""

    @classmethod
    def map(cls, df: pandas.DataFrame, numeric_only: bool) -> pandas.DataFrame:
        """
        Perform the Map phase to compute the corr/cov matrix.

        In this kernel we compute all the required components to compute
        the correlation matrix at the reduce phase, the required components are:
            1. Matrix holding sums of pairwise multiplications between all columns
               defined as ``M[col1, col2] = sum(col1[i] * col2[i] for i in range(col_len))``
            2. Sum for each column (special case if there are NaN values)
            3. Sum of squares for each column (special case if there are NaN values)
            4. Number of values in each column (special case if there are NaN values)

        Parameters
        ----------
        df : pandas.DataFrame
            Partition to compute the aggregations for.
        numeric_only : bool
            Whether to only include numeric types.

        Returns
        -------
        pandas.DataFrame
            A MultiIndex columned DataFrame holding the described aggregation results for this
            specifix partition under the following keys: ``["mul", "sum", "pow2_sum", "count"]``
        """
        if numeric_only:
            df = df.select_dtypes(include="number")
        # It's more convenient to use a NumPy array here as it appears to perform
        # much faster in for-loops which this kernel function has plenty of
        raw_df = df.values.T
        try:
            nan_mask = np.isnan(raw_df)
        except TypeError as e:
            # Pandas raises ValueError on unsupported types, so casting
            # the exception to a proper type
            raise ValueError("Unsupported types with 'numeric_only=False'") from e

        has_nans = nan_mask.sum() != 0

        if has_nans:
            if not raw_df.flags.writeable:
                # making a copy if the buffer is read-only
                raw_df = raw_df.copy()
            # Replacing all NaNs with zeros so we can use much
            # faster `np.sum()` instead of slow `np.nansum()`
            np.putmask(raw_df, nan_mask, values=0)

        cols = df.columns
        # Here we compute a sum of pairwise multiplications between all columns
        # result:
        #   col1: [sum(col1 * col2), sum(col1 * col3), ... sum(col1 * colN)]
        #   col2: [sum(col2 * col3), sum(col2 * col4), ... sum(col2 * colN)]
        #   ...
        sum_of_pairwise_mul = pandas.DataFrame(
            np.dot(raw_df, raw_df.T), index=cols, columns=cols, copy=False
        )

        if has_nans:
            sums, sums_of_squares, count = cls._compute_nan_aggs(raw_df, cols, nan_mask)
        else:
            sums, sums_of_squares, count = cls._compute_non_nan_aggs(df)

        aggregations = pandas.concat(
            [sum_of_pairwise_mul, sums, sums_of_squares, count],
            copy=False,
            axis=1,
            keys=["mul", "sum", "pow2_sum", "count"],
        )

        return aggregations

    @staticmethod
    def _compute_non_nan_aggs(
        df: pandas.DataFrame,
    ) -> Tuple[pandas.Series, pandas.Series, pandas.Series]:
        """
        Aggregate a partition that is known to contain no NaN values.

        Parameters
        ----------
        df : pandas.DataFrame
            Partition to compute the aggregations for.

        Returns
        -------
        Tuple[sums: pandas.Series, sums_of_squares: pandas.Series, count: pandas.Series]
            Per-column sums, per-column sums of squares and the number of
            observations (``len(df)`` repeated for every column). Each Series is
            indexed by the partition's columns and named ``MODIN_UNNAMED_SERIES_LABEL``.
        """
        label = MODIN_UNNAMED_SERIES_LABEL
        columns = df.columns
        # Without NaNs every column has the same number of observations.
        n_obs = np.repeat(len(df), len(columns))
        count = pandas.Series(n_obs, index=columns, copy=False, name=label)
        sums_of_squares = (df**2).sum().rename(label)
        sums = df.sum().rename(label)
        return sums, sums_of_squares, count

    @staticmethod
    def _compute_nan_aggs(
        raw_df: np.ndarray, cols: pandas.Index, nan_mask: np.ndarray
    ) -> Tuple[pandas.DataFrame, pandas.DataFrame, pandas.DataFrame]:
        """
        Compute sums, sums of square and the number of observations for a partition assuming there are NaN values in it.

        Parameters
        ----------
        raw_df : np.ndarray
            Raw values of the partition to compute the aggregations for.
        cols : pandas.Index
            Columns of the partition.
        nan_mask : np.ndarray[bool]
            Boolean mask showing positions of NaN values in the `raw_df`.

        Returns
        -------
        Tuple[sums: pandas.DataFrame, sums_of_squares: pandas.DataFrame, count: pandas.DataFrame]
            A tuple storing DataFrames where each of them holds the result for
            one of the described aggregations.
        """
        # Unfortunately, in case of NaN values we forced to compute multiple sums/square sums/counts
        # for each column because we have to exclude values at positions of NaN values in each other
        # column individually.
        # Imagine we have a dataframe like this:
        #   col1: 1, 2  , 3  , 4
        #   col2: 2, NaN, 3  , 4
        #   col3: 4, 5  , NaN, 7
        # In this case we would need to compute 2 different sums/square sums/count for 'col1':
        #   - The first one excluding the values at the NaN possitions of 'col2' (1 + 3 + 4)
        #   - And the second one excluding the values at the NaN positions of 'col3' (1 + 2 + 4)
        # and then also do the same for the rest columns. At the end this should form a matrix
        # of pairwise sums/square sums/counts:
        #   sums[col1, col2] = sum(col1[i] for i in non_NA_indices_of_col2)
        #   sums[col2, col1] = sum(col2[i] for i in non_NA_indices_of_col1)
        #   ...
        # Note that sums[col1, col2] != sums[col2, col1]
        sums = {}
        sums_of_squares = {}
        count = {}

        # TODO: is it possible to get rid of this for-loop somehow?
        for i, col in enumerate(cols):
            # Here we're taking each column, resizing it to the original frame's shape to compute
            # aggregations for each other column and then excluding values at those positions where
            # other columns had NaN values by setting zeros using the validity mask:
            #  col1: 1, 2  , 3  , 4   df[i].resize()  col1: 1, 2, 3, 4  putmask()  col1: 1, 2, 3, 4
            #  col2: 2, NaN, 3  , 4   ------------->  col1: 1, 2, 3, 4  -------->  col1: 1, 0, 3, 4
            #  col3: 4, 5  , NaN, 7                   col1: 1, 2, 3, 4             col1: 1, 2, 0, 4
            # Note that 'NaN' values in this diagram are just for the sake of visibility, in reality
            # they were already replaced by zeroes at the beginning of the 'map' phase.
            col_vals = np.resize(raw_df[i], raw_df.shape)
            np.putmask(col_vals, nan_mask, values=0)

            sums[col] = pandas.Series(np.sum(col_vals, axis=1), index=cols, copy=False)
            sums_of_squares[col] = pandas.Series(
                np.sum(col_vals**2, axis=1), index=cols, copy=False
            )
            count[col] = pandas.Series(
                nan_mask.shape[1] - np.count_nonzero(nan_mask | nan_mask[i], axis=1),
                index=cols,
                copy=False,
            )

        sums = pandas.concat(sums, axis=1, copy=False)
        sums_of_squares = pandas.concat(sums_of_squares, axis=1, copy=False)
        count = pandas.concat(count, axis=1, copy=False)

        return sums, sums_of_squares, count

    @classmethod
    def reduce(
        cls, df: pandas.DataFrame, min_periods: int, method: CorrCovBuilder.Method
    ) -> pandas.DataFrame:
        """
        Perform the Reduce phase to compute the corr/cov matrix.

        Parameters
        ----------
        df : pandas.DataFrame
            A dataframe holding aggregations computed for each partition
            concatenated along the rows axis.
        min_periods : int
            Minimum number of observations required per pair of columns to have a valid result.
        method : CorrCovBuilder.Method
            Whether to build a correlation or a covariance matrix.

        Returns
        -------
        pandas.DataFrame
            The correlation matrix.

        Raises
        ------
        NotImplementedError
            If `method` is ``CorrCovBuilder.Method.COV``.
        """
        if method == CorrCovBuilder.Method.COV:
            raise NotImplementedError("Computing covariance is not yet implemented.")

        def mean_and_std(sums, sums_of_squares, count):
            # Derive the means and the (unnormalized) deviation term from the totals.
            means = sums / count
            std = np.sqrt(sums_of_squares - 2 * means * sums + count * (means**2))
            return means, std

        # The `df` here accumulates the aggregation results retrieved from each row partition
        # and combined together along the rows axis, so the `df` looks something like this:
        #   mul  sums  pow2_sums
        # a .    .     .
        # b .    .     .            <--- part1 result
        # c .    .     .
        # ---------------------------
        # a .    .     .
        # b .    .     .            <--- part2 result
        # c .    .     .
        # ---------------------------
        # ...
        # So to get the total result we have to group on the index and sum the values
        total_agg = cls._maybe_combine_nan_and_non_nan_aggs(df.groupby(level=0).sum())

        sum_of_pairwise_mul = total_agg["mul"]
        sums = total_agg["sum"]
        sums_of_squares = total_agg["pow2_sum"]
        count = total_agg["count"]
        cols = sum_of_pairwise_mul.columns

        # If there are NaNs in the original dataframe, then a matrix of sums/square
        # sums/counts was computed at the Map phase, meaning `sums` has multiple columns.
        # Branching once here keeps the table builders free of per-element conditions.
        if len(sums.columns) > 1:
            means, std = mean_and_std(sums, sums_of_squares, count)
            return cls._build_corr_table_nan(
                sum_of_pairwise_mul, means, sums, count, std, cols, min_periods
            )

        # 'count' is the same for all columns in a non-NaN case, so converting
        # it to scalar for faster binary operations
        count = count.iloc[0, 0]
        if count < min_periods:
            # Fast-path for too small data
            return pandas.DataFrame(index=cols, columns=cols, dtype="float")

        # Converting the single-column frames to Series for more convenient handling
        sums = sums.squeeze(axis=1)
        sums_of_squares = sums_of_squares.squeeze(axis=1)
        means, std = mean_and_std(sums, sums_of_squares, count)

        # 'min_periods' was already handled above for the non-NaN case,
        # so it is not passed on here
        return cls._build_corr_table_non_nan(
            sum_of_pairwise_mul, means, sums, count, std, cols
        )

    @staticmethod
    def _maybe_combine_nan_and_non_nan_aggs(
        total_agg: pandas.DataFrame,
    ) -> pandas.DataFrame:
        """
        Pair the aggregation results of partitions having and not having NaN values if needed.

        Parameters
        ----------
        total_agg : pandas.DataFrame
            A dataframe holding aggregations computed for each partition
            concatenated along the rows axis.

        Returns
        -------
        pandas.DataFrame
            DataFrame with aligned results.

        Notes
        -----
        The passed frame is returned untouched if it has at most one 'sum' column
        or if it has no ``("sum", MODIN_UNNAMED_SERIES_LABEL)`` column. Otherwise,
        the 'sum', 'pow2_sum' and 'count' values stored under ``MODIN_UNNAMED_SERIES_LABEL``
        are added to the remaining 'sum', 'pow2_sum' and 'count' columns. The columns
        stored under ``MODIN_UNNAMED_SERIES_LABEL`` are kept in the result as they are.
        The update is written through ``total_agg.values``, so the passed frame itself
        may be modified; a copy is only made if that buffer is read-only.
        """
        # Here we try to align the results between partitions that had and didn't have NaNs.
        # As the result of the Map phase, partitions with and without NaNs would produce
        # different results:
        #   - Partitions with NaNs produce a matrix of pairwise sums/square sums/counts
        #   - And parts without NaNs produce regular one-column sums/square sums/counts
        #
        # As the result, `total_agg` will be something like this:
        #    mul  | sum   pow2_sum  count | sum          pow2_sum     count
        #    a  b | a  b  a  b      a  b  | __reduced__  __reduced__  __reduced__
        # a  .  . | .  .  .  .      .  .  | .            .            .
        # b  .  . | .  .  .  .      .  .  | .            .            .
        # --------|-----------------------|----------------------------------------
        #           ^-- these are results   ^-- and these are results for
        #           for partitions that     partitions that didn't have NaNs
        #           had NaNs
        # So, to get an actual total result of these aggregations, we have to additionally
        # sum the results from non-NaN and NaN partitions.
        #
        # Here we sample the 'sum' columns to check whether we had mixed NaN and
        # non-NaN partitions; if that's not the case, we can skip the described step:
        nsums = total_agg.columns.get_locs(["sum"])
        if not (
            len(nsums) > 1 and ("sum", MODIN_UNNAMED_SERIES_LABEL) in total_agg.columns
        ):
            return total_agg

        cols = total_agg.columns

        # Finding column positions for aggregational columns
        # NOTE(review): the `get_loc()` results are OR-ed and fed to `np.where()`, so they
        # are presumably expected to be boolean masks here - confirm against the callers
        all_agg_idxs = np.where(
            cols.get_loc("sum") | cols.get_loc("pow2_sum") | cols.get_loc("count")
        )[0]
        # Finding column positions for aggregational columns that store
        # results of non-NaN partitions
        non_na_agg_idxs = cols.get_indexer_for(
            pandas.Index(
                [
                    ("sum", MODIN_UNNAMED_SERIES_LABEL),
                    ("pow2_sum", MODIN_UNNAMED_SERIES_LABEL),
                    ("count", MODIN_UNNAMED_SERIES_LABEL),
                ]
            )
        )
        # Finding column positions for aggregational columns that store
        # results of NaN partitions by deducting non-NaN indices from all indices
        na_agg_idxs = np.setdiff1d(all_agg_idxs, non_na_agg_idxs, assume_unique=True)

        # Using `.values` here so we can ignore the indices (it's really hard
        # to arrange them for pandas to properly perform the summation)
        parts_with_nans = total_agg.values[:, na_agg_idxs]
        parts_without_nans = (
            total_agg.values[:, non_na_agg_idxs]
            # Before doing the summation we have to align the shapes
            # Imagine that we have 'parts_with_nans' like:
            #    sum   pow2_sum  count
            #    a  b  a  b      a  b
            # a  1  2  3  4      5  6
            # b  1  2  3  4      5  6
            #
            # And the 'parts_without_nans' like:
            #    sum  pow2_sum  count
            # a  1    3         5
            # b  2    4         6
            #
            # Here we want to sum them in an order so the digit matches (1 + 1), (2 + 2), ...
            # For that we first have to repeat every row of 'parts_without_nans':
            #  parts_without_nans.repeat(len(parts_with_nans), axis=0):
            #    sum  pow2_sum  count
            # a  1    3         5
            # a  1    3         5
            # b  2    4         6
            # b  2    4         6
            #
            # And then reshape it using the "Fortran" (column-major) order:
            #  parts_without_nans.reshape(parts_with_nans.shape, order="F"):
            #    sum   pow2_sum  count
            #    a  b  a  b      a  b
            # a  1  2  3  4      5  6
            # b  1  2  3  4      5  6
            # After that the shapes & orders are aligned and we can perform the summation
            .repeat(repeats=len(parts_with_nans), axis=0).reshape(
                parts_with_nans.shape, order="F"
            )
        )
        replace_values = parts_with_nans + parts_without_nans

        if not total_agg.values.flags.writeable:
            # making a copy if the buffer is read-only as
            # we will need to modify `total_agg` inplace
            total_agg = total_agg.copy()
        # The assignment goes through `.values`, so it only takes effect as long as
        # `.values` exposes the frame's own buffer rather than a temporary copy
        total_agg.values[:, na_agg_idxs] = replace_values

        return total_agg

    @staticmethod
    def _build_corr_table_nan(
        sum_of_pairwise_mul: pandas.DataFrame,
        means: pandas.DataFrame,
        sums: pandas.DataFrame,
        count: pandas.DataFrame,
        std: pandas.DataFrame,
        cols: pandas.Index,
        min_periods: int,
    ) -> pandas.DataFrame:
        """
        Assemble the correlation matrix out of pairwise aggregations of NaN-containing data.

        Parameters
        ----------
        sum_of_pairwise_mul : pandas.DataFrame
            Frame whose rows are looked up by the labels from `cols`.
        means : pandas.DataFrame
            Frame labeled by `cols` along both axes.
        sums : pandas.DataFrame
            Frame labeled by `cols` along both axes.
        count : pandas.DataFrame
            Frame labeled by `cols`, also compared against `min_periods`.
        std : pandas.DataFrame
            Frame labeled by `cols` along both axes.
        cols : pandas.Index
            Labels used for both axes of the result.
        min_periods : int
            Cells where `count` is less than this value are set to NaN.

        Returns
        -------
        pandas.DataFrame
            Correlation matrix.
        """
        too_few_observations = count < min_periods
        corr = pandas.DataFrame(index=cols, columns=cols, dtype="float")

        for label in cols:
            row_means = means.loc[label]
            # The terms are accumulated strictly left-to-right, so the floating
            # point result is the same as for a single chained expression
            numerator = sum_of_pairwise_mul.loc[label] - sums.loc[label] * means[label]
            numerator = numerator - row_means * sums[label]
            numerator = numerator + count.loc[label] * row_means * means[label]
            denominator = std.loc[label] * std[label]
            corr.loc[label, :] = numerator / denominator

        # Dropping the values that were computed for too few observations
        corr[too_few_observations] = np.nan

        return corr

    @staticmethod
    def _build_corr_table_non_nan(
        sum_of_pairwise_mul: pandas.DataFrame,
        means: pandas.Series,
        sums: pandas.Series,
        count: int,
        std: pandas.Series,
        cols: pandas.Index,
    ) -> pandas.DataFrame:
        """
        Assemble the correlation matrix out of aggregations of NaN-free data.

        Parameters
        ----------
        sum_of_pairwise_mul : pandas.DataFrame
            Frame whose rows are looked up by the labels from `cols`.
        means : pandas.Series
            Series labeled by `cols`.
        sums : pandas.Series
            Series labeled by `cols`.
        count : int
            A single scalar that is applied to every column.
        std : pandas.Series
            Series labeled by `cols`.
        cols : pandas.Index
            Labels used for both axes of the result.

        Returns
        -------
        pandas.DataFrame
            Correlation matrix.
        """
        corr = pandas.DataFrame(index=cols, columns=cols, dtype="float")

        for label in cols:
            label_mean = means.loc[label]
            # The terms are accumulated strictly left-to-right, so the floating
            # point result is the same as for a single chained expression
            numerator = sum_of_pairwise_mul.loc[label] - sums.loc[label] * means
            numerator = numerator - label_mean * sums
            numerator = numerator + count * label_mean * means
            denominator = std.loc[label] * std
            corr.loc[label, :] = numerator / denominator

        return corr


================================================
FILE: modin/core/storage_formats/pandas/groupby.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Contains implementations for GroupbyReduce functions."""

import numpy as np
import pandas
from pandas.core.dtypes.cast import find_common_type

from modin.config import RangePartitioning
from modin.core.dataframe.algebra import GroupByReduce
from modin.error_message import ErrorMessage
from modin.utils import hashable


class GroupbyReduceImpl:
    """Provide TreeReduce implementations for certain groupby aggregations."""

    @classmethod
    def get_impl(cls, agg_name):
        """
        Look up the TreeReduce implementation stored for `agg_name`.

        Parameters
        ----------
        agg_name : hashable
            Name of the aggregation to look up.

        Returns
        -------
        (map_fn: Union[callable, str], reduce_fn: Union[callable, str], default2pandas_fn: callable)

        Raises
        ------
        KeyError
            If nothing is stored under `agg_name`.
        """
        try:
            impl = cls._groupby_reduce_impls[agg_name]
        except KeyError:
            raise KeyError(f"Have no implementation for {agg_name}.")
        return impl

    @classmethod
    def has_impl_for(cls, agg_func):
        """
        Tell whether every requested aggregation has a TreeReduce implementation.

        Parameters
        ----------
        agg_func : hashable or dict
            Either a single aggregation or an aggregation dictionary. Anything
            else is reported as not implemented.

        Returns
        -------
        bool
        """
        if hashable(agg_func):
            return agg_func in cls._groupby_reduce_impls

        if isinstance(agg_func, dict):
            # We have to keep this import away from the module level to avoid circular import
            from modin.pandas.utils import walk_aggregation_dict

            # every function met in the dictionary has to be implemented
            return all(
                func in cls._groupby_reduce_impls
                for _, func, _, _ in walk_aggregation_dict(agg_func)
            )

        return False

    @classmethod
    def build_qc_method(cls, agg_name, finalizer_fn=None):
        """
        Create a query compiler method computing the specified groupby aggregation via TreeReduce.

        Parameters
        ----------
        agg_name : hashable
            Name of the aggregation to build the method for.
        finalizer_fn : callable(pandas.DataFrame) -> pandas.DataFrame, default: None
            A callable to execute at the end a groupby kernel against groupby result.

        Returns
        -------
        callable
            Function that takes query compiler and executes GroupBy aggregation
            with TreeReduce algorithm. If ``RangePartitioning`` is enabled, the function
            first tries ``query_compiler._groupby_shuffle()`` and only uses TreeReduce
            when that raises ``NotImplementedError``.
        """
        map_fn, reduce_fn, d2p_fn = cls.get_impl(agg_name)
        tree_reduce_method = GroupByReduce.register(
            map_fn,
            reduce_fn,
            default_to_pandas_func=d2p_fn,
            finalizer_fn=finalizer_fn,
        )

        def method(query_compiler, *args, **kwargs):
            if not RangePartitioning.get():
                return tree_reduce_method(query_compiler, *args, **kwargs)

            try:
                if finalizer_fn is not None:
                    raise NotImplementedError(
                        "Range-partitioning groupby is not implemented yet when a finalizing function is specified."
                    )
                return query_compiler._groupby_shuffle(
                    *args, agg_func=agg_name, **kwargs
                )
            except NotImplementedError as err:
                ErrorMessage.warn(
                    f"Can't use range-partitioning groupby implementation because of: {err}"
                    + "\nFalling back to a TreeReduce implementation."
                )

            return tree_reduce_method(query_compiler, *args, **kwargs)

        return method

    @staticmethod
    def _build_skew_impl():
        """
        Create and register the TreeReduce implementation for 'skew' groupby aggregation.

        Returns
        -------
        (map_fn: callable, reduce_fn: callable, default2pandas_fn: callable)
        """

        def skew_map(dfgb, *args, **kwargs):
            # Taking the data to aggregate: either what `dfgb` reports as selected
            # or all the columns of the grouped object except the excluded ones
            if dfgb._selection is None:
                value_cols = dfgb.obj.columns.difference(dfgb.exclusions)
                values = dfgb.obj[value_cols]
            else:
                values = dfgb._selected_obj

            squares = values**2
            cubes = values**3

            # Per-group counts and sums of the first three powers
            partial_aggs = [
                dfgb.count(*args, **kwargs),
                dfgb.sum(*args, **kwargs),
                squares.groupby(dfgb.grouper).sum(*args, **kwargs),
                cubes.groupby(dfgb.grouper).sum(*args, **kwargs),
            ]
            return pandas.concat(
                partial_aggs,
                axis=1,
                keys=["count", "sum", "pow2_sum", "pow3_sum"],
                names=[GroupByReduce.ID_LEVEL_NAME],
                copy=False,
            )

        def skew_reduce(dfgb, *args, **kwargs):
            totals = dfgb.sum(*args, **kwargs)
            if totals.empty:
                return totals.droplevel(GroupByReduce.ID_LEVEL_NAME, axis=1)

            count, total, total_pow2, total_pow3 = (
                totals[key] for key in ("count", "sum", "pow2_sum", "pow3_sum")
            )

            # mean = sum(x) / count
            mean = total / count

            # moment2 = sum( (x - mean)^2 ) = sum(x^2 - 2*x*mean + mean^2)
            moment2 = total_pow2 - 2 * mean * total + count * (mean**2)

            # moment3 = sum( (x - mean)^3 ) = sum(x^3 - 3*x^2*mean + 3*x*mean^2 - mean^3)
            moment3 = (
                total_pow3
                - 3 * mean * total_pow2
                + 3 * total * (mean**2)
                - count * (mean**3)
            )

            # The equation for the 'skew' was taken directly from pandas:
            # https://github.com/pandas-dev/pandas/blob/8dab54d6573f7186ff0c3b6364d5e4dd635ff3e7/pandas/core/nanops.py#L1226
            with np.errstate(invalid="ignore", divide="ignore"):
                scale = count * (count - 1) ** 0.5 / (count - 2)
                ratio = moment3 / moment2**1.5
                skew_res = scale * ratio

            # Setting dummy values for invalid results in accordance with pandas
            skew_res[moment2 == 0] = 0
            skew_res[count < 3] = np.nan
            return skew_res

        GroupByReduce.register_implementation(skew_map, skew_reduce)

        def_to_pandas = lambda grp, *args, **kwargs: grp.skew(*args, **kwargs)  # noqa: E731
        return skew_map, skew_reduce, def_to_pandas

    @staticmethod
    def _build_mean_impl():
        """
        Create and register the TreeReduce implementation for 'mean' groupby aggregation.

        Returns
        -------
        (map_fn: callable, reduce_fn: callable, default2pandas_fn: callable)
        """

        def mean_map(dfgb, **kwargs):
            # Per-group sums and counts, the mean is derived from them at the reduce phase
            partial_aggs = [dfgb.sum(**kwargs), dfgb.count()]
            return pandas.concat(
                partial_aggs,
                axis=1,
                keys=["sum", "count"],
                names=[GroupByReduce.ID_LEVEL_NAME],
                copy=False,
            )

        def mean_reduce(dfgb, **kwargs):
            """
            Derive the mean of each group from the sums and counts collected at the map phase.

            Parameters
            ----------
            dfgb : pandas.DataFrameGroupBy
                GroupBy object for column-partition.
            **kwargs : dict
                Additional keyword parameters to be passed in ``pandas.DataFrameGroupBy.sum``.

            Returns
            -------
            pandas.DataFrame
                A pandas Dataframe with mean values in each column of each group.
            """
            totals = dfgb.sum(**kwargs)
            if totals.empty:
                return totals.droplevel(GroupByReduce.ID_LEVEL_NAME, axis=1)

            return totals["sum"] / totals["count"]

        GroupByReduce.register_implementation(mean_map, mean_reduce)

        def_to_pandas = lambda grp, *args, **kwargs: grp.mean(*args, **kwargs)  # noqa: E731
        return mean_map, mean_reduce, def_to_pandas


# Registry of the available TreeReduce implementations. It maps an aggregation name
# to a ``(map_fn, reduce_fn, default2pandas_fn)`` triple, which is what
# ``GroupbyReduceImpl.get_impl()`` returns and ``GroupbyReduceImpl.build_qc_method()``
# unpacks. The registry is attached after the class body because the 'mean' and 'skew'
# entries are built by calling static methods through the class name, so the class
# has to exist by that moment.
GroupbyReduceImpl._groupby_reduce_impls = {
    "all": ("all", "all", lambda grp, *args, **kwargs: grp.all(*args, **kwargs)),
    "any": ("any", "any", lambda grp, *args, **kwargs: grp.any(*args, **kwargs)),
    "count": ("count", "sum", lambda grp, *args, **kwargs: grp.count(*args, **kwargs)),
    "max": ("max", "max", lambda grp, *args, **kwargs: grp.max(*args, **kwargs)),
    "mean": GroupbyReduceImpl._build_mean_impl(),
    "min": ("min", "min", lambda grp, *args, **kwargs: grp.min(*args, **kwargs)),
    "prod": ("prod", "prod", lambda grp, *args, **kwargs: grp.prod(*args, **kwargs)),
    "size": ("size", "sum", lambda grp, *args, **kwargs: grp.size(*args, **kwargs)),
    "skew": GroupbyReduceImpl._build_skew_impl(),
    "sum": ("sum", "sum", lambda grp, *args, **kwargs: grp.sum(*args, **kwargs)),
}


class PivotTableImpl:
    """Provide MapReduce, Range-Partitioning and Full-Column implementations for 'pivot_table()'."""

    @classmethod
    def map_reduce_impl(
        cls, qc, unique_keys, drop_column_level, pivot_kwargs
    ):  # noqa: PR01
        """Compute 'pivot_table()' via a MapReduce groupby that is finalized into a pivot table."""
        if pivot_kwargs["margins"]:
            raise NotImplementedError(
                "MapReduce 'pivot_table' implementation doesn't support 'margins=True' parameter"
            )

        index = pivot_kwargs["index"]
        columns = pivot_kwargs["columns"]
        values = pivot_kwargs["values"]
        aggfunc = pivot_kwargs["aggfunc"]

        if not GroupbyReduceImpl.has_impl_for(aggfunc):
            raise NotImplementedError(
                "MapReduce 'pivot_table' implementation only supports 'aggfuncs' that are implemented in 'GroupbyReduceImpl'"
            )

        if set(index).intersection(columns):
            raise NotImplementedError(
                "MapReduce 'pivot_table' implementation doesn't support intersections of 'index' and 'columns'"
            )

        to_group, keys_columns = cls._separate_data_from_grouper(
            qc, values, unique_keys
        )
        to_unstack = columns if index else None

        # The groupby result is converted to a pivot table by the finalizer
        groupby_method = GroupbyReduceImpl.build_qc_method(
            aggfunc,
            finalizer_fn=lambda df: cls._pivot_table_from_groupby(
                df,
                pivot_kwargs["dropna"],
                drop_column_level,
                to_unstack,
                pivot_kwargs["fill_value"],
            ),
        )
        groupby_kwargs = {
            "observed": pivot_kwargs["observed"],
            "sort": pivot_kwargs["sort"],
        }
        result = groupby_method(
            to_group,
            by=keys_columns,
            axis=0,
            groupby_kwargs=groupby_kwargs,
            agg_args=(),
            agg_kwargs={},
            drop=True,
        )

        return result.transpose() if to_unstack is None else result

    @classmethod
    def full_axis_impl(
        cls, qc, unique_keys, drop_column_level, pivot_kwargs
    ):  # noqa: PR01
        """Compute 'pivot_table()' by running ``pandas.pivot_table`` over full-column partitions."""
        index = pivot_kwargs["index"]
        columns = pivot_kwargs["columns"]
        values = pivot_kwargs["values"]

        to_group, keys_columns = cls._separate_data_from_grouper(
            qc, values, unique_keys
        )

        def applyier(df, other):  # pragma: no cover
            """
            Build pivot table for a single partition.

            Parameters
            ----------
            df : pandas.DataFrame
                Partition of the self frame.
            other : pandas.DataFrame
                Broadcasted partition that contains `value` columns
                of the self frame.

            Returns
            -------
            pandas.DataFrame
                Pivot table for this particular partition.
            """
            joined = pandas.concat([df, other], axis=1, copy=False)
            # the inputs are not needed anymore, dropping the references
            # to reduce peak memory consumption
            del df, other
            pivoted = pandas.pivot_table(
                joined,
                **pivot_kwargs,
            )
            # to reduce peak memory consumption
            del joined

            # if only one value is specified, removing level that maps
            # columns from `values` to the actual values
            if drop_column_level is not None:
                pivoted = pivoted.droplevel(drop_column_level, axis=1)

            # in that case Pandas transposes the result of `pivot_table`,
            # transposing it back to be consistent with column axis values along
            # different partitions
            if len(index) == 0 and len(columns) > 0:
                # TODO: remove find_common_type+astype after pandas fix the following issue
                # transpose loses dtypes: https://github.com/pandas-dev/pandas/issues/43337
                common_type = find_common_type(pivoted.dtypes.tolist())
                pivoted = pivoted.transpose().astype(common_type, copy=False)

            return pivoted

        new_frame = to_group._modin_frame.broadcast_apply_full_axis(
            axis=0, func=applyier, other=keys_columns._modin_frame
        )
        result = qc.__constructor__(new_frame)

        # transposing the result again, to be consistent with Pandas result
        if len(index) == 0 and len(columns) > 0:
            result = result.transpose()

        return result

    @classmethod
    def range_partition_impl(
        cls, qc, unique_keys, drop_column_level, pivot_kwargs
    ):  # noqa: PR01
        """Compute 'pivot_table()' via a Range-Partitioning groupby reshaped into a pivot table."""
        if pivot_kwargs["margins"]:
            raise NotImplementedError(
                "Range-partitioning 'pivot_table' implementation doesn't support 'margins=True' parameter"
            )

        index = pivot_kwargs["index"]
        columns = pivot_kwargs["columns"]
        values = pivot_kwargs["values"]

        if set(index).intersection(columns):
            raise NotImplementedError(
                "Range-partitioning 'pivot_table' implementation doesn't support intersections of 'index' and 'columns'"
            )

        if values is not None:
            # only the keys and the values to aggregate are required further
            involved_labels = [*index, *columns, *values]
            to_take = list(np.unique(involved_labels))
            qc = qc.getitem_column_array(to_take, ignore_order=True)

        to_unstack = columns if index else None

        groupby_kwargs = {
            "observed": pivot_kwargs["observed"],
            "sort": pivot_kwargs["sort"],
        }
        groupby_result = qc._groupby_shuffle(
            by=list(unique_keys),
            agg_func=pivot_kwargs["aggfunc"],
            axis=0,
            groupby_kwargs=groupby_kwargs,
            agg_args=(),
            agg_kwargs={},
            drop=True,
        )

        # the length of 'groupby_result' is typically really small here,
        # so it's okay to call full-column function
        result = groupby_result._modin_frame.apply_full_axis(
            axis=0,
            func=lambda df: cls._pivot_table_from_groupby(
                df,
                pivot_kwargs["dropna"],
                drop_column_level,
                to_unstack,
                pivot_kwargs["fill_value"],
                # FIXME: Range-partitioning impl has a problem with the resulting order in case of multiple grouping keys,
                # so the 'sort' parameter is only forwarded in this case
                # https://github.com/modin-project/modin/issues/6875
                sort=pivot_kwargs["sort"] if len(unique_keys) > 1 else False,
            ),
        )

        if to_unstack is None:
            result = result.transpose()

        return qc.__constructor__(result)

    @staticmethod
    def _pivot_table_from_groupby(
        df, dropna, drop_column_level, to_unstack, fill_value, sort=False
    ):
        """
        Reshape a group by aggregation result into a pivot table.

        Parameters
        ----------
        df : pandas.DataFrame
            Group by aggregation result.
        dropna : bool
            Whether to drop the columns consisting of NaN values only.
        drop_column_level : int or None
            An extra columns level to drop.
        to_unstack : list of labels or None
            Group by keys to pass to ``.unstack()``. Represent `columns` parameter
            for ``.pivot_table()``. Only used if `df` has a multi-level index.
        fill_value : scalar or None
            Value to replace NaN values with. ``None`` means no replacement.
        sort : bool, default: False
            Whether to sort the result along index.

        Returns
        -------
        pandas.DataFrame
        """
        table = df
        if table.index.nlevels > 1 and to_unstack is not None:
            table = table.unstack(level=to_unstack)
        if drop_column_level is not None:
            table = table.droplevel(drop_column_level, axis=1)
        if dropna:
            table = table.dropna(axis=1, how="all")
        if fill_value is not None:
            table = table.fillna(fill_value, downcast="infer")
        if sort:
            table = table.sort_index(axis=0)
        return table

    @staticmethod
    def _separate_data_from_grouper(qc, values, unique_keys):
        """
        Divide `qc` into the values to aggregate and the key columns to group by.

        Parameters
        ----------
        qc : PandasQueryCompiler
        values : list of labels or None
            List of columns to aggregate. ``None`` means all columns except 'unique_keys'.
        unique_keys : list of labels
            List of key columns to group by.

        Returns
        -------
        to_aggregate : PandasQueryCompiler
        keys_to_group : PandasQueryCompiler
        """
        if values is not None:
            to_aggregate = qc.getitem_column_array(np.unique(values), ignore_order=True)
        else:
            to_aggregate = qc.drop(columns=unique_keys)

        keys_to_group = qc.getitem_column_array(unique_keys, ignore_order=True)

        return to_aggregate, keys_to_group


================================================
FILE: modin/core/storage_formats/pandas/merge.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Contains implementations for Merge/Join."""

from __future__ import annotations

from typing import TYPE_CHECKING, Optional

import pandas
from pandas.core.dtypes.common import is_list_like
from pandas.errors import MergeError

from modin.config import MinRowPartitionSize, NPartitions
from modin.core.dataframe.base.dataframe.utils import join_columns
from modin.core.dataframe.pandas.metadata import ModinDtypes

from .utils import merge_partitioning

if TYPE_CHECKING:
    from modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler


# TODO: add methods for 'join' here
class MergeImpl:
    """Provide implementations for merge/join."""

    @classmethod
    def range_partitioning_merge(cls, left, right, kwargs):
        """
        Execute merge using range-partitioning implementation.

        Parameters
        ----------
        left : PandasQueryCompiler
        right : PandasQueryCompiler
        kwargs : dict
            Keyword arguments for ``pandas.merge()`` function.

        Returns
        -------
        PandasQueryCompiler
        """
        if (
            kwargs.get("left_index", False)
            or kwargs.get("right_index", False)
            or kwargs.get("left_on", None) is not None
            or kwargs.get("left_on", None) is not None
            or kwargs.get("how", "left") not in ("left", "inner")
        ):
            raise NotImplementedError(
                f"The passed parameters are not yet supported by range-partitioning merge: {kwargs=}"
            )

        on = kwargs.get("on", None)
        if on is not None and not isinstance(on, list):
            on = [on]
        if on is None or len(on) > 1:
            raise NotImplementedError(
                f"Merging on multiple columns is not yet supported by range-partitioning merge: {on=}"
            )

        if any(col not in left.columns or col not in right.columns for col in on):
            raise NotImplementedError(
                "Merging on an index level is not yet supported by range-partitioning merge."
            )

        def func(left, right):
            return left.merge(right, **kwargs)

        new_columns, new_dtypes = cls._compute_result_metadata(
            left,
            right,
            on,
            left_on=None,
            right_on=None,
            suffixes=kwargs.get("suffixes", ("_x", "_y")),
        )

        return left.__constructor__(
            left._modin_frame._apply_func_to_range_partitioning_broadcast(
                right._modin_frame,
                func=func,
                key=on,
                new_columns=new_columns,
                new_dtypes=new_dtypes,
            )
            # pandas resets the index of the result unless we were merging on an index level,
            # the current implementation only supports merging on column names, so dropping
            # the index unconditionally
        ).reset_index(drop=True)

    @classmethod
    def row_axis_merge(
        cls, left: PandasQueryCompiler, right: PandasQueryCompiler, kwargs: dict
    ) -> PandasQueryCompiler:
        """
        Execute merge using row-axis implementation.

        When `how` is "left" or "inner" (or "right" while the right frame has
        partitions) and neither `left_index` nor `right_index` is set, one frame
        is combined and passed as `other` to ``broadcast_apply_full_axis`` on the
        remaining frame, with ``pandas.merge`` as the applied function. Any other
        combination of parameters is delegated to ``default_to_pandas``.

        Parameters
        ----------
        left : PandasQueryCompiler
        right : PandasQueryCompiler
        kwargs : dict
            Keyword arguments for ``pandas.merge()`` function.

        Returns
        -------
        PandasQueryCompiler

        Notes
        -----
        On the distributed path the passed `kwargs` dict is modified in place:
        its "sort" entry is set to ``False``.
        """
        how = kwargs.get("how", "inner")
        on = kwargs.get("on", None)
        left_on = kwargs.get("left_on", None)
        right_on = kwargs.get("right_on", None)
        left_index = kwargs.get("left_index", False)
        right_index = kwargs.get("right_index", False)
        sort = kwargs.get("sort", False)

        if (
            (
                how in ["left", "inner"]
                or (how == "right" and right._modin_frame._partitions.size != 0)
            )
            and left_index is False
            and right_index is False
        ):
            # The requested `sort` value was captured above and is applied to the whole
            # result in the `if sort:` section below, so the merge function itself never sorts.
            kwargs["sort"] = False

            # For a "right" merge the operands are swapped, so from here on `left` names the
            # frame the function is applied to and `right` the one being broadcast.
            # `map_func` restores the original argument order when calling pandas.
            reverted = False
            if how == "right":
                left, right = right, left
                reverted = True

            def should_keep_index(
                left: PandasQueryCompiler,
                right: PandasQueryCompiler,
            ) -> bool:
                # True if at least one join key is the name of an index level in both frames.
                # Reads `left_on`/`right_on`/`on` from the enclosing scope at call time, i.e.
                # after they have been normalized to lists below.
                keep_index = False
                if left_on is not None and right_on is not None:
                    keep_index = any(
                        o in left.index.names
                        and o in right_on
                        and o in right.index.names
                        for o in left_on
                    )
                elif on is not None:
                    keep_index = any(
                        o in left.index.names and o in right.index.names for o in on
                    )
                return keep_index

            def map_func(
                left, right, kwargs=kwargs
            ) -> pandas.DataFrame:  # pragma: no cover
                # `kwargs` is bound as a default argument; `reverted` is read from the closure
                if reverted:
                    df = pandas.merge(right, left, **kwargs)
                else:
                    df = pandas.merge(left, right, **kwargs)
                return df

            # Want to ensure that these are python lists
            if left_on is not None and right_on is not None:
                left_on = list(left_on) if is_list_like(left_on) else [left_on]
                right_on = list(right_on) if is_list_like(right_on) else [right_on]
            elif on is not None:
                on = list(on) if is_list_like(on) else [on]

            right_to_broadcast = right._modin_frame.combine()
            # Metadata is computed for the operands in their original (user-facing) order,
            # hence the swap back when `reverted` is set
            new_columns, new_dtypes = cls._compute_result_metadata(
                *((left, right) if not reverted else (right, left)),
                on,
                left_on,
                right_on,
                kwargs.get("suffixes", ("_x", "_y")),
            )

            # We rebalance when the ratio of the number of existing partitions to
            # the ideal number of partitions is smaller than this threshold. The
            # threshold is a heuristic that may need to be tuned for performance.
            if (
                left._modin_frame._partitions.shape[0] < 0.3 * NPartitions.get()
                # to avoid empty partitions after repartition; can materialize index
                and len(left._modin_frame)
                > NPartitions.get() * MinRowPartitionSize.get()
            ):
                left = left.repartition(axis=0)

            new_left = left.__constructor__(
                left._modin_frame.broadcast_apply_full_axis(
                    axis=1,
                    func=map_func,
                    other=right_to_broadcast,
                    # We're going to explicitly change the shape across the 1-axis,
                    # so we want for partitioning to adapt as well
                    keep_partitioning=False,
                    num_splits=merge_partitioning(
                        left._modin_frame, right._modin_frame, axis=1
                    ),
                    new_columns=new_columns,
                    sync_labels=False,
                    dtypes=new_dtypes,
                )
            )

            # Here we want to understand whether we're joining on a column or on an index level.
            # It's cool if indexes are already materialized so we can easily check that, if not
            # it's fine too, we can also decide that by columns, which tend to be already
            # materialized quite often compared to the indexes.
            keep_index = False
            if left.frame_has_materialized_index:
                keep_index = should_keep_index(left, right)
            else:
                # Have to trigger columns materialization. Hope they're already available at this point.
                # A key that is a column of neither frame is treated as an index level.
                if left_on is not None and right_on is not None:
                    keep_index = any(
                        o not in right.columns
                        and o in left_on
                        and o not in left.columns
                        for o in right_on
                    )
                elif on is not None:
                    keep_index = any(
                        o not in right.columns and o not in left.columns for o in on
                    )

            # Apply the sorting the caller asked for: by index levels when the index is
            # kept, by column values otherwise
            if sort:
                if left_on is not None and right_on is not None:
                    new_left = (
                        new_left.sort_index(axis=0, level=left_on + right_on)
                        if keep_index
                        else new_left.sort_rows_by_column_values(left_on + right_on)
                    )
                elif on is not None:
                    new_left = (
                        new_left.sort_index(axis=0, level=on)
                        if keep_index
                        else new_left.sort_rows_by_column_values(on)
                    )

            # Unless the merge was done on index levels, the result gets a fresh default index
            return new_left if keep_index else new_left.reset_index(drop=True)
        else:
            # Unsupported parameter combination: let pandas handle the whole merge
            return left.default_to_pandas(pandas.DataFrame.merge, right, **kwargs)

    @classmethod
    def _compute_result_metadata(
        cls,
        left: PandasQueryCompiler,
        right: PandasQueryCompiler,
        on,
        left_on,
        right_on,
        suffixes,
    ) -> tuple[Optional[pandas.Index], Optional[ModinDtypes]]:
        """
        Compute columns and dtypes metadata for the result of merge if possible.

        Parameters
        ----------
        left : PandasQueryCompiler
        right : PandasQueryCompiler
        on : label, list of labels or None
            `on` argument that was passed to ``pandas.merge()``.
        left_on : label, list of labels or None
            `left_on` argument that was passed to ``pandas.merge()``.
        right_on : label, list of labels or None
            `right_on` argument that was passed to ``pandas.merge()``.
        suffixes : list of strings
            `suffixes` argument that was passed to ``pandas.merge()``.

        Returns
        -------
        new_columns : pandas.Index or None
            Columns for the result of merge. ``None`` if not enought metadata to compute.
        new_dtypes : ModinDtypes or None
            Dtypes for the result of merge. ``None`` if not enought metadata to compute.
        """
        new_columns = None
        new_dtypes = None

        if not left.frame_has_materialized_columns:
            return new_columns, new_dtypes

        if left_on is None and right_on is None:
            if on is None:
                on = [c for c in left.columns if c in right.columns]
            _left_on, _right_on = on, on
        else:
            if left_on is None or right_on is None:
                raise MergeError(
                    "Must either pass only 'on' or 'left_on' and 'right_on', not combination of them."
                )
            _left_on, _right_on = left_on, right_on

        try:
            new_columns, left_renamer, right_renamer = join_columns(
                left.columns,
                right.columns,
                _left_on,
                _right_on,
                suffixes,
            )
        except NotImplementedError:
            # This happens when one of the keys to join is an index level. Pandas behaviour
            # is really complicated in this case, so we're not computing resulted columns for now.
            pass
        else:
            # renamers may contain columns from 'index', so trying to merge index and column dtypes here
            right_index_dtypes = (
                right.index.dtypes
                if isinstance(right.index, pandas.MultiIndex)
                else pandas.Series([right.index.dtype], index=[right.index.name])
            )
            right_dtypes = pandas.concat([right.dtypes, right_index_dtypes])[
                right_renamer.keys()
            ].rename(right_renamer)

            left_index_dtypes = left._modin_frame._index_cache.maybe_get_dtypes()
            left_dtypes = (
                ModinDtypes.concat([left._modin_frame._dtypes, left_index_dtypes])
                .lazy_get(left_renamer.keys())
                .set_index(list(left_renamer.values()))
            )
            new_dtypes = ModinDtypes.concat([left_dtypes, right_dtypes])

        return new_columns, new_dtypes


================================================
FILE: modin/core/storage_formats/pandas/native_query_compiler.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
Module contains ``NativeQueryCompiler`` class.

``NativeQueryCompiler`` is responsible for compiling efficient DataFrame algebra
queries for small data and empty ``PandasDataFrame``.
"""

from typing import TYPE_CHECKING, Any, Optional, Union

import numpy as np
import pandas
from pandas.core.dtypes.common import is_scalar

from modin.config.envvars import (
    NativePandasDeepCopy,
    NativePandasMaxRows,
    NativePandasTransferThreshold,
)
from modin.core.dataframe.base.interchange.dataframe_protocol.dataframe import (
    ProtocolDataframe,
)
from modin.core.storage_formats.base.query_compiler import BaseQueryCompiler
from modin.utils import _inherit_docstrings, try_cast_to_pandas

if TYPE_CHECKING:
    from modin.pandas import DataFrame, Series
    from modin.pandas.base import BasePandasDataset

# Text of the exception raised by ``NativeQueryCompiler.repartition``.
_NO_REPARTITION_ON_NATIVE_EXECUTION_EXCEPTION_MESSAGE = (
    "Modin dataframes and series using native execution do not have partitions."
)


def _get_axis(axis):
    """
    Create a getter returning the labels of the requested axis.

    Parameters
    ----------
    axis : {0, 1}
        Axis to get labels from. 0 is for index, any other value is
        treated as columns.

    Returns
    -------
    callable(NativeQueryCompiler) -> pandas.Index
        Function reading the labels from ``self._modin_frame``.
    """
    attribute = "index" if axis == 0 else "columns"
    return lambda self: getattr(self._modin_frame, attribute)


def _set_axis(axis):
    """
    Create a setter assigning the labels of the requested axis.

    Parameters
    ----------
    axis : {0, 1}
        Axis to set labels on. 0 is for index, any other value is
        treated as columns.

    Returns
    -------
    callable(NativeQueryCompiler)
        Function writing the passed labels onto ``self._modin_frame``.
    """
    attribute = "index" if axis == 0 else "columns"

    def set_axis(self, labels):
        setattr(self._modin_frame, attribute, labels)

    return set_axis


@_inherit_docstrings(BaseQueryCompiler)
class NativeQueryCompiler(BaseQueryCompiler):
    """
    Query compiler that executes operations on a single native pandas frame.

    Parameters
    ----------
    pandas_frame : pandas.DataFrame
        The pandas frame to query with the compiled queries. Objects exposing
        ``_to_pandas`` are converted with it first; scalars and any other
        non-DataFrame input are wrapped into a ``pandas.DataFrame``.
    """

    _OPERATION_INITIALIZATION_OVERHEAD = 0
    _OPERATION_PER_ROW_OVERHEAD = 0

    _modin_frame: pandas.DataFrame
    _should_warn_on_default_to_pandas: bool = False

    def __init__(self, pandas_frame):
        if hasattr(pandas_frame, "_to_pandas"):
            pandas_frame = pandas_frame._to_pandas()

        if isinstance(pandas_frame, pandas.DataFrame):
            # For performance purposes, the copy is "shallow" when NativePandasDeepCopy
            # is disabled (the default value). This may cause unexpected behavior if the
            # parent native frame is mutated, but creates a very significant performance
            # improvement on large data.
            frame = pandas_frame.copy(deep=NativePandasDeepCopy.get())
        elif is_scalar(pandas_frame):
            # A scalar becomes a frame holding that single value
            frame = pandas.DataFrame([pandas_frame])
        else:
            frame = pandas.DataFrame(pandas_frame)

        self._modin_frame = frame

    storage_format = property(
        lambda self: "Native", doc=BaseQueryCompiler.storage_format.__doc__
    )
    engine = property(lambda self: "Native", doc=BaseQueryCompiler.engine.__doc__)

    def execute(self):
        # Nothing is deferred with a native frame, so there is nothing to wait for
        pass

    @property
    def frame_has_materialized_dtypes(self) -> bool:
        """
        Tell whether dtypes of the underlying dataframe are materialized.

        Returns
        -------
        bool
            Always ``True`` for this query compiler.
        """
        return True

    def set_frame_dtypes_cache(self, dtypes):
        """
        Accept a dtypes cache for the underlying dataframe and ignore it.

        Parameters
        ----------
        dtypes : pandas.Series, ModinDtypes, callable or None

        Notes
        -----
        This function is for consistency with other QCs,
        dtypes should be assigned directly on the frame.
        """
        pass

    def set_frame_index_cache(self, index):
        """
        Accept an index cache for the underlying dataframe and ignore it.

        Parameters
        ----------
        index : sequence, callable or None

        Notes
        -----
        This function is for consistency with other QCs,
        index should be assigned directly on the frame.
        """
        pass

    @property
    def frame_has_index_cache(self):
        """
        Tell whether the index cache exists for the underlying dataframe.

        Returns
        -------
        bool
            Always ``True`` for this query compiler.
        """
        return True

    @property
    def frame_has_dtypes_cache(self) -> bool:
        """
        Tell whether the dtypes cache exists for the underlying dataframe.

        Returns
        -------
        bool
            Always ``True`` for this query compiler.
        """
        return True

    def copy(self):
        # The constructor copies the frame using the NativePandasDeepCopy setting.
        # When that setting is enabled the constructor's copy is already deep, so the
        # frame is handed over as is; otherwise a deep copy is made explicitly here.
        frame = self._modin_frame
        if not NativePandasDeepCopy.get():
            frame = frame.copy(deep=True)
        return self.__constructor__(frame)

    def to_pandas(self):
        # For performance purposes, the returned copy is "shallow" when
        # NativePandasDeepCopy is disabled (the default value). This may cause
        # unexpected behavior if the parent native frame is mutated, but creates a
        # very significant performance improvement on large data.
        deep = NativePandasDeepCopy.get()
        return self._modin_frame.copy(deep=deep)

    @classmethod
    def from_pandas(cls, df, data_cls):
        # `data_cls` is not used by this query compiler
        return cls(df)

    @classmethod
    def from_arrow(cls, at, data_cls):
        # `data_cls` is not used by this query compiler
        return cls(at.to_pandas())

    def free(self):
        return

    def finalize(self):
        return

    def move_to(self, target_backend: str) -> Union[BaseQueryCompiler, Any]:
        return NotImplemented

    @classmethod
    def move_from(cls, source_qc: BaseQueryCompiler) -> Union[BaseQueryCompiler, Any]:
        return NotImplemented

    @classmethod
    def _engine_max_size(cls):
        # The configured value only applies to this exact class; sub-classes
        # report their own class-level limit.
        if cls != NativeQueryCompiler:
            return cls._MAX_SIZE_THIS_ENGINE_CAN_HANDLE
        return NativePandasMaxRows.get()

    @classmethod
    def _transfer_threshold(cls):
        # The configured value only applies to this exact class; sub-classes
        # report their own class-level threshold.
        if cls != NativeQueryCompiler:
            return cls._TRANSFER_THRESHOLD
        return NativePandasTransferThreshold.get()

    def do_array_ufunc_implementation(
        self,
        frame: "BasePandasDataset",
        ufunc: np.ufunc,
        method: str,
        *inputs: Any,
        **kwargs: Any
    ) -> Union["DataFrame", "Series", Any]:
        assert (
            self is frame._query_compiler
        ), "array ufunc called with mismatched query compiler and input frame"

        # A non-dataframe input is represented by the first column of the native frame
        if frame._is_dataframe:
            native = self._modin_frame
        else:
            native = self._modin_frame.iloc[:, 0]

        # `frame` itself is replaced by its native counterpart, any other input is
        # converted with `try_cast_to_pandas`
        native_inputs = [
            native if item is frame else try_cast_to_pandas(item) for item in inputs
        ]
        native_kwargs = try_cast_to_pandas(kwargs)
        outcome = native.__array_ufunc__(
            ufunc, method, *native_inputs, **native_kwargs
        )

        if isinstance(outcome, pandas.DataFrame):
            from modin.pandas import DataFrame

            return DataFrame(outcome)
        if isinstance(outcome, pandas.Series):
            from modin.pandas import Series

            return Series(outcome)
        # ufuncs are required to be one-to-one mappings, so this branch should never be hit
        return outcome  # pragma: no cover

    # Dataframe interchange protocol
    def to_interchange_dataframe(
        self, nan_as_null: bool = False, allow_copy: bool = True
    ):
        native = self._modin_frame
        return native.__dataframe__(nan_as_null=nan_as_null, allow_copy=allow_copy)

    @classmethod
    def from_interchange_dataframe(cls, df: ProtocolDataframe, data_cls):
        native = pandas.api.interchange.from_dataframe(df)
        return cls(native)

    # END Dataframe interchange protocol

    def support_materialization_in_worker_process(self) -> bool:
        """
        Tell whether `to_pandas` may be called during the pickling process, at the moment of recreating the object.

        Returns
        -------
        bool
            Always ``False`` for this query compiler.
        """
        return False

    def get_pandas_backend(self) -> Optional[str]:
        """
        Get backend stored in `_modin_frame`.

        Returns
        -------
        str | None
            Backend name; this implementation always returns ``None``.
        """
        return None

    # NOTE that because this query compiler provides the index of its underlying
    # pandas dataframe, updating the index affects this frame, and vice versa.
    # Consequently, native execution does not suffer from the issue
    # https://github.com/modin-project/modin/issues/1618
    index: pandas.Index = property(_get_axis(0), _set_axis(0))
    columns = property(_get_axis(1), _set_axis(1))

    @_inherit_docstrings(BaseQueryCompiler.repartition)
    def repartition(self, axis=None):
        # There are no partitions to rebalance, so the request is always rejected
        raise Exception(_NO_REPARTITION_ON_NATIVE_EXECUTION_EXCEPTION_MESSAGE)


================================================
FILE: modin/core/storage_formats/pandas/parsers.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.


"""
Module houses Modin parser classes, that are used for data parsing on the workers.

Notes
-----
Data parsing mechanism differs depending on the data format type:

* text format type (CSV, EXCEL, FWF, JSON):
  File parsing begins from retrieving `start` and `end` parameters from `parse`
  kwargs - these parameters define start and end bytes of data file, that should
  be read in the concrete partition. Using this data and file handle got from
  `fname`, binary data is read by python `read` function. Then resulting data is passed
  into `pandas.read_*` function as `io.BytesIO` object to get corresponding
  `pandas.DataFrame` (we need to do this because Modin partitions internally store data
  as `pandas.DataFrame`).

* columnar store type (FEATHER, HDF, PARQUET):
  In this case data chunk to be read is defined by columns names passed as `columns`
  parameter as part of `parse` kwargs, so no additional action is needed and `fname`
  and `kwargs` are just passed into `pandas.read_*` function (in some corner cases
  `pyarrow.read_*` function can be used).

* SQL type:
  Chunking is incorporated in the `sql` parameter as part of query, so `parse`
  parameters are passed into `pandas.read_sql` function without modification.
"""

import contextlib
import json
import os
import warnings
from io import BytesIO, IOBase, TextIOWrapper
from typing import Any, NamedTuple

import fsspec
import numpy as np
import pandas
from pandas.core.dtypes.cast import find_common_type
from pandas.core.dtypes.concat import union_categoricals
from pandas.io.common import infer_compression
from pandas.util._decorators import doc

from modin.config import MinColumnPartitionSize, MinRowPartitionSize
from modin.core.io.file_dispatcher import OpenFile
from modin.core.storage_formats.pandas.utils import split_result_of_axis_func_pandas
from modin.db_conn import ModinDatabaseConnection
from modin.error_message import ErrorMessage
from modin.logging import ClassLogger
from modin.logging.config import LogLevel
from modin.utils import ModinAssumptionError

# Docstring templates below are filled in through the ``doc`` decorator
# (``pandas.util._decorators.doc``) applied to the parser classes and their
# ``parse`` functions in this module.

# Class-level template; ``{data_type}`` names the kind of data a parser handles.
_doc_pandas_parser_class = """
Class for handling {data_type} on the workers using pandas storage format.

Inherits common functions from `PandasParser` class.
"""

# Template for ``parse``-like functions; ``{parameters}`` receives the
# function-specific parameter descriptions.
_doc_parse_func = """
Parse data on the workers.

Parameters
----------
{parameters}
**kwargs : dict
    Keywords arguments to be used by `parse` function or
    passed into `read_*` function.

Returns
-------
list
    List with split parse results and it's metadata
    (index, dtypes, etc.).
"""

# Parameter description fragments substituted for ``{parameters}`` above.
_doc_parse_parameters_common = """fname : str or path object
    Name of the file or path to read."""

_doc_common_read_kwargs = """common_read_kwargs : dict
    Common keyword parameters for read functions.
"""
# ``fname`` description followed by the ``common_read_kwargs`` description.
_doc_parse_parameters_common2 = "\n".join(
    (_doc_parse_parameters_common, _doc_common_read_kwargs)
)


def _split_result_for_readers(axis, num_splits, df):  # pragma: no cover
    """
    Cut the freshly read DataFrame into pieces, always returning a list.

    Parameters
    ----------
    axis : int
        The axis to split across (0 - index, 1 - columns).
    num_splits : int
        The number of splits to create.
    df : pandas.DataFrame
        `pandas.DataFrame` to split.

    Returns
    -------
    list
        A list of pandas DataFrames.
    """
    # The minimal block size is taken from the config matching the split axis
    if axis == 0:
        smallest_block = MinRowPartitionSize.get()
    else:
        smallest_block = MinColumnPartitionSize.get()

    pieces = split_result_of_axis_func_pandas(
        axis, num_splits, df, min_block_size=smallest_block
    )
    # A non-list result is wrapped so that callers can always rely on a list
    return pieces if isinstance(pieces, list) else [pieces]


def find_common_type_cat(types):
    """
    Determine a dtype that all of the given dtypes can be represented with.

    Parameters
    ----------
    types : array-like
        Array of dtypes.

    Returns
    -------
    pandas.core.dtypes.dtypes.ExtensionDtype or
    np.dtype or
    None
        `dtype` that is common for all passed `types`.
    """
    # Anything that is not purely categorical is resolved by pandas directly
    if not all(isinstance(t, pandas.CategoricalDtype) for t in types):
        return find_common_type(list(types))

    every_ordered = all(t.ordered for t in types)
    if every_ordered:
        # Ordered categoricals: the result holds the sorted union of all categories
        merged = [category for t in types for category in t.categories]
        return pandas.CategoricalDtype(np.sort(np.unique(merged)), ordered=True)

    # Otherwise the dtype is taken from the union of empty categoricals of each dtype
    empty_categoricals = [pandas.Categorical([], dtype=t) for t in types]
    combined = union_categoricals(empty_categoricals, sort_categories=every_ordered)
    return combined.dtype


class PandasParser(ClassLogger, modin_layer="PARSER", log_level=LogLevel.DEBUG):
    """Base class for parser classes with pandas storage format."""

    @staticmethod
    @doc(_doc_parse_func, parameters=_doc_parse_parameters_common)
    def generic_parse(fname, **kwargs):
        # NOTE: the docstring of this function is generated by the `doc` decorator.
        # Installs a process-wide "ignore" filter for all warnings
        warnings.filterwarnings("ignore")
        # Keys consumed by this function are popped so that they are not forwarded to
        # `callback`; `encoding` is only peeked at and stays in `kwargs`.
        num_splits = kwargs.pop("num_splits", None)
        start = kwargs.pop("start", None)
        end = kwargs.pop("end", None)
        header_size = kwargs.pop("header_size", 0)
        common_dtypes = kwargs.pop("common_dtypes", None)
        encoding = kwargs.get("encoding", None)
        # `callback` is mandatory: a missing key raises KeyError
        callback = kwargs.pop("callback")
        if start is None or end is None:
            # This only happens when we are reading with only one worker (Default)
            return callback(fname, **kwargs)

        # pop "compression" from kwargs because bio is uncompressed
        with OpenFile(
            fname,
            "rb",
            kwargs.pop("compression", "infer"),
            **(kwargs.pop("storage_options", None) or {}),
        ) as bio:
            header = b""
            # In this case we beware that first line can contain BOM, so
            # adding this line to the `header` for reading and then skip it
            # (`and` binds tighter than `or`: the branch is taken for encodings that
            # contain "utf" but no "8", for "unicode_escape" and for "utf_8_sig"
            # written with either "-" or "_")
            if encoding and (
                "utf" in encoding
                and "8" not in encoding
                or encoding == "unicode_escape"
                or encoding.replace("-", "_") == "utf_8_sig"
            ):
                # do not 'close' the wrapper - underlying buffer is managed by `bio` handle
                fio = TextIOWrapper(bio, encoding=encoding, newline="")
                if header_size == 0:
                    # No header lines requested: still prepend the first line of the
                    # file and make the reader skip it
                    header = fio.readline().encode(encoding)
                    kwargs["skiprows"] = 1
                for _ in range(header_size):
                    header += fio.readline().encode(encoding)
            elif encoding is not None:
                if header_size == 0:
                    header = bio.readline()
                    # `skiprows` can be only None here, so don't check it's type
                    # and just set to 1
                    kwargs["skiprows"] = 1
                for _ in range(header_size):
                    header += bio.readline()
            else:
                # No encoding given: only the requested header lines are collected
                for _ in range(header_size):
                    header += bio.readline()

            # The chunk handed to the reader is the collected header followed by the
            # bytes in the [start, end) range of the file
            bio.seek(start)
            to_read = header + bio.read(end - start)
        # `memory_map` is removed before reading from the in-memory buffer; a copy is
        # made so that the dict being iterated by `**kwargs` unpacking is not the one
        # edited here
        if "memory_map" in kwargs:
            kwargs = kwargs.copy()
            del kwargs["memory_map"]
        if common_dtypes is not None:
            kwargs["dtype"] = common_dtypes
        pandas_df = callback(BytesIO(to_read), **kwargs)
        # For a `RangeIndex` only the number of rows is reported instead of the index itself
        index = (
            pandas_df.index
            if not isinstance(pandas_df.index, pandas.RangeIndex)
            else len(pandas_df)
        )
        # Result layout: the column-wise splits followed by the index (or its length)
        # and the dtypes of the parsed frame
        return _split_result_for_readers(1, num_splits, pandas_df) + [
            index,
            pandas_df.dtypes,
        ]

    @classmethod
    def get_dtypes(cls, dtypes_ids, columns):
        """
        Work out one dtype per column that fits the data of every partition.

        Parameters
        ----------
        dtypes_ids : list
            Array with references to the partitions dtypes objects.
        columns : array-like or Index (1d)
            The names of the columns in this variable will be used
            for dtypes creation.

        Returns
        -------
        frame_dtypes : pandas.Series, dtype or None
            Resulting dtype or pandas.Series where column names are used as
            index and types of columns are used as values for full resulting
            frame.
        """
        if len(dtypes_ids) == 0:
            return None

        # Every materialized element is a Series: column names form its index and
        # the dtypes of one partition are its values
        per_partition = cls.materialize(dtypes_ids)
        if all(len(part) == 0 for part in per_partition):
            return None

        # One column per partition, one row per frame column
        dtypes_table = pandas.concat(per_partition, axis=1)
        result = dtypes_table.iloc[:, 0]
        result.name = None

        partitions_agree = dtypes_table.eq(result, axis=0).all(axis=None)
        if not partitions_agree:
            ErrorMessage.mismatch_with_pandas(
                operation="read_*",
                message=(
                    "Data types of partitions are different! "
                    "Please refer to the troubleshooting section of the Modin documentation "
                    "to fix this issue"
                ),
            )

            # Reduce every row (the dtypes of one column across all partitions)
            # to a single common dtype
            result = dtypes_table.apply(
                lambda row: find_common_type_cat(row.values),
                axis=1,
            ).squeeze(axis=0)

        # Label the dtypes with the column names
        if not isinstance(result, pandas.Series):
            return pandas.Series(result, index=columns)
        result.index = columns
        return result

    @classmethod
    def single_worker_read(cls, fname, *args, reason: str, **kwargs):
        """
        Read the data with a single worker (default-to-pandas implementation).

        Parameters
        ----------
        fname : str, path object or file-like object
            Name of the file or file-like object to read.
        *args : tuple
            Positional arguments to be passed into `read_*` function.
        reason : str
            Message describing the reason for falling back to pandas.
        **kwargs : dict
            Keywords arguments to be passed into `read_*` function.

        Returns
        -------
        BaseQueryCompiler or
        dict or
        pandas.io.parsers.TextFileReader
            Object with imported data (or with reference to data) for further
            processing, object type depends on the child class `parse` function
            result type.
        """
        ErrorMessage.default_to_pandas(reason=reason)

        def to_query_compiler(frame):
            return cls.query_compiler_cls.from_pandas(frame, cls.frame_cls)

        parsed = cls.parse(fname, *args, **kwargs)

        if isinstance(parsed, pandas.io.parsers.TextFileReader):
            # Keep the reader, but make its `read` hand back query compilers
            native_read = parsed.read

            def read(*read_args, **read_kwargs):
                return to_query_compiler(native_read(*read_args, **read_kwargs))

            parsed.read = read
            return parsed

        if isinstance(parsed, dict):
            # Each value of the mapping is converted separately
            return {key: to_query_compiler(frame) for key, frame in parsed.items()}

        return to_query_compiler(parsed)

    @staticmethod
    def get_types_mapper(dtype_backend):
        """
        Build the types mapper keyword used in read_parquet/read_feather.

        Parameters
        ----------
        dtype_backend : {"numpy_nullable", "pyarrow", lib.no_default}

        Returns
        -------
        dict
            Either empty or holding a single "types_mapper" entry.
        """
        if dtype_backend == "numpy_nullable":
            from pandas.io._util import _arrow_dtype_mapping

            return {"types_mapper": _arrow_dtype_mapping().get}
        if dtype_backend == "pyarrow":
            return {"types_mapper": pandas.ArrowDtype}
        # Any other value: no mapper is requested
        return {}

    # Expose the `infer_compression` function imported from `pandas.io.common`
    # as an attribute of the parser classes.
    infer_compression = infer_compression


@doc(_doc_pandas_parser_class, data_type="CSV files")
class PandasCSVParser(PandasParser):
    @staticmethod
    @doc(_doc_parse_func, parameters=_doc_parse_parameters_common2)
    def parse(fname, common_read_kwargs, **kwargs):
        # Delegate to the shared parsing routine, plugging in the CSV reader.
        csv_reader = PandasCSVParser.read_callback
        return PandasParser.generic_parse(
            fname, callback=csv_reader, **common_read_kwargs, **kwargs
        )

    @staticmethod
    def read_callback(*args, **kwargs):
        """
        Read a partition of CSV data with ``pandas.read_csv``.

        Parameters
        ----------
        *args : list
            Positional arguments forwarded to ``pandas.read_csv``.
        **kwargs : dict
            Keyword arguments forwarded to ``pandas.read_csv``.

        Returns
        -------
        pandas.DataFrame or pandas.io.parsers.TextFileReader
            Whatever ``pandas.read_csv`` returned for the passed arguments.
        """
        parsed = pandas.read_csv(*args, **kwargs)
        return parsed


@doc(_doc_pandas_parser_class, data_type="tables with fixed-width formatted lines")
class PandasFWFParser(PandasParser):
    @staticmethod
    @doc(_doc_parse_func, parameters=_doc_parse_parameters_common2)
    def parse(fname, common_read_kwargs, **kwargs):
        # Delegate to the shared parsing routine, plugging in the FWF reader.
        fwf_reader = PandasFWFParser.read_callback
        return PandasParser.generic_parse(
            fname, callback=fwf_reader, **common_read_kwargs, **kwargs
        )

    @staticmethod
    def read_callback(*args, **kwargs):
        """
        Read a partition of fixed-width formatted data with ``pandas.read_fwf``.

        Parameters
        ----------
        *args : list
            Positional arguments forwarded to ``pandas.read_fwf``.
        **kwargs : dict
            Keyword arguments forwarded to ``pandas.read_fwf``.

        Returns
        -------
        pandas.DataFrame or pandas.io.parsers.TextFileReader
            Whatever ``pandas.read_fwf`` returned for the passed arguments.
        """
        parsed = pandas.read_fwf(*args, **kwargs)
        return parsed


@doc(_doc_pandas_parser_class, data_type="excel files")
class PandasExcelParser(PandasParser):
    @classmethod
    def get_sheet_data(cls, sheet, convert_float):
        """
        Get raw data from the excel sheet.

        Parameters
        ----------
        sheet : openpyxl.worksheet.worksheet.Worksheet
            Sheet to get data from.
        convert_float : bool
            Whether to convert floats to ints or not.

        Returns
        -------
        list
            List of rows, each row being a list of converted cell values.
        """
        return [
            [cls._convert_cell(cell, convert_float) for cell in row]
            for row in sheet.rows
        ]

    @classmethod
    def _convert_cell(cls, cell, convert_float):
        """
        Convert excel cell to value.

        Parameters
        ----------
        cell : openpyxl.cell.cell.Cell
            Excel cell to convert.
        convert_float : bool
            Whether to convert floats to ints or not.

        Returns
        -------
        object
            Value that was converted from the excel cell.
        """
        if cell.is_date:
            return cell.value
        elif cell.data_type == "e":
            # "e" cells are replaced with NaN (presumably openpyxl's error type)
            return np.nan
        elif cell.data_type == "b":
            return bool(cell.value)
        elif cell.value is None:
            # Empty cells are represented with an empty string
            return ""
        elif cell.data_type == "n":
            if convert_float:
                # Only return an int if no information is lost by the conversion,
                # otherwise fall through and return the raw value
                val = int(cell.value)
                if val == cell.value:
                    return val
            else:
                return float(cell.value)

        return cell.value

    @staticmethod
    def need_rich_text_param():
        """
        Determine whether a required `rich_text` parameter should be specified for the ``WorksheetReader`` constructor.

        Returns
        -------
        bool
            True if the installed openpyxl version is 3.1.0 or newer.
        """
        import openpyxl
        from packaging import version

        return version.parse(openpyxl.__version__) >= version.parse("3.1.0")

    @staticmethod
    @doc(_doc_parse_func, parameters=_doc_parse_parameters_common)
    def parse(fname, **kwargs):
        num_splits = kwargs.pop("num_splits", None)
        start = kwargs.pop("start", None)
        end = kwargs.pop("end", None)
        excel_header = kwargs.get("_header")
        sheet_name = kwargs.get("sheet_name", 0)
        footer = b"</sheetData></worksheet>"

        # Default to pandas case, where we are not splitting or partitioning
        if start is None or end is None:
            return pandas.read_excel(fname, **kwargs)

        _skiprows = kwargs.pop("skiprows")

        import re
        from zipfile import ZipFile

        import openpyxl
        from openpyxl.reader.excel import ExcelReader
        from openpyxl.worksheet._reader import WorksheetReader
        from openpyxl.worksheet.worksheet import Worksheet
        from pandas.core.dtypes.common import is_list_like
        from pandas.io.excel._util import fill_mi_header, maybe_convert_usecols
        from pandas.io.parsers import TextParser

        def update_row_nums(match):
            """
            Update the row numbers to start at 1.

            Parameters
            ----------
            match : re.Match object
                The match from the origin `re.sub` looking for row number tags.

            Returns
            -------
            str
                The updated string with new row numbers.

            Notes
            -----
            This is needed because the parser we are using does not scale well if
            the row numbers remain because empty rows are inserted for all "missing"
            rows.
            """
            b = match.group(0)
            return re.sub(
                rb"\d+",
                lambda c: str(int(c.group(0).decode("utf-8")) - _skiprows).encode(
                    "utf-8"
                ),
                b,
            )

        wb = openpyxl.load_workbook(filename=fname, read_only=True)
        try:
            # Get shared strings
            ex = ExcelReader(fname, read_only=True)
            ex.read_manifest()
            ex.read_strings()
            # Convert the default sheet position 0 to the actual sheet name
            if sheet_name == 0:
                sheet_name = wb.sheetnames[sheet_name]
            # get the worksheet to use with the worksheet reader
            ws = Worksheet(wb)
            # Read the raw data
            with ZipFile(fname) as z:
                with z.open("xl/worksheets/{}.xml".format(sheet_name)) as file:
                    file.seek(start)
                    bytes_data = file.read(end - start)

            bytes_data = re.sub(rb'r="[A-Z]*\d+"', update_row_nums, bytes_data)
            bytesio = BytesIO(excel_header + bytes_data + footer)
            # Use openpyxl to read/parse sheet data
            common_args = (ws, bytesio, ex.shared_strings, False)
            if PandasExcelParser.need_rich_text_param():
                reader = WorksheetReader(*common_args, rich_text=False)
            else:
                reader = WorksheetReader(*common_args)
            # Attach cells to worksheet object
            reader.bind_cells()
            data = PandasExcelParser.get_sheet_data(
                ws, kwargs.pop("convert_float", True)
            )
        finally:
            # A workbook opened in read-only mode has to be closed explicitly,
            # otherwise the underlying archive handle leaks.
            wb.close()
        usecols = maybe_convert_usecols(kwargs.pop("usecols", None))
        header = kwargs.pop("header", 0)
        index_col = kwargs.pop("index_col", None)
        # skiprows is handled externally
        skiprows = None

        # Handle header and create MultiIndex for columns if necessary
        if is_list_like(header) and len(header) == 1:
            header = header[0]
        if header is not None and is_list_like(header):
            control_row = [True] * len(data[0])

            for row in header:
                data[row], control_row = fill_mi_header(data[row], control_row)
        # Handle MultiIndex for row Index if necessary
        if is_list_like(index_col):
            # Forward fill values for MultiIndex index.
            if header is None:
                # No header rows at all: the data starts at the very first row.
                # (``1 + header`` would raise a TypeError for ``header=None``.)
                offset = 0
            elif not is_list_like(header):
                offset = 1 + header
            else:
                offset = 1 + max(header)

            # Check if dataset is empty
            if offset < len(data):
                for col in index_col:
                    last = data[offset][col]
                    for row in range(offset + 1, len(data)):
                        if data[row][col] == "" or data[row][col] is None:
                            data[row][col] = last
                        else:
                            last = data[row][col]
        parser = TextParser(
            data,
            header=header,
            index_col=index_col,
            has_index_names=is_list_like(header) and len(header) > 1,
            skiprows=skiprows,
            usecols=usecols,
            skip_blank_lines=False,
            **kwargs,
        )
        pandas_df = parser.read()
        if (
            len(pandas_df) > 1
            and len(pandas_df.columns) != 0
            and pandas_df.isnull().all().all()
        ):
            # Drop NaN rows at the end of the DataFrame
            pandas_df = pandas.DataFrame(columns=pandas_df.columns)

        # Since we know the number of rows that occur before this partition, we can
        # correctly assign the index in cases of RangeIndex. If it is not a RangeIndex,
        # the index is already correct because it came from the data.
        if isinstance(pandas_df.index, pandas.RangeIndex):
            pandas_df.index = pandas.RangeIndex(
                start=_skiprows, stop=len(pandas_df.index) + _skiprows
            )
        # We return the length if it is a RangeIndex (common case) to reduce
        # serialization cost.
        if index_col is not None:
            index = pandas_df.index
        else:
            # The lengths will become the RangeIndex
            index = len(pandas_df)
        return _split_result_for_readers(1, num_splits, pandas_df) + [
            index,
            pandas_df.dtypes,
        ]


@doc(_doc_pandas_parser_class, data_type="JSON files")
class PandasJSONParser(PandasParser):
    @staticmethod
    @doc(_doc_parse_func, parameters=_doc_parse_parameters_common)
    def parse(fname, **kwargs):
        num_splits = kwargs.pop("num_splits", None)
        start = kwargs.pop("start", None)
        end = kwargs.pop("end", None)
        if start is None or end is None:
            # This only happens when we are reading with only one worker (Default)
            return pandas.read_json(fname, **kwargs)

        # "compression" is consumed here because the bytes read below are
        # already uncompressed when they are handed to ``pandas.read_json``
        compression = kwargs.pop("compression", "infer")
        storage_options = kwargs.pop("storage_options", None) or {}
        with OpenFile(fname, "rb", compression, **storage_options) as bio:
            bio.seek(start)
            to_read = b"" + bio.read(end - start)
        # Columns the caller expects every partition to have
        columns = kwargs.pop("columns")
        pandas_df = pandas.read_json(BytesIO(to_read), **kwargs)

        partition_columns = pandas_df.columns
        if not partition_columns.equals(columns):
            raise ModinAssumptionError("Columns must be the same across all rows.")
        splits = _split_result_for_readers(1, num_splits, pandas_df)
        # Besides the splits, report the row count, dtypes and column labels
        return splits + [len(pandas_df), pandas_df.dtypes, partition_columns]


class ParquetFileToRead(NamedTuple):
    """
    Description of a slice of a parquet file: its location and a row group range.

    Parameters
    ----------
    path : str, path object or file-like object
        Location of the file (or the opened file) to read from.
    row_group_start : int
        Index of the first row group to read.
    row_group_end : int
        Index of the row group at which reading stops.
    """

    # Where the parquet data lives
    path: Any
    # Boundaries of the row group range to read
    row_group_start: int
    row_group_end: int


@doc(_doc_pandas_parser_class, data_type="PARQUET data")
class PandasParquetParser(PandasParser):
    @staticmethod
    def _read_row_group_chunk(
        f, row_group_start, row_group_end, columns, filters, engine, to_pandas_kwargs
    ):
        """
        Read a range of row groups of a single parquet file into a pandas DataFrame.

        Parameters
        ----------
        f : file-like object
            Opened parquet file to read from.
        row_group_start : int
            Index of the first row group to read.
        row_group_end : int
            Index of the row group to stop at (exclusive).
        columns : list or None
            Columns to read, None to read all of them.
        filters : list or None
            Filters to apply while reading, None to read without filtering.
        engine : {"pyarrow", "fastparquet"}
            Parquet library to use.
        to_pandas_kwargs : dict
            Keyword arguments for pyarrow's ``to_pandas`` (only used by the
            "pyarrow" engine).

        Returns
        -------
        pandas.DataFrame
            Data of the requested row groups.

        Raises
        ------
        ValueError
            If `engine` is neither "pyarrow" nor "fastparquet".
        """
        if engine == "pyarrow":
            if filters is not None:
                import pyarrow.dataset as ds
                from pyarrow.parquet import filters_to_expression

                parquet_format = ds.ParquetFileFormat()
                fragment = parquet_format.make_fragment(
                    f,
                    row_groups=range(
                        row_group_start,
                        row_group_end,
                    ),
                )
                dataset = ds.FileSystemDataset(
                    [fragment],
                    schema=fragment.physical_schema,
                    format=parquet_format,
                    filesystem=fragment.filesystem,
                )

                # This lower-level API doesn't have the ability to automatically handle pandas metadata
                # The following code is based on
                # https://github.com/apache/arrow/blob/f44e28fa03a64ae5b3d9352d21aee2cc84f9af6c/python/pyarrow/parquet/core.py#L2619-L2628

                # if use_pandas_metadata, we need to include index columns in the
                # column selection, to be able to restore those in the pandas DataFrame
                metadata = dataset.schema.metadata or {}

                if b"pandas" in metadata and columns is not None:
                    index_columns = json.loads(metadata[b"pandas"].decode("utf8"))[
                        "index_columns"
                    ]
                    # In the pandas metadata, the index columns can either be string column names,
                    # or a dictionary that describes a RangeIndex.
                    # Here, we are finding the real data columns that need to be read to become part
                    # of the pandas Index, so we can skip the RangeIndex.
                    # Not only can a RangeIndex be trivially reconstructed later, but we actually
                    # ignore partition-level range indices, because we want to have a single Modin
                    # RangeIndex that spans all partitions.
                    index_columns = [
                        col for col in index_columns if not isinstance(col, dict)
                    ]
                    # Append the missing index columns in the order they are listed in
                    # the metadata, so the selection does not depend on set ordering.
                    requested = set(columns)
                    missing_index_columns = [
                        col
                        for col in dict.fromkeys(index_columns)
                        if col not in requested
                    ]
                    columns = list(columns) + missing_index_columns

                return dataset.to_table(
                    columns=columns,
                    filter=filters_to_expression(filters),
                ).to_pandas(**to_pandas_kwargs)
            else:
                from pyarrow.parquet import ParquetFile

                return (
                    ParquetFile(f)
                    .read_row_groups(
                        range(
                            row_group_start,
                            row_group_end,
                        ),
                        columns=columns,
                        use_pandas_metadata=True,
                    )
                    .to_pandas(**to_pandas_kwargs)
                )
        elif engine == "fastparquet":
            from fastparquet import ParquetFile

            return ParquetFile(f)[row_group_start:row_group_end].to_pandas(
                columns=columns,
                filters=filters,
                # Setting row_filter=True would perform filtering at the row level, which is more correct
                # (in line with pyarrow)
                # However, it doesn't work: https://github.com/dask/fastparquet/issues/873
                # Also, this would create incompatibility with pandas
            )
        else:
            # We shouldn't ever come to this case, so something went wrong
            raise ValueError(
                f"engine must be one of 'pyarrow', 'fastparquet', got: {engine}"
            )

    @staticmethod
    @doc(
        _doc_parse_func,
        parameters="""files_for_parser : list
    List of files to be read.
engine : str
    Parquet library to use (either PyArrow or fastparquet).
""",
    )
    def parse(files_for_parser, engine, **kwargs):
        columns = kwargs.get("columns", None)
        filters = kwargs.get("filters", None)
        # ``storage_options`` may be explicitly passed as None, which can't be
        # unpacked with ``**`` below, so fall back to an empty dict in that case
        storage_options = kwargs.get("storage_options", None) or {}
        chunks = []
        # `single_worker_read` just passes in a string path or path-like object
        if isinstance(files_for_parser, (str, os.PathLike)):
            return pandas.read_parquet(files_for_parser, engine=engine, **kwargs)

        to_pandas_kwargs = PandasParser.get_types_mapper(kwargs["dtype_backend"])

        for file_for_parser in files_for_parser:
            if isinstance(file_for_parser.path, IOBase):
                # Already opened file object: use it as is and don't close it
                context = contextlib.nullcontext(file_for_parser.path)
            else:
                context = fsspec.open(file_for_parser.path, **storage_options)
            with context as f:
                chunk = PandasParquetParser._read_row_group_chunk(
                    f,
                    file_for_parser.row_group_start,
                    file_for_parser.row_group_end,
                    columns,
                    filters,
                    engine,
                    to_pandas_kwargs,
                )
            chunks.append(chunk)
        df = pandas.concat(chunks)
        return df, df.index, len(df)


@doc(_doc_pandas_parser_class, data_type="HDF data")
class PandasHDFParser(PandasParser):  # pragma: no cover
    @staticmethod
    @doc(
        _doc_parse_func,
        parameters="""fname : str, path object, pandas.HDFStore or file-like object
    Name of the file, path pandas.HDFStore or file-like object to read.""",
    )
    def parse(fname, **kwargs):
        # The key arrives under the "_key" name and is handed to pandas as "key"
        kwargs["key"] = kwargs.pop("_key", None)
        num_splits = kwargs.pop("num_splits", None)
        df = pandas.read_hdf(fname, **kwargs)
        if num_splits is None:
            # No splitting requested: return the pandas result untouched
            return df
        splits = _split_result_for_readers(0, num_splits, df)
        # Append the length of the index here to build it externally
        return splits + [len(df.index), df.dtypes]


@doc(_doc_pandas_parser_class, data_type="FEATHER files")
class PandasFeatherParser(PandasParser):
    @staticmethod
    @doc(
        _doc_parse_func,
        parameters="""fname : str, path object or file-like object
    Name of the file, path or file-like object to read.""",
    )
    def parse(fname, **kwargs):
        from pyarrow import feather

        num_splits = kwargs.pop("num_splits", None)
        if num_splits is None:
            return pandas.read_feather(fname, **kwargs)

        # "dtype_backend" is translated into ``to_pandas`` arguments and must not
        # reach the pyarrow readers below
        to_pandas_kwargs = PandasParser.get_types_mapper(kwargs.pop("dtype_backend"))
        storage_options = kwargs.pop("storage_options", None) or {}

        with OpenFile(fname, **storage_options) as file:
            # The implementation is as close as possible to the one in pandas.
            # For reference see `read_feather` in pandas/io/feather_format.py.
            if to_pandas_kwargs:
                # `read_feather` doesn't accept `types_mapper` if pyarrow<11.0
                df = feather.read_table(file, **kwargs).to_pandas(**to_pandas_kwargs)
            else:
                df = feather.read_feather(file, **kwargs)
        splits = _split_result_for_readers(0, num_splits, df)
        # Append the length of the index here to build it externally
        return splits + [len(df.index), df.dtypes]


@doc(_doc_pandas_parser_class, data_type="SQL queries or tables")
class PandasSQLParser(PandasParser):
    @staticmethod
    @doc(
        _doc_parse_func,
        parameters="""sql : str or SQLAlchemy Selectable (select or text object)
    SQL query to be executed or a table name.
con : SQLAlchemy connectable, str, or sqlite3 connection
    Connection object to database.
index_col : str or list of str
    Column(s) to set as index(MultiIndex).
read_sql_engine : str
    Underlying engine ('pandas' or 'connectorx') used for fetching query result.""",
    )
    def parse(sql, con, index_col, read_sql_engine, **kwargs):
        # connectorx is only used when it was requested and can be imported
        enable_cx = False
        if read_sql_engine == "Connectorx":
            try:
                import connectorx as cx
            except ImportError:
                warnings.warn(
                    "Switch to 'pandas.read_sql' since 'connectorx' is not installed, please run 'pip install connectorx'."
                )
            else:
                enable_cx = True

        num_splits = kwargs.pop("num_splits", None)
        if isinstance(con, ModinDatabaseConnection):
            # connectorx works with a connection string, pandas with a connection
            con = con.get_string() if enable_cx else con.get_connection()

        if enable_cx:
            df = cx.read_sql(con, sql, index_col=index_col)
        else:
            df = pandas.read_sql(sql, con, index_col=index_col, **kwargs)

        if num_splits is None:
            # No splitting requested: return the query result untouched
            return df

        # Without an index column only the row count is reported
        index = len(df) if index_col is None else df.index
        return _split_result_for_readers(1, num_splits, df) + [index, df.dtypes]


================================================
FILE: modin/core/storage_formats/pandas/query_compiler.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
Module contains ``PandasQueryCompiler`` class.

``PandasQueryCompiler`` is responsible for compiling efficient DataFrame algebra
queries for the ``PandasDataframe``.
"""

from __future__ import annotations

import ast
import hashlib
import re
import warnings
from collections.abc import Iterable
from typing import TYPE_CHECKING, Any, Hashable, List, Literal, Optional, Union

import numpy as np
import pandas
from pandas._libs import lib
from pandas.api.types import is_scalar
from pandas.core.apply import reconstruct_func
from pandas.core.common import is_bool_indexer
from pandas.core.dtypes.cast import find_common_type
from pandas.core.dtypes.common import (
    is_bool_dtype,
    is_datetime64_any_dtype,
    is_list_like,
    is_numeric_dtype,
)
from pandas.core.groupby.base import transformation_kernels
from pandas.core.indexes.api import ensure_index_from_sequences
from pandas.core.indexing import check_bool_indexer
from pandas.errors import DataError

from modin.config import CpuCount, RangePartitioning
from modin.core.dataframe.algebra import (
    Binary,
    Fold,
    GroupByReduce,
    Map,
    Reduce,
    TreeReduce,
)
from modin.core.dataframe.algebra.default2pandas.groupby import (
    GroupBy,
    GroupByDefault,
    SeriesGroupByDefault,
)
from modin.core.dataframe.base.interchange.dataframe_protocol.dataframe import (
    ProtocolDataframe,
)
from modin.core.dataframe.pandas.metadata import (
    DtypesDescriptor,
    ModinDtypes,
    ModinIndex,
    extract_dtype,
)
from modin.core.storage_formats import BaseQueryCompiler
from modin.error_message import ErrorMessage
from modin.logging import get_logger
from modin.utils import (
    MODIN_UNNAMED_SERIES_LABEL,
    _inherit_docstrings,
    hashable,
    try_cast_to_pandas,
    wrap_udf_function,
)

from .aggregations import CorrCovBuilder
from .groupby import GroupbyReduceImpl, PivotTableImpl
from .merge import MergeImpl
from .utils import get_group_names, merge_partitioning

if TYPE_CHECKING:
    from modin.core.dataframe.pandas.dataframe.dataframe import PandasDataframe


def _get_axis(axis):
    """
    Create a getter for the labels of the specified axis.

    Parameters
    ----------
    axis : {0, 1}
        Axis whose labels the getter returns. 0 is for index and 1 is for column.

    Returns
    -------
    callable(PandasQueryCompiler) -> pandas.Index
    """
    # Anything other than 0 is treated as the column axis
    labels_attr = "index" if axis == 0 else "columns"
    return lambda self: getattr(self._modin_frame, labels_attr)


def _set_axis(axis):
    """
    Create a setter for the labels of the specified axis.

    Parameters
    ----------
    axis : {0, 1}
        Axis whose labels the setter assigns. 0 is for index and 1 is for column.

    Returns
    -------
    callable(PandasQueryCompiler)
    """
    # Anything other than 0 is treated as the column axis
    labels_attr = "index" if axis == 0 else "columns"

    def set_axis(self, labels):
        setattr(self._modin_frame, labels_attr, labels)

    return set_axis


def _str_map(func_name):
    """
    Create a function invoking the given string method via the frame's ``str`` accessor.

    Parameters
    ----------
    func_name : str
        Name of the ``str`` accessor method to execute.

    Returns
    -------
    callable(pandas.DataFrame, *args, **kwargs) -> pandas.DataFrame
    """

    def str_op_builder(df, *args, **kwargs):
        """Run the requested `str` accessor method on the squeezed frame."""
        accessor = df.squeeze(axis=1).str
        method = getattr(pandas.Series.str, func_name)
        outcome = method(accessor, *args, **kwargs)
        # Results that can be turned into a frame are; anything else is returned as is
        return outcome.to_frame() if hasattr(outcome, "to_frame") else outcome

    return str_op_builder


def _dt_prop_map(property_name):
    """
    Create a function reading the given property from the frame's ``dt`` accessor.

    Parameters
    ----------
    property_name : str
        Name of the date-time property to read.

    Returns
    -------
    callable(pandas.DataFrame, *args, **kwargs) -> pandas.DataFrame
        Function to be applied in the partitions.

    Notes
    -----
    This applies non-callable properties of ``Series.dt``.
    """

    def dt_op_builder(df, *args, **kwargs):
        """Read the requested date-time property of the passed frame."""
        series = df.squeeze(axis=1)
        # A frame without columns can't be squeezed into a Series: hand it back
        if isinstance(series, pandas.DataFrame) and len(series.columns) == 0:
            return series
        assert isinstance(series, pandas.Series)
        value = getattr(series.dt, property_name)
        if isinstance(value, pandas.Series):
            return value.to_frame()
        if isinstance(value, pandas.DataFrame):
            return value
        # Any other value is wrapped into a single-cell frame
        return pandas.DataFrame([value])

    return dt_op_builder


def _dt_func_map(func_name):
    """
    Create a function calling the given method of the frame's ``dt`` accessor.

    Parameters
    ----------
    func_name : str
        Name of the date-time method to call.

    Returns
    -------
    callable(pandas.DataFrame, *args, **kwargs) -> pandas.DataFrame
        Function to be applied in the partitions.

    Notes
    -----
    This applies callable methods of ``Series.dt``.
    """

    def dt_op_builder(df, *args, **kwargs):
        """Call the requested ``dt`` accessor method for the passed frame."""
        accessor = df.squeeze(axis=1).dt
        method = getattr(pandas.Series.dt, func_name)
        outcome = method(accessor, *args, **kwargs)
        # The dtype is passed explicitly, otherwise the frame might get the wrong
        # one, e.g. for to_pydatetime in
        # https://github.com/modin-project/modin/issues/4436
        return pandas.DataFrame(outcome, dtype=outcome.dtype)

    return dt_op_builder


def copy_df_for_func(func, display_name: str = None):
    """
    Wrap an inplace function so that it operates on a copy of the passed frame.

    The produced function copies the frame it receives, runs `func` on the copy
    and returns that modified copy.

    Parameters
    ----------
    func : callable(pandas.DataFrame)
        The function, usually updates a dataframe inplace.
    display_name : str, optional
        The function's name, which is displayed by progress bar.

    Returns
    -------
    callable(pandas.DataFrame)
        A callable function to be applied in the partitions.
    """

    def caller(df, *args, **kwargs):
        """Run the wrapped function on a copy of the passed frame."""
        duplicate = df.copy()
        # The return value of `func` is ignored: it is expected to modify
        # `duplicate` inplace
        func(duplicate, *args, **kwargs)
        return duplicate

    if display_name is None:
        return caller
    caller.__name__ = display_name
    return caller


def _series_logical_binop(func):
    """
    Wrap a Series logical operator so that it can be passed to Binary.register.

    Parameters
    ----------
    func : callable
        Binary operator method of pandas.Series to be applied.

    Returns
    -------
    callable
    """

    def binop(x, y, **kwargs):
        left = x.squeeze(axis=1)
        # "squeeze_other" is consumed here and never reaches `func`
        right = y.squeeze(axis=1) if kwargs.pop("squeeze_other", False) else y
        return func(left, right, **kwargs).to_frame()

    return binop


@_inherit_docstrings(BaseQueryCompiler)
class PandasQueryCompiler(BaseQueryCompiler):
    """
    Query compiler for the pandas storage format.

    This class translates the common query compiler API into DataFrame Algebra
    queries that are supposed to be executed by :py:class:`~modin.core.dataframe.pandas.dataframe.dataframe.PandasDataframe`.

    Parameters
    ----------
    modin_frame : PandasDataframe
        Modin Frame to query with the compiled queries.
    shape_hint : {"row", "column", None}, default: None
        Shape hint for frames known to be a column or a row, otherwise None.
    """

    _modin_frame: PandasDataframe
    _shape_hint: Optional[str]

    def __init__(self, modin_frame: PandasDataframe, shape_hint: Optional[str] = None):
        # Store the optional row/column hint together with the frame that the
        # compiled queries are executed against.
        self._shape_hint = shape_hint
        self._modin_frame = modin_frame

    # Read-only properties (no setter is given) that forward to the attributes
    # of the same name on the underlying Modin frame.
    storage_format = property(lambda self: self._modin_frame.storage_format)
    engine = property(lambda self: self._modin_frame.engine)

    @property
    def lazy_row_labels(self):
        """
        Tell whether the row labels are computed lazily.

        Equivalent to `not self.frame_has_materialized_index`.

        Returns
        -------
        bool
        """
        index_is_materialized = self.frame_has_materialized_index
        return not index_is_materialized

    @property
    def lazy_row_count(self):
        """
        Tell whether the row count is computed lazily.

        Equivalent to `not self.frame_has_materialized_index`.

        Returns
        -------
        bool
        """
        index_is_materialized = self.frame_has_materialized_index
        return not index_is_materialized

    @property
    def lazy_column_types(self):
        """
        Tell whether the dtypes are computed lazily.

        Equivalent to `not self.frame_has_materialized_dtypes`.

        Returns
        -------
        bool
        """
        dtypes_are_materialized = self.frame_has_materialized_dtypes
        return not dtypes_are_materialized

    @property
    def lazy_column_labels(self):
        """
        Tell whether the column labels are computed lazily.

        Equivalent to `not self.frame_has_materialized_columns`.

        Returns
        -------
        bool
        """
        columns_are_materialized = self.frame_has_materialized_columns
        return not columns_are_materialized

    @property
    def lazy_column_count(self):
        """
        Tell whether the column count is computed lazily.

        Equivalent to `not self.frame_has_materialized_columns`.

        Returns
        -------
        bool
        """
        columns_are_materialized = self.frame_has_materialized_columns
        return not columns_are_materialized

    # The default implementation of stay_cost will cache some information
    # which will violate some assumptions in test_internals. Since this class
    # is only used for non-hybrid operations we simply return 0 here for now.
    def stay_cost(self, api_cls_name, operation, arguments):
        # All the arguments are ignored: a constant zero cost is always reported.
        zero_cost = 0
        return zero_cost

    def finalize(self):
        # Delegate to the underlying frame; nothing is returned.
        frame = self._modin_frame
        frame.finalize()

    def execute(self):
        # Finalize the frame first and only then ask it to wait for its computations.
        self.finalize()
        frame = self._modin_frame
        frame.wait_computations()

    def to_pandas(self):
        # The conversion itself is implemented by the underlying frame.
        frame = self._modin_frame
        return frame.to_pandas()

    @classmethod
    def from_pandas(cls, df, data_cls):
        # `data_cls` builds the frame out of the pandas object, the query
        # compiler is then created on top of it (without a shape hint).
        modin_frame = data_cls.from_pandas(df)
        return cls(modin_frame)

    @classmethod
    def from_arrow(cls, at, data_cls):
        # `data_cls` builds the frame out of the arrow object `at`, the query
        # compiler is then created on top of it (without a shape hint).
        modin_frame = data_cls.from_arrow(at)
        return cls(modin_frame)

    # Dataframe exchange protocol

    def to_interchange_dataframe(
        self, nan_as_null: bool = False, allow_copy: bool = True
    ):
        # Both options are forwarded unchanged to the frame's `__dataframe__`.
        protocol_options = {"nan_as_null": nan_as_null, "allow_copy": allow_copy}
        return self._modin_frame.__dataframe__(**protocol_options)

    @classmethod
    def from_interchange_dataframe(cls, df: ProtocolDataframe, data_cls):
        # `data_cls` consumes the interchange object, the query compiler is
        # then created on top of the resulting frame (without a shape hint).
        modin_frame = data_cls.from_interchange_dataframe(df)
        return cls(modin_frame)

    # END Dataframe exchange protocol

    # Axis labels are exposed as read/write properties; the getter and the setter
    # for each axis are produced by `_get_axis` / `_set_axis` (0 is used for
    # `index`, 1 is used for `columns`).
    index: pandas.Index = property(_get_axis(0), _set_axis(0))
    columns: pandas.Index = property(_get_axis(1), _set_axis(1))

    def get_axis_len(self, axis: Literal[0, 1]) -> int:
        """
        Return the length of the specified axis.

        Parameters
        ----------
        axis : {0, 1}
            Axis to compute the length of.

        Returns
        -------
        int
            ``len()`` of the underlying frame for ``axis=0``, otherwise the sum
            of the frame's column widths.
        """
        frame = self._modin_frame
        if axis != 0:
            return sum(frame.column_widths)
        return len(frame)

    @property
    def dtypes(self) -> pandas.Series:
        # Dtypes are stored and computed by the underlying frame.
        frame = self._modin_frame
        return frame.dtypes

    def get_dtypes_set(self):
        # Delegated to the underlying frame.
        frame = self._modin_frame
        return frame.get_dtypes_set()

    # END Index, columns, and dtypes objects

    # Metadata modification methods
    def add_prefix(self, prefix, axis=1):
        # Column labels are renamed for ``axis == 1``, row labels for any other value.
        target = "new_col_labels" if axis == 1 else "new_row_labels"
        renamed_frame = self._modin_frame.rename(**{target: lambda x: f"{prefix}{x}"})
        return self.__constructor__(renamed_frame)

    def add_suffix(self, suffix, axis=1):
        # Column labels are renamed for ``axis == 1``, row labels for any other value.
        target = "new_col_labels" if axis == 1 else "new_row_labels"
        renamed_frame = self._modin_frame.rename(**{target: lambda x: f"{x}{suffix}"})
        return self.__constructor__(renamed_frame)

    # END Metadata modification methods

    # Copy
    # For copy, we don't want a situation where we modify the metadata of the
    # copies if we end up modifying something here. We copy all of the metadata
    # to prevent that.
    def copy(self):
        # The frame is copied and the shape hint is carried over to the new compiler.
        frame_copy = self._modin_frame.copy()
        return self.__constructor__(frame_copy, self._shape_hint)

    # END Copy

    # Append/Concat/Join (Not Merge)
    # The append/concat/join operations should ideally never trigger remote
    # compute. These operations should only ever be manipulations of the
    # metadata of the resulting object. It should just be a simple matter of
    # appending the other object's blocks and adding np.nan columns for the new
    # columns, if needed. If new columns are added, some compute may be
    # required, though it can be delayed.
    #
    # Currently this computation is not delayed, and it may make a copy of the
    # DataFrame in memory. This can be problematic and should be fixed in the
    # future. TODO (devin-petersohn): Delay reindexing

    def concat(self, axis, other, **kwargs):
        # A single query compiler is accepted as well as a list of them.
        others = other if isinstance(other, list) else [other]
        assert all(
            isinstance(o, type(self)) for o in others
        ), "Different Manager objects are being used. This is not allowed"

        # ``sort=None`` is treated the same way as ``sort=False``.
        sort = kwargs.get("sort", None)
        sort = False if sort is None else sort
        join = kwargs.get("join", "outer")
        ignore_index = kwargs.get("ignore_index", False)

        new_modin_frame = self._modin_frame.concat(
            axis, [o._modin_frame for o in others], join, sort
        )
        result = self.__constructor__(new_modin_frame)
        if not ignore_index:
            return result
        # With `ignore_index` the labels of the concatenation axis are replaced
        # with a default range.
        if axis == 0:
            return result.reset_index(drop=True)
        result.columns = pandas.RangeIndex(len(result.columns))
        return result

    # END Append/Concat/Join

    # Data Management Methods
    def free(self):
        # TODO create a way to clean up this object.
        # Until then this is a no-op.
        return None

    # END Data Management Methods

    # Data Movement Methods
    def move_to(self, target_backend: str) -> Union[BaseQueryCompiler, Any]:
        # `target_backend` is ignored: `NotImplemented` is returned unconditionally.
        return NotImplemented

    @classmethod
    def move_from(cls, source_qc: BaseQueryCompiler) -> Union[BaseQueryCompiler, Any]:
        # `source_qc` is ignored: `NotImplemented` is returned unconditionally.
        return NotImplemented

    # END Data Movement Methods

    # To NumPy
    def to_numpy(self, **kwargs):
        # All keyword arguments are forwarded unchanged to the underlying frame.
        frame = self._modin_frame
        return frame.to_numpy(**kwargs)

    # END To NumPy

    # Binary operations (e.g. add, sub)
    # These operations require two DataFrames and will change the shape of the
    # data if the index objects don't match. An outer join + op is performed,
    # such that columns/rows that don't have an index on the other DataFrame
    # result in NaN values.

    # Every attribute below is a method built by `Binary.register` out of a
    # function taking two operands. `infer_dtypes` names the strategy that is
    # passed to `Binary.register` for the result dtypes.
    add = Binary.register(pandas.DataFrame.add, infer_dtypes="try_sample")
    # 'combine' and 'combine_first' are working with UDFs, so it's better not to sample them
    combine = Binary.register(pandas.DataFrame.combine, infer_dtypes="common_cast")
    combine_first = Binary.register(
        pandas.DataFrame.combine_first, infer_dtypes="common_cast"
    )
    eq = Binary.register(pandas.DataFrame.eq, infer_dtypes="bool")
    # The boolean result of `df.equals(other)` is wrapped into a 1x1 frame.
    equals = Binary.register(
        lambda df, other: pandas.DataFrame([[df.equals(other)]]),
        join_type=None,
        labels="drop",
        infer_dtypes="bool",
    )
    floordiv = Binary.register(pandas.DataFrame.floordiv, infer_dtypes="try_sample")
    ge = Binary.register(pandas.DataFrame.ge, infer_dtypes="bool")
    gt = Binary.register(pandas.DataFrame.gt, infer_dtypes="bool")
    le = Binary.register(pandas.DataFrame.le, infer_dtypes="bool")
    lt = Binary.register(pandas.DataFrame.lt, infer_dtypes="bool")
    mod = Binary.register(pandas.DataFrame.mod, infer_dtypes="try_sample")
    mul = Binary.register(pandas.DataFrame.mul, infer_dtypes="try_sample")
    rmul = Binary.register(pandas.DataFrame.rmul, infer_dtypes="try_sample")
    ne = Binary.register(pandas.DataFrame.ne, infer_dtypes="bool")
    pow = Binary.register(pandas.DataFrame.pow, infer_dtypes="try_sample")
    radd = Binary.register(pandas.DataFrame.radd, infer_dtypes="try_sample")
    rfloordiv = Binary.register(pandas.DataFrame.rfloordiv, infer_dtypes="try_sample")
    rmod = Binary.register(pandas.DataFrame.rmod, infer_dtypes="try_sample")
    rpow = Binary.register(pandas.DataFrame.rpow, infer_dtypes="try_sample")
    rsub = Binary.register(pandas.DataFrame.rsub, infer_dtypes="try_sample")
    rtruediv = Binary.register(pandas.DataFrame.rtruediv, infer_dtypes="try_sample")
    sub = Binary.register(pandas.DataFrame.sub, infer_dtypes="try_sample")
    truediv = Binary.register(pandas.DataFrame.truediv, infer_dtypes="try_sample")
    __and__ = Binary.register(pandas.DataFrame.__and__, infer_dtypes="bool")
    __or__ = Binary.register(pandas.DataFrame.__or__, infer_dtypes="bool")
    __rand__ = Binary.register(pandas.DataFrame.__rand__, infer_dtypes="bool")
    __ror__ = Binary.register(pandas.DataFrame.__ror__, infer_dtypes="bool")
    __rxor__ = Binary.register(pandas.DataFrame.__rxor__, infer_dtypes="bool")
    __xor__ = Binary.register(pandas.DataFrame.__xor__, infer_dtypes="bool")
    # The `update` functions are wrapped with `copy_df_for_func`, which applies
    # them to a copy of the left operand and returns that copy.
    df_update = Binary.register(
        copy_df_for_func(pandas.DataFrame.update, display_name="update"),
        join_type="left",
        sort=False,
    )
    series_update = Binary.register(
        copy_df_for_func(
            lambda x, y: pandas.Series.update(x.squeeze(axis=1), y.squeeze(axis=1)),
            display_name="update",
        ),
        join_type="left",
        sort=False,
    )

    # Series logical operators take an additional fill_value flag that dataframe does not
    series_eq = Binary.register(
        _series_logical_binop(pandas.Series.eq), infer_dtypes="bool"
    )
    series_ge = Binary.register(
        _series_logical_binop(pandas.Series.ge), infer_dtypes="bool"
    )
    series_gt = Binary.register(
        _series_logical_binop(pandas.Series.gt), infer_dtypes="bool"
    )
    series_le = Binary.register(
        _series_logical_binop(pandas.Series.le), infer_dtypes="bool"
    )
    series_lt = Binary.register(
        _series_logical_binop(pandas.Series.lt), infer_dtypes="bool"
    )
    series_ne = Binary.register(
        _series_logical_binop(pandas.Series.ne), infer_dtypes="bool"
    )

    # Needed for numpy API
    # The result of each numpy function is wrapped into `pandas.DataFrame`.
    _logical_and = Binary.register(
        lambda df, other, *args, **kwargs: pandas.DataFrame(
            np.logical_and(df, other, *args, **kwargs)
        ),
        infer_dtypes="bool",
    )
    _logical_or = Binary.register(
        lambda df, other, *args, **kwargs: pandas.DataFrame(
            np.logical_or(df, other, *args, **kwargs)
        ),
        infer_dtypes="bool",
    )
    _logical_xor = Binary.register(
        lambda df, other, *args, **kwargs: pandas.DataFrame(
            np.logical_xor(df, other, *args, **kwargs)
        ),
        infer_dtypes="bool",
    )

    def where(self, cond, other, **kwargs):
        assert isinstance(
            cond, type(self)
        ), "Must have the same QueryCompiler subclass to perform this operation"
        # The distributed path doesn't work if `other` is a Series._query_compiler
        # because `n_ary_op` performs columns copartition both for `cond` and
        # `other`, so such an operand is converted to pandas first.
        if isinstance(other, type(self)) and other._shape_hint is not None:
            other = other.to_pandas()

        if not isinstance(other, type(self)):
            # `other` is not a query compiler here (e.g. a Series of scalars to be
            # applied based on the condition dataframe), so it is captured by the
            # function that is applied to the partitions.
            def where_builder_series(df, cond):
                return df.where(cond, other, **kwargs)

            return self.__constructor__(
                self._modin_frame.n_ary_op(
                    where_builder_series, [cond._modin_frame], join_type="left"
                )
            )

        # Make sure to set join_type=None so the `where` result always has
        # the same row and column labels as `self`.
        operands = [
            cond._modin_frame,
            other._modin_frame,
        ]
        new_modin_frame = self._modin_frame.n_ary_op(
            lambda df, cond, other: df.where(cond, other, **kwargs),
            operands,
            join_type=None,
        )
        return self.__constructor__(new_modin_frame)

    def merge(self, right, **kwargs):
        if not RangePartitioning.get():
            return MergeImpl.row_axis_merge(self, right, kwargs)

        # Range-partitioning is enabled: try it first and fall back to the
        # row-axis implementation if it reports the case as not implemented.
        try:
            return MergeImpl.range_partitioning_merge(self, right, kwargs)
        except NotImplementedError as e:
            message = (
                f"Can't use range-partitioning merge implementation because of: {e}"
                + "\nFalling back to a row-axis implementation."
            )
            get_logger().info(message)
        return MergeImpl.row_axis_merge(self, right, kwargs)

    def join(self, right: PandasQueryCompiler, **kwargs) -> PandasQueryCompiler:
        on = kwargs.get("on", None)
        how = kwargs.get("how", "left")
        sort = kwargs.get("sort", False)
        left = self

        # "left" and "inner" joins are always handled here, a "right" join only
        # if `right` has partitions. Everything else defaults to pandas.
        use_broadcast = how in ["left", "inner"] or (
            how == "right" and right._modin_frame._partitions.size != 0
        )
        if not use_broadcast:
            return left.default_to_pandas(pandas.DataFrame.join, right, **kwargs)

        # A "right" join is executed with the operands swapped: the original
        # `right` is the frame being partitioned, `self` is the one being broadcast.
        reverted = how == "right"
        if reverted:
            left, right = right, left

        def map_func(
            left, right, kwargs=kwargs
        ) -> pandas.DataFrame:  # pragma: no cover
            # Restore the caller's operand order before joining.
            if reverted:
                return pandas.DataFrame.join(right, left, **kwargs)
            return pandas.DataFrame.join(left, right, **kwargs)

        right_to_broadcast = right._modin_frame.combine()
        joined_frame = left._modin_frame.broadcast_apply_full_axis(
            axis=1,
            func=map_func,
            # We're going to explicitly change the shape across the 1-axis,
            # so we want for partitioning to adapt as well
            keep_partitioning=False,
            num_splits=merge_partitioning(
                left._modin_frame, right._modin_frame, axis=1
            ),
            other=right_to_broadcast,
        )
        left = left.__constructor__(joined_frame)
        if sort:
            return left.sort_rows_by_column_values(on)
        return left

    # END Inter-Data operations

    # Reindex/reset_index (may shuffle data)
    def reindex(self, axis, labels, **kwargs):
        # Only the labels of the reindexed axis change, the other axis is reused as is.
        if axis:
            new_index, indexer = self.index, None
            new_columns, _ = self.columns.reindex(labels)
        else:
            new_index, indexer = self.index.reindex(labels)
            new_columns = self.columns

        new_dtypes = None
        can_precompute_dtypes = (
            self.frame_has_materialized_dtypes and kwargs.get("method", None) is None
        )
        if can_precompute_dtypes:
            # For columns, defining types is easier because we don't have to calculate
            # the common type, since the entire column is filled. A simple `reindex`
            # of the dtypes covers our needs.
            # For rows, we can avoid calculating common types if we know that no new
            # rows have been added (this information is in `indexer`).
            fill_dtype = pandas.Index([kwargs.get("fill_value", np.nan)]).dtype
            if axis == 0:
                new_dtypes = self.dtypes.copy()
                # "-1" means that the required labels are missing in the dataframe and
                # the corresponding rows will be filled with "fill_value" that may
                # change the column type.
                if indexer is not None and -1 in indexer:
                    for col, col_dtype in new_dtypes.items():
                        new_dtypes[col] = find_common_type((col_dtype, fill_dtype))
            else:
                new_dtypes = self.dtypes.reindex(labels, fill_value=fill_dtype)

        new_modin_frame = self._modin_frame.apply_full_axis(
            axis,
            lambda df: df.reindex(labels=labels, axis=axis, **kwargs),
            new_index=new_index,
            new_columns=new_columns,
            dtypes=new_dtypes,
        )
        return self.__constructor__(new_modin_frame)

    def reset_index(self, **kwargs) -> PandasQueryCompiler:
        # Two implementations live here:
        #   * if the row labels are lazy, `reset_index` is applied to full-axis
        #     partitions and the new metadata is precomputed where possible;
        #   * otherwise the already available index is used to build the result,
        #     defaulting to pandas for `allow_duplicates` / `names`.
        if self.lazy_row_labels:

            def _reset(df, *axis_lengths, partition_idx):  # pragma: no cover
                df = df.reset_index(**kwargs)

                if isinstance(df.index, pandas.RangeIndex):
                    # If the resulting index is a pure RangeIndex that means that
                    # `.reset_index` actually dropped all of the levels of the
                    # original index and so we have to recompute it manually for each partition
                    start = sum(axis_lengths[:partition_idx])
                    stop = sum(axis_lengths[: partition_idx + 1])

                    df.index = pandas.RangeIndex(start, stop)
                return df

            new_columns = None
            # Use the same default for `drop` as the eager path below (and as
            # `_reset`, which relies on the pandas default) instead of raising
            # a KeyError when the caller doesn't pass `drop` explicitly.
            if kwargs.get("drop", False):
                dtypes = self._modin_frame.copy_dtypes_cache()
                if self.frame_has_columns_cache:
                    new_columns = self._modin_frame.copy_columns_cache(
                        copy_lengths=True
                    )
            else:
                # concat index dtypes with column dtypes
                index_dtypes = self._modin_frame._index_cache.maybe_get_dtypes()
                try:
                    dtypes = ModinDtypes.concat(
                        [
                            index_dtypes,
                            self._modin_frame._dtypes,
                        ]
                    )
                except NotImplementedError:
                    # may raise on duplicated names in materialized 'self.dtypes'
                    dtypes = None
                if (
                    # can precompute new columns if we know columns and index names
                    self.frame_has_materialized_columns
                    and index_dtypes is not None
                ):
                    # A one-row frame with the same index names is enough to let
                    # pandas compute the resulting column labels.
                    empty_index = (
                        pandas.Index([0], name=index_dtypes.index[0])
                        if len(index_dtypes) == 1
                        else pandas.MultiIndex.from_arrays(
                            [[i] for i in range(len(index_dtypes))],
                            names=index_dtypes.index,
                        )
                    )
                    new_columns = (
                        pandas.DataFrame(columns=self.columns, index=empty_index)
                        .reset_index(**kwargs)
                        .columns
                    )

            return self.__constructor__(
                self._modin_frame.apply_full_axis(
                    axis=1,
                    func=_reset,
                    enumerate_partitions=True,
                    new_columns=new_columns,
                    dtypes=dtypes,
                    sync_labels=False,
                    pass_axis_lengths_to_partitions=True,
                )
            )

        allow_duplicates = kwargs.pop("allow_duplicates", lib.no_default)
        names = kwargs.pop("names", None)
        if allow_duplicates not in (lib.no_default, False) or names is not None:
            return self.default_to_pandas(
                pandas.DataFrame.reset_index,
                allow_duplicates=allow_duplicates,
                names=names,
                **kwargs,
            )

        drop = kwargs.get("drop", False)
        level = kwargs.get("level", None)
        new_index = None
        if level is not None:
            if not isinstance(level, (tuple, list)):
                level = [level]
            level = [self.index._get_level_number(lev) for lev in level]
            uniq_sorted_level = sorted(set(level))
            if len(uniq_sorted_level) < self.index.nlevels:
                # We handle this by separately computing the index. We could just
                # put the labels into the data and pull them back out, but that is
                # expensive.
                new_index = (
                    self.index.droplevel(uniq_sorted_level)
                    if len(level) < self.index.nlevels
                    else pandas.RangeIndex(len(self.index))
                )
        elif not drop:
            uniq_sorted_level = list(range(self.index.nlevels))

        if not drop:
            if len(uniq_sorted_level) < self.index.nlevels:
                # These are the index levels that will remain after the reset_index
                keep_levels = [
                    i for i in range(self.index.nlevels) if i not in uniq_sorted_level
                ]
                new_copy = self.copy()
                # Change the index to have only the levels that will be inserted
                # into the data. We will replace the old levels later.
                new_copy.index = self.index.droplevel(keep_levels)
                new_copy.index.names = [
                    (
                        "level_{}".format(level_value)
                        if new_copy.index.names[level_index] is None
                        else new_copy.index.names[level_index]
                    )
                    for level_index, level_value in enumerate(uniq_sorted_level)
                ]
                new_modin_frame = new_copy._modin_frame.from_labels()
                # Replace the levels that will remain as a part of the index.
                new_modin_frame.index = new_index
            else:
                new_modin_frame = self._modin_frame.from_labels()
            if isinstance(new_modin_frame.columns, pandas.MultiIndex):
                # Fix col_level and col_fill in generated column names because from_labels works with assumption
                # that col_level and col_fill are not specified but it expands tuples in level names.
                col_level = kwargs.get("col_level", 0)
                col_fill = kwargs.get("col_fill", "")
                if col_level != 0 or col_fill != "":
                    # Modify generated column names if col_level and col_fil have values different from default.
                    levels_names_list = [
                        f"level_{level_index}" if level_name is None else level_name
                        for level_index, level_name in enumerate(self.index.names)
                    ]
                    if col_fill is None:
                        # Initialize col_fill if it is None.
                        # This is some weird undocumented Pandas behavior to take first
                        # element of the last column name.
                        last_col_name = levels_names_list[uniq_sorted_level[-1]]
                        last_col_name = (
                            list(last_col_name)
                            if isinstance(last_col_name, tuple)
                            else [last_col_name]
                        )
                        if len(last_col_name) not in (1, self.columns.nlevels):
                            raise ValueError(
                                "col_fill=None is incompatible "
                                + f"with incomplete column name {last_col_name}"
                            )
                        col_fill = last_col_name[0]
                    columns_list = new_modin_frame.columns.tolist()
                    for level_index, level_value in enumerate(uniq_sorted_level):
                        level_name = levels_names_list[level_value]
                        # Expand tuples into separate items and fill the rest with col_fill
                        top_level = [col_fill] * col_level
                        middle_level = (
                            list(level_name)
                            if isinstance(level_name, tuple)
                            else [level_name]
                        )
                        bottom_level = [col_fill] * (
                            self.columns.nlevels - (col_level + len(middle_level))
                        )
                        item = tuple(top_level + middle_level + bottom_level)
                        if len(item) > self.columns.nlevels:
                            raise ValueError(
                                "Item must have length equal to number of levels."
                            )
                        columns_list[level_index] = item
                    new_modin_frame.columns = pandas.MultiIndex.from_tuples(
                        columns_list, names=self.columns.names
                    )
            new_self = self.__constructor__(new_modin_frame)
        else:
            # The labels are simply discarded: only the index of a copy is replaced.
            new_self = self.copy()
            new_self.index = (
                # Cheaper to compute row lengths than index
                pandas.RangeIndex(sum(new_self._modin_frame.row_lengths))
                if new_index is None
                else new_index
            )
        return new_self

    def set_index_from_columns(
        self, keys: List[Hashable], drop: bool = True, append: bool = False
    ):
        new_modin_frame = self._modin_frame.to_labels(keys)
        if append:
            # Appending keeps the original order of the index levels, then appends
            # the new index objects; names are collected in the same order.
            current_index = self.index
            level_names = list(current_index.names)
            if isinstance(current_index, pandas.MultiIndex):
                level_arrays = [
                    current_index._get_level_values(i)
                    for i in range(current_index.nlevels)
                ]
            else:
                level_arrays = [current_index]

            added_index = new_modin_frame.index
            level_names.extend(added_index.names)
            if isinstance(added_index, pandas.MultiIndex):
                level_arrays.extend(
                    added_index._get_level_values(i)
                    for i in range(added_index.nlevels)
                )
            else:
                level_arrays.append(added_index)
            new_modin_frame.index = ensure_index_from_sequences(
                level_arrays, level_names
            )

        if drop:
            result = new_modin_frame
        else:
            # The algebraic operator for this operation always drops the column, but
            # we can copy the data in this object and just use the index from the
            # result of the `to_labels` call.
            result = self._modin_frame.copy()
            result.index = new_modin_frame.index
        return self.__constructor__(result)

    # END Reindex/reset_index

    # Transpose
    # For transpose, we aren't going to immediately copy everything. Since the
    # actual transpose operation is very fast, we will just do it before any
    # operation that gets called on the transposed data. See _prepare_method
    # for how the transpose is applied.
    #
    # Our invariants assume that the blocks are transposed, but not the
    # data inside. Sometimes we have to reverse this transposition of blocks
    # for simplicity of implementation.

    def transpose(self, *args, **kwargs) -> PandasQueryCompiler:
        # Switch the index and columns and transpose the data within the blocks.
        # Positional and keyword arguments are accepted but not used.
        transposed_frame = self._modin_frame.transpose()
        return self.__constructor__(transposed_frame)

    def is_series_like(self):
        # A single column or a single row is enough; the columns are checked
        # first and the index is only inspected if there are several columns.
        if len(self.columns) == 1:
            return True
        return len(self.index) == 1

    # END Transpose

    # TreeReduce operations
    # Each attribute below is a method built by `TreeReduce.register`. When two
    # functions are passed, the first one is the map function and the second one
    # is the reduce function (partial counts, for example, are summed up).
    count = TreeReduce.register(pandas.DataFrame.count, pandas.DataFrame.sum)

    def _dtypes_sum(dtypes: pandas.Series, *func_args, **func_kwargs):  # noqa: GL08
        # The common type evaluation for `TreeReduce` operator may differ depending
        # on the pandas function, so it's better to pass a evaluation function that
        # should be defined for each Modin's function.
        # The extra positional and keyword arguments are accepted but not used.
        return find_common_type(dtypes.tolist())

    sum = TreeReduce.register(pandas.DataFrame.sum, compute_dtypes=_dtypes_sum)
    prod = TreeReduce.register(pandas.DataFrame.prod)
    any = TreeReduce.register(pandas.DataFrame.any, pandas.DataFrame.any)
    all = TreeReduce.register(pandas.DataFrame.all, pandas.DataFrame.all)
    # memory_usage adds an extra column for index usage, but we don't want to distribute
    # the index memory usage calculation.
    # The reduce function ignores the forwarded arguments and sums up the partial results.
    _memory_usage_without_index = TreeReduce.register(
        pandas.DataFrame.memory_usage,
        lambda x, *args, **kwargs: pandas.DataFrame.sum(x),
        axis=0,
    )

    def memory_usage(self, **kwargs):
        # Recognized keyword arguments:
        #   index : bool, default: True
        #       Whether to prepend the memory usage of the index as an "Index" column.
        #   deep : bool, default: False
        #       Forwarded to the memory usage computation of both the data and the index.
        index = kwargs.get("index", True)
        deep = kwargs.get("deep", False)
        # The usage of the columns is computed in a distributed way, the index is
        # deliberately excluded from that computation.
        usage_without_index = self._memory_usage_without_index(index=False, deep=deep)
        if not index:
            return usage_without_index

        # `deep` has to be honored for the index as well, otherwise the footprint
        # of e.g. an object index is under-reported compared to
        # `pandas.DataFrame.memory_usage(index=True, deep=True)`.
        index_usage = pandas.DataFrame(
            [self.index.memory_usage(deep=deep)],
            columns=["Index"],
            index=[MODIN_UNNAMED_SERIES_LABEL],
        )
        return self.from_pandas(
            index_usage,
            data_cls=type(self._modin_frame),
        ).concat(axis=1, other=[usage_without_index])

    def max(self, axis, **kwargs):
        def map_func(df, **kwargs):
            return pandas.DataFrame.max(df, **kwargs)

        def reduce_func(df, **kwargs):
            # `numeric_only` is switched off for the reduce phase (on a copy of
            # the keyword arguments); presumably the map phase has already done
            # the filtering - TODO confirm.
            if kwargs.get("numeric_only", False):
                kwargs = {**kwargs, "numeric_only": False}
            return pandas.DataFrame.max(df, **kwargs)

        tree_reduce = TreeReduce.register(map_func, reduce_func)
        return tree_reduce(self, axis=axis, **kwargs)

    def min(self, axis, **kwargs):
        def map_func(df, **kwargs):
            return pandas.DataFrame.min(df, **kwargs)

        def reduce_func(df, **kwargs):
            # `numeric_only` is switched off for the reduce phase (on a copy of
            # the keyword arguments); presumably the map phase has already done
            # the filtering - TODO confirm.
            if kwargs.get("numeric_only", False):
                kwargs = {**kwargs, "numeric_only": False}
            return pandas.DataFrame.min(df, **kwargs)

        tree_reduce = TreeReduce.register(map_func, reduce_func)
        return tree_reduce(self, axis=axis, **kwargs)

    def mean(self, axis, **kwargs):
        # `level` and ``axis=None`` are not handled by the distributed implementation.
        needs_pandas = kwargs.get("level") is not None or axis is None
        if needs_pandas:
            return self.default_to_pandas(pandas.DataFrame.mean, axis=axis, **kwargs)

        skipna = kwargs.get("skipna", True)

        # TODO-FIX: this function may work incorrectly with user-defined "numeric" values.
        # Since `count(numeric_only=True)` discards all unknown "numeric" types, we can get incorrect
        # divisor inside the reduce function.
        def map_fn(df, numeric_only=False, **kwargs):
            """
            Perform Map phase of the `mean`.

            Compute the sum and the number of elements in a given partition.
            """
            partial_sum = df.sum(axis=axis, skipna=skipna, numeric_only=numeric_only)
            partial_count = df.count(axis=axis, numeric_only=numeric_only)
            result = pandas.DataFrame({"sum": partial_sum, "count": partial_count})
            if axis:
                return result
            return result.T

        def reduce_fn(df, **kwargs):
            """
            Perform Reduce phase of the `mean`.

            Compute the sum for all the partitions and divide it by
            the total number of elements.
            """
            if axis:
                sum_cols, count_cols = df["sum"], df["count"]
            else:
                sum_cols, count_cols = df.loc["sum"], df.loc["count"]

            if not isinstance(sum_cols, pandas.Series):
                # If we got `NaN` as the result of the sum in any axis partition,
                # then we must consider the whole sum as `NaN`, so setting `skipna=False`
                sum_cols = sum_cols.sum(axis=axis, skipna=False)
                count_cols = count_cols.sum(axis=axis, skipna=False)
            return sum_cols / count_cols

        def compute_dtypes_fn(dtypes, axis, **kwargs):
            """
            Compute the resulting Series dtype.

            When computing along rows and there are numeric and boolean columns
            Pandas returns `object`. In all other cases - `float64`.
            """
            if axis != 1:
                return "float64"
            mixes_bool_and_numeric = any(is_bool_dtype(t) for t in dtypes) and any(
                is_numeric_dtype(t) for t in dtypes
            )
            return "object" if mixes_bool_and_numeric else "float64"

        tree_reduce = TreeReduce.register(
            map_fn,
            reduce_fn,
            compute_dtypes=compute_dtypes_fn,
        )
        return tree_reduce(self, axis=axis, **kwargs)

    # END TreeReduce operations

    # Reduce operations
    # Index-label reductions: each attribute is the result of passing the
    # matching ``pandas.DataFrame`` method to ``Reduce.register``.
    idxmax = Reduce.register(pandas.DataFrame.idxmax)
    idxmin = Reduce.register(pandas.DataFrame.idxmin)

    def median(self, axis, **kwargs):
        # A concrete axis goes through ``Reduce``; the whole-frame case
        # (``axis=None``) is delegated to pandas.
        if axis is not None:
            reduce_median = Reduce.register(pandas.DataFrame.median)
            return reduce_median(self, axis=axis, **kwargs)
        return self.default_to_pandas(pandas.DataFrame.median, axis=axis, **kwargs)

    def nunique(self, axis=0, dropna=True):
        def full_axis_nunique():
            # Plain implementation: run pandas ``nunique`` through ``Reduce``.
            return Reduce.register(pandas.DataFrame.nunique)(
                self, axis=axis, dropna=dropna
            )

        if not RangePartitioning.get():
            return full_axis_nunique()

        # Collect every reason the range-partitioning path cannot be used.
        blockers = []
        if axis != 0:
            blockers.append(
                "Range-partitioning 'nunique()' is only supported for 'axis=0'.\n"
            )
        if len(self.columns) > 1:
            blockers.append(
                "Range-partitioning 'nunique()' is only supported for a signle-column dataframe.\n"
            )

        if blockers:
            unsupported_message = "".join(blockers)
            message = (
                f"Can't use range-partitioning implementation for 'nunique' because:\n{unsupported_message}"
                + "Falling back to a full-axis reduce implementation."
            )
            get_logger().info(message)
            ErrorMessage.warn(message)
            return full_axis_nunique()

        # compute '.nunique()' for each row partition
        per_partition_frame = self._modin_frame._apply_func_to_range_partitioning(
            key_columns=self.columns.tolist(),
            func=lambda df: df.nunique(dropna=dropna).to_frame(),
        )
        # sum the results of each row part to get the final value
        summed_frame = per_partition_frame.reduce(
            axis=0, function=lambda df: df.sum()
        )
        return self.__constructor__(summed_frame, shape_hint="column")

    def skew(self, axis, **kwargs):
        # A concrete axis goes through ``Reduce``; the whole-frame case
        # (``axis=None``) is delegated to pandas.
        if axis is not None:
            reduce_skew = Reduce.register(pandas.DataFrame.skew)
            return reduce_skew(self, axis=axis, **kwargs)
        return self.default_to_pandas(pandas.DataFrame.skew, axis=axis, **kwargs)

    def kurt(self, axis, **kwargs):
        # A concrete axis goes through ``Reduce``; the whole-frame case
        # (``axis=None``) is delegated to pandas.
        if axis is not None:
            reduce_kurt = Reduce.register(pandas.DataFrame.kurt)
            return reduce_kurt(self, axis=axis, **kwargs)
        return self.default_to_pandas(pandas.DataFrame.kurt, axis=axis, **kwargs)

    # Reductions registered directly from the matching ``pandas.DataFrame``
    # methods via ``Reduce.register``.
    sem = Reduce.register(pandas.DataFrame.sem)
    std = Reduce.register(pandas.DataFrame.std)
    var = Reduce.register(pandas.DataFrame.var)
    # ``sum``/``prod`` variants exposed under ``*_min_count`` names; they wrap
    # the regular pandas ``sum`` and ``prod``.
    sum_min_count = Reduce.register(pandas.DataFrame.sum)
    prod_min_count = Reduce.register(pandas.DataFrame.prod)
    # Wraps pandas ``quantile``; the name suggests it is meant for a scalar
    # ``q`` - TODO confirm against the callers.
    quantile_for_single_value = Reduce.register(pandas.DataFrame.quantile)

    def to_datetime(self, *args, **kwargs):
        # Multi-column frames are converted by a row-wise ``Reduce``.
        if len(self.columns) != 1:
            reduce_to_datetime = Reduce.register(
                pandas.to_datetime, axis=1, shape_hint="column"
            )
            return reduce_to_datetime(self, *args, **kwargs)

        def convert_single_column(df, *args, **kwargs):
            # to_datetime has inplace side effects, see GH#3063
            squeezed = df.squeeze(axis=1)
            return pandas.to_datetime(squeezed, *args, **kwargs).to_frame()

        # A single column is squeezed, converted and turned back into a frame
        # partition by partition.
        map_to_datetime = Map.register(convert_single_column, shape_hint="column")
        return map_to_datetime(self, *args, **kwargs)

    # END Reduce operations

    def _resample_func(
        self,
        resample_kwargs,
        func_name,
        new_columns=None,
        df_op=None,
        allow_range_impl=True,
        *args,
        **kwargs,
    ):
        """
        Resample underlying time-series data and apply aggregation on it.

        Parameters
        ----------
        resample_kwargs : dict
            Resample parameters in the format of ``modin.pandas.DataFrame.resample`` signature.
        func_name : str
            Aggregation function name to apply on resampler object.
        new_columns : list of labels, optional
            Actual column labels of the resulted frame, supposed to be a hint for the
            Modin frame. If not specified will be computed automatically.
        df_op : callable(pandas.DataFrame) -> [pandas.DataFrame, pandas.Series], optional
            Preprocessor function to apply to the passed frame before resampling.
        allow_range_impl : bool, default: True
            Whether to use range-partitioning if ``RangePartitioning.get() is True``.
        *args : args
            Arguments to pass to the aggregation function.
        **kwargs : kwargs
            Arguments to pass to the aggregation function.

        Returns
        -------
        PandasQueryCompiler
            New QueryCompiler containing the result of resample aggregation.

        Notes
        -----
        ``*args`` is declared after three defaulted parameters, so positional
        arguments beyond ``func_name`` bind to ``new_columns``, ``df_op`` and
        ``allow_range_impl`` before anything reaches ``*args``. Callers should
        pass those three by keyword.
        """
        from modin.core.dataframe.pandas.dataframe.utils import ShuffleResample

        # ``resample_kwargs`` is bound as a default argument so the function can
        # rebind its own local copy below without touching the outer variable.
        def map_func(df, resample_kwargs=resample_kwargs):  # pragma: no cover
            """Resample time-series data of the passed frame and apply aggregation function on it."""
            # Nothing to resample in an empty frame; only move the ``on`` column
            # into the index when one was requested.
            if len(df) == 0:
                if resample_kwargs["on"] is not None:
                    df = df.set_index(resample_kwargs["on"])
                return df
            # ``bin_bounds`` is presumably attached by the range-partitioning
            # shuffle (``ShuffleResample``) - TODO confirm.
            if "bin_bounds" in df.attrs:
                timestamps = df.attrs["bin_bounds"]
                if isinstance(df.index, pandas.MultiIndex):
                    # Keep only the index level being resampled, whether it was
                    # given by position or by name.
                    level_to_keep = resample_kwargs["level"]
                    if isinstance(level_to_keep, int):
                        to_drop = [
                            lvl
                            for lvl in range(df.index.nlevels)
                            if lvl != level_to_keep
                        ]
                    else:
                        to_drop = [
                            lvl for lvl in df.index.names if lvl != level_to_keep
                        ]
                    df.index = df.index.droplevel(to_drop)
                    # The index is flat now, so ``level`` must no longer be passed;
                    # work on a copy to leave the shared dict untouched.
                    resample_kwargs = resample_kwargs.copy()
                    resample_kwargs["level"] = None
                # Append all-NaN rows located at the bin bounds.
                filler = pandas.DataFrame(
                    np.nan, index=pandas.Index(timestamps), columns=df.columns
                )
                df = pandas.concat([df, filler], copy=False)
            if df_op is not None:
                df = df_op(df)
            resampled_val = df.resample(**resample_kwargs)
            # Look the operation up on the ``Resampler`` class; non-callable
            # attributes are read from the resampler instance instead.
            op = getattr(pandas.core.resample.Resampler, func_name)
            if callable(op):
                try:
                    # This will happen with Arrow buffer read-only errors. We don't want to copy
                    # all the time, so this will try to fast-path the code first.
                    val = op(resampled_val, *args, **kwargs)
                except ValueError:
                    # Retry once on a copy of the data.
                    resampled_val = df.copy().resample(**resample_kwargs)
                    val = op(resampled_val, *args, **kwargs)
            else:
                val = getattr(resampled_val, func_name)

            # Always hand a frame back.
            if isinstance(val, pandas.Series):
                return val.to_frame()
            else:
                return val

        # Range-partitioning key: an index level (default 0) when resampling on
        # the index, otherwise the ``on`` column.
        if resample_kwargs["on"] is None:
            level = [
                0 if resample_kwargs["level"] is None else resample_kwargs["level"]
            ]
            key_columns = []
        else:
            level = None
            key_columns = [resample_kwargs["on"]]

        # Use the full-axis implementation unless range-partitioning is allowed
        # by the caller, enabled in the config, and the resample is along rows.
        if (
            not allow_range_impl
            or resample_kwargs["axis"] not in (0, "index")
            or not RangePartitioning.get()
        ):
            new_modin_frame = self._modin_frame.apply_full_axis(
                axis=0, func=map_func, new_columns=new_columns
            )
        else:
            new_modin_frame = self._modin_frame._apply_func_to_range_partitioning(
                key_columns=key_columns,
                level=level,
                func=map_func,
                shuffle_func_cls=ShuffleResample,
                resample_kwargs=resample_kwargs,
            )
        return self.__constructor__(new_modin_frame)

    def resample_get_group(self, resample_kwargs, name, obj):
        # ``get_group`` is always computed without range-partitioning.
        return self._resample_func(
            resample_kwargs,
            "get_group",
            allow_range_impl=False,
            name=name,
            obj=obj,
        )

    def resample_app_ser(self, resample_kwargs, func, *args, **kwargs):
        # Series flavour of ``apply``: each frame is squeezed along the column
        # axis before it is resampled.
        def squeeze_columns(df):
            return df.squeeze(axis=1)

        return self._resample_func(
            resample_kwargs,
            "apply",
            *args,
            df_op=squeeze_columns,
            func=func,
            **kwargs,
        )

    def resample_app_df(self, resample_kwargs, func, *args, **kwargs):
        # DataFrame flavour of ``apply``: no preprocessing of the frame.
        return self._resample_func(
            resample_kwargs,
            "apply",
            *args,
            func=func,
            **kwargs,
        )

    def resample_agg_ser(self, resample_kwargs, func, *args, **kwargs):
        # Series flavour of ``aggregate``: each frame is squeezed along the
        # column axis before it is resampled.
        def squeeze_columns(df):
            return df.squeeze(axis=1)

        return self._resample_func(
            resample_kwargs,
            "aggregate",
            *args,
            df_op=squeeze_columns,
            func=func,
            **kwargs,
        )

    def resample_agg_df(self, resample_kwargs, func, *args, **kwargs):
        # DataFrame flavour of ``aggregate``: no preprocessing of the frame.
        return self._resample_func(
            resample_kwargs,
            "aggregate",
            *args,
            func=func,
            **kwargs,
        )

    def resample_transform(self, resample_kwargs, arg, *args, **kwargs):
        # ``transform`` is always computed without range-partitioning.
        return self._resample_func(
            resample_kwargs,
            "transform",
            *args,
            allow_range_impl=False,
            arg=arg,
            **kwargs,
        )

    def resample_pipe(self, resample_kwargs, func, *args, **kwargs):
        # Forward ``func`` and any extra arguments to the resampler ``pipe``.
        return self._resample_func(
            resample_kwargs,
            "pipe",
            *args,
            func=func,
            **kwargs,
        )

    def resample_ffill(self, resample_kwargs, limit):
        # Forward fill; always computed without range-partitioning.
        return self._resample_func(
            resample_kwargs,
            "ffill",
            allow_range_impl=False,
            limit=limit,
        )

    def resample_bfill(self, resample_kwargs, limit):
        # Backward fill; always computed without range-partitioning.
        return self._resample_func(
            resample_kwargs,
            "bfill",
            allow_range_impl=False,
            limit=limit,
        )

    def resample_nearest(self, resample_kwargs, limit):
        # Nearest-value fill; always computed without range-partitioning.
        return self._resample_func(
            resample_kwargs,
            "nearest",
            allow_range_impl=False,
            limit=limit,
        )

    def resample_fillna(self, resample_kwargs, method, limit):
        # Range-partitioning is only permitted when no fill method is given.
        range_impl_allowed = method is None
        return self._resample_func(
            resample_kwargs,
            "fillna",
            allow_range_impl=range_impl_allowed,
            method=method,
            limit=limit,
        )

    def resample_asfreq(self, resample_kwargs, fill_value):
        # Forward ``fill_value`` to the resampler ``asfreq``.
        return self._resample_func(
            resample_kwargs,
            func_name="asfreq",
            fill_value=fill_value,
        )

    def resample_interpolate(
        self,
        resample_kwargs,
        method,
        axis,
        limit,
        inplace,
        limit_direction,
        limit_area,
        downcast,
        **kwargs,
    ):
        # Interpolation is always computed without range-partitioning.
        #
        # ``method`` has to be forwarded explicitly: it used to be accepted and
        # then dropped, so the resampler always fell back to its own default
        # interpolation method regardless of what the caller asked for.
        return self._resample_func(
            resample_kwargs,
            "interpolate",
            method=method,
            axis=axis,
            limit=limit,
            inplace=inplace,
            limit_direction=limit_direction,
            limit_area=limit_area,
            downcast=downcast,
            allow_range_impl=False,
            **kwargs,
        )

    def resample_count(self, resample_kwargs):
        # Apply the resampler ``count`` with no extra arguments.
        return self._resample_func(resample_kwargs, func_name="count")

    def resample_nunique(self, resample_kwargs, *args, **kwargs):
        # Apply the resampler ``nunique``, forwarding every extra argument.
        aggregation = "nunique"
        return self._resample_func(resample_kwargs, aggregation, *args, **kwargs)

    def resample_first(self, resample_kwargs, *args, **kwargs):
        # ``first`` is always computed without range-partitioning.
        return self._resample_func(
            resample_kwargs,
            "first",
            *args,
            allow_range_impl=False,
            **kwargs,
        )

    def resample_last(self, resample_kwargs, *args, **kwargs):
        # ``last`` is always computed without range-partitioning.
        return self._resample_func(
            resample_kwargs,
            "last",
            *args,
            allow_range_impl=False,
            **kwargs,
        )

    def resample_max(self, resample_kwargs, *args, **kwargs):
        # Apply the resampler ``max``, forwarding every extra argument.
        aggregation = "max"
        return self._resample_func(resample_kwargs, aggregation, *args, **kwargs)

    def resample_mean(self, resample_kwargs, *args, **kwargs):
        # Apply the resampler ``mean``, forwarding every extra argument.
        aggregation = "mean"
        return self._resample_func(resample_kwargs, aggregation, *args, **kwargs)

    def resample_median(self, resample_kwargs, *args, **kwargs):
        # Apply the resampler ``median``, forwarding every extra argument.
        aggregation = "median"
        return self._resample_func(resample_kwargs, aggregation, *args, **kwargs)

    def resample_min(self, resample_kwargs, *args, **kwargs):
        # Apply the resampler ``min``, forwarding every extra argument.
        aggregation = "min"
        return self._resample_func(resample_kwargs, aggregation, *args, **kwargs)

    def resample_ohlc_ser(self, resample_kwargs, *args, **kwargs):
        # Series flavour of ``ohlc``: each frame is squeezed along the column
        # axis before it is resampled.
        def squeeze_columns(df):
            return df.squeeze(axis=1)

        return self._resample_func(
            resample_kwargs,
            "ohlc",
            *args,
            df_op=squeeze_columns,
            **kwargs,
        )

    def resample_ohlc_df(self, resample_kwargs, *args, **kwargs):
        # DataFrame flavour of ``ohlc``: no preprocessing of the frame.
        aggregation = "ohlc"
        return self._resample_func(resample_kwargs, aggregation, *args, **kwargs)

    def resample_prod(self, resample_kwargs, min_count, *args, **kwargs):
        # Apply the resampler ``prod``, forwarding ``min_count`` by keyword.
        return self._resample_func(
            resample_kwargs,
            "prod",
            *args,
            min_count=min_count,
            **kwargs,
        )

    def resample_size(self, resample_kwargs):
        # The result is labelled with the unnamed-series placeholder column and
        # is always computed without range-partitioning.
        result_columns = [MODIN_UNNAMED_SERIES_LABEL]
        return self._resample_func(
            resample_kwargs,
            "size",
            allow_range_impl=False,
            new_columns=result_columns,
        )

    def resample_sem(self, resample_kwargs, *args, **kwargs):
        # Apply the resampler ``sem``, forwarding every extra argument.
        aggregation = "sem"
        return self._resample_func(resample_kwargs, aggregation, *args, **kwargs)

    def resample_std(self, resample_kwargs, ddof, *args, **kwargs):
        # Apply the resampler ``std``, forwarding ``ddof`` by keyword.
        return self._resample_func(
            resample_kwargs,
            "std",
            *args,
            ddof=ddof,
            **kwargs,
        )

    def resample_sum(self, resample_kwargs, min_count, *args, **kwargs):
        # Apply the resampler ``sum``, forwarding ``min_count`` by keyword.
        return self._resample_func(
            resample_kwargs,
            "sum",
            *args,
            min_count=min_count,
            **kwargs,
        )

    def resample_var(self, resample_kwargs, ddof, *args, **kwargs):
        # Apply the resampler ``var``, forwarding ``ddof`` by keyword.
        return self._resample_func(
            resample_kwargs,
            "var",
            *args,
            ddof=ddof,
            **kwargs,
        )

    def resample_quantile(self, resample_kwargs, q, **kwargs):
        # Apply the resampler ``quantile``, forwarding ``q`` by keyword.
        aggregation = "quantile"
        return self._resample_func(
            resample_kwargs,
            aggregation,
            q=q,
            **kwargs,
        )

    def expanding_aggregate(self, axis, expanding_args, func, *args, **kwargs):
        def aggregate_expanding(df):
            # Build the expanding window and wrap the aggregation result in a frame.
            window = df.expanding(*expanding_args)
            return pandas.DataFrame(window.aggregate(*args, func=func, **kwargs))

        # The aggregation needs whole axes; the original index is kept for the result.
        aggregated_frame = self._modin_frame.apply_full_axis(
            axis,
            aggregate_expanding,
            new_index=self.index,
        )
        return self.__constructor__(aggregated_frame)

    # Expanding-window aggregations registered through ``Fold.register``.
    # Every lambda builds a pandas expanding window from ``expanding_args``,
    # calls the named aggregation with the remaining arguments and wraps the
    # result in a ``pandas.DataFrame``. All are registered with
    # ``shape_preserved=True``.
    expanding_sum = Fold.register(
        lambda df, expanding_args, *args, **kwargs: pandas.DataFrame(
            df.expanding(*expanding_args).sum(*args, **kwargs)
        ),
        shape_preserved=True,
    )

    expanding_min = Fold.register(
        lambda df, expanding_args, *args, **kwargs: pandas.DataFrame(
            df.expanding(*expanding_args).min(*args, **kwargs)
        ),
        shape_preserved=True,
    )

    expanding_max = Fold.register(
        lambda df, expanding_args, *args, **kwargs: pandas.DataFrame(
            df.expanding(*expanding_args).max(*args, **kwargs)
        ),
        shape_preserved=True,
    )

    expanding_mean = Fold.register(
        lambda df, expanding_args, *args, **kwargs: pandas.DataFrame(
            df.expanding(*expanding_args).mean(*args, **kwargs)
        ),
        shape_preserved=True,
    )

    expanding_median = Fold.register(
        lambda df, expanding_args, *args, **kwargs: pandas.DataFrame(
            df.expanding(*expanding_args).median(*args, **kwargs)
        ),
        shape_preserved=True,
    )

    expanding_var = Fold.register(
        lambda df, expanding_args, *args, **kwargs: pandas.DataFrame(
            df.expanding(*expanding_args).var(*args, **kwargs)
        ),
        shape_preserved=True,
    )

    expanding_std = Fold.register(
        lambda df, expanding_args, *args, **kwargs: pandas.DataFrame(
            df.expanding(*expanding_args).std(*args, **kwargs)
        ),
        shape_preserved=True,
    )

    expanding_count = Fold.register(
        lambda df, expanding_args, *args, **kwargs: pandas.DataFrame(
            df.expanding(*expanding_args).count(*args, **kwargs)
        ),
        shape_preserved=True,
    )

    def expanding_cov(
        self,
        fold_axis,
        expanding_args,
        squeeze_self,
        squeeze_other,
        other=None,
        pairwise=None,
        ddof=1,
        numeric_only=False,
        **kwargs,
    ):
        # Convert ``other`` (when given) to pandas, optionally squeezing it
        # along the column axis.
        other_for_pandas = other
        if other is not None:
            other_for_pandas = other.to_pandas()
            if squeeze_other:
                other_for_pandas = other_for_pandas.squeeze(axis=1)

        cov_kwargs = dict(
            other=other_for_pandas,
            pairwise=pairwise,
            ddof=ddof,
            numeric_only=numeric_only,
            **kwargs,
        )

        if len(self.columns) > 1:
            # computing covariance for each column requires having the other columns,
            # so we can't parallelize this as a full-column operation
            def cov_in_pandas(df):
                return pandas.DataFrame.expanding(df, *expanding_args).cov(**cov_kwargs)

            return self.default_to_pandas(cov_in_pandas)

        def fold_cov(df, expanding_args, *args, **kwargs):
            # Optionally squeeze this frame along the column axis first.
            target = df.squeeze(axis=1) if squeeze_self else df
            return pandas.DataFrame(
                target.expanding(*expanding_args).cov(*args, **kwargs)
            )

        registered_cov = Fold.register(fold_cov, shape_preserved=True)
        return registered_cov(self, fold_axis, expanding_args, **cov_kwargs)

    def expanding_corr(
        self,
        fold_axis,
        expanding_args,
        squeeze_self,
        squeeze_other,
        other=None,
        pairwise=None,
        ddof=1,
        numeric_only=False,
        **kwargs,
    ):
        # Convert ``other`` (when given) to pandas, optionally squeezing it
        # along the column axis.
        other_for_pandas = other
        if other is not None:
            other_for_pandas = other.to_pandas()
            if squeeze_other:
                other_for_pandas = other_for_pandas.squeeze(axis=1)

        corr_kwargs = dict(
            other=other_for_pandas,
            pairwise=pairwise,
            ddof=ddof,
            numeric_only=numeric_only,
            **kwargs,
        )

        if len(self.columns) > 1:
            # computing correlation for each column requires having the other columns,
            # so we can't parallelize this as a full-column operation
            def corr_in_pandas(df):
                return pandas.DataFrame.expanding(df, *expanding_args).corr(
                    **corr_kwargs
                )

            return self.default_to_pandas(corr_in_pandas)

        def fold_corr(df, expanding_args, *args, **kwargs):
            # Optionally squeeze this frame along the column axis first.
            target = df.squeeze(axis=1) if squeeze_self else df
            return pandas.DataFrame(
                target.expanding(*expanding_args).corr(*args, **kwargs)
            )

        registered_corr = Fold.register(fold_corr, shape_preserved=True)
        return registered_corr(self, fold_axis, expanding_args, **corr_kwargs)

    # More expanding-window aggregations registered through ``Fold.register``.
    # Every lambda builds a pandas expanding window from ``expanding_args``,
    # calls the named aggregation with the remaining arguments and wraps the
    # result in a ``pandas.DataFrame``.
    expanding_quantile = Fold.register(
        lambda df, expanding_args, *args, **kwargs: pandas.DataFrame(
            df.expanding(*expanding_args).quantile(*args, **kwargs)
        ),
        shape_preserved=True,
    )

    expanding_sem = Fold.register(
        lambda df, expanding_args, *args, **kwargs: pandas.DataFrame(
            df.expanding(*expanding_args).sem(*args, **kwargs)
        ),
        shape_preserved=True,
    )

    expanding_kurt = Fold.register(
        lambda df, expanding_args, *args, **kwargs: pandas.DataFrame(
            df.expanding(*expanding_args).kurt(*args, **kwargs)
        ),
        shape_preserved=True,
    )

    expanding_skew = Fold.register(
        lambda df, expanding_args, *args, **kwargs: pandas.DataFrame(
            df.expanding(*expanding_args).skew(*args, **kwargs)
        ),
        shape_preserved=True,
    )

    expanding_rank = Fold.register(
        lambda df, expanding_args, *args, **kwargs: pandas.DataFrame(
            df.expanding(*expanding_args).rank(*args, **kwargs)
        ),
        shape_preserved=True,
    )

    # Rolling-window aggregations registered through ``Fold.register``.
    # Every lambda builds a pandas rolling window from the ``rolling_kwargs``
    # dict, calls the named aggregation and wraps the result in a
    # ``pandas.DataFrame``. All are registered with ``shape_preserved=True``.
    #
    # ``window_*`` entries use the same lambda bodies as the ``rolling_*``
    # entries of the same name; presumably they serve windows created with a
    # ``win_type`` - TODO confirm against the callers.
    window_mean = Fold.register(
        lambda df, rolling_kwargs, *args, **kwargs: pandas.DataFrame(
            df.rolling(**rolling_kwargs).mean(*args, **kwargs)
        ),
        shape_preserved=True,
    )
    window_sum = Fold.register(
        lambda df, rolling_kwargs, *args, **kwargs: pandas.DataFrame(
            df.rolling(**rolling_kwargs).sum(*args, **kwargs)
        ),
        shape_preserved=True,
    )
    window_var = Fold.register(
        lambda df, rolling_kwargs, ddof, *args, **kwargs: pandas.DataFrame(
            df.rolling(**rolling_kwargs).var(ddof=ddof, *args, **kwargs)
        ),
        shape_preserved=True,
    )
    window_std = Fold.register(
        lambda df, rolling_kwargs, ddof, *args, **kwargs: pandas.DataFrame(
            df.rolling(**rolling_kwargs).std(ddof=ddof, *args, **kwargs)
        ),
        shape_preserved=True,
    )
    # ``count`` takes no arguments besides the window description.
    rolling_count = Fold.register(
        lambda df, rolling_kwargs: pandas.DataFrame(
            df.rolling(**rolling_kwargs).count()
        ),
        shape_preserved=True,
    )
    rolling_sum = Fold.register(
        lambda df, rolling_kwargs, *args, **kwargs: pandas.DataFrame(
            df.rolling(**rolling_kwargs).sum(*args, **kwargs)
        ),
        shape_preserved=True,
    )
    rolling_sem = Fold.register(
        lambda df, rolling_kwargs, *args, **kwargs: pandas.DataFrame(
            df.rolling(**rolling_kwargs).sem(*args, **kwargs)
        ),
        shape_preserved=True,
    )
    rolling_mean = Fold.register(
        lambda df, rolling_kwargs, *args, **kwargs: pandas.DataFrame(
            df.rolling(**rolling_kwargs).mean(*args, **kwargs)
        ),
        shape_preserved=True,
    )
    # ``median``, ``skew`` and ``kurt`` accept keyword arguments only.
    rolling_median = Fold.register(
        lambda df, rolling_kwargs, **kwargs: pandas.DataFrame(
            df.rolling(**rolling_kwargs).median(**kwargs)
        ),
        shape_preserved=True,
    )
    rolling_var = Fold.register(
        lambda df, rolling_kwargs, ddof, *args, **kwargs: pandas.DataFrame(
            df.rolling(**rolling_kwargs).var(ddof=ddof, *args, **kwargs)
        ),
        shape_preserved=True,
    )
    rolling_std = Fold.register(
        lambda df, rolling_kwargs, ddof, *args, **kwargs: pandas.DataFrame(
            df.rolling(**rolling_kwargs).std(ddof=ddof, *args, **kwargs)
        ),
        shape_preserved=True,
    )
    rolling_min = Fold.register(
        lambda df, rolling_kwargs, *args, **kwargs: pandas.DataFrame(
            df.rolling(**rolling_kwargs).min(*args, **kwargs)
        ),
        shape_preserved=True,
    )
    rolling_max = Fold.register(
        lambda df, rolling_kwargs, *args, **kwargs: pandas.DataFrame(
            df.rolling(**rolling_kwargs).max(*args, **kwargs)
        ),
        shape_preserved=True,
    )
    rolling_skew = Fold.register(
        lambda df, rolling_kwargs, **kwargs: pandas.DataFrame(
            df.rolling(**rolling_kwargs).skew(**kwargs)
        ),
        shape_preserved=True,
    )
    rolling_kurt = Fold.register(
        lambda df, rolling_kwargs, **kwargs: pandas.DataFrame(
            df.rolling(**rolling_kwargs).kurt(**kwargs)
        ),
        shape_preserved=True,
    )
    # Here ``args`` and ``kwargs`` are ordinary parameters (a sequence and a
    # dict meant for the user function), not ``*args``/``**kwargs``.
    rolling_apply = Fold.register(
        lambda df, rolling_kwargs, func, raw, engine, engine_kwargs, args, kwargs: pandas.DataFrame(
            df.rolling(**rolling_kwargs).apply(
                func=func,
                raw=raw,
                engine=engine,
                engine_kwargs=engine_kwargs,
                args=args,
                kwargs=kwargs,
            ),
        ),
        shape_preserved=True,
    )
    rolling_quantile = Fold.register(
        lambda df, rolling_kwargs, q, interpolation, **kwargs: pandas.DataFrame(
            df.rolling(**rolling_kwargs).quantile(
                q=q, interpolation=interpolation, **kwargs
            ),
        ),
        shape_preserved=True,
    )
    rolling_rank = Fold.register(
        lambda df, rolling_kwargs, method, ascending, pct, numeric_only, **kwargs: pandas.DataFrame(
            df.rolling(**rolling_kwargs).rank(
                method=method,
                ascending=ascending,
                pct=pct,
                numeric_only=numeric_only,
                **kwargs,
            ),
        ),
        shape_preserved=True,
    )

    def rolling_corr(self, axis, rolling_kwargs, other, pairwise, *args, **kwargs):
        def corr_of(window):
            # Shared by both code paths: correlation of a rolling window.
            return window.corr(*args, other=other, pairwise=pairwise, **kwargs)

        # Frames with more than one column are handled by pandas as a whole.
        if len(self.columns) > 1:
            return self.default_to_pandas(
                lambda df: corr_of(pandas.DataFrame.rolling(df, **rolling_kwargs))
            )

        fold_corr = Fold.register(
            lambda df: pandas.DataFrame(corr_of(df.rolling(**rolling_kwargs))),
            shape_preserved=True,
        )
        return fold_corr(self, axis)

    def rolling_cov(self, axis, rolling_kwargs, other, pairwise, ddof, **kwargs):
        def cov_of(window):
            # Shared by both code paths: covariance of a rolling window.
            return window.cov(other=other, pairwise=pairwise, ddof=ddof, **kwargs)

        # Frames with more than one column are handled by pandas as a whole.
        if len(self.columns) > 1:
            return self.default_to_pandas(
                lambda df: cov_of(pandas.DataFrame.rolling(df, **rolling_kwargs))
            )

        fold_cov = Fold.register(
            lambda df: pandas.DataFrame(cov_of(df.rolling(**rolling_kwargs))),
            shape_preserved=True,
        )
        return fold_cov(self, axis)

    def rolling_aggregate(self, axis, rolling_kwargs, func, *args, **kwargs):
        def aggregate_rolling(df):
            # Build the rolling window and wrap the aggregation result in a frame.
            window = df.rolling(**rolling_kwargs)
            return pandas.DataFrame(window.aggregate(*args, func=func, **kwargs))

        # The aggregation needs whole axes; the original index is kept for the result.
        aggregated_frame = self._modin_frame.apply_full_axis(
            axis,
            aggregate_rolling,
            new_index=self.index,
        )
        return self.__constructor__(aggregated_frame)

    def unstack(self, level, fill_value):
        # Run pandas ``unstack`` over full-axis partitions and, where required,
        # rebuild the resulting index afterwards.
        #
        # When the index is flat, or every level of a MultiIndex is unstacked,
        # the function is applied along axis 1, the result gets the single
        # unnamed-series column and its index has to be recomputed.
        if not isinstance(self.index, pandas.MultiIndex) or (
            isinstance(self.index, pandas.MultiIndex)
            and is_list_like(level)
            and len(level) == self.index.nlevels
        ):
            axis = 1
            new_columns = [MODIN_UNNAMED_SERIES_LABEL]
            need_reindex = True
        else:
            axis = 0
            new_columns = None
            need_reindex = False

        def map_func(df):  # pragma: no cover
            # Wrap in ``DataFrame`` so the result is always a frame.
            return pandas.DataFrame(df.unstack(level=level, fill_value=fill_value))

        def is_tree_like_or_1d(calc_index, valid_index):
            """
            Check whether specified index is a single dimensional or built in a tree manner.

            Parameters
            ----------
            calc_index : pandas.Index
                Frame index to check.
            valid_index : pandas.Index
                Frame index on the opposite from `calc_index` axis.

            Returns
            -------
            bool
                True if `calc_index` is not MultiIndex or MultiIndex and built in a tree manner.
                False otherwise.
            """
            if not isinstance(calc_index, pandas.MultiIndex):
                return True
            # Product of the level sizes, i.e. the length of a full cartesian
            # product of the levels.
            actual_len = 1
            for lvl in calc_index.levels:
                actual_len *= len(lvl)
            return len(self.index) * len(self.columns) == actual_len * len(valid_index)

        is_tree_like_or_1d_index = is_tree_like_or_1d(self.index, self.columns)
        is_tree_like_or_1d_cols = is_tree_like_or_1d(self.columns, self.index)

        # Special case: both axes are MultiIndexes, all index levels are being
        # unstacked and both axes pass the tree-like check. The columns are
        # temporarily replaced with integer positions on a copy; the real
        # columns are kept in ``real_cols_bkp`` to build the final index.
        is_all_multi_list = False
        if (
            isinstance(self.index, pandas.MultiIndex)
            and isinstance(self.columns, pandas.MultiIndex)
            and is_list_like(level)
            and len(level) == self.index.nlevels
            and is_tree_like_or_1d_index
            and is_tree_like_or_1d_cols
        ):
            is_all_multi_list = True
            real_cols_bkp = self.columns
            obj = self.copy()
            obj.columns = np.arange(len(obj.columns))
        else:
            obj = self

        new_modin_frame = obj._modin_frame.apply_full_axis(
            axis, map_func, new_columns=new_columns
        )
        result = self.__constructor__(new_modin_frame)

        def compute_index(index, columns, consider_index=True, consider_columns=True):
            """
            Compute new index for the unstacked frame.

            Parameters
            ----------
            index : pandas.Index
                Index of the original frame.
            columns : pandas.Index
                Columns of the original frame.
            consider_index : bool, default: True
                Whether original index contains duplicated values.
                If True all duplicates will be dropped.
            consider_columns : bool, default: True
                Whether original columns contains duplicated values.
                If True all duplicates will be dropped.

            Returns
            -------
            pandas.Index
                New index to use in the unstacked frame.
            """

            def get_unique_level_values(index):
                return [
                    index.get_level_values(lvl).unique()
                    for lvl in np.arange(index.nlevels)
                ]

            # With ``consider_index=False`` the index is used as given: a list
            # of levels is taken as is, a single index is wrapped in a list.
            new_index = (
                get_unique_level_values(index)
                if consider_index
                else index if isinstance(index, list) else [index]
            )

            new_columns = (
                get_unique_level_values(columns) if consider_columns else [columns]
            )
            # Column levels come first, followed by the index levels.
            return pandas.MultiIndex.from_product([*new_columns, *new_index])

        if is_all_multi_list and is_tree_like_or_1d_index and is_tree_like_or_1d_cols:
            # Sort the positional result, then install an index built from the
            # real columns and the index levels.
            result = result.sort_index()
            index_level_values = [lvl for lvl in obj.index.levels]

            result.index = compute_index(
                index_level_values, real_cols_bkp, consider_index=False
            )
            return result

        if need_reindex:
            if is_tree_like_or_1d_index and is_tree_like_or_1d_cols:
                # Deduplicate level values only for the axis that is a
                # MultiIndex; the index takes priority over the columns.
                is_recompute_index = isinstance(self.index, pandas.MultiIndex)
                is_recompute_columns = not is_recompute_index and isinstance(
                    self.columns, pandas.MultiIndex
                )
                new_index = compute_index(
                    self.index, self.columns, is_recompute_index, is_recompute_columns
                )
            elif is_tree_like_or_1d_index != is_tree_like_or_1d_cols:
                # Exactly one axis passes the tree-like check.
                if isinstance(self.columns, pandas.MultiIndex) or not isinstance(
                    self.index, pandas.MultiIndex
                ):
                    return result
                else:
                    # Flat columns with a MultiIndex index: pair every column
                    # label with every index entry explicitly.
                    index = (
                        self.index.sortlevel()[0]
                        if is_tree_like_or_1d_index
                        and not is_tree_like_or_1d_cols
                        and isinstance(self.index, pandas.MultiIndex)
                        else self.index
                    )
                    index = pandas.MultiIndex.from_tuples(
                        list(index) * len(self.columns)
                    )
                    columns = self.columns.repeat(len(self.index))
                    index_levels = [
                        index.get_level_values(i) for i in range(index.nlevels)
                    ]
                    new_index = pandas.MultiIndex.from_arrays(
                        [columns] + index_levels,
                        names=self.columns.names + self.index.names,
                    )
            else:
                # Neither axis passes the tree-like check: keep the result as is.
                return result
            result = result.reindex(0, new_index)
        return result

    def stack(self, level, dropna, sort):
        # The result gets the single unnamed-series column label when the columns
        # are not a MultiIndex, or when a list-like `level` names as many levels
        # as the column MultiIndex has. Otherwise no column labels are passed.
        columns = self.columns
        single_unnamed_column = not isinstance(columns, pandas.MultiIndex) or (
            is_list_like(level) and len(level) == columns.nlevels
        )
        new_columns = [MODIN_UNNAMED_SERIES_LABEL] if single_unnamed_column else None

        return self.__constructor__(
            self._modin_frame.apply_full_axis(
                1,
                lambda df: pandas.DataFrame(
                    df.stack(level=level, dropna=dropna, sort=sort)
                ),
                new_columns=new_columns,
            )
        )

    # Map partitions operations
    # These operations are operations that apply a function to every partition.
    def isin(self, values, ignore_indices=False):
        shape_hint = self._shape_hint
        if isinstance(values, type(self)):
            # HACK: if we don't cast to pandas, then the execution engine will try to
            # propagate the distributed Series to workers and most likely would have
            # some performance problems.
            # TODO: A better way of doing so could be passing this `values` as a query compiler
            # and broadcast accordingly.
            values = values.to_pandas()
            if ignore_indices:
                # Pandas logic is that it ignores indexing if 'values' is a 1D object
                values = values.squeeze(axis=1)

        def isin_func(df, values):
            """Apply `isin` to a single partition, always returning a frame."""
            target = df.squeeze(axis=1) if shape_hint == "column" else df
            mask = target.isin(values)
            if not isinstance(mask, pandas.Series):
                return mask
            # A Series result is converted back to a one-column frame, falling
            # back to the unnamed-series label when the Series has no name.
            label = MODIN_UNNAMED_SERIES_LABEL if mask.name is None else mask.name
            return mask.to_frame(label)

        mapper = Map.register(isin_func, shape_hint=shape_hint, dtypes=np.bool_)
        return mapper(self, values)

    # Operations built by `Map.register` from a pandas/NumPy callable.
    # NOTE(review): `dtypes="copy"` presumably means the result keeps the input
    # dtypes — confirm against `Map.register`.
    abs = Map.register(pandas.DataFrame.abs, dtypes="copy")
    map = Map.register(pandas.DataFrame.map)
    # `conj` accepts extra positional/keyword arguments but ignores them.
    conj = Map.register(lambda df, *args, **kwargs: pandas.DataFrame(np.conj(df)))

    def convert_dtypes(
        self,
        infer_objects: bool = True,
        convert_string: bool = True,
        convert_integer: bool = True,
        convert_boolean: bool = True,
        convert_floating: bool = True,
        dtype_backend: str = "numpy_nullable",
    ):
        # All options are forwarded unchanged to `pandas.DataFrame.convert_dtypes`,
        # which is applied through a shape-preserving fold operator.
        fold_convert = Fold.register(
            pandas.DataFrame.convert_dtypes, shape_preserved=True
        )
        converted = fold_convert(
            self,
            infer_objects=infer_objects,
            convert_string=convert_string,
            convert_integer=convert_integer,
            convert_boolean=convert_boolean,
            convert_floating=convert_floating,
            dtype_backend=dtype_backend,
        )
        # TODO: `numpy_nullable` should be handled similar
        if dtype_backend == "pyarrow":
            # Record the backend on the resulting frame.
            converted._modin_frame._pandas_backend = "pyarrow"
        return converted

    # More operations built by `Map.register`. Where `dtypes=np.bool_` is passed,
    # the wrapped callable is a predicate (`isna`, `np.isfinite`, ...).
    # NOTE(review): `dtypes` presumably declares the result dtypes up front so
    # they need not be computed — confirm against `Map.register`.
    invert = Map.register(pandas.DataFrame.__invert__, dtypes="copy")
    isna = Map.register(pandas.DataFrame.isna, dtypes=np.bool_)
    # The underscore-prefixed operations below wrap NumPy functions and re-wrap
    # the NumPy result into a `pandas.DataFrame`.
    # TODO: better way to distinguish methods for NumPy API?
    _isfinite = Map.register(
        lambda df, *args, **kwargs: pandas.DataFrame(np.isfinite(df, *args, **kwargs)),
        dtypes=np.bool_,
    )
    _isinf = Map.register(  # Needed for numpy API
        lambda df, *args, **kwargs: pandas.DataFrame(np.isinf(df, *args, **kwargs)),
        dtypes=np.bool_,
    )
    _isnat = Map.register(  # Needed for numpy API
        lambda df, *args, **kwargs: pandas.DataFrame(np.isnat(df, *args, **kwargs)),
        dtypes=np.bool_,
    )
    _isneginf = Map.register(  # Needed for numpy API
        lambda df, *args, **kwargs: pandas.DataFrame(np.isneginf(df, *args, **kwargs)),
        dtypes=np.bool_,
    )
    _isposinf = Map.register(  # Needed for numpy API
        lambda df, *args, **kwargs: pandas.DataFrame(np.isposinf(df, *args, **kwargs)),
        dtypes=np.bool_,
    )
    _iscomplex = Map.register(  # Needed for numpy API
        lambda df, *args, **kwargs: pandas.DataFrame(np.iscomplex(df, *args, **kwargs)),
        dtypes=np.bool_,
    )
    _isreal = Map.register(  # Needed for numpy API
        lambda df, *args, **kwargs: pandas.DataFrame(np.isreal(df, *args, **kwargs)),
        dtypes=np.bool_,
    )
    _logical_not = Map.register(np.logical_not, dtypes=np.bool_)  # Needed for numpy API
    _tanh = Map.register(
        lambda df, *args, **kwargs: pandas.DataFrame(np.tanh(df, *args, **kwargs))
    )  # Needed for numpy API
    _sqrt = Map.register(
        lambda df, *args, **kwargs: pandas.DataFrame(np.sqrt(df, *args, **kwargs))
    )  # Needed for numpy API
    _exp = Map.register(
        lambda df, *args, **kwargs: pandas.DataFrame(np.exp(df, *args, **kwargs))
    )  # Needed for numpy API
    negative = Map.register(pandas.DataFrame.__neg__)
    notna = Map.register(pandas.DataFrame.notna, dtypes=np.bool_)
    round = Map.register(pandas.DataFrame.round)
    replace = Map.register(pandas.DataFrame.replace)
    # The next three operations squeeze the partition along axis 1 before
    # calling the Series-level function, then convert the result back to a frame.
    series_view = Map.register(
        lambda df, *args, **kwargs: pandas.DataFrame(
            df.squeeze(axis=1).view(*args, **kwargs)
        )
    )
    to_numeric = Map.register(
        lambda df, *args, **kwargs: pandas.DataFrame(
            pandas.to_numeric(df.squeeze(axis=1), *args, **kwargs)
        )
    )
    to_timedelta = Map.register(
        lambda s, *args, **kwargs: pandas.to_timedelta(
            s.squeeze(axis=1), *args, **kwargs
        ).to_frame(),
        dtypes="timedelta64[ns]",
    )

    # END Map partitions operations

    # String map partitions operations

    # Each `str_*` operation is built from `_str_map(<name>)`.
    # NOTE(review): `_str_map` is defined outside this section; it presumably
    # builds a kernel calling the `.str.<name>` accessor method — confirm.
    # NOTE(review): unlike the registrations from `str_replace` onwards, these
    # do not pass `shape_hint="column"` — confirm whether that is intentional.
    str_capitalize = Map.register(_str_map("capitalize"), dtypes="copy")
    str_center = Map.register(_str_map("center"), dtypes="copy")
    str_contains = Map.register(_str_map("contains"), dtypes=np.bool_)
    str_count = Map.register(_str_map("count"), dtypes=int)
    str_endswith = Map.register(_str_map("endswith"), dtypes=np.bool_)
    str_find = Map.register(_str_map("find"), dtypes=np.int64)
    str_findall = Map.register(_str_map("findall"), dtypes="copy")
    str_get = Map.register(_str_map("get"), dtypes="copy")
    str_index = Map.register(_str_map("index"), dtypes=np.int64)
    str_isalnum = Map.register(_str_map("isalnum"), dtypes=np.bool_)
    str_isalpha = Map.register(_str_map("isalpha"), dtypes=np.bool_)
    str_isdecimal = Map.register(_str_map("isdecimal"), dtypes=np.bool_)
    str_isdigit = Map.register(_str_map("isdigit"), dtypes=np.bool_)
    str_islower = Map.register(_str_map("islower"), dtypes=np.bool_)
    str_isnumeric = Map.register(_str_map("isnumeric"), dtypes=np.bool_)
    str_isspace = Map.register(_str_map("isspace"), dtypes=np.bool_)
    str_istitle = Map.register(_str_map("istitle"), dtypes=np.bool_)
    str_isupper = Map.register(_str_map("isupper"), dtypes=np.bool_)
    str_join = Map.register(_str_map("join"), dtypes="copy")
    str_len = Map.register(_str_map("len"), dtypes=int)
    str_ljust = Map.register(_str_map("ljust"), dtypes="copy")
    str_lower = Map.register(_str_map("lower"), dtypes="copy")
    str_lstrip = Map.register(_str_map("lstrip"), dtypes="copy")
    str_match = Map.register(_str_map("match"), dtypes="copy")
    str_normalize = Map.register(_str_map("normalize"), dtypes="copy")
    str_pad = Map.register(_str_map("pad"), dtypes="copy")
    # Private: used by `str_partition` for the `expand=False` case only.
    _str_partition = Map.register(_str_map("partition"), dtypes="copy")

    def str_partition(self, sep=" ", expand=True):
        if not expand:
            # Without expansion the column count is unchanged, so the map
            # operator is sufficient.
            return self._str_partition(sep=sep, expand=False)
        # For `expand`, need an operator that can create more columns than before
        return super().str_partition(sep=sep, expand=expand)

    str_repeat = Map.register(_str_map("repeat"), dtypes="copy")
    # Private: `str_extract` uses this only when `expand` is truthy and the
    # pattern has exactly one capture group.
    _str_extract = Map.register(_str_map("extract"), dtypes="copy")

    def str_extract(self, pat, flags, expand):
        compiled = re.compile(pat, flags=flags)
        if not (expand and compiled.groups == 1):
            # need an operator that can create more columns than before
            return super().str_extract(pat, flags=flags, expand=expand)
        # A single capture group with `expand` yields exactly one column, so the
        # map operator can be used; the column is relabeled from the pattern.
        extracted = self._str_extract(pat, flags=flags, expand=expand)
        extracted.columns = get_group_names(compiled)
        return extracted

    # From here on the string operations also pass `shape_hint="column"`.
    # NOTE(review): presumably marks the result as a single column — confirm
    # against `Map.register`.
    str_replace = Map.register(_str_map("replace"), dtypes="copy", shape_hint="column")
    str_rfind = Map.register(_str_map("rfind"), dtypes=np.int64, shape_hint="column")
    str_rindex = Map.register(_str_map("rindex"), dtypes=np.int64, shape_hint="column")
    str_rjust = Map.register(_str_map("rjust"), dtypes="copy", shape_hint="column")
    # Private: used by `str_rpartition` for the `expand=False` case only.
    _str_rpartition = Map.register(
        _str_map("rpartition"), dtypes="copy", shape_hint="column"
    )

    def str_rpartition(self, sep=" ", expand=True):
        if not expand:
            # Without expansion the column count is unchanged, so the map
            # operator is sufficient.
            return self._str_rpartition(sep=sep, expand=False)
        # For `expand`, need an operator that can create more columns than before
        return super().str_rpartition(sep=sep, expand=expand)

    # Private: used by `str_rsplit` for the `expand=False` case only.
    _str_rsplit = Map.register(_str_map("rsplit"), dtypes="copy", shape_hint="column")

    def str_rsplit(self, pat=None, n=-1, expand=False):
        if not expand:
            # Without expansion the column count is unchanged, so the map
            # operator is sufficient.
            return self._str_rsplit(pat=pat, n=n, expand=False)
        # For `expand`, need an operator that can create more columns than before
        return super().str_rsplit(pat=pat, n=n, expand=expand)

    str_rstrip = Map.register(_str_map("rstrip"), dtypes="copy", shape_hint="column")
    str_slice = Map.register(_str_map("slice"), dtypes="copy", shape_hint="column")
    str_slice_replace = Map.register(
        _str_map("slice_replace"), dtypes="copy", shape_hint="column"
    )
    # Private: used by `str_split` for the `expand=False` case only.
    _str_split = Map.register(_str_map("split"), dtypes="copy", shape_hint="column")

    def str_split(self, pat=None, n=-1, expand=False, regex=None):
        if not expand:
            # Without expansion the column count is unchanged, so the map
            # operator is sufficient.
            return self._str_split(pat=pat, n=n, expand=False, regex=regex)
        # For `expand`, need an operator that can create more columns than before
        return super().str_split(pat=pat, n=n, expand=expand, regex=regex)

    # Remaining string operations; each passes `shape_hint="column"`.
    str_startswith = Map.register(
        _str_map("startswith"), dtypes=np.bool_, shape_hint="column"
    )
    str_strip = Map.register(_str_map("strip"), dtypes="copy", shape_hint="column")
    str_swapcase = Map.register(
        _str_map("swapcase"), dtypes="copy", shape_hint="column"
    )
    str_title = Map.register(_str_map("title"), dtypes="copy", shape_hint="column")
    str_translate = Map.register(
        _str_map("translate"), dtypes="copy", shape_hint="column"
    )
    str_upper = Map.register(_str_map("upper"), dtypes="copy", shape_hint="column")
    str_wrap = Map.register(_str_map("wrap"), dtypes="copy", shape_hint="column")
    str_zfill = Map.register(_str_map("zfill"), dtypes="copy", shape_hint="column")
    str___getitem__ = Map.register(
        _str_map("__getitem__"), dtypes="copy", shape_hint="column"
    )

    # END String map partitions operations

    def unique(self, keep="first", ignore_index=True, subset=None):
        # kernels with 'pandas.Series.unique()' work faster
        can_use_unique_kernel = (
            subset is None
            and ignore_index
            and len(self.columns) == 1
            and keep is not False
        )

        # Neither the fast kernel nor range-partitioning applies: defer to the
        # parent implementation.
        if not (can_use_unique_kernel or RangePartitioning.get()):
            return super().unique(keep=keep, ignore_index=ignore_index, subset=subset)

        if not RangePartitioning.get():
            # return self.to_pandas().squeeze(axis=1).unique() works faster
            # but returns pandas type instead of query compiler
            # TODO: https://github.com/modin-project/modin/issues/7182
            new_modin_frame = self._modin_frame.apply_full_axis(
                0,
                lambda x: x.squeeze(axis=1).unique(),
                new_columns=self.columns,
            )
        else:
            key_columns = self.columns.tolist() if subset is None else subset
            if can_use_unique_kernel:
                # Single column: use the Series kernel.
                partition_func = (  # noqa: E731
                    lambda df: pandas.DataFrame(
                        df.squeeze(axis=1).unique(), columns=["__reduced__"]
                    )
                )
            else:
                # General case: drop duplicated rows.
                partition_func = (  # noqa: E731
                    lambda df: df.drop_duplicates(
                        keep=keep, ignore_index=ignore_index, subset=subset
                    )
                )
            new_modin_frame = self._modin_frame._apply_func_to_range_partitioning(
                key_columns=key_columns,
                func=partition_func,
                preserve_columns=True,
            )
        return self.__constructor__(new_modin_frame, shape_hint=self._shape_hint)

    def searchsorted(self, **kwargs):
        def searchsorted(df):
            """Run `searchsorted` on the squeezed frame and wrap the result in a frame."""
            positions = df.squeeze(axis=1).searchsorted(**kwargs)
            # A scalar result is wrapped in a list so a frame can be built from it.
            return pandas.DataFrame(
                positions if is_list_like(positions) else [positions]
            )

        return self.default_to_pandas(searchsorted)

    # Dt map partitions operations

    # Each `dt_*` operation is built from `_dt_prop_map(<name>)` or
    # `_dt_func_map(<name>)`.
    # NOTE(review): both helpers are defined outside this section; presumably
    # the former reads a `.dt` accessor property and the latter calls a `.dt`
    # accessor method — confirm.
    # Where `dtypes` is omitted, no result dtype is declared up front.
    dt_date = Map.register(_dt_prop_map("date"), dtypes=np.object_)
    dt_time = Map.register(_dt_prop_map("time"), dtypes=np.object_)
    dt_timetz = Map.register(_dt_prop_map("timetz"), dtypes=np.object_)
    # Note: year/month/day are declared as int32, the other integer fields as int64.
    dt_year = Map.register(_dt_prop_map("year"), dtypes=np.int32)
    dt_month = Map.register(_dt_prop_map("month"), dtypes=np.int32)
    dt_day = Map.register(_dt_prop_map("day"), dtypes=np.int32)
    dt_hour = Map.register(_dt_prop_map("hour"), dtypes=np.int64)
    dt_minute = Map.register(_dt_prop_map("minute"), dtypes=np.int64)
    dt_second = Map.register(_dt_prop_map("second"), dtypes=np.int64)
    dt_microsecond = Map.register(_dt_prop_map("microsecond"), dtypes=np.int64)
    dt_nanosecond = Map.register(_dt_prop_map("nanosecond"), dtypes=np.int64)
    dt_dayofweek = Map.register(_dt_prop_map("dayofweek"), dtypes=np.int64)
    dt_weekday = Map.register(_dt_prop_map("weekday"), dtypes=np.int64)
    dt_dayofyear = Map.register(_dt_prop_map("dayofyear"), dtypes=np.int64)
    dt_quarter = Map.register(_dt_prop_map("quarter"), dtypes=np.int64)
    dt_is_month_start = Map.register(_dt_prop_map("is_month_start"), dtypes=np.bool_)
    dt_is_month_end = Map.register(_dt_prop_map("is_month_end"), dtypes=np.bool_)
    dt_is_quarter_start = Map.register(
        _dt_prop_map("is_quarter_start"), dtypes=np.bool_
    )
    dt_is_quarter_end = Map.register(_dt_prop_map("is_quarter_end"), dtypes=np.bool_)
    dt_is_year_start = Map.register(_dt_prop_map("is_year_start"), dtypes=np.bool_)
    dt_is_year_end = Map.register(_dt_prop_map("is_year_end"), dtypes=np.bool_)
    dt_is_leap_year = Map.register(_dt_prop_map("is_leap_year"), dtypes=np.bool_)
    dt_daysinmonth = Map.register(_dt_prop_map("daysinmonth"), dtypes=np.int64)
    dt_days_in_month = Map.register(_dt_prop_map("days_in_month"), dtypes=np.int64)
    dt_asfreq = Map.register(_dt_func_map("asfreq"))
    dt_to_period = Map.register(_dt_func_map("to_period"))
    dt_to_pydatetime = Map.register(_dt_func_map("to_pydatetime"), dtypes=np.object_)
    dt_tz_localize = Map.register(_dt_func_map("tz_localize"))
    dt_tz_convert = Map.register(_dt_func_map("tz_convert"))
    dt_normalize = Map.register(_dt_func_map("normalize"))
    dt_strftime = Map.register(_dt_func_map("strftime"), dtypes=np.object_)
    dt_round = Map.register(_dt_func_map("round"))
    dt_floor = Map.register(_dt_func_map("floor"))
    dt_ceil = Map.register(_dt_func_map("ceil"))
    dt_month_name = Map.register(_dt_func_map("month_name"), dtypes=np.object_)
    dt_day_name = Map.register(_dt_func_map("day_name"), dtypes=np.object_)
    dt_to_pytimedelta = Map.register(_dt_func_map("to_pytimedelta"), dtypes=np.object_)
    dt_total_seconds = Map.register(_dt_func_map("total_seconds"), dtypes=np.float64)
    dt_seconds = Map.register(_dt_prop_map("seconds"), dtypes=np.int64)
    dt_days = Map.register(_dt_prop_map("days"), dtypes=np.int64)
    dt_microseconds = Map.register(_dt_prop_map("microseconds"), dtypes=np.int64)
    dt_nanoseconds = Map.register(_dt_prop_map("nanoseconds"), dtypes=np.int64)
    dt_qyear = Map.register(_dt_prop_map("qyear"), dtypes=np.int64)
    dt_start_time = Map.register(_dt_prop_map("start_time"))
    dt_end_time = Map.register(_dt_prop_map("end_time"))
    dt_to_timestamp = Map.register(_dt_func_map("to_timestamp"))

    # END Dt map partitions operations

    def astype(self, col_dtypes, errors: str = "raise"):
        # `errors` parameter needs to be part of the function signature because
        # other query compilers may not take care of error handling at the API
        # layer. This query compiler assumes there won't be any errors due to
        # invalid type keys.
        casted_frame = self._modin_frame.astype(col_dtypes, errors=errors)
        # The shape hint of the source is carried over to the result.
        return self.__constructor__(casted_frame, shape_hint=self._shape_hint)

    def infer_objects(self):
        # Delegate to the underlying frame and wrap the result in a new compiler.
        inferred_frame = self._modin_frame.infer_objects()
        return self.__constructor__(inferred_frame)

    # Column/Row partitions reduce operations

    def first_valid_index(self):
        def first_valid_index_builder(df):
            """Get the position of the first valid index in a single partition."""
            # Relabel rows positionally so the per-column result is a position.
            positional = df.set_axis(pandas.RangeIndex(len(df.index)), axis="index")
            return positional.apply(lambda column: column.first_valid_index())

        # We get the minimum from each column, then take the min of that to get
        # first_valid_index. The `to_pandas()` here is just for a single value and
        # `squeeze` will convert it to a scalar.
        per_column_positions = self.__constructor__(
            self._modin_frame.reduce(0, first_valid_index_builder)
        )
        first_position = per_column_positions.min(axis=1).to_pandas().squeeze()
        return self.index[first_position]

    def last_valid_index(self):
        def last_valid_index_builder(df):
            """Get the position of the last valid index in a single partition."""
            # Relabel rows positionally so the per-column result is a position.
            positional = df.set_axis(pandas.RangeIndex(len(df.index)), axis="index")
            return positional.apply(lambda column: column.last_valid_index())

        # We get the maximum from each column, then take the max of that to get
        # last_valid_index. The `to_pandas()` here is just for a single value and
        # `squeeze` will convert it to a scalar.
        per_column_positions = self.__constructor__(
            self._modin_frame.reduce(0, last_valid_index_builder)
        )
        last_position = per_column_positions.max(axis=1).to_pandas().squeeze()
        return self.index[last_position]

    # END Column/Row partitions reduce operations

    def describe(self, percentiles: np.ndarray):
        # Use pandas to calculate the correct columns: describing an empty frame
        # with the same columns and dtypes yields the labels of the final result.
        empty_df = (
            pandas.DataFrame(columns=self.columns)
            .astype(self.dtypes)
            .describe(percentiles, include="all")
        )
        new_index = empty_df.index

        def describe_builder(df, internal_indices=None):  # pragma: no cover
            """Apply `describe` function to the subset of columns in a single partition."""
            # `None` replaces the former mutable `[]` default and means the same
            # thing: no columns selected.
            if internal_indices is None:
                internal_indices = []
            # The index of the resulting dataframe is the same amongst all partitions
            # when dealing with the same data type. However, if we work with columns
            # that contain strings, we can get extra values in our result index such as
            # 'unique', 'top', and 'freq'. Since we call describe() on each partition,
            # we can have cases where certain partitions do not contain any of the
            # object string data leading to an index mismatch between partitions.
            # Thus, we must reindex each partition with the global new_index.
            return (
                df.iloc[:, internal_indices]
                .describe(percentiles=percentiles, include="all")
                .reindex(new_index)
            )

        return self.__constructor__(
            self._modin_frame.apply_full_axis_select_indices(
                0,
                describe_builder,
                empty_df.columns,
                new_index=new_index,
                new_columns=empty_df.columns,
            )
        )

    # END Column/Row partitions reduce operations over select indices

    # Map across rows/columns
    # These operations require some global knowledge of the full column/row
    # that is being operated on. This means that we have to put all of that
    # data in the same place.

    # Cumulative operations built by `Fold.register` with `shape_preserved=True`.
    # NOTE(review): `shape_preserved=True` presumably asserts that the result has
    # the same shape as the input — confirm against `Fold.register`.
    cummax = Fold.register(pandas.DataFrame.cummax, shape_preserved=True)
    cummin = Fold.register(pandas.DataFrame.cummin, shape_preserved=True)
    cumsum = Fold.register(pandas.DataFrame.cumsum, shape_preserved=True)
    cumprod = Fold.register(pandas.DataFrame.cumprod, shape_preserved=True)
    # Private: wrapped by `diff`, which supplies `fold_axis`.
    _diff = Fold.register(pandas.DataFrame.diff, shape_preserved=True)

    def diff(self, axis, periods):
        # The fold runs along the same axis the difference is taken over.
        return self._diff(axis=axis, periods=periods, fold_axis=axis)

    def clip(self, lower, upper, **kwargs):
        # Bounds given as query compilers are materialized and squeezed along
        # axis 1 before being handed to pandas.
        if isinstance(lower, BaseQueryCompiler):
            lower = lower.to_pandas().squeeze(1)
        if isinstance(upper, BaseQueryCompiler):
            upper = upper.to_pandas().squeeze(1)
        kwargs.update(upper=upper, lower=lower)

        if not (is_list_like(lower) or is_list_like(upper)):
            # Neither bound is list-like: a plain map over partitions is used.
            new_modin_frame = self._modin_frame.map(lambda df: df.clip(**kwargs))
        else:
            # A list-like bound: fold along `axis` (0 when not supplied).
            new_modin_frame = self._modin_frame.fold(
                kwargs.get("axis", 0),
                lambda df: df.clip(**kwargs),
                shape_preserved=True,
            )
        return self.__constructor__(new_modin_frame)

    # `corr` is produced by `CorrCovBuilder`, not by `_nancorr` below.
    corr = CorrCovBuilder.build_corr_method()

    def cov(self, min_periods=None, ddof=1):
        # `_nancorr` uses numpy, which is incompatible with pandas dataframes on
        # pyarrow, so pyarrow-backed data goes through the parent implementation.
        if self.get_pandas_backend() != "pyarrow":
            return self._nancorr(min_periods=min_periods, cov=True, ddof=ddof)
        return super().cov(min_periods=min_periods, ddof=ddof)

    def _nancorr(self, min_periods=1, cov=False, ddof=1):
        """
        Compute the pairwise covariance or correlation matrix of the columns.

        For every pair of columns only the observations that are finite in both
        columns take part in the computation.

        Parameters
        ----------
        min_periods : int, default: 1
            Minimum number of valid observations a pair of columns must share;
            pairs with fewer observations yield NaN. ``None`` is treated as 1.
        cov : bool, default: False
            Compute covariance if True, correlation otherwise.
        ddof : int, default: 1
            Delta Degrees of Freedom. The covariance divisor is ``nobs - ddof``.

        Returns
        -------
        PandasQueryCompiler
            The covariance or correlation matrix.

        Raises
        ------
        ValueError
            If building the mask of finite values fails with a ``TypeError``.

        Notes
        -----
        This method is only used to compute covariance at the moment.
        """
        full_data = self.to_numpy()
        try:
            full_mask = self._isfinite().to_numpy()
        except TypeError as err:
            # Pandas raises ValueError on unsupported types, so casting
            # the exception to a proper type
            raise ValueError("Unsupported types with 'numeric_only=False'") from err
        n_cols = full_data.shape[1]
        required_obs = 1 if min_periods is None else min_periods

        def map_func(df):  # pragma: no cover
            """Compute covariance or correlation matrix for the passed frame."""
            rows = df.to_numpy()
            rows_mask = np.isfinite(rows)
            result = np.empty((rows.shape[0], n_cols), dtype=np.float64)

            # The frame passed here is a piece of the transposed data, so each of
            # its rows is one original column; pair it with every column of the
            # full data.
            for i, (row, row_mask) in enumerate(zip(rows, rows_mask)):
                for j in range(n_cols):
                    valid = row_mask & full_mask[:, j]
                    vx = row[valid]
                    vy = full_data[:, j][valid]
                    nobs = len(vx)

                    if nobs < required_obs:
                        result[i, j] = np.nan
                        continue

                    vx = vx - vx.mean()
                    vy = vy - vy.mean()
                    sumxy = (vx * vy).sum()
                    sumxx = (vx * vx).sum()
                    sumyy = (vy * vy).sum()

                    denom = (nobs - ddof) if cov else np.sqrt(sumxx * sumyy)
                    # A zero divisor produces NaN instead of a division error.
                    result[i, j] = sumxy / denom if denom != 0 else np.nan

            return pandas.DataFrame(result)

        # The result is square: labeled by the original columns on both axes.
        labels = self.columns
        row_labels = labels.copy()
        transposed = self.transpose()
        new_modin_frame = transposed._modin_frame.apply_full_axis(
            1, map_func, new_index=row_labels, new_columns=labels
        )
        return transposed.__constructor__(new_modin_frame)

    def dot(self, other, squeeze_self=None, squeeze_other=None):
        # A query-compiler operand is materialized to pandas (and squeezed along
        # axis 1 on request) so it can be captured by the kernel.
        if isinstance(other, PandasQueryCompiler):
            other = other.to_pandas()
            if squeeze_other:
                other = other.squeeze(axis=1)

        num_cols = other.shape[1] if len(other.shape) > 1 else 1
        single_column_result = num_cols == 1

        if len(self.columns) != 1:
            axis = 1
            new_index = self.index
            new_columns = (
                [MODIN_UNNAMED_SERIES_LABEL] if single_column_result else None
            )
        else:
            axis = 0
            new_index = (
                [MODIN_UNNAMED_SERIES_LABEL]
                if (len(self.index) == 1 or squeeze_self) and single_column_result
                else None
            )
            new_columns = (
                [MODIN_UNNAMED_SERIES_LABEL]
                if squeeze_self and single_column_result
                else None
            )

        # If either new index or new columns are supposed to be a single-dimensional,
        # then we use a special labeling for them. Besides setting the new labels as
        # a metadata to the resulted frame, we also want to set them inside the kernel,
        # so actual partitions would be labeled accordingly (there's a 'sync_label'
        # parameter that can do the same, but doing it manually is faster)
        # The `isinstance` guard is needed because `new_index` may be `self.index`.
        align_index = isinstance(new_index, list) and new_index == [
            MODIN_UNNAMED_SERIES_LABEL
        ]
        align_columns = new_columns == [MODIN_UNNAMED_SERIES_LABEL]

        def map_func(df, other=other, squeeze_self=squeeze_self):  # pragma: no cover
            """Compute matrix multiplication of the passed frames."""
            left = df.squeeze(axis=1) if squeeze_self else df
            product = left.dot(other)
            # A scalar product is wrapped in a list to build a 1x1 frame.
            res = pandas.DataFrame(product if is_list_like(product) else [product])

            # manual aligning with external index to avoid `sync_labels` overhead
            if align_columns:
                res.columns = [MODIN_UNNAMED_SERIES_LABEL]
            if align_index:
                res.index = [MODIN_UNNAMED_SERIES_LABEL]
            return res

        new_modin_frame = self._modin_frame.apply_full_axis(
            axis,
            map_func,
            new_index=new_index,
            new_columns=new_columns,
            sync_labels=False,
        )
        return self.__constructor__(new_modin_frame)

    def _nsort(self, n, columns=None, keep="first", sort_type="nsmallest"):
        """
        Return first N rows of the data sorted in the specified order.

        Parameters
        ----------
        n : int
            Number of rows to return.
        columns : list of labels, optional
            Column labels to sort data by. If not specified, the data is squeezed
            along axis 1 and processed with the ``pandas.Series`` method.
        keep : {"first", "last", "all"}, default: "first"
            How to pick first rows in case of duplicated values:
            - "first": prioritize first occurrence.
            - "last": prioritize last occurrence.
            - "all": do not drop any duplicates, even if it means selecting more than `n` rows.
        sort_type : {"nsmallest", "nlargest"}, default: "nsmallest"
            Name of the pandas method to apply: "nsmallest" returns the rows with
            the smallest values in ascending order, "nlargest" returns the rows
            with the largest values in descending order.

        Returns
        -------
        PandasQueryCompiler
            New QueryCompiler containing the first N rows of the data
            sorted in the given order.
        """

        def map_func(df, n=n, keep=keep, columns=columns):  # pragma: no cover
            """Return first `N` rows of the sorted data for a single partition."""
            if columns is not None:
                return getattr(pandas.DataFrame, sort_type)(
                    df, n=n, columns=columns, keep=keep
                )
            # No sort columns given: use the Series method and convert the result
            # back to a frame.
            return pandas.DataFrame(
                getattr(pandas.Series, sort_type)(df.squeeze(axis=1), n=n, keep=keep)
            )

        new_columns = [MODIN_UNNAMED_SERIES_LABEL] if columns is None else self.columns

        new_modin_frame = self._modin_frame.apply_full_axis(
            axis=0, func=map_func, new_columns=new_columns
        )
        return self.__constructor__(new_modin_frame)

    def nsmallest(self, *args, **kwargs):
        # Thin wrapper: all arguments are forwarded to `_nsort`.
        return self._nsort(*args, sort_type="nsmallest", **kwargs)

    def nlargest(self, *args, **kwargs):
        # Thin wrapper: all arguments are forwarded to `_nsort`.
        return self._nsort(*args, sort_type="nlargest", **kwargs)

    def eval(self, expr, **kwargs):
        # Make a copy of columns and eval on the copy to determine if result type is
        # series or not
        probe = (
            pandas.DataFrame(columns=self.columns)
            .astype(self.dtypes)
            .eval(expr, inplace=False, **kwargs)
        )
        if not isinstance(probe, pandas.Series):
            new_columns = probe.columns
        elif probe.name is None:
            # An unnamed Series result gets the unnamed-series label.
            new_columns = [MODIN_UNNAMED_SERIES_LABEL]
        else:
            new_columns = [probe.name]

        return self.__constructor__(
            self._modin_frame.apply_full_axis(
                1,
                lambda df: pandas.DataFrame(df.eval(expr, inplace=False, **kwargs)),
                new_index=self.index,
                new_columns=new_columns,
            )
        )

    def mode(self, **kwargs):
        axis = kwargs.get("axis", 0)

        def mode_builder(df):  # pragma: no cover
            """Compute modes for a single partition."""
            modes = pandas.DataFrame(df.mode(**kwargs))
            # We return a dataframe with the same shape as the input to ensure
            # that all the partitions will be the same shape
            n_rows, n_cols = len(df.index), len(df.columns)
            if axis == 0 and len(df) != len(modes):
                # Pad rows
                modes = modes.reindex(index=pandas.RangeIndex(n_rows))
            elif axis == 1 and n_cols != len(modes.columns):
                # Pad columns
                modes = modes.reindex(columns=pandas.RangeIndex(n_cols))
            return pandas.DataFrame(modes)

        # The axis the modes are computed along gets positional labels; the other
        # axis keeps its original labels.
        if axis != 0:
            new_index = self.index
            new_columns = pandas.RangeIndex(len(self.columns))
        else:
            new_index = pandas.RangeIndex(len(self.index))
            new_columns = self.columns

        new_modin_frame = self._modin_frame.apply_full_axis(
            axis, mode_builder, new_index=new_index, new_columns=new_columns
        )
        # Remove the all-NaN rows/columns left over from the padding.
        return self.__constructor__(new_modin_frame).dropna(axis=axis, how="all")

    def fillna(self, **kwargs):
        # Fill missing values. The execution strategy depends on the type of
        # `value` (another query compiler, a dict, or anything else) and on
        # whether `method`/`limit` is given, in which case whole axes are
        # processed at once instead of independent partitions.
        #
        # `squeeze_self` / `squeeze_value` are popped so they never reach pandas;
        # per the branch comments below they flag that `self` / `value` stand
        # for a Series rather than a DataFrame.
        squeeze_self = kwargs.pop("squeeze_self", False)
        squeeze_value = kwargs.pop("squeeze_value", False)
        axis = kwargs.get("axis", 0)
        # `value` must be present in `kwargs`; a missing key raises `KeyError`.
        # It is removed from `kwargs` because every builder passes it explicitly.
        value = kwargs.pop("value")
        method = kwargs.get("method", None)
        limit = kwargs.get("limit", None)
        full_axis = method is not None or limit is not None
        # Result dtypes are only forwarded when they can be shown to be unchanged;
        # `None` leaves them unknown.
        new_dtypes = None
        if isinstance(value, BaseQueryCompiler):
            # This code assumes that the operation occurs with the same query compiler
            assert isinstance(value, PandasQueryCompiler)
            if squeeze_self:
                # Self is a Series type object
                if full_axis:
                    # The fill values are materialized as a pandas Series and
                    # captured by the builder below.
                    value = value.to_pandas().squeeze(axis=1)

                    def fillna_builder(series):  # pragma: no cover
                        # `limit` parameter works only on `Series` type, so we have to squeeze both objects to get
                        # correct behavior.
                        return series.squeeze(axis=1).fillna(value=value, **kwargs)

                    new_modin_frame = self._modin_frame.apply_full_axis(
                        0, fillna_builder
                    )
                else:

                    def fillna_builder(df, value_arg):
                        # Both operands arrive as partitions; squeeze them to
                        # Series, fill, and wrap the result back into a frame.
                        if isinstance(value_arg, pandas.DataFrame):
                            value_arg = value_arg.squeeze(axis=1)
                        res = df.squeeze(axis=1).fillna(value=value_arg, **kwargs)
                        return pandas.DataFrame(res)

                    new_modin_frame = self._modin_frame.n_ary_op(
                        fillna_builder,
                        [value._modin_frame],
                        join_type="left",
                        copartition_along_columns=False,
                    )

                return self.__constructor__(new_modin_frame)
            else:
                # Self is a DataFrame type object
                if squeeze_value:
                    # Value is Series type object
                    value = value.to_pandas().squeeze(axis=1)

                    def fillna(df):
                        return df.fillna(value=value, **kwargs)

                    # Continue to end of this function

                else:
                    # Value is a DataFrame type object
                    def fillna_builder(df, right):
                        return df.fillna(value=right, **kwargs)

                    new_modin_frame = self._modin_frame.broadcast_apply(
                        0, fillna_builder, value._modin_frame
                    )
                    return self.__constructor__(new_modin_frame)

        elif isinstance(value, dict):
            if squeeze_self:
                # For Series dict works along the index.
                def fillna(df):
                    return pandas.DataFrame(
                        df.squeeze(axis=1).fillna(value=value, **kwargs)
                    )

            else:
                # For DataFrames dict works along columns, all columns have to be present.
                def fillna(df):
                    # Keep only the entries whose column exists in this partition.
                    func_dict = {
                        col: val for (col, val) in value.items() if col in df.columns
                    }
                    return df.fillna(value=func_dict, **kwargs)

                if self.frame_has_materialized_dtypes:
                    dtypes = self.dtypes
                    # Let pandas infer a dtype for every fill value by building
                    # a one-row frame out of the dict.
                    value_dtypes = pandas.DataFrame(
                        {k: [v] for (k, v) in value.items()}
                    ).dtypes
                    # Dtypes are preserved only if no fill value forces a column
                    # to a wider common type.
                    if all(
                        find_common_type([dtypes[col], dtype]) == dtypes[col]
                        for (col, dtype) in value_dtypes.items()
                        if col in dtypes
                    ):
                        new_dtypes = dtypes

        else:
            # Any other `value` is handed to pandas as-is.
            if self.frame_has_materialized_dtypes:
                dtype = pandas.Series(value).dtype
                # Same dtype-preservation check as above, against every column.
                if all(find_common_type([t, dtype]) == t for t in self.dtypes):
                    new_dtypes = self.dtypes

            def fillna(df):
                return df.fillna(value=value, **kwargs)

        # Only the branches that defined a `fillna` builder reach this point.
        if full_axis:
            # `method`/`limit` was given: apply over whole axes via `fold`.
            # `new_dtypes` is not forwarded on this path.
            new_modin_frame = self._modin_frame.fold(axis, fillna, shape_preserved=True)
        else:
            new_modin_frame = self._modin_frame.map(fillna, dtypes=new_dtypes)
        return self.__constructor__(new_modin_frame)

    def quantile_for_list_of_values(self, **kwargs):
        axis = kwargs.get("axis", 0)
        q = kwargs.get("q")
        numeric_only = kwargs.get("numeric_only", True)
        assert isinstance(q, (pandas.Series, np.ndarray, pandas.Index, list, tuple))

        # Columns that take part in the computation.
        if numeric_only:
            kept_columns = self._modin_frame.numeric_columns()
        else:
            kept_columns = [
                col
                for col, dtype in zip(self.columns, self.dtypes)
                if (is_numeric_dtype(dtype) or lib.is_np_dtype(dtype, "mM"))
            ]

        row_wise = axis == 1
        query_compiler = self.getitem_column_array(kept_columns) if row_wise else self

        def quantile_builder(df):
            quantiles = df.quantile(**kwargs)
            # For `axis=1` the partition result is transposed; see the note below.
            if row_wise:
                return quantiles.T
            return quantiles

        # Why the transposes: operating on selected indices used to be broken
        # because the columns/index were not set correctly. Given how
        # `to_pandas` is computed and the static nature of the index for
        # `axis=1`, it is simpler to compute the transpose of the answer (each
        # partition is transposed in `quantile_builder`) and to transpose the
        # final object back than to rework other internal methods.
        quantile_labels = pandas.Index(q)
        if row_wise:
            result_index, result_columns = self.index, quantile_labels
        else:
            result_index, result_columns = quantile_labels, kept_columns

        new_modin_frame = query_compiler._modin_frame.apply_full_axis(
            axis,
            quantile_builder,
            new_index=result_index,
            new_columns=result_columns,
            dtypes=np.float64,
        )
        result = self.__constructor__(new_modin_frame)
        if row_wise:
            return result.transpose()
        return result

    def rank(self, **kwargs):
        axis = kwargs.get("axis", 0)
        # Row-wise ranking is always treated as numeric-only here.
        numeric_only = True if axis else kwargs.get("numeric_only", False)

        def rank_partition(df):
            return df.rank(**kwargs)

        index_cache = self._modin_frame.copy_index_cache(copy_lengths=True)
        # With `numeric_only` the resulting column labels are left unknown
        # (`None`); otherwise the cached labels are reused.
        if numeric_only:
            columns_cache = None
        else:
            columns_cache = self._modin_frame.copy_columns_cache(copy_lengths=True)

        new_modin_frame = self._modin_frame.apply_full_axis(
            axis,
            rank_partition,
            new_index=index_cache,
            new_columns=columns_cache,
            dtypes=np.float64,
            sync_labels=False,
        )
        return self.__constructor__(new_modin_frame)

    def sort_index(self, **kwargs):
        axis = kwargs.pop("axis", 0)
        level = kwargs.pop("level", None)
        sort_remaining = kwargs.pop("sort_remaining", True)
        kwargs["inplace"] = False

        # Level-based and MultiIndex sorting is delegated to pandas.
        if level is not None or self.has_multiindex(axis=axis):
            return self.default_to_pandas(
                pandas.DataFrame.sort_index,
                axis=axis,
                level=level,
                sort_remaining=sort_remaining,
                **kwargs,
            )

        # `sort_index` accepts `ascending=None` and treats it like `False`,
        # whereas `sort_values` rejects `None`; normalize the argument so it
        # is valid for both.
        ascending = kwargs.pop("ascending", True)
        kwargs["ascending"] = False if ascending is None else ascending

        def sorted_labels(labels):
            # Sort the labels themselves to learn the resulting axis labels.
            return labels.to_frame().sort_index(**kwargs).index

        if axis:
            new_columns = sorted_labels(self.columns)
            new_index = self.index
        else:
            new_index = sorted_labels(self.index)
            new_columns = self.columns

        def sort_partition(df):
            return df.sort_index(
                axis=axis, level=level, sort_remaining=sort_remaining, **kwargs
            )

        new_modin_frame = self._modin_frame.apply_full_axis(
            axis,
            sort_partition,
            new_index,
            new_columns,
            # dtypes are only carried over when sorting along axis 0
            dtypes="copy" if axis == 0 else None,
        )
        return self.__constructor__(new_modin_frame)

    def melt(
        self,
        id_vars=None,
        value_vars=None,
        var_name=None,
        value_name="value",
        col_level=None,
        ignore_index=True,
    ):
        # Unpivot the frame: `melt` is applied to the `value_vars` columns of each
        # partition (with the `id_vars` columns broadcast to it) and the resulting
        # partitions are then stacked vertically.
        # NOTE: `ignore_index` is accepted but never read in this implementation;
        # the result always gets a fresh `RangeIndex`.
        ErrorMessage.mismatch_with_pandas(
            operation="melt", message="Order of rows could be different from pandas"
        )

        if var_name is None:
            var_name = "variable"

        def _convert_to_list(x):
            """Convert passed object to a list."""
            if is_list_like(x):
                x = [*x]
            elif x is not None:
                x = [x]
            else:
                x = []
            return x

        id_vars, value_vars = map(_convert_to_list, [id_vars, value_vars])

        # No explicit `value_vars`: every column that is not an id column is melted.
        if len(value_vars) == 0:
            value_vars = self.columns.drop(id_vars)

        # The id columns have to be visible to every partition being melted.
        if len(id_vars) != 0:
            to_broadcast = self.getitem_column_array(id_vars)._modin_frame
        else:
            to_broadcast = None

        def applyier(df, internal_indices, other=[], internal_other_indices=[]):
            """
            Apply `melt` function to a single partition.

            Parameters
            ----------
            df : pandas.DataFrame
                Partition of the self frame.
            internal_indices : list of ints
                Positional indices of columns in this particular partition which
                represents `value_vars` columns in the source frame.
            other : pandas.DataFrame
                Broadcasted partition which contains `id_vars` columns of the
                source frame.
            internal_other_indices : list of ints
                Positional indices of columns in `other` partition which
                represents `id_vars` columns in the source frame.

            Returns
            -------
            pandas.DataFrame
                The result of the `melt` function for this particular partition.
            """
            # The list defaults are only read, never mutated, inside this function.
            if len(other):
                other = pandas.concat(other, axis=1)
                # Append only those broadcast columns the partition lacks.
                columns_to_add = other.columns.difference(df.columns)
                df = pandas.concat([df, other[columns_to_add]], axis=1)
            return df.melt(
                id_vars=id_vars,
                value_vars=df.columns[internal_indices],
                var_name=var_name,
                value_name=value_name,
                col_level=col_level,
            )

        # we are not able to calculate the correct indices here, so placeholder
        # `dummy_index` labels are used
        inconsistent_frame = self._modin_frame.broadcast_apply_select_indices(
            axis=0,
            apply_indices=value_vars,
            func=applyier,
            other=to_broadcast,
            new_index=["dummy_index"] * len(id_vars),
            new_columns=["dummy_index"] * len(id_vars),
        )
        # after applying `melt` for selected indices we will get partitions like this:
        #     id_vars   vars   value |     id_vars   vars   value
        #  0      foo   col3       1 |  0      foo   col5       a    so stacking it into
        #  1      fiz   col3       2 |  1      fiz   col5       b    `new_parts` to get
        #  2      bar   col3       3 |  2      bar   col5       c    correct answer
        #  3      zoo   col3       4 |  3      zoo   col5       d
        # Transposing and concatenating flattens the partition grid into one
        # sequence; each partition is then wrapped so it forms its own row.
        new_parts = np.array(
            [np.array([x]) for x in np.concatenate(inconsistent_frame._partitions.T)]
        )
        # One output row per (input row, melted column) pair.
        new_index = pandas.RangeIndex(len(self.index) * len(value_vars))
        new_modin_frame = self._modin_frame.__constructor__(
            new_parts,
            index=new_index,
            columns=id_vars + [var_name, value_name],
        )
        result = self.__constructor__(new_modin_frame)
        # this assignment is needed to propagate the correct indices into partitions
        result.index = new_index
        return result

    # END Map across rows/columns

    # __getitem__ methods
    # Selection by a mask held in another query compiler, registered as a binary
    # operation. The partition-level function receives a data partition `df` and
    # the corresponding mask `r`; a scalar `r` is wrapped into a list before
    # indexing, any other `r` is used for indexing directly.
    # NOTE(review): `join_type="left"` and `labels="drop"` are interpreted by
    # `Binary.register`, which is not visible here -- confirm their exact effect.
    __getitem_bool = Binary.register(
        lambda df, r: df[[r]] if is_scalar(r) else df[r],
        join_type="left",
        labels="drop",
    )

    # __setitem__ methods
    def setitem_bool(self, row_loc: PandasQueryCompiler, col_loc, item):
        def _set_item(df, row_loc):  # pragma: no cover
            # Work on a copy so the source partition is left untouched.
            updated = df.copy()
            updated.loc[row_loc.squeeze(axis=1), col_loc] = item
            return updated

        # Result dtypes are only predicted for scalar items on frames whose
        # dtypes are already materialized; otherwise they stay unknown.
        new_dtypes = None
        if self.frame_has_materialized_dtypes and is_scalar(item):
            new_dtypes = self.dtypes.copy()
            current = new_dtypes[col_loc]
            item_type = extract_dtype(item)
            if not isinstance(current, pandas.Series):
                # a single column was addressed
                new_dtypes[col_loc] = find_common_type([current, item_type])
            else:
                # several columns were addressed: widen each one separately
                new_dtypes[col_loc] = [
                    find_common_type([dtype, item_type]) for dtype in current.values
                ]

        source_frame = self._modin_frame
        new_modin_frame = source_frame.broadcast_apply_full_axis(
            axis=1,
            func=_set_item,
            other=row_loc._modin_frame,
            new_index=source_frame.copy_index_cache(copy_lengths=True),
            new_columns=source_frame.copy_columns_cache(),
            keep_partitioning=False,
            dtypes=new_dtypes,
        )
        return self.__constructor__(new_modin_frame)

    # END __setitem__ methods

    def __validate_bool_indexer(self, indexer):
        """
        Check that a boolean indexer can be used to select rows of this frame.

        Parameters
        ----------
        indexer : list-like
            Object whose length has to match the number of rows. If it is a
            ``pandas.Series``, its index is additionally compared with the
            index of this frame.

        Raises
        ------
        ValueError
            If the length of `indexer` differs from the length of the index.

        Warns
        -----
        PendingDeprecationWarning
            If `indexer` is a ``pandas.Series`` whose index is not equal to the
            index of this frame.
        """
        index = self.index
        if len(indexer) != len(index):
            raise ValueError(
                f"Item wrong length {len(indexer)} instead of {len(index)}."
            )
        # The labels of the Series have to be compared with the frame's index.
        # Comparing the Series object itself with an Index (`Series.equals(Index)`)
        # is always False, which made the warning fire for every Series indexer.
        if isinstance(indexer, pandas.Series) and not indexer.index.equals(index):
            warnings.warn(
                "Boolean Series key will be reindexed to match DataFrame index.",
                PendingDeprecationWarning,
                stacklevel=4,
            )

    def getitem_array(self, key):
        # Select by an array-like `key`: a boolean mask selects rows, anything
        # else is treated as a collection of column labels.
        if isinstance(key, type(self)):
            # here we check for a subset of bool indexers only to simplify the code;
            # there could (potentially) be more of those, but we assume the most frequent
            # ones are just of bool dtype
            if len(key.dtypes) == 1 and is_bool_dtype(key.dtypes.iloc[0]):
                self.__validate_bool_indexer(key.index)
                return self.__getitem_bool(key, broadcast=True, dtypes="copy")

            # Any other query compiler key is materialized and handled below
            # like a regular pandas object.
            key = key.to_pandas().squeeze(axis=1)

        if is_bool_indexer(key):
            self.__validate_bool_indexer(key)
            key = check_bool_indexer(self.index, key)
            # We convert to a RangeIndex because getitem_row_array is expecting a list
            # of indices, and RangeIndex will give us the exact indices of each boolean
            # requested.
            key = pandas.RangeIndex(len(self.index))[key]
            if len(key):
                return self.getitem_row_array(key)
            # Nothing selected: build an empty frame that keeps the column labels.
            return self.from_pandas(
                pandas.DataFrame(columns=self.columns), type(self._modin_frame)
            )

        # Column selection: every requested label has to exist. The missing
        # labels are collected once and reported the way pandas does
        # ("[...] not in index").
        columns = self.columns
        missing = [k for k in key if k not in columns]
        if missing:
            raise KeyError("{} not in index".format(str(missing).replace(",", "")))
        return self.getitem_column_array(key)

    def getitem_column_array(
        self, key, numeric=False, ignore_order=False
    ) -> PandasQueryCompiler:
        # A single requested column is flagged with the "column" shape hint.
        shape_hint = "column" if len(key) == 1 else None
        # With `ignore_order` a list-like key is rearranged to follow the
        # frame's own column order instead of the order given by the caller.
        reorder = ignore_order and is_list_like(key)
        if numeric:
            if reorder:
                key = np.sort(key)
            selection = {"col_positions": key}
        else:
            if reorder:
                requested = frozenset(key)
                key = [col for col in self.columns if col in requested]
            selection = {"col_labels": key}
        new_modin_frame = self._modin_frame.take_2d_labels_or_positional(**selection)
        return self.__constructor__(new_modin_frame, shape_hint=shape_hint)

    def getitem_row_array(self, key):
        # `key` holds row positions (it is passed as `row_positions`).
        new_modin_frame = self._modin_frame.take_2d_labels_or_positional(
            row_positions=key
        )
        return self.__constructor__(new_modin_frame)

    def setitem(self, axis, key, value):
        sets_column = axis == 0

        # Default to pandas for empty frames to avoid complex partitioning issues.
        # The row count is only inspected when it is not lazy.
        if sets_column and not self.lazy_row_count and self.get_axis_len(0) == 0:

            def do_setitem(df: pandas.DataFrame, key, value) -> pandas.DataFrame:
                df[key] = value
                return df

            return self.default_to_pandas(do_setitem, key=key, value=value)

        if sets_column:
            # List-like column data is wrapped by `_wrap_column_data` first.
            value = self._wrap_column_data(value)
        return self._setitem(axis=axis, key=key, value=value, how=None)

    def _setitem(self, axis, key, value, how="inner"):
        """
        Set the row/column defined by `key` to the `value` provided.

        In contrast with `setitem` with this function you can specify how
        to handle non-aligned `self` and `value`.

        Parameters
        ----------
        axis : {0, 1}
            Axis to set `value` along. With 0 the column labelled `key` is
            assigned, with 1 the row labelled `key` is assigned (see
            `setitem_builder` below).
        key : scalar
            Row/column label to set `value` in.
        value : PandasQueryCompiler (1xN), list-like or scalar
            Define new row/column value. A query compiler passed here is
            relabelled in place (its columns are set to ``[key]``).
        how : {"inner", "outer", "left", "right", None}, default: "inner"
            Type of join to perform if specified axis of `self` and `value` are not
            equal. If `how` is `None`, reindex `value` with `self` labels without joining.

        Returns
        -------
        BaseQueryCompiler
            New QueryCompiler with updated `key` value.
        """

        def setitem_builder(df, internal_indices=[]):  # pragma: no cover
            """
            Set the row/column to the `value` in a single partition.

            Parameters
            ----------
            df : pandas.DataFrame
                Partition of the self frame.
            internal_indices : list of ints
                Positional indices of rows/columns in this particular partition
                which represents `key` in the source frame.

            Returns
            -------
            pandas.DataFrame
                Partition data with updated values.
            """
            # Work on a copy so the source partition is left untouched.
            df = df.copy()
            if len(internal_indices) == 1:
                # A single position: index with a scalar rather than a list.
                if axis == 0:
                    df[df.columns[internal_indices[0]]] = value
                else:
                    df.iloc[internal_indices[0]] = value
            else:
                if axis == 0:
                    df[df.columns[internal_indices]] = value
                else:
                    df.iloc[internal_indices] = value
            return df

        # A query compiler value is handled by replacing the existing item via
        # `insert_item` instead of the partition-wise builder above.
        if isinstance(value, type(self)):
            # NOTE: this mutates the `value` object passed by the caller.
            value.columns = [key]
            if axis == 1:
                value = value.transpose()
            # Position of `key` on the opposite axis; only the first match is used.
            idx = self.get_axis(axis ^ 1).get_indexer_for([key])[0]
            return self.insert_item(axis ^ 1, idx, value, how, replace=True)

        if axis == 0:
            value_dtype = extract_dtype(value)

            # Keep the dtypes of all other columns and replace the dtype of `key`.
            old_columns = self.columns.difference(pandas.Index([key]))
            old_dtypes = ModinDtypes(self._modin_frame._dtypes).lazy_get(old_columns)
            new_dtypes = ModinDtypes.concat(
                [
                    old_dtypes,
                    DtypesDescriptor({key: value_dtype}, cols_with_unknown_dtypes=[]),
                ]
                # get dtypes in a proper order
            ).lazy_get(self.columns)
        else:
            # TODO: apply 'find_common_dtype' to the value's dtype and old column dtypes
            new_dtypes = None

        # TODO: rework by passing list-like values to `apply_select_indices`
        # as an item to distribute
        if is_list_like(value):
            # List-like values go through the full-axis variant of the call.
            new_modin_frame = self._modin_frame.apply_full_axis_select_indices(
                axis,
                setitem_builder,
                [key],
                new_index=self.index,
                new_columns=self.columns,
                keep_remaining=True,
                new_dtypes=new_dtypes,
            )
        else:
            new_modin_frame = self._modin_frame.apply_select_indices(
                axis,
                setitem_builder,
                [key],
                new_index=self.index,
                new_columns=self.columns,
                new_dtypes=new_dtypes,
                keep_remaining=True,
            )
        return self.__constructor__(new_modin_frame)

    # END __getitem__ methods

    # Drop/Dropna
    # This will change the shape of the resulting data.
    def dropna(self, **kwargs):
        # Two strategies are used: a map/reduce over partitions for dropping
        # columns (axis=1) when no `thresh` is given and the frame has a moderate
        # number of partitions, and a generic `filter` with `pandas.DataFrame.dropna`
        # for everything else.
        is_column_wise = kwargs.get("axis", 0) == 1
        no_thresh_passed = kwargs.get("thresh", lib.no_default) in (
            lib.no_default,
            None,
        )
        # The map reduce approach works well for frames with few columnar partitions
        processable_amount_of_partitions = (
            self._modin_frame.num_parts < CpuCount.get() * 32
        )

        if is_column_wise and no_thresh_passed and processable_amount_of_partitions:
            how = kwargs.get("how", "any")
            subset = kwargs.get("subset")
            how = "any" if how in (lib.no_default, None) else how
            # `how` is the name of the reduction ("any" / "all") applied to NaN masks.
            condition = lambda df: getattr(df, how)()  # noqa: E731 (lambda assignment)

            def mapper(df: pandas.DataFrame):
                """Compute a mask indicating whether there are all/any NaN values in each column."""
                if subset is not None:
                    # Only the rows of this partition whose labels are in `subset`
                    # are inspected.
                    subset_mask = condition(
                        df.loc[df.index.intersection(subset)].isna()
                    )
                    # we have to keep other columns so setting their mask
                    # values with `False`
                    mask = pandas.Series(
                        np.zeros(df.shape[1], dtype=bool), index=df.columns
                    )
                    mask.update(subset_mask)
                else:
                    mask = condition(df.isna())
                # for proper partitioning at the 'reduce' phase each partition has to
                # represent a one-row frame rather than a one-column frame, so calling `.T` here
                return mask.to_frame().T

            masks = self._modin_frame.apply_full_axis(
                func=mapper, axis=1, keep_partitioning=True
            )

            def reduce(df: pandas.DataFrame, mask: pandas.DataFrame):
                """Drop columns from `df` that satisfy the NaN `mask`."""
                # `mask` here consists of several rows each representing the masks result
                # for a certain row partition:
                #     col1  col2   col3
                # 0   True  True  False                         col1     True
                # 1  False  True  False  ---> mask.any() --->   col2     True
                # 2   True  True  False                         col3    False
                # in order to get the proper 1D mask we have to reduce the partition's
                # results by applying the condition one more time
                to_take_mask = ~condition(mask)

                # Keep the columns that are not flagged and that exist in this partition.
                to_take = []
                for col, value in to_take_mask.items():
                    if value and col in df:
                        to_take.append(col)

                return df[to_take]

            result = self._modin_frame.broadcast_apply(
                # 'masks' have identical partitioning as we specified 'keep_partitioning=True' before,
                # this means that we can safely skip the 'co-partitioning' stage
                axis=1,
                func=reduce,
                other=masks,
                copartition=False,
                labels="drop",
            )
            return self.__constructor__(result, shape_hint=self._shape_hint)

        # Generic path: let pandas drop the labels; note the flipped axis
        # (`axis ^ 1`) passed to `filter`.
        return self.__constructor__(
            self._modin_frame.filter(
                kwargs.get("axis", 0) ^ 1,
                lambda df: pandas.DataFrame.dropna(df, **kwargs),
            ),
            shape_hint=self._shape_hint,
        )

    def drop(
        self, index=None, columns=None, errors: str = "raise"
    ) -> PandasQueryCompiler:
        # `errors` parameter needs to be part of the function signature because
        # other query compilers may not take care of error handling at the API
        # layer. This query compiler assumes there won't be any errors due to
        # invalid keys.

        def remaining_positions(labels, to_drop):
            # Sorted positions of the labels that survive the drop.
            survivors = labels.difference(to_drop)
            return np.sort(labels.get_indexer_for(survivors))

        # `None` means "keep everything along that axis"; the axis labels are
        # only accessed when something is actually dropped from that axis.
        row_positions = None if index is None else remaining_positions(self.index, index)
        col_positions = (
            None if columns is None else remaining_positions(self.columns, columns)
        )
        new_modin_frame = self._modin_frame.take_2d_labels_or_positional(
            row_positions=row_positions, col_positions=col_positions
        )
        return self.__constructor__(new_modin_frame)

    # END Drop/Dropna

    def duplicated(self, **kwargs):
        def _as_frame(result):
            # Partition results have to be frames: a Series becomes a one-column
            # frame labelled with its name, or with the placeholder label when
            # it has none.
            if not isinstance(result, pandas.Series):
                return result
            label = MODIN_UNNAMED_SERIES_LABEL if result.name is None else result.name
            return result.to_frame(label)

        def _compute_hash(df):
            def row_digest(row):
                return hashlib.new("md5", str(tuple(row)).encode()).hexdigest()

            return _as_frame(df.apply(row_digest, axis=1))

        def _compute_duplicated(df):  # pragma: no cover
            return _as_frame(df.duplicated(**kwargs))

        source_frame = self._modin_frame
        if source_frame._partitions.shape[1] > 1:
            # if the number of columns (or column partitions) we are checking for duplicates is larger than 1,
            # we must first hash them to generate a single value that can be compared across rows.
            frame_to_check = source_frame.reduce(
                axis=1,
                function=_compute_hash,
                dtypes=pandas.api.types.pandas_dtype("O"),
            )
        else:
            frame_to_check = source_frame

        new_modin_frame = frame_to_check.apply_full_axis(
            axis=0,
            func=_compute_duplicated,
            new_index=source_frame.copy_index_cache(),
            new_columns=[MODIN_UNNAMED_SERIES_LABEL],
            dtypes=np.bool_,
            keep_partitioning=True,
        )
        return self.__constructor__(new_modin_frame, shape_hint="column")

    # Insert
    # This method changes the shape of the resulting data. In Pandas, this
    # operation is always inplace, but this object is immutable, so we just
    # return a new one from here and let the front end handle the inplace
    # update.
    def insert(self, loc, column, value):
        value = self._wrap_column_data(value)
        if isinstance(value, type(self)):
            # Wrapped (list-like) data is inserted as a one-column query compiler.
            value.columns = [column]
            return self.insert_item(axis=1, loc=loc, value=value, how=None)

        def insert(df, internal_indices=[]):  # pragma: no cover
            """
            Insert new column to the partition.

            Parameters
            ----------
            df : pandas.DataFrame
                Partition of the self frame.
            internal_indices : list of ints
                Positional index, within this partition, at which the new
                column is inserted (only the first element is used).

            Returns
            -------
            pandas.DataFrame
                The same partition object with the column inserted.
            """
            position = int(internal_indices[0])
            df.insert(position, column, value)
            return df

        value_dtype = extract_dtype(value)
        new_columns = self.columns.insert(loc, column)
        existing_dtypes = self._modin_frame._dtypes
        inserted_dtype = DtypesDescriptor(
            {column: value_dtype}, cols_with_unknown_dtypes=[]
        )
        # `lazy_get` puts the dtypes into the order of the new column labels.
        new_dtypes = ModinDtypes.concat([existing_dtypes, inserted_dtype]).lazy_get(
            new_columns
        )

        # TODO: rework by passing list-like values to `apply_select_indices`
        # as an item to distribute
        new_modin_frame = self._modin_frame.apply_full_axis_select_indices(
            0,
            insert,
            numeric_indices=[loc],
            keep_remaining=True,
            new_index=self.index,
            new_columns=new_columns,
            new_dtypes=new_dtypes,
        )
        return self.__constructor__(new_modin_frame)

    def _wrap_column_data(self, data):
        """
        Turn list-like data into a single-column query compiler.

        Parameters
        ----------
        data : any
            Data to wrap. List-like data is aligned to the index of this frame.

        Returns
        -------
        data or PandasQueryCompiler
            `data` itself if it is not list-like, otherwise the object built by
            ``from_pandas`` out of a one-column frame holding `data`.
        """
        if not is_list_like(data):
            return data
        column = pandas.Series(data, index=self.index)
        return self.from_pandas(
            pandas.DataFrame(column),
            data_cls=type(self._modin_frame),
        )

    # END Insert

    def explode(self, column):
        def explode_partition(df):
            return df.explode(column)

        new_modin_frame = self._modin_frame.explode(1, explode_partition)
        return self.__constructor__(new_modin_frame)

    # UDF (apply and agg) methods
    # There is a wide range of behaviors that are supported, so a lot of the
    # logic can get a bit convoluted.
    def apply(self, func, axis, *args, **kwargs):
        # if any of args contain modin object, we should
        # convert it to pandas
        args = try_cast_to_pandas(args)
        kwargs = try_cast_to_pandas(kwargs)
        # Only the normalized function (second element) is needed here.
        _relabeling, func, _columns, _order = reconstruct_func(func, **kwargs)

        # Dispatch on the kind of `func`: dict, list-like, or a single callable.
        if isinstance(func, dict):
            handler = self._dict_func
        elif is_list_like(func):
            handler = self._list_like_func
        else:
            handler = self._callable_func
        return handler(func, axis, *args, **kwargs)

    def apply_on_series(self, func, *args, **kwargs):
        args = try_cast_to_pandas(args)
        kwargs = try_cast_to_pandas(kwargs)

        assert self.is_series_like()

        def apply_to_squeezed(df):
            # The partition holds a one-column frame; apply `func` to it as a Series.
            return df.squeeze(axis=1).apply(func, *args, **kwargs)

        # `apply_full_axis` is used rather than `map` because `map` assumes the
        # shape of the frame does not change, while a function applied to a
        # Series may produce a DataFrame. This may well be much less performant
        # than a `map` variant would be.
        new_modin_frame = self._modin_frame.apply_full_axis(1, apply_to_squeezed)
        return self.__constructor__(new_modin_frame)

    def _dict_func(self, func, axis, *args, **kwargs):
        """
        Apply a dictionary of functions to the rows/columns it names.

        Parameters
        ----------
        func : dict(label) -> [callable, str]
            Mapping from axis labels to the function to apply against each of them.
        axis : {0, 1}
            Target axis to apply functions along. 0 means apply to columns,
            1 means apply to rows.
        *args : args
            Positional arguments forwarded to the functions.
        **kwargs : kwargs
            Keyword arguments forwarded to the functions.

        Returns
        -------
        PandasQueryCompiler
            New QueryCompiler containing the results of passed functions.
        """
        # an explicitly passed 'axis' keyword takes precedence over the positional one
        kwargs.setdefault("axis", axis)

        wrapped_funcs = {
            label: wrap_udf_function(fn) if callable(fn) else fn
            for label, fn in func.items()
        }

        def dict_apply_builder(df, internal_indices=[]):  # pragma: no cover
            # `internal_indices` is accepted but never used: `apply` picks the
            # targets by the labels that are the keys of the functions dictionary.
            applied = df.apply(wrapped_funcs, *args, **kwargs)
            # `apply` may return a `Series`, while internally every object
            # is required to be a `DataFrame`.
            return pandas.DataFrame(applied)

        labels = list(wrapped_funcs)
        new_modin_frame = self._modin_frame.apply_full_axis_select_indices(
            axis,
            dict_apply_builder,
            labels,
            new_index=labels if axis == 1 else None,
            new_columns=labels if axis == 0 else None,
            keep_remaining=False,
        )
        return self.__constructor__(new_modin_frame)

    def _list_like_func(self, func, axis, *args, **kwargs):
        """
        Apply a list of functions to each row/column.

        Parameters
        ----------
        func : list of callable
            Functions (or function names) to apply against each row/column.
        axis : {0, 1}
            Target axis to apply functions along. 0 means apply to columns,
            1 means apply to rows.
        *args : args
            Positional arguments forwarded to the functions.
        **kwargs : kwargs
            Keyword arguments forwarded to the functions.

        Returns
        -------
        PandasQueryCompiler
            New QueryCompiler containing the results of passed functions.
        """

        def function_labels():
            # strings label themselves, callables are labeled by their `__name__`
            return [fn if isinstance(fn, str) else fn.__name__ for fn in func]

        # With a list-like `func` the function names become the labels of the
        # result: the index for ``axis == 0`` and the columns for ``axis == 1``.
        new_index = function_labels() if axis == 0 else self.index
        new_columns = function_labels() if axis == 1 else self.columns

        wrapped_funcs = [wrap_udf_function(fn) if callable(fn) else fn for fn in func]

        def apply_kernel(df):
            return pandas.DataFrame(df.apply(wrapped_funcs, axis, *args, **kwargs))

        new_modin_frame = self._modin_frame.apply_full_axis(
            axis,
            apply_kernel,
            new_index=new_index,
            new_columns=new_columns,
        )
        return self.__constructor__(new_modin_frame)

    def rowwise_query(self, expr, **kwargs):
        """
        Query the columns of a ``PandasQueryCompiler`` with a boolean row-wise expression.

        Basically, in row-wise expressions we only allow column names, constants
        and other variables captured using the '@' symbol. No function/method
        can be called inside such expressions.

        Parameters
        ----------
        expr : str
            Row-wise boolean expression.
        **kwargs : dict
            Arguments to pass to the ``pandas.DataFrame.query()``.

        Returns
        -------
        PandasQueryCompiler

        Raises
        ------
        NotImplementedError
            In case the passed expression cannot be executed row-wise. This includes
            expressions that cannot be parsed as plain Python (for example, the ones
            using backtick-quoted column names or starting with whitespace), since
            they cannot be verified to be row-wise.
        """
        # Walk through the AST and verify it doesn't contain any nodes that
        # prevent us from executing the query row-wise (we're basically
        # looking for 'ast.Call'). The '@' markers are not valid Python, so they
        # are stripped for the sake of parsing only.
        try:
            nodes = ast.parse(expr.replace("@", "")).body
        except SyntaxError as err:
            # The query grammar is wider than Python's one, so a parsing failure
            # doesn't mean the query is invalid. Report it the documented way
            # (instead of leaking a 'SyntaxError') so the caller can handle it.
            raise NotImplementedError("A non row-wise query was passed.") from err

        while nodes:
            node = nodes.pop()
            if isinstance(node, ast.Expr):
                node = getattr(node, "value", node)

            if isinstance(node, ast.UnaryOp):
                nodes.append(node.operand)
            elif isinstance(node, ast.BinOp):
                nodes.extend([node.left, node.right])
            elif isinstance(node, ast.BoolOp):
                nodes.extend(node.values)
            elif isinstance(node, ast.Compare):
                nodes.extend([node.left] + node.comparators)
            elif isinstance(node, (ast.Name, ast.Constant)):
                continue
            else:
                # if we end up here then the expression is no longer simple
                # enough to run it row-wise, so exiting
                raise NotImplementedError("A non row-wise query was passed.")

        def query_builder(df, **modin_internal_kwargs):
            # the original (non-stripped) expression is what gets executed
            return df.query(expr, inplace=False, **kwargs, **modin_internal_kwargs)

        return self.__constructor__(self._modin_frame.filter(1, query_builder))

    def _callable_func(self, func, axis, *args, **kwargs):
        """
        Apply a single function to each row/column.

        Parameters
        ----------
        func : callable or str
            Function to apply.
        axis : {0, 1}
            Target axis to apply function along. 0 means apply to columns,
            1 means apply to rows.
        *args : args
            Positional arguments forwarded to the function.
        **kwargs : kwargs
            Keyword arguments forwarded to the function.

        Returns
        -------
        PandasQueryCompiler
            New QueryCompiler containing the results of passed function
            for each row/column.
        """
        # only callables are wrapped, function names are passed through as is
        udf = wrap_udf_function(func) if callable(func) else func

        def apply_kernel(df):
            return df.apply(udf, *args, axis=axis, **kwargs)

        new_modin_frame = self._modin_frame.apply_full_axis(axis, apply_kernel)
        return self.__constructor__(new_modin_frame)

    # END UDF

    # Manual Partitioning methods (e.g. merge, groupby)
    # These methods require some sort of manual partitioning due to their
    # nature. They require certain data to exist on the same partition, and
    # after the shuffle, there should be only a local map required.

    def _groupby_separate_by(self, by, drop):
        """
        Split the `by` argument of groupby into internal and external groupers.

        Parameters
        ----------
        by : BaseQueryCompiler, column or index label, Grouper or list
        drop : bool
            Indicates whether or not by data came from self frame.
            True, by data came from self. False, external by data.

        Returns
        -------
        external_by : list of BaseQueryCompiler and arrays
            Values to group by.
        internal_by : list of str
            List of column names from `self` to group by.
        by_positions : list of ints
            Specifies the order of grouping by `internal_by` and `external_by` columns.
            Each element in `by_positions` specifies an index from either `external_by` or `internal_by`.
            Indices for `external_by` are non-negative and start from 0. Indices for `internal_by` are
            negative and start from -1 (so in order to convert them to valid indices one should do
            ``-idx - 1``). For example, for ``df.groupby([sr1, "col1", sr2, "col2", sr3, sr4])``::

                by_positions = [0, -1, 1, -2, 2, 3]
                internal_by = ["col1", "col2"]
                external_by = [sr1, sr2, sr3, sr4]
        """
        qc_type = type(self)

        # `by` is a single query compiler: it is either entirely internal or
        # entirely external, depending on `drop`
        if isinstance(by, qc_type):
            if drop:
                internal_by = by.columns.tolist()
                positions = [-(pos + 1) for pos in range(len(internal_by))]
                return [], internal_by, positions
            return [by], [], list(range(len(by.columns)))

        if isinstance(by, list):
            groupers = by
        elif by is None:
            groupers = []
        else:
            groupers = [by]

        internal_by = []
        external_by = []
        by_positions = []
        next_external_position = 0
        for grouper in groupers:
            if isinstance(grouper, pandas.Grouper) and grouper.key in self.columns:
                internal_key = grouper.key
            elif hashable(grouper) and grouper in self.columns:
                internal_key = grouper
            else:
                external_by.append(grouper)
                # an external query compiler occupies one position per its column
                n_positions = (
                    len(grouper.columns) if isinstance(grouper, qc_type) else 1
                )
                by_positions.extend(
                    range(next_external_position, next_external_position + n_positions)
                )
                next_external_position += n_positions
                continue

            internal_by.append(internal_key)
            by_positions.append(-len(internal_by))
        return external_by, internal_by, by_positions

    # Groupby aggregations that need no extra handling in this class: every
    # attribute below is the callable that ``GroupbyReduceImpl.build_qc_method``
    # produces for the given aggregation name.
    groupby_all = GroupbyReduceImpl.build_qc_method("all")
    groupby_any = GroupbyReduceImpl.build_qc_method("any")
    groupby_count = GroupbyReduceImpl.build_qc_method("count")
    groupby_max = GroupbyReduceImpl.build_qc_method("max")
    groupby_min = GroupbyReduceImpl.build_qc_method("min")
    groupby_prod = GroupbyReduceImpl.build_qc_method("prod")
    groupby_sum = GroupbyReduceImpl.build_qc_method("sum")
    groupby_skew = GroupbyReduceImpl.build_qc_method("skew")

    def groupby_nth(
        self,
        by,
        axis,
        groupby_kwargs,
        agg_args,
        agg_kwargs,
        drop=False,
    ):
        nth_result = super().groupby_nth(
            by, axis, groupby_kwargs, agg_args, agg_kwargs, drop
        )
        if groupby_kwargs.get("as_index", True):
            return nth_result
        # with `as_index=False` pandas keeps the order of columns intact,
        # so the result is re-selected in the order of the source columns
        return nth_result.getitem_column_array(self.columns)

    def groupby_mean(self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False):
        if RangePartitioning.get():
            try:
                return self._groupby_shuffle(
                    by=by,
                    agg_func="mean",
                    axis=axis,
                    groupby_kwargs=groupby_kwargs,
                    agg_args=agg_args,
                    agg_kwargs=agg_kwargs,
                    drop=drop,
                )
            except NotImplementedError as err:
                ErrorMessage.warn(
                    f"Can't use range-partitioning groupby implementation because of: {err}"
                    + "\nFalling back to a TreeReduce implementation."
                )

        def tree_reduce_mean(query_compiler):
            # TreeReduce implementation of 'mean' applied to the given query compiler
            return GroupbyReduceImpl.build_qc_method("mean")(
                query_compiler=query_compiler,
                by=by,
                axis=axis,
                groupby_kwargs=groupby_kwargs,
                agg_args=agg_args,
                agg_kwargs=agg_kwargs,
                drop=drop,
            )

        _, internal_by, _ = self._groupby_separate_by(by, drop)

        # Datetime columns that are aggregated (i.e. are not grouped on) are only
        # collected when non-numeric columns take part in the aggregation.
        datetime_cols = {}
        if not agg_kwargs.get("numeric_only", False):
            datetime_cols = {
                col: dtype
                for col, dtype in zip(self.dtypes.index, self.dtypes)
                if is_datetime64_any_dtype(dtype) and col not in internal_by
            }

        if len(datetime_cols) == 0:
            return tree_reduce_mean(self)

        # The datetime columns are aggregated via their 'int64' representation
        # and cast back afterwards. If any of them has missing values, the
        # parent-class implementation is used instead.
        datetime_qc = self.getitem_array(datetime_cols)
        if datetime_qc.isna().any().any(axis=1).to_pandas().squeeze():
            return super().groupby_mean(
                by=by,
                axis=axis,
                groupby_kwargs=groupby_kwargs,
                agg_args=agg_args,
                agg_kwargs=agg_kwargs,
                drop=drop,
            )

        as_int64 = self.astype({col: "int64" for col in datetime_cols})
        return tree_reduce_mean(as_int64).astype(dict(datetime_cols))

    def groupby_size(
        self,
        by,
        axis,
        groupby_kwargs,
        agg_args,
        agg_kwargs,
        drop=False,
    ):
        if RangePartitioning.get():
            try:
                return self._groupby_shuffle(
                    by=by,
                    agg_func="size",
                    axis=axis,
                    groupby_kwargs=groupby_kwargs,
                    agg_args=agg_args,
                    agg_kwargs=agg_kwargs,
                    drop=drop,
                )
            except NotImplementedError as err:
                ErrorMessage.warn(
                    f"Can't use range-partitioning groupby implementation because of: {err}"
                    + "\nFalling back to a TreeReduce implementation."
                )

        # 'size' is expressed as a renaming aggregation over the first column
        size_spec = {self.columns[0]: [("__size_col__", "size")]}
        result = self._groupby_dict_reduce(
            by=by,
            axis=axis,
            agg_func=size_spec,
            agg_args=agg_args,
            agg_kwargs=agg_kwargs,
            groupby_kwargs=groupby_kwargs,
            drop=drop,
            method="size",
            default_to_pandas_func=lambda grp: grp.size(),
        )

        if groupby_kwargs.get("as_index", True):
            result.columns = [MODIN_UNNAMED_SERIES_LABEL]
            return result

        result_columns = result.columns
        if isinstance(result_columns, pandas.MultiIndex):
            # Dropping one extra-level which was added because of renaming aggregation
            leading_labels = result_columns[:-1].droplevel(-1)
            result.columns = leading_labels.append(pandas.Index(["size"]))
        return result

    def _groupby_dict_reduce(
        self,
        by,
        agg_func,
        axis,
        groupby_kwargs,
        agg_args,
        agg_kwargs,
        drop=False,
        **kwargs,
    ):
        """
        Perform a dictionary groupby aggregation using the TreeReduce approach.

        The method builds per-column map and reduce functions out of `agg_func`
        and registers them via ``GroupByReduce.register``.

        Parameters
        ----------
        by : PandasQueryCompiler, column or index label, Grouper or list of such
            Object that determine groups.
        agg_func : dict(label) -> str
            Dictionary that maps row/column labels to the function names.
            **Note:** specified functions have to be supported by ``modin.core.dataframe.algebra.GroupByReduce``.
            Supported functions are listed in the ``modin.core.dataframe.algebra.GroupByReduce.groupby_reduce_functions``
            dictionary.
        axis : {0, 1}
            Axis to group and apply aggregation function along.
            0 is for index, when 1 is for columns.
        groupby_kwargs : dict
            GroupBy parameters in the format of ``modin.pandas.DataFrame.groupby`` signature.
        agg_args : list-like
            Serves the compatibility purpose. Does not affect the result.
        agg_kwargs : dict
            Arguments to pass to the aggregation functions.
        drop : bool, default: False
            If `by` is a QueryCompiler indicates whether or not by-data came
            from the `self`.
        **kwargs : dict
            Additional parameters to pass to the ``modin.core.dataframe.algebra.GroupByReduce.register``.

        Returns
        -------
        PandasQueryCompiler
            New QueryCompiler containing the result of groupby dictionary aggregation.
        """

        def is_renaming_spec(fn):
            # a non-string iterable is treated as a ``(new_name, func)`` pair
            return not isinstance(fn, str) and isinstance(fn, Iterable)

        kwargs.setdefault(
            "default_to_pandas_func",
            lambda grp, *args, **kwargs: grp.agg(agg_func, *args, **kwargs),
        )

        map_dict = {}
        reduce_dict = {}
        rename_columns = any(is_renaming_spec(fn) for fn in agg_func.values())

        if not rename_columns:
            # plain case: a single implementation lookup per column
            for col, col_funcs in agg_func.items():
                map_fn, reduce_fn, _ = GroupbyReduceImpl.get_impl(col_funcs)
                map_dict[col] = map_fn
                reduce_dict[col] = reduce_fn
        else:
            for col, col_funcs in agg_func.items():
                specs = [col_funcs] if isinstance(col_funcs, str) else col_funcs
                # reduce functions are keyed by the source label extended
                # with the name of the resulting column
                col_prefix = col if isinstance(col, tuple) else (col,)

                col_map_fns = []
                for spec in specs:
                    if isinstance(spec, str):
                        new_col_name = func = spec
                    elif isinstance(spec, Iterable):
                        new_col_name, func = spec
                    else:
                        raise TypeError

                    map_fn, reduce_fn, _ = GroupbyReduceImpl.get_impl(func)
                    col_map_fns.append((new_col_name, map_fn))
                    reduce_dict[(*col_prefix, new_col_name)] = reduce_fn
                map_dict[col] = col_map_fns

        groupby_method = GroupByReduce.register(map_dict, reduce_dict, **kwargs)
        return groupby_method(
            query_compiler=self,
            by=by,
            axis=axis,
            groupby_kwargs=groupby_kwargs,
            agg_args=agg_args,
            agg_kwargs=agg_kwargs,
            drop=drop,
        )

    def groupby_dtypes(
        self,
        by,
        axis,
        groupby_kwargs,
        agg_args,
        agg_kwargs,
        drop=False,
    ):
        # Implemented as a 'group_wise' aggregation that takes the ``dtypes``
        # of what it is given.
        return self.groupby_agg(
            by=by,
            agg_func=lambda df: df.dtypes,
            axis=axis,
            groupby_kwargs=groupby_kwargs,
            agg_args=agg_args,
            agg_kwargs=agg_kwargs,
            how="group_wise",
            drop=drop,
        )

    @_inherit_docstrings(BaseQueryCompiler.groupby_agg)
    def _groupby_shuffle(
        self,
        by,
        agg_func,
        axis,
        groupby_kwargs,
        agg_args,
        agg_kwargs,
        drop=False,
        how="axis_wise",
        series_groupby=False,
    ):
        # Range-partitioning groupby implementation built on top of the frame-level
        # ``groupby``. The docstring is expected to come from the decorator above,
        # hence comments are used here instead of a docstring.
        #
        # Configurations that are not supported are reported via 'NotImplementedError';
        # the callers in this class catch it and fall back to other implementations.

        # Defaulting to pandas in case of an empty frame as we can't process it properly.
        # Higher API level won't pass empty data here unless the frame has delayed
        # computations. FIXME: We apparently lose some laziness here (due to index access)
        # because of the inability to process empty groupby natively.
        if len(self.columns) == 0 or len(self._modin_frame) == 0:
            return super().groupby_agg(
                by, agg_func, axis, groupby_kwargs, agg_args, agg_kwargs, how, drop
            )

        grouping_on_level = groupby_kwargs.get("level") is not None
        # 'by' may be a single object or a list of them, a 'pandas.Grouper'
        # at any position is not supported
        if any(
            isinstance(obj, pandas.Grouper)
            for obj in (by if isinstance(by, list) else [by])
        ):
            raise NotImplementedError(
                "Grouping on a pandas.Grouper with range-partitioning groupby is not yet supported: "
                + "https://github.com/modin-project/modin/issues/5926"
            )

        # when grouping on a level, 'by' is not split into internal/external parts
        if grouping_on_level:
            external_by, internal_by, by_positions = [], [], []
        else:
            external_by, internal_by, by_positions = self._groupby_separate_by(by, drop)

        # every external grouper has to be a query compiler of the same type as 'self'
        all_external_are_qcs = all(isinstance(obj, type(self)) for obj in external_by)
        if not all_external_are_qcs:
            raise NotImplementedError(
                "Grouping on an external grouper with range-partitioning groupby is only supported with Series'es: "
                + "https://github.com/modin-project/modin/issues/5926"
            )

        is_transform = how == "transform" or GroupBy.is_transformation_kernel(agg_func)
        if is_transform:
            # https://github.com/modin-project/modin/issues/5924
            ErrorMessage.mismatch_with_pandas(
                operation="range-partitioning groupby",
                message="the order of rows may be shuffled for the result",
            )

        # Missing categories only have to be added for non-transform aggregations
        # with 'observed' being False (or not specified) and at least one
        # categorical 'by' column.
        if not is_transform and groupby_kwargs.get("observed", False) in (
            False,
            lib.no_default,
        ):
            # The following 'dtypes' check materializes dtypes for 'by' columns
            internal_dtypes = pandas.Series()
            external_dtypes = pandas.Series()
            if len(internal_by) > 0:
                # for 'ModinDtypes' only the dtypes of the 'by' columns are requested
                internal_dtypes = (
                    self._modin_frame._dtypes.lazy_get(internal_by).get()
                    if isinstance(self._modin_frame._dtypes, ModinDtypes)
                    else self.dtypes[internal_by]
                )
            if len(external_by) > 0:
                dtypes_list = []
                for obj in external_by:
                    if not isinstance(obj, type(self)):
                        # we're only interested in categorical dtypes here, which can only
                        # appear in modin objects
                        continue
                    dtypes_list.append(obj.dtypes)
                external_dtypes = pandas.concat(dtypes_list)

            by_dtypes = pandas.concat([internal_dtypes, external_dtypes])
            add_missing_cats = any(
                isinstance(dtype, pandas.CategoricalDtype) for dtype in by_dtypes
            )
        else:
            add_missing_cats = False

        if add_missing_cats and not groupby_kwargs.get("as_index", True):
            raise NotImplementedError(
                "Range-partitioning groupby is not implemented for grouping on categorical columns with "
                + "the following set of parameters {'as_index': False, 'observed': False}. Change either 'as_index' "
                + "or 'observed' to True and try again. "
                + "https://github.com/modin-project/modin/issues/5926"
            )

        if isinstance(agg_func, dict):
            assert (
                how == "axis_wise"
            ), f"Only 'axis_wise' aggregation is supported with dictionary functions, got: {how}"

            # with a dictionary function only the 'by' columns and the columns
            # to aggregate are selected
            subset = internal_by + list(agg_func.keys())
            # extracting unique values; no we can't use np.unique here as it would
            # convert a list of tuples to a 2D matrix and so mess up the result
            subset = list(dict.fromkeys(subset))
            obj = self.getitem_column_array(subset)
        else:
            obj = self

        agg_method = (
            SeriesGroupByDefault if series_groupby else GroupByDefault
        ).get_aggregation_method(how)
        # 'agg_func' is re-bound to the wrapper below, so the function that
        # was passed in is kept under a separate name
        original_agg_func = agg_func

        def agg_func(grp, *args, **kwargs):
            result = agg_method(grp, original_agg_func, *args, **kwargs)

            # Convert Series to DataFrame
            if result.ndim == 1:
                result = result.to_frame(
                    MODIN_UNNAMED_SERIES_LABEL if result.name is None else result.name
                )

            return result

        # NOTE: the 'obj' inside of the 'external_by' comprehension is local to the
        # comprehension, 'obj._modin_frame.groupby' refers to the 'obj' selected above
        result = obj._modin_frame.groupby(
            axis=axis,
            internal_by=internal_by,
            external_by=[
                obj._modin_frame if isinstance(obj, type(self)) else obj
                for obj in external_by
            ],
            by_positions=by_positions,
            series_groupby=series_groupby,
            operator=lambda grp: agg_func(grp, *agg_args, **agg_kwargs),
            # UDFs passed to '.apply()' are allowed to produce results with arbitrary shapes,
            # that's why we have to align the partition's shapes/labeling across different
            # row partitions
            align_result_columns=how == "group_wise",
            add_missing_cats=add_missing_cats,
            **groupby_kwargs,
        )
        result_qc: PandasQueryCompiler = self.__constructor__(result)

        # for non-transform aggregations with 'as_index=False' the index of
        # the result is dropped
        if not is_transform and not groupby_kwargs.get("as_index", True):
            return result_qc.reset_index(drop=True)

        return result_qc

    def groupby_corr(
        self,
        by,
        axis,
        groupby_kwargs,
        agg_args,
        agg_kwargs,
        drop=False,
    ):
        ErrorMessage.default_to_pandas("`GroupBy.corr`")
        # TODO(https://github.com/modin-project/modin/issues/1323) implement this.
        # This class's own groupby_agg (even with how="group_wise") currently gives
        # a result with the wrong index, hence the super class's groupby_agg is
        # used, which defaults to pandas.
        fallback_kwargs = {
            "by": by,
            "agg_func": "corr",
            "axis": axis,
            "groupby_kwargs": groupby_kwargs,
            "agg_args": agg_args,
            "agg_kwargs": agg_kwargs,
            "drop": drop,
        }
        return super().groupby_agg(**fallback_kwargs)

    def groupby_cov(
        self,
        by,
        axis,
        groupby_kwargs,
        agg_args,
        agg_kwargs,
        drop=False,
    ):
        ErrorMessage.default_to_pandas("`GroupBy.cov`")
        # TODO(https://github.com/modin-project/modin/issues/1322) implement this.
        # This class's own groupby_agg (even with how="group_wise") currently gives
        # a result with the wrong index, hence the super class's groupby_agg is
        # used, which defaults to pandas.
        fallback_kwargs = {
            "by": by,
            "agg_func": "cov",
            "axis": axis,
            "groupby_kwargs": groupby_kwargs,
            "agg_args": agg_args,
            "agg_kwargs": agg_kwargs,
            "drop": drop,
        }
        return super().groupby_agg(**fallback_kwargs)

    def groupby_rolling(
        self,
        by,
        agg_func,
        axis,
        groupby_kwargs,
        rolling_kwargs,
        agg_args,
        agg_kwargs,
        drop=False,
    ):
        # 'corr' and 'cov' need the whole row axis to be available (all columns
        # in the same partitions), which the current groupby implementation
        # doesn't guarantee; the same route is taken when 'on' is specified
        # for the rolling window
        unsupported_groupby = (
            agg_func in ("corr", "cov") or rolling_kwargs.get("on") is not None
        )

        if isinstance(agg_func, str):
            method_name = agg_func

            def window_func(window, *args, **kwargs):
                # a function name is resolved to the method of the rolling window
                return getattr(window, method_name)(*args, **kwargs)

        else:
            assert callable(agg_func)
            window_func = agg_func

        call_kwargs = dict(
            by=by,
            agg_func=lambda grp, *args, **kwargs: window_func(
                grp.rolling(**rolling_kwargs), *args, **kwargs
            ),
            axis=axis,
            groupby_kwargs=groupby_kwargs,
            agg_args=agg_args,
            agg_kwargs=agg_kwargs,
            how="direct",
            drop=drop,
        )

        if unsupported_groupby:
            return super(PandasQueryCompiler, self).groupby_agg(**call_kwargs)

        try:
            return self._groupby_shuffle(**call_kwargs)
        except NotImplementedError as err:
            get_logger().info(
                f"Can't use range-partitioning groupby implementation because of: {err}"
                + "\nFalling back to a full-axis implementation."
            )
        return self.groupby_agg(**call_kwargs)

    def groupby_agg(
        self,
        by,
        agg_func,
        axis,
        groupby_kwargs,
        agg_args,
        agg_kwargs,
        how="axis_wise",
        drop=False,
        series_groupby=False,
    ):
        # Defaulting to pandas in case of an empty frame as we can't process it properly.
        # Higher API level won't pass empty data here unless the frame has delayed
        # computations. So we apparently lose some laziness here (due to index access)
        # because of the inability to process empty groupby natively.
        if len(self.columns) == 0 or len(self._modin_frame) == 0:
            return super().groupby_agg(
                by, agg_func, axis, groupby_kwargs, agg_args, agg_kwargs, how, drop
            )

        # 'group_wise' means 'groupby.apply()'. We're certain that range-partitioning groupby
        # always works better for '.apply()', so we're using it regardless of the 'RangePartitioning'
        # value
        if how == "group_wise" or RangePartitioning.get():
            try:
                return self._groupby_shuffle(
                    by=by,
                    agg_func=agg_func,
                    axis=axis,
                    groupby_kwargs=groupby_kwargs,
                    agg_args=agg_args,
                    agg_kwargs=agg_kwargs,
                    drop=drop,
                    how=how,
                    series_groupby=series_groupby,
                )
            except NotImplementedError as e:
                # if a user wants to use range-partitioning groupby explicitly, then we should print a visible
                # warning to them on a failure, otherwise we're only logging it
                message = (
                    f"Can't use range-partitioning groupby implementation because of: {e}"
                    + "\nFalling back to a full-axis implementation."
                )
                get_logger().info(message)
                if RangePartitioning.get():
                    ErrorMessage.warn(message)

        if isinstance(agg_func, dict) and GroupbyReduceImpl.has_impl_for(agg_func):
            return self._groupby_dict_reduce(
                by, agg_func, axis, groupby_kwargs, agg_args, agg_kwargs, drop
            )

        is_transform_method = how == "transform" or (
            isinstance(agg_func, str) and agg_func in transformation_kernels
        )

        original_agg_func = agg_func

        if isinstance(agg_func, dict):
            assert (
                how == "axis_wise"
            ), f"Only 'axis_wise' aggregation is supported with dictionary functions, got: {how}"
        else:
            agg_method = (
                SeriesGroupByDefault if series_groupby else GroupByDefault
            ).get_aggregation_method(how)

            def agg_func(grp, *args, **kwargs):
                return agg_method(grp, original_agg_func, *args, **kwargs)

        # since we're going to modify `groupby_kwargs` dict in a `groupby_agg_builder`,
        # we want to copy it to not propagate these changes into source dict, in case
        # of unsuccessful end of function
        groupby_kwargs = groupby_kwargs.copy()

        as_index = groupby_kwargs.get("as_index", True)
        external_by, internal_by, _ = self._groupby_separate_by(by, drop)
        internal_qc = (
            [self.getitem_column_array(internal_by)] if len(internal_by) else []
        )
        by = internal_qc + external_by

        broadcastable_by = [o._modin_frame for o in by if isinstance(o, type(self))]
        not_broadcastable_by = [o for o in by if not isinstance(o, type(self))]

        def groupby_agg_builder(df, by=None, drop=False, partition_idx=None):
            """
            Compute groupby aggregation for a single partition.

            Parameters
            ----------
            df : pandas.DataFrame
                Partition of the self frame.
            by : pandas.DataFrame, optional
                Broadcasted partition which contains `by` columns.
            drop : bool, default: False
                Indicates whether `by` partition came from the `self` frame.
            partition_idx : int, optional
                Positional partition index along groupby axis.

            Returns
            -------
            pandas.DataFrame
                DataFrame containing the result of groupby aggregation
                for this particular partition.
            """
            # Set `as_index` to True to track the metadata of the grouping object
            # It is used to make sure that between phases we are constructing the
            # right index and placing columns in the correct order.
            groupby_kwargs["as_index"] = True

            # We have to filter func-dict BEFORE inserting broadcasted 'by' columns
            # to avoid multiple aggregation results for 'by' cols in case they're
            # present in the func-dict:
            partition_agg_func = GroupByReduce.get_callable(agg_func, df)

            internal_by_cols = pandas.Index([])
            missed_by_cols = pandas.Index([])

            if by is not None:
                internal_by_df = by[internal_by]

                if isinstance(internal_by_df, pandas.Series):
                    internal_by_df = internal_by_df.to_frame()

                missed_by_cols = internal_by_df.columns.difference(df.columns)
                if len(missed_by_cols) > 0:
                    df = pandas.concat(
                        [df, internal_by_df[missed_by_cols]],
                        axis=1,
                        copy=False,
                    )

                internal_by_cols = internal_by_df.columns

                external_by = by.columns.difference(internal_by).unique()
                external_by_df = by[external_by].squeeze(axis=1)

                if isinstance(external_by_df, pandas.DataFrame):
                    external_by_cols = [o for _, o in external_by_df.items()]
                else:
                    external_by_cols = [external_by_df]

                by = internal_by_cols.tolist() + external_by_cols

            else:
                by = []

            by += not_broadcastable_by
            level = groupby_kwargs.get("level", None)
            if level is not None and not by:
                by = None
                by_length = len(level) if is_list_like(level) else 1
            else:
                by_length = len(by)

            def compute_groupby(df, drop=False, partition_idx=0):
                """Compute groupby aggregation for a single partition."""
                target_df = df.squeeze(axis=1) if series_groupby else df
                grouped_df = target_df.groupby(by=by, axis=axis, **groupby_kwargs)
                try:
                    result = partition_agg_func(grouped_df, *agg_args, **agg_kwargs)
                except DataError:
                    # This happens when the partition is filled with non-numeric data and a
                    # numeric operation is done. We need to build the index here to avoid
                    # issues with extracting the index.
                    result = pandas.DataFrame(index=grouped_df.size().index)
                if isinstance(result, pandas.Series):
                    result = result.to_frame(
                        result.name
                        if result.name is not None
                        else MODIN_UNNAMED_SERIES_LABEL
                    )

                selection = agg_func.keys() if isinstance(agg_func, dict) else None
                if selection is None:
                    # Some pandas built-in aggregation functions aggregate 'by' columns
                    # (for example 'apply', 'dtypes', maybe more...). Since we make sure
                    # that all of the 'by' columns are presented in every partition by
                    # inserting the missed ones, we will end up with all of the 'by'
                    # columns being aggregated in every partition. To avoid duplications
                    # in the result we drop all of the 'by' columns that were inserted
                    # in this partition AFTER handling 'as_index' parameter. The order
                    # is important for proper naming-conflicts handling.
                    misaggregated_cols = missed_by_cols.intersection(result.columns)
                else:
                    misaggregated_cols = []

                if not as_index:
                    GroupBy.handle_as_index_for_dataframe(
                        result,
                        internal_by_cols,
                        by_cols_dtypes=df[internal_by_cols].dtypes.values,
                        by_length=by_length,
                        selection=selection,
                        partition_idx=partition_idx,
                        drop=drop,
                        inplace=True,
                        method="transform" if is_transform_method else None,
                    )
                else:
                    new_index_names = tuple(
                        (
                            None
                            if isinstance(name, str)
                            and name.startswith(MODIN_UNNAMED_SERIES_LABEL)
                            else name
                        )
                        for name in result.index.names
                    )
                    result.index.names = new_index_names

                if len(misaggregated_cols) > 0:
                    result.drop(columns=misaggregated_cols, inplace=True)

                return result

            try:
                return compute_groupby(df, drop, partition_idx)
            except (ValueError, KeyError):
                # This will happen with Arrow buffer read-only errors. We don't want to copy
                # all the time, so this will try to fast-path the code first.
                return compute_groupby(df.copy(), drop, partition_idx)

        if isinstance(original_agg_func, dict):
            apply_indices = list(agg_func.keys())
        elif isinstance(original_agg_func, list):
            apply_indices = self.columns.difference(internal_by).tolist()
        else:
            apply_indices = None

        if (
            # For now handling only simple cases, where 'by' columns are described by a single query compiler
            agg_kwargs.get("as_index", True)
            and len(not_broadcastable_by) == 0
            and len(broadcastable_by) == 1
            and broadcastable_by[0].has_materialized_dtypes
        ):
            new_index = ModinIndex(
                # actual value will be assigned on a parent update
                value=None,
                axis=0,
                dtypes=broadcastable_by[0].dtypes,
            )
        else:
            new_index = None

        new_modin_frame = self._modin_frame.broadcast_apply_full_axis(
            axis=axis,
            func=lambda df, by=None, partition_idx=None: groupby_agg_builder(
                df, by, drop, partition_idx
            ),
            other=broadcastable_by,
            new_index=new_index,
            apply_indices=apply_indices,
            enumerate_partitions=True,
        )
        result = self.__constructor__(new_modin_frame)

        # that means that exception in `compute_groupby` was raised
        # in every partition, so we also should raise it
        if (
            len(result.columns) == 0
            and len(self.columns) != 0
            and agg_kwargs.get("numeric_only", False)
        ):
            raise TypeError("No numeric types to aggregate.")

        return result

    # END Manual Partitioning methods

    def pivot(self, index, columns, values):
        # Reshape the frame by moving the `index`/`columns` keys into the row
        # index with `set_index` and then unstacking the `columns` levels.
        from pandas.core.reshape.pivot import _convert_by

        def _as_key_list(by):
            """Normalize one pivot argument into a list of keys."""
            keys = _convert_by(list(by) if isinstance(by, pandas.Index) else by)
            if len(keys) == 0:
                return keys
            first = keys[0]
            looks_like_labels = not is_list_like(first) or isinstance(first, tuple)
            # Label-shaped keys that are not all column names are kept together
            # as one list-like entry instead of being treated as column names.
            if looks_like_labels and not all([key in self.columns for key in keys]):
                return [keys]
            return keys

        index, columns, values = (
            _as_key_list(arg) for arg in (index, columns, values)
        )

        # A single list-like (non-tuple) entry is assigned as the index itself
        # further below rather than being looked up among the columns.
        is_custom_index = (
            len(index) == 1
            and is_list_like(index[0])
            and not isinstance(index[0], tuple)
        )

        use_columns_only = is_custom_index or len(index) == 0
        to_reindex = columns if use_columns_only else index + columns

        if len(values) == 0:
            obj = self
        else:
            obj = self.getitem_column_array(to_reindex + values)

        if is_custom_index:
            # NOTE(review): when `values` is empty `obj` is `self`, so this
            # assignment targets the caller's query compiler — confirm that
            # this is intended.
            obj.index = index

        def move_keys_to_index(df):
            return df.set_index(to_reindex, append=(len(to_reindex) == 1))

        reindexed_frame = obj._modin_frame.apply_full_axis(
            1,
            move_keys_to_index,
            new_columns=obj.columns.drop(to_reindex),
        )
        reindexed = self.__constructor__(reindexed_frame)

        unstacked = reindexed.unstack(level=columns, fill_value=None)
        has_single_column = len(reindexed.columns) == 1
        if has_single_column and unstacked.columns.nlevels > 1:
            # Drop the outermost column level when only one column was unstacked.
            unstacked.columns = unstacked.columns.droplevel(0)

        return unstacked

    def pivot_table(
        self,
        index,
        values,
        columns,
        aggfunc,
        fill_value,
        margins,
        dropna,
        margins_name,
        observed,
        sort,
    ):
        # Try the MapReduce implementation first, then the range-partitioning
        # one, and finally the full-axis one. An implementation opts out by
        # raising `NotImplementedError`, which is logged before falling back.
        ErrorMessage.mismatch_with_pandas(
            operation="pivot_table",
            message="Order of columns could be different from pandas",
        )

        from pandas.core.reshape.pivot import _convert_by

        def _as_key_list(by):
            """Convert passed value to a list."""
            return list(by) if isinstance(by, pandas.Index) else _convert_by(by)

        # Must be computed before `values` is converted to a list below.
        is_1d_values = values is not None and not is_list_like(values)
        index = _as_key_list(index)
        columns = _as_key_list(columns)

        if len(index) + len(columns) == 0:
            raise ValueError("No group keys passed!")

        drop_column_level = None
        if is_1d_values and len(index) > 0 and len(columns) > 0:
            drop_column_level = 1 if isinstance(aggfunc, list) else 0

        # `None` has to stay `None`: converting it would give an empty list
        # (no columns to aggregate), while `None` means "aggregate ALL columns".
        if values is not None:
            values = _as_key_list(values)

        # `pandas.Series.unique` keeps the original order of the keys.
        unique_keys = pandas.Series(index + columns).unique()

        pivot_kwargs = {
            "index": index,
            "values": values,
            "columns": columns,
            "aggfunc": aggfunc,
            "fill_value": fill_value,
            "margins": margins,
            "dropna": dropna,
            "margins_name": margins_name,
            "observed": observed,
            "sort": sort,
        }
        impl_kwargs = {
            "qc": self,
            "unique_keys": unique_keys,
            "drop_column_level": drop_column_level,
            "pivot_kwargs": pivot_kwargs,
        }

        try:
            return PivotTableImpl.map_reduce_impl(**impl_kwargs)
        except NotImplementedError as e:
            get_logger().info(
                f"Can't use MapReduce 'pivot_table' implementation because of: {e}"
                "\nFalling back to a range-partitioning implementation."
            )

        try:
            return PivotTableImpl.range_partition_impl(**impl_kwargs)
        except NotImplementedError as e:
            get_logger().info(
                f"Can't use range-partitioning 'pivot_table' implementation because of: {e}"
                "\nFalling back to a full-axis implementation."
            )

        return PivotTableImpl.full_axis_impl(**impl_kwargs)

    # Get_dummies
    def get_dummies(self, columns, **kwargs):
        if columns is None:
            # `columns=None` does not select every column: by default only the
            # non-numeric columns are encoded.
            columns = [c for c in self.columns if not is_numeric_dtype(self.dtypes[c])]
            if not columns:
                # Nothing to encode, so no remote computation is required.
                return self.copy()
        elif not is_list_like(columns):
            columns = [columns]

        def map_fn(df):  # pragma: no cover
            cols_to_encode = df.columns.intersection(columns)
            return pandas.get_dummies(df, columns=cols_to_encode, **kwargs)

        if len(columns) == len(self.columns):
            # Every column is encoded: map over the whole frame directly, which
            # is cheaper than first selecting the columns.
            encoded_frame = self._modin_frame.apply_full_axis(
                0, map_fn, new_index=self.index, dtypes=bool
            )
            return self.__constructor__(encoded_frame)

        # Only a subset is encoded: encode the selected columns separately and
        # attach the result to the right of the columns that were left as-is.
        selected_frame = self._modin_frame.take_2d_labels_or_positional(
            col_labels=columns
        )
        encoded_frame = selected_frame.apply_full_axis(
            0, map_fn, new_index=self.index, dtypes=bool
        )
        untouched_frame = self.drop(columns=columns)
        combined_frame = untouched_frame._modin_frame.concat(
            1, [encoded_frame], how="left", sort=False
        )
        return self.__constructor__(combined_frame)

    # END Get_dummies

    # Indexing
    def take_2d_positional(self, index=None, columns=None):
        # `index` and `columns` are forwarded as row and column positions.
        taken_frame = self._modin_frame.take_2d_labels_or_positional(
            row_positions=index, col_positions=columns
        )
        return self.__constructor__(taken_frame)

    def write_items(
        self, row_numeric_index, col_numeric_index, item, need_columns_reindex=True
    ):
        # This import stays inside the method to avoid a circular import at
        # module level.
        from modin.pandas.utils import broadcast_item, is_scalar

        def iloc_mut(partition, row_internal_indices, col_internal_indices, item):
            """
            Return a copy of one partition with `item` written at the given positions.

            Parameters
            ----------
            partition : pandas.DataFrame
                Partition of the self frame.
            row_internal_indices : list of ints
                Row positions inside this partition that receive `item`.
            col_internal_indices : list of ints
                Column positions inside this partition that receive `item`.
            item : 2D-array
                Value to write.

            Returns
            -------
            pandas.DataFrame
                Copy of the partition holding the updated values.
            """
            updated = partition.copy()
            try:
                updated.iloc[row_internal_indices, col_internal_indices] = item
            except ValueError:
                # Retry with a copy of `item`: converting `item` to the dtype
                # stored in the dataframe can fail with "ValueError: buffer
                # source array is read-only".
                # TODO: in the future the conversion to the correct type has to
                # be done manually, see the pandas warning: "FutureWarning:
                # Setting an item of incompatible dtype is deprecated and will
                # raise in a future error of pandas. Value '[1.38629436]' has
                # dtype incompatible with int64, please explicitly cast to a
                # compatible dtype first."
                updated.iloc[row_internal_indices, col_internal_indices] = item.copy()
            return updated

        if is_scalar(item):
            # A scalar is written as-is; its dtype applies to every target column.
            broadcasted_item = item
            broadcasted_dtypes = pandas.Series(
                [extract_dtype(item)] * len(col_numeric_index)
            )
        else:
            (
                broadcasted_item,
                broadcasted_dtypes,
                row_numeric_index,
                col_numeric_index,
            ) = broadcast_item(
                self,
                row_numeric_index,
                col_numeric_index,
                item,
                need_columns_reindex=need_columns_reindex,
            )

        # New dtypes are computed only when entire columns are assigned and the
        # current dtypes are already materialized.
        assigns_entire_columns = isinstance(
            row_numeric_index, slice
        ) and row_numeric_index == slice(None)
        if assigns_entire_columns and self.frame_has_materialized_dtypes:
            new_dtypes = self.dtypes.copy()
            new_dtypes.iloc[col_numeric_index] = broadcasted_dtypes.values
        else:
            new_dtypes = None

        updated_frame = self._modin_frame.apply_select_indices(
            axis=None,
            func=iloc_mut,
            row_labels=row_numeric_index,
            col_labels=col_numeric_index,
            new_index=self.index,
            new_columns=self.columns,
            new_dtypes=new_dtypes,
            keep_remaining=True,
            item_to_distribute=broadcasted_item,
        )
        return self.__constructor__(updated_frame)

    def sort_rows_by_column_values(self, columns, ascending=True, **kwargs):
        # Sort along axis 0 by `columns`; extra keyword arguments are passed
        # through to the frame's `sort_by`.
        return self.__constructor__(
            self._modin_frame.sort_by(0, columns, ascending=ascending, **kwargs)
        )

    def sort_columns_by_row_values(self, rows, ascending=True, **kwargs):
        # Reorder the columns according to the values held in the given rows.
        # The requested rows are converted to pandas, sorted there along axis 1,
        # and the resulting column order is applied to `self` via `reindex`.
        if not is_list_like(rows):
            rows = [rows]
        ErrorMessage.default_to_pandas("sort_values")
        # One single-row pandas frame per requested row, in the order of `rows`.
        broadcast_value_list = [
            self.getitem_row_array([row]).to_pandas() for row in rows
        ]
        # The frames are concatenated directly; pairing them with `rows` only
        # to unpack the pairs again added nothing.
        broadcast_values = pandas.concat(broadcast_value_list, copy=False)
        broadcast_values.columns = self.columns
        new_columns = broadcast_values.sort_values(
            by=rows, axis=1, ascending=ascending, **kwargs
        ).columns
        return self.reindex(axis=1, labels=new_columns)

    # Cat operations
    def cat_codes(self):
        # Map every partition to the category codes of its first column and
        # expose the result as a single unnamed column.
        def func(df: pandas.DataFrame) -> pandas.DataFrame:
            codes = df.iloc[:, 0].cat.codes
            return codes.to_frame(name=MODIN_UNNAMED_SERIES_LABEL)

        codes_frame = self._modin_frame.map(
            func=func, new_columns=[MODIN_UNNAMED_SERIES_LABEL]
        )
        return self.__constructor__(codes_frame, shape_hint="column")

    # END Cat operations

    def compare(self, other, **kwargs):
        # Run `pandas.DataFrame.compare` with the frame of `other` broadcast to
        # this frame; `kwargs` are passed through to pandas.
        def compare_partitions(left, right):
            return pandas.DataFrame.compare(left, other=right, **kwargs)

        compared_frame = self._modin_frame.broadcast_apply_full_axis(
            0,
            compare_partitions,
            other._modin_frame,
        )
        return self.__constructor__(compared_frame)

    def case_when(self, caselist):
        qc_type = type(self)

        def unwrap(data):
            # Query compilers of this type are replaced by their underlying
            # frame; any other value is passed through unchanged.
            return data._modin_frame if isinstance(data, qc_type) else data

        frame_caselist = [tuple(map(unwrap, case_tuple)) for case_tuple in caselist]
        return self.__constructor__(
            self._modin_frame.case_when(frame_caselist),
            shape_hint=self._shape_hint,
        )


================================================
FILE: modin/core/storage_formats/pandas/query_compiler_caster.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
Module contains ``QueryCompilerCaster`` class.

``QueryCompilerCaster`` is used for automatically casting query compiler
arguments to the type of the current query compiler for query compiler class functions.
This ensures compatibility between different query compiler classes.
"""

import functools
import inspect
import random
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from types import FunctionType, MappingProxyType, MethodType
from typing import Any, Callable, Dict, Optional, Tuple, TypeVar, Union, ValuesView

import pandas
from pandas.core.indexes.frozen import FrozenList
from typing_extensions import Self

from modin.config import AutoSwitchBackend, Backend, BackendMergeCastInPlace
from modin.config import context as config_context
from modin.core.storage_formats.base.query_compiler import (
    BaseQueryCompiler,
    QCCoercionCost,
)
from modin.core.storage_formats.base.query_compiler_calculator import (
    BackendCostCalculator,
    all_switchable_backends,
)
from modin.error_message import ErrorMessage
from modin.logging import disable_logging, get_logger
from modin.logging.metrics import emit_metric
from modin.utils import _inherit_docstrings, sentinel

# Generic type variable; it is bound to `Any`, so it accepts every type.
Fn = TypeVar("Fn", bound=Any)

# Constant for the default class name when class_of_wrapped_fn is None
# (represents functions in the modin.pandas module)
MODIN_PANDAS_MODULE_NAME = "modin.pandas"


def _normalize_class_name(class_of_wrapped_fn: Optional[str]) -> str:
    """
    Return the class name to use for logging and operation tracking.

    Parameters
    ----------
    class_of_wrapped_fn : Optional[str]
        Name of the class that owns the function, or `None` for functions
        that live in the modin.pandas module.

    Returns
    -------
    str
        `class_of_wrapped_fn` itself, or "modin.pandas" when it is `None`.
    """
    if class_of_wrapped_fn is None:
        return MODIN_PANDAS_MODULE_NAME
    return class_of_wrapped_fn


# This type describes a defaultdict that maps a backend name (or `None` for
# extensions that are not bound to any one backend) to the dictionary of
# extensions for that backend. The keys of the inner dictionary are the names of
# the extensions, and the values are the extensions themselves.
EXTENSION_DICT_TYPE = defaultdict[Optional[str], dict[str, Any]]


_NON_EXTENDABLE_ATTRIBUTES = {
    # we use these attributes to implement casting and backend dispatching, so
    # we can't allow extensions to override them.
    "__getattribute__",
    "__setattr__",
    "__delattr__",
    "__getattr__",
    "_getattribute__from_extension_impl",
    "_getattr__from_extension_impl",
    "get_backend",
    "move_to",
    "set_backend",
    "_get_extension",
    "_query_compiler",
    "_get_query_compiler",
    "_copy_into",
    "_update_inplace",
    "is_backend_pinned",
    "_set_backend_pinned",
    "pin_backend",
    "unpin_backend",
    "__dict__",
}


# Do not look up these attributes when searching for extensions. We use them
# to implement the extension lookup itself.
EXTENSION_NO_LOOKUP = {
    "_get_extension",
    "_query_compiler",
    "get_backend",
    "_getattribute__from_extension_impl",
    "_getattr__from_extension_impl",
    "_get_query_compiler",
    "set_backend",
    "_pinned",
    "is_backend_pinned",
    "_set_backend_pinned",
    "pin_backend",
    "unpin_backend",
    "_update_inplace",
}


# Key type of the auto-switch registries below: a (backend, class_name) pair.
BackendAndClassName = namedtuple("BackendAndClassName", ["backend", "class_name"])

# Registry type: a defaultdict from `BackendAndClassName` to a set of strings.
_AUTO_SWITCH_CLASS = defaultdict[BackendAndClassName, set[str]]

# Both registries start out empty and create an empty set for a missing key.
# NOTE(review): presumably the sets hold names of methods that may trigger an
# automatic backend switch after ("post-op") or before ("pre-op") the
# operation — confirm against the code that fills these registries.
_CLASS_AND_BACKEND_TO_POST_OP_SWITCH_METHODS: _AUTO_SWITCH_CLASS = _AUTO_SWITCH_CLASS(
    set
)

_CLASS_AND_BACKEND_TO_PRE_OP_SWITCH_METHODS: _AUTO_SWITCH_CLASS = _AUTO_SWITCH_CLASS(
    set
)


def _get_empty_qc_for_default_backend() -> BaseQueryCompiler:
    """
    Build an empty query compiler for the default backend.

    Returns
    -------
    BaseQueryCompiler
        Query compiler created from an empty ``pandas.DataFrame`` by the IO
        class of the factory that ``FactoryDispatcher.get_factory()`` returns.
    """
    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher

    io_cls = FactoryDispatcher.get_factory().io_cls
    empty_frame = pandas.DataFrame()
    return io_cls.from_pandas(empty_frame)


# Lazily filled mapping from a string key (a backend name, going by the
# variable name) to an empty query compiler. A missing key is filled by calling
# `_get_empty_qc_for_default_backend`, which takes no arguments, so the key
# itself is not passed to the code that builds the value.
# NOTE(review): presumably callers make the wanted backend the default one
# before the first lookup of its key — confirm against the call sites.
_BACKEND_TO_EMPTY_QC: defaultdict[str, BaseQueryCompiler] = defaultdict(
    _get_empty_qc_for_default_backend
)


class QueryCompilerCaster(ABC):
    """Base class whose subclasses get the query compiler arguments of their member functions cast to the current query compiler."""

    @classmethod
    def __init_subclass__(
        cls,
        **kwargs: Dict,
    ) -> None:
        """
        Install argument casting on every class deriving from ``QueryCompilerCaster``.

        Python runs this hook automatically when a subclass is created. The
        new subclass is handed to ``apply_argument_cast_to_class`` so that the
        arguments of its member functions are cast to the current query
        compiler type.

        Parameters
        ----------
        **kwargs : dict
            Additional keyword arguments, forwarded to the parent hook.
        """
        super().__init_subclass__(**kwargs)
        apply_argument_cast_to_class(cls)

    @abstractmethod
    def _get_query_compiler(self) -> Optional[BaseQueryCompiler]:
        """
        Return the query compiler that stores the data of this object.

        Returns
        -------
        Optional[BaseQueryCompiler]
            The backing query compiler, or None if the object has none.
        """

    @abstractmethod
    def is_backend_pinned(self) -> bool:
        """
        Tell whether the data of this object is pinned to a particular backend.

        Returns
        -------
        bool
            True if the data is pinned.
        """

    @abstractmethod
    def _set_backend_pinned(self, pinned: bool, inplace: bool) -> Optional[Self]:
        """
        Change whether the data of this object is pinned to a particular backend.

        Parameters
        ----------
        pinned : bool
            Whether the data is pinned.

        inplace : bool
            Whether to update the object in place.

        Returns
        -------
        Optional[Self]
            The object carrying the new pin state when `inplace` is False,
            otherwise None.
        """

    def pin_backend(self, inplace: bool = False) -> Optional[Self]:
        """
        Pin the underlying data so that Modin does not automatically move it to another backend.

        Parameters
        ----------
        inplace : bool, default: False
            Whether to update the object in place.

        Returns
        -------
        Optional[Self]
            The pinned object when `inplace` is False, otherwise None.
        """
        return self._set_backend_pinned(True, inplace)

    def unpin_backend(self, inplace: bool = False) -> Optional[Self]:
        """
        Unpin the underlying data so that Modin may automatically move it to another backend.

        Parameters
        ----------
        inplace : bool, default: False
            Whether to update the object in place.

        Returns
        -------
        Optional[Self]
            The unpinned object when `inplace` is False, otherwise None.
        """
        return self._set_backend_pinned(False, inplace)

    @abstractmethod
    def get_backend(self) -> str:
        """
        Return the backend of this object.

        Returns
        -------
        str
            Name of the backend of this object; it must be title-cased.
        """

    @abstractmethod
    def set_backend(
        self,
        backend: str,
        inplace: bool = False,
        *,
        switch_operation: Optional[str] = None,
    ) -> Optional[Self]:
        """
        Change the backend of this object.

        Parameters
        ----------
        backend : str
            The new backend.

        inplace : bool, default: False
            Whether to update the object in place.

        switch_operation : Optional[str], default: None
            Name of the operation that triggered the set_backend call.
            Internal argument used for displaying progress bar information.

        Returns
        -------
        Optional[Self]
            The object on the new backend when `inplace` is False, otherwise None.
        """

    @_inherit_docstrings(set_backend)
    def move_to(
        self,
        backend: str,
        inplace: bool = False,
        *,
        switch_operation: Optional[str] = None,
    ) -> Optional[Self]:
        return self.set_backend(
            backend=backend, inplace=inplace, switch_operation=switch_operation
        )

    @abstractmethod
    def _copy_into(self, other: Self) -> None:
        """
        Copy the data of this object into another object of the same type.

        Parameters
        ----------
        other : Self
            The object that receives the data.
        """

    @disable_logging
    def _get_extension(self, name: str, extensions: EXTENSION_DICT_TYPE) -> Any:
        """
        Look up the extension called `name` in the given set of extensions.

        Extensions registered for the backend of this object are searched
        first, then the extensions registered under `None`.

        Parameters
        ----------
        name : str
            The name of the extension.
        extensions : EXTENSION_DICT_TYPE
            The set of extensions.

        Returns
        -------
        Any
            The extension with the given name, or `sentinel` when there is no
            such extension or the object has no query compiler.
        """
        if self._get_query_compiler() is None:
            return sentinel
        for backend in (self.get_backend(), None):
            candidates = extensions[backend]
            if name in candidates:
                return candidates[name]
        return sentinel

    @disable_logging
    def _getattribute__from_extension_impl(
        self, item: str, extensions: EXTENSION_DICT_TYPE
    ):
        """
        Resolve `item` through the given set of extensions on behalf of __getattribute__().

        This implements __getattribute__() for extensions. Python calls
        __getattribute__() on every attribute access of an object.

        Parameters
        ----------
        item : str
            The name of the attribute to get.
        extensions : EXTENSION_DICT_TYPE
            The set of extensions.

        Returns
        -------
        Any
            The attribute taken from the extension, or `sentinel` when no
            non-callable extension is found for `item`.
        """
        # An extension property is only accessible if the backend supports it.
        extension = self._get_extension(item, extensions)
        if extension is sentinel:
            return sentinel
        if callable(extension):
            # Callable extensions are expected to be wrapped in methods that
            # dispatch to the correct backend once they are invoked. Such a
            # wrapper is found through the regular object.__getattribute__()
            # lookup instead of being picked here. For example, if
            # sort_values() is extended, __getattribute__('sort_values')
            # should return a wrapper that calls the correct extension.
            return sentinel
        if hasattr(extension, "__get__"):
            return extension.__get__(self)
        return extension

    @disable_logging
    def _getattr__from_extension_impl(
        self,
        key: str,
        default_behavior_attributes: set[str],
        extensions: EXTENSION_DICT_TYPE,
    ) -> Any:
        """
        Implement __getattr__, the fallback the interpreter uses when __getattribute__ raises AttributeError.

        The extension lookup for `key` is attempted again here, even if this
        class has a different attribute for `key`.

        Parameters
        ----------
        key : str
            Attribute name.
        default_behavior_attributes : set[str]
            Attributes for which the default __getattr__ behavior is followed
            without trying to get the extension.
        extensions : EXTENSION_DICT_TYPE
            The set of extensions.

        Returns
        -------
        The value of the attribute.
        """
        if key not in default_behavior_attributes:
            # We get here when this class has an extension for `key` whose
            # __getattribute__() raised an AttributeError. The extension is
            # tried once more (most likely re-raising the AttributeError that
            # _getattribute__from_extension_impl() raised originally) instead
            # of falling straight back to object.__getattribute__().
            found = self._getattribute__from_extension_impl(key, extensions)
            # Anything other than `sentinel` should already have been returned
            # by __getattribute__(), so finding one here is reported as a bug.
            ErrorMessage.catch_bugs_and_request_email(
                failure_condition=found is not sentinel,
                extra_log=(
                    "This object should return extensions via "
                    "__getattribute__ rather than __getattr__"
                ),
            )
        return object.__getattribute__(self, key)


def visit_nested_args(arguments, fn: callable):
    """
    Visit each argument recursively, calling fn on each one.

    Lists and dicts are updated in place. Immutable containers (tuples,
    ``FrozenList``, ``ValuesView`` and ``pandas.NamedAgg``) cannot be updated
    in place, so they are rebuilt; when such a container is nested inside a
    list or dict, the rebuilt container replaces the original element so that
    the values returned by `fn` are not lost.

    Parameters
    ----------
    arguments : tuple or dict
        The (possibly nested) container of arguments to visit.
    fn : callable
        Callable applied to every value that is not itself a container that
        gets visited. Its return value replaces the visited value.

    Returns
    -------
    tuple or dict
        The visited container: the same object for lists and dicts, a rebuilt
        object for immutable containers (a tuple for ``ValuesView`` input).
    """
    if isinstance(arguments, pandas.NamedAgg):
        # NamedAgg needs special treatment because it's an immutable subclass
        # of tuple that can't be constructed from another tuple.
        return pandas.NamedAgg(
            column=fn(arguments.column), aggfunc=fn(arguments.aggfunc)
        )
    immutable_types = (FrozenList, tuple, ValuesView)
    if isinstance(arguments, immutable_types):
        args_type = type(arguments)
        # ValuesView, which we might get from dict.values(), is immutable,
        # but not constructable, so we convert it to a tuple. Otherwise,
        # we return an object of the same type as the input.
        rebuilt_type = tuple if issubclass(args_type, ValuesView) else args_type
        return rebuilt_type(visit_nested_args(list(arguments), fn))

    types_to_recursively_visit = (list, dict, *immutable_types)

    def visit(value):
        # Recurse into containers; apply `fn` to everything else. The result
        # must always be stored back by the caller: for lists and dicts the
        # recursive call returns the same (mutated) object, but for immutable
        # containers it returns a new object holding the visited values.
        if isinstance(value, types_to_recursively_visit):
            return visit_nested_args(value, fn)
        return fn(value)

    if isinstance(arguments, list):
        for i, value in enumerate(arguments):
            arguments[i] = visit(value)
    elif isinstance(arguments, dict):
        # Only existing keys are reassigned, so the dict does not change size
        # while it is being iterated.
        for key in arguments:
            arguments[key] = visit(arguments[key])
    return arguments


def _assert_casting_functions_wrap_same_implementation(
    m1: callable, m2: callable
) -> None:
    """
    Check that two casting wrappers are backed by the same implementation.

    Parameters
    ----------
    m1 : callable
        The first casting wrapper.
    m2 : callable
        The second casting wrapper.

    Raises
    ------
    AssertionError
        If the two casting wrappers wrap different implementations.
    """

    def unwrap(method: callable, depth: int) -> callable:
        # Follow the `_wrapped_method_for_casting` chain `depth` levels down.
        for _ in range(depth):
            method = method._wrapped_method_for_casting
        return method

    # Depth 1 covers cases like (m1=Series.agg, m2=Series.aggregate), where
    # Series defines its own method and aliases it, so both wrappers wrap the
    # very same function.
    # Depth 2 covers cases like (m1=Series.kurt, m2=Series.kurtosis), where
    # Series inherits both names from BasePandasDataset without defining
    # either: Series.kurt._wrapped_method_for_casting is then
    # BasePandasDataset.kurt, which is not BasePandasDataset.kurtosis, and the
    # shared implementation only shows up one level deeper.
    assert any(unwrap(m2, depth) is unwrap(m1, depth) for depth in (1, 2))


def apply_argument_cast_to_class(klass: type) -> type:
    """
    Replace every function of a class with its argument-casting wrapper.

    Parameters
    ----------
    klass : type
        The class to apply argument casting to.

    Returns
    -------
    type
        The same class, with argument casting applied to all functions.
    """
    members = dict(inspect.getmembers(klass))
    # Overlay the raw entries of the class namespace: inspect converts class
    # methods to member functions, and we need the original descriptors to
    # tell classmethods and staticmethods apart from regular functions.
    members.update(vars(klass))

    for attr_name, attr_value in members.items():
        if attr_name in _NON_EXTENDABLE_ATTRIBUTES:
            continue
        if not isinstance(attr_value, (FunctionType, classmethod, staticmethod)):
            continue

        if isinstance(attr_value, classmethod):
            wrapping_function_type = classmethod
        elif isinstance(attr_value, staticmethod):
            wrapping_function_type = staticmethod
        else:
            wrapping_function_type = MethodType

        if wrapping_function_type is MethodType:
            implementation_function = attr_value
        else:
            # classmethod / staticmethod objects keep the plain function
            # in `__func__`.
            implementation_function = attr_value.__func__

        if attr_name not in klass._extensions[None]:
            # The original implementation becomes the default extension,
            # which is used whenever the object's backend has no
            # implementation of its own for this method.
            klass._extensions[None][attr_name] = implementation_function

        casting_implementation = wrap_function_in_argument_caster(
            klass=klass,
            f=implementation_function,
            wrapping_function_type=wrapping_function_type,
            extensions=klass._extensions,
            name=attr_name,
        )
        if wrapping_function_type is MethodType:
            wrapped = casting_implementation
        else:
            # Re-wrap in the same descriptor type as the original attribute.
            wrapped = wrapping_function_type(casting_implementation)

        if attr_name not in klass.__dict__:
            # The method comes from a superclass (it is absent from
            # klass.__dict__), so flag it for
            # modin.utils._inherit_docstrings, which must then take the
            # docstring from the superclass.
            wrapped._wrapped_superclass_method = attr_value
        setattr(klass, attr_name, wrapped)

    return klass


def _maybe_switch_backend_pre_op(
    function_name: str,
    input_qc: BaseQueryCompiler,
    class_of_wrapped_fn: Optional[str],
    arguments: MappingProxyType[str, Any],
) -> tuple[str, Callable[[Any], Any]]:
    """
    Possibly switch backend before a function.

    The backend is only reconsidered when `function_name` is registered for a
    pre-operation switch for the input backend and `class_of_wrapped_fn`;
    otherwise the input backend is kept.

    Parameters
    ----------
    function_name : str
        The name of the function.
    input_qc : BaseQueryCompiler
        The input query compiler.
    class_of_wrapped_fn : Optional[str]
        The name of the class that the function belongs to. `None` for functions
        in the modin.pandas module.
    arguments : MappingProxyType[str, Any]
        Mapping from operation argument names to their values.

    Returns
    -------
    Tuple[str, callable]
        A tuple of the new backend and a function that casts all castable arguments
        to the new query compiler type.
    """
    # Look the backend up once and reuse it both for the registry key and as
    # the fallback result.
    input_backend = input_qc.get_backend()
    pre_op_switch_methods = _CLASS_AND_BACKEND_TO_PRE_OP_SWITCH_METHODS[
        BackendAndClassName(backend=input_backend, class_name=class_of_wrapped_fn)
    ]
    if function_name in pre_op_switch_methods:
        result_backend = _get_backend_for_auto_switch(
            input_qc=input_qc,
            class_of_wrapped_fn=class_of_wrapped_fn,
            function_name=function_name,
            arguments=arguments,
        )
    else:
        result_backend = input_backend

    def cast_to_qc(arg: Any) -> Any:
        # Only objects that hold a query compiler on a different backend need
        # to be moved; everything else is passed through untouched.
        if not (
            isinstance(arg, QueryCompilerCaster)
            and arg._get_query_compiler() is not None
            and arg.get_backend() != result_backend
        ):
            return arg
        # The argument is moved in place, so the caller's object itself ends
        # up on `result_backend`.
        arg.set_backend(
            result_backend,
            inplace=True,
            switch_operation=f"{_normalize_class_name(class_of_wrapped_fn)}.{function_name}",
        )
        return arg

    return result_backend, cast_to_qc


def _maybe_switch_backend_post_op(
    result: Any,
    function_name: str,
    qc_list: list[BaseQueryCompiler],
    starting_backend: str,
    class_of_wrapped_fn: Optional[str],
    pin_backend: bool,
    arguments: MappingProxyType[str, Any],
) -> Any:
    """
    Decide whether the result of a function should move to another backend.

    The decision is cost-based: when the function returned a
    QueryCompilerCaster and moving is cheaper than staying on the current
    backend, the result is moved. Among several candidate backends, the one
    minimizing cost_to_move - cost_to_stay is chosen.

    Parameters
    ----------
    result : Any
        The result of the function.
    function_name : str
        The name of the function.
    qc_list : list[BaseQueryCompiler]
        The list of query compilers that were arguments to the function.
    starting_backend : str
        The backend used to run the function.
    class_of_wrapped_fn : Optional[str]
        The name of the class that the function belongs to. `None` for functions
        in the modin.pandas module.
    pin_backend : bool
        Whether the result should have its backend pinned, and therefore not moved.
    arguments : MappingProxyType[str, Any]
        Mapping from operation argument names to their values.

    Returns
    -------
    Any
        The result of the function, possibly with its backend switched.
    """
    # A pinned input pins the output too, and a pinned output never moves.
    if pin_backend:
        if isinstance(result, QueryCompilerCaster):
            result.pin_backend(inplace=True)
        return result

    # Post-operation switching only applies to nullary and unary methods.
    if len(qc_list) not in (0, 1):
        return result

    lookup_backend = (
        qc_list[0].get_backend() if len(qc_list) == 1 else starting_backend
    )
    post_op_switch_methods = _CLASS_AND_BACKEND_TO_POST_OP_SWITCH_METHODS[
        BackendAndClassName(backend=lookup_backend, class_name=class_of_wrapped_fn)
    ]
    if function_name not in post_op_switch_methods:
        return result

    # Without a query compiler in the result there is nothing to move.
    if not isinstance(result, QueryCompilerCaster):
        return result
    input_qc = result._get_query_compiler()
    if input_qc is None:
        return result

    return result.move_to(
        _get_backend_for_auto_switch(
            input_qc=input_qc,
            class_of_wrapped_fn=class_of_wrapped_fn,
            function_name=function_name,
            arguments=arguments,
        ),
        switch_operation=f"{_normalize_class_name(class_of_wrapped_fn)}.{function_name}",
    )


def _get_backend_for_auto_switch(
    input_qc: BaseQueryCompiler,
    class_of_wrapped_fn: str,
    function_name: str,
    arguments: MappingProxyType[str, Any],
) -> str:
    """
    Pick the backend that an operation should run on.

    The choice is cost-based: the cost of staying on the current backend is
    compared with the cost of moving to, and executing on, every other
    switchable backend. A switch only happens when some backend yields a
    negative move-stay delta; among those, the smallest delta wins.

    Parameters
    ----------
    input_qc : BaseQueryCompiler
        The query compiler representing the starting backend.
    class_of_wrapped_fn : Optional[str]
        The name of the class that the function belongs to. `None` for functions
        in the modin.pandas module.
    function_name : str
        The name of the function.
    arguments : MappingProxyType[str, Any]
        Mapping from operation argument names to their values.

    Returns
    -------
    str
        The name of the best backend to switch to.
    """
    # TODO(https://github.com/modin-project/modin/issues/7503): Make costing
    # methods take backend instead of query compiler type so that we don't
    # have to use the dispatcher to figure out the appropriate type for each
    # backend.
    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher

    # Does not need to be secure, should not use system entropy
    metrics_group = "%04x" % random.randrange(16**4)
    starting_backend = input_qc.get_backend()

    best_backend = starting_backend
    min_move_stay_delta = None

    stay_cost = input_qc.stay_cost(
        api_cls_name=class_of_wrapped_fn,
        operation=function_name,
        arguments=arguments,
    )
    data_max_shape = input_qc._max_shape()

    emit_metric(
        f"hybrid.auto.api.{class_of_wrapped_fn}.{function_name}.group.{metrics_group}",
        1,
    )
    current_metric = f"hybrid.auto.current.{starting_backend}.group.{metrics_group}"
    emit_metric(f"{current_metric}.stay_cost", stay_cost)
    emit_metric(f"{current_metric}.rows", data_max_shape[0])
    emit_metric(f"{current_metric}.cols", data_max_shape[1])

    for backend in all_switchable_backends():
        if backend == starting_backend:
            continue
        move_to_class = FactoryDispatcher._get_prepared_factory_for_backend(
            backend=backend
        ).io_cls.query_compiler_cls
        move_to_cost = input_qc.move_to_cost(
            move_to_class,
            api_cls_name=class_of_wrapped_fn,
            operation=function_name,
            arguments=arguments,
        )
        other_execute_cost = move_to_class.move_to_me_cost(
            input_qc,
            api_cls_name=class_of_wrapped_fn,
            operation=function_name,
            arguments=arguments,
        )
        # A candidate is only comparable when all three costs are known.
        if move_to_cost is None or stay_cost is None or other_execute_cost is None:
            continue

        if stay_cost >= QCCoercionCost.COST_IMPOSSIBLE:
            # The current engine cannot execute the workload at all, so the
            # transfer cost is ignored and only the other engine's ability
            # to execute the workload counts.
            move_stay_delta = other_execute_cost - stay_cost
        else:
            # The current engine could execute the workload, so the
            # transfer time takes part in the decision.
            move_stay_delta = (move_to_cost + other_execute_cost) - stay_cost

        is_new_best = move_stay_delta < 0 and (
            min_move_stay_delta is None or move_stay_delta < min_move_stay_delta
        )
        if is_new_best:
            min_move_stay_delta = move_stay_delta
            best_backend = backend

        candidate_metric = f"hybrid.auto.candidate.{backend}.group.{metrics_group}"
        emit_metric(f"{candidate_metric}.move_to_cost", move_to_cost)
        emit_metric(f"{candidate_metric}.other_execute_cost", other_execute_cost)
        emit_metric(f"{candidate_metric}.delta", move_stay_delta)

        get_logger().info(
            f"After {_normalize_class_name(class_of_wrapped_fn)} function {function_name}, "
            f"considered moving to backend {backend} with "
            f"(transfer_cost {move_to_cost} + other_execution_cost {other_execute_cost}) "
            f", stay_cost {stay_cost}, and move-stay delta "
            f"{move_stay_delta}"
        )

    decision_metric = f"hybrid.auto.decision.{best_backend}.group.{metrics_group}"
    if best_backend != starting_backend:
        emit_metric(decision_metric, 1)
        get_logger().info(f"Chose to move to backend {best_backend}")
    else:
        emit_metric(decision_metric, 0)
        get_logger().info(
            f"Chose not to switch backends after operation {function_name}"
        )
    return best_backend


def _get_extension_for_method(
    name: str,
    extensions: EXTENSION_DICT_TYPE,
    backend: str,
    args: tuple,
    wrapping_function_type: Optional[
        Union[type[classmethod], type[staticmethod], type[MethodType]]
    ],
) -> callable:
    """
    Look up the implementation of a method for a given backend.

    The backend-specific extension is preferred; the default extension
    (registered under the `None` backend) is used as a fallback.

    Parameters
    ----------
    name : str
        The name of the method.
    extensions : EXTENSION_DICT_TYPE
        The extension dictionary for the modin-API-level object (e.g. class
        DataFrame or module modin.pandas) that the method belongs to.
    backend : str
        The backend to use for this method call.
    args : tuple
        The arguments to the method.
    wrapping_function_type : Union[type[classmethod], type[staticmethod], type[MethodType]]
        The type of the original function that `f` implements.
        - `None` means we are wrapping a free function, e.g. pd.concat()
        - `classmethod` means we are wrapping a classmethod.
        - `staticmethod` means we are wrapping a staticmethod.
        - `MethodType` means we are wrapping a regular method of a class.

    Returns
    -------
    callable
        The implementation of the method for the given backend.

    Raises
    ------
    AttributeError
        If neither the backend nor the default extensions define `name`.
    """
    backend_extensions = extensions[backend]
    if name in backend_extensions:
        return backend_extensions[name]

    default_extensions = extensions[None]
    if name in default_extensions:
        return default_extensions[name]

    if wrapping_function_type is MethodType:
        # When python invokes a method on an object, it passes the object as
        # the first positional argument.
        owner = f"{type(args[0]).__name__} object"
    else:
        owner = "module 'modin.pandas'"
    raise AttributeError(f"{owner} has no attribute {name}")


# Record of one argument that was cast to another backend before the wrapped
# function ran. Defined once at module level so that a new namedtuple class is
# not created on every call of every wrapped function.
_InplaceUpdateTracker = namedtuple(
    "InplaceUpdateTracker",
    ["input_castable", "original_query_compiler", "new_castable"],
)


def wrap_function_in_argument_caster(
    klass: Optional[type],
    f: callable,
    name: str,
    wrapping_function_type: Optional[
        Union[type[classmethod], type[staticmethod], type[MethodType]]
    ],
    extensions: EXTENSION_DICT_TYPE,
) -> callable:
    """
    Wrap a function so that it casts all castable arguments to a consistent query compiler, and uses the correct extension implementation for methods.

    Also propagates pin behavior across operations.

    Parameters
    ----------
    klass : Optional[type]
        Class of the function being wrapped. `None` when wrapping a free
        function.
    f : callable
        The function to wrap.
    name : str
        The name of the function.
    wrapping_function_type : Optional[Union[type[classmethod], type[staticmethod], type[MethodType]]
        The type of the original function that `f` implements.
        - `None` means we are wrapping a free function, e.g. pd.concat()
        - `classmethod` means we are wrapping a classmethod.
        - `staticmethod` means we are wrapping a staticmethod.
        - `MethodType` means we are wrapping a regular method of a class.
    extensions : EXTENSION_DICT_TYPE
        The extension dictionary for the modin-API-level object (e.g. class
        DataFrame or module modin.pandas) that the function belongs to. It is
        used to look up the implementation of `name` for a given backend.

    Returns
    -------
    callable
        The wrapped function. The original `f` stays reachable through the
        `_wrapped_method_for_casting` attribute of the wrapper.
    """

    @functools.wraps(f)
    def f_with_argument_casting(*args: Tuple, **kwargs: Dict) -> Any:
        """
        Add casting for query compiler arguments.

        Parameters
        ----------
        *args : tuple
            The function arguments.
        **kwargs : dict
            The function keyword arguments.

        Returns
        -------
        Any
            The result of the implementation that was selected for the
            chosen backend.
        """
        if wrapping_function_type in (classmethod, staticmethod):
            # TODO: currently we don't support any kind of casting or extension
            # for classmethod or staticmethod.
            return f(*args, **kwargs)

        # f() may make in-place updates to some of its arguments. If we cast
        # an argument and then f() updates it in place, the updates will not
        # be reflected in the original object. As a fix, we keep track of all
        # the in-place updates that f() makes, and once f() is finished, we
        # copy the updates back into the original objects. The query compiler
        # interface is mostly immutable (the only exceptions being the mutable
        # index and column properties), so to check for an in-place update, we
        # check whether an input's query compiler has changed its identity.
        inplace_update_trackers: list[_InplaceUpdateTracker] = []
        # The function name and class name of the function are passed to the calculator as strings
        class_of_wrapped_fn = klass.__name__ if klass is not None else None

        input_query_compilers: list[BaseQueryCompiler] = []

        pin_target_backend = None

        input_backends: set[str] = set()

        def register_query_compilers(arg):
            # Collect the query compilers and backends of the inputs, and
            # work out which backend (if any) the inputs are pinned to.
            nonlocal pin_target_backend
            if (
                isinstance(arg, QueryCompilerCaster)
                and (qc := arg._get_query_compiler()) is not None
            ):
                arg_backend = arg.get_backend()
                input_backends.add(arg_backend)
                if pin_target_backend is not None:
                    if arg.is_backend_pinned() and arg_backend != pin_target_backend:
                        raise ValueError(
                            f"Cannot combine arguments that are pinned to conflicting backends ({pin_target_backend}, {arg_backend})"
                        )
                elif arg.is_backend_pinned():
                    pin_target_backend = arg_backend
                input_query_compilers.append(qc)
            elif isinstance(arg, BaseQueryCompiler):
                # We might get query compiler arguments in __init__()
                input_query_compilers.append(arg)
            return arg

        visit_nested_args(args, register_query_compilers)
        visit_nested_args(kwargs, register_query_compilers)

        # Before determining any automatic switches, we perform the following checks:
        # 1. If the global AutoSwitchBackend configuration variable is set to False, do not switch.
        # 2. If there's only one query compiler and it's pinned, do not switch.
        # 3. If there are multiple query compilers, and at least one is pinned to a particular
        #    backend, then switch to that backend.
        # 4. If there are multiple query compilers, at least two of which are pinned to distinct
        #    backends, raise a ValueError.

        if len(input_query_compilers) == 0:
            input_backend = Backend.get()
            # For nullary functions, we need to create a dummy query compiler
            # to calculate the cost of switching backends. We should only
            # create the dummy query compiler once per backend.
            input_qc_for_pre_op_switch = _BACKEND_TO_EMPTY_QC[input_backend]
        else:
            input_qc_for_pre_op_switch = input_query_compilers[0]
            input_backend = input_qc_for_pre_op_switch.get_backend()

        # Skip the casting code if there are < 2 input backends and either
        # auto-switching is disabled or the inputs are pinned to the input
        # backend.
        if len(input_backends) < 2 and (
            not AutoSwitchBackend.get() or pin_target_backend is not None
        ):
            f_to_apply = _get_extension_for_method(
                name=name,
                extensions=extensions,
                backend=(
                    pin_target_backend
                    if pin_target_backend is not None
                    else input_backend
                ),
                args=args,
                wrapping_function_type=wrapping_function_type,
            )
            result = f_to_apply(*args, **kwargs)
            if (
                isinstance(result, QueryCompilerCaster)
                and pin_target_backend is not None
            ):
                result._set_backend_pinned(True, inplace=True)
            return result

        # Bind the arguments using the function implementation for the input
        # backend. TODO(https://github.com/modin-project/modin/issues/7525):
        # Ideally every implementation would have the same signature.
        bound_arguments = inspect.signature(
            _get_extension_for_method(
                name=name,
                extensions=extensions,
                backend=input_backend,
                args=args,
                wrapping_function_type=wrapping_function_type,
            ),
        ).bind(*args, **kwargs)
        bound_arguments.apply_defaults()
        args_dict = MappingProxyType(bound_arguments.arguments)

        if len(input_query_compilers) < 2:
            # With fewer than two input query compilers there are fewer than
            # two input backends, so pinned inputs have already returned
            # through the fast path above and pinning needs no further
            # handling here.
            result_backend, cast_to_qc = _maybe_switch_backend_pre_op(
                name,
                input_qc=input_qc_for_pre_op_switch,
                class_of_wrapped_fn=class_of_wrapped_fn,
                arguments=args_dict,
            )
        else:
            preop_switch = (
                name
                in _CLASS_AND_BACKEND_TO_PRE_OP_SWITCH_METHODS[
                    BackendAndClassName(
                        backend=input_backend,
                        class_name=class_of_wrapped_fn,
                    )
                ]
            )
            calculator: BackendCostCalculator = BackendCostCalculator(
                operation_arguments=args_dict,
                api_cls_name=class_of_wrapped_fn,
                operation=name,
                query_compilers=input_query_compilers,
                preop_switch=preop_switch,
            )

            if pin_target_backend is None:
                result_backend = calculator.calculate()
            else:
                result_backend = pin_target_backend

            def cast_to_qc(arg):
                # Only objects that hold a query compiler on a different
                # backend need to be cast.
                if not (
                    isinstance(arg, QueryCompilerCaster)
                    and arg._get_query_compiler() is not None
                    and arg.get_backend() != result_backend
                ):
                    return arg
                if BackendMergeCastInPlace.get():
                    arg.set_backend(
                        result_backend,
                        switch_operation=f"{_normalize_class_name(class_of_wrapped_fn)}.{name}",
                        inplace=True,
                    )
                    assert arg.get_backend() == result_backend
                    cast = arg
                else:
                    cast = arg.set_backend(
                        result_backend,
                        switch_operation=f"{_normalize_class_name(class_of_wrapped_fn)}.{name}",
                        inplace=False,
                    )
                # Remember the query compiler the cast object starts with, so
                # that in-place updates made by the wrapped function can be
                # detected afterwards.
                inplace_update_trackers.append(
                    _InplaceUpdateTracker(
                        input_castable=arg,
                        original_query_compiler=cast._get_query_compiler(),
                        new_castable=cast,
                    )
                )
                return cast

        args = visit_nested_args(args, cast_to_qc)
        kwargs = visit_nested_args(kwargs, cast_to_qc)

        # `result_backend` may be different from `input_backend`, so we have to
        # look up the correct implementation based on `result_backend`.
        f_to_apply = _get_extension_for_method(
            name=name,
            extensions=extensions,
            backend=result_backend,
            args=args,
            wrapping_function_type=wrapping_function_type,
        )

        # We have to set the global Backend correctly for I/O methods like
        # read_json() to use the correct backend.
        with config_context(Backend=result_backend):
            result = f_to_apply(*args, **kwargs)
        for (
            original_castable,
            original_qc,
            new_castable,
        ) in inplace_update_trackers:
            new_qc = new_castable._get_query_compiler()
            if BackendMergeCastInPlace.get() or original_qc is not new_qc:
                new_castable._copy_into(original_castable)

        return _maybe_switch_backend_post_op(
            result,
            function_name=name,
            qc_list=input_query_compilers,
            starting_backend=result_backend,
            class_of_wrapped_fn=class_of_wrapped_fn,
            pin_backend=pin_target_backend is not None,
            arguments=args_dict,
        )

    f_with_argument_casting._wrapped_method_for_casting = f
    return f_with_argument_casting


# Extensions registered for free functions of the modin.pandas module, keyed
# first by backend name (`None` holds the default implementation) and then by
# function name.
_GENERAL_EXTENSIONS: EXTENSION_DICT_TYPE = defaultdict(dict)


def wrap_free_function_in_argument_caster(name: str) -> callable:
    """
    Build a decorator that adds argument casting to a free function.

    Parameters
    ----------
    name : str
        The name of the function.

    Returns
    -------
    callable
        A wrapper for a free function that casts all castable arguments to a consistent query compiler.
    """

    def wrapper(f):
        # The first implementation seen for `name` becomes the default
        # (backend `None`) extension; later ones do not overwrite it.
        _GENERAL_EXTENSIONS[None].setdefault(name, f)

        return wrap_function_in_argument_caster(
            klass=None,
            f=f,
            name=name,
            wrapping_function_type=None,
            extensions=_GENERAL_EXTENSIONS,
        )

    return wrapper


def register_function_for_post_op_switch(
    class_name: Optional[str], backend: str, method: str
) -> None:
    """
    Mark a function as a candidate for a backend switch after it has run.

    Parameters
    ----------
    class_name : Optional[str]
        The name of the class that the function belongs to. `None` for functions
        in the modin.pandas module.
    backend : str
        Only consider switching when the starting backend is this one.
    method : str
        The name of the method to register.
    """
    registry_key = BackendAndClassName(backend=backend, class_name=class_name)
    _CLASS_AND_BACKEND_TO_POST_OP_SWITCH_METHODS[registry_key].add(method)


def register_function_for_pre_op_switch(
    class_name: Optional[str], backend: str, method: str
) -> None:
    """
    Mark a function as a candidate for a backend switch before it runs.

    Parameters
    ----------
    class_name : Optional[str]
        The name of the class that the function belongs to. `None` for functions
        in the modin.pandas module.
    backend : str
        Only consider switching when the starting backend is this one.
    method : str
        The name of the method to register.
    """
    registry_key = BackendAndClassName(backend=backend, class_name=class_name)
    _CLASS_AND_BACKEND_TO_PRE_OP_SWITCH_METHODS[registry_key].add(method)


================================================
FILE: modin/core/storage_formats/pandas/utils.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Contains utility functions for frame partitioning."""

from __future__ import annotations

import re
from math import ceil
from typing import Generator, Hashable, List, Optional

import numpy as np
import pandas

from modin.config import MinColumnPartitionSize, MinRowPartitionSize, NPartitions


def compute_chunksize(axis_len: int, num_splits: int, min_block_size: int) -> int:
    """
    Determine how many elements (rows/columns) each partition should hold.

    The same rule is applied regardless of the axis.

    Parameters
    ----------
    axis_len : int
        Number of elements along the axis.
    num_splits : int
        Desired number of splits.
    min_block_size : int
        Lower bound for the number of rows/columns in one split.

    Returns
    -------
    int
        Number of rows/columns per split: ``axis_len / num_splits`` rounded up,
        but never smaller than `min_block_size`.

    Raises
    ------
    ValueError
        If `min_block_size` is not a positive int.
    """
    block_size_is_valid = isinstance(min_block_size, int) and min_block_size > 0
    if not block_size_is_valid:
        raise ValueError(
            f"'min_block_size' should be int > 0, passed: {min_block_size=}"
        )

    # Ceiling division: bump the quotient when the axis doesn't divide evenly.
    quotient, remainder = divmod(axis_len, num_splits)
    chunksize = quotient + 1 if remainder else quotient
    # Clamp from below so that we don't end up with lots of tiny partitions.
    return max(chunksize, min_block_size)


def split_result_of_axis_func_pandas(
    axis: int,
    num_splits: int,
    result: pandas.DataFrame,
    min_block_size: int,
    length_list: Optional[list] = None,
) -> list[pandas.DataFrame]:
    """
    Split a pandas DataFrame into a list of chunks along the given axis.

    This is the eager counterpart of ``generate_result_of_axis_func_pandas``:
    it collects everything the generator yields into a list.

    Parameters
    ----------
    axis : {0, 1}
        Axis to split across. 0 means index axis when 1 means column axis.
    num_splits : int
        Number of splits to separate the DataFrame into.
        This parameter is ignored if `length_list` is specified.
    result : pandas.DataFrame
        DataFrame to split.
    min_block_size : int
        Minimum number of rows/columns in a single split.
    length_list : list of ints, optional
        List of slice lengths to split DataFrame into. This is used to
        return the DataFrame to its original partitioning schema.

    Returns
    -------
    list of pandas.DataFrames
        The split dataframe represented by a list of frames.
    """
    chunks = generate_result_of_axis_func_pandas(
        axis, num_splits, result, min_block_size, length_list
    )
    return [*chunks]


def generate_result_of_axis_func_pandas(
    axis: int,
    num_splits: int,
    result: pandas.DataFrame,
    min_block_size: int,
    length_list: Optional[list] = None,
) -> Generator:
    """
    Lazily yield the chunks of a pandas DataFrame split along the given axis.

    Parameters
    ----------
    axis : {0, 1}
        Axis to split across. 0 means index axis when 1 means column axis.
    num_splits : int
        Number of splits to separate the DataFrame into.
        This parameter is ignored if `length_list` is specified.
    result : pandas.DataFrame
        DataFrame to split.
    min_block_size : int
        Minimum number of rows/columns in a single split.
    length_list : list of ints, optional
        List of slice lengths to split DataFrame into. This is used to
        return the DataFrame to its original partitioning schema.

    Yields
    ------
    Generator
        Generates 'num_splits' dataframes as a result of axis function.
    """
    if num_splits == 1:
        # Nothing to split: hand back the object itself.
        yield result
        return

    if length_list is None:
        length_list = get_length_list(result.shape[axis], num_splits, min_block_size)

    # Prepend a zero so that the cumulative sums give the slice boundaries
    # (start of the first chunk included).
    boundaries = np.cumsum(np.insert(length_list, obj=0, values=[0]))
    # A Series only has a single axis to slice along.
    split_axis = 0 if isinstance(result, pandas.Series) else axis

    # Walk over consecutive (start, stop) boundary pairs to restore block partitioning.
    for start, stop in zip(boundaries[:-1], boundaries[1:]):
        if split_axis == 0:
            piece = result.iloc[start:stop]
        else:
            piece = result.iloc[:, start:stop]

        # A sliced MultiIndex keeps every encoded value of the original index;
        # drop the unused levels explicitly to save memory.
        labels = piece.axes[split_axis]
        if isinstance(labels, pandas.MultiIndex):
            piece = piece.set_axis(
                labels.remove_unused_levels(), axis=split_axis, copy=False
            )
        yield piece


def get_length_list(axis_len: int, num_splits: int, min_block_size: int) -> list:
    """
    Build the list of partition lengths for an axis cut into `num_splits` pieces.

    Parameters
    ----------
    axis_len : int
        Element count in an axis.
    num_splits : int
        Number of splits along the axis.
    min_block_size : int
        Minimum number of rows/columns in a single split.

    Returns
    -------
    list of ints
        List of integer lengths of partitions; always has `num_splits` entries.
    """
    chunksize = compute_chunksize(axis_len, num_splits, min_block_size)
    lengths = []
    for split_idx in range(num_splits):
        if (split_idx + 1) * chunksize <= axis_len:
            # A full chunk still fits.
            lengths.append(chunksize)
        else:
            # Whatever is left over, or zero once the axis is exhausted.
            lengths.append(max(0, axis_len - split_idx * chunksize))
    return lengths


def length_fn_pandas(df):
    """
    Return the row count of the given `pandas.DataFrame`.

    Parameters
    ----------
    df : pandas.DataFrame

    Returns
    -------
    int
    """
    assert isinstance(df, pandas.DataFrame)
    # `len` is never negative, so an empty frame naturally gives 0.
    return len(df)


def width_fn_pandas(df):
    """
    Return the column count of the given `pandas.DataFrame`.

    Parameters
    ----------
    df : pandas.DataFrame

    Returns
    -------
    int
    """
    assert isinstance(df, pandas.DataFrame)
    # `len` is never negative, so a frame without columns naturally gives 0.
    return len(df.columns)


def get_group_names(regex: "re.Pattern") -> "List[Hashable]":
    """
    Produce one label per capture group of a compiled regex.

    Named groups are labelled with their name; unnamed groups are labelled
    with their zero-based position.

    Parameters
    ----------
    regex : compiled regex

    Returns
    -------
    list of column labels
    """
    group_index = regex.groupindex
    # Invert the name -> (one-based) group number mapping.
    number_to_name = dict(zip(group_index.values(), group_index.keys()))
    return [
        number_to_name.get(number, number - 1)
        for number in range(1, regex.groups + 1)
    ]


def merge_partitioning(left, right, axis=1):
    """
    Compute the number of splits along `axis` for the concatenation of two dataframes.

    When both frames have cached lengths for the axis, the split count is derived
    from the combined length and the configured chunk size. Otherwise the number
    of existing partitions is summed and capped by ``NPartitions``.

    Parameters
    ----------
    left : PandasDataframe
    right : PandasDataframe
    axis : int, default: 1

    Returns
    -------
    int
    """
    if axis == 0:
        left_lengths, right_lengths = left._row_lengths_cache, right._row_lengths_cache
    else:
        left_lengths, right_lengths = (
            left._column_widths_cache,
            right._column_widths_cache,
        )

    if left_lengths is None or right_lengths is None:
        # Lengths are unknown: fall back to counting the existing partitions.
        left_splits = left._partitions.shape[axis]
        right_splits = right._partitions.shape[axis]
        return min(left_splits + right_splits, NPartitions.get())

    total_len = sum(left_lengths) + sum(right_lengths)
    num_splits = NPartitions.get()
    if axis == 0:
        min_block_size = MinRowPartitionSize.get()
    else:
        min_block_size = MinColumnPartitionSize.get()
    chunk_size = compute_chunksize(
        axis_len=total_len,
        num_splits=num_splits,
        min_block_size=min_block_size,
    )
    return ceil(total_len / chunk_size)


================================================
FILE: modin/db_conn.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
Module houses `ModinDatabaseConnection` class.

`ModinDatabaseConnection` lets a single process make its own connection to a
database to read from it. Whereas it's possible in pandas to pass an open
connection directly to `read_sql`, the open connection is not pickleable
in Modin, so each worker must open its own connection.
`ModinDatabaseConnection` saves the arguments that would normally be used to
make a db connection. It can make and provide a connection whenever the Modin
driver or a worker wants one.
"""

from typing import Any, Dict, Optional, Sequence

# Names of the database libraries that `ModinDatabaseConnection` accepts
# (compared against the lower-cased `lib` argument).
_PSYCOPG_LIB_NAME = "psycopg2"
_SQLALCHEMY_LIB_NAME = "sqlalchemy"


class UnsupportedDatabaseException(Exception):
    """Raised when Modin is asked for a kind of database connection it can't create."""


class ModinDatabaseConnection:
    """
    Hold the arguments needed to open a SQL database connection on demand.

    Parameters
    ----------
    lib : str
        The library for the SQL connection (matched case-insensitively).
    *args : iterable
        Positional arguments to pass when creating the connection.
    **kwargs : dict
        Keyword arguments to pass when creating the connection.
    """

    lib: str
    args: Sequence
    kwargs: Dict
    _dialect_is_microsoft_sql_cache: Optional[bool]

    def __init__(self, lib: str, *args: Any, **kwargs: Any) -> None:
        normalized_lib = lib.lower()
        supported_libs = (_PSYCOPG_LIB_NAME, _SQLALCHEMY_LIB_NAME)
        if normalized_lib not in supported_libs:
            raise UnsupportedDatabaseException(
                f"Unsupported database library {normalized_lib}"
            )
        self.lib = normalized_lib
        self.args = args
        self.kwargs = kwargs
        # Computed lazily by `_dialect_is_microsoft_sql`.
        self._dialect_is_microsoft_sql_cache = None

    def _dialect_is_microsoft_sql(self) -> bool:
        """
        Report whether queries must be written in the Microsoft SQL dialect.

        Only a sqlalchemy connection can require it: an engine is created from
        the stored args and kwargs, and the dialect is Microsoft SQL when the
        engine's driver is pymssql or pyodbc. The answer is cached.

        Returns
        -------
        bool
        """
        if self._dialect_is_microsoft_sql_cache is not None:
            return self._dialect_is_microsoft_sql_cache

        # Default answer; stored before the engine is created.
        self._dialect_is_microsoft_sql_cache = False
        if self.lib == _SQLALCHEMY_LIB_NAME:
            from sqlalchemy import create_engine

            engine = create_engine(*self.args, **self.kwargs)
            self._dialect_is_microsoft_sql_cache = engine.driver in (
                "pymssql",
                "pyodbc",
            )
        return self._dialect_is_microsoft_sql_cache

    def get_connection(self) -> Any:
        """
        Open a new database connection and return it.

        With psycopg2 the stored arguments are forwarded to psycopg2.connect()
        and its result is returned. With sqlalchemy the stored arguments are
        forwarded to sqlalchemy.create_engine() and the result of connect()
        on that engine is returned.

        Returns
        -------
        Any
            The open database connection.
        """
        library = self.lib
        if library == _PSYCOPG_LIB_NAME:
            import psycopg2

            return psycopg2.connect(*self.args, **self.kwargs)
        if library == _SQLALCHEMY_LIB_NAME:
            from sqlalchemy import create_engine

            engine = create_engine(*self.args, **self.kwargs)
            return engine.connect()

        raise UnsupportedDatabaseException("Unsupported database library")

    def get_string(self) -> str:
        """
        Return the first positional connection argument (the connection string).

        Returns
        -------
        str
        """
        connection_string = self.args[0]
        return connection_string

    def column_names_query(self, query: str) -> str:
        """
        Build a query that yields the column names `query` would produce.

        Parameters
        ----------
        query : str
            The SQL query to check.

        Returns
        -------
        str
        """
        # An always-false WHERE clause selects zero rows. It looks odd, but it
        # works in both PostgreSQL and Microsoft SQL, and the latter doesn't
        # let you use a "limit" clause to select 0 rows.
        wrapped = f"SELECT * FROM ({query}) AS _MODIN_COUNT_QUERY"
        return wrapped + " WHERE 1 = 0"

    def row_count_query(self, query: str) -> str:
        """
        Build a query that yields the number of rows `query` would produce.

        Parameters
        ----------
        query : str
            The SQL query to check.

        Returns
        -------
        str
        """
        subquery = f"({query}) AS _MODIN_COUNT_QUERY"
        return "SELECT COUNT(*) FROM " + subquery

    def partition_query(self, query: str, limit: int, offset: int) -> str:
        """
        Build a query that selects one partition of the rows of `query`.

        Parameters
        ----------
        query : str
            The SQL query to get a partition.
        limit : int
            The size of the partition.
        offset : int
            Where the partition begins.

        Returns
        -------
        str
        """
        if self._dialect_is_microsoft_sql():
            # Microsoft SQL uses OFFSET/FETCH, which needs an ORDER BY clause.
            return (
                f"SELECT * FROM ({query}) AS _MODIN_COUNT_QUERY ORDER BY(SELECT NULL)"
                f" OFFSET {offset} ROWS FETCH NEXT {limit} ROWS ONLY"
            )
        return (
            f"SELECT * FROM ({query}) AS _MODIN_COUNT_QUERY LIMIT {limit} OFFSET {offset}"
        )


================================================
FILE: modin/distributed/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""API to operate on distributed objects."""


================================================
FILE: modin/distributed/dataframe/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""API to operate on distributed DataFrame objects."""


================================================
FILE: modin/distributed/dataframe/pandas/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""API to operate on distributed pandas DataFrame objects."""

from .partitions import from_partitions, unwrap_partitions

# Public names of this package (both are imported from `.partitions` above).
__all__ = ["unwrap_partitions", "from_partitions"]


================================================
FILE: modin/distributed/dataframe/pandas/partitions.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses API to operate on Modin DataFrame partitions that are pandas DataFrame(s)."""

from typing import TYPE_CHECKING, Optional, Union

import numpy as np
from pandas._typing import Axes

from modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler
from modin.pandas.dataframe import DataFrame, Series

# The engine-specific partition classes are imported only for static type
# checking; at runtime `PartitionUnionType` is just an alias for `Any`.
if TYPE_CHECKING:
    from modin.core.execution.dask.implementations.pandas_on_dask.partitioning import (
        PandasOnDaskDataframeColumnPartition,
        PandasOnDaskDataframePartition,
        PandasOnDaskDataframeRowPartition,
    )
    from modin.core.execution.ray.implementations.pandas_on_ray.partitioning import (
        PandasOnRayDataframeColumnPartition,
        PandasOnRayDataframePartition,
        PandasOnRayDataframeRowPartition,
    )
    from modin.core.execution.unidist.implementations.pandas_on_unidist.partitioning import (
        PandasOnUnidistDataframeColumnPartition,
        PandasOnUnidistDataframePartition,
        PandasOnUnidistDataframeRowPartition,
    )

    # Union of every block, column and row partition class of the Ray, Dask
    # and Unidist implementations.
    PartitionUnionType = Union[
        PandasOnRayDataframePartition,
        PandasOnDaskDataframePartition,
        PandasOnUnidistDataframePartition,
        PandasOnRayDataframeColumnPartition,
        PandasOnRayDataframeRowPartition,
        PandasOnDaskDataframeColumnPartition,
        PandasOnDaskDataframeRowPartition,
        PandasOnUnidistDataframeColumnPartition,
        PandasOnUnidistDataframeRowPartition,
    ]
else:
    from typing import Any

    PartitionUnionType = Any


def unwrap_partitions(
    api_layer_object: Union[DataFrame, Series],
    axis: Optional[int] = None,
    get_ip: bool = False,
) -> list:
    """
    Extract the underlying partitions of the ``api_layer_object``.

    Parameters
    ----------
    api_layer_object : DataFrame or Series
        The API layer object.
    axis : {None, 0, 1}, default: None
        The axis to unwrap partitions for (0 - row partitions, 1 - column partitions).
        If ``axis is None``, the partitions are unwrapped as they are currently stored.
    get_ip : bool, default: False
        Whether to get node ip address to each partition or not.

    Returns
    -------
    list
        A list of Ray.ObjectRef/Dask.Future to partitions of the ``api_layer_object``
        if Ray/Dask is used as an engine.

    Raises
    ------
    ValueError
        If ``api_layer_object`` has no ``_query_compiler`` attribute, or if
        ``axis is None`` and the partition class is not one of the known ones.

    Notes
    -----
    If ``get_ip=True``, a list of tuples of Ray.ObjectRef/Dask.Future to node ip addresses and
    partitions of the ``api_layer_object``, respectively, is returned if Ray/Dask is used as an engine
    (i.e. ``[(Ray.ObjectRef/Dask.Future, Ray.ObjectRef/Dask.Future), ...]``).
    """
    if not hasattr(api_layer_object, "_query_compiler"):
        raise ValueError(
            f"Only API Layer objects may be passed in here, got {type(api_layer_object)} instead."
        )

    modin_frame = api_layer_object._query_compiler._modin_frame
    modin_frame._propagate_index_objs(None)

    if axis is not None:
        # Build axis partitions along the opposite axis and unwrap each of them.
        axis_partitions = modin_frame._partition_mgr_cls.axis_partition(
            modin_frame._partitions, axis ^ 1
        )
        unwrapped = []
        for part in axis_partitions:
            materialized = part.force_materialization(get_ip=get_ip)
            unwrapped.append(materialized.unwrap(squeeze=True, get_ip=get_ip))
        return unwrapped

    known_partition_classes = (
        "PandasOnRayDataframePartition",
        "PandasOnDaskDataframePartition",
        "PandasOnUnidistDataframePartition",
        "PandasOnRayDataframeColumnPartition",
        "PandasOnRayDataframeRowPartition",
        "PandasOnDaskDataframeColumnPartition",
        "PandasOnDaskDataframeRowPartition",
        "PandasOnUnidistDataframeColumnPartition",
        "PandasOnUnidistDataframeRowPartition",
    )
    # The class name of the first partition identifies the implementation in use.
    actual_engine = type(
        api_layer_object._query_compiler._modin_frame._partitions[0][0]
    ).__name__
    if actual_engine not in known_partition_classes:
        raise ValueError(
            f"Do not know how to unwrap '{actual_engine}' underlying partitions"
        )

    # Run any pending calls before exposing the raw blocks.
    for pending in modin_frame._partitions.flatten():
        pending.drain_call_queue()

    def single_block(partition: PartitionUnionType) -> np.ndarray:
        # Partitions exposing `force_materialization` are materialized first.
        if hasattr(partition, "force_materialization"):
            blocks = partition.force_materialization().list_of_blocks
        else:
            blocks = partition.list_of_blocks
        assert (
            len(blocks) == 1
        ), f"Implementation assumes that partition contains a single block, but {len(blocks)} received."
        return blocks[0]

    grid = []
    for row in modin_frame._partitions:
        if get_ip:
            grid.append(
                [
                    (partition.ip(materialize=False), single_block(partition))
                    for partition in row
                ]
            )
        else:
            grid.append([single_block(partition) for partition in row])
    return grid


def from_partitions(
    partitions: list,
    axis: Optional[int],
    index: Optional[Axes] = None,
    columns: Optional[Axes] = None,
    row_lengths: Optional[list] = None,
    column_widths: Optional[list] = None,
) -> DataFrame:
    """
    Build a DataFrame out of remote partitions.

    Parameters
    ----------
    partitions : list
        A list of Ray.ObjectRef/Dask.Future to partitions depending on the engine used.
        Or a list of tuples of Ray.ObjectRef/Dask.Future to node ip addresses and partitions
        depending on the engine used (i.e. ``[(Ray.ObjectRef/Dask.Future, Ray.ObjectRef/Dask.Future), ...]``).
    axis : {None, 0 or 1}
        The ``axis`` parameter is used to identify what are the partitions passed.
        You have to set:

        * ``axis=0`` if you want to create DataFrame from row partitions
        * ``axis=1`` if you want to create DataFrame from column partitions
        * ``axis=None`` if you want to create DataFrame from 2D list of partitions
    index : sequence, optional
        The index for the DataFrame. Is computed if not provided.
    columns : sequence, optional
        The columns for the DataFrame. Is computed if not provided.
    row_lengths : list, optional
        The length of each partition in the rows. The "height" of
        each of the block partitions. Is computed if not provided.
    column_widths : list, optional
        The width of each partition in the columns. The "width" of
        each of the block partitions. Is computed if not provided.

    Returns
    -------
    modin.pandas.DataFrame
        DataFrame instance created from remote partitions.

    Raises
    ------
    ValueError
        If `axis` is not one of ``None``, 0 or 1.

    Notes
    -----
    Pass `index`, `columns`, `row_lengths` and `column_widths` to avoid triggering
    extra computations of the metadata when creating a DataFrame.
    """
    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher

    factory = FactoryDispatcher.get_factory()
    # TODO(https://github.com/modin-project/modin/issues/5127):
    # Remove these assertions once the dependencies of this function all have types.
    assert factory is not None
    assert factory.io_cls is not None
    assert factory.io_cls.frame_cls is not None
    assert factory.io_cls.frame_cls._partition_mgr_cls is not None  # type: ignore[unreachable]
    partition_frame_class = factory.io_cls.frame_cls
    partition_mgr_class = partition_frame_class._partition_mgr_cls
    partition_class = partition_mgr_class._partition_class

    def wrap_plain(partition):
        # Wrap a bare partition reference.
        return partition_class(partition)

    def wrap_with_ip(pair):
        # Wrap an (ip, partition) pair.
        ip, partition = pair
        return partition_class(partition, ip=ip)

    # Modin stores partitions as a 2D NumPy array, so the passed partitions are
    # arranged into a 2D grid first. Whether the items carry an ip is decided
    # from the first item only.
    if axis is None:
        # 2D list -> 2D grid of the same layout.
        wrap = wrap_with_ip if isinstance(partitions[0][0], tuple) else wrap_plain
        grid = [[wrap(item) for item in row] for row in partitions]
    elif axis == 0:
        # Row partitions: every grid row holds exactly one row partition.
        wrap = wrap_with_ip if isinstance(partitions[0], tuple) else wrap_plain
        grid = [[wrap(item)] for item in partitions]
    elif axis == 1:
        # Column partitions: a single grid row whose cells are the column partitions.
        wrap = wrap_with_ip if isinstance(partitions[0], tuple) else wrap_plain
        grid = [[wrap(item) for item in partitions]]
    else:
        raise ValueError(
            f"Got unacceptable value of axis {axis}. Possible values are {0}, {1} or {None}."
        )
    parts = np.array(grid)

    index_is_computed = index is None
    columns_are_computed = columns is None

    if index_is_computed:
        index, internal_indices = partition_mgr_class.get_indices(0, parts)
        if row_lengths is None:
            row_lengths = [len(idx) for idx in internal_indices]

    if columns_are_computed:
        columns, internal_indices = partition_mgr_class.get_indices(1, parts)
        if column_widths is None:
            column_widths = [len(idx) for idx in internal_indices]

    frame = partition_frame_class(
        parts,
        index,
        columns,
        row_lengths=row_lengths,
        column_widths=column_widths,
    )

    # Labels computed from the partitions need no synchronization; only the
    # axes whose labels were supplied by the caller are synchronized.
    if not (index_is_computed and columns_are_computed):
        if index_is_computed:
            labels_axis_to_sync = 1
        elif columns_are_computed:
            labels_axis_to_sync = 0
        else:
            # `None` is passed when both `index` and `columns` were supplied.
            labels_axis_to_sync = None
        frame.synchronize_labels(axis=labels_axis_to_sync)

    return DataFrame(query_compiler=PandasQueryCompiler(frame))


================================================
FILE: modin/error_message.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import warnings
from typing import NoReturn, Optional, Set

from modin.logging import get_logger
from modin.utils import get_current_execution


class ErrorMessage(object):
    """Collection of classmethods that log and emit Modin's warnings and errors."""

    # Only print full ``default to pandas`` warning one time.
    printed_default_to_pandas = False
    printed_warnings: Set[int] = set()  # Set of hashes of printed warnings

    @classmethod
    def not_implemented(cls, message: str = "") -> NoReturn:
        """Log and raise ``NotImplementedError`` with `message` (or a generic text)."""
        if message == "":
            message = "This functionality is not yet available in Modin."
        get_logger().info(f"Modin Error: NotImplementedError: {message}")
        how_to_request = (
            "To request implementation, file an issue at "
            "https://github.com/modin-project/modin/issues or, if that's "
            "not possible, send an email to feature_requests@modin.org."
        )
        raise NotImplementedError(f"{message}\n" + how_to_request)

    @classmethod
    def single_warning(
        cls, message: str, category: Optional[type[Warning]] = None
    ) -> None:
        """Emit `message` as a warning unless an identical message was emitted before."""
        # note that there should not be identical messages with different categories since
        # only the message is used as the hash key.
        key = hash(message)
        logger = get_logger()
        already_printed = key in cls.printed_warnings
        if already_printed:
            logger.debug(
                f"Modin Warning: Single Warning: {message} was raised and suppressed."
            )
        else:
            logger.debug(f"Modin Warning: Single Warning: {message} was raised.")
            warnings.warn(message, category=category)
            # Remember the message only after the warning was actually issued.
            cls.printed_warnings.add(key)

    @classmethod
    def default_to_pandas(cls, message: str = "", reason: str = "") -> None:
        """Warn that an operation falls back to the pandas implementation."""
        # TODO(https://github.com/modin-project/modin/issues/7429): Use
        # frame-level engine config.

        if message == "":
            warning_text = "Defaulting to pandas implementation."
        else:
            execution_str = get_current_execution()
            warning_text = (
                f"{message} is not currently supported by {execution_str}, "
                "defaulting to pandas implementation."
            )

        # The pointer to the documentation is appended to the first warning only.
        if not cls.printed_default_to_pandas:
            warning_text = (
                f"{warning_text}\n"
                "Please refer to "
                "https://modin.readthedocs.io/en/stable/supported_apis/defaulting_to_pandas.html for explanation."
            )
            cls.printed_default_to_pandas = True
        if reason:
            warning_text += f"\nReason: {reason}"
        get_logger().debug(f"Modin Warning: Default to pandas: {warning_text}")
        warnings.warn(warning_text)

    @classmethod
    def catch_bugs_and_request_email(
        cls, failure_condition: bool, extra_log: str = ""
    ) -> None:
        """Raise an internal-error ``Exception`` when `failure_condition` is truthy."""
        if not failure_condition:
            return
        get_logger().info(f"Modin Error: Internal Error: {extra_log}")
        raise Exception(
            "Internal Error. "
            "Please visit https://github.com/modin-project/modin/issues "
            "to file an issue with the traceback and the command that "
            "caused this error. If you can't file a GitHub issue, "
            f"please email bug_reports@modin.org.\n{extra_log}"
        )

    @classmethod
    def non_verified_udf(cls) -> None:
        """Warn that the provided user-defined function is not verified."""
        get_logger().debug("Modin Warning: Non Verified UDF")
        text = (
            "User-defined function verification is still under development in Modin. "
            "The function provided is not verified."
        )
        warnings.warn(text)

    @classmethod
    def bad_type_for_numpy_op(cls, function_name: str, operand_type: type) -> None:
        """Warn once that `function_name` got an unsupported operand type."""
        text = f"Modin NumPy only supports objects of modin.numpy.array types for {function_name}, not {operand_type}. Defaulting to NumPy."
        cls.single_warning(text)

    @classmethod
    def mismatch_with_pandas(cls, operation: str, message: str) -> None:
        """Log and warn once that `operation` behaves differently from pandas."""
        get_logger().debug(
            f"Modin Warning: {operation} mismatch with pandas: {message}"
        )
        text = f"`{operation}` implementation has mismatches with pandas:\n{message}."
        cls.single_warning(text)

    @classmethod
    def warn(cls, message: str) -> None:
        """Emit `message` through ``warnings.warn``."""
        warnings.warn(message)

    @classmethod
    def not_initialized(cls, engine: str, code: str) -> None:
        """Warn that `engine` is not initialized yet and show the `code` that avoids the warning."""
        get_logger().debug(f"Modin Warning: Not Initialized: {engine}")
        text = (
            f"{engine} execution environment not yet initialized. Initializing...\n"
            "To remove this warning, run the following python code before doing dataframe operations:\n"
            f"{code}"
        )
        warnings.warn(text)


================================================
FILE: modin/experimental/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.


================================================
FILE: modin/experimental/batch/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

from .pipeline import PandasQueryPipeline

# Public names exported by ``modin.experimental.batch``; controls what
# ``from modin.experimental.batch import *`` brings into scope.
__all__ = [
    "PandasQueryPipeline",
]


================================================
FILE: modin/experimental/batch/pipeline.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses ``PandasQueryPipeline`` and ``PandasQuery`` classes, that implement a batch pipeline protocol for Modin Dataframes."""

from typing import Callable, Optional

import numpy as np

import modin.pandas as pd
from modin.config import NPartitions
from modin.core.execution.ray.implementations.pandas_on_ray.dataframe.dataframe import (
    PandasOnRayDataframe,
)
from modin.core.storage_formats.pandas import PandasQueryCompiler
from modin.error_message import ErrorMessage
from modin.utils import get_current_execution


class PandasQuery(object):
    """
    Description of one step (query) of a batch pipeline.

    An instance simply records the function to run together with the flags that
    tell the pipeline how the function has to be scheduled.

    Parameters
    ----------
    func : Callable
        The function to apply to the dataframe.
    is_output : bool, default: False
        Whether the result of this query is an output, i.e. it is handed both to the
        following query and directly to postprocessing.
    repartition_after : bool, default: False
        Whether the dataframe is repartitioned once this query has been computed.
        Currently, repartitioning is only supported if there is 1 partition prior to
        repartitioning.
    fan_out : bool, default: False
        Whether to fan out this node. If True and only 1 partition is passed as input,
        the partition is replicated `PandasQueryPipeline.num_partitions`
        (default: `NPartitions.get`) times, and the function is called on each replica.
        The `reduce_fn` must also be specified.
    pass_partition_id : bool, default: False
        Whether to pass the numerical partition id to the query.
    reduce_fn : Callable, default: None
        The reduce function to apply if `fan_out` is set to True. It receives the
        `PandasQueryPipeline.num_partitions` (default: `NPartitions.get`) partitions
        produced by this query and combines them into 1 partition.
    output_id : int, default: None
        An id to assign to this node if it is an output.

    Notes
    -----
    `func` must be a function that is applied along an axis of the dataframe.

    Use `pandas` for any module level functions inside `func` since it operates directly on
    partitions.
    """

    def __init__(
        self,
        func: Callable,
        is_output: bool = False,
        repartition_after: bool = False,
        fan_out: bool = False,
        pass_partition_id: bool = False,
        reduce_fn: Optional[Callable] = None,
        output_id: Optional[int] = None,
    ):
        # What to run.
        self.func = func
        self.reduce_fn = reduce_fn
        # How to schedule it.
        self.fan_out = fan_out
        self.repartition_after = repartition_after
        self.pass_partition_id = pass_partition_id
        # Output bookkeeping.
        self.is_output = is_output
        self.output_id = output_id
        # Sub-queries feeding into this query; stays ``None`` until the pipeline
        # fills it in for an output node.
        self.operators = None


class PandasQueryPipeline(object):
    """
    Internal representation of a query pipeline.

    This object keeps track of the functions that compose to form a query pipeline.

    Parameters
    ----------
    df : modin.pandas.Dataframe
        The dataframe to perform this pipeline on.
    num_partitions : int, optional
        The number of partitions to maintain for the batched dataframe.
        If not specified, the value is assumed equal to ``NPartitions.get()``.

    Notes
    -----
    Only row-parallel pipelines are supported. All queries will be applied along the row axis.
    """

    def __init__(self, df, num_partitions: Optional[int] = None):
        self._check_execution(df)
        ErrorMessage.single_warning(
            "The Batch Pipeline API is an experimental feature and still under development in Modin."
        )
        self.df = df
        # A falsy value (``None`` or ``0``) falls back to the configured default.
        self.num_partitions = num_partitions if num_partitions else NPartitions.get()
        self.outputs = []  # List of output queries.
        self.query_list = []  # Queries added since the last output query.
        # Flag to indicate that `output_id` has been specified for a node.
        self.is_output_id_specified = False

    @staticmethod
    def _check_execution(df):
        """
        Check that the pipeline can run on `df` under the current execution.

        Parameters
        ----------
        df : modin.pandas.DataFrame
            The dataframe the pipeline is going to be performed on.

        Notes
        -----
        ``ErrorMessage.not_implemented`` is invoked unless the current execution is
        ``PandasOnRay`` and `df` is backed by a ``PandasOnRayDataframe``.
        """
        if get_current_execution() != "PandasOnRay" or (
            not isinstance(df._query_compiler._modin_frame, PandasOnRayDataframe)
        ):  # pragma: no cover
            ErrorMessage.not_implemented(
                "Batch Pipeline API is only implemented for `PandasOnRay` execution."
            )

    def update_df(self, df):
        """
        Update the dataframe to perform this pipeline on.

        Parameters
        ----------
        df : modin.pandas.DataFrame
            The new dataframe to perform this pipeline on.
        """
        self._check_execution(df)
        self.df = df

    def add_query(
        self,
        func: Callable,
        is_output: bool = False,
        repartition_after: bool = False,
        fan_out: bool = False,
        pass_partition_id: bool = False,
        reduce_fn: Optional[Callable] = None,
        output_id: Optional[int] = None,
    ):
        """
        Add a query to the current pipeline.

        Parameters
        ----------
        func : Callable
            DataFrame query to perform.
        is_output : bool, default: False
            Whether this query should be designated as an output query. If `True`, the output of
            this query is passed both to the next query and directly to postprocessing.
        repartition_after : bool, default: False
            Whether the dataframe should be repartitioned after this query. Currently,
            repartitioning is only supported if there is 1 partition prior.
        fan_out : bool, default: False
            Whether to fan out this node. If True and only 1 partition is passed as input, the
            partition is replicated `self.num_partitions` (default: `NPartitions.get`) times,
            and the function is called on each. The `reduce_fn` must also be specified.
        pass_partition_id : bool, default: False
            Whether to pass the numerical partition id to the query.
        reduce_fn : Callable, default: None
            The reduce function to apply if `fan_out` is set to True. This takes the
            `self.num_partitions` (default: `NPartitions.get`) partitions that result from this
            query, and combines them into 1 partition.
        output_id : int, default: None
            An id to assign to this node if it is an output.

        Raises
        ------
        ValueError
            If `output_id` is given for a non-output query, or if only some of the
            output queries have an `output_id`.

        Notes
        -----
        Use `pandas` for any module level functions inside `func` since it operates directly on
        partitions.
        """
        # All validation happens before the pipeline state is touched.
        if not is_output and output_id is not None:
            raise ValueError("Output ID cannot be specified for non-output node.")
        if is_output:
            # Either every output node carries an id or none of them does.
            if (
                output_id is not None
                and not self.is_output_id_specified
                and len(self.outputs) != 0
            ):
                raise ValueError("Output ID must be specified for all nodes.")
            if output_id is None and self.is_output_id_specified:
                raise ValueError("Output ID must be specified for all nodes.")

        query = PandasQuery(
            func,
            is_output,
            repartition_after,
            fan_out,
            pass_partition_id,
            reduce_fn,
            output_id,
        )
        if not is_output:
            self.query_list.append(query)
            return
        # An output node closes the current sub-query: it takes ownership of the
        # queries accumulated so far and a fresh accumulation starts.
        query.operators = self.query_list
        self.query_list = []
        self.outputs.append(query)
        if output_id is not None:
            self.is_output_id_specified = True

    @staticmethod
    def _build_fan_out_reducer(reduce_fn, replicas):
        """
        Build the function that merges the results of a fanned out query.

        The values are passed in explicitly so that the returned function is bound
        to them at creation time. The function is only executed when the call queue
        it was added to is drained, which may happen after the caller has moved on
        to other nodes.

        Parameters
        ----------
        reduce_fn : Callable
            The reduce function of the fan out node.
        replicas : list
            The partitions holding the results for partition ids ``1..n-1``.

        Returns
        -------
        Callable
            Function taking the result for partition id ``0`` and returning
            ``reduce_fn`` applied to the list of all results.
        """

        def reducer(df):
            df_inputs = [df]
            df_inputs.extend(replica.to_pandas() for replica in replicas)
            return reduce_fn(df_inputs)

        return reducer

    @staticmethod
    def _build_row_slicer(num_partitions):
        """
        Build the function that cuts the `i`-th row chunk out of a dataframe.

        Only the integer `num_partitions` is captured, so the pipeline object
        itself does not have to travel with the queued call.

        Parameters
        ----------
        num_partitions : int
            The number of row chunks the dataframe is divided into.

        Returns
        -------
        Callable
            Function ``(df, i)`` returning the `i`-th chunk; the last chunk also
            receives the remainder rows.
        """

        def mask_partition(df, i):  # pragma: no cover
            new_length = len(df.index) // num_partitions
            if i == num_partitions - 1:
                return df.iloc[i * new_length :]
            return df.iloc[i * new_length : (i + 1) * new_length]

        return mask_partition

    def _complete_nodes(self, list_of_nodes, partitions):
        """
        Run a sub-query end to end.

        Parameters
        ----------
        list_of_nodes : list of PandasQuery
            The functions that compose this query.
        partitions : list of PandasOnRayDataframeVirtualPartition
            The partitions that compose the dataframe that is input to this sub-query.

        Returns
        -------
        list of PandasOnRayDataframeVirtualPartition
            The partitions that result from computing the functions represented by `list_of_nodes`.
        """
        num_partitions = self.num_partitions
        for node in list_of_nodes:
            if node.fan_out:
                if len(partitions) > 1:
                    ErrorMessage.not_implemented(
                        "Fan out is only supported with DataFrames with 1 partition."
                    )
                partitions[0] = partitions[0].force_materialization()
                # Remember the input blocks before `func` is applied for id 0, so
                # that every replica starts from the same data.
                partition_list = partitions[0].list_of_block_partitions
                partitions[0] = partitions[0].add_to_apply_calls(node.func, 0)
                partitions[0].drain_call_queue(num_splits=1)
                replicas = []
                for i in range(1, num_partitions):
                    replica = type(partitions[0])(
                        partition_list,
                        full_axis=partitions[0].full_axis,
                    ).add_to_apply_calls(node.func, i)
                    replica.drain_call_queue(num_splits=1)
                    replicas.append(replica)
                # Bind this node's `reduce_fn` now: the reducer runs lazily and must
                # not pick up whichever node the loop has reached by then.
                reducer = self._build_fan_out_reducer(node.reduce_fn, replicas)
                partitions = [partitions[0].add_to_apply_calls(reducer)]
            elif node.repartition_after:
                if len(partitions) > 1:
                    ErrorMessage.not_implemented(
                        "Dynamic repartitioning is currently only supported for DataFrames with 1 partition."
                    )
                partitions[0] = (
                    partitions[0].add_to_apply_calls(node.func).force_materialization()
                )
                source = partitions[0]
                mask_partition = self._build_row_slicer(num_partitions)
                partitions = [
                    type(source)(
                        source.list_of_block_partitions,
                        full_axis=source.full_axis,
                    ).add_to_apply_calls(mask_partition, i)
                    for i in range(num_partitions)
                ]
            elif node.pass_partition_id:
                partitions = [
                    part.add_to_apply_calls(node.func, i)
                    for i, part in enumerate(partitions)
                ]
            else:
                partitions = [part.add_to_apply_calls(node.func) for part in partitions]
        return partitions

    @staticmethod
    def _build_result_df(row_partitions):
        """
        Assemble a Modin DataFrame out of computed row partitions.

        Parameters
        ----------
        row_partitions : list of PandasOnRayDataframeVirtualPartition
            The row partitions of one pipeline output.

        Returns
        -------
        modin.pandas.DataFrame
            The dataframe backed by the block partitions of `row_partitions`.
        """
        partitions = np.array(
            [row_partition.list_of_block_partitions for row_partition in row_partitions]
        )
        partition_mgr_class = PandasOnRayDataframe._partition_mgr_cls
        index, internal_rows = partition_mgr_class.get_indices(0, partitions)
        columns, internal_cols = partition_mgr_class.get_indices(1, partitions)
        result_modin_frame = PandasOnRayDataframe(
            partitions,
            index,
            columns,
            row_lengths=list(map(len, internal_rows)),
            column_widths=list(map(len, internal_cols)),
        )
        return pd.DataFrame(query_compiler=PandasQueryCompiler(result_modin_frame))

    def compute_batch(
        self,
        postprocessor: Optional[Callable] = None,
        pass_partition_id: Optional[bool] = False,
        pass_output_id: Optional[bool] = False,
    ):
        """
        Run the completed pipeline + any postprocessing steps end to end.

        Parameters
        ----------
        postprocessor : Callable, default: None
            A postprocessing function to be applied to each output partition.
            The order of arguments passed is `df` (the partition), `output_id`
            (if `pass_output_id=True`), and `partition_id` (if `pass_partition_id=True`).
        pass_partition_id : bool, default: False
            Whether or not to pass the numerical partition id to the postprocessing function.
        pass_output_id : bool, default: False
            Whether or not to pass the output ID associated with output queries to the
            postprocessing function.

        Returns
        -------
        list or dict
            If output ids are specified, a dictionary mapping output id to the resulting dataframe
            is returned, otherwise, a list of the resulting dataframes is returned. An empty
            list is returned if the pipeline has no output queries.

        Raises
        ------
        ValueError
            If `pass_output_id` is True but no output ids have been specified.
        """
        if len(self.outputs) == 0:
            ErrorMessage.single_warning(
                "No outputs to compute. Returning an empty list. Please specify outputs by calling `add_query` with `is_output=True`."
            )
            return []
        if not self.is_output_id_specified and pass_output_id:
            raise ValueError(
                "`pass_output_id` is set to True, but output ids have not been specified. "
                + "To pass output ids, please specify them using the `output_id` kwarg with pipeline.add_query"
            )
        keyed_by_id = self.is_output_id_specified
        outs = {} if keyed_by_id else []
        modin_frame = self.df._query_compiler._modin_frame
        partitions = modin_frame._partition_mgr_cls.row_partitions(
            modin_frame._partitions
        )
        for node in self.outputs:
            # The un-postprocessed partitions are what the next sub-query consumes.
            partitions = self._complete_nodes(node.operators + [node], partitions)
            for part in partitions:
                part.drain_call_queue(num_splits=1)
            if postprocessor:
                output_partitions = []
                for partition_id, partition in enumerate(partitions):
                    args = []
                    if pass_output_id:
                        args.append(node.output_id)
                    if pass_partition_id:
                        args.append(partition_id)
                    output_partitions.append(
                        partition.add_to_apply_calls(postprocessor, *args)
                    )
            else:
                output_partitions = [
                    part.add_to_apply_calls(lambda df: df) for part in partitions
                ]
            # Ensures our result df is block partitioned.
            for part in output_partitions:
                part.drain_call_queue(num_splits=self.num_partitions)
            if keyed_by_id:
                outs[node.output_id] = output_partitions
            else:
                outs.append(output_partitions)

        if keyed_by_id:
            return {
                output_id: self._build_result_df(row_partitions)
                for output_id, row_partitions in outs.items()
            }
        return [self._build_result_df(row_partitions) for row_partitions in outs]


================================================
FILE: modin/experimental/core/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Experimental Modin's core functionality."""


================================================
FILE: modin/experimental/core/execution/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Experimental Modin's functionality related to execution engines supported."""


================================================
FILE: modin/experimental/core/execution/dask/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Experimental Modin's functionality related to Dask execution engine."""


================================================
FILE: modin/experimental/core/execution/dask/implementations/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Experimental functionality related to Dask execution engine and optimized for specific storage formats."""


================================================
FILE: modin/experimental/core/execution/dask/implementations/pandas_on_dask/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Experimental functionality related to Dask execution engine and optimized for pandas storage format."""


================================================
FILE: modin/experimental/core/execution/ray/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Experimental Modin's functionality related to Ray execution engine."""


================================================
FILE: modin/experimental/core/execution/ray/implementations/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Experimental functionality related to Ray execution engine and optimized for specific storage formats."""


================================================
FILE: modin/experimental/core/execution/unidist/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Experimental Modin's functionality related to unidist execution engine."""


================================================
FILE: modin/experimental/core/execution/unidist/implementations/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Experimental Modin's functionality related to unidist execution engine and optimized for specific storage formats."""


================================================
FILE: modin/experimental/core/execution/unidist/implementations/pandas_on_unidist/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Experimental functionality related to unidist execution engine and optimized for pandas storage format."""


================================================
FILE: modin/experimental/core/io/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Experimental IO functions implementations."""

from .glob.glob_dispatcher import ExperimentalGlobDispatcher
from .sql.sql_dispatcher import ExperimentalSQLDispatcher
from .text.csv_glob_dispatcher import ExperimentalCSVGlobDispatcher
from .text.custom_text_dispatcher import ExperimentalCustomTextDispatcher

# Dispatcher classes exported by ``modin.experimental.core.io``; each name
# is imported from its submodule above.
__all__ = [
    "ExperimentalCSVGlobDispatcher",
    "ExperimentalSQLDispatcher",
    "ExperimentalGlobDispatcher",
    "ExperimentalCustomTextDispatcher",
]


================================================
FILE: modin/experimental/core/io/glob/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Experimental module that allows to work with various formats using glob syntax."""


================================================
FILE: modin/experimental/core/io/glob/glob_dispatcher.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses ``ExperimentalGlobDispatcher`` class that is used to read/write files of different formats in parallel."""

import glob
import warnings

import pandas
from pandas.io.common import stringify_path

from modin.config import NPartitions
from modin.core.io.file_dispatcher import FileDispatcher
from modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler


class ExperimentalGlobDispatcher(FileDispatcher):
    """Class implements reading/writing different formats, parallelizing by the number of files."""

    # Keyword names under which the path may be passed, in lookup priority order.
    _PATH_KEYS = ("filepath_or_buffer", "path", "path_or_buf", "path_or_buffer")

    @classmethod
    def _pop_path(cls, kwargs):
        """
        Pop the path argument out of `kwargs`.

        Parameters
        ----------
        kwargs : dict
            Keyword arguments of a `read_*`/`to_*` call. The first key from
            ``_PATH_KEYS`` that is present is removed from the dict in place.

        Returns
        -------
        object
            The popped value after being passed through ``stringify_path``.

        Raises
        ------
        TypeError
            If `kwargs` contains none of the supported path keys.
        """
        for key in cls._PATH_KEYS:
            if key in kwargs:
                return stringify_path(kwargs.pop(key))
        raise TypeError(
            "A path must be passed under one of the following keywords: "
            + ", ".join(cls._PATH_KEYS)
        )

    @classmethod
    def _read(cls, **kwargs):
        """
        Read data from the files matching a glob pattern according to `kwargs` parameters.

        Parameters
        ----------
        **kwargs : dict
            Parameters of `read_*` function. The path (or buffer) must be passed
            under one of the keys listed in ``_PATH_KEYS``.

        Returns
        -------
        new_query_compiler : BaseQueryCompiler
            Query compiler with imported data for further processing.

        Raises
        ------
        TypeError
            If no path keyword is present in `kwargs`.
        ValueError
            If the glob pattern doesn't match any file.

        Notes
        -----
        The number of partitions is equal to the number of input files.
        """
        path_pattern = cls._pop_path(kwargs)
        if not (isinstance(path_pattern, str) and "*" in path_pattern):
            return cls.single_worker_read(
                path_pattern,
                single_worker_read=True,
                reason="Buffers and single files are not supported",
                **kwargs,
            )
        # Sort the matches to get a deterministic partition order.
        file_names = sorted(glob.glob(path_pattern))

        if len(file_names) == 0:
            # Report the pattern itself, not the (empty) list of matches.
            raise ValueError(
                f"There are no files matching the pattern: {path_pattern}"
            )

        partition_ids = [None] * len(file_names)
        lengths_ids = [None] * len(file_names)
        widths_ids = [None] * len(file_names)

        if len(file_names) != NPartitions.get():
            # do we need to do a repartitioning?
            warnings.warn("can be inefficient partitioning")

        for idx, file_name in enumerate(file_names):
            # Each task returns three objects: the last two are collected as the
            # length and the width, everything before them as the partition data.
            *partition_ids[idx], lengths_ids[idx], widths_ids[idx] = cls.deploy(
                func=cls.parse,
                f_kwargs={
                    "fname": file_name,
                    **kwargs,
                },
                num_returns=3,
            )
        lengths = cls.materialize(lengths_ids)
        widths = cls.materialize(widths_ids)

        # while num_splits is 1, need only one value
        partition_ids = cls.build_partition(partition_ids, lengths, [widths[0]])

        new_index, _ = cls.frame_cls._partition_mgr_cls.get_indices(0, partition_ids)
        new_columns, _ = cls.frame_cls._partition_mgr_cls.get_indices(1, partition_ids)

        return cls.query_compiler_cls(
            cls.frame_cls(partition_ids, new_index, new_columns)
        )

    @classmethod
    def write(cls, qc, **kwargs):
        """
        When `*` is in the filename, all partitions are written to their own separate file.

        The filenames are determined as follows:
        - if `*` is in the filename, then it will be replaced by the ascending sequence 0, 1, 2, …
        - if `*` is not in the filename, then the default implementation will be used.

        Parameters
        ----------
        qc : BaseQueryCompiler
            The query compiler of the Modin dataframe that we want
            to run ``to_<format>_glob`` on.
        **kwargs : dict
            Parameters for ``pandas.to_<format>(**kwargs)``. The path must be
            passed under one of the keys listed in ``_PATH_KEYS``.

        Raises
        ------
        TypeError
            If no path keyword is present in `kwargs`.
        """
        filepath_or_buffer = cls._pop_path(kwargs)
        has_glob = isinstance(filepath_or_buffer, str) and "*" in filepath_or_buffer
        if not has_glob or not isinstance(qc, PandasQueryCompiler):
            warnings.warn("Defaulting to Modin core implementation")
            cls.base_write(qc, filepath_or_buffer, **kwargs)
            return

        # Be careful, this is a kind of limitation, but at the time of the first implementation,
        # getting a name in this way is quite convenient.
        # We can use this attribute because the names of the BaseIO's methods match pandas API.
        write_func_name = cls.base_write.__name__

        def func(df, **kw):  # pragma: no cover
            # Substitute the partition number for every `*` in the pattern.
            idx = str(kw["partition_idx"])
            path = filepath_or_buffer.replace("*", idx)
            getattr(df, write_func_name)(path, **kwargs)
            return pandas.DataFrame()

        result = qc._modin_frame.apply_full_axis(
            1, func, new_index=[], new_columns=[], enumerate_partitions=True
        )
        # Materialize the resulting blocks so the call returns only after the
        # writing tasks have been executed.
        cls.materialize(
            [part.list_of_blocks[0] for row in result._partitions for part in row]
        )


================================================
FILE: modin/experimental/core/io/sql/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Experimental SQL format type IO functions implementations."""


================================================
FILE: modin/experimental/core/io/sql/sql_dispatcher.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses `ExperimentalSQLDispatcher` class."""

import warnings

import numpy as np
import pandas

from modin.config import NPartitions
from modin.core.io import SQLDispatcher


class ExperimentalSQLDispatcher(SQLDispatcher):
    """Class handles experimental utils for reading SQL queries or database tables."""

    # Cached result of ``cls.put(read_sql_with_offset)``; filled on first use.
    __read_sql_with_offset = None

    @classmethod
    def preprocess_func(cls):  # noqa: RT01
        """Prepare a function for transmission to remote workers."""
        if cls.__read_sql_with_offset is None:
            # sql deps are optional, so import only when needed
            from modin.experimental.core.io.sql.utils import read_sql_with_offset

            cls.__read_sql_with_offset = cls.put(read_sql_with_offset)
        return cls.__read_sql_with_offset

    @classmethod
    def _read(
        cls,
        sql,
        con,
        index_col,
        coerce_float,
        params,
        parse_dates,
        columns,
        chunksize,
        dtype_backend,
        dtype,
        partition_column,
        lower_bound,
        upper_bound,
        max_sessions,
    ):  # noqa: PR01
        """
        Read SQL query or database table into a DataFrame.

        Documentation for parameters can be found at `modin.read_sql`.

        Returns
        -------
        BaseQueryCompiler
            A new query compiler with imported data for further processing.
        """
        # sql deps are optional, so import only when needed
        from modin.experimental.core.io.sql.utils import get_query_info, is_distributed

        if not is_distributed(partition_column, lower_bound, upper_bound):
            # Implicit concatenation keeps source indentation out of the message
            # (a backslash continuation inside the literal would embed it).
            message = (
                "Defaulting to Modin core implementation; "
                + "'partition_column', 'lower_bound', 'upper_bound' must be different from None"
            )
            warnings.warn(message)
            return cls.base_read(
                sql,
                con,
                index_col,
                coerce_float=coerce_float,
                params=params,
                parse_dates=parse_dates,
                columns=columns,
                chunksize=chunksize,
                dtype_backend=dtype_backend,
                dtype=dtype,
            )
        #  starts the distributed alternative
        cols_names, query = get_query_info(sql, con, partition_column)
        # A falsy `max_sessions` (None or 0) limits the read to a single partition.
        num_parts = min(NPartitions.get(), max_sessions or 1)
        num_splits = min(len(cols_names), num_parts)
        # Both bounds are inclusive, hence the `+ 1`.
        diff = (upper_bound - lower_bound) + 1
        min_size, rest = divmod(diff, num_parts)
        partition_ids = []
        index_ids = []
        end = lower_bound - 1
        func = cls.preprocess_func()
        for part in range(num_parts):
            # The first `rest` partitions take one extra value each so that the
            # whole [lower_bound, upper_bound] range is covered.
            size = min_size + 1 if part < rest else min_size
            start = end + 1
            end = start + size - 1
            partition_id = cls.deploy(
                func,
                f_args=(
                    partition_column,
                    start,
                    end,
                    num_splits,
                    query,
                    con,
                    index_col,
                    coerce_float,
                    params,
                    parse_dates,
                    columns,
                    chunksize,
                    dtype_backend,
                    dtype,
                ),
                num_returns=num_splits + 1,
            )
            # The last returned object is the row count of the chunk,
            # all preceding ones are the column splits.
            partition_ids.append(
                [cls.frame_partition_cls(obj) for obj in partition_id[:-1]]
            )
            index_ids.append(partition_id[-1])
        new_index = pandas.RangeIndex(sum(cls.materialize(index_ids)))
        new_query_compiler = cls.query_compiler_cls(
            cls.frame_cls(np.array(partition_ids), new_index, cols_names)
        )
        new_query_compiler._modin_frame.synchronize_labels(axis=0)
        return new_query_compiler


================================================
FILE: modin/experimental/core/io/sql/utils.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Utilities for experimental SQL format type IO functions implementations."""

import pandas
import pandas._libs.lib as lib
from sqlalchemy import MetaData, Table, create_engine, inspect, text

from modin.core.storage_formats.pandas.parsers import _split_result_for_readers


def is_distributed(partition_column, lower_bound, upper_bound):
    """
    Check if is possible to distribute a query with the given args.

    Parameters
    ----------
    partition_column : str
        Column name used for data partitioning between the workers.
    lower_bound : int
        The minimum value to be requested from the `partition_column`.
    upper_bound : int
        The maximum value to be requested from the `partition_column`.

    Returns
    -------
    bool
        Whether the given query is distributable or not.

    Raises
    ------
    InvalidArguments
        If all the arguments are passed but `upper_bound` is not greater than
        `lower_bound`, or if only some of the arguments are passed.
    """
    is_given = [
        arg is not None for arg in (partition_column, lower_bound, upper_bound)
    ]
    if all(is_given):
        if upper_bound > lower_bound:
            return True
        raise InvalidArguments("upper_bound must be greater than lower_bound.")
    if not any(is_given):
        return False
    raise InvalidArguments(
        "Invalid combination of partition_column, lower_bound, upper_bound. "
        + "All these arguments should be passed (distributed) or none of them (standard pandas)."
    )


def is_table(engine, sql):
    """
    Tell whether `sql` names a table known to `engine`.

    Parameters
    ----------
    engine : sqlalchemy.engine.base.Engine
        SQLAlchemy connection engine.
    sql : str
        SQL query to be executed or a table name.

    Returns
    -------
    bool
        Result of the inspector's ``has_table`` check for `sql`.
    """
    inspector = inspect(engine)
    return inspector.has_table(sql)


def get_table_metadata(engine, table):
    """
    Reflect the given table and return its description.

    Parameters
    ----------
    engine : sqlalchemy.engine.base.Engine
        SQLAlchemy connection engine.
    table : str
        Table name.

    Returns
    -------
    sqlalchemy.sql.schema.Table
        Extracted metadata.
    """
    reflected = MetaData()
    # Only the requested table is reflected from the database.
    reflected.reflect(bind=engine, only=[table])
    return Table(table, reflected, autoload=True)


def get_table_columns(metadata):
    """
    Map column names of the table to the names of their python types.

    Parameters
    ----------
    metadata : sqlalchemy.sql.schema.Table
        Table metadata.

    Returns
    -------
    dict
        Dictionary with columns names and python types.
    """
    # ``str(column)`` may be qualified ("table.column"); keep only the part
    # after the last dot.
    return {
        str(column).rpartition(".")[2]: column.type.python_type.__name__
        for column in metadata.c
    }


def build_query_from_table(name):
    """
    Build a query selecting everything from the given table.

    Parameters
    ----------
    name : str
        Table name.

    Returns
    -------
    str
        Query string.
    """
    return f"SELECT * FROM {name}"


def check_query(query):
    """
    Check query sanity.

    The query must contain the ``SELECT`` keyword followed by whitespace and the
    ``FROM`` keyword surrounded by whitespace (case-insensitive).

    Parameters
    ----------
    query : str
        Query string.

    Raises
    ------
    InvalidQuery
        If either of the keywords is not found in the query.
    """
    # Map every whitespace character to a plain space so that keywords delimited
    # by newlines or tabs (e.g. multi-line queries) are recognized as well.
    q = "".join(" " if char.isspace() else char for char in query.lower())
    if "select " not in q:
        raise InvalidQuery("SELECT word not found in the query: {0}".format(query))
    if " from " not in q:
        raise InvalidQuery("FROM word not found in the query: {0}".format(query))


def get_query_columns(engine, query):
    """
    Extract columns names and python types from the `query`.

    The types are taken from the values of the first row of the result.

    Parameters
    ----------
    engine : sqlalchemy.engine.base.Engine
        SQLAlchemy connection engine.
    query : str
        SQL query.

    Returns
    -------
    dict
        Dictionary with columns names and python types.

    Notes
    -----
    If the query returns no rows, every column is reported as ``"NoneType"``,
    the same name a NULL value in the first row would produce.
    """
    # The context manager closes the connection even if the execution fails.
    with engine.connect() as con:
        result = con.execute(text(query))
        cols_names = list(result.keys())
        first_row = result.first()
    # `first()` gives None for an empty result set; fall back to all-None values
    # instead of failing on `list(None)`.
    values = [None] * len(cols_names) if first_row is None else list(first_row)
    return {name: type(value).__name__ for name, value in zip(cols_names, values)}


def check_partition_column(partition_column, cols):
    """
    Validate that `partition_column` is present in `cols` and has the ``int`` type.

    Parameters
    ----------
    partition_column : str
        Column name used for data partitioning between the workers.
    cols : dict
        Dictionary with columns names and python types.

    Raises
    ------
    InvalidPartitionColumn
        If the column is missing from `cols` or its type name is not ``"int"``.
    """
    not_found = object()
    # Type name of the first column equal to `partition_column`, if any.
    type_name = next(
        (v for k, v in cols.items() if k == partition_column), not_found
    )
    if type_name is not_found:
        raise InvalidPartitionColumn(
            f"partition_column {partition_column} not found in the query"
        )
    if type_name == "int":
        return
    raise InvalidPartitionColumn(f"partition_column must be int, and not {type_name}")


def get_query_info(sql, con, partition_column):
    """
    Collect the column names and the query text required to distribute a read.

    Parameters
    ----------
    sql : str
        SQL query to be executed or a table name.
    con : SQLAlchemy connectable or str
        Database connection or url string.
    partition_column : str
        Column name used for data partitioning between the workers.

    Returns
    -------
    list
        Columns names list.
    str
        Query string.
    """
    engine = create_engine(con)
    if not is_table(engine, sql):
        # `sql` is treated as a query: validate it and drop the semicolons
        # (presumably so it can be embedded as a subquery later -- TODO confirm).
        check_query(sql)
        query = sql.replace(";", "")
        columns = get_query_columns(engine, query)
    else:
        # `sql` is a table name: reflect it and select everything from it.
        reflected_table = get_table_metadata(engine, sql)
        query = build_query_from_table(sql)
        columns = get_table_columns(reflected_table)
    # TODO allow validation that takes into account edge cases of pandas e.g. "[index]"
    # check_partition_column(partition_column, columns)
    # TODO partition_column isn't used; we need to use it;
    return list(columns), query


def query_put_bounders(query, partition_column, start, end):  # pragma: no cover
    """
    Wrap `query` into a subquery restricted to the given partition boundaries.

    Parameters
    ----------
    query : str
        SQL query string.
    partition_column : str
        Column name used for data partitioning between the workers.
    start : int
        Lowest value to request from the `partition_column`.
    end : int
        Highest value to request from the `partition_column`.

    Returns
    -------
    str
        Query string with boundaries.
    """
    column = f"TMP_TABLE.{partition_column}"
    # Both boundaries are inclusive.
    bounds_clause = f" WHERE {column} >= {start} AND {column} <= {end}"
    return f"SELECT * FROM ({query}) AS TMP_TABLE {bounds_clause}"


class InvalidArguments(Exception):
    """
    Exception that should be raised if an invalid combination of arguments was found.

    Raised by ``is_distributed`` when only some of `partition_column`,
    `lower_bound` and `upper_bound` are passed, or when `upper_bound`
    is not greater than `lower_bound`.
    """


class InvalidQuery(Exception):
    """
    Exception that should be raised if an invalid query statement was found.

    Raised by ``check_query`` when the ``SELECT`` or ``FROM`` keyword
    is not found in the query.
    """


class InvalidPartitionColumn(Exception):
    """
    Exception that should be raised if `partition_column` doesn't satisfy predefined requirements.

    Raised by ``check_partition_column`` when the column is not found among
    the query columns or when its type is not ``int``.
    """


def read_sql_with_offset(
    partition_column,
    start,
    end,
    num_splits,
    sql,
    con,
    index_col=None,
    coerce_float=True,
    params=None,
    parse_dates=None,
    columns=None,
    chunksize=None,
    dtype_backend=lib.no_default,
    dtype=None,
):  # pragma: no cover
    """
    Read the rows of a query that fall within the given boundaries and split the result.

    Parameters
    ----------
    partition_column : str
        Column name used for data partitioning between the workers.
    start : int
        Lowest value to request from the `partition_column`.
    end : int
        Highest value to request from the `partition_column`.
    num_splits : int
        The number of partitions to split the column into.
    sql : str or SQLAlchemy Selectable (select or text object)
        SQL query to be executed or a table name.
    con : SQLAlchemy connectable or str
        Connection to database (sqlite3 connections are not supported).
    index_col : str or list of str, optional
        Column(s) to set as index(MultiIndex).
    coerce_float : bool, default: True
        Attempts to convert values of non-string, non-numeric objects
        (like decimal.Decimal) to floating point, useful for SQL result sets.
    params : list, tuple or dict, optional
        List of parameters to pass to ``execute`` method. The syntax used
        to pass parameters is database driver dependent. Check your
        database driver documentation for which of the five syntax styles,
        described in PEP 249's paramstyle, is supported.
    parse_dates : list or dict, optional
        The behavior is as follows:

        - List of column names to parse as dates.
        - Dict of `{column_name: format string}` where format string is
          strftime compatible in case of parsing string times, or is one of
          (D, s, ns, ms, us) in case of parsing integer timestamps.
        - Dict of `{column_name: arg dict}`, where the arg dict corresponds
          to the keyword arguments of ``pandas.to_datetime``
          Especially useful with databases without native Datetime support,
          such as SQLite.
    columns : list, optional
        List of column names to select from SQL table (only used when reading a
        table).
    chunksize : int, optional
        If specified, return an iterator where `chunksize` is the number of rows
        to include in each chunk.
    dtype_backend : {"numpy_nullable", "pyarrow"}, default: NumPy backed DataFrames
        Which dtype_backend to use, e.g. whether a DataFrame should have NumPy arrays,
        nullable dtypes are used for all dtypes that have a nullable implementation when
        "numpy_nullable" is set, PyArrow is used for all dtypes if "pyarrow" is set.
        The dtype_backends are still experimental.
    dtype : Type name or dict of columns, optional
        Data type for data or columns. E.g. np.float64 or {'a': np.float64, 'b': np.int32, 'c': 'Int64'}. The argument is ignored if a table is passed instead of a query.

    Returns
    -------
    list
        List with the split read results followed by the number of rows read.
    """
    bounded_query = query_put_bounders(sql, partition_column, start, end)
    read_kwargs = dict(
        index_col=index_col,
        coerce_float=coerce_float,
        params=params,
        parse_dates=parse_dates,
        columns=columns,
        chunksize=chunksize,
        dtype_backend=dtype_backend,
        dtype=dtype,
    )
    frame = pandas.read_sql(bounded_query, con, **read_kwargs)
    # The row count travels as the last element, after the column splits.
    row_count = len(frame)
    column_splits = _split_result_for_readers(1, num_splits, frame)
    return column_splits + [row_count]


================================================
FILE: modin/experimental/core/io/text/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Experimental text format type IO functions implementations."""


================================================
FILE: modin/experimental/core/io/text/csv_glob_dispatcher.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses `ExperimentalCSVGlobDispatcher` class, that is used for reading multiple `.csv` files simultaneously."""

import csv
import glob
import os
import warnings
from contextlib import ExitStack
from typing import List, Tuple

import fsspec
import pandas
import pandas._libs.lib as lib
from pandas.io.common import is_fsspec_url, is_url, stringify_path

from modin.config import NPartitions
from modin.core.io.file_dispatcher import OpenFile
from modin.core.io.text.csv_dispatcher import CSVDispatcher


class ExperimentalCSVGlobDispatcher(CSVDispatcher):
    """Class contains utils for reading multiple `.csv` files simultaneously."""

    @classmethod
    def _read(cls, filepath_or_buffer, **kwargs):
        """
        Read data from multiple `.csv` files passed with `filepath_or_buffer` simultaneously.

        Parameters
        ----------
        filepath_or_buffer : str, path object or file-like object
            `filepath_or_buffer` parameter of ``read_csv`` function.
        **kwargs : dict
            Parameters of ``read_csv`` function.

        Returns
        -------
        new_query_compiler : BaseQueryCompiler
            Query compiler with imported data for further processing.
        """
        # Ensures that the file is a string file path. Otherwise, default to pandas.
        filepath_or_buffer = cls.get_path_or_buffer(stringify_path(filepath_or_buffer))
        if isinstance(filepath_or_buffer, str):
            # os.altsep == None on Linux
            is_folder = any(
                filepath_or_buffer.endswith(sep) for sep in (os.sep, os.altsep) if sep
            )
            if "*" not in filepath_or_buffer and not is_folder:
                warnings.warn(
                    "Shell-style wildcard '*' must be in the filename pattern in order to read multiple "
                    + f"files at once. Did you forget it? Passed filename: '{filepath_or_buffer}'"
                )
            if not cls.file_exists(filepath_or_buffer, kwargs.get("storage_options")):
                return cls.single_worker_read(
                    filepath_or_buffer,
                    reason=cls._file_not_found_msg(filepath_or_buffer),
                    **kwargs,
                )
            filepath_or_buffer = cls.get_path(
                filepath_or_buffer, kwargs.get("storage_options")
            )
        elif not cls.pathlib_or_pypath(filepath_or_buffer):
            return cls.single_worker_read(
                filepath_or_buffer,
                reason=cls.BUFFER_UNSUPPORTED_MSG,
                **kwargs,
            )

        # We read multiple csv files when the file path is a list of absolute file paths. We assume that all of the files will be essentially replicas of the
        # first file but with different data values.
        glob_filepaths = filepath_or_buffer
        filepath_or_buffer = filepath_or_buffer[0]

        compression_type = cls.infer_compression(
            filepath_or_buffer, kwargs.get("compression")
        )

        chunksize = kwargs.get("chunksize")
        if chunksize is not None:
            return cls.single_worker_read(
                filepath_or_buffer,
                reason="`chunksize` parameter is not supported",
                **kwargs,
            )

        skiprows = kwargs.get("skiprows")
        if skiprows is not None and not isinstance(skiprows, int):
            return cls.single_worker_read(
                filepath_or_buffer,
                reason="Non-integer `skiprows` value not supported",
                **kwargs,
            )

        nrows = kwargs.pop("nrows", None)
        names = kwargs.get("names", lib.no_default)
        index_col = kwargs.get("index_col", None)
        usecols = kwargs.get("usecols", None)
        encoding = kwargs.get("encoding", None)
        if names in [lib.no_default, None]:
            # For the sake of the empty df, we assume no `index_col` to get the correct
            # column names before we build the index. Because we pass `names` in, this
            # step has to happen without removing the `index_col` otherwise it will not
            # be assigned correctly.
            names = pandas.read_csv(
                filepath_or_buffer,
                **dict(kwargs, usecols=None, nrows=0, skipfooter=0, index_col=None),
            ).columns
        elif index_col is None and not usecols:
            # When names is set to some list that is smaller than the number of columns
            # in the file, the first columns are built as a hierarchical index.
            empty_pd_df = pandas.read_csv(
                filepath_or_buffer, nrows=0, encoding=encoding
            )
            num_cols = len(empty_pd_df.columns)
            if num_cols > len(names):
                index_col = list(range(num_cols - len(names)))
                if len(index_col) == 1:
                    index_col = index_col[0]
                kwargs["index_col"] = index_col
        pd_df_metadata = pandas.read_csv(
            filepath_or_buffer, **dict(kwargs, nrows=1, skipfooter=0)
        )
        column_names = pd_df_metadata.columns
        skipfooter = kwargs.get("skipfooter", None)
        skiprows = kwargs.pop("skiprows", None)
        usecols_md = cls._validate_usecols_arg(usecols)
        if usecols is not None and usecols_md[1] != "integer":
            del kwargs["usecols"]
            all_cols = pandas.read_csv(
                filepath_or_buffer,
                **dict(kwargs, nrows=0, skipfooter=0),
            ).columns
            usecols = all_cols.get_indexer_for(list(usecols_md[0]))
        parse_dates = kwargs.pop("parse_dates", False)
        partition_kwargs = dict(
            kwargs,
            header=None,
            names=names,
            skipfooter=0,
            skiprows=None,
            parse_dates=parse_dates,
            usecols=usecols,
        )
        encoding = kwargs.get("encoding", None)
        quotechar = kwargs.get("quotechar", '"').encode(
            encoding if encoding is not None else "UTF-8"
        )
        is_quoting = kwargs.get("quoting", "") != csv.QUOTE_NONE

        with ExitStack() as stack:
            files = [
                stack.enter_context(
                    OpenFile(
                        fname,
                        "rb",
                        compression_type,
                        **(kwargs.get("storage_options", None) or {}),
                    )
                )
                for fname in glob_filepaths
            ]

            # Skip the header since we already have the header information and skip the
            # rows we are told to skip.
            if isinstance(skiprows, int) or skiprows is None:
                if skiprows is None:
                    skiprows = 0
                header = kwargs.get("header", "infer")
                if header == "infer" and kwargs.get("names", lib.no_default) in [
                    lib.no_default,
                    None,
                ]:
                    skip_header = 1
                elif isinstance(header, int):
                    skip_header = header + 1
                elif hasattr(header, "__iter__") and not isinstance(header, str):
                    skip_header = max(header) + 1
                else:
                    skip_header = 0
            if kwargs.get("encoding", None) is not None:
                partition_kwargs["skiprows"] = 1
            # Launch tasks to read partitions
            column_widths, num_splits = cls._define_metadata(
                pd_df_metadata, column_names
            )

            args = {
                "num_splits": num_splits,
                **partition_kwargs,
            }

            splits = cls.partitioned_file(
                files,
                glob_filepaths,
                num_partitions=NPartitions.get(),
                nrows=nrows,
                skiprows=skiprows,
                skip_header=skip_header,
                quotechar=quotechar,
                is_quoting=is_quoting,
            )
            partition_ids = [None] * len(splits)
            index_ids = [None] * len(splits)
            dtypes_ids = [None] * len(splits)
            for idx, chunks in enumerate(splits):
                args.update({"chunks": chunks})
                *partition_ids[idx], index_ids[idx], dtypes_ids[idx] = cls.deploy(
                    func=cls.parse,
                    f_kwargs=args,
                    num_returns=num_splits + 2,
                )

        # Compute the index based on a sum of the lengths of each partition (by default)
        # or based on the column(s) that were requested.
        if index_col is None:
            row_lengths = cls.materialize(index_ids)
            new_index = pandas.RangeIndex(sum(row_lengths))
        else:
            index_objs = cls.materialize(index_ids)
            row_lengths = [len(o) for o in index_objs]
            new_index = index_objs[0].append(index_objs[1:])
            new_index.name = pd_df_metadata.index.name

        partition_ids = cls.build_partition(partition_ids, row_lengths, column_widths)

        # Compute dtypes by collecting and combining all of the partitions. The
        # reported dtypes from differing rows can be different based on the inference in
        # the limited data seen by each worker. We use pandas to compute the exact dtype
        # over the whole column for each column. The index is set below.
        dtypes = cls.get_dtypes(dtypes_ids, column_names)

        new_frame = cls.frame_cls(
            partition_ids,
            new_index,
            column_names,
            row_lengths,
            column_widths,
            dtypes=dtypes,
        )
        new_query_compiler = cls.query_compiler_cls(new_frame)

        if skipfooter:
            new_query_compiler = new_query_compiler.drop(
                new_query_compiler.index[-skipfooter:]
            )
        if kwargs.get("squeeze", False) and len(new_query_compiler.columns) == 1:
            return new_query_compiler[new_query_compiler.columns[0]]
        if index_col is None:
            new_query_compiler._modin_frame.synchronize_labels(axis=0)
        return new_query_compiler

    @classmethod
    def file_exists(cls, file_path: str, storage_options=None) -> bool:
        """
        Tell whether `file_path` exists or matches at least one entry as a glob.

        Parameters
        ----------
        file_path : str
            Path (possibly a glob pattern) to check.
        storage_options : dict, optional
            Keyword from `read_*` functions.

        Returns
        -------
        bool
            True if the path exists or the pattern matches something.

        Raises
        ------
        NotImplementedError
            If `file_path` is recognized by `is_url`.
        """
        if is_url(file_path):
            raise NotImplementedError("`read_csv_glob` does not support urllib paths.")

        # Non-fsspec paths are resolved with the standard `glob` module.
        if not is_fsspec_url(file_path):
            return bool(glob.glob(file_path))

        # Errors after which the lookup is retried with ``anon=True``. The
        # botocore ones are only considered when botocore is importable.
        try:
            from botocore.exceptions import (
                ConnectTimeoutError,
                EndpointConnectionError,
                NoCredentialsError,
            )
        except ModuleNotFoundError:
            retry_errors = (PermissionError,)
        else:
            retry_errors = (
                NoCredentialsError,
                PermissionError,
                EndpointConnectionError,
                ConnectTimeoutError,
            )

        # Work on a copy and drop any user-supplied ``anon``: the retry below
        # passes its own ``anon=True``.
        fs_options = {} if storage_options is None else dict(storage_options)
        fs_options.pop("anon", None)

        fs, _ = fsspec.core.url_to_fs(file_path, **fs_options)
        try:
            found = fs.exists(file_path)
        except retry_errors:
            fs, _ = fsspec.core.url_to_fs(file_path, anon=True, **fs_options)
            found = fs.exists(file_path)

        # A pattern does not "exist" as such, so fall back to glob expansion.
        return found or bool(fs.glob(file_path))

    @classmethod
    def get_path(cls, file_path: str, storage_options=None) -> list:
        """
        Expand `file_path` into the list of files it refers to.

        Parameters
        ----------
        file_path : str
            Path (possibly a glob pattern) to expand.
        storage_options : dict, optional
            Keyword from `read_*` functions.

        Returns
        -------
        list
            Absolute local paths for non-URL input, otherwise the matched
            entries with the filesystem protocol re-attached.

        Raises
        ------
        FileNotFoundError
            If a remote lookup matches nothing and the path does not exist.
        """
        # Local paths: plain glob expansion turned into absolute paths.
        if not (is_fsspec_url(file_path) or is_url(file_path)):
            return [os.path.abspath(match) for match in glob.glob(file_path)]

        # Errors after which the lookup is retried with ``anon=True``. The
        # botocore ones are only considered when botocore is importable.
        try:
            from botocore.exceptions import (
                ConnectTimeoutError,
                EndpointConnectionError,
                NoCredentialsError,
            )
        except ModuleNotFoundError:
            retry_errors = (PermissionError,)
        else:
            retry_errors = (
                NoCredentialsError,
                PermissionError,
                EndpointConnectionError,
                ConnectTimeoutError,
            )

        def list_matches(filesystem) -> List[str]:
            # Patterns containing "*" are globbed; anything else is walked
            # with `find`, keeping only entries that are not folders.
            if "*" in file_path:
                matches = filesystem.glob(file_path)
            else:
                matches = [
                    entry
                    for entry in filesystem.find(file_path)
                    if not entry.endswith("/")
                ]
            if not matches and not filesystem.exists(file_path):
                raise FileNotFoundError(f"Path <{file_path}> isn't available.")
            return [filesystem.unstrip_protocol(entry) for entry in matches]

        # Work on a copy and drop any user-supplied ``anon``: the retry below
        # passes its own ``anon=True``.
        fs_options = {} if storage_options is None else dict(storage_options)
        fs_options.pop("anon", None)

        fs, _ = fsspec.core.url_to_fs(file_path, **fs_options)
        try:
            return list_matches(fs)
        except retry_errors:
            anon_fs, _ = fsspec.core.url_to_fs(file_path, anon=True, **fs_options)
        # Retried outside the handler so a failure here is raised on its own.
        return list_matches(anon_fs)

    @classmethod
    def partitioned_file(
        cls,
        files,
        fnames: List[str],
        num_partitions: int = None,
        nrows: int = None,
        skiprows: int = None,
        skip_header: int = None,
        quotechar: bytes = b'"',
        is_quoting: bool = True,
    ) -> List[List[Tuple[str, int, int]]]:
        """
        Compute chunk sizes in bytes for every partition.

        Parameters
        ----------
        files : file or list of files
            File(s) to be partitioned.
        fnames : str or list of str
            File name(s) to be partitioned, in the same order as `files`.
        num_partitions : int, optional
            For what number of partitions split a file.
            If not specified grabs the value from `modin.config.NPartitions.get()`.
        nrows : int, optional
            Number of rows of file to read.
        skiprows : int, optional
            Specifies rows to skip.
        skip_header : int, optional
            Specifies header rows to skip.
        quotechar : bytes, default: b'"'
            Indicate quote in a file.
        is_quoting : bool, default: True
            Whether or not to consider quotes.

        Returns
        -------
        list
            List, where each element of the list is a list of tuples. The inner lists
            of tuples contains the data file name of the chunk, chunk start offset, and
            chunk end offsets for its corresponding file.

        Notes
        -----
        The logic gets really complicated if we try to use the `TextFileDispatcher.partitioned_file`.
        """
        if not isinstance(files, list):
            files = [files]
        # A single file name must be wrapped as well, otherwise `zip` below
        # would pair the files with the individual characters of the string.
        if isinstance(fnames, str):
            fnames = [fnames]

        if num_partitions is None:
            num_partitions = NPartitions.get()

        file_sizes = [cls.file_size(f) for f in files]
        # Target size of one split: a number of rows when `nrows` is given,
        # a number of bytes otherwise.
        partition_size = max(
            1, num_partitions, (nrows if nrows else sum(file_sizes)) // num_partitions
        )

        result = []
        split_result = []
        split_size = 0
        read_rows_counter = 0
        for f, fname, f_size in zip(files, fnames, file_sizes):
            if skiprows or skip_header:
                skip_amount = (skiprows if skiprows else 0) + (
                    skip_header if skip_header else 0
                )

                # TODO(williamma12): Handle when skiprows > number of rows in file. Currently returns empty df.
                outside_quotes, read_rows = cls._read_rows(
                    f,
                    nrows=skip_amount,
                    quotechar=quotechar,
                    is_quoting=is_quoting,
                )
                if skiprows:
                    # NOTE(review): `read_rows` also counts the header rows, so
                    # `skiprows` can become negative here and reduce
                    # `skip_amount` for the following files - confirm intended.
                    skiprows -= read_rows
                    if skiprows > 0:
                        # We have more rows to skip than the amount read in the file.
                        continue

            start = f.tell()

            while f.tell() < f_size:
                if split_size >= partition_size:
                    # Create a new split when the split has reached partition_size.
                    # This is mainly used when we are reading row-wise partitioned files.
                    result.append(split_result)
                    split_result = []
                    split_size = 0

                # We calculate the amount that we need to read based off of how much of the split we have already read.
                read_size = partition_size - split_size

                if nrows:
                    if read_rows_counter >= nrows:
                        # Finish when we have read enough rows.
                        if len(split_result) > 0:
                            # Add last split into the result.
                            result.append(split_result)
                        return result
                    elif read_rows_counter + read_size > nrows:
                        # Ensure that we will not read more than nrows.
                        read_size = nrows - read_rows_counter

                    outside_quotes, read_rows = cls._read_rows(
                        f,
                        nrows=read_size,
                        quotechar=quotechar,
                        is_quoting=is_quoting,
                    )
                    split_size += read_rows
                    read_rows_counter += read_rows
                else:
                    outside_quotes = cls.offset(
                        f,
                        offset_size=read_size,
                        quotechar=quotechar,
                        is_quoting=is_quoting,
                    )

                split_result.append((fname, start, f.tell()))
                # NOTE(review): in the `nrows` branch `split_size` has already
                # been advanced by the rows read, so it ends up holding
                # rows + bytes there - confirm whether that is intended.
                split_size += f.tell() - start
                start = f.tell()

                # Add outside_quotes.
                if is_quoting and not outside_quotes:
                    warnings.warn("File has mismatched quotes")

        # Add last split into the result.
        if len(split_result) > 0:
            result.append(split_result)

        return result


================================================
FILE: modin/experimental/core/io/text/custom_text_dispatcher.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses `ExperimentalCustomTextDispatcher` class, that is used for reading custom text files."""

import pandas
from pandas.io.common import stringify_path

from modin.config import NPartitions
from modin.core.io.file_dispatcher import OpenFile
from modin.core.io.text.text_file_dispatcher import TextFileDispatcher


class ExperimentalCustomTextDispatcher(TextFileDispatcher):
    """Dispatcher that reads custom text files through a user-supplied parser."""

    @classmethod
    def _read(cls, filepath_or_buffer, columns, custom_parser, **kwargs):
        r"""
        Read `filepath_or_buffer` using the `read_custom_text` parameters in `kwargs`.

        Parameters
        ----------
        filepath_or_buffer : str, path object or file-like object
            `filepath_or_buffer` parameter of `read_custom_text` function.
        columns : list or callable(file-like object, \*\*kwargs) -> list
            Column names of list type or callable that create column names from opened file
            and passed `kwargs`.
        custom_parser : callable(file-like object, \*\*kwargs) -> pandas.DataFrame
            Function that takes as input a part of the `filepath_or_buffer` file loaded into
            memory in file-like object form.
        **kwargs : dict
            Parameters of `read_custom_text` function.

        Returns
        -------
        BaseQueryCompiler
            Query compiler with imported data for further processing.
        """
        filepath_or_buffer = stringify_path(filepath_or_buffer)
        if isinstance(filepath_or_buffer, str):
            source = cls.get_path(filepath_or_buffer)
        else:
            source = cls.get_path_or_buffer(filepath_or_buffer)
        compression = cls.infer_compression(filepath_or_buffer, kwargs["compression"])

        # Work out the chunk boundaries. `is_quoting` is removed from `kwargs`
        # here, so it reaches neither the `columns` callable nor the workers.
        with OpenFile(source, "rb", compression) as f:
            partitions_count = NPartitions.get()
            quoting = kwargs.pop("is_quoting")
            row_limit = kwargs["nrows"]
            splits, _ = cls.partitioned_file(
                f,
                num_partitions=partitions_count,
                is_quoting=quoting,
                nrows=row_limit,
            )

        # Column labels may have to be computed from the file itself.
        if callable(columns):
            with OpenFile(source, "rb", compression) as f:
                columns = columns(f, **kwargs)
        if not isinstance(columns, pandas.Index):
            columns = pandas.Index(columns)

        # An empty frame with the final columns provides the metadata.
        metadata_df = pandas.DataFrame(columns=columns)
        index_name = metadata_df.index.name
        column_widths, num_splits = cls._define_metadata(metadata_df, columns)

        # kwargs that will be passed to the workers
        worker_kwargs = {
            **kwargs,
            "fname": source,
            "num_splits": num_splits,
            "nrows": None,
            "compression": compression,
        }

        partition_ids, index_ids, dtypes_ids = cls._launch_tasks(
            splits, callback=custom_parser, **worker_kwargs
        )

        return cls._get_new_qc(
            partition_ids=partition_ids,
            index_ids=index_ids,
            dtypes_ids=dtypes_ids,
            index_col=None,
            index_name=index_name,
            column_widths=column_widths,
            column_names=columns,
            nrows=kwargs["nrows"],
        )


================================================
FILE: modin/experimental/core/storage_formats/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Experimental functionality related to storage formats supported."""


================================================
FILE: modin/experimental/core/storage_formats/pandas/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""The module represents the query compiler level for the pandas storage format (experimental)."""


================================================
FILE: modin/experimental/core/storage_formats/pandas/parsers.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.


"""Module houses experimental Modin parser classes, that are used for data parsing on the workers."""

import warnings
from io import BytesIO

import pandas
from pandas.util._decorators import doc

from modin.core.io.file_dispatcher import OpenFile
from modin.core.storage_formats.pandas.parsers import (
    PandasCSVParser,
    PandasParser,
    _doc_pandas_parser_class,
    _doc_parse_func,
    _doc_parse_parameters_common,
    _split_result_for_readers,
)


@doc(_doc_pandas_parser_class, data_type="multiple CSV files simultaneously")
class ExperimentalPandasCSVGlobParser(PandasCSVParser):
    # The docstrings of this class and of `parse` are produced by `@doc`.
    @staticmethod
    @doc(
        _doc_parse_func,
        parameters="""chunks : list
    List, where each element of the list is a list of tuples. The inner lists
    of tuples contains the data file name of the chunk, chunk start offset, and
    chunk end offsets for its corresponding file.""",
    )
    def parse(chunks, **kwargs):
        warnings.filterwarnings("ignore")
        num_splits = kwargs.pop("num_splits", None)
        index_col = kwargs.get("index_col", None)

        # `single_worker_read` just pass filename via chunks; need check
        if isinstance(chunks, str):
            return pandas.read_csv(chunks, **kwargs)

        # `compression` and `storage_options` are only needed to open the
        # file; the bytes handed to pandas below are already uncompressed.
        compression = kwargs.pop("compression", "infer")
        storage_options = kwargs.pop("storage_options", None) or {}

        frames = []
        for fname, start, end in chunks:
            if start is None or end is None:
                # This only happens when we are reading with only one worker (Default)
                return pandas.read_csv(
                    fname,
                    compression=compression,
                    storage_options=storage_options,
                    **kwargs,
                )

            with OpenFile(fname, "rb", compression, **storage_options) as bio:
                # With an explicit encoding the first line of the file is
                # prepended to the chunk.
                if kwargs.get("encoding", None) is None:
                    prefix = b""
                else:
                    prefix = b"" + bio.readline()
                bio.seek(start)
                payload = prefix + bio.read(end - start)
            frames.append(pandas.read_csv(BytesIO(payload), **kwargs))

        # Combine read in data.
        if not frames:
            pandas_df = pandas.DataFrame()
        elif len(frames) == 1:
            pandas_df = frames[0]
        else:
            pandas_df = pandas.concat(frames)

        # Either the real index, or just the length that the caller turns
        # into a RangeIndex.
        index = len(pandas_df) if index_col is None else pandas_df.index
        return _split_result_for_readers(1, num_splits, pandas_df) + [
            index,
            pandas_df.dtypes,
        ]


@doc(_doc_pandas_parser_class, data_type="pickled pandas objects")
class ExperimentalPandasPickleParser(PandasParser):
    # The docstrings of this class and of `parse` are produced by `@doc`.
    @staticmethod
    @doc(_doc_parse_func, parameters=_doc_parse_parameters_common)
    def parse(fname, **kwargs):
        warnings.filterwarnings("ignore")
        return_raw = kwargs.pop("single_worker_read", None)
        # NOTE: `read_pickle` unpickles the file, so `fname` must be trusted.
        unpickled = pandas.read_pickle(fname, **kwargs)
        if return_raw:
            # Single-worker reads hand back the loaded object untouched.
            return unpickled
        assert isinstance(
            unpickled, pandas.DataFrame
        ), f"Pickled obj type: [{type(unpickled)}] in [{fname}]; works only with pandas.DataFrame"

        n_rows, n_cols = len(unpickled), len(unpickled.columns)

        # One split, followed by the row and column counts.
        return _split_result_for_readers(1, 1, unpickled) + [n_rows, n_cols]


@doc(_doc_pandas_parser_class, data_type="parquet files")
class ExperimentalPandasParquetParser(PandasParser):
    # The docstrings of this class and of `parse` are produced by `@doc`.
    @staticmethod
    @doc(_doc_parse_func, parameters=_doc_parse_parameters_common)
    def parse(fname, **kwargs):
        warnings.filterwarnings("ignore")
        return_raw = kwargs.pop("single_worker_read", None)
        frame = pandas.read_parquet(fname, **kwargs)
        if return_raw:
            # Single-worker reads hand back the loaded frame untouched.
            return frame

        n_rows, n_cols = len(frame), len(frame.columns)

        # One split, followed by the row and column counts.
        return _split_result_for_readers(1, 1, frame) + [n_rows, n_cols]


@doc(_doc_pandas_parser_class, data_type="json files")
class ExperimentalPandasJsonParser(PandasParser):
    # The docstrings of this class and of `parse` are produced by `@doc`.
    @staticmethod
    @doc(_doc_parse_func, parameters=_doc_parse_parameters_common)
    def parse(fname, **kwargs):
        warnings.filterwarnings("ignore")
        return_raw = kwargs.pop("single_worker_read", None)
        frame = pandas.read_json(fname, **kwargs)
        if return_raw:
            # Single-worker reads hand back the loaded frame untouched.
            return frame

        n_rows, n_cols = len(frame), len(frame.columns)

        # One split, followed by the row and column counts.
        return _split_result_for_readers(1, 1, frame) + [n_rows, n_cols]


@doc(_doc_pandas_parser_class, data_type="XML files")
class ExperimentalPandasXmlParser(PandasParser):
    # The docstrings of this class and of `parse` are produced by `@doc`.
    @staticmethod
    @doc(_doc_parse_func, parameters=_doc_parse_parameters_common)
    def parse(fname, **kwargs):
        warnings.filterwarnings("ignore")
        return_raw = kwargs.pop("single_worker_read", None)
        frame = pandas.read_xml(fname, **kwargs)
        if return_raw:
            # Single-worker reads hand back the loaded frame untouched.
            return frame

        n_rows, n_cols = len(frame), len(frame.columns)

        # One split, followed by the row and column counts.
        return _split_result_for_readers(1, 1, frame) + [n_rows, n_cols]


@doc(_doc_pandas_parser_class, data_type="custom text")
class ExperimentalCustomTextParser(PandasParser):
    # The docstrings of this class and of `parse` are produced by `@doc`.
    @staticmethod
    @doc(_doc_parse_func, parameters=_doc_parse_parameters_common)
    def parse(fname, **kwargs):
        # All the work is delegated to the generic parsing routine of
        # `PandasParser`, which receives every keyword argument unchanged.
        parsed = PandasParser.generic_parse(fname, **kwargs)
        return parsed


================================================
FILE: modin/experimental/fuzzydata/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module holds experimental fuzzydata specific functionality for Modin."""


================================================
FILE: modin/experimental/pandas/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
The main module through which interaction with the experimental API takes place.

See `Experimental API Reference` for details.

Notes
-----
* Some of the experimental APIs deviate from pandas in order to provide improved
  performance.

* Although the use of experimental storage formats and engines is available through the
  `modin.pandas` module when defining environment variable `MODIN_EXPERIMENTAL=true`,
  the use of experimental I/O functions is available only through the
  `modin.experimental.pandas` module.

Examples
--------
>>> import modin.experimental.pandas as pd
>>> df = pd.read_csv_glob("data*.csv")
"""

from modin.pandas import *  # noqa F401, F403

from .io import (  # noqa F401
    read_csv_glob,
    read_custom_text,
    read_json_glob,
    read_parquet_glob,
    read_pickle_glob,
    read_sql,
    read_xml_glob,
)


================================================
FILE: modin/experimental/pandas/io.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Implement experimental I/O public API."""

from __future__ import annotations

import inspect
import pathlib
import pickle
from typing import IO, AnyStr, Callable, Iterator, Literal, Optional, Union

import pandas
import pandas._libs.lib as lib
from pandas._typing import CompressionOptions, DtypeArg, DtypeBackend, StorageOptions

from modin.core.storage_formats import BaseQueryCompiler
from modin.utils import expanduser_path_arg

from . import DataFrame


def read_sql(
    sql,
    con,
    index_col=None,
    coerce_float=True,
    params=None,
    parse_dates=None,
    columns=None,
    chunksize=None,
    dtype_backend=lib.no_default,
    dtype=None,
    partition_column: Optional[str] = None,
    lower_bound: Optional[int] = None,
    upper_bound: Optional[int] = None,
    max_sessions: Optional[int] = None,
) -> Union[DataFrame, Iterator[DataFrame]]:
    """
    General documentation is available in `modin.pandas.read_sql`.

    This experimental feature provides distributed reading from a sql file.
    The function extended with `Spark-like parameters <https://spark.apache.org/docs/2.0.0/api/R/read.jdbc.html>`_
    such as ``partition_column``, ``lower_bound`` and ``upper_bound``. With these
    parameters, the user will be able to specify how to partition the imported data.

    Parameters
    ----------
    sql : str or SQLAlchemy Selectable (select or text object)
        SQL query to be executed or a table name.
    con : SQLAlchemy connectable, str, or sqlite3 connection
        Using SQLAlchemy makes it possible to use any DB supported by that
        library. If a DBAPI2 object, only sqlite3 is supported. The user is responsible
        for engine disposal and connection closure for the SQLAlchemy
        connectable; str connections are closed automatically. See
        `here <https://docs.sqlalchemy.org/en/13/core/connections.html>`_.
    index_col : str or list of str, optional
        Column(s) to set as index(MultiIndex).
    coerce_float : bool, default: True
        Attempts to convert values of non-string, non-numeric objects (like
        decimal.Decimal) to floating point, useful for SQL result sets.
    params : list, tuple or dict, optional
        List of parameters to pass to execute method. The syntax used to pass
        parameters is database driver dependent. Check your database driver
        documentation for which of the five syntax styles, described in PEP 249's
        paramstyle, is supported. Eg. for psycopg2, uses %(name)s so use params=
        {'name' : 'value'}.
    parse_dates : list or dict, optional
        - List of column names to parse as dates.
        - Dict of ``{column_name: format string}`` where format string is
          strftime compatible in case of parsing string times, or is one of
          (D, s, ns, ms, us) in case of parsing integer timestamps.
        - Dict of ``{column_name: arg dict}``, where the arg dict corresponds
          to the keyword arguments of :func:`pandas.to_datetime`
          Especially useful with databases without native Datetime support,
          such as SQLite.
    columns : list, optional
        List of column names to select from SQL table (only used when reading
        a table).
    chunksize : int, optional
        If specified, return an iterator where `chunksize` is the
        number of rows to include in each chunk.
    dtype_backend : {"numpy_nullable", "pyarrow"}, default: NumPy backed DataFrames
        Which dtype_backend to use, e.g. whether a DataFrame should have NumPy arrays,
        nullable dtypes are used for all dtypes that have a nullable implementation when
        "numpy_nullable" is set, PyArrow is used for all dtypes if "pyarrow" is set.
        The dtype_backends are still experimental.
    dtype : Type name or dict of columns, optional
        Data type for data or columns. E.g. np.float64 or {'a': np.float64, 'b': np.int32, 'c': 'Int64'}. The argument is ignored if a table is passed instead of a query.
    partition_column : str, optional
        Column used to share the data between the workers (MUST be a INTEGER column).
    lower_bound : int, optional
        The minimum value to be requested from the partition_column.
    upper_bound : int, optional
        The maximum value to be requested from the partition_column.
    max_sessions : int, optional
        The maximum number of simultaneous connections allowed to use.

    Returns
    -------
    modin.DataFrame or Iterator[modin.DataFrame]
    """
    # Snapshot the arguments before any other local name is bound. This must
    # stay the first statement: a copy is taken because, since Python 3.13
    # (PEP 667), `frame.f_locals` is a live view that would also expose the
    # locals assigned below when expanded with `**`.
    kwargs = dict(locals())

    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher

    result = FactoryDispatcher.read_sql_distributed(**kwargs)
    # A single query compiler maps to one DataFrame; any other result is
    # treated as an iterable of query compilers and wrapped lazily.
    if isinstance(result, BaseQueryCompiler):
        return DataFrame(query_compiler=result)
    return (DataFrame(query_compiler=qc) for qc in result)


@expanduser_path_arg("filepath_or_buffer")
def read_custom_text(
    filepath_or_buffer,
    columns,
    custom_parser,
    compression="infer",
    nrows: Optional[int] = None,
    is_quoting=True,
):
    r"""
    Load custom text data from file.

    Parameters
    ----------
    filepath_or_buffer : str
        File path where the custom text data will be loaded from.
    columns : list or callable(file-like object, \*\*kwargs) -> list
        Column names of list type or callable that create column names from opened file
        and passed `kwargs`.
    custom_parser : callable(file-like object, \*\*kwargs) -> pandas.DataFrame
        Function that takes as input a part of the `filepath_or_buffer` file loaded into
        memory in file-like object form.
    compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default: 'infer'
        If 'infer' and `filepath_or_buffer` is path-like, then detect compression from
        the following extensions: '.gz', '.bz2', '.zip', or '.xz' (otherwise no
        compression). If 'infer' and `filepath_or_buffer` is not path-like, then use
        None (= no decompression).
    nrows : int, optional
        Amount of rows to read.
    is_quoting : bool, default: True
        Whether or not to consider quotes.

    Returns
    -------
    modin.DataFrame
    """
    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher

    # Forward every argument by name rather than capturing the frame locals with
    # ``inspect.getargvalues(inspect.currentframe())``. Under PEP 667 (Python 3.13+)
    # ``frame.f_locals`` of a function is a live view of the frame, so names bound
    # later in the body (e.g. the locally imported ``FactoryDispatcher``) could end up
    # among the forwarded keyword arguments.
    return DataFrame(
        query_compiler=FactoryDispatcher.read_custom_text(
            filepath_or_buffer=filepath_or_buffer,
            columns=columns,
            custom_parser=custom_parser,
            compression=compression,
            nrows=nrows,
            is_quoting=is_quoting,
        )
    )


# CSV and table
def _make_parser_func(sep: str, funcname: str) -> Callable:
    """
    Create a ``read_csv``-like parser function that delegates to ``_read``.

    The generated function takes ``filepath_or_buffer`` positionally and all other
    options as keyword-only arguments, reuses the docstring of ``_read``, is renamed
    to `funcname` and is wrapped with ``expanduser_path_arg("filepath_or_buffer")``.

    Parameters
    ----------
    sep : str
        The separator default to use for the parser.
        NOTE(review): ``parser_func`` declares its own ``sep`` parameter which shadows
        this one, so this value does not appear to be read anywhere - confirm intent.
    funcname : str
        The name of the generated parser function.

    Returns
    -------
    Callable
    """

    def parser_func(
        filepath_or_buffer: Union[str, pathlib.Path, IO[AnyStr]],
        *,
        sep=lib.no_default,
        delimiter=None,
        header="infer",
        names=lib.no_default,
        index_col=None,
        usecols=None,
        dtype=None,
        engine=None,
        converters=None,
        true_values=None,
        false_values=None,
        skipinitialspace=False,
        skiprows=None,
        skipfooter=0,
        nrows=None,
        na_values=None,
        keep_default_na=True,
        na_filter=True,
        verbose=lib.no_default,
        skip_blank_lines=True,
        parse_dates=None,
        infer_datetime_format=lib.no_default,
        keep_date_col=lib.no_default,
        date_parser=lib.no_default,
        date_format=None,
        dayfirst=False,
        cache_dates=True,
        iterator=False,
        chunksize=None,
        compression="infer",
        thousands=None,
        decimal: str = ".",
        lineterminator=None,
        quotechar='"',
        quoting=0,
        escapechar=None,
        comment=None,
        encoding=None,
        encoding_errors="strict",
        dialect=None,
        on_bad_lines="error",
        doublequote=True,
        delim_whitespace=lib.no_default,
        low_memory=True,
        memory_map=False,
        float_precision=None,
        storage_options: StorageOptions = None,
        dtype_backend=lib.no_default,
    ) -> DataFrame:
        # ISSUE #2408: parse parameter shared with pandas read_csv and read_table and update with provided args
        # Names of all parameters accepted by `pandas.read_csv`; used below as an
        # allow-list for what gets forwarded.
        _pd_read_csv_signature = {
            val.name for val in inspect.signature(pandas.read_csv).parameters.values()
        }
        # Grab the local variables of this frame, i.e. the arguments received by this call.
        _, _, _, f_locals = inspect.getargvalues(inspect.currentframe())
        # A literal ``False`` separator is replaced with a tab character.
        if f_locals.get("sep", sep) is False:
            f_locals["sep"] = "\t"

        # Forward only names that `pandas.read_csv` declares; this also filters out the
        # helper locals created in this function.
        kwargs = {k: v for k, v in f_locals.items() if k in _pd_read_csv_signature}
        return _read(**kwargs)

    # The generated function is documented by `_read` and exposed under `funcname`.
    parser_func.__doc__ = _read.__doc__
    parser_func.__name__ = funcname
    return expanduser_path_arg("filepath_or_buffer")(parser_func)


def _read(**kwargs) -> DataFrame:
    """
    General documentation is available in `modin.pandas.read_csv`.

    This experimental feature provides parallel reading from multiple csv files which are
    defined by glob pattern.

    Parameters
    ----------
    **kwargs : dict
        Keyword arguments in `modin.pandas.read_csv`.

    Returns
    -------
    modin.DataFrame

    Examples
    --------
    >>> import modin.experimental.pandas as pd
    >>> df = pd.read_csv_glob("s3://dask-data/nyc-taxi/2015/yellow_tripdata_2015-1*")
    UserWarning: `read_*` implementation has mismatches with pandas:
    Data types of partitions are different! Please refer to the troubleshooting section of the Modin documentation to fix this issue.
            VendorID tpep_pickup_datetime  ... total_amount  congestion_surcharge
    0             1.0  2020-10-01 00:09:08  ...         4.30                   0.0
    1             1.0  2020-10-01 00:09:19  ...        13.30                   2.5
    2             1.0  2020-10-01 00:30:00  ...        15.36                   2.5
    3             2.0  2020-10-01 00:56:46  ...        -3.80                   0.0
    4             2.0  2020-10-01 00:56:46  ...         3.80                   0.0
    ...           ...                  ...  ...          ...                   ...
    4652008       NaN  2020-12-31 23:44:35  ...        43.95                   2.5
    4652009       NaN  2020-12-31 23:41:36  ...        20.17                   2.5
    4652010       NaN  2020-12-31 23:01:17  ...        78.98                   0.0
    4652011       NaN  2020-12-31 23:31:29  ...        39.50                   0.0
    4652012       NaN  2020-12-31 23:12:48  ...        20.64                   0.0

    [4652013 rows x 18 columns]
    """
    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher

    result = FactoryDispatcher.read_csv_glob(**kwargs)

    # Common case: the dispatcher returned a query compiler, wrap it right away.
    if not isinstance(result, pandas.io.parsers.TextFileReader):
        return DataFrame(query_compiler=result)

    # Otherwise a TextFileReader was returned for iterating through the data; patch
    # its ``read`` so that every call hands back a Modin DataFrame built from what the
    # original ``read`` produced.
    original_read = result.read

    def read_as_dataframe(*args, **read_kwargs):
        return DataFrame(query_compiler=original_read(*args, **read_kwargs))

    result.read = read_as_dataframe
    return result


# Public ``read_csv_glob`` reader: generated by ``_make_parser_func``, it forwards its
# arguments to ``_read``, which in turn calls ``FactoryDispatcher.read_csv_glob``.
read_csv_glob = _make_parser_func(sep=",", funcname="read_csv_glob")


@expanduser_path_arg("filepath_or_buffer")
def read_pickle_glob(
    filepath_or_buffer,
    compression: Optional[str] = "infer",
    storage_options: StorageOptions = None,
):
    """
    Load pickled pandas object from files.

    This experimental feature provides parallel reading from multiple pickle files which are
    defined by glob pattern. The files must contain parts of one dataframe, which can be
    obtained, for example, by `DataFrame.modin.to_pickle_glob` function.

    Parameters
    ----------
    filepath_or_buffer : str, path object or file-like object
        File path, URL, or buffer where the pickled object will be loaded from.
        Accept URL. URL is not limited to S3 and GCS.
    compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default: 'infer'
        If 'infer' and `filepath_or_buffer` is path-like, then detect compression from
        the following extensions: '.gz', '.bz2', '.zip', or '.xz' (otherwise no
        compression). If 'infer' and `filepath_or_buffer` is not path-like, then use
        None (= no decompression).
    storage_options : dict, optional
        Extra options that make sense for a particular storage connection, e.g.
        host, port, username, password, etc., if using a URL that will be parsed by
        fsspec, e.g., starting "s3://", "gcs://". An error will be raised if providing
        this argument with a non-fsspec URL. See the fsspec and backend storage
        implementation docs for the set of allowed keys and values.

    Returns
    -------
    modin.DataFrame
        DataFrame built from the query compiler returned by the dispatcher.

    Notes
    -----
    The number of partitions is equal to the number of input files.
    """
    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher

    # Forward every argument by name rather than capturing the frame locals with
    # ``inspect.getargvalues(inspect.currentframe())``. Under PEP 667 (Python 3.13+)
    # ``frame.f_locals`` of a function is a live view of the frame, so names bound
    # later in the body (e.g. the locally imported ``FactoryDispatcher``) could end up
    # among the forwarded keyword arguments.
    return DataFrame(
        query_compiler=FactoryDispatcher.read_pickle_glob(
            filepath_or_buffer=filepath_or_buffer,
            compression=compression,
            storage_options=storage_options,
        )
    )


@expanduser_path_arg("filepath_or_buffer")
def to_pickle_glob(
    self,
    filepath_or_buffer,
    compression: CompressionOptions = "infer",
    protocol: int = pickle.HIGHEST_PROTOCOL,
    storage_options: StorageOptions = None,
) -> None:
    """
    Pickle (serialize) object to file.

    This experimental feature provides parallel writing into multiple pickle files which are
    defined by glob pattern, otherwise (without glob pattern) default pandas implementation is used.

    Parameters
    ----------
    filepath_or_buffer : str
        File path where the pickled object will be stored.
    compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default: 'infer'
        A string representing the compression to use in the output file. By
        default, infers from the file extension in specified path.
        Compression mode may be any of the following possible
        values: {'infer', 'gzip', 'bz2', 'zip', 'xz', None}. If compression
        mode is 'infer' and `filepath_or_buffer` is path-like, then detect
        compression mode from the following extensions:
        '.gz', '.bz2', '.zip' or '.xz' (otherwise no compression).
        If dict given and mode is 'zip' or inferred as 'zip', other entries
        passed as additional compression options.
    protocol : int, default: pickle.HIGHEST_PROTOCOL
        Int which indicates which protocol should be used by the pickler,
        default HIGHEST_PROTOCOL (see `pickle docs <https://docs.python.org/3/library/pickle.html>`_
        paragraph 12.1.2 for details). The possible values are 0, 1, 2, 3, 4, 5. A negative value
        for the protocol parameter is equivalent to setting its value to HIGHEST_PROTOCOL.
    storage_options : dict, optional
        Extra options that make sense for a particular storage connection, e.g.
        host, port, username, password, etc., if using a URL that will be parsed by
        fsspec, e.g., starting "s3://", "gcs://". An error will be raised if providing
        this argument with a non-fsspec URL. See the fsspec and backend storage
        implementation docs for the set of allowed keys and values.
    """
    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher

    # A DataFrame is handed to the dispatcher as its query compiler; any other
    # object is passed through unchanged.
    target = self._query_compiler if isinstance(self, DataFrame) else self
    FactoryDispatcher.to_pickle_glob(
        target,
        filepath_or_buffer=filepath_or_buffer,
        compression=compression,
        protocol=protocol,
        storage_options=storage_options,
    )


@expanduser_path_arg("path")
def read_parquet_glob(
    path,
    engine: str = "auto",
    columns: list[str] | None = None,
    storage_options: StorageOptions = None,
    use_nullable_dtypes: bool = lib.no_default,
    dtype_backend=lib.no_default,
    filesystem=None,
    filters=None,
    **kwargs,
) -> DataFrame:  # noqa: PR01
    """
    Load a parquet object from the file path, returning a DataFrame.

    This experimental feature provides parallel reading from multiple parquet files which are
    defined by glob pattern. The files must contain parts of one dataframe, which can be
    obtained, for example, by `DataFrame.modin.to_parquet_glob` function.

    Returns
    -------
    DataFrame

    Notes
    -----
    * Only string type supported for `path` argument.
    * The rest of the arguments are the same as for `pandas.read_parquet`.
    """
    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher

    # Named options are forwarded as-is; any extra keyword arguments ride along.
    query_compiler = FactoryDispatcher.read_parquet_glob(
        path=path,
        engine=engine,
        columns=columns,
        storage_options=storage_options,
        use_nullable_dtypes=use_nullable_dtypes,
        dtype_backend=dtype_backend,
        filesystem=filesystem,
        filters=filters,
        **kwargs,
    )
    return DataFrame(query_compiler=query_compiler)


@expanduser_path_arg("path")
def to_parquet_glob(
    self,
    path,
    engine="auto",
    compression="snappy",
    index=None,
    partition_cols=None,
    storage_options: StorageOptions = None,
    **kwargs,
) -> None:  # noqa: PR01
    """
    Write a DataFrame to the binary parquet format.

    This experimental feature provides parallel writing into multiple parquet files which are
    defined by glob pattern, otherwise (without glob pattern) default pandas implementation is used.

    Notes
    -----
    * Only string type supported for `path` argument.
    * The rest of the arguments are the same as for `pandas.to_parquet`.
    """
    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher

    # A DataFrame is handed to the dispatcher as its query compiler; any other
    # object is passed through unchanged.
    target = self._query_compiler if isinstance(self, DataFrame) else self
    FactoryDispatcher.to_parquet_glob(
        target,
        path=path,
        engine=engine,
        compression=compression,
        index=index,
        partition_cols=partition_cols,
        storage_options=storage_options,
        **kwargs,
    )


@expanduser_path_arg("path_or_buf")
def read_json_glob(
    path_or_buf,
    *,
    orient: str | None = None,
    typ: Literal["frame", "series"] = "frame",
    dtype: DtypeArg | None = None,
    convert_axes=None,
    convert_dates: bool | list[str] = True,
    keep_default_dates: bool = True,
    precise_float: bool = False,
    date_unit: str | None = None,
    encoding: str | None = None,
    encoding_errors: str | None = "strict",
    lines: bool = False,
    chunksize: int | None = None,
    compression: CompressionOptions = "infer",
    nrows: int | None = None,
    storage_options: StorageOptions = None,
    dtype_backend: Union[DtypeBackend, lib.NoDefault] = lib.no_default,
    engine="ujson",
) -> DataFrame:  # noqa: PR01
    """
    Convert a JSON string to pandas object.

    This experimental feature provides parallel reading from multiple json files which are
    defined by glob pattern. The files must contain parts of one dataframe, which can be
    obtained, for example, by `DataFrame.modin.to_json_glob` function.

    Returns
    -------
    DataFrame

    Raises
    ------
    NotImplementedError
        If `nrows` is not None.

    Notes
    -----
    * Only string type supported for `path_or_buf` argument.
    * `nrows` must be left as None; any other value is rejected.
    * The rest of the arguments are the same as for `pandas.read_json`.
    """
    # Reject unsupported input before importing the dispatcher. The message points
    # at the reader (`read_json`), not at the writer (`to_json`).
    if nrows is not None:
        raise NotImplementedError(
            "`read_json_glob` only supports `nrows=None`; "
            + "use `read_json` to read a limited number of rows."
        )

    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher

    return DataFrame(
        query_compiler=FactoryDispatcher.read_json_glob(
            path_or_buf=path_or_buf,
            orient=orient,
            typ=typ,
            dtype=dtype,
            convert_axes=convert_axes,
            convert_dates=convert_dates,
            keep_default_dates=keep_default_dates,
            precise_float=precise_float,
            date_unit=date_unit,
            encoding=encoding,
            encoding_errors=encoding_errors,
            lines=lines,
            chunksize=chunksize,
            compression=compression,
            nrows=nrows,
            storage_options=storage_options,
            dtype_backend=dtype_backend,
            engine=engine,
        )
    )


@expanduser_path_arg("path_or_buf")
def to_json_glob(
    self,
    path_or_buf=None,
    orient=None,
    date_format=None,
    double_precision=10,
    force_ascii=True,
    date_unit="ms",
    default_handler=None,
    lines=False,
    compression="infer",
    index=None,
    indent=None,
    storage_options: StorageOptions = None,
    mode="w",
) -> None:  # noqa: PR01
    """
    Convert the object to a JSON string.

    The call is delegated to ``FactoryDispatcher.to_json_glob`` with all options
    forwarded by keyword.

    Notes
    -----
    * Only string type supported for `path_or_buf` argument.
    * The rest of the arguments are the same as for `pandas.to_json`.
    """
    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher

    # A DataFrame is handed to the dispatcher as its query compiler; any other
    # object is passed through unchanged.
    target = self._query_compiler if isinstance(self, DataFrame) else self
    FactoryDispatcher.to_json_glob(
        target,
        path_or_buf=path_or_buf,
        orient=orient,
        date_format=date_format,
        double_precision=double_precision,
        force_ascii=force_ascii,
        date_unit=date_unit,
        default_handler=default_handler,
        lines=lines,
        compression=compression,
        index=index,
        indent=indent,
        storage_options=storage_options,
        mode=mode,
    )


@expanduser_path_arg("path_or_buffer")
def read_xml_glob(
    path_or_buffer,
    *,
    xpath="./*",
    namespaces=None,
    elems_only=False,
    attrs_only=False,
    names=None,
    dtype=None,
    converters=None,
    parse_dates=None,
    encoding="utf-8",
    parser="lxml",
    stylesheet=None,
    iterparse=None,
    compression="infer",
    storage_options: StorageOptions = None,
    dtype_backend=lib.no_default,
) -> DataFrame:  # noqa: PR01
    """
    Read XML document into a DataFrame object.

    This experimental feature provides parallel reading from multiple XML files which are
    defined by glob pattern. The files must contain parts of one dataframe, which can be
    obtained, for example, by `DataFrame.modin.to_xml_glob` function.

    Returns
    -------
    DataFrame

    Notes
    -----
    * Only string type supported for `path_or_buffer` argument.
    * The rest of the arguments are the same as for `pandas.read_xml`.
    """
    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher

    # Every option is forwarded by keyword; the resulting query compiler is then
    # wrapped into a DataFrame.
    query_compiler = FactoryDispatcher.read_xml_glob(
        path_or_buffer=path_or_buffer,
        xpath=xpath,
        namespaces=namespaces,
        elems_only=elems_only,
        attrs_only=attrs_only,
        names=names,
        dtype=dtype,
        converters=converters,
        parse_dates=parse_dates,
        encoding=encoding,
        parser=parser,
        stylesheet=stylesheet,
        iterparse=iterparse,
        compression=compression,
        storage_options=storage_options,
        dtype_backend=dtype_backend,
    )
    return DataFrame(query_compiler=query_compiler)


@expanduser_path_arg("path_or_buffer")
def to_xml_glob(
    self,
    path_or_buffer=None,
    index=True,
    root_name="data",
    row_name="row",
    na_rep=None,
    attr_cols=None,
    elem_cols=None,
    namespaces=None,
    prefix=None,
    encoding="utf-8",
    xml_declaration=True,
    pretty_print=True,
    parser="lxml",
    stylesheet=None,
    compression="infer",
    storage_options=None,
) -> None:  # noqa: PR01
    """
    Render a DataFrame to an XML document.

    The call is delegated to ``FactoryDispatcher.to_xml_glob`` with all options
    forwarded by keyword.

    Notes
    -----
    * Only string type supported for `path_or_buffer` argument.
    * The rest of the arguments are the same as for `pandas.to_xml`.
    """
    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher

    # A DataFrame is handed to the dispatcher as its query compiler; any other
    # object is passed through unchanged.
    target = self._query_compiler if isinstance(self, DataFrame) else self
    FactoryDispatcher.to_xml_glob(
        target,
        path_or_buffer=path_or_buffer,
        index=index,
        root_name=root_name,
        row_name=row_name,
        na_rep=na_rep,
        attr_cols=attr_cols,
        elem_cols=elem_cols,
        namespaces=namespaces,
        prefix=prefix,
        encoding=encoding,
        xml_declaration=xml_declaration,
        pretty_print=pretty_print,
        parser=parser,
        stylesheet=stylesheet,
        compression=compression,
        storage_options=storage_options,
    )


================================================
FILE: modin/experimental/sklearn/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module holds experimental scikit-learn specific functionality for Modin."""


================================================
FILE: modin/experimental/sklearn/model_selection/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module holds model selection specific functionality."""

from .train_test_split import train_test_split

__all__ = ["train_test_split"]


================================================
FILE: modin/experimental/sklearn/model_selection/train_test_split.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module holds `train_test_split` function."""


def train_test_split(df, train_size=0.75, **options):
    """
    Split input data to train and test data.

    The first ``int(len(df) * train_size)`` rows make up the train part and the
    remaining rows make up the test part; rows are taken in their existing order.

    Parameters
    ----------
    df : modin.pandas.DataFrame / modin.pandas.Series
        Data to split.
    train_size : float, default: 0.75
        Fraction of the rows of `df` that goes into the train part.
    **options : dict
        Additional keyword arguments. They are accepted so that existing callers
        keep working, but they are not used.

    Returns
    -------
    tuple
        A pair of modin.pandas.DataFrame / modin.pandas.Series.
    """
    split_at = int(len(df) * train_size)
    # Slicing at the same position on both sides always yields two complementary
    # parts, so the length of the train part does not have to be computed separately.
    return df.iloc[:split_at], df.iloc[split_at:]


================================================
FILE: modin/experimental/spreadsheet/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

# Probe for the optional `modin_spreadsheet` dependency so that a missing
# installation is reported with an actionable message as soon as this package
# is imported.
try:
    import modin_spreadsheet
except ImportError:
    raise ImportError(
        'Please `pip install "modin[spreadsheet]"` to install the spreadsheet extension'
    )

from .general import from_dataframe, to_dataframe

__all__ = ["from_dataframe", "to_dataframe"]

# The probe import is only an availability check; remove the name so that it is
# not left behind as an attribute of this package.
del modin_spreadsheet


================================================
FILE: modin/experimental/spreadsheet/general.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

from modin_spreadsheet import SpreadsheetWidget, show_grid

from .. import pandas as pd


def from_dataframe(
    dataframe,
    show_toolbar=None,
    show_history=None,
    precision=None,
    grid_options=None,
    column_options=None,
    column_definitions=None,
    row_edit_callback=None,
):
    """
    Render a DataFrame as an interactive spreadsheet.

    The spreadsheet is represented by an instance of the ``SpreadsheetWidget``
    class, which is constructed using the options passed in to this function.
    The ``dataframe`` argument is handed to ``show_grid`` as the data to display
    and the rest of the parameters are passed through as is.

    Parameters
    ----------
    dataframe : DataFrame
        The DataFrame that will be displayed by this instance of
        SpreadsheetWidget.
    show_toolbar : bool, optional
        Whether to show a toolbar with options for adding/removing rows.
        Adding/removing rows is an experimental feature which only works
        with DataFrames that have an integer index.
    show_history : bool, optional
        Whether to show the cell containing the spreadsheet transformation
        history.
    precision : integer, optional
        The number of digits of precision to display for floating-point
        values.  If unset, we use the value of
        `pandas.get_option('display.precision')`.
    grid_options : dict, optional
        Options to use when creating the SlickGrid control (i.e. the
        interactive grid).  See the Notes section below for more information
        on the available options, as well as the default options that this
        widget uses.
    column_options : dict, optional
        Column options that are to be applied to every column. See the
        Notes section below for more information on the available options,
        as well as the default options that this widget uses.
    column_definitions : dict, optional
        Column options that are to be applied to individual
        columns. The keys of the dict should be the column names, and each
        value should be the column options for a particular column,
        represented as a dict. The available options for each column are the
        same options that are available to be set for all columns via the
        ``column_options`` parameter. See the Notes section below for more
        information on those options.
    row_edit_callback : callable, optional
        A callable that is called to determine whether a particular row
        should be editable or not. Its signature should be
        ``callable(row)``, where ``row`` is a dictionary which contains a
        particular row's values, keyed by column name. The callback should
        return True if the provided row should be editable, and False
        otherwise.

    Returns
    -------
    SpreadsheetWidget
        The widget returned by ``show_grid``.

    Raises
    ------
    TypeError
        If `dataframe` is not an instance of ``pd.DataFrame``.

    Notes
    -----
    The following dictionary is used for ``grid_options`` if none are
    provided explicitly::

        {
            # SlickGrid options
            'fullWidthRows': True,
            'syncColumnCellResize': True,
            'forceFitColumns': False,
            'defaultColumnWidth': 150,
            'rowHeight': 28,
            'enableColumnReorder': False,
            'enableTextSelectionOnCells': True,
            'editable': True,
            'autoEdit': False,
            'explicitInitialization': True,

            # Modin-spreadsheet options
            'maxVisibleRows': 15,
            'minVisibleRows': 8,
            'sortable': True,
            'filterable': True,
            'highlightSelectedCell': False,
            'highlightSelectedRow': True
        }

    The first group of options are SlickGrid "grid options" which are
    described in the `SlickGrid documentation
    <https://github.com/mleibman/SlickGrid/wiki/Grid-Options>`__.

    The second group of options are options that were added specifically
    for modin-spreadsheet and therefore are not documented in the SlickGrid documentation.
    The following bullet points describe these options.

    * **maxVisibleRows** The maximum number of rows that modin-spreadsheet will show.
    * **minVisibleRows** The minimum number of rows that modin-spreadsheet will show.
    * **sortable** Whether the modin-spreadsheet instance will allow the user to sort
      columns by clicking the column headers. When this is set to ``False``,
      nothing will happen when users click the column headers.
    * **filterable** Whether the modin-spreadsheet instance will allow the user to filter
      the grid. When this is set to ``False`` the filter icons won't be shown
      for any columns.
    * **highlightSelectedCell** If you set this to True, the selected cell
      will be given a light blue border.
    * **highlightSelectedRow** If you set this to False, the light blue
      background that's shown by default for selected rows will be hidden.

    The following dictionary is used for ``column_options`` if none are
    provided explicitly::

        {
            # SlickGrid column options
            'defaultSortAsc': True,
            'maxWidth': None,
            'minWidth': 30,
            'resizable': True,
            'sortable': True,
            'toolTip': "",
            'width': None,

            # Modin-spreadsheet column options
            'editable': True,
        }

    The first group of options are SlickGrid "column options" which are
    described in the `SlickGrid documentation
    <https://github.com/mleibman/SlickGrid/wiki/Column-Options>`__.

    The ``editable`` option was added specifically for modin-spreadsheet and therefore is
    not documented in the SlickGrid documentation.  This option specifies
    whether a column should be editable or not.

    See Also
    --------
    set_defaults : Permanently set global defaults for the parameters
                   of ``show_grid``, with the exception of the ``dataframe``
                   and ``column_definitions`` parameters, since those
                   depend on the particular set of data being shown by an
                   instance, and therefore aren't parameters we would want
                   to set for all SpreadsheetWidget instances.
    set_grid_option : Permanently set global defaults for individual
                      grid options.  Does so by changing the defaults
                      that the ``show_grid`` method uses for the
                      ``grid_options`` parameter.
    SpreadsheetWidget : The widget class that is instantiated and returned by this
                  method.
    """
    # NOTE(review): earlier documentation mentioned that a Series would be converted
    # to a DataFrame, but this check only admits ``pd.DataFrame`` instances - confirm
    # whether Series input is meant to be supported.
    if isinstance(dataframe, pd.DataFrame):
        # Options are forwarded positionally, in the order of this function's signature.
        return show_grid(
            dataframe,
            show_toolbar,
            show_history,
            precision,
            grid_options,
            column_options,
            column_definitions,
            row_edit_callback,
        )
    raise TypeError("dataframe must be modin.DataFrame, not %s" % type(dataframe))


def to_dataframe(spreadsheet):
    """
    Get a copy of the DataFrame reflecting the current UI state of ``spreadsheet``.

    The result is whatever ``spreadsheet.get_changed_df()`` returns, which
    includes any sorting or filtering changes, as well as edits that have
    been made by double clicking cells.

    :rtype: DataFrame

    Parameters
    ----------
    spreadsheet : SpreadsheetWidget
        The SpreadsheetWidget instance whose current contents should be
        exported as a DataFrame.

    Raises
    ------
    TypeError
        If ``spreadsheet`` is not a ``SpreadsheetWidget`` instance.
    """
    if isinstance(spreadsheet, SpreadsheetWidget):
        return spreadsheet.get_changed_df()

    raise TypeError(
        "spreadsheet must be modin_spreadsheet.SpreadsheetWidget, not %s"
        % type(spreadsheet)
    )


================================================
FILE: modin/experimental/torch/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module supports conversion for torch `DataLoader` interplay."""


================================================
FILE: modin/experimental/torch/datasets.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.
from __future__ import annotations

import math
from typing import Hashable, Sequence, Type

from pandas import DataFrame
from torch.utils.data import Sampler, SequentialSampler

from modin.pandas import DataFrame as ModinDataFrame


class ModinDataLoader:
    """Batch the rows of a pandas/Modin DataFrame in the manner of a torch ``DataLoader``."""

    def __init__(
        self,
        df: DataFrame | ModinDataFrame,
        batch_size: int,
        features: Sequence[Hashable] = (),
        sampler: Type[Sampler] | Sampler = SequentialSampler,
    ) -> None:
        """
        Wrap a Pandas/Modin DataFrame so that it can be iterated in batches of rows.

        NOTE: This function should eventually go into modin/utils.py.

        Parameters
        ----------
        df : DataFrame
            Source of the rows to batch.
        batch_size : int
            Number of rows in every batch; the final batch may be smaller.
        features : Sequence[Hashable], default: ()
            If specified, only these features will be used.
        sampler : Type[Sampler] | Sampler, default: SequentialSampler
            The sampler to use. By default, iterates over the DataFrame in order.
            A sampler class is instantiated with the (feature-restricted) frame;
            a sampler instance is used as is.

        Raises
        ------
        ValueError
            If `batch_size` is smaller than 1.
        """
        # A non-positive batch size can never be filled, so reject it up front
        # instead of failing (or misbehaving) later during iteration.
        if batch_size < 1:
            raise ValueError(
                f"`batch_size` must be a positive integer, got {batch_size!r}."
            )

        if features:
            df = df[features]

        if isinstance(sampler, type):
            sampler = sampler(df)

        self._df = df
        self._batch_size = batch_size
        self._sampler = sampler

    def __len__(self):
        """
        Get the number of batches produced by one pass over the sampler.

        Returns
        -------
        int
            ``ceil(len(sampler) / batch_size)``; requires the sampler to define ``__len__``.
        """
        return math.ceil(len(self._sampler) / self._batch_size)

    def __iter__(self):
        """
        Iterate over the sampled rows batch by batch.

        Yields
        ------
        numpy.ndarray
            Rows selected positionally by up to `batch_size` consecutive sampler indices.
        """
        idx_buffer = []

        for idx in self._sampler:
            idx_buffer.append(idx)

            if len(idx_buffer) == self._batch_size:
                yield self._df.iloc[idx_buffer].to_numpy()
                idx_buffer = []

        # Flush the trailing partial batch. Doing it here, rather than comparing
        # a counter with ``len(sampler)``, keeps iteration working for samplers
        # that have no ``__len__`` or report an inexact one.
        if idx_buffer:
            yield self._df.iloc[idx_buffer].to_numpy()


================================================
FILE: modin/experimental/xgboost/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module holds public interfaces for Modin XGBoost."""

from .xgboost import Booster, DMatrix, train

# Public names of this package; all of them are re-exported from the ``.xgboost`` submodule.
__all__ = ["DMatrix", "Booster", "train"]


================================================
FILE: modin/experimental/xgboost/utils.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module holds classes for work with Rabit all-reduce context."""

import logging

import xgboost as xgb

# Module-level logger; the Rabit context classes below report start/finish events through it.
LOGGER = logging.getLogger("[modin.xgboost]")


class RabitContextManager:
    """
    Context manager owning an `xgb.RabitTracker` for the duration of a training run.

    Workers taking part in distributed training connect to the tracker
    held by this object, using the environment returned from ``__enter__``.

    Parameters
    ----------
    num_workers : int
        Number of workers of `self.rabit_tracker`.
    host_ip : str
        IP address of host that creates `self` object.
    """

    # TODO: Specify type of host_ip
    def __init__(self, num_workers: int, host_ip):
        self._num_workers = num_workers
        # Seeded with the worker count; completed with tracker data on enter.
        self.env = {"DMLC_NUM_WORKER": num_workers}
        self.rabit_tracker = xgb.RabitTracker(
            n_workers=num_workers, host_ip=host_ip
        )

    def __enter__(self):
        """
        Start the tracker and expose its environment.

        Merges the tracker's worker environment into `self.env` and then
        starts `self.rabit_tracker`.

        Returns
        -------
        dict
            Dict with Rabit Tracker environment.
        """
        tracker = self.rabit_tracker
        self.env.update(tracker.worker_envs())
        tracker.start(self._num_workers)
        return self.env

    # TODO: (type, value, traceback) -> *args
    def __exit__(self, type, value, traceback):
        """
        Wait for `self.rabit_tracker` to finish.

        The exception details are accepted but not inspected; nothing is
        returned, so an in-flight exception is not suppressed.

        Parameters
        ----------
        type : exception type
            Type of exception, captured  by manager.
        value : Exception
            Exception value.
        traceback : TracebackType
            Traceback of exception.
        """
        self.rabit_tracker.join()


class RabitContext:
    """
    Context to connect a worker to a rabit tracker.

    Parameters
    ----------
    actor_rank : int
        Rank of actor, connected to this context.
    args : list
        List with environment variables for Rabit Tracker. The list is copied,
        so the caller's object is left untouched.
    """

    def __init__(self, actor_rank, args):
        # Build a fresh list instead of appending in place: reusing one `args`
        # list for several contexts must not pile up DMLC_TASK_ID entries.
        task_id = f"DMLC_TASK_ID=[modin.xgboost]:{actor_rank}".encode()
        self.args = [*args, task_id]

    def __enter__(self):
        """
        Entry point of context.

        Connects to Rabit Tracker by passing `self.args` to ``xgb.rabit.init``.
        """
        xgb.rabit.init(self.args)
        LOGGER.info("-------------- rabit started ------------------")

    def __exit__(self, *args):
        """
        Exit point of context.

        Disconnects from Rabit Tracker.

        Parameters
        ----------
        *args : iterable
            Parameters for Exception capturing.
        """
        xgb.rabit.finalize()
        LOGGER.info("-------------- rabit finished ------------------")


================================================
FILE: modin/experimental/xgboost/xgboost.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module holds public interfaces for work Modin XGBoost."""

import logging
from typing import Dict, Optional

import xgboost as xgb

import modin.pandas as pd
from modin.config import Engine
from modin.distributed.dataframe.pandas import unwrap_partitions

# Module-level logger used by ``Booster.predict`` and ``train`` to report progress.
LOGGER = logging.getLogger("[modin.xgboost]")


class DMatrix:
    """
    DMatrix holds references to partitions of Modin DataFrame.

    On init stage unwrapping partitions of Modin DataFrame is started.

    Parameters
    ----------
    data : modin.pandas.DataFrame
        Data source of DMatrix.
    label : modin.pandas.DataFrame or modin.pandas.Series, optional
        Labels used for training.
    missing : float, optional
        Value in the input data which needs to be present as a missing
        value. If ``None``, defaults to ``np.nan``.
    silent : boolean, optional
        Whether to print messages during construction or not.
    feature_names : list, optional
        Set names for features.
    feature_types : list, optional
        Set types for features.
    feature_weights : array_like, optional
        Set feature weights for column sampling.
    enable_categorical : boolean, optional
        Experimental support of specializing for categorical features.

    Notes
    -----
    Currently DMatrix doesn't support `weight`, `base_margin`, `nthread`,
    `group`, `qid`, `label_lower_bound`, `label_upper_bound` parameters.
    """

    def __init__(
        self,
        data,
        label=None,
        missing=None,
        silent=False,
        feature_names=None,
        feature_types=None,
        feature_weights=None,
        enable_categorical=None,
    ):
        assert isinstance(
            data, pd.DataFrame
        ), f"Type of `data` is {type(data)}, but expected {pd.DataFrame}."

        if label is not None:
            assert isinstance(
                label, (pd.DataFrame, pd.Series)
            ), f"Type of `label` is {type(label)}, but expected {pd.DataFrame} or {pd.Series}."
            self.label = unwrap_partitions(label, axis=0)
        else:
            self.label = None

        # Row partitions of `data`, requested together with their IPs (`get_ip=True`).
        self.data = unwrap_partitions(data, axis=0, get_ip=True)

        # The shape has to be stored before the feature name/type setters run,
        # because they validate against `num_col()`.
        self._n_rows = data.shape[0]
        self._n_cols = data.shape[1]

        for i, dtype in enumerate(data.dtypes):
            if dtype == "object":
                raise ValueError(f"Column {i} has unsupported data type {dtype}.")

        self.feature_names = feature_names
        self.feature_types = feature_types

        self.missing = missing
        self.silent = silent
        self.feature_weights = feature_weights
        self.enable_categorical = enable_categorical

        # Index, columns and row partition lengths of the source frame.
        self.metadata = (
            data.index,
            data.columns,
            data._query_compiler._modin_frame.row_lengths,
        )

    def __iter__(self):
        """
        Return unwrapped `self.data` and `self.label`.

        Yields
        ------
        list
            List of `self.data` with pairs of references to IP of row partition
            and row partition [(IP_ref0, partition_ref0), ..].
        list
            List of `self.label` with references to row partitions
            [partition_ref0, ..].
        """
        yield self.data
        yield self.label

    def get_dmatrix_params(self):
        """
        Get dict of DMatrix parameters excluding `self.data`/`self.label`.

        Returns
        -------
        dict
        """
        return {
            "feature_names": self.feature_names,
            "feature_types": self.feature_types,
            "missing": self.missing,
            "silent": self.silent,
            "feature_weights": self.feature_weights,
            "enable_categorical": self.enable_categorical,
        }

    @property
    def feature_names(self):
        """
        Get column labels.

        Returns
        -------
        Column labels.
        """
        return self._feature_names

    @feature_names.setter
    def feature_names(self, feature_names):
        """
        Set column labels.

        Parameters
        ----------
        feature_names : list or None
            Labels for columns. In the case of ``None``, existing feature names will be reset.
        """
        if feature_names is not None:
            # A lone string is a single name, not an iterable of characters.
            feature_names = (
                [feature_names]
                if isinstance(feature_names, str)
                else list(feature_names)
            )

            if len(feature_names) != len(set(feature_names)):
                raise ValueError("Items in `feature_names` must be unique.")
            if len(feature_names) != self.num_col() and self.num_col() != 0:
                raise ValueError(
                    "`feature_names` must have the same width as `self.data`."
                )
            if not all(
                isinstance(f, str) and not any(x in f for x in ("[", "]", "<"))
                for f in feature_names
            ):
                raise ValueError(
                    "Items of `feature_names` must be string and must not contain [, ] or <."
                )
        self._feature_names = feature_names

    @property
    def feature_types(self):
        """
        Get column types.

        Returns
        -------
        Column types.
        """
        return self._feature_types

    @feature_types.setter
    def feature_types(self, feature_types):
        """
        Set column types.

        Parameters
        ----------
        feature_types : list, str or None
            Types for columns. A single string is applied to every column.
            In case of ``None``, existing feature types will be reset.
        """
        if feature_types is not None:
            if not isinstance(feature_types, (list, str)):
                raise TypeError("feature_types must be string or list of strings")
            if isinstance(feature_types, str):
                # Broadcast the single type over all columns.
                feature_types = [feature_types] * self.num_col()
        self._feature_types = feature_types

    def num_row(self):
        """
        Get number of rows.

        Returns
        -------
        int
        """
        return self._n_rows

    def num_col(self):
        """
        Get number of columns.

        Returns
        -------
        int
        """
        return self._n_cols

    def get_float_info(self, name):
        """
        Get float property from the DMatrix.

        The value is looked up as the attribute `name` of this object.

        Parameters
        ----------
        name : str
            The field name of the information.

        Returns
        -------
        A NumPy array of float information of the data.
        """
        return getattr(self, name)

    def set_info(
        self,
        *,
        label=None,
        feature_names=None,
        feature_types=None,
        feature_weights=None,
    ) -> None:
        """
        Set meta info for DMatrix.

        Only the arguments that are not ``None`` are applied.

        Parameters
        ----------
        label : modin.pandas.DataFrame or modin.pandas.Series, optional
            Labels used for training.
        feature_names : list, optional
            Set names for features.
        feature_types : list, optional
            Set types for features.
        feature_weights : array_like, optional
            Set feature weights for column sampling.
        """
        if label is not None:
            # NOTE(review): unlike `__init__`, the label is stored as passed and
            # is not run through `unwrap_partitions` - confirm this is intended.
            self.label = label
        if feature_names is not None:
            self.feature_names = feature_names
        if feature_types is not None:
            self.feature_types = feature_types
        if feature_weights is not None:
            self.feature_weights = feature_weights


class Booster(xgb.Booster):
    """
    A Modin Booster of XGBoost.

    Booster is the model of XGBoost, that contains low level routines for
    training, prediction and evaluation.

    Parameters
    ----------
    params : dict, optional
        Parameters for boosters.
    cache : list, default: empty
        List of cache items.
    model_file : string/os.PathLike/xgb.Booster/bytearray, optional
        Path to the model file if it's string or PathLike or xgb.Booster.
    """

    def __init__(self, params=None, cache=(), model_file=None):  # noqa: MD01
        super().__init__(params=params, cache=cache, model_file=model_file)

    def predict(
        self,
        data: DMatrix,
        **kwargs,
    ):
        """
        Run distributed prediction with a trained booster.

        During execution it runs ``xgb.predict`` on each worker for subset of `data`
        and creates Modin DataFrame with prediction results.

        Parameters
        ----------
        data : modin.experimental.xgboost.DMatrix
            Input data used for prediction.
        **kwargs : dict
            Other parameters are the same as for ``xgboost.Booster.predict``.

        Returns
        -------
        modin.pandas.DataFrame
            Modin DataFrame with prediction results.

        Raises
        ------
        ValueError
            If the engine is not Ray, or if both the booster and `data` carry
            feature names and they differ.
        """
        LOGGER.info("Prediction started")

        if Engine.get() == "Ray":
            # Imported lazily so that Ray is only required when it is the engine.
            from .xgboost_ray import _predict
        else:
            raise ValueError("Current version supports only Ray engine.")

        assert isinstance(
            data, DMatrix
        ), f"Type of `data` is {type(data)}, but expected {DMatrix}."

        if (
            self.feature_names is not None
            and data.feature_names is not None
            and self.feature_names != data.feature_names
        ):
            data_missing = set(self.feature_names) - set(data.feature_names)
            self_missing = set(data.feature_names) - set(self.feature_names)

            # Interpolate the two name lists first and only then append the
            # individual names: a feature name containing "{" or "}" must not
            # be interpreted as a format field.
            msg = f"feature_names mismatch: {self.feature_names} {data.feature_names}"

            if data_missing:
                msg += (
                    "\nexpected "
                    + ", ".join(str(s) for s in data_missing)
                    + " in input data"
                )

            if self_missing:
                msg += (
                    "\ntraining data did not have the following fields: "
                    + ", ".join(str(s) for s in self_missing)
                )

            raise ValueError(msg)

        # The remote prediction works on a copy of this booster.
        result = _predict(self.copy(), data, **kwargs)
        LOGGER.info("Prediction finished")

        return result


def train(
    params: Dict,
    dtrain: DMatrix,
    *args,
    evals=(),
    num_actors: Optional[int] = None,
    evals_result: Optional[Dict] = None,
    **kwargs,
):
    """
    Train an XGBoost model in a distributed fashion.

    `dtrain` is evenly distributed between workers according to the IP
    addresses of its partitions (if the distribution over nodes is not even,
    some partitions are re-distributed between nodes). Every worker runs
    xgb.train on its subset of `dtrain`, and the per-worker results are
    reduced using Rabit Context.

    Parameters
    ----------
    params : dict
        Booster params.
    dtrain : modin.experimental.xgboost.DMatrix
        Data to be trained against.
    *args : iterable
        Other parameters for `xgboost.train`.
    evals : list of pairs (modin.experimental.xgboost.DMatrix, str), default: empty
        List of validation sets for which metrics will evaluated during training.
        Validation metrics will help us track the performance of the model.
    num_actors : int, optional
        Number of actors for training. If unspecified, this value will be
        computed automatically.
    evals_result : dict, optional
        Dict to store evaluation results in.
    **kwargs : dict
        Other parameters are the same as `xgboost.train`.

    Returns
    -------
    modin.experimental.xgboost.Booster
        A trained booster.
    """
    LOGGER.info("Training started")

    if Engine.get() != "Ray":
        raise ValueError("Current version supports only Ray engine.")
    # Deferred import: the Ray implementation is only needed on the Ray engine.
    from .xgboost_ray import _train

    assert isinstance(
        dtrain, DMatrix
    ), f"Type of `dtrain` is {type(dtrain)}, but expected {DMatrix}."

    training_output = _train(
        dtrain, params, *args, num_actors=num_actors, evals=evals, **kwargs
    )
    # The evaluation history is only handed back through a caller-supplied dict.
    if isinstance(evals_result, dict):
        evals_result.update(training_output["history"])

    LOGGER.info("Training finished")
    return Booster(model_file=training_output["booster"])


================================================
FILE: modin/experimental/xgboost/xgboost_ray.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
Module holds internal entities for Modin XGBoost on Ray engine.

Class ModinXGBoostActor provides interfaces to run XGBoost operations
on remote workers. Other functions create Ray actors, distribute data between them, etc.
"""

import logging
import math
import time
import warnings
from collections import defaultdict
from typing import Dict, List

import numpy as np
import pandas
import ray
import xgboost as xgb
from ray.util import get_node_ip_address

from modin.core.execution.ray.common import RayWrapper
from modin.distributed.dataframe.pandas import from_partitions

from .utils import RabitContext, RabitContextManager

# Module-level logger used by the actor class and helpers in this module for timing/progress messages.
LOGGER = logging.getLogger("[modin.xgboost]")


@ray.remote(num_cpus=0)
class ModinXGBoostActor:
    """
    Ray actor that performs XGBoost training on a remote worker.

    Parameters
    ----------
    rank : int
        Rank of this actor.
    nthread : int
        Number of threads used by XGBoost in this actor.
    """

    def __init__(self, rank, nthread):
        self._rank = rank
        self._nthreads = nthread
        # (xgb.DMatrix, name) pairs collected for evaluation.
        self._evals = []

        LOGGER.info(f"Actor <{rank}>, nthread = {nthread} was initialized.")

    def _get_dmatrix(self, X_y, **dmatrix_kwargs):
        """
        Build an xgboost.DMatrix out of a sequence of pandas.DataFrame objects.

        The first half of `X_y` holds the objects for `X`, the second half
        the objects for `y`.

        Parameters
        ----------
        X_y : list
            List of pandas.DataFrame objects.
        **dmatrix_kwargs : dict
            Keyword parameters for ``xgb.DMatrix``.

        Returns
        -------
        xgb.DMatrix
            A XGBoost DMatrix.
        """
        started = time.time()
        half = len(X_y) // 2
        X_parts, y_parts = X_y[:half], X_y[half:]

        assert (
            len(X_parts) == len(y_parts) and len(X_parts) > 0
        ), "X and y should have the equal length more than 0"

        features = pandas.concat(X_parts, axis=0)
        labels = pandas.concat(y_parts, axis=0)
        LOGGER.info(f"Concat time: {time.time() - started} s")

        return xgb.DMatrix(
            features, labels, nthread=self._nthreads, **dmatrix_kwargs
        )

    def set_train_data(self, *X_y, add_as_eval_method=None, **dmatrix_kwargs):
        """
        Store the training data of this actor.

        Parameters
        ----------
        *X_y : iterable
            Sequence of ray.ObjectRef objects. First half of sequence is for
            `X` data, second for `y`. When it is passed in actor, auto-materialization
            of ray.ObjectRef -> pandas.DataFrame happens.
        add_as_eval_method : str, optional
            Name of eval data. Used in case when train data also used for evaluation.
        **dmatrix_kwargs : dict
            Keyword parameters for ``xgb.DMatrix``.
        """
        dtrain = self._get_dmatrix(X_y, **dmatrix_kwargs)
        self._dtrain = dtrain

        if add_as_eval_method is not None:
            self._evals.append((dtrain, add_as_eval_method))

    def add_eval_data(self, *X_y, eval_method, **dmatrix_kwargs):
        """
        Register an additional evaluation set for this actor.

        Parameters
        ----------
        *X_y : iterable
            Sequence of ray.ObjectRef objects. First half of sequence is for
            `X` data, second for `y`. When it is passed in actor, auto-materialization
            of ray.ObjectRef -> pandas.DataFrame happens.
        eval_method : str
            Name of eval data.
        **dmatrix_kwargs : dict
            Keyword parameters for ``xgb.DMatrix``.
        """
        deval = self._get_dmatrix(X_y, **dmatrix_kwargs)
        self._evals.append((deval, eval_method))

    def train(self, rabit_args, params, *args, **kwargs):
        """
        Run local XGBoost training.

        Connects to Rabit Tracker environment to share training data between
        actors and trains XGBoost booster using `self._dtrain`.

        Parameters
        ----------
        rabit_args : list
            List with environment variables for Rabit Tracker.
        params : dict
            Booster params.
        *args : iterable
            Other parameters for `xgboost.train`.
        **kwargs : dict
            Other parameters for `xgboost.train`.

        Returns
        -------
        dict
            A dictionary with trained booster and dict of
            evaluation results
            as {"booster": xgb.Booster, "history": dict}.
        """
        # Work on a copy so the caller's params are not altered by `nthread`.
        booster_params = params.copy()
        dtrain = self._dtrain
        evals = self._evals
        booster_params["nthread"] = self._nthreads

        history = {}

        started = time.time()
        with RabitContext(self._rank, rabit_args):
            bst = xgb.train(
                booster_params,
                dtrain,
                *args,
                evals=evals,
                evals_result=history,
                **kwargs,
            )
            LOGGER.info(f"Local training time: {time.time() - started} s")

        return {"booster": bst, "history": history}


def _get_cluster_cpus():
    """
    Get the "CPU" entry of the Ray cluster resources.

    Returns
    -------
    int
        Number of CPUs available on cluster; 1 if Ray reports no "CPU" resource.
    """
    resources = ray.cluster_resources()
    return resources.get("CPU", 1)


def _get_min_cpus_per_node():
    """
    Get the smallest per-node CPU count among the cluster nodes.

    Returns
    -------
    int
        Min number of CPUs per node. Falls back to the cluster-wide CPU count
        when that minimum is not positive.
    """
    # A node that reports no "CPU" resource counts as 0.0.
    cpus_by_node = [
        node.get("Resources", {}).get("CPU", 0.0) for node in ray.nodes()
    ]
    min_node_cpus = min(cpus_by_node)
    if min_node_cpus > 0.0:
        return min_node_cpus
    return _get_cluster_cpus()


def _get_cpus_per_actor(num_actors):
    """
    Compute how many CPUs every actor should use.

    Parameters
    ----------
    num_actors : int
        Number of Ray actors.

    Returns
    -------
    int
        Number of CPUs per actor: the smaller of the per-node minimum and an
        even share of the cluster CPUs, but never less than 1.
    """
    total_cpus = _get_cluster_cpus()
    per_node_cap = int(_get_min_cpus_per_node() or 1)
    even_share = int(total_cpus // num_actors)
    return max(1, min(per_node_cap, even_share))


def _get_num_actors(num_actors=None):
    """
    Get number of actors to create.

    Parameters
    ----------
    num_actors : int, optional
        Desired number of actors. If is None, integer number of actors
        will be computed by condition 2 CPUs per 1 actor.

    Returns
    -------
    int
        Number of actors to create.
    """
    min_cpus_per_node = _get_min_cpus_per_node()
    if num_actors is None:
        num_actors_per_node = max(1, int(min_cpus_per_node // 2))
        return num_actors_per_node * len(ray.nodes())
    elif isinstance(num_actors, int):
        assert (
            num_actors % len(ray.nodes()) == 0
        ), "`num_actors` must be a multiple to number of nodes in Ray cluster."
        return num_actors
    else:
        RuntimeError("`num_actors` must be int or None")


def create_actors(num_actors):
    """
    Spawn ModinXGBoostActors across the nodes of the Ray cluster.

    Parameters
    ----------
    num_actors : int
        Number of actors to create.

    Returns
    -------
    list
        List of pairs (ip, actor).
    """
    cpus_per_actor = _get_cpus_per_actor(num_actors)
    # starting from ray 2.6 there is a new field: 'node:__internal_head__'
    # example:
    # >>> ray.cluster_resources()
    # {'object_store_memory': 1036438732.0, 'memory': 2072877467.0, 'node:127.0.0.1': 1.0, 'CPU': 8.0, 'node:__internal_head__': 1.0}
    node_resources = [
        name
        for name in ray.cluster_resources()
        if name.startswith("node:") and "__internal_head__" not in name
    ]

    actors_per_node = max(num_actors // len(node_resources), 1)

    actors = []
    rank = 0
    for node_resource in node_resources:
        # Requesting a sliver of the node resource pins the actor to that node.
        for _ in range(actors_per_node):
            handle = ModinXGBoostActor.options(
                resources={node_resource: 0.01}
            ).remote(rank, nthread=cpus_per_actor)
            actors.append((node_resource.split("node:")[-1], handle))
            rank += 1
    return actors


def _split_data_across_actors(
    actors: List,
    set_func,
    X_parts,
    y_parts,
):
    """
    Split row partitions of data between actors.

    X partitions are assigned first; y partitions are then assigned
    following the order chosen for X, so that both stay aligned per actor.

    Parameters
    ----------
    actors : list
        List of used actors.
    set_func : callable
        The function for setting data in actor.
    X_parts : list
        Row partitions of X data.
    y_parts : list
        Row partitions of y data.
    """
    X_assignment = _assign_row_partitions_to_actors(actors, X_parts)
    y_assignment = _assign_row_partitions_to_actors(
        actors, y_parts, data_for_aligning=X_assignment
    )

    for rank, (_, actor) in enumerate(actors):
        # Each assignment entry is (partitions, order); only partitions are sent.
        X_for_rank = X_assignment[rank][0]
        y_for_rank = y_assignment[rank][0]
        payload = X_for_rank + y_for_rank
        set_func(actor, *payload)


def _assign_row_partitions_to_actors(
    actors: List,
    row_partitions,
    data_for_aligning=None,
):
    """
    Assign row_partitions to actors.

    `row_partitions` will be assigned to actors according to their IPs.
    If distribution isn't even, partitions will be moved from actor
    with excess partitions to actor with lack of them.

    Parameters
    ----------
    actors : list
        List of used actors, as pairs (ip, actor).
    row_partitions : list
        Row partitions of data to assign. When `data_for_aligning` is None,
        each item is unpacked as a pair (ip reference, partition reference);
        otherwise items are only indexed by position.
    data_for_aligning : dict, optional
        Data according to the order of which should be
        distributed `row_partitions`. Used to align y with X.
        Expected in the same format as the return value of this function.

    Returns
    -------
    dict
        Dictionary of assigned to actors partitions
        as {actor_rank: (partitions, order)}.

    Raises
    ------
    RuntimeError
        If some partitions are left unassigned after both distribution passes.
    """
    num_actors = len(actors)
    if data_for_aligning is None:
        # Split the (ip reference, partition reference) pairs into two sequences
        parts_ips_ref, parts_ref = zip(*row_partitions)

        # Group actors which are on the same ip
        actor_ips = defaultdict(list)
        for rank, (ip, _) in enumerate(actors):
            actor_ips[ip].append(rank)

        # Get distribution of parts between nodes ({ip:[(part, position),..],..})
        init_parts_distribution = defaultdict(list)
        for idx, (ip, part_ref) in enumerate(
            zip(RayWrapper.materialize(list(parts_ips_ref)), parts_ref)
        ):
            init_parts_distribution[ip].append((part_ref, idx))

        # Target sizes: every actor gets `min_parts_per_actor` parts and the
        # first `num_parts % num_actors` served actors get one more.
        num_parts = len(parts_ref)
        min_parts_per_actor = math.floor(num_parts / num_actors)
        max_parts_per_actor = math.ceil(num_parts / num_actors)
        num_actors_with_max_parts = num_parts % num_actors

        row_partitions_by_actors = defaultdict(list)
        # Fill actors without moving parts between ips
        for actor_ip, ranks in actor_ips.items():
            # Loop across actors which are placed on actor_ip
            for rank in ranks:
                num_parts_on_ip = len(init_parts_distribution[actor_ip])

                # Check that have something to distribute on this ip
                if num_parts_on_ip == 0:
                    break
                # Check that node with `actor_ip` has enough parts for minimal
                # filling actor with `rank`
                if num_parts_on_ip >= min_parts_per_actor:
                    # Check that node has enough parts for max filling
                    # actor with `rank`
                    if (
                        num_parts_on_ip >= max_parts_per_actor
                        and num_actors_with_max_parts > 0
                    ):
                        pop_slice = slice(0, max_parts_per_actor)
                        num_actors_with_max_parts -= 1
                    else:
                        pop_slice = slice(0, min_parts_per_actor)

                    row_partitions_by_actors[rank].extend(
                        init_parts_distribution[actor_ip][pop_slice]
                    )
                    # Delete parts which we already assign
                    del init_parts_distribution[actor_ip][pop_slice]
                else:
                    # Not enough local parts for a minimal fill: take all of
                    # them; the shortfall is covered by the second pass below.
                    row_partitions_by_actors[rank].extend(
                        init_parts_distribution[actor_ip]
                    )
                    init_parts_distribution[actor_ip] = []

        # Remove empty IPs
        for ip in list(init_parts_distribution):
            if len(init_parts_distribution[ip]) == 0:
                init_parts_distribution.pop(ip)

        # IP's aren't necessary now: flatten the leftovers into a single list
        init_parts_distribution = [
            pair for pairs in init_parts_distribution.values() for pair in pairs
        ]

        # Fill the actors with extra parts (movements data between nodes)
        for rank in range(len(actors)):
            num_parts_on_rank = len(row_partitions_by_actors[rank])

            # Skip actors which already hold their final number of parts
            if num_parts_on_rank == max_parts_per_actor or (
                num_parts_on_rank == min_parts_per_actor
                and num_actors_with_max_parts == 0
            ):
                continue

            if num_actors_with_max_parts > 0:
                pop_slice = slice(0, max_parts_per_actor - num_parts_on_rank)
                num_actors_with_max_parts -= 1
            else:
                pop_slice = slice(0, min_parts_per_actor - num_parts_on_rank)

            row_partitions_by_actors[rank].extend(init_parts_distribution[pop_slice])
            del init_parts_distribution[pop_slice]

        if len(init_parts_distribution) != 0:
            raise RuntimeError(
                f"Not all partitions were ditributed between actors: {len(init_parts_distribution)} left."
            )

        # Convert [(part, position), ...] into ([parts], [positions]) per rank
        row_parts_by_ranks = dict()
        for rank, pairs_part_pos in dict(row_partitions_by_actors).items():
            parts, order = zip(*pairs_part_pos)
            row_parts_by_ranks[rank] = (list(parts), list(order))
    else:
        row_parts_by_ranks = {rank: ([], []) for rank in range(len(actors))}

        # Reuse the positions chosen for the aligning data so that partition
        # `i` of `row_partitions` lands on the same rank as its counterpart
        for rank, (_, order_of_indexes) in data_for_aligning.items():
            row_parts_by_ranks[rank][1].extend(order_of_indexes)
            for row_idx in order_of_indexes:
                row_parts_by_ranks[rank][0].append(row_partitions[row_idx])

    return row_parts_by_ranks


def _train(
    dtrain,
    params: Dict,
    *args,
    num_actors=None,
    evals=(),
    **kwargs,
):
    """
    Run distributed training of XGBoost model on Ray engine.

    During work it evenly distributes `dtrain` between workers according
    to IP addresses partitions (in case of not even distribution of `dtrain`
    by nodes, part of partitions will be re-distributed between nodes),
    runs xgb.train on each worker for subset of `dtrain` and reduces training results
    of each worker using Rabit Context.

    Parameters
    ----------
    dtrain : modin.experimental.DMatrix
        Data to be trained against.
    params : dict
        Booster params.
    *args : iterable
        Other parameters for `xgboost.train`.
    num_actors : int, optional
        Number of actors for training. If unspecified, this value will be
        computed automatically.
    evals : list of pairs (modin.experimental.xgboost.DMatrix, str), default: empty
        List of validation sets for which metrics will be evaluated during training.
        Validation metrics will help us track the performance of the model.
        The passed sequence is not modified.
    **kwargs : dict
        Other parameters are the same as `xgboost.train`.

    Returns
    -------
    dict
        A dictionary with trained booster and dict of
        evaluation results
        as {"booster": xgboost.Booster, "history": dict}.
    """
    s = time.time()

    X_row_parts, y_row_parts = dtrain
    dmatrix_kwargs = dtrain.get_dmatrix_params()

    assert len(X_row_parts) == len(y_row_parts), "Unaligned train data"

    num_actors = _get_num_actors(num_actors)

    # There is no point in having more actors than row partitions
    if num_actors > len(X_row_parts):
        num_actors = len(X_row_parts)

    if evals:
        # Every evaluation set must provide at least one partition per actor,
        # so the smallest evaluation set caps the number of actors.
        min_num_parts = num_actors
        for (eval_X, _), eval_method in evals:
            if len(eval_X) < min_num_parts:
                min_num_parts = len(eval_X)
                method_name = eval_method

        if num_actors != min_num_parts:
            num_actors = min_num_parts
            warnings.warn(
                f"`num_actors` is set to {num_actors}, because `evals` data with name `{method_name}` has only {num_actors} partition(s)."
            )

    actors = create_actors(num_actors)

    add_as_eval_method = None
    if evals:
        # Build a filtered local list instead of calling ``evals.remove``:
        # that would mutate the caller's list and fail for tuples.
        separate_evals = []
        for eval_data, method in evals:
            if eval_data is dtrain:
                # The training data itself is evaluated on the actor side,
                # so it must not be shipped a second time.
                add_as_eval_method = method
            else:
                separate_evals.append((eval_data, method))

        for (eval_X, eval_y), eval_method in separate_evals:
            # Split data across workers
            _split_data_across_actors(
                actors,
                lambda actor, *X_y: actor.add_eval_data.remote(
                    *X_y, eval_method=eval_method, **dmatrix_kwargs
                ),
                eval_X,
                eval_y,
            )

    # Split data across workers
    _split_data_across_actors(
        actors,
        lambda actor, *X_y: actor.set_train_data.remote(
            *X_y, add_as_eval_method=add_as_eval_method, **dmatrix_kwargs
        ),
        X_row_parts,
        y_row_parts,
    )
    LOGGER.info(f"Data preparation time: {time.time() - s} s")

    s = time.time()
    with RabitContextManager(len(actors), get_node_ip_address()) as env:
        rabit_args = [("%s=%s" % item).encode() for item in env.items()]

        # Train
        fut = [
            actor.train.remote(rabit_args, params, *args, **kwargs)
            for _, actor in actors
        ]
        # All results should be the same because of Rabit tracking. So we just
        # return the first one.
        result = RayWrapper.materialize(fut[0])
        LOGGER.info(f"Training time: {time.time() - s} s")
        return result


@ray.remote
def _map_predict(booster, part, columns, dmatrix_kwargs=None, **kwargs):
    """
    Run prediction on a remote worker.

    Parameters
    ----------
    booster : xgboost.Booster or ray.ObjectRef
        A trained booster.
    part : pandas.DataFrame or ray.ObjectRef
        Partition of full data used for local prediction.
    columns : list or ray.ObjectRef
        Columns for the result.
    dmatrix_kwargs : dict, optional
        Keyword parameters for ``xgb.DMatrix``. No extra parameters are
        passed if unspecified.
    **kwargs : dict
        Other parameters are the same as for ``xgboost.Booster.predict``.

    Returns
    -------
    ray.ObjectRef
        ``ray.ObjectRef`` with partial prediction (the function body itself
        builds a ``pandas.DataFrame`` indexed like `part`).
    """
    # A ``None`` sentinel is used instead of a mutable ``{}`` default so that
    # no dictionary object is shared between calls.
    if dmatrix_kwargs is None:
        dmatrix_kwargs = {}
    dmatrix = xgb.DMatrix(part, **dmatrix_kwargs)
    prediction = pandas.DataFrame(
        booster.predict(dmatrix, **kwargs),
        index=part.index,
        columns=columns,
    )
    return prediction


def _predict(
    booster,
    data,
    **kwargs,
):
    """
    Run distributed prediction with a trained booster on Ray engine.

    A remote ``xgb.predict`` task is launched for every row partition of `data`
    and the partial results are assembled into a Modin DataFrame.

    Parameters
    ----------
    booster : xgboost.Booster
        A trained booster.
    data : modin.experimental.xgboost.DMatrix
        Input data used for prediction.
    **kwargs : dict
        Other parameters are the same as for ``xgboost.Booster.predict``.

    Returns
    -------
    modin.pandas.DataFrame
        Modin DataFrame with prediction results.
    """
    started_at = time.time()
    dmatrix_kwargs = data.get_dmatrix_params()

    # Get metadata from DMatrix
    input_index, input_columns, row_lengths = data.metadata

    # Infer the number of result columns by predicting on a single random
    # row that has as many features as the input data.
    probe_row = np.random.RandomState(777).randn(1, len(input_columns))
    probe_prediction = booster.predict(
        xgb.DMatrix(probe_row), validate_features=False, **kwargs
    )
    if len(probe_prediction.shape) > 1:
        result_num_columns = probe_prediction.shape[1]
    else:
        result_num_columns = 1
    new_columns = list(range(result_num_columns))

    # Put common data in object store
    booster = RayWrapper.put(booster)
    new_columns_ref = RayWrapper.put(new_columns)

    prediction_refs = []
    for _, part in data.data:
        prediction_refs.append(
            _map_predict.remote(
                booster, part, new_columns_ref, dmatrix_kwargs, **kwargs
            )
        )

    predictions = from_partitions(
        prediction_refs,
        0,
        index=input_index,
        columns=new_columns,
        row_lengths=row_lengths,
        column_widths=[len(new_columns)],
    )
    LOGGER.info(f"Prediction time: {time.time() - started_at} s")
    return predictions


================================================
FILE: modin/logging/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

from .class_logger import ClassLogger  # noqa: F401
from .config import DEFAULT_LOGGER_NAME, get_logger  # noqa: F401
from .logger_decorator import disable_logging, enable_logging  # noqa: F401
from .metrics import add_metric_handler, clear_metric_handler, emit_metric

# Public API of the ``modin.logging`` package; every name listed here is
# imported at the top of this module.
__all__ = [
    "ClassLogger",
    "get_logger",
    "enable_logging",
    "disable_logging",
    "emit_metric",
    "add_metric_handler",
    "clear_metric_handler",
    "DEFAULT_LOGGER_NAME",
]


================================================
FILE: modin/logging/class_logger.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
Module contains ``ClassLogger`` class.

``ClassLogger`` is used for adding logging to Modin classes and their subclasses.
"""

from typing import Dict, Optional

from .config import LogLevel
from .logger_decorator import enable_logging


class ClassLogger:
    """
    Mixin that applies the logging decorator to every subclass.

    Notes
    -----
    This mixin must go first in class bases declaration to have the desired effect.
    """

    # Defaults inherited by subclasses that do not override them
    _modin_logging_layer = "PANDAS-API"
    _log_level = LogLevel.INFO

    @classmethod
    def __init_subclass__(
        cls,
        modin_layer: Optional[str] = None,
        class_name: Optional[str] = None,
        log_level: Optional[LogLevel] = None,
        **kwargs: Dict,
    ) -> None:
        """
        Apply logging decorator to all children of ``ClassLogger``.

        Parameters
        ----------
        modin_layer : str, optional
            Specified by the logger (e.g. PANDAS-API).
            Taken from the parent class if not specified.
        class_name : str, optional
            The name of the class the decorator is being applied to.
            Composed from the decorated class name if not specified.
        log_level : LogLevel, optional
            The log level (LogLevel.INFO, LogLevel.DEBUG, LogLevel.WARNING, etc.).
            Taken from the parent class if not specified.
        **kwargs : dict
            Remaining class keyword arguments, forwarded to the next
            ``__init_subclass__`` in the MRO.
        """
        # Fall back to the values currently visible on the class
        effective_layer = modin_layer if modin_layer else cls._modin_logging_layer
        effective_level = log_level if log_level else cls._log_level

        super().__init_subclass__(**kwargs)

        decorate = enable_logging(effective_layer, class_name, effective_level)
        decorate(cls)

        # Remember the settings so that further subclasses inherit them
        cls._modin_logging_layer = effective_layer
        cls._log_level = effective_level


================================================
FILE: modin/logging/config.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
Module contains ``ModinFormatter`` class.

``ModinFormatter`` and the associated functions are used for logging configuration.
"""

import datetime as dt
import logging
import platform
import threading
import time
import uuid
from enum import IntEnum
from logging.handlers import RotatingFileHandler
from pathlib import Path
from typing import Optional

import pandas
import psutil

import modin
from modin.config import LogFileSize, LogMemoryInterval, LogMode

# Namespace of the main Modin logger created by ``configure_logging``.
DEFAULT_LOGGER_NAME = "modin.logger.default"

# Set to True at the end of ``configure_logging``; ``get_logger`` checks it
# before deciding whether the loggers still have to be configured.
__LOGGER_CONFIGURED__: bool = False


class LogLevel(IntEnum):  # noqa: PR01
    """Enumerator to specify the valid values of LogLevel accepted by Logger.setLevel()."""

    # The members mirror the numeric levels of the standard ``logging`` module.
    DEBUG = logging.DEBUG
    INFO = logging.INFO
    WARNING = logging.WARNING
    ERROR = logging.ERROR
    CRITICAL = logging.CRITICAL


class ModinFormatter(logging.Formatter):  # noqa: PR01
    """Implement custom formatter to log at microsecond granularity."""

    def formatTime(
        self, record: logging.LogRecord, datefmt: Optional[str] = None
    ) -> str:
        """
        Return the creation time of the specified LogRecord as formatted text.

        Unlike the base implementation, `datefmt` is applied with
        ``datetime.strftime``, so the ``%f`` (microseconds) directive is
        supported.

        Parameters
        ----------
        record : LogRecord
            The specified LogRecord object.
        datefmt : str, default: None
            Used with ``datetime.strftime()`` to format time record.
            If not specified, the ``YYYY-MM-DD HH:MM:SS,mmm`` layout is used,
            where ``mmm`` is the zero-padded millisecond part.

        Returns
        -------
        str
            Datetime string containing sub-second timestamp.
        """
        ct = dt.datetime.fromtimestamp(record.created)
        if datefmt:
            return ct.strftime(datefmt)
        t = ct.strftime("%Y-%m-%d %H:%M:%S")
        # ``record.msecs`` is a float; convert it to int so that the value is
        # rendered as three zero-padded digits ("005") rather than "5.0".
        return f"{t},{int(record.msecs):03d}"


def bytes_int_to_str(num_bytes: int, suffix: str = "B") -> str:
    """
    Scale bytes to its human-readable format (e.g: 1253656678 => '1.25GB').

    Decimal (power of 1000) prefixes are used. Values too large for the
    peta prefix are still reported in peta units.

    Parameters
    ----------
    num_bytes : int
        Number of bytes.
    suffix : str, default: "B"
        Suffix to add to conversion of num_bytes.

    Returns
    -------
    str
        Human-readable string format.
    """
    step = 1000
    # Kept as float because the value is repeatedly divided by ``step``
    scaled: float = num_bytes
    for prefix in ("", "K", "M", "G", "T", "P"):
        if scaled >= step:
            scaled /= step
            continue
        return f"{scaled:.2f}{prefix}{suffix}"
    # The loop divided once more than the peta prefix needs; undo that step.
    return f"{scaled * 1000:.2f}P{suffix}"


def _create_logger(
    namespace: str, job_id: str, log_name: str, log_level: LogLevel
) -> logging.Logger:
    """
    Create and configure logger as Modin expects it to be.

    The log file is placed at ``.modin/logs/job_<job_id>/<log_name>.log``
    relative to the current working directory.

    Parameters
    ----------
    namespace : str
        Logging namespace to use, e.g. "modin.logger.default".
    job_id : str
        Part of path to where logs are stored.
    log_name : str
        Name of the log file to create.
    log_level : LogLevel
        Level to set on the logger.

    Returns
    -------
    Logger
        Logger object configured per Modin settings.
    """
    # Pathlib makes it OS agnostic.
    root_dir = Path(".modin")
    root_dir.mkdir(exist_ok=True)

    # Add gitignore to the log directory.
    gitignore_file = root_dir / ".gitignore"
    if not gitignore_file.exists():
        gitignore_file.write_text("# Automatically generated by modin.\n*\n")

    job_dir = root_dir / "logs" / f"job_{job_id}"
    job_dir.mkdir(parents=True, exist_ok=True)

    logger = logging.getLogger(namespace)

    # ``LogFileSize`` is multiplied by 1e6 to obtain the rotation size in bytes.
    file_handler = RotatingFileHandler(
        filename=job_dir / f"{log_name}.log",
        mode="a",
        maxBytes=LogFileSize.get() * int(1e6),
        backupCount=10,
    )
    file_handler.setFormatter(
        ModinFormatter(
            fmt="%(process)d, %(thread)d, %(asctime)s, %(message)s",
            datefmt="%Y-%m-%d,%H:%M:%S.%f",
        )
    )

    logger.addHandler(file_handler)
    logger.setLevel(log_level)
    return logger


def configure_logging() -> None:
    """
    Configure Modin logging by setting up directory structure and formatting.

    Creates the trace, memory and error loggers for a new job, writes
    environment information to them and starts the daemon thread which
    periodically reports memory usage.
    """
    global __LOGGER_CONFIGURED__

    # All log files of one configuration share a unique job identifier
    timestamp = dt.datetime.now().strftime("%Y.%m.%d_%H-%M-%S")
    job_id = f"{timestamp}_{uuid.uuid4().hex}"

    trace_logger = _create_logger(DEFAULT_LOGGER_NAME, job_id, "trace", LogLevel.INFO)

    trace_logger.info(f"OS Version: {platform.platform()}")
    trace_logger.info(f"Python Version: {platform.python_version()}")
    physical_cores = str(psutil.cpu_count(logical=False))
    total_cores = str(psutil.cpu_count(logical=True))
    trace_logger.info(f"Modin Version: {modin.__version__}")
    trace_logger.info(f"Pandas Version: {pandas.__version__}")
    trace_logger.info(f"Physical Cores: {physical_cores}")
    trace_logger.info(f"Total Cores: {total_cores}")

    report_interval = LogMemoryInterval.get()
    memory_logger = _create_logger(
        "modin_memory.logger", job_id, "memory", LogLevel.DEBUG
    )

    # One-off snapshot of system memory; periodic updates come from the thread
    snapshot = psutil.virtual_memory()
    for label, amount in (
        ("Total", snapshot.total),
        ("Available", snapshot.available),
        ("Used", snapshot.used),
    ):
        memory_logger.info(f"Memory {label}: {bytes_int_to_str(amount)}")

    threading.Thread(
        target=memory_thread, args=[memory_logger, report_interval], daemon=True
    ).start()

    _create_logger("modin.logger.errors", job_id, "error", LogLevel.INFO)

    __LOGGER_CONFIGURED__ = True


def memory_thread(logger: logging.Logger, sleep_time: int) -> None:
    """
    Periodically log memory usage; intended to run in a background thread.

    The function never returns: it logs the system memory percentage and the
    RSS of the current process, sleeps and repeats.

    Parameters
    ----------
    logger : logging.Logger
        The logger object.
    sleep_time : int
        The interval at which to profile system memory.
    """
    while True:
        process_rss = bytes_int_to_str(psutil.Process().memory_info().rss)
        used_percent = psutil.virtual_memory().percent
        logger.info(f"Memory Percentage: {used_percent}%")
        logger.info(f"RSS Memory: {process_rss}")
        time.sleep(sleep_time)


def get_logger(namespace: str = "modin.logger.default") -> logging.Logger:
    """
    Return the Modin logger, configuring logging first when required.

    Configuration is performed only if it has not been done yet and the
    ``LogMode`` config is not set to "disable".

    Parameters
    ----------
    namespace : str, default: "modin.logger.default"
        Which namespace to use for logging.

    Returns
    -------
    logging.Logger
        The Modin logger.
    """
    if not __LOGGER_CONFIGURED__:
        # The config is consulted only while logging is still unconfigured
        if LogMode.get() != "disable":
            configure_logging()
    return logging.getLogger(namespace)


================================================
FILE: modin/logging/logger_decorator.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
Module contains the functions designed for the enable/disable of logging.

``enable_logging`` is used for decorating individual Modin functions or classes.
"""
from __future__ import annotations

from functools import wraps
from time import perf_counter
from types import FunctionType, MethodType
from typing import Any, Callable, Dict, Optional, Tuple, TypeVar, overload

from modin.config import LogMode
from modin.logging.metrics import emit_metric

from .config import LogLevel, get_logger

# Name of the attribute which marks a callable as "must not be wrapped with
# logging"; set by ``disable_logging`` and checked by ``enable_logging``.
_MODIN_LOGGER_NOWRAP = "__modin_logging_nowrap__"

# Type variable used to preserve the type of the decorated object.
Fn = TypeVar("Fn", bound=Any)


def disable_logging(func: Callable) -> Any:
    """
    Disable logging of one particular function. Useful for decorated classes.

    The function is not wrapped; it is only marked with an attribute which
    ``enable_logging`` checks when it decorates a class.

    Parameters
    ----------
    func : callable
        A method in a logger-decorated class for which logging should be disabled.

    Returns
    -------
    func
        The same function object, marked as not to be logged.
    """
    setattr(func, _MODIN_LOGGER_NOWRAP, True)
    return func


@overload
def enable_logging(modin_layer: Fn) -> Fn:
    # Typing-only signature for the bare form ``@enable_logging``: the
    # decorated object is passed as the first argument and its type is kept.
    pass


@overload
def enable_logging(
    modin_layer: str = "PANDAS-API",
    name: Optional[str] = None,
    log_level: LogLevel = LogLevel.INFO,
) -> Callable[[Fn], Fn]:
    # Typing-only signature for the parameterized form
    # ``@enable_logging(...)``, which returns a type-preserving decorator.
    pass


def enable_logging(
    modin_layer: str | Fn = "PANDAS-API",
    name: Optional[str] = None,
    log_level: LogLevel = LogLevel.INFO,
) -> Callable[[Fn], Fn] | Fn:
    """
    Log Decorator used on specific Modin functions or classes.

    Parameters
    ----------
    modin_layer : str or object to decorate, default: "PANDAS-API"
        Specified by the logger (e.g. PANDAS-API).
        If it's an object to decorate, call enable_logging() on it with default arguments.
    name : str, optional
        The name of the object the decorator is being applied to.
        Composed from the decorated object name if not specified.
    log_level : LogLevel, default: LogLevel.INFO
        The log level (LogLevel.INFO, LogLevel.DEBUG, LogLevel.WARNING, etc.).

    Returns
    -------
    func
        A decorator function, or the decorated object itself when the
        decorator is used without parentheses.
    """
    if not isinstance(modin_layer, str):
        # assume the decorator is used in a form without parenthesis like:
        # @enable_logging
        # def func()
        return enable_logging()(modin_layer)

    def decorator(obj: Fn) -> Fn:
        """Decorate function or class to add logs to Modin API function(s)."""
        if isinstance(obj, type):
            # Class: wrap every function-like attribute defined directly on it
            # (``vars`` does not include inherited attributes), skipping those
            # marked by ``disable_logging``.
            # ``seen`` maps an original attribute to its wrapper so that an
            # object bound to several names is wrapped only once.
            seen: Dict[Any, Any] = {}
            for attr_name, attr_value in vars(obj).items():
                if isinstance(
                    attr_value, (FunctionType, MethodType, classmethod, staticmethod)
                ) and not hasattr(attr_value, _MODIN_LOGGER_NOWRAP):
                    try:
                        wrapped = seen[attr_value]
                    except KeyError:
                        wrapped = seen[attr_value] = enable_logging(
                            modin_layer,
                            f"{name or obj.__name__}.{attr_name}",
                            log_level,
                        )(attr_value)

                    setattr(obj, attr_name, wrapped)
            return obj
        elif isinstance(obj, classmethod):
            # Wrap the underlying function and restore the descriptor type
            return classmethod(decorator(obj.__func__))  # type: ignore [return-value, arg-type]
        elif isinstance(obj, staticmethod):
            return staticmethod(decorator(obj.__func__))  # type: ignore [return-value, arg-type]

        assert isinstance(modin_layer, str), "modin_layer is somehow not a string!"

        # The strings are built once, at decoration time
        api_call_name = f"{name or obj.__name__}"
        log_line = f"{modin_layer.upper()}::{api_call_name}"
        metric_name = f"{modin_layer.lower()}.{api_call_name.lower()}"
        start_line = f"START::{log_line}"
        stop_line = f"STOP::{log_line}"

        @wraps(obj)
        def run_and_log(*args: Tuple, **kwargs: Dict) -> Any:
            """
            Compute function with logging if Modin logging is enabled.

            Parameters
            ----------
            *args : tuple
                The function arguments.
            **kwargs : dict
                The function keyword arguments.

            Returns
            -------
            Any
            """
            start_time = perf_counter()
            if LogMode.get() == "disable":
                # Logging is off: run the function and only emit the metric
                result = obj(*args, **kwargs)
                emit_metric(metric_name, perf_counter() - start_time)
                return result

            logger = get_logger()
            logger.log(log_level, start_line)
            try:
                result = obj(*args, **kwargs)
                emit_metric(metric_name, perf_counter() - start_time)
            except BaseException as e:
                # Only log the exception if a deeper layer of the modin stack has not
                # already logged it.
                if not hasattr(e, "_modin_logged"):
                    # use stack_info=True so that even if we are a few layers deep in
                    # modin, we log a stack trace that includes calls to higher layers
                    # of modin
                    get_logger("modin.logger.errors").exception(
                        stop_line, stack_info=True
                    )
                    e._modin_logged = True  # type: ignore[attr-defined]
                raise
            finally:
                # The STOP line is written on both success and failure
                logger.log(log_level, stop_line)
            return result

        # make sure we won't decorate multiple times
        return disable_logging(run_and_log)

    return decorator


================================================
FILE: modin/logging/metrics.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
Module contains metrics handler functions.

Allows for the registration of functions to collect
API metrics.
"""

import re
from typing import Callable, Union

from modin.config.envvars import MetricsMode

# Allowed characters of a metric name: ASCII letters, digits, ".", "_" and "-".
metric_name_pattern = r"[a-zA-Z\._\-0-9]+$"
# Registered handlers; each one is called with (metric name, metric value).
_metric_handlers: list[Callable[[str, Union[int, float]], None]] = []


# Metric/Telemetry hooks can be implemented by plugin engines
# to collect discrete data on how modin is performing at the
# high level modin layer.
def emit_metric(name: str, value: Union[int, float]) -> None:
    """
    Forward a metric to every registered handler.

    Nothing happens when ``MetricsMode`` is set to ``"disable"``. Otherwise the
    name is validated and each handler is called with the name prefixed by
    ``"modin."``. A handler that raises is removed via ``clear_metric_handler``
    and the remaining handlers are still called.

    Parameters
    ----------
    name : str
        Name of the metric, in dot-format (without the ``modin.`` prefix).
    value : int or float
        Value of the metric.

    Raises
    ------
    KeyError
        If `name` does not fully match ``metric_name_pattern``.
    """
    if MetricsMode.get() == "disable":
        return
    if re.fullmatch(metric_name_pattern, name) is None:
        raise KeyError(
            f"Metrics name is not in metric-name dot format, (eg. modin.dataframe.hist.duration ): {name}"
        )

    qualified_name = f"modin.{name}"
    # Iterate over a snapshot: failing handlers are removed from the live list.
    for handler in list(_metric_handlers):
        try:
            handler(qualified_name, value)
        except Exception:
            clear_metric_handler(handler)


def add_metric_handler(handler: Callable[[str, Union[int, float]], None]) -> None:
    """
    Register a metric handler at the end of the module-level handler list.

    Parameters
    ----------
    handler : Callable
        Callable taking the metric name and the metric value.
    """
    _metric_handlers.extend([handler])


def clear_metric_handler(handler: Callable[[str, Union[int, float]], None]) -> None:
    """
    Unregister a metric handler; unknown handlers are ignored.

    Parameters
    ----------
    handler : Callable
        Handler to drop from the module-level handler list. Only its first
        occurrence is removed.
    """
    try:
        _metric_handlers.remove(handler)
    except ValueError:
        # The handler was not registered: nothing to do.
        pass


================================================
FILE: modin/numpy/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import numpy
from packaging import version

from . import linalg
from .arr import array
from .array_creation import ones_like, tri, zeros_like
from .array_shaping import append, hstack, ravel, shape, split, transpose
from .constants import e, euler_gamma, inf, nan, newaxis, pi

if version.parse(numpy.__version__) < version.parse("2.0.0b1"):
    from .constants import (
        NAN,
        NINF,
        NZERO,
        PINF,
        PZERO,
        Inf,
        Infinity,
        NaN,
        infty,
    )

from .logic import (
    all,
    any,
    equal,
    greater,
    greater_equal,
    iscomplex,
    isfinite,
    isinf,
    isnan,
    isnat,
    isneginf,
    isposinf,
    isreal,
    isscalar,
    less,
    less_equal,
    logical_and,
    logical_not,
    logical_or,
    logical_xor,
    not_equal,
)
from .math import (
    abs,
    absolute,
    add,
    amax,
    amin,
    argmax,
    argmin,
    divide,
    dot,
    exp,
    float_power,
    floor_divide,
    max,
    maximum,
    mean,
    min,
    minimum,
    mod,
    multiply,
    power,
    prod,
    remainder,
    sqrt,
    subtract,
    sum,
    true_divide,
    var,
)
from .trigonometry import tanh


def where(condition, x=None, y=None):
    """
    Pick `x` or `y` depending on `condition`.

    Parameters
    ----------
    condition : bool or object with a ``where`` method
        Python ``True``/``False`` select `x`/`y` directly; any other object is
        asked to resolve the selection through ``condition.where(x=x, y=y)``.
    x : optional
        Value(s) used where the condition holds.
    y : optional
        Value(s) used where the condition does not hold.

    Returns
    -------
    object
        `x`, `y`, or whatever ``condition.where`` returns.

    Raises
    ------
    NotImplementedError
        If `condition` is neither a Python bool nor has a ``where`` attribute.
    """
    # ``bool`` cannot be subclassed, so this matches exactly the two singletons.
    if isinstance(condition, bool):
        return x if condition else y
    if not hasattr(condition, "where"):
        raise NotImplementedError(
            f"np.where for condition of type {type(condition)} is not yet supported in Modin."
        )
    return condition.where(x=x, y=y)


# Public names exported by ``from modin.numpy import *``: the submodule, class,
# functions and constants imported above plus the ``where`` function defined here.
__all__ = [  # noqa: F405
    "linalg",
    "array",
    "zeros_like",
    "ones_like",
    "ravel",
    "shape",
    "transpose",
    "all",
    "any",
    "isfinite",
    "isinf",
    "isnan",
    "isnat",
    "isneginf",
    "isposinf",
    "iscomplex",
    "isreal",
    "isscalar",
    "logical_not",
    "logical_and",
    "logical_or",
    "logical_xor",
    "greater",
    "greater_equal",
    "less",
    "less_equal",
    "equal",
    "not_equal",
    "absolute",
    "abs",
    "add",
    "divide",
    "dot",
    "float_power",
    "floor_divide",
    "power",
    "prod",
    "multiply",
    "remainder",
    "mod",
    "subtract",
    "sum",
    "true_divide",
    "mean",
    "maximum",
    "amax",
    "max",
    "minimum",
    "amin",
    "min",
    "where",
    "e",
    "euler_gamma",
    "inf",
    "nan",
    "newaxis",
    "pi",
    "sqrt",
    "tanh",
    "exp",
    "argmax",
    "argmin",
    "var",
    "split",
    "hstack",
    "append",
    "tri",
]
# These constant aliases are only imported above when the installed NumPy is older
# than 2.0.0b1, so they are exported under the same version condition.
if version.parse(numpy.__version__) < version.parse("2.0.0b1"):
    __all__ += [
        "Inf",
        "Infinity",
        "NAN",
        "NINF",
        "NZERO",
        "NaN",
        "PINF",
        "PZERO",
        "infty",
    ]


================================================
FILE: modin/numpy/arr.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses ``array`` class, that is distributed version of ``numpy.array``."""

from inspect import signature
from math import prod

import numpy
import pandas
from pandas.api.types import is_scalar
from pandas.core.dtypes.common import is_bool_dtype, is_list_like, is_numeric_dtype

import modin.pandas as pd
from modin.core.dataframe.algebra import Binary, Map, Reduce
from modin.error_message import ErrorMessage

from .utils import try_convert_from_interoperable_type


def check_kwargs(order="C", subok=True, keepdims=None, casting="same_kind", where=True):
    """
    Warn about unsupported keyword arguments and validate the `where` argument.

    Parameters
    ----------
    order : str, default: "C"
        A warning is issued for anything other than ``"K"`` or ``"C"``.
    subok : bool, default: True
        A warning is issued when falsy.
    keepdims : bool, optional
        A warning is issued when truthy.
    casting : str, default: "same_kind"
        A warning is issued for anything other than ``"same_kind"``.
    where : scalar or modin.numpy.array, default: True
        Must be a scalar or a boolean ``modin.numpy.array``.

    Raises
    ------
    NotImplementedError
        If `where` is neither a scalar nor a ``modin.numpy.array``.
    TypeError
        If `where` is a ``modin.numpy.array`` whose dtype is not boolean.
    """
    unsupported_usage = (
        (
            order not in ("K", "C"),
            "Array order besides 'C' is not currently supported in Modin. Defaulting to 'C' order.",
        ),
        (
            not subok,
            "Subclassing types is not currently supported in Modin. Defaulting to the same base dtype.",
        ),
        (
            keepdims,
            "Modin does not yet support broadcasting between nested 1D arrays and 2D arrays.",
        ),
        (
            casting != "same_kind",
            "Modin does not yet support the `casting` argument.",
        ),
    )
    for triggered, message in unsupported_usage:
        if triggered:
            ErrorMessage.single_warning(message)

    # Scalars are always acceptable masks.
    if is_scalar(where):
        return
    if not isinstance(where, array):
        raise NotImplementedError(
            f"Modin only supports scalar or modin.numpy.array `where` parameter, not `where` parameter of type {type(where)}"
        )
    if not is_bool_dtype(where.dtype):
        raise TypeError(
            f"Cannot cast array data from {where.dtype} to dtype('bool') according to the rule 'safe'"
        )


def check_can_broadcast_to_output(arr_in: "array", arr_out: "array"):
    """
    Check that a computed result can be stored in a user-provided output array.

    Broadcasting is ok if both arrays have matching ndim + shape, OR
    `arr_in` is 1xN or a 1D N-element array and `arr_out` is MxN.
    Note that 1xN `arr_in` cannot be broadcasted into a 1D N-element `arr_out`.
    This is slightly different from the rules for checking if two inputs
    of a binary operation can be broadcasted together.

    Parameters
    ----------
    arr_in : modin.numpy.array
        Result that is about to be written into `arr_out`.
    arr_out : modin.numpy.array
        Output operand supplied by the caller.

    Raises
    ------
    TypeError
        If `arr_out` is not a ``modin.numpy.array``.
    NotImplementedError
        If `arr_in` is 1xN and `arr_out` is MxN with M != 1. Writing the result
        would require duplicating the row M times, which is not supported
        (see GH#5831).
    ValueError
        If the shapes cannot be broadcast at all.
    """
    if not isinstance(arr_out, array):
        raise TypeError("return arrays must be of modin.numpy.array type.")
    in_ndim, out_ndim = arr_in._ndim, arr_out._ndim
    in_shape, out_shape = arr_in.shape, arr_out.shape

    # Case 1: arrays have matching ndim + shape
    # Case 2a: arr_in is 1D N-element, arr_out is 1D N-element (covered here)
    same_layout = in_ndim == out_ndim and in_shape == out_shape
    # Case 2b: both arrays are 2D, arr_in is 1xN and arr_out is MxN
    row_into_matrix = (
        in_ndim == 2
        and out_ndim == 2
        and in_shape[0] == 1
        and in_shape[1] == out_shape[1]
    )
    # Case 2c: arr_in is 1D N-element, arr_out is 1xN
    vector_into_single_row = (
        in_ndim == 1
        and out_ndim == 2
        and in_shape[0] == out_shape[1]
        and out_shape[0] == 1
    )

    # Case 2b with M != 1 would require duplicating the 1xN result M times to match
    # the shape of out, which we currently do not support. See GH#5831.
    # The row count that matters here is the one of the *output* array.
    if row_into_matrix and out_shape[0] != 1:
        raise NotImplementedError(
            f"Modin does not currently support broadcasting shape {arr_in.shape} to output operand with shape {arr_out.shape}"
        )
    if not (same_layout or row_into_matrix or vector_into_single_row):
        raise ValueError(
            f"non-broadcastable output operand with shape {arr_out.shape} doesn't match the broadcast shape {arr_in.shape}"
        )


def fix_dtypes_and_determine_return(
    query_compiler_in, _ndim, dtype=None, out=None, where=True
):
    """
    Wrap a query compiler into an array while honoring `dtype`, `out` and `where`.

    Parameters
    ----------
    query_compiler_in : query compiler
        Raw result of the computation.
    _ndim : int
        Number of dimensions of the resulting array.
    dtype : dtype, optional
        When given, every column of the result is cast to it.
    out : array-like, optional
        Output operand. When given, it is converted with
        ``try_convert_from_interoperable_type``, checked with
        ``check_can_broadcast_to_output``, updated in place and returned.
    where : scalar or modin.numpy.array, default: True
        Mask deciding which positions receive the computed values.

    Returns
    -------
    modin.numpy.array
        `out` when it was supplied; otherwise the result, a masked combination
        of the result and zeros, or an all-zeros array when `where` is falsy.
    """
    if dtype is not None:
        query_compiler_in = query_compiler_in.astype(
            dict.fromkeys(query_compiler_in.columns, dtype)
        )
    result = array(_query_compiler=query_compiler_in, _ndim=_ndim)
    mask_is_array = isinstance(where, array)

    if out is None:
        if mask_is_array:
            from .array_creation import zeros_like

            # Positions rejected by the mask keep the zero filler.
            filler_dtype = dtype if dtype is not None else result.dtype
            out = zeros_like(result).astype(filler_dtype)
            out._query_compiler = where.where(result, out)._query_compiler
            return out
        if not where:
            from .array_creation import zeros_like

            return zeros_like(result)
        return result

    out = try_convert_from_interoperable_type(out, copy=False)
    check_can_broadcast_to_output(result, out)
    # The values written into ``out`` take on the dtype of ``out``.
    result._query_compiler = result._query_compiler.astype(
        dict.fromkeys(result._query_compiler.columns, out.dtype)
    )
    if mask_is_array:
        out._update_inplace(where.where(result, out)._query_compiler)
    elif where:
        out._update_inplace(result._query_compiler)
    return out


class array(object):
    """
    Modin distributed representation of ``numpy.array``.

    Internally, the data can be divided into partitions along both columns and rows
    in order to parallelize computations and utilize the user's hardware as much as possible.

    Notes
    -----
    The ``array`` class is a lightweight shim that relies on the pandas Query Compiler in order to
    provide functionality.
    """

    def __init__(
        self,
        object=None,
        dtype=None,
        *,
        copy=True,
        order="K",
        subok=False,
        ndmin=0,
        like=numpy._NoValue,
        _query_compiler=None,
        _ndim=None,
    ):
        """
        Build an array from an array, a Modin DataFrame/Series, a query compiler or raw data.

        Parameters
        ----------
        object : array, modin.pandas.DataFrame, modin.pandas.Series or array-like, optional
            Data to wrap. Ignored as a data source when `_query_compiler` is
            given and `object` is not an array/DataFrame/Series.
        dtype : dtype, optional
            Target dtype; when None the dtype is derived from the data.
        copy : bool, default: True
            When False and `object` is an array/DataFrame/Series, the new array
            is registered as a sibling of `object`. The query compiler is copied
            in either case.
        order, subok, ndmin, like
            Only read by the ``target_kwargs`` loop below, whose result is not
            used afterwards in this method.
        _query_compiler : query compiler, optional
            Query compiler to wrap directly.
        _ndim : int, optional
            Number of dimensions to record when `_query_compiler` is given.
        """
        self._siblings = []
        ErrorMessage.single_warning(
            "Using Modin's new NumPy API. To convert from a Modin object to a NumPy array, either turn off the ModinNumpy flag, or use `modin.pandas.io.to_numpy`."
        )
        # Modin objects: reuse (a copy of) their query compiler and overwrite the
        # `_query_compiler` / `_ndim` arguments.
        if isinstance(object, array):
            _query_compiler = object._query_compiler.copy()
            if not copy:
                object._add_sibling(self)
            _ndim = object._ndim
        elif isinstance(object, (pd.DataFrame, pd.Series)):
            _query_compiler = object._query_compiler.copy()
            if not copy:
                object._add_sibling(self)
            _ndim = 1 if isinstance(object, pd.Series) else 2
        if _query_compiler is not None:
            self._query_compiler = _query_compiler
            self._ndim = _ndim
            # A single dtype is used for the whole array: the common type of all columns.
            new_dtype = pandas.core.dtypes.cast.find_common_type(
                list(self._query_compiler.dtypes.values)
            )
        # NOTE(review): `object[0]` is evaluated for any list-like, so an empty
        # list-like presumably raises IndexError here - confirm intended behavior.
        elif is_list_like(object) and not is_list_like(object[0]):
            # Flat list-like: stored through a Modin Series as a 1D array.
            series = pd.Series(object)
            self._query_compiler = series._query_compiler
            self._ndim = 1
            new_dtype = self._query_compiler.dtypes.values[0]
        else:
            # NOTE(review): `target_kwargs` is computed but never passed to
            # `numpy.asarray` below - looks like dead code, confirm before removing.
            target_kwargs = {
                "dtype": None,
                "copy": True,
                "order": "K",
                "subok": False,
                "ndmin": 0,
                "like": numpy._NoValue,
            }
            for key, value in target_kwargs.copy().items():
                if value == locals()[key]:
                    target_kwargs.pop(key)
                else:
                    target_kwargs[key] = locals()[key]
            arr = numpy.asarray(object)
            assert arr.ndim in (
                1,
                2,
            ), "modin.numpy currently only supports 1D and 2D objects."
            self._ndim = len(arr.shape)
            # NOTE(review): only reachable when the assert above is disabled
            # (e.g. running Python with -O).
            if self._ndim > 2:
                ErrorMessage.not_implemented(
                    "NumPy arrays with dimensions higher than 2 are not yet supported."
                )

            self._query_compiler = pd.DataFrame(arr)._query_compiler
            new_dtype = arr.dtype
        # These two lines are necessary so that our query compiler does not keep track of indices
        # and try to map like indices to like indices. (e.g. if we multiply two arrays that used
        # to be dataframes, and the dataframes had the same column names but ordered differently
        # we want to do a simple broadcast where we only consider position, as numpy would, rather
        # than pair columns with the same name and multiply them.)
        self._query_compiler = self._query_compiler.reset_index(drop=True)
        self._query_compiler.columns = range(len(self._query_compiler.columns))
        # An explicitly requested dtype wins over the one derived from the data.
        new_dtype = new_dtype if dtype is None else dtype
        if isinstance(new_dtype, pandas.Float64Dtype):
            new_dtype = numpy.float64
        # Cast only the columns whose dtype differs from the target dtype.
        cols_with_wrong_dtype = self._query_compiler.dtypes != new_dtype
        if cols_with_wrong_dtype.any():
            self._query_compiler = self._query_compiler.astype(
                {
                    col_name: new_dtype
                    for col_name in self._query_compiler.columns[cols_with_wrong_dtype]
                }
            )
        # The indexer is created lazily by __getitem__ / __setitem__.
        self.indexer = None

    def __getitem__(self, key):
        if isinstance(key, array) and is_bool_dtype(key.dtype) and key._ndim == 2:
            raise NotImplementedError(
                "Advanced indexing with 2D boolean indexes is not currently supported."
            )
        if self.indexer is None:
            from .indexing import ArrayIndexer

            self.indexer = ArrayIndexer(self)
        return self.indexer.__getitem__(key)

    def __setitem__(self, key, item):
        if self.indexer is None:
            from .indexing import ArrayIndexer

            self.indexer = ArrayIndexer(self)
        return self.indexer.__setitem__(key, item)

    def _add_sibling(self, sibling):
        """
        Add an array object to the list of siblings.

        Siblings are objects that share the same query compiler. This function is called
        when a shallow copy is made.

        Parameters
        ----------
        sibling : BasePandasDataset
            Dataset to add to siblings list.
        """
        sibling._siblings = self._siblings + [self]
        self._siblings += [sibling]
        for sib in self._siblings:
            sib._siblings += [sibling]

    def _update_inplace(self, new_query_compiler):
        """
        Update the current array inplace.

        Parameters
        ----------
        new_query_compiler : query_compiler
            The new QueryCompiler to use to manage the data.
        """
        old_query_compiler = self._query_compiler
        self._query_compiler = new_query_compiler
        for sib in self._siblings:
            sib._query_compiler = new_query_compiler
        old_query_compiler.free()

    def _validate_axis(self, axis):
        """
        Check that the provided axis argument is valid on this array.

        Parameters
        ----------
        axis : int, optional
            The axis argument passed to the function.

        Returns
        -------
        int, optional
            Axis to apply the function over (None, 0, or 1).

        Raises
        -------
        numpy.AxisError
            if the axis is invalid.
        """
        if axis is not None and axis < 0:
            new_axis = axis + self._ndim
            if self._ndim == 1 and new_axis != 0:
                raise numpy.AxisError(axis, 1)
            elif self._ndim == 2 and new_axis not in [0, 1]:
                raise numpy.AxisError(axis, 2)
            return new_axis
        return axis

    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
        """
        Handle NumPy ufuncs applied to this array.

        The ufunc is served, in order of preference, by a method of this class
        with a matching name, by a ufunc registered through the Modin algebra
        operators (``Map``, ``Binary``, ``Reduce``), or by converting the inputs
        to NumPy and calling the ufunc there.

        Parameters
        ----------
        ufunc : numpy.ufunc
            The ufunc being applied.
        method : str
            Name of the ufunc method that was invoked; ``"__call__"``,
            ``"reduce"`` and ``"accumulate"`` are handled explicitly here.
        *inputs : tuple
            Positional inputs of the ufunc.
        **kwargs : dict
            Keyword arguments of the ufunc (``out``, ``where``, ``dtype``, ``axis``, ...).

        Returns
        -------
        modin.numpy.array or scalar
            Result of the ufunc.
        """
        ufunc_name = ufunc.__name__
        # Prefer a method of this class named after the ufunc (e.g. ``add``)
        # or the corresponding dunder method (e.g. ``__add__``).
        supported_array_layer = hasattr(self, ufunc_name) or hasattr(
            self, f"__{ufunc_name}__"
        )
        if supported_array_layer:
            args = []
            for input in inputs:
                input = try_convert_from_interoperable_type(input)
                if not (isinstance(input, array) or is_scalar(input)):
                    input = array(input)
                args += [input]
            # NOTE(review): the method is looked up on the first input, which may
            # be a scalar rather than an array - confirm this is intended.
            function = (
                getattr(args[0], ufunc_name)
                if hasattr(args[0], ufunc_name)
                else getattr(args[0], f"__{ufunc_name}__")
            )
            # Number of parameters without a default, i.e. the operands the
            # bound method expects besides the object it is bound to.
            len_expected_arguments = len(
                [
                    param
                    for param in signature(function).parameters.values()
                    if param.default == param.empty
                ]
            )
            if len_expected_arguments == (len(args) - 1) and method == "__call__":
                return function(*tuple(args[1:]), **kwargs)
            else:
                ErrorMessage.single_warning(
                    f"{ufunc} method {method} is not yet supported in Modin. Defaulting to NumPy."
                )
                # Convert every Modin input to NumPy and let NumPy do the work.
                args = []
                for input in inputs:
                    if isinstance(input, array):
                        input = input._to_numpy()
                    if isinstance(input, pd.DataFrame):
                        input = input._query_compiler.to_numpy()
                    if isinstance(input, pd.Series):
                        input = input._query_compiler.to_numpy().flatten()
                    args += [input]
                output = self._to_numpy().__array_ufunc__(
                    ufunc, method, *args, **kwargs
                )
                if is_scalar(output):
                    return output
                return array(output)
        # No matching method on this class: try to register the ufunc with the
        # algebra operator that fits the invoked method.
        new_ufunc = None
        out_ndim = -1
        if method == "__call__":
            if len(inputs) == 1:
                new_ufunc = Map.register(ufunc)
                out_ndim = len(inputs[0].shape)
            else:
                new_ufunc = Binary.register(ufunc)
                # The result has as many dimensions as the largest input that has a shape.
                out_ndim = max(
                    [len(inp.shape) for inp in inputs if hasattr(inp, "shape")]
                )
        elif method == "reduce":
            if len(inputs) == 1:
                new_ufunc = Reduce.register(ufunc, axis=kwargs.get("axis", None))
            if kwargs.get("axis", None) is None:
                out_ndim = 0
            else:
                out_ndim = len(inputs[0].shape) - 1
        elif method == "accumulate":
            # NOTE(review): accumulate is registered as a full reduction with a
            # 0-dimensional result - confirm this matches the intended semantics.
            if len(inputs) == 1:
                new_ufunc = Reduce.register(ufunc, axis=None)
            out_ndim = 0
        if new_ufunc is None:
            ErrorMessage.single_warning(
                f"{ufunc} is not yet supported in Modin. Defaulting to NumPy."
            )
            # Same NumPy fallback as above.
            args = []
            for input in inputs:
                if isinstance(input, array):
                    input = input._to_numpy()
                if isinstance(input, pd.DataFrame):
                    input = input._query_compiler.to_numpy()
                if isinstance(input, pd.Series):
                    input = input._query_compiler.to_numpy().flatten()
                args += [input]
            output = self._to_numpy().__array_ufunc__(ufunc, method, *args, **kwargs)
            if is_scalar(output):
                return output
            return array(output)
        # The registered operator works on query compilers (or scalars), not on arrays.
        args = []
        for input in inputs:
            input = try_convert_from_interoperable_type(input)
            if not (isinstance(input, array) or is_scalar(input)):
                input = array(input)
            args += [
                input._query_compiler if hasattr(input, "_query_compiler") else input
            ]
        out_kwarg = kwargs.get("out", None)
        if out_kwarg is not None:
            # If `out` is a modin.numpy.array, `kwargs.get("out")` returns a 1-tuple
            # whose only element is that array, so we need to unwrap it from the tuple.
            out_kwarg = out_kwarg[0]
        where_kwarg = kwargs.get("where", True)
        # `out` and `where` are neutralized for the call itself and applied
        # afterwards by `fix_dtypes_and_determine_return`.
        kwargs["out"] = None
        kwargs["where"] = True
        result = new_ufunc(*args, **kwargs)
        return fix_dtypes_and_determine_return(
            result,
            out_ndim,
            dtype=kwargs.get("dtype", None),
            out=out_kwarg,
            where=where_kwarg,
        )

    def __array_function__(self, func, types, args, kwargs):
        """
        Dispatch a NumPy function to the Modin function with the same name.

        The ``math``, ``array_shaping`` and ``array_creation`` submodules are
        searched in that order; the first one having an attribute named like
        `func` provides the implementation.

        Parameters
        ----------
        func : callable
            NumPy function that was called.
        types : collection
            Not used by this implementation.
        args : tuple
            Positional arguments of the original call.
        kwargs : dict
            Keyword arguments of the original call.

        Returns
        -------
        object
            Result of the Modin implementation, or ``NotImplemented`` when no
            implementation was found.
        """
        from . import array_creation as creation
        from . import array_shaping as shaping
        from . import math

        func_name = func.__name__
        modin_func = next(
            (
                getattr(module, func_name)
                for module in (math, shaping, creation)
                if hasattr(module, func_name)
            ),
            None,
        )
        if modin_func is None:
            return NotImplemented
        return modin_func(*args, **kwargs)

    def where(self, x=None, y=None):
        """
        Select values from `x` or `y` using this boolean array as the condition.

        Cases that need broadcasting, that use two scalars, or that pass neither
        `x` nor `y` are computed by NumPy after converting the operands; the
        result is always wrapped into a ``modin.numpy.array``.

        Parameters
        ----------
        x : modin.numpy.array or scalar, optional
            Values used where the condition is True.
        y : modin.numpy.array or scalar, optional
            Values used where the condition is False.

        Returns
        -------
        modin.numpy.array
            The selected values.

        Raises
        ------
        NotImplementedError
            If this array does not have a boolean dtype.
        ValueError
            If `x` or `y` is neither an array nor a scalar after conversion, or
            if the operand shapes cannot be broadcast together.
        """
        if not is_bool_dtype(self.dtype):
            raise NotImplementedError(
                "Modin currently only supports where on condition arrays with boolean dtype."
            )
        if x is None and y is None:
            ErrorMessage.single_warning(
                "np.where method with only condition specified is not yet supported in Modin. Defaulting to NumPy."
            )
            condition = self._to_numpy()
            return array(numpy.where(condition))
        x, y = try_convert_from_interoperable_type(
            x
        ), try_convert_from_interoperable_type(y)
        if not (
            (isinstance(x, array) or is_scalar(x))
            and (isinstance(y, array) or is_scalar(y))
        ):
            raise ValueError(
                "np.where requires x and y to either be np.arrays or scalars."
            )
        if is_scalar(x) and is_scalar(y):
            ErrorMessage.single_warning(
                "np.where not supported when both x and y are scalars. Defaulting to NumPy."
            )
            return array(numpy.where(self._to_numpy(), x, y))
        if is_scalar(x) and not is_scalar(y):
            if self._ndim < y._ndim:
                if not self.shape[0] == y.shape[1]:
                    raise ValueError(
                        f"operands could not be broadcast together with shapes {self.shape} {y.shape}"
                    )
                ErrorMessage.single_warning(
                    "np.where method where condition must be broadcast is not yet available in Modin. Defaulting to NumPy."
                )
                return array(numpy.where(self._to_numpy(), x, y._to_numpy()))
            elif self._ndim == y._ndim:
                if not self.shape == y.shape:
                    raise ValueError(
                        f"operands could not be broadcast together with shapes {self.shape} {y.shape}"
                    )
                # The query compiler's `where` keeps its own values where the mask
                # is True, so the inverted condition keeps `y` exactly where `x`
                # must not be used.
                return array(
                    _query_compiler=y._query_compiler.where((~self)._query_compiler, x),
                    _ndim=y._ndim,
                )
            else:
                ErrorMessage.single_warning(
                    "np.where method with broadcast is not yet available in Modin. Defaulting to NumPy."
                )
                # Wrap the NumPy result so that this branch returns a
                # modin.numpy.array like every other branch of this method.
                return array(numpy.where(self._to_numpy(), x, y._to_numpy()))
        if not is_scalar(x) and is_scalar(y):
            if self._ndim < x._ndim:
                if not self.shape[0] == x.shape[1]:
                    raise ValueError(
                        f"operands could not be broadcast together with shapes {self.shape} {x.shape}"
                    )
                ErrorMessage.single_warning(
                    "np.where method where condition must be broadcast is not yet available in Modin. Defaulting to NumPy."
                )
                return array(numpy.where(self._to_numpy(), x._to_numpy(), y))
            elif self._ndim == x._ndim:
                if not self.shape == x.shape:
                    raise ValueError(
                        f"operands could not be broadcast together with shapes {self.shape} {x.shape}"
                    )
                return array(
                    _query_compiler=x._query_compiler.where(self._query_compiler, y),
                    _ndim=x._ndim,
                )
            else:
                ErrorMessage.single_warning(
                    "np.where method with broadcast is not yet available in Modin. Defaulting to NumPy."
                )
                return array(numpy.where(self._to_numpy(), x._to_numpy(), y))
        # Both operands are arrays: only identical shapes are handled natively.
        if not (x.shape == y.shape and y.shape == self.shape):
            ErrorMessage.single_warning(
                "np.where method with broadcast is not yet available in Modin. Defaulting to NumPy."
            )
            return array(numpy.where(self._to_numpy(), x._to_numpy(), y._to_numpy()))
        return array(
            _query_compiler=x._query_compiler.where(
                self._query_compiler, y._query_compiler
            ),
            _ndim=self._ndim,
        )

    def max(
        self, axis=None, dtype=None, out=None, keepdims=None, initial=None, where=True
    ):
        """
        Return the maximum of the array, or the maximum along an axis.

        Parameters
        ----------
        axis : int, optional
            Axis to reduce over; None reduces over all elements.
        dtype : dtype, optional
            Dtype forwarded to ``fix_dtypes_and_determine_return`` for array results.
        out : modin.numpy.array, optional
            Array to store the result in.
        keepdims : bool, optional
            When truthy, the reduced axes are kept with length one.
        initial : scalar, optional
            Lower bound of the result; required whenever `where` is not ``True``.
        where : scalar or modin.numpy.array, default: True
            Elements to include in the reduction.

        Returns
        -------
        modin.numpy.array or scalar
            The maximum value(s).

        Raises
        ------
        ValueError
            If `where` is not ``True`` and `initial` is missing, or if `out` has
            an unsuitable shape for a ``keepdims`` reduction.
        numpy.exceptions.AxisError
            If `axis` is out of range for this array. This is the same class as
            ``numpy.AxisError`` on NumPy versions that still expose it in the
            main namespace.
        """
        check_kwargs(keepdims=keepdims, where=where)
        apply_axis = self._validate_axis(axis)
        truthy_where = bool(where)
        if initial is None and where is not True:
            raise ValueError(
                "reduction operation 'maximum' does not have an identity, so to use a where mask one has to specify 'initial'"
            )
        if self._ndim == 1:
            if apply_axis == 1:
                # NumPy 2 only exposes AxisError through ``numpy.exceptions``.
                raise getattr(numpy, "exceptions", numpy).AxisError(1, 1)
            # Masked-out elements are replaced by `initial` before reducing.
            target = where.where(self, initial) if isinstance(where, array) else self
            result = target._query_compiler.max(axis=0)
            if keepdims:
                if initial is not None and result.lt(initial).any():
                    result = pd.Series([initial])._query_compiler
                # Validate `out` before touching it so that a rejected output
                # array is left unmodified.
                if out is not None and out.shape != (1,):
                    raise ValueError(
                        f"operand was set up as a reduction along axis 0, but the length of the axis is {out.shape[0]} (it has to be 1)"
                    )
                if initial is not None and out is not None:
                    out._update_inplace(
                        (numpy.ones_like(out) * initial)._query_compiler
                    )
                if truthy_where or out is not None:
                    return fix_dtypes_and_determine_return(
                        result, 1, dtype, out, truthy_where
                    )
                else:
                    return array([initial])
            if initial is not None:
                result = max(result.to_numpy()[0, 0], initial)
            else:
                result = result.to_numpy()[0, 0]
            return result if truthy_where else initial
        if apply_axis is None:
            # Full reduction of a 2D array: reduce the rows first, then the columns.
            target = where.where(self, initial) if isinstance(where, array) else self
            result = target._query_compiler.max(axis=0).max(axis=1).to_numpy()[0, 0]
            if initial is not None:
                result = max(result, initial)
            if keepdims:
                if out is not None and out.shape != (1, 1):
                    raise ValueError(
                        f"operand was set up as a reduction along axis 1, but the length of the axis is {out.shape[0]} (it has to be 1)"
                    )
                if initial is not None and out is not None:
                    out._update_inplace(
                        (numpy.ones_like(out) * initial)._query_compiler
                    )
                if truthy_where or out is not None:
                    return fix_dtypes_and_determine_return(
                        array(numpy.array([[result]]))._query_compiler,
                        2,
                        dtype,
                        out,
                        truthy_where,
                    )
                else:
                    return array([[initial]])
            return result if truthy_where else initial
        if apply_axis > 1:
            raise getattr(numpy, "exceptions", numpy).AxisError(axis, 2)
        target = where.where(self, initial) if isinstance(where, array) else self
        result = target._query_compiler.max(axis=apply_axis)
        new_ndim = self._ndim - 1 if not keepdims else self._ndim
        if new_ndim == 0:
            if initial is not None:
                result = max(result.to_numpy()[0, 0], initial)
            else:
                result = result.to_numpy()[0, 0]
            return result if truthy_where else initial
        if not keepdims and apply_axis != 1:
            result = result.transpose()
        if initial is not None and out is not None:
            out._update_inplace((numpy.ones_like(out) * initial)._query_compiler)
        intermediate = fix_dtypes_and_determine_return(
            result, new_ndim, dtype, out, truthy_where
        )
        if initial is not None:
            # Clamp from below: entries not greater than `initial` become `initial`.
            intermediate._update_inplace(
                (intermediate > initial).where(intermediate, initial)._query_compiler
            )
        if truthy_where or out is not None:
            return intermediate
        else:
            return numpy.ones_like(intermediate) * initial

    def min(
        self, axis=None, dtype=None, out=None, keepdims=None, initial=None, where=True
    ):
        """
        Return the minimum of the array, optionally along an axis.

        Parameters
        ----------
        axis : int, optional
            Axis to reduce along; passed through ``self._validate_axis``. When the
            validated axis is ``None`` the whole array is reduced.
        dtype : numpy type, optional
            Forwarded to ``fix_dtypes_and_determine_return`` for array results.
        out : array, optional
            Existing array that receives the result. When `initial` is given, `out`
            is first overwritten with `initial`.
        keepdims : bool, optional
            If truthy, the reduced axes are kept in the result.
        initial : scalar, optional
            Upper bound for the result; required whenever `where` is not ``True``.
        where : bool or array, default: True
            Mask of elements to include. Where an array mask is false, `initial`
            is used in place of the element.

        Returns
        -------
        array or scalar
            A scalar when the result has no dimensions left, otherwise an array
            (`out` itself when `out` was provided).

        Raises
        ------
        ValueError
            If `where` is not ``True`` and `initial` is ``None``, or if `out` does
            not have the shape of the `keepdims` result.
        numpy.AxisError
            If the axis does not exist for this array.
        """
        check_kwargs(keepdims=keepdims, where=where)
        truthy_where = bool(where)
        apply_axis = self._validate_axis(axis)
        if initial is None and where is not True:
            raise ValueError(
                "reduction operation 'minimum' does not have an identity, so to use a where mask one has to specify 'initial'"
            )
        if self._ndim == 1:
            if apply_axis == 1:
                raise numpy.AxisError(1, 1)
            # With an array mask, masked-out entries are replaced by `initial` before reducing.
            target = where.where(self, initial) if isinstance(where, array) else self
            result = target._query_compiler.min(axis=0)
            if keepdims:
                # NOTE(review): `.any()` is used directly as a condition here; confirm that
                # the object it returns has a truth value reflecting the reduction.
                if initial is not None and result.gt(initial).any():
                    result = pd.Series([initial])._query_compiler
                if initial is not None and out is not None:
                    out._update_inplace(
                        (numpy.ones_like(out) * initial)._query_compiler
                    )
                if out is not None and out.shape != (1,):
                    raise ValueError(
                        f"operand was set up as a reduction along axis 0, but the length of the axis is {out.shape[0]} (it has to be 1)"
                    )
                if truthy_where or out is not None:
                    return fix_dtypes_and_determine_return(
                        result, 1, dtype, out, truthy_where
                    )
                else:
                    return array([initial])
            # Scalar result: here `min` is the builtin, comparing against `initial`.
            if initial is not None:
                result = min(result.to_numpy()[0, 0], initial)
            else:
                result = result.to_numpy()[0, 0]
            return result if truthy_where else initial
        if apply_axis is None:
            # Reduce over both axes in turn to obtain a single value.
            target = where.where(self, initial) if isinstance(where, array) else self
            result = target._query_compiler.min(axis=0).min(axis=1).to_numpy()[0, 0]
            if initial is not None:
                result = min(result, initial)
            if keepdims:
                # NOTE(review): the message below names axis 1 and reports `out.shape[0]`
                # although the check is against the full shape (1, 1) — confirm wording.
                if out is not None and out.shape != (1, 1):
                    raise ValueError(
                        f"operand was set up as a reduction along axis 1, but the length of the axis is {out.shape[0]} (it has to be 1)"
                    )
                if initial is not None and out is not None:
                    out._update_inplace(
                        (numpy.ones_like(out) * initial)._query_compiler
                    )
                if truthy_where or out is not None:
                    return fix_dtypes_and_determine_return(
                        array(numpy.array([[result]]))._query_compiler,
                        2,
                        dtype,
                        out,
                        truthy_where,
                    )
                else:
                    return array([[initial]])
            return result if truthy_where else initial
        if apply_axis > 1:
            raise numpy.AxisError(axis, 2)
        target = where.where(self, initial) if isinstance(where, array) else self
        result = target._query_compiler.min(axis=apply_axis)
        new_ndim = self._ndim - 1 if not keepdims else self._ndim
        if new_ndim == 0:
            if initial is not None:
                result = min(result.to_numpy()[0, 0], initial)
            else:
                result = result.to_numpy()[0, 0]
            return result if truthy_where else initial
        if not keepdims and apply_axis != 1:
            result = result.transpose()
        # Pre-fill `out` with `initial` before the helper writes the reduction into it.
        if initial is not None and out is not None:
            out._update_inplace((numpy.ones_like(out) * initial)._query_compiler)
        intermediate = fix_dtypes_and_determine_return(
            result, new_ndim, dtype, out, truthy_where
        )
        if initial is not None:
            # Keep each reduced value only where it is below `initial`.
            intermediate._update_inplace(
                (intermediate < initial).where(intermediate, initial)._query_compiler
            )
        if truthy_where or out is not None:
            return intermediate
        else:
            return numpy.ones_like(intermediate) * initial

    def __abs__(
        self,
        out=None,
        where=True,
        casting="same_kind",
        order="K",
        dtype=None,
        subok=True,
    ):
        """
        Compute the absolute value of every element.

        Parameters
        ----------
        out : array, optional
            Existing array that is updated in place with the result and returned.
        where, casting, order, subok
            Only forwarded to ``check_kwargs``.
        dtype : numpy type, optional
            Dtype used for the computation and the result. Falls back to the dtype
            of `out`, then to the dtype of this array.

        Returns
        -------
        array
            `out` when it was provided, otherwise a new array.
        """
        if dtype is not None:
            compute_dtype = dtype
        elif out is not None:
            compute_dtype = out.dtype
        else:
            compute_dtype = self.dtype
        check_kwargs(order=order, casting=casting, subok=subok, where=where)
        source_qc = self._query_compiler
        abs_qc = source_qc.astype(
            dict.fromkeys(source_qc.columns, compute_dtype)
        ).abs()
        if dtype is not None:
            abs_qc = abs_qc.astype(dict.fromkeys(abs_qc.columns, dtype))
        if out is None:
            return array(_query_compiler=abs_qc, _ndim=self._ndim)
        out = try_convert_from_interoperable_type(out, copy=False)
        check_can_broadcast_to_output(self, out)
        out._update_inplace(abs_qc)
        return out

    absolute = __abs__

    def __invert__(self):
        """
        Apply bitwise inverse to each element of the array.

        Returns
        -------
        array
            New array containing the bitwise inverse of each value.

        Raises
        ------
        TypeError
            If ``is_numeric_dtype`` rejects the dtype of this array.
        """
        if not is_numeric_dtype(self.dtype):
            raise TypeError(f"bad operand type for unary ~: '{self.dtype}'")
        return array(_query_compiler=self._query_compiler.invert(), _ndim=self._ndim)

    def _preprocess_binary_op(self, other, cast_input_types=True, dtype=None, out=None):
        """
        Processes arguments and performs dtype conversions necessary to perform binary
        operations. If the arguments to the binary operation are a 1D object and a 2D object,
        then it will swap the order of the caller and callee return values in order to
        facilitate native broadcasting by modin.

        This function may modify `self._query_compiler` and `other._query_compiler` by replacing
        it with the result of `astype`. Operands are validated before any such replacement,
        so an unsupported `other` leaves `self` untouched.

        Parameters
        ----------
        other : array or scalar
            The RHS of the binary operation.
        cast_input_types : bool, default: True
            If True, the columns of the caller/callee query compilers will be assigned
            dtypes in the following priority, depending on what values were specified:
            (1) the `dtype` argument,
            (2) the dtype of the `out` array,
            (3) the common parent dtype of `self` and `other`.
            If False, the resulting dtype is left to be determined by the result of the
            modin operation.
        dtype : numpy type, optional
            The desired dtype of the output array.
        out : array, optional
            Existing array object to which to assign the computation's result.

        Returns
        -------
        tuple
            Returns a 4-tuple with the following elements:
            - 0: QueryCompiler object that is the LHS of the binary operation, with types converted
                 as needed.
            - 1: QueryCompiler object OR scalar that is the RHS of the binary operation, with types
                 converted as needed.
            - 2: The ndim of the result.
            - 3: kwargs to pass to the query compiler.

        Raises
        ------
        TypeError
            If `other` is neither a scalar nor an array (after interoperable conversion).
        ValueError
            If the operand shapes cannot be broadcast together.
        """
        other = try_convert_from_interoperable_type(other)

        other_is_scalar = is_scalar(other)
        # Validate first: casting below dereferences `other._query_compiler`, which would
        # otherwise fail with an AttributeError for unsupported operand types.
        if not other_is_scalar and not isinstance(other, array):
            raise TypeError(
                f"Unsupported operand type(s): '{type(self)}' and '{type(other)}'"
            )

        if cast_input_types:
            operand_dtype = (
                self.dtype
                if not isinstance(other, array)
                else pandas.core.dtypes.cast.find_common_type([self.dtype, other.dtype])
            )
            out_dtype = (
                dtype
                if dtype is not None
                else (out.dtype if out is not None else operand_dtype)
            )
            self._query_compiler = self._query_compiler.astype(
                {col_name: out_dtype for col_name in self._query_compiler.columns}
            )
        if other_is_scalar:
            # Return early, since no need to check broadcasting behavior if RHS is a scalar
            return (self._query_compiler, other, self._ndim, {})
        if cast_input_types:
            other._query_compiler = other._query_compiler.astype(
                {col_name: out_dtype for col_name in other._query_compiler.columns}
            )

        broadcast = self._ndim != other._ndim
        if broadcast:
            # In this case, we have a 1D object doing a binary op with a 2D object
            caller, callee = (self, other) if self._ndim == 2 else (other, self)
            if callee.shape[0] != caller.shape[1]:
                raise ValueError(
                    f"operands could not be broadcast together with shapes {self.shape} {other.shape}"
                )
            return (
                caller._query_compiler,
                callee._query_compiler,
                caller._ndim,
                {"broadcast": broadcast, "axis": 1},
            )
        if self.shape == other.shape:
            return (
                self._query_compiler,
                other._query_compiler,
                self._ndim,
                {"broadcast": False},
            )
        if self._ndim < 2:
            # Two 1D operands of different lengths have no second dimension to match on;
            # report the mismatch instead of indexing past the end of the shape tuple.
            raise ValueError(
                f"operands could not be broadcast together with shapes {self.shape} {other.shape}"
            )
        # In this case, we either have two mismatched objects trying to do an operation
        # or a nested 1D object that must be broadcasted trying to do an operation.
        broadcast = True
        if self.shape[0] == other.shape[0]:
            matched_dimension = 0
        elif self.shape[1] == other.shape[1]:
            matched_dimension = 1
            broadcast = False
        else:
            raise ValueError(
                f"operands could not be broadcast together with shapes {self.shape} {other.shape}"
            )
        # The dimension that did not match must have length 1 on one of the operands.
        if (
            self.shape[matched_dimension ^ 1] == 1
            or other.shape[matched_dimension ^ 1] == 1
        ):
            return (
                self._query_compiler,
                other._query_compiler,
                self._ndim,
                {"broadcast": broadcast, "axis": matched_dimension},
            )
        raise ValueError(
            f"operands could not be broadcast together with shapes {self.shape} {other.shape}"
        )

    def _greater(
        self,
        x2,
        /,
        out=None,
        *,
        where=True,
        casting="same_kind",
        order="K",
        dtype=None,
        subok=True,
    ):
        """
        Compute ``self > x2``.

        Parameters
        ----------
        x2 : array or scalar
            Right-hand operand.
        out : array, optional
            Existing array that receives the result.
        where : bool or array, default: True
            Forwarded to ``fix_dtypes_and_determine_return``.
        casting, order, subok
            Only forwarded to ``check_kwargs``.
        dtype : numpy type, optional
            Desired dtype of the result.

        Returns
        -------
        array
            Whatever ``fix_dtypes_and_determine_return`` produces for the comparison.
        """
        check_kwargs(where=where, casting=casting, order=order, subok=subok)
        if is_scalar(x2):
            # A scalar needs no broadcasting, but the result still goes through the shared
            # helper so that `dtype`, `out` and `where` are honoured like in the array case.
            result = self._query_compiler.gt(x2)
            return fix_dtypes_and_determine_return(
                result, self._ndim, dtype, out, where
            )
        caller, callee, new_ndim, kwargs = self._preprocess_binary_op(
            x2, cast_input_types=False, dtype=dtype, out=out
        )
        if caller != self._query_compiler:
            # In this case, we are doing an operation that looks like this 1D_object > 2D_object.
            # For Modin to broadcast directly, we have to swap it so that the operation is actually
            # 2D_object < 1D_object.
            result = caller.lt(callee, **kwargs)
        else:
            result = caller.gt(callee, **kwargs)
        return fix_dtypes_and_determine_return(result, new_ndim, dtype, out, where)

    def __gt__(self, x2):
        """
        Implement the ``>`` operator by delegating to `_greater` with default options.
        """
        comparison = self._greater(x2)
        return comparison

    def _greater_equal(
        self,
        x2,
        /,
        out=None,
        *,
        where=True,
        casting="same_kind",
        order="K",
        dtype=None,
        subok=True,
    ):
        """
        Compute ``self >= x2``.

        Parameters
        ----------
        x2 : array or scalar
            Right-hand operand.
        out : array, optional
            Existing array that receives the result.
        where : bool or array, default: True
            Forwarded to ``fix_dtypes_and_determine_return``.
        casting, order, subok
            Only forwarded to ``check_kwargs``.
        dtype : numpy type, optional
            Desired dtype of the result.

        Returns
        -------
        array
            Whatever ``fix_dtypes_and_determine_return`` produces for the comparison.
        """
        check_kwargs(where=where, casting=casting, order=order, subok=subok)
        if is_scalar(x2):
            # A scalar needs no broadcasting, but the result still goes through the shared
            # helper so that `dtype`, `out` and `where` are honoured like in the array case.
            result = self._query_compiler.ge(x2)
            return fix_dtypes_and_determine_return(
                result, self._ndim, dtype, out, where
            )
        caller, callee, new_ndim, kwargs = self._preprocess_binary_op(
            x2, cast_input_types=False, dtype=dtype, out=out
        )
        if caller != self._query_compiler:
            # In this case, we are doing an operation that looks like this 1D_object >= 2D_object.
            # For Modin to broadcast directly, we have to swap it so that the operation is actually
            # 2D_object <= 1D_object.
            result = caller.le(callee, **kwargs)
        else:
            result = caller.ge(callee, **kwargs)
        return fix_dtypes_and_determine_return(result, new_ndim, dtype, out, where)

    def __ge__(self, x2):
        """
        Implement the ``>=`` operator by delegating to `_greater_equal` with default options.
        """
        comparison = self._greater_equal(x2)
        return comparison

    def _less(
        self,
        x2,
        /,
        out=None,
        *,
        where=True,
        casting="same_kind",
        order="K",
        dtype=None,
        subok=True,
    ):
        """
        Compute ``self < x2``.

        Parameters
        ----------
        x2 : array or scalar
            Right-hand operand.
        out : array, optional
            Existing array that receives the result.
        where : bool or array, default: True
            Forwarded to ``fix_dtypes_and_determine_return``.
        casting, order, subok
            Only forwarded to ``check_kwargs``.
        dtype : numpy type, optional
            Desired dtype of the result.

        Returns
        -------
        array
            Whatever ``fix_dtypes_and_determine_return`` produces for the comparison.
        """
        check_kwargs(where=where, casting=casting, order=order, subok=subok)
        if is_scalar(x2):
            # A scalar needs no broadcasting, but the result still goes through the shared
            # helper so that `dtype`, `out` and `where` are honoured like in the array case.
            result = self._query_compiler.lt(x2)
            return fix_dtypes_and_determine_return(
                result, self._ndim, dtype, out, where
            )
        caller, callee, new_ndim, kwargs = self._preprocess_binary_op(
            x2, cast_input_types=False, dtype=dtype, out=out
        )
        if caller != self._query_compiler:
            # In this case, we are doing an operation that looks like this 1D_object < 2D_object.
            # For Modin to broadcast directly, we have to swap it so that the operation is actually
            # 2D_object > 1D_object.
            result = caller.gt(callee, **kwargs)
        else:
            result = caller.lt(callee, **kwargs)
        return fix_dtypes_and_determine_return(result, new_ndim, dtype, out, where)

    def __lt__(self, x2):
        """
        Implement the ``<`` operator by delegating to `_less` with default options.
        """
        comparison = self._less(x2)
        return comparison

    def _less_equal(
        self,
        x2,
        /,
        out=None,
        *,
        where=True,
        casting="same_kind",
        order="K",
        dtype=None,
        subok=True,
    ):
        """
        Compute ``self <= x2``.

        Parameters
        ----------
        x2 : array or scalar
            Right-hand operand.
        out : array, optional
            Existing array that receives the result.
        where : bool or array, default: True
            Forwarded to ``fix_dtypes_and_determine_return``.
        casting, order, subok
            Only forwarded to ``check_kwargs``.
        dtype : numpy type, optional
            Desired dtype of the result.

        Returns
        -------
        array
            Whatever ``fix_dtypes_and_determine_return`` produces for the comparison.
        """
        check_kwargs(where=where, casting=casting, order=order, subok=subok)
        if is_scalar(x2):
            # A scalar needs no broadcasting, but the result still goes through the shared
            # helper so that `dtype`, `out` and `where` are honoured like in the array case.
            result = self._query_compiler.le(x2)
            return fix_dtypes_and_determine_return(
                result, self._ndim, dtype, out, where
            )
        caller, callee, new_ndim, kwargs = self._preprocess_binary_op(
            x2, cast_input_types=False, dtype=dtype, out=out
        )
        if caller != self._query_compiler:
            # In this case, we are doing an operation that looks like this 1D_object <= 2D_object.
            # For Modin to broadcast directly, we have to swap it so that the operation is actually
            # 2D_object >= 1D_object.
            result = caller.ge(callee, **kwargs)
        else:
            result = caller.le(callee, **kwargs)
        return fix_dtypes_and_determine_return(result, new_ndim, dtype, out, where)

    def __le__(self, x2):
        """
        Implement the ``<=`` operator by delegating to `_less_equal` with default options.
        """
        comparison = self._less_equal(x2)
        return comparison

    def _equal(
        self,
        x2,
        /,
        out=None,
        *,
        where=True,
        casting="same_kind",
        order="K",
        dtype=None,
        subok=True,
    ):
        """
        Compute ``self == x2``.

        Parameters
        ----------
        x2 : array or scalar
            Right-hand operand.
        out : array, optional
            Existing array that receives the result.
        where : bool or array, default: True
            Forwarded to ``fix_dtypes_and_determine_return``.
        casting, order, subok
            Only forwarded to ``check_kwargs``.
        dtype : numpy type, optional
            Desired dtype of the result.

        Returns
        -------
        array
            Whatever ``fix_dtypes_and_determine_return`` produces for the comparison.
        """
        check_kwargs(where=where, casting=casting, order=order, subok=subok)
        if is_scalar(x2):
            # A scalar needs no broadcasting, but the result still goes through the shared
            # helper so that `dtype`, `out` and `where` are honoured like in the array case.
            result = self._query_compiler.eq(x2)
            return fix_dtypes_and_determine_return(
                result, self._ndim, dtype, out, where
            )
        caller, callee, new_ndim, kwargs = self._preprocess_binary_op(
            x2, cast_input_types=False, dtype=dtype, out=out
        )
        # Equality is symmetric, so a caller/callee swap needs no operator change.
        result = caller.eq(callee, **kwargs)
        return fix_dtypes_and_determine_return(result, new_ndim, dtype, out, where)

    def __eq__(self, x2):
        """
        Implement the ``==`` operator by delegating to `_equal` with default options.
        """
        comparison = self._equal(x2)
        return comparison

    def _not_equal(
        self,
        x2,
        /,
        out=None,
        *,
        where=True,
        casting="same_kind",
        order="K",
        dtype=None,
        subok=True,
    ):
        """
        Compute ``self != x2``.

        Parameters
        ----------
        x2 : array or scalar
            Right-hand operand.
        out : array, optional
            Existing array that receives the result.
        where : bool or array, default: True
            Forwarded to ``fix_dtypes_and_determine_return``.
        casting, order, subok
            Only forwarded to ``check_kwargs``.
        dtype : numpy type, optional
            Desired dtype of the result.

        Returns
        -------
        array
            Whatever ``fix_dtypes_and_determine_return`` produces for the comparison.
        """
        check_kwargs(where=where, casting=casting, order=order, subok=subok)
        if is_scalar(x2):
            # A scalar needs no broadcasting, but the result still goes through the shared
            # helper so that `dtype`, `out` and `where` are honoured like in the array case.
            result = self._query_compiler.ne(x2)
            return fix_dtypes_and_determine_return(
                result, self._ndim, dtype, out, where
            )
        caller, callee, new_ndim, kwargs = self._preprocess_binary_op(
            x2, cast_input_types=False, dtype=dtype, out=out
        )
        # Inequality is symmetric, so a caller/callee swap needs no operator change.
        result = caller.ne(callee, **kwargs)
        return fix_dtypes_and_determine_return(result, new_ndim, dtype, out, where)

    def __ne__(self, x2):
        """
        Implement the ``!=`` operator by delegating to `_not_equal` with default options.
        """
        comparison = self._not_equal(x2)
        return comparison

    def _unary_math_operator(
        self,
        opName,
        *args,
        out=None,
        where=True,
        casting="same_kind",
        order="K",
        dtype=None,
        subok=True,
    ):
        """
        Apply the query-compiler method named `opName` to this array.

        Parameters
        ----------
        opName : str
            Name of the query-compiler method to call.
        *args : tuple
            Positional arguments forwarded to that method.
        out : array, optional
            Existing array whose query compiler is replaced by the result.
        where, casting, order, subok
            Only forwarded to ``check_kwargs``.
        dtype : numpy type, optional
            Dtype used for the computation and the result. Falls back to the dtype
            of `out`, then to the dtype of this array.

        Returns
        -------
        array
            `out` when it was provided, otherwise a new array.
        """
        if dtype is not None:
            compute_dtype = dtype
        elif out is not None:
            compute_dtype = out.dtype
        else:
            compute_dtype = self.dtype
        check_kwargs(order=order, casting=casting, subok=subok, where=where)
        source_qc = self._query_compiler
        casted_qc = source_qc.astype(dict.fromkeys(source_qc.columns, compute_dtype))
        operation = getattr(casted_qc, opName)
        op_result = operation(*args)
        if dtype is not None:
            op_result = op_result.astype(dict.fromkeys(op_result.columns, dtype))
        if out is None:
            return array(_query_compiler=op_result, _ndim=self._ndim)
        out = try_convert_from_interoperable_type(out)
        check_can_broadcast_to_output(self, out)
        out._query_compiler = op_result
        return out

    def tanh(
        self,
        out=None,
        where=True,
        casting="same_kind",
        order="K",
        dtype=None,
        subok=True,
    ):
        """
        Run the ``"_tanh"`` operation through `_unary_math_operator`.

        All keyword arguments are forwarded unchanged.
        """
        forwarded = {
            "out": out,
            "where": where,
            "casting": casting,
            "order": order,
            "dtype": dtype,
            "subok": subok,
        }
        return self._unary_math_operator("_tanh", **forwarded)

    def exp(
        self,
        out=None,
        where=True,
        casting="same_kind",
        order="K",
        dtype=None,
        subok=True,
    ):
        """
        Run the ``"_exp"`` operation through `_unary_math_operator`.

        All keyword arguments are forwarded unchanged.
        """
        forwarded = {
            "out": out,
            "where": where,
            "casting": casting,
            "order": order,
            "dtype": dtype,
            "subok": subok,
        }
        return self._unary_math_operator("_exp", **forwarded)

    def sqrt(
        self,
        out=None,
        where=True,
        casting="same_kind",
        order="K",
        dtype=None,
        subok=True,
    ):
        """
        Run the ``"_sqrt"`` operation through `_unary_math_operator`.

        All keyword arguments are forwarded unchanged.
        """
        forwarded = {
            "out": out,
            "where": where,
            "casting": casting,
            "order": order,
            "dtype": dtype,
            "subok": subok,
        }
        return self._unary_math_operator("_sqrt", **forwarded)

    def append(self, values, axis=None):
        """
        Append `values` to this array.

        Parameters
        ----------
        values : array or list-like
            Data to append; anything that is not already an `array` is converted.
        axis : int, optional
            Axis to append along. When ``None``, both operands are flattened first.

        Returns
        -------
        array
            New array holding the concatenated data.

        Raises
        ------
        ValueError
            If `values` is a ragged list-like, if the operands differ in number of
            dimensions (1D caller), or if the non-concatenation dimension differs.
        """
        if not isinstance(values, array):
            if is_list_like(values):
                lengths = [len(a) if is_list_like(a) else None for a in values]
                # The generator never touches `lengths[0]` for an empty input, so an
                # empty list-like no longer fails with an IndexError here.
                if any(length != lengths[0] for length in lengths[1:]):
                    raise ValueError(
                        f"setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was ({len(lengths)},) + inhomogeneous part."
                    )
            values = array(values)
        if axis is None:
            return self.flatten().hstack([values.flatten()])
        elif self._ndim == 1:
            if values._ndim == 1:
                return self.hstack([values])
            raise ValueError(
                f"all the input arrays must have same number of dimensions, but the array at index 0 has 1 dimension(s) and the array at index 1 has {values._ndim} dimension(s)"
            )
        # `axis ^ 1` is the axis that is NOT being concatenated along; it must agree.
        if (axis ^ 1 < values._ndim) and self.shape[axis ^ 1] != values.shape[axis ^ 1]:
            raise ValueError(
                f"all the input array dimensions except for the concatenation axis must match exactly, but along dimension {axis ^ 1}, the array at index 0 has size {self.shape[axis^1]} and the array at index 1 has size {values.shape[axis^1]}"
            )
        new_qc = self._query_compiler.concat(axis, values._query_compiler)
        return array(_query_compiler=new_qc, _ndim=self._ndim)

    def hstack(self, others, dtype=None, casting="same_kind"):
        """
        Stack `others` after this array along the horizontal axis.

        Parameters
        ----------
        others : list of array
            Arrays to stack after this one; this array counts as index 0 in error
            messages, so ``others[k]`` is reported as index ``k + 1``.
        dtype : numpy type, optional
            Dtype of the result. Defaults to the common dtype of all operands.
        casting : str, default: "same_kind"
            Only forwarded to ``check_kwargs``.

        Returns
        -------
        array
            New array with the stacked data.

        Raises
        ------
        ValueError
            If the operands differ in number of dimensions or, for 2D arrays, in
            their size along dimension 0.
        """
        check_kwargs(casting=casting)
        new_dtype = (
            dtype
            if dtype is not None
            else pandas.core.dtypes.cast.find_common_type(
                [self.dtype] + [a.dtype for a in others]
            )
        )
        for index, other_ndim in enumerate((a._ndim for a in others), start=1):
            if other_ndim != self._ndim:
                raise ValueError(
                    f"all the input arrays must have same number of dimensions, but the array at index 0 has {self._ndim} dimension(s) and the array at index {index} has {other_ndim} dimension(s)"
                )
        if self._ndim == 1:
            new_qc = self._query_compiler.concat(0, [o._query_compiler for o in others])
        else:
            for index, other_rows in enumerate((a.shape[0] for a in others), start=1):
                if other_rows != self.shape[0]:
                    raise ValueError(
                        f"all the input array dimensions except for the concatenation axis must match exactly, but along dimension 0, the array at index 0 has size {self.shape[0]} and the array at index {index} has size {other_rows}"
                    )
            new_qc = self._query_compiler.concat(1, [o._query_compiler for o in others])
        return array(_query_compiler=new_qc, _ndim=self._ndim, dtype=new_dtype)

    def split(self, indices, axis=0):
        """
        Split the array into sub-arrays along `axis`.

        Parameters
        ----------
        indices : int, list-like or array
            An integer requests that many equally sized sections. A 1D sequence gives
            the positions at which to cut; negative positions count from the end.
        axis : int, default: 0
            Axis to split along; negative values are offset by the number of dimensions.

        Returns
        -------
        list
            The sections in order. Empty sections are returned as NumPy arrays
            (a warning is issued through ``ErrorMessage.single_warning``).

        Raises
        ------
        IndexError
            If `axis` does not exist for this array.
        TypeError
            If `indices` is a sequence that is not one-dimensional.
        ValueError
            If an integer `indices` does not divide the axis length evenly.
        """
        if axis is not None and axis < 0:
            new_axis = axis + self._ndim
            if self._ndim == 1 and new_axis != 0:
                raise IndexError
            elif self._ndim == 2 and new_axis not in [0, 1]:
                raise IndexError
            axis = new_axis
        if self._ndim == 1:
            if axis != 0:
                raise IndexError
        if self._ndim == 2:
            if axis > 1:
                raise IndexError
        axis_len = self.shape[axis]
        arrays = []
        if is_list_like(indices) or isinstance(indices, array):
            if not isinstance(indices, array):
                indices = array(indices)
            if indices._ndim != 1:
                raise TypeError(
                    "only integer scalar arrays can be converted to a scalar index"
                )
            num_points = len(indices)
            prev_index = 0
            for i in range(num_points + 1):
                if i < num_points:
                    end_index = indices._query_compiler.take_2d_positional(
                        [i]
                    ).to_numpy()[0, 0]
                    if end_index < 0:
                        # Translate to a non-negative position; clamp so that a value
                        # beyond the start of the axis cannot leave a negative start.
                        end_index = max(axis_len + end_index, 0)
                else:
                    end_index = axis_len
                stop = min(end_index, axis_len)
                if prev_index >= stop:
                    # Every empty section (repeated, out-of-range or decreasing split
                    # points) is produced by NumPy rather than by an empty positional take.
                    ErrorMessage.single_warning("Defaulting to NumPy for empty arrays.")
                    new_shape = list(self.shape)
                    new_shape[axis] = 0
                    arrays.append(numpy.empty(new_shape, dtype=self.dtype))
                else:
                    idxs = list(range(prev_index, stop))
                    if axis == 0:
                        new_qc = self._query_compiler.take_2d_positional(index=idxs)
                    else:
                        new_qc = self._query_compiler.take_2d_positional(columns=idxs)
                    arrays.append(array(_query_compiler=new_qc, _ndim=self._ndim))
                # The next section always starts where this one was requested to end.
                prev_index = end_index
        else:
            if axis_len % indices != 0:
                raise ValueError("array split does not result in an equal division")
            section_len = axis_len // indices
            for start in range(0, axis_len, section_len):
                positions = list(range(start, start + section_len))
                if axis == 0:
                    new_qc = self._query_compiler.take_2d_positional(index=positions)
                else:
                    new_qc = self._query_compiler.take_2d_positional(
                        columns=positions
                    )
                arrays.append(array(_query_compiler=new_qc, _ndim=self._ndim))
        return arrays

    def _compute_masked_variance(self, mask, output_dtype, axis, ddof):
        """
        Compute the variance along `axis` over the elements selected by `mask`.

        Parameters
        ----------
        mask : array
            Boolean mask; elements where it is false are excluded.
        output_dtype : numpy type
            Dtype passed to `mean` when computing the masked mean.
        axis : int
            Axis to reduce along.
        ddof : int
            Delta degrees of freedom subtracted from the element count.

        Returns
        -------
        QueryCompiler
            The masked variance, with NaN wherever an unmasked input was NaN.

        Raises
        ------
        NotImplementedError
            For 2D arrays reduced along axis 0.
        """
        if axis == 0 and self._ndim != 1:
            # Our broadcasting is wrong, so we can't do the final subtraction at the end.
            raise NotImplementedError(
                "Masked variance on 2D arrays along axis = 0 is currently unsupported."
            )
        axis_mean = self.mean(axis, output_dtype, keepdims=True, where=mask)
        # Masked-out entries become NaN so the NaN-skipping reductions below ignore them.
        masked_self = mask.where(self, numpy.nan)
        if self._ndim == 1:
            axis_mean = axis_mean._to_numpy()[0]
            target = masked_self._query_compiler.sub(axis_mean).pow(2).sum(axis=axis)
        else:
            target = (masked_self - axis_mean)._query_compiler.pow(2).sum(axis=axis)
        # Count only the entries that take part in the variance. Counting on the
        # zero-filled array would include the masked-out entries in the denominator.
        num_elems = masked_self._query_compiler.notna().sum(axis=axis, skipna=False)
        num_elems = num_elems.sub(ddof)
        target = target.truediv(num_elems)
        # A strict (non NaN-skipping) sum over the zero-filled array is NaN exactly where
        # an unmasked input was NaN; those positions must propagate NaN to the result.
        na_propagation_mask = mask.where(self, 0)._query_compiler.sum(
            axis=axis, skipna=False
        )
        target = target.where(na_propagation_mask.notna(), numpy.nan)
        return target

    def var(
        self, axis=None, dtype=None, out=None, ddof=0, keepdims=None, *, where=True
    ):
        """
        Compute the variance of the array, optionally along an axis.

        Parameters
        ----------
        axis : int, optional
            Axis to reduce along; passed through ``self._validate_axis``. When the
            validated axis is ``None`` the whole array is reduced.
        dtype : numpy type, optional
            Dtype used for the computation. Falls back to the dtype of `out`, then to
            the dtype of this array; an integer dtype is replaced by ``numpy.float64``
            when `where` is an array.
        out : array, optional
            Existing array that receives the result; it is first filled with NaN.
        ddof : int, default: 0
            Delta degrees of freedom subtracted from the element count.
        keepdims : bool, optional
            If truthy, the reduced axes are kept in the result.
        where : bool or array, default: True
            Mask of elements to include in the variance.

        Returns
        -------
        array or scalar
            A scalar when the result has no dimensions left, otherwise an array
            (`out` itself when `out` was provided).

        Raises
        ------
        numpy.AxisError
            If the axis does not exist for this array.
        ValueError
            If `out` does not have the shape of the `keepdims` result.
        """
        out_dtype = (
            dtype
            if dtype is not None
            else (out.dtype if out is not None else self.dtype)
        )
        # `out_dtype` may be a dtype instance or a scalar type; compare on the scalar type.
        out_type = getattr(out_dtype, "type", out_dtype)
        if isinstance(where, array) and issubclass(out_type, numpy.integer):
            out_dtype = numpy.float64
        apply_axis = self._validate_axis(axis)
        check_kwargs(keepdims=keepdims, where=where)
        truthy_where = bool(where)
        if self._ndim == 1:
            if apply_axis == 1:
                raise numpy.AxisError(1, 1)
            if isinstance(where, array):
                result = self._compute_masked_variance(where, out_dtype, 0, ddof)
            else:
                result = self._query_compiler.var(axis=0, skipna=False, ddof=ddof)
            if keepdims:
                if out is not None and out.shape != (1,):
                    raise ValueError(
                        f"operand was set up as a reduction along axis 0, but the length of the axis is {out.shape[0]} (it has to be 1)"
                    )
                if out is not None:
                    out._query_compiler = (
                        numpy.ones_like(out) * numpy.nan
                    )._query_compiler
                if truthy_where or out is not None:
                    return fix_dtypes_and_determine_return(
                        result, 1, dtype, out, truthy_where
                    )
                else:
                    return array([numpy.nan], dtype=out_dtype)
            # NOTE(review): when `keepdims` is falsy this branch does not return; execution
            # continues below and `result` is computed again by the generic code paths.
        if apply_axis is None:
            # If any of the (non-masked) elements of our array are `NaN`, we know that the
            # resulting variance must be `NaN`. This is a fastpath to see if any unmasked
            # elements are `NaN`.
            contains_na_check = (
                where.where(self, 0) if isinstance(where, array) else self
            )
            if (
                contains_na_check._query_compiler.isna()
                .any(axis=1)
                .any(axis=0)
                .to_numpy()[0, 0]
            ):
                return numpy.nan
            result = where.where(self, numpy.nan) if isinstance(where, array) else self
            # Since our current QueryCompiler does not have a variance that reduces 2D objects to
            # a single value, we need to calculate the variance ourselves. First though, we need
            # to figure out how many objects that we are taking the variance over (since any
            # entries in our array that are `numpy.nan` must be ignored when taking the variance,
            # and so cannot be included in the final division (of the sum over num total elements))
            num_na_elements = (
                result._query_compiler.isna().sum(axis=1).sum(axis=0).to_numpy()[0, 0]
            )
            num_total_elements = prod(self.shape) - num_na_elements
            # Wrapping the sum in a one-element NumPy array applies `out_dtype` to the division.
            mean = (
                numpy.array(
                    [result._query_compiler.sum(axis=1).sum(axis=0).to_numpy()[0, 0]],
                    dtype=out_dtype,
                )
                / num_total_elements
            )[0]
            # Sum of squared deviations from the mean, divided by the element count less `ddof`.
            result = (
                numpy.array(
                    [
                        result._query_compiler.sub(mean)
                        .pow(2)
                        .sum(axis=1)
                        .sum(axis=0)
                        .to_numpy()[0, 0]
                    ],
                    dtype=out_dtype,
                )
                / (num_total_elements - ddof)
            )[0]
            if keepdims:
                if out is not None and out.shape != (1, 1):
                    raise ValueError(
                        f"operand was set up as a reduction along axis 1, but the length of the axis is {out.shape[0]} (it has to be 1)"
                    )
                if out is not None:
                    out._query_compiler = (
                        numpy.ones_like(out) * numpy.nan
                    )._query_compiler
                if truthy_where or out is not None:
                    return fix_dtypes_and_determine_return(
                        array(numpy.array([[result]]))
                        .astype(out_dtype)
                        ._query_compiler,
                        2,
                        dtype,
                        out,
                        truthy_where,
                    )
                else:
                    return array([[numpy.nan]], dtype=out_dtype)
            return result if truthy_where else numpy.nan
        if apply_axis > 1:
            raise numpy.AxisError(axis, 2)
        if isinstance(where, array):
            result = self._compute_masked_variance(where, out_dtype, apply_axis, ddof)
        else:
            result = self._query_compiler.astype(
                {col_name: out_dtype for col_name in self._query_compiler.columns}
            ).var(axis=apply_axis, skipna=False, ddof=ddof)
        new_ndim = self._ndim - 1 if not keepdims else self._ndim
        if new_ndim == 0:
            return result.to_numpy()[0, 0] if truthy_where else numpy.nan
        if not keepdims and apply_axis != 1:
            result = result.transpose()
        # Pre-fill `out` with NaN before the helper writes the reduction into it.
        if out is not None:
            out._query_compiler = (numpy.ones_like(out) * numpy.nan)._query_compiler
        if truthy_where or out is not None:
            return fix_dtypes_and_determine_return(
                result, new_ndim, dtype, out, truthy_where
            )
        else:
            # `where=False` without `out`: return NaN in the shape of the would-be result.
            return (
                numpy.ones(array(_query_compiler=result, _ndim=new_ndim).shape)
            ) * numpy.nan

    def _compute_masked_mean(self, mask, output_dtype, axis):
        """
        Compute the mean along `axis` over the elements selected by `mask`.

        pandas skips NaN values in reductions by default, whereas NumPy propagates
        them. Masked-out entries are therefore replaced by NaN so the mean ignores
        them. A second, strict sum over a copy where masked-out entries are 0 is NaN
        exactly where NumPy would propagate NaN; the mean is replaced by NaN there.

        Parameters
        ----------
        mask : array
            Boolean mask; elements where it is false are excluded.
        output_dtype : numpy type
            Dtype the masked data is cast to before taking the mean.
        axis : int
            Axis to reduce along.

        Returns
        -------
        QueryCompiler
            The masked mean with NaN propagated from unmasked inputs.
        """
        nan_filled_qc = mask.where(self, numpy.nan)._query_compiler
        cast_map = dict.fromkeys(nan_filled_qc.columns, output_dtype)
        masked_mean = nan_filled_qc.astype(cast_map).mean(axis=axis)
        zero_filled_qc = mask.where(self, 0)._query_compiler
        strict_sum = zero_filled_qc.sum(axis=axis, skipna=False)
        return masked_mean.where(strict_sum.notna(), numpy.nan)

    def mean(self, axis=None, dtype=None, out=None, keepdims=None, *, where=True):
        """
        Compute the arithmetic mean of the array, optionally along one axis.

        Parameters
        ----------
        axis : int, optional
            Axis to reduce over; ``None`` reduces over every element.
        dtype : dtype, optional
            dtype used for the computation. Falls back to ``out.dtype`` and then
            to ``self.dtype``.
        out : array, optional
            Destination array; it is pre-filled with NaN before the result is
            written through ``fix_dtypes_and_determine_return``.
        keepdims : bool, optional
            Whether the reduced axes are kept with length one.
        where : bool or array, default: True
            Either a boolean switch or a mask array selecting the elements that
            take part in the mean.

        Returns
        -------
        array or scalar
            The mean. NaN (scalar or NaN-filled array) when `where` is falsy
            and no `out` was supplied.

        Raises
        ------
        numpy.AxisError
            If `axis` is out of bounds for the array.
        ValueError
            If `keepdims` is set and `out` does not have the fully reduced shape.
        """
        if dtype is not None:
            out_dtype = dtype
        elif out is not None:
            out_dtype = out.dtype
        else:
            out_dtype = self.dtype
        out_type = getattr(out_dtype, "type", out_dtype)
        where_is_array = isinstance(where, array)
        # A masked mean needs NaN as a filler, which integers cannot represent.
        if where_is_array and issubclass(out_type, numpy.integer):
            out_dtype = numpy.float64
        apply_axis = self._validate_axis(axis)
        check_kwargs(keepdims=keepdims, where=where)
        truthy_where = bool(where)

        if self._ndim == 1:
            if apply_axis == 1:
                raise numpy.AxisError(1, 1)
            if where_is_array:
                result = self._compute_masked_mean(where, out_dtype, 0)
            else:
                cast_map = {
                    col_name: out_dtype for col_name in self._query_compiler.columns
                }
                result = self._query_compiler.astype(cast_map).mean(
                    axis=0, skipna=False
                )
            if not keepdims:
                # An array-valued `where` has already been applied as a mask, so
                # this only guards against a falsy `where`, for which NumPy
                # returns NaN.
                return result.to_numpy()[0, 0] if where else numpy.nan
            if out is not None:
                if out.shape != (1,):
                    raise ValueError(
                        f"operand was set up as a reduction along axis 0, but the length of the axis is {out.shape[0]} (it has to be 1)"
                    )
                out._update_inplace((numpy.ones_like(out) * numpy.nan)._query_compiler)
            elif not truthy_where:
                return array([numpy.nan], dtype=out_dtype)
            return fix_dtypes_and_determine_return(result, 1, dtype, out, truthy_where)

        if apply_axis is None:
            # Fast path: a single unmasked NaN forces the overall mean to NaN.
            contains_na_check = where.where(self, 0) if where_is_array else self
            has_unmasked_nan = (
                contains_na_check._query_compiler.isna()
                .any(axis=1)
                .any(axis=0)
                .to_numpy()[0, 0]
            )
            if has_unmasked_nan:
                return numpy.nan
            masked = where.where(self, numpy.nan) if where_is_array else self
            # The query compiler has no mean that collapses a 2D object to one
            # value, so it is computed as sum / count. NaN entries (the masked-out
            # ones) are skipped by the sum and must be left out of the count too.
            num_na_elements = (
                masked._query_compiler.isna().sum(axis=1).sum(axis=0).to_numpy()[0, 0]
            )
            num_total_elements = prod(self.shape) - num_na_elements
            total = masked._query_compiler.sum(axis=1).sum(axis=0).to_numpy()[0, 0]
            result = (numpy.array([total], dtype=out_dtype) / num_total_elements)[0]
            if not keepdims:
                return result if truthy_where else numpy.nan
            if out is not None:
                if out.shape != (1, 1):
                    raise ValueError(
                        f"operand was set up as a reduction along axis 1, but the length of the axis is {out.shape[0]} (it has to be 1)"
                    )
                out._update_inplace((numpy.ones_like(out) * numpy.nan)._query_compiler)
            elif not truthy_where:
                return array([[numpy.nan]], dtype=out_dtype)
            wrapped = array(numpy.array([[result]])).astype(out_dtype)._query_compiler
            return fix_dtypes_and_determine_return(
                wrapped, 2, dtype, out, truthy_where
            )

        if apply_axis > 1:
            raise numpy.AxisError(axis, 2)
        if where_is_array:
            result = self._compute_masked_mean(where, out_dtype, apply_axis)
        else:
            cast_map = {
                col_name: out_dtype for col_name in self._query_compiler.columns
            }
            result = self._query_compiler.astype(cast_map).mean(
                axis=apply_axis, skipna=False
            )
        new_ndim = self._ndim if keepdims else self._ndim - 1
        if new_ndim == 0:
            return result.to_numpy()[0, 0] if truthy_where else numpy.nan
        if not keepdims and apply_axis != 1:
            result = result.transpose()
        if out is not None:
            out._update_inplace((numpy.ones_like(out) * numpy.nan)._query_compiler)
        elif not truthy_where:
            reduced_shape = array(_query_compiler=result, _ndim=new_ndim).shape
            return (numpy.ones(reduced_shape)) * numpy.nan
        return fix_dtypes_and_determine_return(
            result, new_ndim, dtype, out, truthy_where
        )

    def __add__(
        self,
        x2,
        out=None,
        where=True,
        casting="same_kind",
        order="K",
        dtype=None,
        subok=True,
    ):
        """
        Add `x2` to this array element-wise.

        `order`, `subok`, `casting` and `where` are passed to ``check_kwargs``
        for validation; `dtype`, `out` and `where` shape the returned value via
        ``fix_dtypes_and_determine_return``.
        """
        check_kwargs(order=order, subok=subok, casting=casting, where=where)
        prepared = self._preprocess_binary_op(x2, dtype=dtype, out=out)
        caller, callee, new_ndim, kwargs = prepared
        summed = caller.add(callee, **kwargs)
        return fix_dtypes_and_determine_return(summed, new_ndim, dtype, out, where)

    def __radd__(
        self,
        x2,
        out=None,
        where=True,
        casting="same_kind",
        order="K",
        dtype=None,
        subok=True,
    ):
        """Reflected addition; delegates to ``__add__`` with the same arguments."""
        return self.__add__(
            x2,
            out=out,
            where=where,
            casting=casting,
            order=order,
            dtype=dtype,
            subok=subok,
        )

    def divide(
        self,
        x2,
        out=None,
        where=True,
        casting="same_kind",
        order="K",
        dtype=None,
        subok=True,
    ):
        """
        Divide this array by `x2` element-wise (true division).

        `order`, `subok`, `casting` and `where` are passed to ``check_kwargs``
        for validation; `dtype`, `out` and `where` shape the returned value via
        ``fix_dtypes_and_determine_return``.
        """
        check_kwargs(order=order, subok=subok, casting=casting, where=where)
        prepared = self._preprocess_binary_op(x2, dtype=dtype, out=out)
        caller, callee, new_ndim, kwargs = prepared
        # When preprocessing hands back a caller other than our own query
        # compiler, the operation looks like 1D_object / 2D_object. Modin can only
        # broadcast from the 2D side, so it is expressed as
        # 2D_object.rtruediv(1D_object) instead.
        operands_swapped = caller != self._query_compiler
        division = caller.rtruediv if operands_swapped else caller.truediv
        quotient = division(callee, **kwargs)
        return fix_dtypes_and_determine_return(quotient, new_ndim, dtype, out, where)

    __truediv__ = divide

    def __rtruediv__(
        self,
        x2,
        out=None,
        where=True,
        casting="same_kind",
        order="K",
        dtype=None,
        subok=True,
    ):
        """
        Reflected true division, i.e. `x2` divided by this array element-wise.

        `order`, `subok`, `casting` and `where` are passed to ``check_kwargs``
        for validation; `dtype`, `out` and `where` shape the returned value via
        ``fix_dtypes_and_determine_return``.
        """
        check_kwargs(order=order, subok=subok, casting=casting, where=where)
        prepared = self._preprocess_binary_op(x2, dtype=dtype, out=out)
        caller, callee, new_ndim, kwargs = prepared
        # If preprocessing returned a caller other than our own query compiler,
        # the plain division on that caller is used; otherwise our own query
        # compiler performs the reflected division.
        operands_swapped = caller != self._query_compiler
        division = caller.truediv if operands_swapped else caller.rtruediv
        quotient = division(callee, **kwargs)
        return fix_dtypes_and_determine_return(quotient, new_ndim, dtype, out, where)

    def floor_divide(
        self,
        x2,
        out=None,
        where=True,
        casting="same_kind",
        order="K",
        dtype=None,
        subok=True,
    ):
        """
        Floor-divide this array by `x2` element-wise.

        For integer output dtypes the result of dividing by zero is rewritten to
        0, because NumPy and pandas disagree on that case.

        Raises
        ------
        NotImplementedError
            If preprocessing returns a caller other than this array's own query
            compiler (the broadcast case described in GH#5529).
        """
        if isinstance(x2, array):
            operand_dtype = pandas.core.dtypes.cast.find_common_type(
                [self.dtype, x2.dtype]
            )
        else:
            operand_dtype = self.dtype
        if dtype is not None:
            out_dtype = dtype
        elif out is not None:
            out_dtype = out.dtype
        else:
            out_dtype = operand_dtype
        check_kwargs(order=order, subok=subok, casting=casting, where=where)

        if is_scalar(x2):
            result = self._query_compiler.floordiv(x2)
            if x2 == 0 and numpy.issubdtype(out_dtype, numpy.integer):
                # NumPy's floor_divide by 0 works differently from pandas', so
                # the infinities are replaced and the remaining entries zeroed.
                without_inf = result.replace(numpy.inf, 0).replace(-numpy.inf, 0)
                result = without_inf.where(self._query_compiler.ne(0), 0)
            return fix_dtypes_and_determine_return(
                result, self._ndim, dtype, out, where
            )

        prepared = self._preprocess_binary_op(x2, dtype=dtype, out=out)
        caller, callee, new_ndim, kwargs = prepared
        if caller != self._query_compiler:
            # Modin does not correctly broadcast when a Series (1D) is the caller
            # and a DataFrame (2D) is the operand. Commutativity cannot be used
            # as a workaround and `rfloordiv` is also incorrect. GH#5529
            raise NotImplementedError(
                "Using floor_divide with broadcast is not currently available in Modin."
            )
        result = caller.floordiv(callee, **kwargs)
        # NOTE(review): `callee.eq(0).any()` appears to yield a query compiler
        # rather than a bool, so this test may always be truthy -- confirm.
        if callee.eq(0).any() and numpy.issubdtype(out_dtype, numpy.integer):
            # Same NumPy/pandas division-by-zero mismatch as in the scalar case.
            without_inf = result.replace(numpy.inf, 0).replace(-numpy.inf, 0)
            result = without_inf.where(callee.ne(0), 0)
        return fix_dtypes_and_determine_return(result, new_ndim, dtype, out, where)

    __floordiv__ = floor_divide

    def power(
        self,
        x2,
        out=None,
        where=True,
        casting="same_kind",
        order="K",
        dtype=None,
        subok=True,
    ):
        """
        Raise this array to the power `x2` element-wise.

        Raises
        ------
        NotImplementedError
            If preprocessing returns a caller other than this array's own query
            compiler (the broadcast case described in GH#5529).
        """
        check_kwargs(order=order, subok=subok, casting=casting, where=where)
        prepared = self._preprocess_binary_op(x2, dtype=dtype, out=out)
        caller, callee, new_ndim, kwargs = prepared
        if caller != self._query_compiler:
            # Modin does not correctly broadcast when a Series (1D) is the caller
            # and a DataFrame (2D) is the operand. Commutativity cannot be used
            # as a workaround and `rpow` is also incorrect. GH#5529
            raise NotImplementedError(
                "Using power with broadcast is not currently available in Modin."
            )
        raised = caller.pow(callee, **kwargs)
        return fix_dtypes_and_determine_return(raised, new_ndim, dtype, out, where)

    __pow__ = power

    def prod(
        self, axis=None, dtype=None, out=None, keepdims=None, initial=None, where=True
    ):
        """
        Compute the product of array elements, optionally along one axis.

        Parameters
        ----------
        axis : int, optional
            Axis to reduce over; ``None`` reduces over every element.
        dtype : dtype, optional
            dtype used for the computation. Falls back to ``out.dtype`` and then
            to ``self.dtype``.
        out : array, optional
            Destination array; it is pre-filled with `initial` before the result
            is written through ``fix_dtypes_and_determine_return``.
        keepdims : bool, optional
            Whether the reduced axes are kept with length one.
        initial : scalar, optional
            Starting value the product is multiplied by; defaults to 1.
        where : bool or array, default: True
            Either a boolean switch or a mask array; masked-out elements are
            replaced by 1 so they do not affect the product.

        Returns
        -------
        array or scalar
            The product. `initial` (scalar or `initial`-filled array) when
            `where` is falsy and no `out` was supplied.

        Raises
        ------
        numpy.AxisError
            If `axis` is out of bounds for the array.
        ValueError
            If `keepdims` is set and `out` does not have the fully reduced
            shape. `out` is left untouched in that case.
        """
        out_dtype = (
            dtype
            if dtype is not None
            else (out.dtype if out is not None else self.dtype)
        )
        initial = 1 if initial is None else initial
        check_kwargs(keepdims=keepdims, where=where)
        apply_axis = self._validate_axis(axis)
        truthy_where = bool(where)
        if self._ndim == 1:
            if apply_axis == 1:
                raise numpy.AxisError(1, 1)
            target = where.where(self, 1) if isinstance(where, array) else self
            result = target._query_compiler.astype(
                {col_name: out_dtype for col_name in target._query_compiler.columns}
            ).prod(axis=0, skipna=False)
            result = result.mul(initial)
            if keepdims:
                if out is not None:
                    # Validate before writing so that a wrongly shaped `out` is
                    # not clobbered by a call that ends up raising.
                    if out.shape != (1,):
                        raise ValueError(
                            f"operand was set up as a reduction along axis 0, but the length of the axis is {out.shape[0]} (it has to be 1)"
                        )
                    out._update_inplace(
                        (numpy.ones_like(out) * initial)
                        .astype(out_dtype)
                        ._query_compiler
                    )
                if truthy_where or out is not None:
                    return fix_dtypes_and_determine_return(
                        result, 1, dtype, out, truthy_where
                    )
                else:
                    return array([initial], dtype=out_dtype)
            return result.to_numpy()[0, 0] if truthy_where else initial
        if apply_axis is None:
            result = self
            if isinstance(where, array):
                result = where.where(self, 1)
            # Reduce along one axis and then the other to collapse to one value.
            result = (
                result.astype(out_dtype)
                ._query_compiler.prod(axis=1, skipna=False)
                .prod(axis=0, skipna=False)
                .to_numpy()[0, 0]
            )
            result *= initial
            if keepdims:
                if out is not None and out.shape != (1, 1):
                    raise ValueError(
                        f"operand was set up as a reduction along axis 1, but the length of the axis is {out.shape[0]} (it has to be 1)"
                    )
                if out is not None:
                    out._update_inplace(
                        (numpy.ones_like(out) * initial)
                        .astype(out_dtype)
                        ._query_compiler
                    )
                if truthy_where or out is not None:
                    return fix_dtypes_and_determine_return(
                        array(numpy.array([[result]]))
                        .astype(out_dtype)
                        ._query_compiler,
                        2,
                        dtype,
                        out,
                        truthy_where,
                    )
                else:
                    return array([[initial]], dtype=out_dtype)
            return result if truthy_where else initial
        if apply_axis > 1:
            raise numpy.AxisError(axis, 2)
        target = where.where(self, 1) if isinstance(where, array) else self
        result = target._query_compiler.astype(
            {col_name: out_dtype for col_name in target._query_compiler.columns}
        ).prod(axis=apply_axis, skipna=False)
        result = result.mul(initial)
        new_ndim = self._ndim - 1 if not keepdims else self._ndim
        if new_ndim == 0:
            return result.to_numpy()[0, 0] if truthy_where else initial
        if not keepdims and apply_axis != 1:
            result = result.transpose()
        # `initial` has already been defaulted to 1 above, so only `out` needs
        # to be checked here.
        if out is not None:
            out._update_inplace(
                (numpy.ones_like(out) * initial).astype(out_dtype)._query_compiler
            )
        if truthy_where or out is not None:
            return fix_dtypes_and_determine_return(
                result, new_ndim, dtype, out, truthy_where
            )
        else:
            return (
                numpy.ones_like(array(_query_compiler=result, _ndim=new_ndim)) * initial
            )

    def multiply(
        self,
        x2,
        out=None,
        where=True,
        casting="same_kind",
        order="K",
        dtype=None,
        subok=True,
    ):
        """
        Multiply this array by `x2` element-wise.

        `order`, `subok`, `casting` and `where` are passed to ``check_kwargs``
        for validation; `dtype`, `out` and `where` shape the returned value via
        ``fix_dtypes_and_determine_return``.
        """
        check_kwargs(order=order, subok=subok, casting=casting, where=where)
        prepared = self._preprocess_binary_op(x2, dtype=dtype, out=out)
        caller, callee, new_ndim, kwargs = prepared
        product = caller.mul(callee, **kwargs)
        return fix_dtypes_and_determine_return(product, new_ndim, dtype, out, where)

    __mul__ = multiply

    def __rmul__(
        self,
        x2,
        out=None,
        where=True,
        casting="same_kind",
        order="K",
        dtype=None,
        subok=True,
    ):
        """Reflected multiplication; delegates to ``multiply`` with the same arguments."""
        return self.multiply(
            x2,
            out=out,
            where=where,
            casting=casting,
            order=order,
            dtype=dtype,
            subok=subok,
        )

    def dot(self, other, out=None):
        """
        Compute the dot product of this array and `other`.

        Parameters
        ----------
        other : scalar or array
            Right-hand operand; it is first passed through
            ``try_convert_from_interoperable_type``.
        out : array, optional
            Destination forwarded to ``fix_dtypes_and_determine_return``.

        Returns
        -------
        array or scalar
            A scalar when both operands are 1D, otherwise an array.

        Raises
        ------
        TypeError
            If `other` is neither a scalar nor an ``array`` after conversion.
        """
        other = try_convert_from_interoperable_type(other)
        if numpy.isscalar(other):
            # Scalar operand: plain scaling, dimensionality is unchanged.
            result = self._query_compiler.mul(other)
            result_ndim = self._ndim
        elif not isinstance(other, array):
            raise TypeError(
                f"Unsupported operand type(s): '{type(self)}' and '{type(other)}'"
            )
        else:
            own_qc, other_qc = self._query_compiler, other._query_compiler
            dims = (self._ndim, other._ndim)
            if dims == (1, 1):
                # Two 1D arrays reduce to a single scalar.
                inner = own_qc.dot(other_qc, squeeze_self=True, squeeze_other=True)
                return inner.to_numpy()[0, 0]
            if dims == (2, 2):
                # Two 2D arrays give a 2D array.
                result, result_ndim = own_qc.dot(other_qc), 2
            elif dims == (1, 2):
                result, result_ndim = own_qc.dot(other_qc, squeeze_self=True), 1
            elif dims == (2, 1):
                result, result_ndim = own_qc.dot(other_qc), 1
        return fix_dtypes_and_determine_return(result, result_ndim, out=out)

    def __matmul__(self, other):
        """
        Implement ``self @ other`` by delegating to ``dot``.

        Raises
        ------
        ValueError
            If `other` is a scalar, which matmul does not accept.
        """
        if not numpy.isscalar(other):
            return self.dot(other)
        # numpy's original error message is something cryptic about a gufunc
        # signature, so a clearer one is raised here.
        raise ValueError(
            "cannot call matmul with a scalar argument (use np.dot instead)"
        )

    def _norm(self, ord=None, axis=None, keepdims=False):
        """
        Compute the 2-norm (Frobenius norm for 2D input) of the array.

        Parameters
        ----------
        ord : {None, "fro"}, optional
            Norm order; any other value is rejected.
        axis : int, optional
            Axis to reduce over. ``None`` reduces over every element and yields
            a scalar. Negative values count from the last axis.
        keepdims : bool, default: False
            Passed to ``check_kwargs`` for validation only.

        Returns
        -------
        array or scalar
            A scalar when `axis` is ``None``, otherwise a 1D array.

        Raises
        ------
        NotImplementedError
            If `ord` is neither ``None`` nor ``"fro"``.
        numpy.AxisError
            If `axis` is out of bounds for the array.
        """
        check_kwargs(keepdims=keepdims)
        # Only the default / Frobenius norm is handled here; other orders such
        # as numpy.inf, -numpy.inf and 0 are not supported.
        if ord is not None and ord not in ("fro",):
            # Build a single message string: passing `ord` as a second exception
            # argument would make the message render as a tuple.
            raise NotImplementedError(f"unsupported ord argument for norm: {ord!r}")
        if isinstance(axis, int) and axis < 0:
            apply_axis = self._ndim + axis
        else:
            apply_axis = axis or 0
        if apply_axis >= self._ndim or apply_axis < 0:
            raise numpy.AxisError(axis, self._ndim)
        # Sum of squares first, square root at the end.
        result = self._query_compiler.pow(2)
        if self._ndim == 2:
            result = result.sum(axis=apply_axis)
            if axis is None:
                # Collapse the remaining axis as well (`^ 1` flips 0 <-> 1).
                result = result.sum(axis=apply_axis ^ 1)
        else:
            result = result.sum(axis=0)
        if axis is None:
            # Return a scalar
            return result._sqrt().to_numpy()[0, 0]
        result = result._sqrt()
        # The frame may come back transposed after going through pandas; make
        # sure it is a row vector (1xN) rather than a column (Nx1).
        if len(result.index) != 1:
            result = result.transpose()
        return array(_query_compiler=result, _ndim=1)

    def remainder(
        self,
        x2,
        out=None,
        where=True,
        casting="same_kind",
        order="K",
        dtype=None,
        subok=True,
    ):
        """
        Compute the element-wise remainder of dividing this array by `x2`.

        For integer output dtypes the NaN produced by a zero divisor is rewritten
        to 0, because NumPy and pandas disagree on that case.

        Raises
        ------
        NotImplementedError
            If preprocessing returns a caller other than this array's own query
            compiler (the broadcast case described in GH#5529).
        """
        if isinstance(x2, array):
            operand_dtype = pandas.core.dtypes.cast.find_common_type(
                [self.dtype, x2.dtype]
            )
        else:
            operand_dtype = self.dtype
        if dtype is not None:
            out_dtype = dtype
        elif out is not None:
            out_dtype = out.dtype
        else:
            out_dtype = operand_dtype
        check_kwargs(order=order, subok=subok, casting=casting, where=where)

        if is_scalar(x2):
            cast_map = {
                col_name: out_dtype for col_name in self._query_compiler.columns
            }
            result = self._query_compiler.astype(cast_map).mod(x2)
            if x2 == 0 and numpy.issubdtype(out_dtype, numpy.integer):
                # NumPy's remainder by 0 works differently from pandas', so the
                # output is patched up.
                result = result.replace(numpy.nan, 0)
            return fix_dtypes_and_determine_return(
                result, self._ndim, dtype, out, where
            )

        prepared = self._preprocess_binary_op(x2, dtype=dtype, out=out)
        caller, callee, new_ndim, kwargs = prepared
        if caller != self._query_compiler:
            # Modin does not correctly broadcast when a Series (1D) is the caller
            # and a DataFrame (2D) is the operand. Commutativity cannot be used
            # as a workaround and `rmod` is also incorrect. GH#5529
            raise NotImplementedError(
                "Using remainder with broadcast is not currently available in Modin."
            )
        result = caller.mod(callee, **kwargs)
        if callee.eq(0).any() and numpy.issubdtype(out_dtype, numpy.integer):
            # NumPy's remainder by 0 works differently from pandas', so the
            # output is patched up.
            result = result.replace(numpy.nan, 0)
        return fix_dtypes_and_determine_return(result, new_ndim, dtype, out, where)

    __mod__ = remainder

    def subtract(
        self,
        x2,
        out=None,
        where=True,
        casting="same_kind",
        order="K",
        dtype=None,
        subok=True,
    ):
        """
        Subtract `x2` from this array element-wise.

        `order`, `subok`, `casting` and `where` are passed to ``check_kwargs``
        for validation; `dtype`, `out` and `where` shape the returned value via
        ``fix_dtypes_and_determine_return``.
        """
        check_kwargs(order=order, subok=subok, casting=casting, where=where)
        prepared = self._preprocess_binary_op(x2, dtype=dtype, out=out)
        caller, callee, new_ndim, kwargs = prepared
        # When preprocessing hands back a caller other than our own query
        # compiler, the operation looks like 1D_object - 2D_object. Modin can only
        # broadcast from the 2D side, so it is expressed as
        # 2D_object.rsub(1D_object) instead.
        operands_swapped = caller != self._query_compiler
        subtraction = caller.rsub if operands_swapped else caller.sub
        difference = subtraction(callee, **kwargs)
        return fix_dtypes_and_determine_return(
            difference, new_ndim, dtype, out, where
        )

    __sub__ = subtract

    def __rsub__(
        self,
        x2,
        out=None,
        where=True,
        casting="same_kind",
        order="K",
        dtype=None,
        subok=True,
    ):
        """
        Reflected subtraction, i.e. `x2` minus this array element-wise.

        `order`, `subok`, `casting` and `where` are passed to ``check_kwargs``
        for validation; `dtype`, `out` and `where` shape the returned value via
        ``fix_dtypes_and_determine_return``.
        """
        check_kwargs(order=order, subok=subok, casting=casting, where=where)
        prepared = self._preprocess_binary_op(x2, dtype=dtype, out=out)
        caller, callee, new_ndim, kwargs = prepared
        # If preprocessing returned a caller other than our own query compiler
        # (presumably the 2D operand of a 1D/2D pair), the plain subtraction on
        # that caller is used; otherwise our own query compiler performs the
        # reflected subtraction.
        operands_swapped = caller != self._query_compiler
        subtraction = caller.sub if operands_swapped else caller.rsub
        difference = subtraction(callee, **kwargs)
        return fix_dtypes_and_determine_return(
            difference, new_ndim, dtype, out, where
        )

    def sum(
        self, axis=None, dtype=None, out=None, keepdims=None, initial=None, where=True
    ):
        """
        Compute the sum of array elements, optionally along one axis.

        Parameters
        ----------
        axis : int, optional
            Axis to reduce over; ``None`` reduces over every element.
        dtype : dtype, optional
            dtype used for the computation. Falls back to ``out.dtype`` and then
            to ``self.dtype``.
        out : array, optional
            Destination array; it is pre-filled with `initial` before the result
            is written through ``fix_dtypes_and_determine_return``.
        keepdims : bool, optional
            Whether the reduced axes are kept with length one.
        initial : scalar, optional
            Starting value added to the sum; defaults to 0.
        where : bool or array, default: True
            Either a boolean switch or a mask array; masked-out elements are
            replaced by 0 so they do not affect the sum.

        Returns
        -------
        array or scalar
            The sum. `initial` (scalar or `initial`-filled array) when `where`
            is falsy and no `out` was supplied.

        Raises
        ------
        numpy.AxisError
            If `axis` is out of bounds for the array.
        ValueError
            If `keepdims` is set and `out` does not have the fully reduced
            shape. `out` is left untouched in that case.
        """
        out_dtype = (
            dtype
            if dtype is not None
            else (out.dtype if out is not None else self.dtype)
        )
        initial = 0 if initial is None else initial
        check_kwargs(keepdims=keepdims, where=where)
        apply_axis = self._validate_axis(axis)
        truthy_where = bool(where)
        if self._ndim == 1:
            if apply_axis == 1:
                raise numpy.AxisError(1, 1)
            target = where.where(self, 0) if isinstance(where, array) else self
            if target.dtype != out_dtype:
                target = target.astype(out_dtype)
            result = target._query_compiler.sum(axis=0, skipna=False)
            if initial != 0:
                result = result.add(initial)
            if keepdims:
                if out is not None:
                    # Validate before writing so that a wrongly shaped `out` is
                    # not clobbered by a call that ends up raising.
                    if out.shape != (1,):
                        raise ValueError(
                            f"operand was set up as a reduction along axis 0, but the length of the axis is {out.shape[0]} (it has to be 1)"
                        )
                    out._update_inplace(
                        (
                            numpy.ones_like(out, dtype=out_dtype) * initial
                        )._query_compiler
                    )
                if truthy_where or out is not None:
                    return fix_dtypes_and_determine_return(
                        result, 1, dtype, out, truthy_where
                    )
                else:
                    return array([initial], dtype=out_dtype)
            return result.to_numpy()[0, 0] if truthy_where else initial
        if apply_axis is None:
            target = where.where(self, 0) if isinstance(where, array) else self
            if target.dtype != out_dtype:
                target = target.astype(out_dtype)
            # Reduce along one axis and then the other to collapse to one value.
            result = (
                target._query_compiler.sum(axis=1, skipna=False)
                .sum(axis=0, skipna=False)
                .to_numpy()[0, 0]
            )
            if initial != 0:
                result += initial
            if keepdims:
                if out is not None and out.shape != (1, 1):
                    raise ValueError(
                        f"operand was set up as a reduction along axis 1, but the length of the axis is {out.shape[0]} (it has to be 1)"
                    )
                if out is not None:
                    out._update_inplace(
                        (
                            numpy.ones_like(out, dtype=out_dtype) * initial
                        )._query_compiler
                    )
                if truthy_where or out is not None:
                    return fix_dtypes_and_determine_return(
                        array(numpy.array([[result]], dtype=out_dtype))._query_compiler,
                        2,
                        dtype,
                        out,
                        truthy_where,
                    )
                else:
                    return array([[initial]], dtype=out_dtype)
            return result if truthy_where else initial
        if apply_axis > 1:
            raise numpy.AxisError(axis, 2)
        target = where.where(self, 0) if isinstance(where, array) else self
        if target.dtype != out_dtype:
            target = target.astype(out_dtype)
        result = target._query_compiler.sum(axis=apply_axis, skipna=False)
        if initial != 0:
            result = result.add(initial)
        new_ndim = self._ndim - 1 if not keepdims else self._ndim
        if new_ndim == 0:
            return result.to_numpy()[0, 0] if truthy_where else initial
        if not keepdims and apply_axis != 1:
            result = result.transpose()
        if out is not None:
            out._update_inplace(
                (numpy.ones_like(out, dtype=out_dtype) * initial)._query_compiler
            )
        if truthy_where or out is not None:
            return fix_dtypes_and_determine_return(
                result, new_ndim, dtype, out, truthy_where
            )
        else:
            return (
                numpy.zeros_like(array(_query_compiler=result, _ndim=new_ndim))
                + initial
            )

    def all(self, axis=None, out=None, keepdims=None, *, where=True):
        """
        Test whether all selected elements evaluate to True.

        Parameters
        ----------
        axis : int, optional
            Axis to reduce over; ``None`` reduces over every element.
        out : array, optional
            Destination forwarded to ``fix_dtypes_and_determine_return``.
        keepdims : bool, optional
            Whether the reduced axes are kept with length one.
        where : bool or array, default: True
            Either a boolean switch or a mask array; masked-out elements are
            replaced by True so they do not affect the result.

        Returns
        -------
        array or bool
            The reduction result. True (scalar or True-filled array) when
            `where` is falsy and no `out` was supplied.

        Raises
        ------
        numpy.AxisError
            If `axis` is out of bounds for the array.
        ValueError
            If `keepdims` is set and `out` does not have the fully reduced shape.
        """
        check_kwargs(keepdims=keepdims, where=where)
        truthy_where = bool(where)
        apply_axis = self._validate_axis(axis)
        target = where.where(self, True) if isinstance(where, array) else self
        if self._ndim == 1:
            if apply_axis == 1:
                raise numpy.AxisError(1, 1)
            result = target._query_compiler.all(axis=0)
            if keepdims:
                if out is not None and out.shape != (1,):
                    raise ValueError(
                        f"operand was set up as a reduction along axis 0, but the length of the axis is {out.shape[0]} (it has to be 1)"
                    )
                if truthy_where or out is not None:
                    return fix_dtypes_and_determine_return(
                        result, 1, bool, out, truthy_where
                    )
                else:
                    return array([True], dtype=bool)
            return result.to_numpy()[0, 0] if truthy_where else True
        if apply_axis is None:
            # Reduce along one axis and then the other to collapse to one value.
            result = target._query_compiler.all(axis=1).all(axis=0)
            if keepdims:
                if out is not None and out.shape != (1, 1):
                    raise ValueError(
                        f"operand was set up as a reduction along axis 0, but the length of the axis is {out.shape[0]} (it has to be 1)"
                    )
                if truthy_where or out is not None:
                    # Materialise the reduced value before wrapping it: putting
                    # the query compiler object itself into a bool array would
                    # record the object's truthiness instead of the data.
                    reduced_value = result.to_numpy()[0, 0]
                    return fix_dtypes_and_determine_return(
                        array(
                            numpy.array([[reduced_value]], dtype=bool)
                        )._query_compiler,
                        2,
                        bool,
                        out,
                        truthy_where,
                    )
                else:
                    return array([[True]], dtype=bool)
            return result.to_numpy()[0, 0] if truthy_where else True
        if apply_axis > 1:
            raise numpy.AxisError(axis, 2)
        result = target._query_compiler.all(axis=apply_axis)
        new_ndim = self._ndim - 1 if not keepdims else self._ndim
        if new_ndim == 0:
            result = result.to_numpy()[0, 0]
            return result if truthy_where else True
        if not keepdims and apply_axis != 1:
            result = result.transpose()
        if truthy_where or out is not None:
            return fix_dtypes_and_determine_return(
                result, new_ndim, bool, out, truthy_where
            )
        else:
            return numpy.ones_like(array(_query_compiler=result, _ndim=new_ndim))

    _all = all

    def any(self, axis=None, out=None, keepdims=None, *, where=True):
        """
        Test whether any selected element evaluates to True.

        Parameters
        ----------
        axis : int, optional
            Axis to reduce over; ``None`` reduces over every element.
        out : array, optional
            Destination forwarded to ``fix_dtypes_and_determine_return``.
        keepdims : bool, optional
            Whether the reduced axes are kept with length one.
        where : bool or array, default: True
            Either a boolean switch or a mask array; masked-out elements are
            replaced by False so they do not affect the result.

        Returns
        -------
        array or bool
            The reduction result. False (scalar or False-filled array) when
            `where` is falsy and no `out` was supplied.

        Raises
        ------
        numpy.AxisError
            If `axis` is out of bounds for the array.
        ValueError
            If `keepdims` is set and `out` does not have the fully reduced shape.
        """
        check_kwargs(keepdims=keepdims, where=where)
        truthy_where = bool(where)
        apply_axis = self._validate_axis(axis)
        target = where.where(self, False) if isinstance(where, array) else self
        if self._ndim == 1:
            if apply_axis == 1:
                raise numpy.AxisError(1, 1)
            result = target._query_compiler.any(axis=0)
            if keepdims:
                if out is not None and out.shape != (1,):
                    raise ValueError(
                        f"operand was set up as a reduction along axis 0, but the length of the axis is {out.shape[0]} (it has to be 1)"
                    )
                if truthy_where or out is not None:
                    return fix_dtypes_and_determine_return(
                        result, 1, bool, out, truthy_where
                    )
                else:
                    return array([False], dtype=bool)
            return result.to_numpy()[0, 0] if truthy_where else False
        if apply_axis is None:
            # Reduce along one axis and then the other to collapse to one value.
            result = target._query_compiler.any(axis=1).any(axis=0)
            if keepdims:
                if out is not None and out.shape != (1, 1):
                    raise ValueError(
                        f"operand was set up as a reduction along axis 0, but the length of the axis is {out.shape[0]} (it has to be 1)"
                    )
                if truthy_where or out is not None:
                    # Materialise the reduced value before wrapping it: putting
                    # the query compiler object itself into a bool array would
                    # record the object's truthiness instead of the data.
                    reduced_value = result.to_numpy()[0, 0]
                    return fix_dtypes_and_determine_return(
                        array(
                            numpy.array([[reduced_value]], dtype=bool)
                        )._query_compiler,
                        2,
                        bool,
                        out,
                        truthy_where,
                    )
                else:
                    return array([[False]], dtype=bool)
            return result.to_numpy()[0, 0] if truthy_where else False
        if apply_axis > 1:
            raise numpy.AxisError(axis, 2)
        result = target._query_compiler.any(axis=apply_axis)
        new_ndim = self._ndim - 1 if not keepdims else self._ndim
        if new_ndim == 0:
            result = result.to_numpy()[0, 0]
            return result if truthy_where else False
        if not keepdims and apply_axis != 1:
            result = result.transpose()
        if truthy_where or out is not None:
            return fix_dtypes_and_determine_return(
                result, new_ndim, bool, out, truthy_where
            )
        else:
            return numpy.zeros_like(array(_query_compiler=result, _ndim=new_ndim))

    _any = any

    def argmax(self, axis=None, out=None, keepdims=None):
        """
        Return the positions of the maximum values along an axis.

        Parameters
        ----------
        axis : int, optional
            Axis to reduce over. With ``None`` on a 2-D array the position is
            reported in row-major (flattened) order.
        out : modin.numpy.array, optional
            Output array, forwarded to ``fix_dtypes_and_determine_return``
            whenever an array (rather than a scalar) is returned.
        keepdims : bool, optional
            If truthy, the reduced axes are kept in the result.

        Returns
        -------
        scalar or modin.numpy.array
            A scalar position for full reductions without `keepdims`, otherwise
            an array whose dtype is requested as ``numpy.int64``.

        Raises
        ------
        numpy.AxisError
            If the validated axis is 1 for a 1-D array or greater than 1.
        ValueError
            If `keepdims` is set for a full reduction and `out` is not of shape
            ``(1,)`` (1-D input) or ``(1, 1)`` (2-D input).

        Notes
        -----
        When the reduced data contains NaN, the position of the first NaN is
        reported instead of the position of the largest value.
        """
        check_kwargs(keepdims=keepdims)
        apply_axis = self._validate_axis(axis)
        if self._ndim == 1:
            if apply_axis == 1:
                raise numpy.AxisError(1, 1)
            # Build the NaN mask once and reuse it for both the check and the lookup.
            na_row_map = self._query_compiler.isna().any(axis=1)
            if na_row_map.any(axis=0).to_numpy()[0, 0]:
                result = na_row_map.idxmax()
            else:
                result = self._query_compiler.idxmax(axis=0)
            if keepdims:
                if out is not None and out.shape != (1,):
                    raise ValueError(
                        f"operand was set up as a reduction along axis 0, but the length of the axis is {out.shape[0]} (it has to be 1)"
                    )
                return fix_dtypes_and_determine_return(
                    result, 1, numpy.int64, out, True
                )
            return result.to_numpy()[0, 0]
        if apply_axis is None:
            na_row_map = self._query_compiler.isna().any(axis=1)
            if na_row_map.any(axis=0).to_numpy()[0, 0]:
                # Rows holding a NaN: the first NaN of the first such row wins.
                na_row = self._query_compiler.getitem_array(na_row_map)
                col_idx = na_row.to_numpy().argmax()
                final_idxmax = na_row_map.idxmax().to_numpy().flatten()
            else:
                inner_idxs = self._query_compiler.idxmax(axis=1)
                final_idxmax = (
                    self._query_compiler.max(axis=1).idxmax(axis=0).to_numpy().flatten()
                )
                col_idx = inner_idxs.take_2d_positional(final_idxmax, [0]).to_numpy()[
                    0, 0
                ]
            # Convert the (row, column) position into a row-major flat index.
            result = (self.shape[1] * final_idxmax[0]) + col_idx
            if keepdims:
                if out is not None and out.shape != (1, 1):
                    raise ValueError(
                        f"operand was set up as a reduction along axis 0, but the length of the axis is {out.shape[0]} (it has to be 1)"
                    )
                # The position must stay an integer: building this with a boolean
                # dtype would collapse every index above 1 to 1.
                return fix_dtypes_and_determine_return(
                    array(numpy.array([[result]], dtype=numpy.int64))._query_compiler,
                    2,
                    numpy.int64,
                    out,
                    True,
                )
            return result
        if apply_axis > 1:
            raise numpy.AxisError(axis, 2)
        result = self._query_compiler.idxmax(axis=apply_axis)
        na_mask = self._query_compiler.isna().any(axis=apply_axis)
        if na_mask.any(axis=apply_axis ^ 1).to_numpy()[0, 0]:
            # Where a reduced slice holds a NaN, report the first NaN's position.
            na_idxs = self._query_compiler.isna().idxmax(axis=apply_axis)
            result = na_idxs.where(na_mask, result)
        new_ndim = self._ndim - 1 if not keepdims else self._ndim
        if new_ndim == 0:
            result = result.to_numpy()[0, 0]
            return result
        if not keepdims and apply_axis != 1:
            result = result.transpose()
        return fix_dtypes_and_determine_return(result, new_ndim, numpy.int64, out, True)

    def argmin(self, axis=None, out=None, keepdims=None):
        """
        Return the positions of the minimum values along an axis.

        Parameters
        ----------
        axis : int, optional
            Axis to reduce over. With ``None`` on a 2-D array the position is
            reported in row-major (flattened) order.
        out : modin.numpy.array, optional
            Output array, forwarded to ``fix_dtypes_and_determine_return``
            whenever an array (rather than a scalar) is returned.
        keepdims : bool, optional
            If truthy, the reduced axes are kept in the result.

        Returns
        -------
        scalar or modin.numpy.array
            A scalar position for full reductions without `keepdims`, otherwise
            an array whose dtype is requested as ``numpy.int64``.

        Raises
        ------
        numpy.AxisError
            If the validated axis is 1 for a 1-D array or greater than 1.
        ValueError
            If `keepdims` is set for a full reduction and `out` is not of shape
            ``(1,)`` (1-D input) or ``(1, 1)`` (2-D input).

        Notes
        -----
        When the reduced data contains NaN, the position of the first NaN is
        reported instead of the position of the smallest value.
        """
        check_kwargs(keepdims=keepdims)
        apply_axis = self._validate_axis(axis)
        if self._ndim == 1:
            if apply_axis == 1:
                raise numpy.AxisError(1, 1)
            # Build the NaN mask once and reuse it for both the check and the lookup.
            na_row_map = self._query_compiler.isna().any(axis=1)
            if na_row_map.any(axis=0).to_numpy()[0, 0]:
                # numpy apparently considers nan to be the minimum value in an array if present
                # therefore, we use idxmax on the mask array to identify where nans are
                result = na_row_map.idxmax()
            else:
                result = self._query_compiler.idxmin(axis=0)
            if keepdims:
                if out is not None and out.shape != (1,):
                    raise ValueError(
                        f"operand was set up as a reduction along axis 0, but the length of the axis is {out.shape[0]} (it has to be 1)"
                    )
                return fix_dtypes_and_determine_return(
                    result, 1, numpy.int64, out, True
                )
            return result.to_numpy()[0, 0]
        if apply_axis is None:
            na_row_map = self._query_compiler.isna().any(axis=1)
            if na_row_map.any(axis=0).to_numpy()[0, 0]:
                # Rows holding a NaN: ``argmax`` on the raw values locates the
                # first NaN of the first such row.
                na_row = self._query_compiler.getitem_array(na_row_map)
                col_idx = na_row.to_numpy().argmax()
                row_pos = na_row_map.idxmax().to_numpy().flatten()
            else:
                inner_idxs = self._query_compiler.idxmin(axis=1)
                row_pos = (
                    self._query_compiler.min(axis=1).idxmin(axis=0).to_numpy().flatten()
                )
                col_idx = inner_idxs.take_2d_positional(row_pos, [0]).to_numpy()[0, 0]
            # Convert the (row, column) position into a row-major flat index.
            result = (self.shape[1] * row_pos[0]) + col_idx
            if keepdims:
                if out is not None and out.shape != (1, 1):
                    raise ValueError(
                        f"operand was set up as a reduction along axis 0, but the length of the axis is {out.shape[0]} (it has to be 1)"
                    )
                # The position must stay an integer: building this with a boolean
                # dtype would collapse every index above 1 to 1.
                return fix_dtypes_and_determine_return(
                    array(numpy.array([[result]], dtype=numpy.int64))._query_compiler,
                    2,
                    numpy.int64,
                    out,
                    True,
                )
            return result
        if apply_axis > 1:
            raise numpy.AxisError(axis, 2)
        result = self._query_compiler.idxmin(axis=apply_axis)
        na_mask = self._query_compiler.isna().any(axis=apply_axis)
        if na_mask.any(axis=apply_axis ^ 1).to_numpy()[0, 0]:
            # Where a reduced slice holds a NaN, report the first NaN's position.
            na_idxs = self._query_compiler.isna().idxmax(axis=apply_axis)
            result = na_idxs.where(na_mask, result)
        new_ndim = self._ndim - 1 if not keepdims else self._ndim
        if new_ndim == 0:
            result = result.to_numpy()[0, 0]
            return result
        if not keepdims and apply_axis != 1:
            result = result.transpose()
        return fix_dtypes_and_determine_return(result, new_ndim, numpy.int64, out, True)

    def _isfinite(
        self,
        /,
        out=None,
        *,
        where=True,
        casting="same_kind",
        order="K",
        dtype=None,
        subok=True,
    ):
        """
        Apply the query compiler's ``_isfinite`` operation to this array.

        The keyword arguments are first passed through ``check_kwargs``; the
        query compiler result, together with `dtype`, `out` and `where`, is
        then handed to ``fix_dtypes_and_determine_return``, whose return value
        is returned unchanged.
        """
        check_kwargs(where=where, casting=casting, order=order, subok=subok)
        return fix_dtypes_and_determine_return(
            self._query_compiler._isfinite(), self._ndim, dtype, out, where
        )

    def _isinf(
        self,
        /,
        out=None,
        *,
        where=True,
        casting="same_kind",
        order="K",
        dtype=None,
        subok=True,
    ):
        """
        Apply the query compiler's ``_isinf`` operation to this array.

        The keyword arguments are first passed through ``check_kwargs``; the
        query compiler result, together with `dtype`, `out` and `where`, is
        then handed to ``fix_dtypes_and_determine_return``, whose return value
        is returned unchanged.
        """
        check_kwargs(where=where, casting=casting, order=order, subok=subok)
        return fix_dtypes_and_determine_return(
            self._query_compiler._isinf(), self._ndim, dtype, out, where
        )

    def _isnan(
        self,
        /,
        out=None,
        *,
        where=True,
        casting="same_kind",
        order="K",
        dtype=None,
        subok=True,
    ):
        """
        Apply the query compiler's ``isna`` operation to this array.

        The keyword arguments are first passed through ``check_kwargs``; the
        ``isna`` result, together with `dtype`, `out` and `where`, is then
        handed to ``fix_dtypes_and_determine_return``, whose return value is
        returned unchanged.
        """
        check_kwargs(where=where, casting=casting, order=order, subok=subok)
        return fix_dtypes_and_determine_return(
            self._query_compiler.isna(), self._ndim, dtype, out, where
        )

    def _isnat(
        self,
        /,
        out=None,
        *,
        where=True,
        casting="same_kind",
        order="K",
        dtype=None,
        subok=True,
    ):
        """
        Apply the query compiler's ``_isnat`` operation to this array.

        The keyword arguments are first passed through ``check_kwargs``; the
        query compiler result, together with `dtype`, `out` and `where`, is
        then handed to ``fix_dtypes_and_determine_return``, whose return value
        is returned unchanged.
        """
        check_kwargs(where=where, casting=casting, order=order, subok=subok)
        return fix_dtypes_and_determine_return(
            self._query_compiler._isnat(), self._ndim, dtype, out, where
        )

    def _isneginf(self, out=None):
        """
        Apply the query compiler's ``_isneginf`` operation to this array.

        The result keeps this array's dimensionality and is routed through
        ``fix_dtypes_and_determine_return`` together with `out`.
        """
        return fix_dtypes_and_determine_return(
            self._query_compiler._isneginf(), self._ndim, out=out
        )

    def _isposinf(self, out=None):
        """
        Apply the query compiler's ``_isposinf`` operation to this array.

        The result keeps this array's dimensionality and is routed through
        ``fix_dtypes_and_determine_return`` together with `out`.
        """
        return fix_dtypes_and_determine_return(
            self._query_compiler._isposinf(), self._ndim, out=out
        )

    def _iscomplex(self):
        """
        Apply the query compiler's ``_iscomplex`` operation to this array.

        The result keeps this array's dimensionality and is routed through
        ``fix_dtypes_and_determine_return``.
        """
        return fix_dtypes_and_determine_return(
            self._query_compiler._iscomplex(), self._ndim
        )

    def _isreal(self):
        """
        Apply the query compiler's ``_isreal`` operation to this array.

        The result keeps this array's dimensionality and is routed through
        ``fix_dtypes_and_determine_return``.
        """
        return fix_dtypes_and_determine_return(
            self._query_compiler._isreal(), self._ndim
        )

    def _logical_not(
        self,
        /,
        out=None,
        *,
        where=True,
        casting="same_kind",
        order="K",
        dtype=None,
        subok=True,
    ):
        """
        Apply the query compiler's ``_logical_not`` operation to this array.

        The keyword arguments are first passed through ``check_kwargs``; the
        query compiler result, together with `dtype`, `out` and `where`, is
        then handed to ``fix_dtypes_and_determine_return``, whose return value
        is returned unchanged.
        """
        check_kwargs(where=where, casting=casting, order=order, subok=subok)
        return fix_dtypes_and_determine_return(
            self._query_compiler._logical_not(), self._ndim, dtype, out, where
        )

    def _logical_binop(
        self, qc_method_name, x2, out, where, casting, order, dtype, subok
    ):
        """
        Run a binary logical operation between this array and `x2`.

        Parameters
        ----------
        qc_method_name : str
            Name of the method looked up on the preprocessed left operand.
        x2 : modin.numpy.array
            Right operand; must have the same ``_ndim`` as this array.
        out, where, dtype
            Forwarded to ``fix_dtypes_and_determine_return``.
        casting, order, subok
            Only passed through ``check_kwargs``.

        Raises
        ------
        ValueError
            If the two operands differ in their number of dimensions.
        """
        check_kwargs(where=where, casting=casting, order=order, subok=subok)
        if self._ndim != x2._ndim:
            raise ValueError(
                "modin.numpy logic operators do not currently support broadcasting between arrays of different dimensions"
            )
        # The fourth element (extra kwargs) is deliberately ignored: the logical
        # query compiler methods do not use it.
        lhs, rhs, result_ndim, _ = self._preprocess_binary_op(
            x2, cast_input_types=False, dtype=dtype, out=out
        )
        operation = getattr(lhs, qc_method_name)
        return fix_dtypes_and_determine_return(
            operation(rhs), result_ndim, dtype, out, where
        )

    def _logical_and(
        self,
        x2,
        /,
        out=None,
        *,
        where=True,
        casting="same_kind",
        order="K",
        dtype=None,
        subok=True,
    ):
        """Dispatch to ``_logical_binop`` using the ``"_logical_and"`` method name."""
        return self._logical_binop(
            "_logical_and",
            x2,
            out=out,
            where=where,
            casting=casting,
            order=order,
            dtype=dtype,
            subok=subok,
        )

    def _logical_or(
        self,
        x2,
        /,
        out=None,
        *,
        where=True,
        casting="same_kind",
        order="K",
        dtype=None,
        subok=True,
    ):
        """Dispatch to ``_logical_binop`` using the ``"_logical_or"`` method name."""
        return self._logical_binop(
            "_logical_or",
            x2,
            out=out,
            where=where,
            casting=casting,
            order=order,
            dtype=dtype,
            subok=subok,
        )

    def _logical_xor(
        self,
        x2,
        /,
        out=None,
        *,
        where=True,
        casting="same_kind",
        order="K",
        dtype=None,
        subok=True,
    ):
        """Dispatch to ``_logical_binop`` using the ``"_logical_xor"`` method name."""
        return self._logical_binop(
            "_logical_xor",
            x2,
            out=out,
            where=where,
            casting=casting,
            order=order,
            dtype=dtype,
            subok=subok,
        )

    def flatten(self, order="C"):
        """
        Return a 1-D array holding this array's rows laid out one after another.

        Parameters
        ----------
        order : str, default: "C"
            Only passed through ``check_kwargs``.

        Returns
        -------
        modin.numpy.array
            A new array with ``_ndim`` equal to 1.
        """
        check_kwargs(order=order)
        qc = self._query_compiler

        def _single_row(label):
            # One-row query compiler whose row index has been reset.
            return qc.getitem_row_array([label]).reset_index(drop=True)

        trailing_rows = [_single_row(label) for label in qc.index[1:]]
        # Glue every row side by side, producing a single long row ...
        flat_qc = _single_row(qc.index[0]).concat(1, trailing_rows, ignore_index=True)
        flat_qc.columns = range(len(flat_qc.columns))
        # ... and turn that row into a single column.
        return array(_query_compiler=flat_qc.transpose(), _ndim=1)

    def _get_shape(self):
        """
        Return the shape of this array as a tuple.

        Returns
        -------
        tuple of int
            ``(rows,)`` for a 1-D array, otherwise ``(rows, columns)``, taken
            from the lengths of the query compiler's index and columns.
        """
        qc = self._query_compiler
        dims = (len(qc.index),)
        if self._ndim != 1:
            dims += (len(qc.columns),)
        return dims

    def _set_shape(self, new_shape):
        """
        Reshape this array in place; only flattening to 1-D is supported.

        Parameters
        ----------
        new_shape : int or tuple of int
            Requested shape. Its total size must equal the current size.

        Raises
        ------
        TypeError
            If `new_shape` is neither an int nor a tuple, or if one of the
            tuple's entries is not an int.
        ValueError
            If the requested size differs from the current size.
        NotImplementedError
            If `new_shape` is a tuple whose length is not 1.
        """
        if isinstance(new_shape, tuple):
            for dim in new_shape:
                if not isinstance(dim, int):
                    # Report the bare type name (e.g. 'str'), not "<class 'str'>".
                    raise TypeError(
                        f"'{type(dim).__name__}' object cannot be interpreted as an integer"
                    )
        elif not isinstance(new_shape, int):
            raise TypeError(
                f"expected a sequence of integers or a single integer, got '{new_shape}'"
            )

        shape_tuple = new_shape if isinstance(new_shape, tuple) else (new_shape,)
        current_size = prod(self._get_shape())
        if prod(shape_tuple) != current_size:
            raise ValueError(
                f"cannot reshape array of size {current_size} into {shape_tuple}"
            )
        if len(shape_tuple) == 1:
            self._update_inplace(self.flatten()._query_compiler)
            self._ndim = 1
        else:
            raise NotImplementedError(
                "Modin numpy does not currently support reshaping to a 2D object"
            )

    shape = property(_get_shape, _set_shape)

    def transpose(self):
        """
        Return the transpose of this array.

        Returns
        -------
        modin.numpy.array
            This very object for a 1-D array, otherwise a new array built from
            the transposed query compiler.
        """
        if self._ndim != 1:
            flipped = self._query_compiler.transpose()
            return array(_query_compiler=flipped, _ndim=self._ndim)
        return self

    T = property(transpose)

    @property
    def dtype(self):
        """
        Return the dtype of this array.

        Returns
        -------
        dtype
            The dtype of the single column for a 1-D array, otherwise the
            common type of all column dtypes.
        """
        column_dtypes = self._query_compiler.dtypes
        if self._ndim == 1:
            # Positional access: the single column is not guaranteed to carry
            # the label 0, so a label-based ``[0]`` lookup may fail.
            return column_dtypes.iloc[0]
        return pandas.core.dtypes.cast.find_common_type(list(column_dtypes.values))

    @property
    def size(self):
        """Return the number of elements, computed with ``prod`` over ``self.shape``."""
        return prod(self.shape)

    def __len__(self):
        """Return the length of the first dimension, i.e. ``self.shape[0]``."""
        return self.shape[0]

    def astype(self, dtype, order="K", casting="unsafe", subok=True, copy=True):
        """
        Return this array cast to `dtype`.

        Parameters
        ----------
        dtype : dtype-like
            Target type, applied to every column of the query compiler.
        order : str, default: "K"
            Only passed through ``check_kwargs``.
        casting : str, default: "unsafe"
            Only ``"unsafe"`` is supported.
        subok : bool, default: True
            Passed through ``check_kwargs``; also required for the no-copy shortcut.
        copy : bool, default: True
            If False (and `subok` is truthy) and the current dtype is already a
            sub-dtype of `dtype`, this very object is returned.

        Returns
        -------
        modin.numpy.array

        Raises
        ------
        ValueError
            If `casting` is anything other than ``"unsafe"``.
        """
        if casting != "unsafe":
            raise ValueError(
                "Modin does not support `astype` with `casting != unsafe`."
            )
        check_kwargs(order=order, subok=subok)
        # Decide on the no-copy shortcut first so that no cast is scheduled
        # only to be thrown away.
        if not copy and subok and numpy.issubdtype(self.dtype, dtype):
            return self
        result = self._query_compiler.astype(
            {col_name: dtype for col_name in self._query_compiler.columns}
        )
        return array(_query_compiler=result, _ndim=self._ndim)

    def _build_repr_array(self):
        """
        Materialize only the part of the data NumPy needs for an abridged repr.

        Returns
        -------
        numpy.ndarray
            An array holding at most ``edgeitems + 1`` leading and ``edgeitems``
            trailing entries along each axis.
        """
        # Rather than pulling all of the data to the head node, fetch just enough
        # entries that NumPy (with a suitably lowered print threshold) renders the
        # same summarized output. In the worst case this is
        # (2 * edgeitems + 1)^2 items, i.e. 49 for the default edgeitems of 3.
        edge = numpy.get_printoptions()["edgeitems"]

        def _edge_positions(length):
            if length <= edge * 2:
                return list(range(length))
            leading = list(range(edge + 1))
            trailing = list(range(length - edge, length))
            return leading + trailing

        qc = self._query_compiler
        if self._ndim == 1 or self.shape[1] == 0:
            sample = qc.getitem_row_array(_edge_positions(len(self))).to_numpy()
            return sample.flatten() if self._ndim == 1 else sample
        if self.shape[0] == 1:
            return qc.getitem_column_array(_edge_positions(self.shape[1])).to_numpy()
        row_positions = _edge_positions(len(self))
        col_positions = _edge_positions(self.shape[1])
        return qc.take_2d_positional(row_positions, col_positions).to_numpy()

    def __repr__(self):
        """
        Return NumPy's string representation of this array.

        Arrays no larger than NumPy's print threshold are fully materialized;
        larger ones are rendered from the reduced array produced by
        ``_build_repr_array``.
        """
        if self.size <= numpy.get_printoptions()["threshold"]:
            # Small enough: collect everything and let NumPy do the formatting.
            return repr(self._to_numpy())
        preview = self._build_repr_array()
        # Lower the threshold just below the preview's size so that NumPy
        # abridges it; the previous print options are restored on exit.
        with numpy.printoptions(threshold=preview.size - 1):
            return repr(preview)

    def _to_numpy(self):
        """
        Materialize this array as a NumPy array.

        Returns
        -------
        numpy.ndarray
            The query compiler's ``to_numpy()`` output, flattened when this
            array is 1-D.
        """
        values = self._query_compiler.to_numpy()
        return values.flatten() if self._ndim == 1 else values


================================================
FILE: modin/numpy/array_creation.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses array creation methods for Modin's NumPy API."""

import numpy

from modin.error_message import ErrorMessage

from .arr import array


def _create_array(dtype, shape, order, subok, numpy_method):
    """
    Build a Modin array by calling the NumPy function named `numpy_method`.

    Parameters
    ----------
    dtype : dtype-like
        Forwarded to the NumPy function as ``dtype``.
    shape : int or tuple of int
        Forwarded to the NumPy function as its first argument.
    order : str
        A warning is emitted for anything other than ``"K"`` or ``"C"``.
    subok : bool
        A warning is emitted when falsy.
    numpy_method : str
        Attribute of the ``numpy`` module to call.

    Returns
    -------
    modin.numpy.array
    """
    supported_orders = ("K", "C")
    if order not in supported_orders:
        ErrorMessage.single_warning(
            "Array order besides 'C' is not currently supported in Modin. Defaulting to 'C' order."
        )
    if not subok:
        ErrorMessage.single_warning(
            "Subclassing types is not currently supported in Modin. Defaulting to the same base dtype."
        )
    ErrorMessage.single_warning(f"np.{numpy_method}_like defaulting to NumPy.")
    numpy_factory = getattr(numpy, numpy_method)
    return array(numpy_factory(shape, dtype=dtype))


def zeros_like(a, dtype=None, order="K", subok=True, shape=None):
    """
    Return an array of zeros modelled on `a`.

    For a Modin array the dtype and shape default to those of `a` and the
    result is built by ``_create_array``. Any other input is reported through
    ``ErrorMessage.bad_type_for_numpy_op`` and handed to ``numpy.zeros_like``.
    """
    if isinstance(a, array):
        target_dtype = a.dtype if dtype is None else dtype
        target_shape = a.shape if shape is None else shape
        return _create_array(target_dtype, target_shape, order, subok, "zeros")
    ErrorMessage.bad_type_for_numpy_op("zeros_like", type(a))
    return numpy.zeros_like(a, dtype=dtype, order=order, subok=subok, shape=shape)


def ones_like(a, dtype=None, order="K", subok=True, shape=None):
    """
    Return an array of ones modelled on `a`.

    For a Modin array the dtype and shape default to those of `a` and the
    result is built by ``_create_array``. Any other input is reported through
    ``ErrorMessage.bad_type_for_numpy_op`` and handed to ``numpy.ones_like``.
    """
    if isinstance(a, array):
        target_dtype = a.dtype if dtype is None else dtype
        target_shape = a.shape if shape is None else shape
        return _create_array(target_dtype, target_shape, order, subok, "ones")
    ErrorMessage.bad_type_for_numpy_op("ones_like", type(a))
    return numpy.ones_like(a, dtype=dtype, order=order, subok=subok, shape=shape)


def tri(N, M=None, k=0, dtype=float, like=None):
    """
    Build a Modin array from the output of ``numpy.tri``.

    `N`, `M`, `k` and `dtype` are forwarded to ``numpy.tri``. `like` is not
    forwarded; a warning is emitted when it is given.
    """
    if like is not None:
        ErrorMessage.single_warning(
            "Modin NumPy does not support the `like` argument for np.tri. Defaulting to `like=None`."
        )
    ErrorMessage.single_warning("np.tri defaulting to NumPy.")
    numpy_result = numpy.tri(N, M=M, k=k, dtype=dtype)
    return array(numpy_result)


================================================
FILE: modin/numpy/array_shaping.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses array shaping methods for Modin's NumPy API."""

import numpy

from modin.error_message import ErrorMessage

from .arr import array
from .utils import try_convert_from_interoperable_type


def ravel(a, order="C"):
    """
    Return a flattened version of `a`.

    `a` is first passed through ``try_convert_from_interoperable_type``. A
    Modin array is flattened with its own ``flatten`` (a warning is emitted
    when `order` is not ``"C"``); any other input is reported through
    ``ErrorMessage.bad_type_for_numpy_op`` and handed to ``numpy.ravel``.
    """
    a = try_convert_from_interoperable_type(a)
    if isinstance(a, array):
        if order != "C":
            ErrorMessage.single_warning(
                "Array order besides 'C' is not currently supported in Modin. Defaulting to 'C' order."
            )
        return a.flatten(order)
    ErrorMessage.bad_type_for_numpy_op("ravel", type(a))
    return numpy.ravel(a, order=order)


def shape(a):
    """
    Return the shape of `a`.

    `a` is first passed through ``try_convert_from_interoperable_type``. A
    Modin array reports its own ``shape``; any other input is reported through
    ``ErrorMessage.bad_type_for_numpy_op`` and handed to ``numpy.shape``.
    """
    a = try_convert_from_interoperable_type(a)
    if isinstance(a, array):
        return a.shape
    ErrorMessage.bad_type_for_numpy_op("shape", type(a))
    return numpy.shape(a)


def transpose(a, axes=None):
    """
    Return the transpose of `a`.

    Parameters
    ----------
    a : object
        Input, first passed through ``try_convert_from_interoperable_type``.
    axes : optional
        Must be ``None`` for Modin arrays; forwarded to ``numpy.transpose``
        for any other input.

    Returns
    -------
    modin.numpy.array or the result of ``numpy.transpose``

    Raises
    ------
    NotImplementedError
        If `axes` is given for a Modin array.
    """
    a = try_convert_from_interoperable_type(a)
    if not isinstance(a, array):
        ErrorMessage.bad_type_for_numpy_op("transpose", type(a))
        return numpy.transpose(a, axes=axes)
    if axes is not None:
        # The message names the actual parameter (`axes`), not `axis`.
        raise NotImplementedError(
            "Modin does not support arrays higher than 2-dimensions. Please use `transpose` with `axes=None` on a 2-dimensional or lower object."
        )
    return a.transpose()


def split(arr, indices, axis=0):
    """
    Split `arr` at `indices` along `axis`.

    `arr` is first passed through ``try_convert_from_interoperable_type``. A
    Modin array is split with its own ``split``; any other input is reported
    through ``ErrorMessage.bad_type_for_numpy_op`` and handed to ``numpy.split``.
    """
    arr = try_convert_from_interoperable_type(arr)
    if isinstance(arr, array):
        return arr.split(indices, axis)
    ErrorMessage.bad_type_for_numpy_op("split", type(arr))
    return numpy.split(arr, indices, axis=axis)


def hstack(tup, dtype=None, casting="same_kind"):
    """
    Stack the members of `tup` horizontally.

    Dispatch is decided by the first member (after
    ``try_convert_from_interoperable_type``): a Modin array stacks the
    remaining members through its own ``hstack``; otherwise the type is
    reported through ``ErrorMessage.bad_type_for_numpy_op`` and the whole
    `tup` is handed to ``numpy.hstack``.
    """
    first = try_convert_from_interoperable_type(tup[0])
    if isinstance(first, array):
        remaining = tup[1:]
        return first.hstack(remaining, dtype, casting)
    ErrorMessage.bad_type_for_numpy_op("hstack", type(first))
    return numpy.hstack(tup, dtype=dtype, casting=casting)


def append(arr, values, axis=None):
    """
    Append `values` to `arr` along `axis`.

    `arr` is first passed through ``try_convert_from_interoperable_type``. A
    Modin array appends through its own ``append``; any other input is reported
    through ``ErrorMessage.bad_type_for_numpy_op`` and handed to ``numpy.append``.
    """
    arr = try_convert_from_interoperable_type(arr)
    if isinstance(arr, array):
        return arr.append(values, axis)
    ErrorMessage.bad_type_for_numpy_op("append", type(arr))
    return numpy.append(arr, values, axis=axis)


================================================
FILE: modin/numpy/constants.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

# Re-export NumPy's constants so they are reachable from this module.
import numpy
from numpy import e, euler_gamma, inf, nan, newaxis, pi
from packaging import version

# These aliases are only imported when the installed NumPy is older than
# 2.0.0b1 (presumably because later releases no longer provide them).
if version.parse(numpy.__version__) < version.parse("2.0.0b1"):
    from numpy import NAN, NINF, NZERO, PINF, PZERO, Inf, Infinity, NaN, infty

# Names exported regardless of the installed NumPy version.
__all__ = [
    "e",
    "euler_gamma",
    "inf",
    "nan",
    "newaxis",
    "pi",
]

# Export the version-dependent aliases only when they were imported above.
if version.parse(numpy.__version__) < version.parse("2.0.0b1"):
    __all__ += [
        "Inf",
        "Infinity",
        "NAN",
        "NINF",
        "NZERO",
        "NaN",
        "PINF",
        "PZERO",
        "infty",
    ]


================================================
FILE: modin/numpy/indexing.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

# noqa: MD02
"""
Details about how Indexing Helper Class works.

_LocationIndexerBase provides a framework of methods for __getitem__
  and __setitem__ that work with the Modin NumPy Array's internal index. The base
  class's __{get,set}item__ takes in partitions & idx_in_partition data
  and performs the lookup/item write.

_iLocIndexer is responsible for indexer-specific logic and
  lookup computation. Loc takes care of enlarging the DataFrame. Both indexers
  take care of translating pandas' lookup to the Modin DataFrame's internal
  lookup.

An illustration is available at
https://github.com/ray-project/ray/pull/1955#issuecomment-386781826
"""

import itertools

import numpy as np
import pandas
from pandas.api.types import is_bool, is_list_like
from pandas.core.dtypes.common import is_bool_dtype, is_integer, is_integer_dtype
from pandas.core.indexing import IndexingError

from modin.error_message import ErrorMessage
from modin.pandas.indexing import compute_sliced_len, is_range_like, is_slice, is_tuple
from modin.pandas.utils import is_scalar

from .arr import array


def broadcast_item(
    obj,
    row_lookup,
    col_lookup,
    item,
    need_columns_reindex=True,
):
    """
    Use NumPy to broadcast or reshape item with reindexing.

    Parameters
    ----------
    obj : object
        Object whose ``_query_compiler`` provides the ``index`` and ``columns``
        that the lookups refer to.
    row_lookup : slice or list-like
        The global row index to locate inside of `item`.
    col_lookup : slice or list-like
        The global col index to locate inside of `item`.
    item : modin.numpy.array or array-like
        Value that should be broadcast to the shape selected by the lookups.
        It is never modified by this function.
    need_columns_reindex : bool, default: True
        Whether a 2-D Modin array `item` should also be reindexed along the
        columns.

    Returns
    -------
    np.ndarray
        `item` after it was reshaped or broadcast to the target shape.

    Raises
    ------
    ValueError
        If `item` cannot be broadcast from its own shape to the target shape.
    """
    new_row_len = (
        len(obj._query_compiler.index[row_lookup])
        if isinstance(row_lookup, slice)
        else len(row_lookup)
    )
    new_col_len = (
        len(obj._query_compiler.columns[col_lookup])
        if isinstance(col_lookup, slice)
        else len(col_lookup)
    )
    to_shape = new_row_len, new_col_len

    if isinstance(item, array):
        # Reindex on a local query compiler so that the caller's array is left
        # untouched. Lookups are converted to labels, as reindex expects them.
        item_qc = item._query_compiler
        index_values = obj._query_compiler.index[row_lookup]
        if not index_values.equals(item_qc.index):
            # New labels on an axis make reindex introduce NaN values.
            item_qc = item_qc.reindex(axis=0, labels=index_values, copy=None)
        if need_columns_reindex and item._ndim == 2:
            column_values = obj._query_compiler.columns[col_lookup]
            if not column_values.equals(item_qc.columns):
                item_qc = item_qc.reindex(axis=1, labels=column_values, copy=None)
        if item_qc is not item._query_compiler:
            # Rebind the local name only; the object passed in keeps its data.
            item = array(_query_compiler=item_qc, _ndim=item._ndim)
    try:
        item = np.array(item) if not isinstance(item, array) else item._to_numpy()
        if np.prod(to_shape) == np.prod(item.shape):
            return item.reshape(to_shape)
        else:
            return np.broadcast_to(item, to_shape)
    except ValueError:
        from_shape = np.array(item).shape
        raise ValueError(
            f"could not broadcast input array from shape {from_shape} into shape "
            + f"{to_shape}"
        )


def is_boolean_array(x):
    """
    Tell whether `x` is an array-like made up of booleans.

    Parameters
    ----------
    x : object
        Object to check.

    Returns
    -------
    bool
        For NumPy arrays, Modin arrays, pandas Series and pandas Index the
        answer is derived from the dtype; for anything else, True only if `x`
        is list-like and every element is a bool.
    """
    typed_containers = (np.ndarray, array, pandas.Series, pandas.Index)
    if not isinstance(x, typed_containers):
        return is_list_like(x) and all(is_bool(element) for element in x)
    return is_bool_dtype(x.dtype)


def is_integer_array(x):
    """
    Tell whether `x` is an array-like made up of integers.

    Parameters
    ----------
    x : object
        Object to check.

    Returns
    -------
    bool
        For NumPy arrays, Modin arrays, pandas Series and pandas Index the
        answer is derived from the dtype; for anything else, True only if `x`
        is list-like and every element is an integer.
    """
    typed_containers = (np.ndarray, array, pandas.Series, pandas.Index)
    if not isinstance(x, typed_containers):
        return is_list_like(x) and all(is_integer(element) for element in x)
    return is_integer_dtype(x.dtype)


def is_integer_slice(x):
    """
    Tell whether `x` is a slice whose bounds are all integers or None.

    Parameters
    ----------
    x : object
        Object to check.

    Returns
    -------
    bool
        True if `x` is a slice and each of its ``start``, ``stop`` and ``step``
        is either None or an integer, False otherwise.
    """
    if not is_slice(x):
        return False
    # Every bound must be either omitted (None) or an integer.
    return all(
        bound is None or is_integer(bound) for bound in (x.start, x.stop, x.step)
    )


def boolean_mask_to_numeric(indexer):
    """
    Translate a boolean mask into the positions of its truthy entries.

    Parameters
    ----------
    indexer : list-like of booleans

    Returns
    -------
    np.ndarray of ints
        Numerical positions of ``True`` elements in the passed `indexer`.
    """
    if not isinstance(indexer, (np.ndarray, array, pandas.Series)):
        # For non-NumPy-like masks it is cheaper to filter the positions lazily
        # with `itertools.compress` (about 10% faster than a plain list
        # comprehension) and build the array from the reduced data than to
        # convert the whole mask to NumPy and apply `np.where`.
        selected_positions = itertools.compress(
            data=range(len(indexer)), selectors=indexer
        )
        return np.fromiter(selected_positions, dtype=np.int64)
    return np.where(indexer)[0]


_ILOC_INT_ONLY_ERROR = """
Location based indexing can only have [integer, integer slice (START point is
INCLUDED, END point is EXCLUDED), listlike of integers, boolean array] types.
"""


def _compute_ndim(row_loc, col_loc):
    """
    Derive the dimensionality of an indexing result from its two locators.

    Parameters
    ----------
    row_loc : list or scalar
        Row locator.
    col_loc : list or scalar
        Column locator.

    Returns
    -------
    {0, 1, 2}
        0 when both locators are scalar-like (a scalar or a tuple), 1 when
        exactly one of them is, 2 when neither is.
    """
    row_scalar = is_scalar(row_loc) or is_tuple(row_loc)
    col_scalar = is_scalar(col_loc) or is_tuple(col_loc)

    if row_scalar and col_scalar:
        return 0
    if row_scalar or col_scalar:
        # Exactly one scalar-like locator: the "both" case returned above.
        return 1
    return 2


class ArrayIndexer(object):
    """
    An indexer for modin_arr.__{get|set}item__ functionality.

    Locators are positional: integers, integer slices, list-likes of integers and
    boolean masks are accepted (see ``_check_dtypes``).

    Parameters
    ----------
    array : modin.numpy.array
        Array to operate on.
    """

    def __init__(self, array):
        # NOTE: the parameter name shadows the module-level ``array`` class, but only
        # inside this method.
        self.arr = array

    def _get_numpy_object_from_qc_view(
        self,
        qc_view,
        row_scalar: bool,
        col_scalar: bool,
        ndim: int,
    ):
        """
        Convert the query compiler view to the appropriate NumPy object.

        Parameters
        ----------
        qc_view : BaseQueryCompiler
            Query compiler to convert.
        row_scalar : bool
            Whether indexer for rows is scalar.
        col_scalar : bool
            Whether indexer for columns is scalar.
        ndim : {0, 1, 2}
            Number of dimensions in dataset to be retrieved.

        Returns
        -------
        modin.numpy.array or scalar
            The element at position ``[0, 0]`` of the materialized view when the result
            is 0-dimensional, otherwise an array object wrapping the data of the query
            compiler view (transposed first when only the row indexer is scalar).
        """
        # A 2D result keeps the dimensionality of the source array.
        if ndim == 2:
            return array(_query_compiler=qc_view, _ndim=self.arr._ndim)
        # A non-scalar lookup into a 1D array stays 1D.
        if self.arr._ndim == 1 and not row_scalar:
            return array(_query_compiler=qc_view, _ndim=1)

        if self.arr._ndim == 1:
            # Scalar lookup into a 1D array yields a single element.
            _ndim = 0
        elif ndim == 0:
            _ndim = 0
        else:
            # We are in the case where ndim == 1 and the source array is not 1D.
            # `ndim` is computed treating tuples as scalars (see `_compute_ndim`), while
            # `row_scalar` and `col_scalar` are supplied by the caller (`__getitem__`
            # uses a plain `is_scalar` check), so every combination of the two flags
            # is handled here.
            if row_scalar and col_scalar:
                _ndim = 0
            elif not any([row_scalar, col_scalar]):
                _ndim = 2
            else:
                _ndim = 1
                # Only the row indexer is scalar: transpose the view before wrapping it.
                if row_scalar:
                    qc_view = qc_view.transpose()

        if _ndim == 0:
            # Materialize the view and return its only element.
            return qc_view.to_numpy()[0, 0]

        res_arr = array(_query_compiler=qc_view, _ndim=_ndim)
        return res_arr

    def _parse_row_and_column_locators(self, tup):
        """
        Unpack the user input for getitem and setitem and compute ndim.

        loc[a] -> ([a], :), 1D
        loc[[a,b]] -> ([a,b], :),
        loc[a,b] -> ([a], [b]), 0D

        Callable locators are invoked with the indexed array, and locators that are
        themselves ``modin.numpy.array`` objects are converted with ``_to_numpy()``.

        Parameters
        ----------
        tup : tuple
            User input to unpack.

        Returns
        -------
        row_loc : scalar or list
            Row locator(s) as a scalar or List.
        col_loc : scalar or list
            Column locator(s) as a scalar or List.
        ndim : {0, 1, 2}
            Number of dimensions of located dataset.

        Raises
        ------
        IndexingError
            If `tup` is a tuple holding more than two locators.
        """
        # A missing locator means a full-axis grab.
        row_loc, col_loc = slice(None), slice(None)

        if is_tuple(tup):
            row_loc = tup[0]
            if len(tup) == 2:
                col_loc = tup[1]
            if len(tup) > 2:
                raise IndexingError("Too many indexers")
        else:
            row_loc = tup

        row_loc = row_loc(self.arr) if callable(row_loc) else row_loc
        col_loc = col_loc(self.arr) if callable(col_loc) else col_loc
        row_loc = row_loc._to_numpy() if isinstance(row_loc, array) else row_loc
        col_loc = col_loc._to_numpy() if isinstance(col_loc, array) else col_loc
        return row_loc, col_loc, _compute_ndim(row_loc, col_loc)

    def __getitem__(self, key):
        """
        Retrieve dataset according to `key`.

        Parameters
        ----------
        key : callable or tuple
            The global row numbers to retrieve data from.

        Returns
        -------
        modin.numpy.array or scalar
            Located dataset.

        Raises
        ------
        ValueError
            If a locator is of an unsupported type (see ``_check_dtypes``).

        See Also
        --------
        pandas.DataFrame.iloc
        """
        row_loc, col_loc, ndim = self._parse_row_and_column_locators(key)
        row_scalar = is_scalar(row_loc)
        col_scalar = is_scalar(col_loc)
        self._check_dtypes(row_loc)
        self._check_dtypes(col_loc)

        row_lookup, col_lookup = self._compute_lookup(row_loc, col_loc)
        # `_compute_lookup` only leaves a slice in place for a full-axis grab
        # (`slice(None)`); that case is passed to the query compiler as `None`.
        # Any other slice here is reported as an internal bug.
        if isinstance(row_lookup, slice):
            ErrorMessage.catch_bugs_and_request_email(
                failure_condition=row_lookup != slice(None),
                extra_log=f"Only None-slices are acceptable as a slice argument in masking, got: {row_lookup}",
            )
            row_lookup = None
        if isinstance(col_lookup, slice):
            ErrorMessage.catch_bugs_and_request_email(
                failure_condition=col_lookup != slice(None),
                extra_log=f"Only None-slices are acceptable as a slice argument in masking, got: {col_lookup}",
            )
            col_lookup = None
        qc_view = self.arr._query_compiler.take_2d_positional(row_lookup, col_lookup)
        result = self._get_numpy_object_from_qc_view(
            qc_view,
            row_scalar=row_scalar,
            col_scalar=col_scalar,
            ndim=ndim,
        )
        return result

    def _determine_setitem_axis(self, row_lookup, col_lookup, row_scalar, col_scalar):
        """
        Determine an axis along which we should do an assignment.

        Parameters
        ----------
        row_lookup : slice or list
            Indexer for rows.
        col_lookup : slice or list
            Indexer for columns.
        row_scalar : bool
            Whether indexer for rows is scalar or not.
        col_scalar : bool
            Whether indexer for columns is scalar or not.

        Returns
        -------
        int or None
            None if this will be a both axis assignment, number of axis to assign in other cases.

        Notes
        -----
        axis = 0: column assignment df[col] = item
        axis = 1: row assignment df.loc[row] = item
        axis = None: assignment along both axes
        """
        # For a 1x1 array the lookup lengths cannot tell the cases apart, so the
        # decision is made from the scalar flags alone.
        if self.arr.shape == (1, 1):
            return None if not (row_scalar ^ col_scalar) else 1 if row_scalar else 0

        def get_axis(axis):
            # Axis labels of the underlying query compiler: index for 0, columns otherwise.
            return (
                self.arr._query_compiler.index
                if axis == 0
                else self.arr._query_compiler.columns
            )

        # Number of positions selected along each axis; slices are measured against
        # the length of the corresponding axis.
        row_lookup_len, col_lookup_len = [
            (
                len(lookup)
                if not isinstance(lookup, slice)
                else compute_sliced_len(lookup, len(get_axis(i)))
            )
            for i, lookup in enumerate([row_lookup, col_lookup])
        ]

        if col_lookup_len == 1 and row_lookup_len == 1:
            axis = None
        elif (
            row_lookup_len == len(self.arr._query_compiler.index)
            and col_lookup_len == 1
            and self.arr._ndim == 2
        ):
            # All rows of a single column of a 2D array.
            axis = 0
        elif (
            col_lookup_len == len(self.arr._query_compiler.columns)
            and row_lookup_len == 1
        ):
            # All columns of a single row.
            axis = 1
        else:
            axis = None
        return axis

    def _setitem_positional(self, row_lookup, col_lookup, item, axis=None):
        """
        Assign `item` value to located dataset.

        The underlying array is updated in place with the query compiler returned by
        ``write_items``.

        Parameters
        ----------
        row_lookup : slice or list-like of ints
            The global row index to write item to.
        col_lookup : slice or list-like of ints
            The global col index to write item to.
        item : array-like or scalar
            The new item needs to be set. It can be any shape that's
            broadcast-able to the product of the lookup tables.
        axis : {None, 0, 1}, default: None
            If not None, it means that whole axis is used to assign a value.
            0 means assign to whole column, 1 means assign to whole row.
            If None, it means that partial assignment is done on both axes.
            NOTE(review): this argument is currently not used by the method body.
        """
        # Convert slices to indices for the purposes of application.
        # TODO (devin-petersohn): Apply to slice without conversion to list
        if isinstance(row_lookup, slice):
            row_lookup = range(len(self.arr._query_compiler.index))[row_lookup]
        if isinstance(col_lookup, slice):
            col_lookup = range(len(self.arr._query_compiler.columns))[col_lookup]

        new_qc = self.arr._query_compiler.write_items(row_lookup, col_lookup, item)
        self.arr._update_inplace(new_qc)

    def __setitem__(self, key, item):
        """
        Assign `item` value to dataset located by `key`.

        Parameters
        ----------
        key : callable or tuple
            The global row numbers to assign data to.
        item : array-like or scalar
            Value that should be assigned to located dataset.

        Raises
        ------
        ValueError
            If a locator is of an unsupported type (see ``_check_dtypes``).

        See Also
        --------
        pandas.DataFrame.iloc
        """
        row_loc, col_loc, _ = self._parse_row_and_column_locators(key)
        row_scalar = is_scalar(row_loc)
        col_scalar = is_scalar(col_loc)
        self._check_dtypes(row_loc)
        self._check_dtypes(col_loc)

        row_lookup, col_lookup = self._compute_lookup(row_loc, col_loc)
        self._setitem_positional(
            row_lookup,
            col_lookup,
            item,
            axis=self._determine_setitem_axis(
                row_lookup, col_lookup, row_scalar, col_scalar
            ),
        )

    def _compute_lookup(self, row_loc, col_loc):
        """
        Compute positional row and column lookups from integer locators.

        Parameters
        ----------
        row_loc : slice, list, array or tuple
            Row locator.
        col_loc : slice, list, array or tuple
            Columns locator.

        Returns
        -------
        row_lookup : slice(None) if full axis grab, pandas.RangeIndex for other slices and range-like locators, numpy.ndarray otherwise
            Positions of the rows to take.
        col_lookup : slice(None) if full axis grab, pandas.RangeIndex for other slices and range-like locators, numpy.ndarray otherwise
            Positions of the columns to take.

        Notes
        -----
        Usage of `slice(None)` as a resulting lookup is a hack to pass information about
        full-axis grab without computing actual indices that triggers lazy computations.
        Ideally, this API should get rid of using slices as indexers and either use a
        common ``Indexer`` object or range and ``np.ndarray`` only.
        """
        lookups = []
        for axis, axis_loc in enumerate((row_loc, col_loc)):
            # A scalar locator is handled as a one-element array.
            if is_scalar(axis_loc):
                axis_loc = np.array([axis_loc])
            if isinstance(axis_loc, slice):
                # Keep `slice(None)` untouched; resolve any other slice against the
                # axis length into a `RangeIndex`.
                axis_lookup = (
                    axis_loc
                    if axis_loc == slice(None)
                    else pandas.RangeIndex(
                        *axis_loc.indices(len(self.arr._query_compiler.get_axis(axis)))
                    )
                )
            elif is_range_like(axis_loc):
                axis_lookup = pandas.RangeIndex(
                    axis_loc.start, axis_loc.stop, axis_loc.step
                )
            elif is_boolean_array(axis_loc):
                axis_lookup = boolean_mask_to_numeric(axis_loc)
            else:
                if isinstance(axis_loc, pandas.Index):
                    axis_loc = axis_loc.values
                elif is_list_like(axis_loc) and not isinstance(axis_loc, np.ndarray):
                    # `Index.__getitem__` works much faster with numpy arrays than with python lists,
                    # so although we lose some time here on converting to numpy, `Index.__getitem__`
                    # speedup covers the loss that we gain here.
                    axis_loc = np.array(axis_loc, dtype=np.int64)
                # Relatively fast check allows us to not trigger the `get_axis()` computation
                # if there are no negative indices, since then they do not depend on the axis length.
                if isinstance(axis_loc, np.ndarray) and not (axis_loc < 0).any():
                    axis_lookup = axis_loc
                else:
                    # Index a `RangeIndex` over the whole axis with the locator so that
                    # the positions are resolved against the axis length.
                    axis_lookup = pandas.RangeIndex(
                        len(self.arr._query_compiler.get_axis(axis))
                    )[axis_loc]

            # Non-range pandas indexes are unwrapped to their underlying values.
            if isinstance(axis_lookup, pandas.Index) and not is_range_like(axis_lookup):
                axis_lookup = axis_lookup.values
            lookups.append(axis_lookup)
        return lookups

    def _check_dtypes(self, locator):
        """
        Check that `locator` is an integer scalar, integer slice, integer list or array of booleans.

        Parameters
        ----------
        locator : scalar, list, slice or array
            Object to check.

        Raises
        ------
        ValueError
            If check fails.
        """
        is_int = is_integer(locator)
        is_int_slice = is_integer_slice(locator)
        is_int_arr = is_integer_array(locator)
        is_bool_arr = is_boolean_array(locator)

        # The locator is valid as soon as it matches any of the supported kinds.
        if not any([is_int, is_int_slice, is_int_arr, is_bool_arr]):
            raise ValueError(_ILOC_INT_ONLY_ERROR)


================================================
FILE: modin/numpy/linalg.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import numpy

from modin.error_message import ErrorMessage

from .arr import array
from .utils import try_convert_from_interoperable_type


def norm(x, ord=None, axis=None, keepdims=False):
    """
    Compute a matrix or vector norm.

    Modin arrays (and objects convertible to them) are handled by the array's own
    ``_norm`` implementation; any other input is reported via ``ErrorMessage`` and
    delegated to ``numpy.linalg.norm``.

    Parameters
    ----------
    x : array-like
        Input data.
    ord : optional
        Order of the norm, forwarded unchanged.
    axis : optional
        Axis (or axes) to compute the norm along, forwarded unchanged.
    keepdims : bool, default: False
        Forwarded unchanged.

    Returns
    -------
    Result of ``x._norm`` for Modin arrays, result of ``numpy.linalg.norm`` otherwise.
    """
    x = try_convert_from_interoperable_type(x)
    if isinstance(x, array):
        return x._norm(ord=ord, axis=axis, keepdims=keepdims)
    ErrorMessage.bad_type_for_numpy_op("linalg.norm", type(x))
    return numpy.linalg.norm(x, ord=ord, axis=axis, keepdims=keepdims)


================================================
FILE: modin/numpy/logic.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import numpy

from modin.error_message import ErrorMessage
from modin.utils import _inherit_docstrings

from .arr import array
from .utils import try_convert_from_interoperable_type


def _dispatch_logic(operator_name):
    """
    Build a module-level function dispatching the NumPy logic routine `operator_name`.

    Parameters
    ----------
    operator_name : str
        Name of the function in the ``numpy`` namespace. The corresponding array
        method is looked up under the same name prefixed with an underscore.

    Returns
    -------
    callable
        Function that calls the array method for Modin arrays and falls back to
        NumPy (after reporting the unsupported type) for everything else.
    """
    arr_method_name = f"_{operator_name}"

    @_inherit_docstrings(getattr(numpy, operator_name))
    def call(x, *args, **kwargs):
        x = try_convert_from_interoperable_type(x)
        if isinstance(x, array):
            return getattr(x, arr_method_name)(*args, **kwargs)
        ErrorMessage.bad_type_for_numpy_op(operator_name, type(x))
        numpy_impl = getattr(numpy, operator_name)
        return numpy_impl(x, *args, **kwargs)

    return call


# Truth-value and element-wise test functions. Each one dispatches to the array
# method of the same name prefixed with an underscore (see `_dispatch_logic`).
# Note that `all` and `any` shadow the builtins within this module.
all = _dispatch_logic("all")
any = _dispatch_logic("any")
isfinite = _dispatch_logic("isfinite")
isinf = _dispatch_logic("isinf")
isnan = _dispatch_logic("isnan")
isnat = _dispatch_logic("isnat")
isneginf = _dispatch_logic("isneginf")
isposinf = _dispatch_logic("isposinf")
iscomplex = _dispatch_logic("iscomplex")
isreal = _dispatch_logic("isreal")


def isscalar(e):
    """
    Tell whether `e` is a scalar.

    Parameters
    ----------
    e : object
        Object to check.

    Returns
    -------
    bool
        Always False for Modin arrays, the answer of ``numpy.isscalar`` otherwise.
    """
    return not isinstance(e, array) and numpy.isscalar(e)


# Logical and comparison functions. Each one dispatches to the array method of the
# same name prefixed with an underscore (see `_dispatch_logic`).
logical_not = _dispatch_logic("logical_not")
logical_and = _dispatch_logic("logical_and")
logical_or = _dispatch_logic("logical_or")
logical_xor = _dispatch_logic("logical_xor")
greater = _dispatch_logic("greater")
greater_equal = _dispatch_logic("greater_equal")
less = _dispatch_logic("less")
less_equal = _dispatch_logic("less_equal")
equal = _dispatch_logic("equal")
not_equal = _dispatch_logic("not_equal")


================================================
FILE: modin/numpy/math.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import numpy

from modin.error_message import ErrorMessage
from modin.utils import _inherit_docstrings

from .arr import array
from .utils import try_convert_from_interoperable_type


def _dispatch_math(operator_name, arr_method_name=None):
    """
    Build a module-level function dispatching the NumPy math routine `operator_name`.

    Parameters
    ----------
    operator_name : str
        Name of the function in the ``numpy`` namespace.
    arr_method_name : str, optional
        Name of the method on the ``modin.numpy.array`` object. Assumed to be
        `operator_name` when not given.

    Returns
    -------
    callable
        Function that calls the array method for Modin arrays and falls back to
        NumPy (after reporting the unsupported type) for everything else.
    """
    target_method = arr_method_name or operator_name

    @_inherit_docstrings(getattr(numpy, operator_name))
    def call(x, *args, **kwargs):
        x = try_convert_from_interoperable_type(x)
        if isinstance(x, array):
            return getattr(x, target_method)(*args, **kwargs)

        ErrorMessage.bad_type_for_numpy_op(operator_name, type(x))
        numpy_impl = getattr(numpy, operator_name)
        return numpy_impl(x, *args, **kwargs)

    return call


# Arithmetic and reduction functions built with `_dispatch_math`. Unless a second
# argument is given, the array method has the same name as the NumPy function.
# Note that `abs` and `sum` shadow the builtins within this module.
absolute = _dispatch_math("absolute")
abs = absolute  # alias of `absolute`
add = _dispatch_math("add", "__add__")  # routed to the array's `__add__`
divide = _dispatch_math("divide")
dot = _dispatch_math("dot")
float_power = _dispatch_math("float_power")
floor_divide = _dispatch_math("floor_divide")
power = _dispatch_math("power")
prod = _dispatch_math("prod")
multiply = _dispatch_math("multiply")
remainder = _dispatch_math("remainder")
mod = remainder  # alias of `remainder`
subtract = _dispatch_math("subtract")
sum = _dispatch_math("sum")
true_divide = _dispatch_math("true_divide", "divide")  # routed to the array's `divide`
mean = _dispatch_math("mean")


def var(x1, axis=None, dtype=None, out=None, keepdims=None, *, where=True):
    """
    Compute the variance of `x1`.

    Modin arrays (and objects convertible to them) are handled by the array's own
    ``var`` method; any other input is reported via ``ErrorMessage`` and delegated
    to ``numpy.var``. All keyword arguments are forwarded unchanged.

    Parameters
    ----------
    x1 : array-like
        Input data.
    axis : optional
    dtype : optional
    out : optional
    keepdims : optional
    where : default: True

    Returns
    -------
    Result of ``x1.var`` for Modin arrays, result of ``numpy.var`` otherwise.
    """
    x1 = try_convert_from_interoperable_type(x1)
    reduction_kwargs = dict(
        axis=axis, out=out, keepdims=keepdims, where=where, dtype=dtype
    )
    if isinstance(x1, array):
        return x1.var(**reduction_kwargs)
    ErrorMessage.bad_type_for_numpy_op("var", type(x1))
    return numpy.var(x1, **reduction_kwargs)


# Maximum and minimum are ufunc's in NumPy, which means that our array's __array_ufunc__
# implementation will automatically handle this. We still need the function though, so that
# if the operands are modin.pandas objects, we can convert them to arrays, but after that
# we can just use NumPy's maximum/minimum since that will route to our array's ufunc.
def maximum(
    x1, x2, out=None, where=True, casting="same_kind", order="K", dtype=None, subok=True
):
    """
    Compute the element-wise maximum of `x1` and `x2`.

    Both operands are converted from interoperable Modin types (see
    ``try_convert_from_interoperable_type``) and then handed to ``numpy.maximum``.

    Parameters
    ----------
    x1 : array-like
        First operand. An unsupported type is reported via ``ErrorMessage``.
    x2 : array-like
        Second operand.
    out, where, casting, order, dtype, subok
        Forwarded unchanged to ``numpy.maximum``.

    Returns
    -------
    Whatever ``numpy.maximum`` returns for the converted operands.
    """
    x1 = try_convert_from_interoperable_type(x1)
    # Convert the second operand too: previously only `x1` was converted, so a
    # modin.pandas object passed as `x2` reached NumPy unconverted.
    x2 = try_convert_from_interoperable_type(x2)
    if not isinstance(x1, array):
        ErrorMessage.bad_type_for_numpy_op("maximum", type(x1))
    return numpy.maximum(
        x1,
        x2,
        out=out,
        where=where,
        casting=casting,
        order=order,
        dtype=dtype,
        subok=subok,
    )


def minimum(
    x1, x2, out=None, where=True, casting="same_kind", order="K", dtype=None, subok=True
):
    """
    Compute the element-wise minimum of `x1` and `x2`.

    Both operands are converted from interoperable Modin types (see
    ``try_convert_from_interoperable_type``) and then handed to ``numpy.minimum``.

    Parameters
    ----------
    x1 : array-like
        First operand. An unsupported type is reported via ``ErrorMessage``.
    x2 : array-like
        Second operand.
    out, where, casting, order, dtype, subok
        Forwarded unchanged to ``numpy.minimum``.

    Returns
    -------
    Whatever ``numpy.minimum`` returns for the converted operands.
    """
    x1 = try_convert_from_interoperable_type(x1)
    # Convert the second operand too: previously only `x1` was converted, so a
    # modin.pandas object passed as `x2` reached NumPy unconverted.
    x2 = try_convert_from_interoperable_type(x2)
    if not isinstance(x1, array):
        ErrorMessage.bad_type_for_numpy_op("minimum", type(x1))
    return numpy.minimum(
        x1,
        x2,
        out=out,
        where=where,
        casting=casting,
        order=order,
        dtype=dtype,
        subok=subok,
    )


# `amax`/`amin` are routed to the array's `max`/`min` methods; `max` and `min` are
# aliases of them and shadow the builtins within this module.
amax = _dispatch_math("amax", "max")
amin = _dispatch_math("amin", "min")
max = amax
min = amin


def sqrt(
    x, out=None, *, where=True, casting="same_kind", order="K", dtype=None, subok=True
):
    """
    Compute the element-wise square root of `x`.

    Modin arrays (and objects convertible to them) are handled by the array's own
    ``sqrt`` method; any other input is reported via ``ErrorMessage`` and delegated
    to ``numpy.sqrt``.

    Parameters
    ----------
    x : array-like
        Input data.
    out, where, casting, order, dtype, subok
        Forwarded unchanged.

    Returns
    -------
    Result of ``x.sqrt`` for Modin arrays, result of ``numpy.sqrt`` otherwise.
    """
    x = try_convert_from_interoperable_type(x)
    if isinstance(x, array):
        # The array method receives the options positionally, in this exact order.
        return x.sqrt(out, where, casting, order, dtype, subok)
    ErrorMessage.bad_type_for_numpy_op("sqrt", type(x))
    ufunc_kwargs = dict(
        out=out, where=where, casting=casting, order=order, dtype=dtype, subok=subok
    )
    return numpy.sqrt(x, **ufunc_kwargs)


def exp(
    x, out=None, *, where=True, casting="same_kind", order="K", dtype=None, subok=True
):
    """
    Compute the element-wise exponential of `x`.

    Modin arrays (and objects convertible to them) are handled by the array's own
    ``exp`` method; any other input is reported via ``ErrorMessage`` and delegated
    to ``numpy.exp``.

    Parameters
    ----------
    x : array-like
        Input data.
    out, where, casting, order, dtype, subok
        Forwarded unchanged.

    Returns
    -------
    Result of ``x.exp`` for Modin arrays, result of ``numpy.exp`` otherwise.
    """
    x = try_convert_from_interoperable_type(x)
    if isinstance(x, array):
        # The array method receives the options positionally, in this exact order.
        return x.exp(out, where, casting, order, dtype, subok)
    ErrorMessage.bad_type_for_numpy_op("exp", type(x))
    ufunc_kwargs = dict(
        out=out, where=where, casting=casting, order=order, dtype=dtype, subok=subok
    )
    return numpy.exp(x, **ufunc_kwargs)


def argmax(a, axis=None, out=None, *, keepdims=None):
    """
    Return the indices of the maximum values of `a`.

    Modin arrays (and objects convertible to them) are handled by the array's own
    ``argmax`` method; any other input is reported via ``ErrorMessage`` and
    delegated to ``numpy.argmax``.

    Parameters
    ----------
    a : array-like
        Input data.
    axis, out, keepdims
        Forwarded unchanged.

    Returns
    -------
    Result of ``a.argmax`` for Modin arrays, result of ``numpy.argmax`` otherwise.
    """
    a = try_convert_from_interoperable_type(a)
    if isinstance(a, array):
        return a.argmax(axis=axis, out=out, keepdims=keepdims)
    ErrorMessage.bad_type_for_numpy_op("argmax", type(a))
    return numpy.argmax(a, axis=axis, out=out, keepdims=keepdims)


def argmin(a, axis=None, out=None, *, keepdims=None):
    """
    Return the indices of the minimum values of `a`.

    Modin arrays (and objects convertible to them) are handled by the array's own
    ``argmin`` method; any other input is reported via ``ErrorMessage`` and
    delegated to ``numpy.argmin``.

    Parameters
    ----------
    a : array-like
        Input data.
    axis, out, keepdims
        Forwarded unchanged.

    Returns
    -------
    Result of ``a.argmin`` for Modin arrays, result of ``numpy.argmin`` otherwise.
    """
    a = try_convert_from_interoperable_type(a)
    if isinstance(a, array):
        return a.argmin(axis=axis, out=out, keepdims=keepdims)
    ErrorMessage.bad_type_for_numpy_op("argmin", type(a))
    return numpy.argmin(a, axis=axis, out=out, keepdims=keepdims)


================================================
FILE: modin/numpy/trigonometry.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import numpy

from modin.error_message import ErrorMessage

from .arr import array
from .utils import try_convert_from_interoperable_type


def tanh(
    x, out=None, where=True, casting="same_kind", order="K", dtype=None, subok=True
):
    """
    Compute the element-wise hyperbolic tangent of `x`.

    Modin arrays (and objects convertible to them) are handled by the array's own
    ``tanh`` method; any other input is reported via ``ErrorMessage`` and delegated
    to ``numpy.tanh``.

    Parameters
    ----------
    x : array-like
        Input data.
    out, where, casting, order, dtype, subok
        Forwarded unchanged as keyword arguments.

    Returns
    -------
    Result of ``x.tanh`` for Modin arrays, result of ``numpy.tanh`` otherwise.
    """
    x = try_convert_from_interoperable_type(x)
    ufunc_kwargs = dict(
        out=out, where=where, casting=casting, order=order, dtype=dtype, subok=subok
    )
    if isinstance(x, array):
        return x.tanh(**ufunc_kwargs)
    ErrorMessage.bad_type_for_numpy_op("tanh", type(x))
    return numpy.tanh(x, **ufunc_kwargs)


================================================
FILE: modin/numpy/utils.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Collection of array utility functions for internal use."""

import modin.numpy as np
import modin.pandas as pd

# Modin pandas types that `try_convert_from_interoperable_type` turns into Modin arrays.
_INTEROPERABLE_TYPES = (pd.DataFrame, pd.Series)


def try_convert_from_interoperable_type(obj, copy=False):
    """
    Convert `obj` to a Modin array when it is one of the interoperable types.

    Parameters
    ----------
    obj : object
        Object to convert.
    copy : bool, default: False
        Forwarded to the array constructor when a conversion takes place.

    Returns
    -------
    object
        A Modin array built from `obj` if it is an instance of one of
        ``_INTEROPERABLE_TYPES``, otherwise `obj` itself, untouched.
    """
    if not isinstance(obj, _INTEROPERABLE_TYPES):
        return obj
    return np.array(obj, copy=copy)


================================================
FILE: modin/pandas/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import warnings

import pandas
from packaging import version

# Supported pandas version range: the lower bound is inclusive and the upper bound
# is exclusive (see the comparison below).
__min_pandas_version__ = "2.2"
__max_pandas_version__ = "2.4"

pandas_version = version.parse(pandas.__version__)
# An unsupported pandas version only produces a warning; the import still proceeds.
if pandas_version < version.parse(
    __min_pandas_version__
) or pandas_version >= version.parse(__max_pandas_version__):
    warnings.warn(
        f"The pandas version installed ({pandas.__version__}) is outside the supported range in Modin"
        + f" ({__min_pandas_version__} to {__max_pandas_version__}). This may cause undesired side effects!"
    )

# to not pollute namespace
del version, pandas_version, __min_pandas_version__, __max_pandas_version__


# Re-export a fixed set of pandas objects from this module. All warnings raised
# inside the block (by the imports or by the attribute lookups on pandas) are ignored.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    import inspect

    from modin.core.storage_formats.pandas.query_compiler_caster import (
        wrap_free_function_in_argument_caster,
    )

    # To allow the extensions system to override these methods, we must wrap all objects re-exported
    # from pandas in a backend dispatcher.
    _reexport_list = (
        "eval",
        "factorize",
        "test",
        "date_range",
        "period_range",
        "Index",
        "MultiIndex",
        "CategoricalIndex",
        "bdate_range",
        "DatetimeIndex",
        "Timedelta",
        "Timestamp",
        "set_eng_float_format",
        "options",
        "describe_option",
        "set_option",
        "get_option",
        "reset_option",
        "option_context",
        "NaT",
        "PeriodIndex",
        "Categorical",
        "Interval",
        "UInt8Dtype",
        "UInt16Dtype",
        "UInt32Dtype",
        "UInt64Dtype",
        "SparseDtype",
        "Int8Dtype",
        "Int16Dtype",
        "Int32Dtype",
        "Int64Dtype",
        "StringDtype",
        "BooleanDtype",
        "CategoricalDtype",
        "DatetimeTZDtype",
        "IntervalDtype",
        "PeriodDtype",
        "RangeIndex",
        "TimedeltaIndex",
        "IntervalIndex",
        "IndexSlice",
        "Grouper",
        "array",
        "Period",
        "DateOffset",
        "timedelta_range",
        "infer_freq",
        "interval_range",
        "ExcelWriter",
        "NamedAgg",
        "NA",
        "api",
        "ArrowDtype",
        "Flags",
        "Float32Dtype",
        "Float64Dtype",
        "from_dummies",
        "testing",
    )
    for name in _reexport_list:
        # Raises AttributeError if the installed pandas lacks one of the names above.
        item = getattr(pandas, name)
        if inspect.isfunction(item):
            # Note that this is applied to only functions, not classes.
            item = wrap_free_function_in_argument_caster(name)(item)
        # Publish the (possibly wrapped) object under the same name in this module.
        globals()[name] = item
    # Drop the helpers and loop temporaries so they do not stay in the module namespace.
    del inspect, item, _reexport_list, name, wrap_free_function_in_argument_caster

import os

from modin.config import Parameter

# Maps an engine name to True once `_initialize_engine` has completed for it.
_engine_initialized = {}


def _initialize_engine(engine_string: str):
    """
    Initialize the execution engine named `engine_string`.

    The engine-specific initializer runs only if the engine has not been marked as
    initialized in ``_engine_initialized`` yet; on success the engine is marked as
    initialized. As a side effect, ``OMP_NUM_THREADS`` is set to ``"1"`` in the
    process environment on every call.

    Parameters
    ----------
    engine_string : str
        Name of the engine: "Ray", "Dask", "Unidist" or one of
        ``Engine.NOINIT_ENGINES``.

    Raises
    ------
    ImportError
        If `engine_string` is not a recognized engine name.
    """
    # Imported lazily; only `Engine` is needed here.
    from modin.config import Engine

    # Set this so that Pandas doesn't try to multithread by itself
    os.environ["OMP_NUM_THREADS"] = "1"

    if engine_string == "Ray":
        if not _engine_initialized.get("Ray", False):
            from modin.core.execution.ray.common import initialize_ray

            initialize_ray()
    elif engine_string == "Dask":
        if not _engine_initialized.get("Dask", False):
            from modin.core.execution.dask.common import initialize_dask

            initialize_dask()
    elif engine_string == "Unidist":
        if not _engine_initialized.get("Unidist", False):
            from modin.core.execution.unidist.common import initialize_unidist

            initialize_unidist()
    elif engine_string not in Engine.NOINIT_ENGINES:
        raise ImportError("Unrecognized execution engine: {}.".format(engine_string))

    # Reached only when no initializer raised, so a failed attempt can be retried.
    _engine_initialized[engine_string] = True


from modin.pandas import arrays, errors
from modin.pandas.api.extensions.extensions import __getattr___impl
from modin.utils import show_versions

from .. import __version__
from .dataframe import DataFrame
from .general import (
    concat,
    crosstab,
    cut,
    get_dummies,
    isna,
    isnull,
    lreshape,
    melt,
    merge,
    merge_asof,
    merge_ordered,
    notna,
    notnull,
    pivot,
    pivot_table,
    qcut,
    to_datetime,
    to_numeric,
    to_timedelta,
    unique,
    value_counts,
    wide_to_long,
)
from .io import (
    ExcelFile,
    HDFStore,
    json_normalize,
    read_clipboard,
    read_csv,
    read_excel,
    read_feather,
    read_fwf,
    read_gbq,
    read_hdf,
    read_html,
    read_json,
    read_orc,
    read_parquet,
    read_pickle,
    read_sas,
    read_spss,
    read_sql,
    read_sql_query,
    read_sql_table,
    read_stata,
    read_table,
    read_xml,
    to_pickle,
)
from .plotting import Plotting as plotting
from .series import Series

# Module-level ``__getattr__``: Python only calls it for names that are not found
# in this module's namespace, which lets the extensions system resolve them.
__getattr__ = __getattr___impl


# Public names of ``modin.pandas``. Each name is listed exactly once.
__all__ = [  # noqa: F405
    "DataFrame",
    "Series",
    "read_csv",
    "read_parquet",
    "read_json",
    "read_html",
    "read_clipboard",
    "read_excel",
    "read_hdf",
    "read_feather",
    "read_stata",
    "read_sas",
    "read_pickle",
    "read_sql",
    "read_gbq",
    "read_table",
    "read_spss",
    "read_orc",
    "json_normalize",
    "concat",
    "eval",
    "cut",
    "factorize",
    "test",
    "qcut",
    "to_datetime",
    "get_dummies",
    "isna",
    "isnull",
    "merge",
    "pivot_table",
    "date_range",
    "Index",
    "MultiIndex",
    "bdate_range",
    "period_range",
    "DatetimeIndex",
    "to_timedelta",
    "set_eng_float_format",
    "options",
    "describe_option",
    "set_option",
    "get_option",
    "reset_option",
    "option_context",
    "CategoricalIndex",
    "Timedelta",
    "Timestamp",
    "NaT",
    "PeriodIndex",
    "Categorical",
    "__version__",
    "melt",
    "crosstab",
    "plotting",
    "Interval",
    "UInt8Dtype",
    "UInt16Dtype",
    "UInt32Dtype",
    "UInt64Dtype",
    "SparseDtype",
    "Int8Dtype",
    "Int16Dtype",
    "Int32Dtype",
    "Int64Dtype",
    "CategoricalDtype",
    "DatetimeTZDtype",
    "IntervalDtype",
    "PeriodDtype",
    "BooleanDtype",
    "StringDtype",
    "NA",
    "RangeIndex",
    "TimedeltaIndex",
    "IntervalIndex",
    "IndexSlice",
    "Grouper",
    "array",
    "Period",
    "show_versions",
    "DateOffset",
    "timedelta_range",
    "infer_freq",
    "interval_range",
    "ExcelWriter",
    "read_fwf",
    "read_sql_table",
    "read_sql_query",
    "ExcelFile",
    "to_pickle",
    "HDFStore",
    "lreshape",
    "wide_to_long",
    "merge_asof",
    "merge_ordered",
    "notnull",
    "notna",
    "pivot",
    "to_numeric",
    "unique",
    "value_counts",
    "NamedAgg",
    "api",
    "read_xml",
    "ArrowDtype",
    "Flags",
    "Float32Dtype",
    "Float64Dtype",
    "from_dummies",
    "errors",
]

# Remove these attributes from this module's namespace.
del pandas, Parameter, __getattr___impl


================================================
FILE: modin/pandas/accessor.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
Implement various accessor classes for DataFrame and Series API.

SparseFrameAccessor implements API of pandas.DataFrame.sparse accessor.

SparseAccessor implements API of pandas.Series.sparse accessor.

CachedAccessor implements API of pandas.core.accessor.CachedAccessor.

ModinAPI exposes additional Modin functions that are not available in pandas.
"""

from __future__ import annotations

import pickle
from typing import TYPE_CHECKING, Union

import pandas
from pandas._typing import CompressionOptions, StorageOptions
from pandas.core.dtypes.dtypes import SparseDtype

from modin import pandas as pd
from modin.error_message import ErrorMessage
from modin.logging import ClassLogger
from modin.pandas.io import to_dask, to_ray
from modin.utils import _inherit_docstrings

if TYPE_CHECKING:
    from modin.pandas import DataFrame, Series


class BaseSparseAccessor(ClassLogger):
    """
    Shared functionality of the Modin ``.sparse`` accessor classes.

    Parameters
    ----------
    data : DataFrame or Series
        Object to operate on.
    """

    _parent: Union[DataFrame, Series]
    _validation_msg = "Can only use the '.sparse' accessor with Sparse data."

    def __init__(self, data: Union[DataFrame, Series] = None):
        # The parent is stored first and validated afterwards.
        self._parent = data
        self._validate(data)

    @classmethod
    def _validate(cls, data: Union[DataFrame, Series]):
        """
        Check that `data` holds sparse data.

        The base class provides no check of its own; subclasses override this hook.

        Parameters
        ----------
        data : DataFrame or Series
            Object to check.

        Raises
        ------
        NotImplementedError
            Always, because the check is implemented in child classes.
        """
        raise NotImplementedError

    def _default_to_pandas(self, op, *args, **kwargs):
        """
        Run `op` on the ``.sparse`` accessor through the parent's ``_default_to_pandas``.

        Parameters
        ----------
        op : callable
            Function that receives the ``.sparse`` accessor as its first argument.
        *args : list
            Additional positional arguments to be passed in `op`.
        **kwargs : dict
            Additional keywords arguments to be passed in `op`.

        Returns
        -------
        object
            Whatever the parent's ``_default_to_pandas`` returns for the operation.
        """
        modin_obj = self._parent
        # `pandas_obj` is whatever the parent's `_default_to_pandas` hands to the
        # callback; `op` is applied to its `.sparse` accessor.
        return modin_obj._default_to_pandas(
            lambda pandas_obj: op(pandas_obj.sparse, *args, **kwargs)
        )


@_inherit_docstrings(pandas.core.arrays.sparse.accessor.SparseFrameAccessor)
class SparseFrameAccessor(BaseSparseAccessor):
    # NOTE(review): the public members below intentionally carry no docstring of
    # their own; the `_inherit_docstrings` decorator presumably fills them in from
    # the pandas counterpart -- confirm before adding docstrings here.

    @classmethod
    def _validate(cls, data: DataFrame):
        """
        Check that every column of `data` has a ``SparseDtype``.

        Parameters
        ----------
        data : DataFrame
            Object to check.

        Raises
        ------
        AttributeError
            If at least one column dtype is not a ``SparseDtype``.
        """
        for column_dtype in data.dtypes:
            if not isinstance(column_dtype, SparseDtype):
                raise AttributeError(cls._validation_msg)

    @property
    def density(self):
        pandas_accessor = self._parent._default_to_pandas(pandas.DataFrame.sparse)
        return pandas_accessor.density

    @classmethod
    def from_spmatrix(cls, data, index=None, columns=None):
        ErrorMessage.default_to_pandas("`from_spmatrix`")
        # Build the frame with pandas, then wrap the result in a Modin DataFrame.
        pandas_frame = pandas.DataFrame.sparse.from_spmatrix(
            data, index=index, columns=columns
        )
        return pd.DataFrame(pandas_frame)

    def to_dense(self):
        pandas_op = pandas.DataFrame.sparse.to_dense
        return self._default_to_pandas(pandas_op)

    def to_coo(self):
        pandas_op = pandas.DataFrame.sparse.to_coo
        return self._default_to_pandas(pandas_op)


@_inherit_docstrings(pandas.core.arrays.sparse.accessor.SparseAccessor)
class SparseAccessor(BaseSparseAccessor):
    @classmethod
    def _validate(cls, data: Series):
        """
        Verify that `data` dtype is compatible with `pandas.core.dtypes.dtypes.SparseDtype`.

        Parameters
        ----------
        data : Series
            Object to check.

        Raises
        ------
        AttributeError
            If check fails.
        """
        if not isinstance(data.dtype, SparseDtype):
            raise AttributeError(cls._validation_msg)

    @property
    def density(self):
        return self._parent._default_to_pandas(pandas.Series.sparse).density

    @property
    def fill_value(self):
        return self._parent._default_to_pandas(pandas.Series.sparse).fill_value

    @property
    def npoints(self):
        return self._parent._default_to_pandas(pandas.Series.sparse).npoints

    @property
    def sp_values(self):
        return self._parent._default_to_pandas(pandas.Series.sparse).sp_values

    @classmethod
    def from_coo(cls, A, dense_index=False):
        # This is a classmethod, so there is no accessor instance and no parent
        # object: the instance method `_default_to_pandas` cannot be used here
        # (calling it through `cls` would bind `self` to the pandas function).
        # Build the result with pandas and wrap it, the same way
        # `SparseFrameAccessor.from_spmatrix` does.
        ErrorMessage.default_to_pandas("`from_coo`")
        return pd.Series(pandas.Series.sparse.from_coo(A, dense_index=dense_index))

    def to_coo(self, row_levels=(0,), column_levels=(1,), sort_labels=False):
        return self._default_to_pandas(
            pandas.Series.sparse.to_coo,
            row_levels=row_levels,
            column_levels=column_levels,
            sort_labels=sort_labels,
        )

    def to_dense(self):
        return self._default_to_pandas(pandas.Series.sparse.to_dense)


@_inherit_docstrings(pandas.core.accessor.CachedAccessor)
class CachedAccessor(ClassLogger):
    def __init__(self, name: str, accessor) -> None:
        self._name, self._accessor = name, accessor

    def __get__(self, obj, cls):  # noqa: GL08
        if obj is not None:
            # Instance access: build the accessor for this object and store it on
            # the object under the accessor's name. `object.__setattr__` is used so
            # that any `__setattr__` defined by the object's class is bypassed.
            bound_accessor = self._accessor(obj)
            object.__setattr__(obj, self._name, bound_accessor)
            return bound_accessor
        # Class access (e.g. `Series.sparse`): hand back the accessor class itself.
        return self._accessor


class ModinAPI:
    """
    Namespace class for accessing additional Modin functions that are not available in pandas.

    Parameters
    ----------
    data : DataFrame or Series
        Object to operate on.
    """

    _data: Union[DataFrame, Series]

    def __init__(self, data: Union[DataFrame, Series]):
        self._data = data

    def to_pandas(self):
        """
        Convert a Modin DataFrame/Series object to a pandas DataFrame/Series object.

        Returns
        -------
        pandas.Series or pandas.DataFrame
        """
        return self._data._to_pandas()

    def to_ray(self):
        """
        Convert a Modin DataFrame/Series to a Ray Dataset.

        Returns
        -------
        ray.data.Dataset
            Converted object with type depending on input.

        Notes
        -----
        Modin DataFrame/Series can only be converted to a Ray Dataset if Modin uses a Ray engine.
        """
        return to_ray(self._data)

    def to_dask(self):
        """
        Convert a Modin DataFrame/Series to a Dask DataFrame/Series.

        Returns
        -------
        dask.dataframe.DataFrame or dask.dataframe.Series
            Converted object with type depending on input.

        Notes
        -----
        Modin DataFrame/Series can only be converted to a Dask DataFrame/Series if Modin uses a Dask engine.
        """
        return to_dask(self._data)

    def to_pickle_glob(
        self,
        filepath_or_buffer,
        compression: CompressionOptions = "infer",
        protocol: int = pickle.HIGHEST_PROTOCOL,
        storage_options: StorageOptions = None,
    ) -> None:
        """
        Pickle (serialize) object to file.

        This experimental feature provides parallel writing into multiple pickle files which are
        defined by glob pattern, otherwise (without glob pattern) default pandas implementation is used.

        Parameters
        ----------
        filepath_or_buffer : str
            File path where the pickled object will be stored.
        compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default: 'infer'
            A string representing the compression to use in the output file. By
            default, infers from the file extension in specified path.
            Compression mode may be any of the following possible
            values: {'infer', 'gzip', 'bz2', 'zip', 'xz', None}. If compression
            mode is 'infer' and `filepath_or_buffer` is path-like, then detect
            compression mode from the following extensions:
            '.gz', '.bz2', '.zip' or '.xz'. (otherwise no compression).
            If dict given and mode is 'zip' or inferred as 'zip', other entries
            passed as additional compression options.
        protocol : int, default: pickle.HIGHEST_PROTOCOL
            Int which indicates which protocol should be used by the pickler,
            default HIGHEST_PROTOCOL (see `pickle docs <https://docs.python.org/3/library/pickle.html>`_
            paragraph 12.1.2 for details). The possible values are 0, 1, 2, 3, 4, 5. A negative value
            for the protocol parameter is equivalent to setting its value to HIGHEST_PROTOCOL.
        storage_options : dict, optional
            Extra options that make sense for a particular storage connection, e.g.
            host, port, username, password, etc., if using a URL that will be parsed by
            fsspec, e.g., starting "s3://", "gcs://". An error will be raised if providing
            this argument with a non-fsspec URL. See the fsspec and backend storage
            implementation docs for the set of allowed keys and values.
        """
        from modin.experimental.pandas.io import to_pickle_glob

        to_pickle_glob(
            self._data,
            filepath_or_buffer=filepath_or_buffer,
            compression=compression,
            protocol=protocol,
            storage_options=storage_options,
        )

    def to_parquet_glob(
        self,
        path,
        engine="auto",
        compression="snappy",
        index=None,
        partition_cols=None,
        storage_options: StorageOptions = None,
        **kwargs,
    ) -> None:  # noqa: PR01
        """
        Write a DataFrame to the binary parquet format.

        This experimental feature provides parallel writing into multiple parquet files which are
        defined by glob pattern, otherwise (without glob pattern) default pandas implementation is used.

        Raises
        ------
        NotImplementedError
            If `path` is None.

        Notes
        -----
        * Only string type supported for `path` argument.
        * The rest of the arguments are the same as for `pandas.to_parquet`.
        """
        # Reject the unsupported call before importing the experimental IO module.
        if path is None:
            raise NotImplementedError(
                "`to_parquet_glob` doesn't support path=None, use `to_parquet` in that case."
            )

        from modin.experimental.pandas.io import to_parquet_glob

        to_parquet_glob(
            self._data,
            path=path,
            engine=engine,
            compression=compression,
            index=index,
            partition_cols=partition_cols,
            storage_options=storage_options,
            **kwargs,
        )

    def to_json_glob(
        self,
        path_or_buf=None,
        orient=None,
        date_format=None,
        double_precision=10,
        force_ascii=True,
        date_unit="ms",
        default_handler=None,
        lines=False,
        compression="infer",
        index=None,
        indent=None,
        storage_options: StorageOptions = None,
        mode="w",
    ) -> None:  # noqa: PR01
        """
        Convert the object to a JSON string.

        Raises
        ------
        NotImplementedError
            If `path_or_buf` is None.

        Notes
        -----
        * Only string type supported for `path_or_buf` argument.
        * The rest of the arguments are the same as for `pandas.to_json`.
        """
        # Reject the unsupported call before importing the experimental IO module.
        if path_or_buf is None:
            raise NotImplementedError(
                "`to_json_glob` doesn't support path_or_buf=None, use `to_json` in that case."
            )

        from modin.experimental.pandas.io import to_json_glob

        to_json_glob(
            self._data,
            path_or_buf=path_or_buf,
            orient=orient,
            date_format=date_format,
            double_precision=double_precision,
            force_ascii=force_ascii,
            date_unit=date_unit,
            default_handler=default_handler,
            lines=lines,
            compression=compression,
            index=index,
            indent=indent,
            storage_options=storage_options,
            mode=mode,
        )

    def to_xml_glob(
        self,
        path_or_buffer=None,
        index=True,
        root_name="data",
        row_name="row",
        na_rep=None,
        attr_cols=None,
        elem_cols=None,
        namespaces=None,
        prefix=None,
        encoding="utf-8",
        xml_declaration=True,
        pretty_print=True,
        parser="lxml",
        stylesheet=None,
        compression="infer",
        storage_options: StorageOptions = None,
    ) -> None:  # noqa: PR01
        """
        Render a DataFrame to an XML document.

        Raises
        ------
        NotImplementedError
            If `path_or_buffer` is None.

        Notes
        -----
        * Only string type supported for `path_or_buffer` argument.
        * The rest of the arguments are the same as for `pandas.to_xml`.
        """
        # Reject the unsupported call before importing the experimental IO module.
        if path_or_buffer is None:
            raise NotImplementedError(
                "`to_xml_glob` doesn't support path_or_buffer=None, use `to_xml` in that case."
            )

        from modin.experimental.pandas.io import to_xml_glob

        to_xml_glob(
            self._data,
            path_or_buffer=path_or_buffer,
            index=index,
            root_name=root_name,
            row_name=row_name,
            na_rep=na_rep,
            attr_cols=attr_cols,
            elem_cols=elem_cols,
            namespaces=namespaces,
            prefix=prefix,
            encoding=encoding,
            xml_declaration=xml_declaration,
            pretty_print=pretty_print,
            parser=parser,
            stylesheet=stylesheet,
            compression=compression,
            storage_options=storage_options,
        )


================================================
FILE: modin/pandas/api/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.


# Re-export all other pandas.api submodules
from pandas.api import indexers, interchange, types, typing

from modin.pandas.api import extensions

# `extensions` is Modin's own submodule; the other names are the pandas.api
# submodules re-exported by the import above.
__all__ = ["extensions", "interchange", "indexers", "types", "typing"]


================================================
FILE: modin/pandas/api/extensions/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

from .extensions import (
    register_base_accessor,
    register_dataframe_accessor,
    register_dataframe_groupby_accessor,
    register_pd_accessor,
    register_series_accessor,
    register_series_groupby_accessor,
)

# Public names of this package: the accessor-registration functions imported
# from the sibling `extensions` module above.
__all__ = [
    "register_base_accessor",
    "register_dataframe_accessor",
    "register_series_accessor",
    "register_pd_accessor",
    "register_dataframe_groupby_accessor",
    "register_series_groupby_accessor",
]


================================================
FILE: modin/pandas/api/extensions/extensions.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import inspect
from collections import defaultdict
from functools import cached_property
from types import MethodType, ModuleType
from typing import Any, Dict, Optional, Union

import modin.pandas as pd
from modin.config import Backend
from modin.core.storage_formats.pandas.query_compiler_caster import (
    _GENERAL_EXTENSIONS,
    _NON_EXTENDABLE_ATTRIBUTES,
    EXTENSION_DICT_TYPE,
    wrap_function_in_argument_caster,
)

# Maps an extended object to the set of attribute names that `_set_attribute_on_obj`
# added to it with `setattr`. Names are only recorded for objects other than the
# `modin.pandas` module itself.
# NOTE(review): the name suggests that test code uses this mapping to remove the
# added attributes between tests -- confirm against the test fixtures.
_attrs_to_delete_on_test = defaultdict(set)

# Track a dict of module-level classes that are re-exported from pandas that may need to dynamically
# change when overridden by the extensions system, such as pd.Index.
# See register_pd_accessor for details.
_reexport_classes: Dict[str, Any] = {}


def _set_attribute_on_obj(
    name: str,
    extensions: EXTENSION_DICT_TYPE,
    backend: Optional[str],
    obj: Union[type, ModuleType],
    set_reexport: bool = False,
):
    """
    Build a decorator that registers an extension attribute for `obj`.

    Parameters
    ----------
    name : str
        The name of the attribute to assign to `obj`.
    extensions : EXTENSION_DICT_TYPE
        The dictionary, keyed by backend, in which the decorated attribute is
        stored under `name`.
    backend : Optional[str]
        The backend to which the accessor applies. If `None`, this accessor
        will become the default for all backends.
    obj : DataFrame, Series, or modin.pandas
        The object we are assigning the new attribute to.
    set_reexport : bool, default False
        If True, save the current ``modin.pandas`` attribute called `name` in
        `_reexport_classes` and delete it from the module namespace.

    Returns
    -------
    decorator
        Returns the decorator function.
    """
    if name in _NON_EXTENDABLE_ATTRIBUTES:
        raise ValueError(f"Cannot register an extension with the reserved name {name}.")

    def decorator(new_attr: Any):
        """
        Register the decorated function or class under the given name.

        Parameters
        ----------
        new_attr : Any
            The new attribute to assign to name.

        Returns
        -------
        new_attr
            Unmodified new_attr is returned from the decorator.
        """
        # Module-level functions are dispatched through an argument-casting wrapper
        # that picks the backend dynamically. Classes cannot be wrapped that way
        # without breaking `isinstance` checks, so for them the module-level
        # `__getattr__` has to return the right class directly.
        # Python only calls a module's `__getattr__` when the name is missing from
        # the namespace, hence an overridden re-export is deleted from `pd` so that
        # every later lookup goes through `__getattr__`.
        # The re-exports cannot simply be left out at module initialization: the
        # `__getattr__` path calls `Backend.get()`, which assumes an engine is
        # present, and an extension may reference types such as pd.Timestamp or
        # pd.Index before it registers itself as an engine.
        if set_reexport:
            _reexport_classes[name] = getattr(pd, name)
            delattr(pd, name)

        # `functools.cached_property` needs `__set_name__` to be called by hand when
        # it is not assigned in a class body.
        # https://stackoverflow.com/a/62161136
        if isinstance(new_attr, cached_property):
            new_attr.__set_name__(obj, name)

        backend_key = None if backend is None else Backend.normalize(backend)
        extensions[backend_key][name] = new_attr

        # Only callables that are not classes and whose name is not already present
        # on `obj` get a dispatching wrapper; classes such as pd.Index are left to
        # the getattr dispatcher.
        if (
            not callable(new_attr)
            or name in dir(obj)
            or inspect.isclass(new_attr)
        ):
            return new_attr

        obj_is_class = isinstance(obj, type)
        dispatcher = wrap_function_in_argument_caster(
            klass=obj if obj_is_class else None,
            f=new_attr,
            wrapping_function_type=MethodType if obj_is_class else None,
            extensions=extensions,
            name=name,
        )
        setattr(obj, name, dispatcher)
        # "Free" functions are permanently kept in the wrapper, so no need to clear them in tests.
        if obj is not pd:
            _attrs_to_delete_on_test[obj].add(name)
        return new_attr

    return decorator


def register_dataframe_accessor(name: str, *, backend: Optional[str] = None):
    """
    Register a dataframe attribute with the name provided.

    The returned decorator assigns a new attribute to DataFrame. Typical usage:

    ```
    @register_dataframe_accessor("new_method")
    def my_new_dataframe_method(*args, **kwargs):
        # logic goes here
        return
    ```

    Afterwards the attribute is available under the registered name:

    ```
    df.new_method(*my_args, **my_kwargs)
    ```

    Parameters
    ----------
    name : str
        The name of the attribute to assign to DataFrame.
    backend : Optional[str]
        The backend to which the accessor applies. If ``None``, this accessor
        will become the default for all backends.

    Returns
    -------
    decorator
        Returns the decorator function.
    """
    dataframe_cls = pd.dataframe.DataFrame
    return _set_attribute_on_obj(
        name,
        dataframe_cls._extensions,
        backend=backend,
        obj=dataframe_cls,
    )


def register_series_accessor(name: str, *, backend: Optional[str] = None):
    """
    Register a series attribute with the name provided.

    The returned decorator assigns a new attribute to Series. Typical usage:

    ```
    @register_series_accessor("new_method")
    def my_new_series_method(*args, **kwargs):
        # logic goes here
        return
    ```

    Afterwards the attribute is available under the registered name:

    ```
    s.new_method(*my_args, **my_kwargs)
    ```

    Parameters
    ----------
    name : str
        The name of the attribute to assign to Series.
    backend : Optional[str]
        The backend to which the accessor applies. If ``None``, this accessor
        will become the default for all backends.

    Returns
    -------
    decorator
        Returns the decorator function.
    """
    series_cls = pd.series.Series
    return _set_attribute_on_obj(
        name,
        series_cls._extensions,
        backend=backend,
        obj=series_cls,
    )


def register_base_accessor(name: str, *, backend: Optional[str] = None):
    """
    Register a base attribute with the name provided.

    The returned decorator assigns a new attribute to BasePandasDataset.
    Typical usage:

    ```
    @register_base_accessor("new_method")
    def my_new_base_method(*args, **kwargs):
        # logic goes here
        return
    ```

    Afterwards the attribute is available under the registered name:

    ```
    s.new_method(*my_args, **my_kwargs)
    ```

    Parameters
    ----------
    name : str
        The name of the attribute to assign to BasePandasDataset.
    backend : Optional[str]
        The backend to which the accessor applies. If ``None``, this accessor
        will become the default for all backends.

    Returns
    -------
    decorator
        Returns the decorator function.
    """
    # Imported at call time rather than at module import time.
    from modin.pandas.base import BasePandasDataset

    base_extensions = BasePandasDataset._extensions
    return _set_attribute_on_obj(
        name, base_extensions, backend=backend, obj=BasePandasDataset
    )


def register_pd_accessor(name: str, *, backend: Optional[str] = None):
    """
    Register a pd namespace attribute with the name provided.

    This is a decorator that assigns a new attribute to modin.pandas. It can be used
    with the following syntax:

    ```
    @register_pd_accessor("new_function")
    def my_new_pd_function(*args, **kwargs):
        # logic goes here
        return
    ```

    The new attribute can then be accessed with the name provided:

    ```
    import modin.pandas as pd

    pd.new_function(*my_args, **my_kwargs)
    ```


    Parameters
    ----------
    name : str
        The name of the attribute to assign to modin.pandas.
    backend : Optional[str]
        The backend to which the accessor applies. If ``None``, this accessor
        will become the default for all backends.

    Returns
    -------
    decorator
        Returns the decorator function.
    """
    # Extensions are stored under the normalized backend name (see
    # `_set_attribute_on_obj`), so the lookup for an already registered extension
    # must use the same key; otherwise a differently spelled backend string would
    # look in the wrong sub-dictionary.
    backend_key = None if backend is None else Backend.normalize(backend)
    # The existing module attribute has to be saved and removed only when `name`
    # is currently present in the `modin.pandas` namespace and no extension was
    # registered for this backend under that name yet.
    set_reexport = name not in _GENERAL_EXTENSIONS[backend_key] and name in dir(pd)
    return _set_attribute_on_obj(
        name=name,
        extensions=_GENERAL_EXTENSIONS,
        backend=backend,
        obj=pd,
        set_reexport=set_reexport,
    )


def __getattr___impl(name: str):
    """
    Resolve a ``modin.pandas`` attribute through the extensions system.

    Python only calls a module-level ``__getattr__`` when the attribute is not
    found in the module's namespace, so this is a fallback lookup.

    Parameters
    ----------
    name : str
        The name of the attribute being retrieved.

    Returns
    -------
    Attribute
        The extension registered for the current backend, otherwise the extension
        registered for all backends, otherwise the saved re-exported attribute.

    Raises
    ------
    AttributeError
        If `name` is found in none of those places.
    """
    from modin.config import Backend

    # Backend-specific extensions take precedence over the backend-agnostic ones,
    # which are stored under the `None` key.
    for backend_key in (Backend.get(), None):
        registered = _GENERAL_EXTENSIONS[backend_key]
        if name in registered:
            return registered[name]

    if name in _reexport_classes:
        return _reexport_classes[name]

    raise AttributeError(f"module 'modin.pandas' has no attribute '{name}'")


def register_dataframe_groupby_accessor(name: str, *, backend: Optional[str] = None):
    """
    Register a dataframe groupby attribute with the name provided.

    The returned decorator assigns a new attribute to DataFrameGroupBy.
    Typical usage:

    ```
    @register_dataframe_groupby_accessor("new_method")
    def my_new_dataframe_groupby_method(*args, **kwargs):
        # logic goes here
        return
    ```

    Afterwards the attribute is available under the registered name:

    ```
    df.groupby("col").new_method(*my_args, **my_kwargs)
    ```

    Parameters
    ----------
    name : str
        The name of the attribute to assign to DataFrameGroupBy.
    backend : Optional[str]
        The backend to which the accessor applies. If ``None``, this accessor
        will become the default for all backends.

    Returns
    -------
    decorator
        Returns the decorator function.
    """
    groupby_cls = pd.groupby.DataFrameGroupBy
    return _set_attribute_on_obj(
        name, groupby_cls._extensions, backend=backend, obj=groupby_cls
    )


def register_series_groupby_accessor(name: str, *, backend: Optional[str] = None):
    """
    Build a decorator that attaches a new attribute to SeriesGroupBy.

    Typical usage:

    ```
    @register_series_groupby_accessor("new_method")
    def my_new_series_groupby_method(*args, **kwargs):
        # logic goes here
        return
    ```

    Once registered, the attribute is reachable under the given name:

    ```
    df.groupby("col0")["col1"].new_method(*my_args, **my_kwargs)
    ```

    Parameters
    ----------
    name : str
        The name of the attribute to assign to SeriesGroupBy.
    backend : Optional[str]
        The backend to which the accessor applies. If ``None``, this accessor
        will become the default for all backends.

    Returns
    -------
    decorator
        Returns the decorator function.
    """
    groupby_cls = pd.groupby.SeriesGroupBy
    return _set_attribute_on_obj(
        name,
        groupby_cls._extensions,
        backend=backend,
        obj=groupby_cls,
    )


================================================
FILE: modin/pandas/arrays/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.


"""The module is needed to allow the following import `import modin.pandas.arrays`."""

from pandas.arrays import *  # noqa: F403, F401
from pandas.arrays import __all__  # noqa: F401


================================================
FILE: modin/pandas/base.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Implement DataFrame/Series public API as pandas does."""

from __future__ import annotations

import abc
import pickle as pkl
import re
import sys
import warnings
from functools import cached_property
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    Hashable,
    Literal,
    Optional,
    Sequence,
    Union,
)

import numpy as np
import pandas
import pandas.core.generic
import pandas.core.resample
import pandas.core.window.rolling
from pandas._libs import lib
from pandas._libs.tslibs import to_offset
from pandas._typing import (
    Axis,
    CompressionOptions,
    DtypeBackend,
    IndexKeyFunc,
    IndexLabel,
    Level,
    RandomState,
    Scalar,
    StorageOptions,
    T,
    TimedeltaConvertibleTypes,
    TimestampConvertibleTypes,
    npt,
)
from pandas.compat import numpy as numpy_compat
from pandas.core.common import count_not_none, pipe
from pandas.core.dtypes.common import (
    is_bool_dtype,
    is_dict_like,
    is_dtype_equal,
    is_integer,
    is_integer_dtype,
    is_list_like,
    is_numeric_dtype,
    is_object_dtype,
)
from pandas.core.indexes.api import ensure_index
from pandas.core.methods.describe import _refine_percentiles
from pandas.util._decorators import doc
from pandas.util._validators import (
    validate_ascending,
    validate_bool_kwarg,
    validate_percentile,
)

from modin import pandas as pd
from modin.config import Backend, ShowBackendSwitchProgress
from modin.core.storage_formats.pandas.query_compiler_caster import (
    EXTENSION_NO_LOOKUP,
    QueryCompilerCaster,
)
from modin.error_message import ErrorMessage
from modin.logging import ClassLogger, disable_logging
from modin.pandas.accessor import CachedAccessor, ModinAPI
from modin.pandas.api.extensions.extensions import EXTENSION_DICT_TYPE
from modin.pandas.utils import GET_BACKEND_DOC, SET_BACKEND_DOC, is_scalar
from modin.utils import (
    _inherit_docstrings,
    expanduser_path_arg,
    sentinel,
    try_cast_to_pandas,
)

from .utils import _doc_binary_op, is_full_grab_slice

if TYPE_CHECKING:
    from typing_extensions import Self

    from modin.core.storage_formats import BaseQueryCompiler

    from .dataframe import DataFrame
    from .indexing import _iLocIndexer, _LocIndexer
    from .resample import Resampler
    from .series import Series
    from .window import Expanding, Rolling, Window


# Do not look up certain attributes in columns or index, as they're used for some
# special purposes, like serving remote context.
_ATTRS_NO_LOOKUP = {
    "__name__",
    "_cache",
    "_ipython_canary_method_should_not_exist_",
    "_ipython_display_",
    "_repr_mimebundle_",
    # The attributes used to implement the extension system are excluded from
    # lookup as well; they are merged in below through EXTENSION_NO_LOOKUP.
} | EXTENSION_NO_LOOKUP


# Attribute names collected as "default behaviour"; every name in
# _ATTRS_NO_LOOKUP is included through the union at the end.
# NOTE(review): the consumer of this set is outside this section — confirm how it
# is used before adding or removing names.
_DEFAULT_BEHAVIOUR = {
    "__init__",
    "__class__",
    "_get_index",
    "_set_index",
    "_pandas_class",
    "_get_axis_number",
    "empty",
    "index",
    "columns",
    "name",
    "dtypes",
    "dtype",
    "groupby",
    "_get_name",
    "_set_name",
    "_default_to_pandas",
    "_query_compiler",
    "_to_pandas",
    "_repartition",
    "_build_repr_df",
    "_reduce_dimension",
    "__repr__",
    "__len__",
    "__constructor__",
    "_create_or_update_from_compiler",
    "_update_inplace",
    # for persistence support;
    # see DataFrame methods docstrings for more
    "_inflate_light",
    "_inflate_full",
    "__reduce__",
    "__reduce_ex__",
    "_init",
} | _ATTRS_NO_LOOKUP

# Names to use for the "returns" and "left" roles in binary-operation docs.
# NOTE(review): presumably passed to `_doc_binary_op` (imported from .utils) —
# confirm against its call sites.
_doc_binary_op_kwargs = {"returns": "BasePandasDataset", "left": "BasePandasDataset"}


def _get_repr_axis_label_indexer(labels, num_for_repr):
    """
    Get the indexer for the given axis labels to be used for the repr.

    Parameters
    ----------
    labels : pandas.Index
        The axis labels.
    num_for_repr : int
        The number of elements to display.

    Returns
    -------
    slice or list
        The indexer to use for the repr.
    """
    if len(labels) <= num_for_repr:
        return slice(None)
    # At this point, the entire axis has len(labels) elements, and num_for_repr <
    # len(labels). We want to select a pandas subframe containing elements such that:
    #   - the repr of the pandas subframe will be the same as the repr of the entire
    #     frame.
    #   - the pandas repr will not be able to show all the elements and will put an
    #      ellipsis in the middle
    #
    # We accomplish this by selecting some elements from the front and some from the
    # back, with the front having at most 1 element more than the back. The total
    # number of elements will be num_for_repr + 1.

    if num_for_repr % 2 == 0:
        # If num_for_repr is even, take an extra element from the front.
        # The total number of elements we are selecting is (num_for_repr // 2) * 2 + 1
        # = num_for_repr + 1
        front_repr_num = num_for_repr // 2 + 1
        back_repr_num = num_for_repr // 2
    else:
        # If num_for_repr is odd, take an extra element from both the front and the
        # back. The total number of elements we are selecting is
        # (num_for_repr // 2) * 2 + 1 + 1 = num_for_repr + 1
        front_repr_num = num_for_repr // 2 + 1
        back_repr_num = num_for_repr // 2 + 1
    all_positions = range(len(labels))
    return list(all_positions[:front_repr_num]) + (
        [] if back_repr_num == 0 else list(all_positions[-back_repr_num:])
    )


@_inherit_docstrings(pandas.DataFrame, apilink=["pandas.DataFrame", "pandas.Series"])
class BasePandasDataset(QueryCompilerCaster, ClassLogger):
    """
    Implement most of the common code that exists in DataFrame/Series.

    Since both objects share the same underlying representation, and the algorithms
    are the same, we use this object to define the general behavior of those objects
    and then use those objects to define the output type.
    """

    # Pandas class that we pretend to be; usually it has the same name as our class
    # but lives in "pandas" namespace.
    _pandas_class = pandas.core.generic.NDFrame
    _query_compiler: BaseQueryCompiler
    _siblings: list[BasePandasDataset]

    _extensions: EXTENSION_DICT_TYPE = EXTENSION_DICT_TYPE(dict)
    _pinned: bool = False

    @cached_property
    def _is_dataframe(self) -> bool:
        """
        Report whether this object plays the role of a dataframe.

        Most methods of BasePandasDataset should not need to distinguish a
        dataframe from a series, but a few do. Deciding from ``_pandas_class``
        is preferable to ``hasattr(self, "columns")``, which for a series goes
        through ``self.__getattr__("columns")`` and requires materializing the
        index.

        Returns
        -------
        bool : Whether this is a dataframe.
        """
        pandas_counterpart = self._pandas_class
        return issubclass(pandas_counterpart, pandas.DataFrame)

    @abc.abstractmethod
    def _create_or_update_from_compiler(
        self, new_query_compiler: BaseQueryCompiler, inplace: bool = False
    ) -> Self | None:
        """
        Wrap `new_query_compiler` in a new object, or adopt it in place.

        Abstract hook: this base implementation has no behavior of its own.

        Parameters
        ----------
        new_query_compiler : BaseQueryCompiler
            QueryCompiler to use to manage the data.
        inplace : bool, default: False
            Whether or not to perform update or creation inplace.

        Returns
        -------
        DataFrame, Series or None
            None if update was done, ``DataFrame`` or ``Series`` otherwise.
        """

    def _add_sibling(self, sibling: BasePandasDataset) -> None:
        """
        Record `sibling` as an object sharing this object's query compiler.

        Siblings are objects that share the same query compiler. This function is
        called when a shallow copy is made.

        Parameters
        ----------
        sibling : BasePandasDataset
            Dataset to add to siblings list.
        """
        # The newcomer gets a fresh list: everything this object already tracks,
        # followed by this object itself.
        sibling._siblings = [*self._siblings, self]
        self._siblings += [sibling]
        # ``self._siblings`` now ends with `sibling`, so the loop below also
        # appends `sibling` to its own list, in addition to announcing it to every
        # previously known sibling.
        for member in self._siblings:
            member._siblings += [sibling]

    def _build_repr_df(
        self, num_rows: int, num_cols: int
    ) -> pandas.DataFrame | pandas.Series:
        """
        Materialize the part of this dataset that its string representation needs.

        Parameters
        ----------
        num_rows : int
            Number of rows to show in string representation. When the dataset
            has more rows than that, only rows from the beginning and from the
            end are selected.
        num_cols : int
            Number of columns to show in string representation. When the dataset
            has more columns than that, only columns from the beginning and from
            the end are selected. Ignored for a series.

        Returns
        -------
        pandas.DataFrame or pandas.Series
            A pandas object holding the selected rows (and columns).
        """
        is_frame = self._is_dataframe
        no_rows = len(self) == 0
        # Shortcut: nothing to select from when either axis is empty.
        if no_rows or (is_frame and self._query_compiler.get_axis_len(1) == 0):
            return pandas.DataFrame(
                index=self.index,
                columns=self.columns if is_frame else None,
            )

        row_positions = _get_repr_axis_label_indexer(self.index, num_rows)
        if is_frame:
            col_positions = _get_repr_axis_label_indexer(self.columns, num_cols)
            selection = (row_positions, col_positions)
        else:
            selection = row_positions
        return self.iloc[selection]._query_compiler.to_pandas()

    def _update_inplace(self, new_query_compiler: BaseQueryCompiler) -> None:
        """
        Swap the query compiler of this object and of all its siblings.

        Parameters
        ----------
        new_query_compiler : BaseQueryCompiler
            The new QueryCompiler to use to manage the data.
        """
        stale_compiler = self._query_compiler
        self._query_compiler = new_query_compiler
        # Every object listed in ``_siblings`` is repointed to the new compiler too.
        for sibling in self._siblings:
            sibling._query_compiler = new_query_compiler
        # The previous compiler is released only after the repointing above.
        stale_compiler.free()

    def _validate_other(
        self,
        other,
        axis,
        dtype_check=False,
        compare_index=False,
    ):
        """
        Check that `other` can be used as the second operand of an inter-dataset operation.

        Parameters
        ----------
        other : modin.pandas.BasePandasDataset, list-like or scalar
            The operand to validate against `self`.
        axis : {None, 0, 1}
            Axis the operand is matched against. With `0` the length of a
            list-like `other` must equal the length of the index of `self`;
            with `1` or `None` it must equal the length of the columns of `self`.
        dtype_check : bool, default: False
            Validates that both operands have compatible dtypes.
        compare_index : bool, default: False
            Compare Index if True.

        Returns
        -------
        BaseQueryCompiler or Any
            The query compiler of `other` when it is a ``BasePandasDataset``,
            otherwise `other` itself, unchanged.

        Raises
        ------
        ValueError
            If `other` is list-like and its length is different from
            length of `self` `axis`.
        TypeError
            If the index comparison or the dtype check fails.
        """
        # Modin objects are passed through as their query compiler; none of the
        # checks below apply to them.
        if isinstance(other, BasePandasDataset):
            return other._query_compiler
        if not is_list_like(other):
            # We skip dtype checking if the other is a scalar. Note that pandas
            # is_scalar can be misleading as it is False for almost all objects,
            # even when those objects should be treated as scalars. See e.g.
            # https://github.com/modin-project/modin/issues/5236. Therefore, we
            # detect scalars by checking that `other` is neither a list-like nor
            # another BasePandasDataset.
            return other
        # From here on `other` is a list-like that is not a Modin object.
        axis = self._get_axis_number(axis) if axis is not None else 1
        result = other
        if axis == 0:
            if len(other) != len(self._query_compiler.index):
                raise ValueError(
                    f"Unable to coerce to Series, length must be {len(self._query_compiler.index)}: "
                    + f"given {len(other)}"
                )
        else:
            if len(other) != len(self._query_compiler.columns):
                raise ValueError(
                    f"Unable to coerce to Series, length must be {len(self._query_compiler.columns)}: "
                    + f"given {len(other)}"
                )
        # Build one "dtype" entry per element of `other`: the shared dtype when
        # `other` exposes one, otherwise the Python type of each element (or the
        # element itself when it is a missing value).
        if hasattr(other, "dtype"):
            other_dtypes = [other.dtype] * len(other)
        elif is_dict_like(other):
            other_dtypes = [
                other[label] if pandas.isna(other[label]) else type(other[label])
                for label in self._get_axis(axis)
                # The binary operation is applied for intersection of axis labels
                # and dictionary keys. So filtering out extra keys.
                if label in other
            ]
        else:
            other_dtypes = [x if pandas.isna(x) else type(x) for x in other]
        if compare_index:
            # `other` must expose an `index` attribute for this check.
            if not self.index.equals(other.index):
                raise TypeError("Cannot perform operation with non-equal index")
        # Do dtype checking.
        if dtype_check:
            self_dtypes = self._get_dtypes()
            if is_dict_like(other):
                # The binary operation is applied for the intersection of axis labels
                # and dictionary keys. So filtering `self_dtypes` to match the `other`
                # dictionary.
                self_dtypes = [
                    dtype
                    for label, dtype in zip(self._get_axis(axis), self._get_dtypes())
                    if label in other
                ]

            # TODO(https://github.com/modin-project/modin/issues/5239):
            # this spuriously rejects other that is a list including some
            # custom type that can be added to self's elements.
            for self_dtype, other_dtype in zip(self_dtypes, other_dtypes):
                # A pair is accepted when any one of the clauses below holds.
                if not (
                    (is_numeric_dtype(self_dtype) and is_numeric_dtype(other_dtype))
                    or (is_numeric_dtype(self_dtype) and pandas.isna(other_dtype))
                    or (is_object_dtype(self_dtype) and is_object_dtype(other_dtype))
                    # NOTE(review): both operands of this `and` test `self_dtype`,
                    # so `other_dtype` is never inspected and any datetime64 or
                    # timedelta64 `self_dtype` is accepted. Checking `other_dtype`
                    # instead would reject operands that are accepted today (e.g.
                    # a list of timedelta objects, whose entry here is a Python
                    # type rather than a numpy dtype) — confirm the intended rule
                    # before tightening.
                    or (
                        lib.is_np_dtype(self_dtype, "mM")
                        and lib.is_np_dtype(self_dtype, "mM")
                    )
                    or is_dtype_equal(self_dtype, other_dtype)
                ):
                    raise TypeError("Cannot do operation with improper dtypes")
        return result

    def _validate_function(self, func, on_invalid=None) -> None:
        """
        Verify that `func` is something that can be applied to the frame.

        Parameters
        ----------
        func : object
            A callable, a function name, a list-like of those, or a dict whose
            values are any of the former.
        on_invalid : callable(str, cls), optional
            Function to call in case invalid `func` is met, `on_invalid` takes an error
            message and an exception type as arguments. If not specified raise an
            appropriate exception.
            **Note:** This parameter is a hack to concord with pandas error types.
        """
        if on_invalid is None:

            def on_invalid(msg, exception=Exception):
                raise exception(msg)

        if isinstance(func, dict):
            # Only the values are checked. The keys could be validated against the
            # axis labels as well, but that may be quite expensive for lazy frames.
            for nested in func.values():
                self._validate_function(nested, on_invalid)
            return

        candidates = func if is_list_like(func) else [func]
        for candidate in candidates:
            if isinstance(candidate, str):
                # A name is acceptable when either this object or numpy has an
                # attribute called that way.
                if not hasattr(self, candidate) and not hasattr(np, candidate):
                    on_invalid(
                        f"'{candidate}' is not a valid function for '{type(self).__name__}' object",
                        AttributeError,
                    )
                continue
            if not callable(candidate):
                on_invalid(
                    f"One of the passed functions has an invalid type: {type(candidate)}: {candidate}, "
                    + "only callable or string is acceptable.",
                    TypeError,
                )

    def _binary_op(self, op, other, **kwargs) -> Self:
        """
        Do binary operation between two datasets.

        Parameters
        ----------
        op : str
            Name of binary operation. It is looked up on the query compiler, or
            on the pandas class when a `level` is requested.
        other : modin.pandas.BasePandasDataset, list-like or scalar
            Second operand of binary operation.
        **kwargs : dict
            Additional parameters to binary operation. A non-``None`` `_axis`
            entry means no `axis` keyword is derived for the operation; the
            entry itself is never forwarded.

        Returns
        -------
        modin.pandas.BasePandasDataset
            Result of binary operation.
        """
        # _axis indicates the operator will use the default axis
        if kwargs.pop("_axis", None) is None:
            requested_axis = kwargs.get("axis", None)
            if requested_axis is not None:
                kwargs["axis"] = axis = self._get_axis_number(requested_axis)
            else:
                kwargs["axis"] = axis = 1
        else:
            axis = 0
        if kwargs.get("level", None) is not None:
            # Broadcast is an internally used argument
            kwargs.pop("broadcast", None)
            return self._default_to_pandas(
                getattr(self._pandas_class, op), other, **kwargs
            )
        other = self._validate_other(other, axis, dtype_check=True)
        # These operators are invoked without an `axis` keyword.
        exclude_list = [
            "__add__",
            "__radd__",
            "__and__",
            "__rand__",
            "__or__",
            "__ror__",
            "__xor__",
            "__rxor__",
        ]
        if op in exclude_list:
            # When the caller supplied `_axis`, no "axis" entry was stored above,
            # so the key may be absent; popping with a default avoids a KeyError.
            kwargs.pop("axis", None)
        # Series logical operations take an additional fill_value argument that DF does not
        series_specialize_list = [
            "eq",
            "ge",
            "gt",
            "le",
            "lt",
            "ne",
        ]
        if not self._is_dataframe and op in series_specialize_list:
            op = "series_" + op
        new_query_compiler = getattr(self._query_compiler, op)(other, **kwargs)
        return self._create_or_update_from_compiler(new_query_compiler)

    def _default_to_pandas(self, op, *args, reason: Optional[str] = None, **kwargs):
        """
        Convert dataset to pandas type and call a pandas function on it.

        Parameters
        ----------
        op : str or callable
            Name of a pandas attribute to look up on ``_pandas_class``, or a
            callable that receives the pandas object as its first argument.
        *args : list
            Additional positional arguments to be passed to `op`.
        reason : str, optional
            Forwarded to the query compiler's default-to-pandas warning hook.
        **kwargs : dict
            Additional keywords arguments to be passed to `op`.

        Returns
        -------
        object
            Result of operation. pandas DataFrame and Series results are wrapped
            into their Modin counterparts.
        """
        empty_self_str = "" if not self.empty else " for empty DataFrame"
        self._query_compiler._maybe_warn_on_default(
            message="`{}.{}`{}".format(
                type(self).__name__,
                op if isinstance(op, str) else op.__name__,
                empty_self_str,
            ),
            reason=reason,
        )

        # Convert the arguments and this object before handing them to pandas.
        args = try_cast_to_pandas(args)
        kwargs = try_cast_to_pandas(kwargs)
        pandas_obj = self._to_pandas()
        with warnings.catch_warnings():
            # FutureWarnings emitted while running the pandas operation are suppressed.
            warnings.filterwarnings("ignore", category=FutureWarning)
            if callable(op):
                result = op(pandas_obj, *args, **kwargs)
            elif isinstance(op, str):
                # Look the operation up on the pandas class this object mirrors.
                # A property is read off the converted pandas object; anything
                # else is called with the pandas object as its first argument.
                attr = getattr(self._pandas_class, op)
                if isinstance(attr, property):
                    result = getattr(pandas_obj, op)
                else:
                    result = attr(pandas_obj, *args, **kwargs)
            else:
                # NOTE(review): presumably this raises; if it returned, `result`
                # would be unbound below — confirm.
                ErrorMessage.catch_bugs_and_request_email(
                    failure_condition=True,
                    extra_log="{} is an unsupported operation".format(op),
                )
        if isinstance(result, pandas.DataFrame):
            from .dataframe import DataFrame

            return DataFrame(result)
        elif isinstance(result, pandas.Series):
            from .series import Series

            return Series(result)
        # inplace: a None result is treated as "pandas modified `pandas_obj` in
        # place", so the modified pandas object is wrapped again and its query
        # compiler is adopted by this object.
        elif result is None:
            return self._create_or_update_from_compiler(
                getattr(pd, type(pandas_obj).__name__)(pandas_obj)._query_compiler,
                inplace=True,
            )
        else:
            try:
                if (
                    isinstance(result, (list, tuple))
                    and len(result) == 2
                    and isinstance(result[0], pandas.DataFrame)
                ):
                    # Some operations split the DataFrame into two (e.g. align). We need to wrap
                    # both of the returned results
                    if isinstance(result[1], pandas.DataFrame):
                        second = self.__constructor__(result[1])
                    else:
                        second = result[1]
                    return self.__constructor__(result[0]), second
                else:
                    return result
            except TypeError:
                # Any TypeError raised while inspecting or wrapping the result
                # falls back to returning the raw pandas result.
                return result
    @classmethod
    def _get_axis_number(cls, axis) -> int:
        """
        Translate an axis name or number into an axis index.

        Parameters
        ----------
        axis : int, str or pandas._libs.lib.NoDefault
            Axis name ('index' or 'columns') or number to be converted to axis index.

        Returns
        -------
        int
            0 or 1 - axis index in the array of axes stored in the dataframe.
        """
        # Both "not given" markers mean axis 0.
        if axis is None or axis is lib.no_default:
            return 0
        # Everything else is resolved by the pandas class this one mirrors.
        return cls._pandas_class._get_axis_number(axis)

    @cached_property
    def __constructor__(self) -> type[Self]:
        """
        Return the class to instantiate for results of the same kind as `self`.

        Returns
        -------
        type
            The concrete class of `self`, i.e. ``type(self)``.
        """
        own_class = type(self)
        return own_class

    def abs(self) -> Self:  # noqa: RT01, D200
        """
        Return a `BasePandasDataset` with absolute numeric value of each element.
        """
        # The dtype validation runs before any work is requested from the compiler.
        self._validate_dtypes(numeric_only=True)
        abs_compiler = self._query_compiler.abs()
        return self.__constructor__(query_compiler=abs_compiler)

    def _set_index(self, new_index) -> None:
        """
        Assign new row labels by storing them on the query compiler.

        Parameters
        ----------
        new_index : pandas.Index
            The new index to set this.
        """
        compiler = self._query_compiler
        compiler.index = new_index

    def _get_index(self) -> pandas.Index:
        """
        Read the row labels from the query compiler.

        Returns
        -------
        pandas.Index
            The ``index`` attribute of the underlying query compiler.
        """
        compiler = self._query_compiler
        return compiler.index

    index: pandas.Index = property(_get_index, _set_index)

    def _get_axis(self, axis) -> pandas.Index:
        """
        Fetch the labels of the requested axis.

        Parameters
        ----------
        axis : {0, 1}
            Axis to return labels on.
            0 is for index, when 1 is for columns.

        Returns
        -------
        pandas.Index
        """
        # Any value other than 0 selects the columns.
        if axis == 0:
            return self.index
        return self.columns

    def add(
        self, other, axis="columns", level=None, fill_value=None
    ) -> Self:  # noqa: PR01, RT01, D200
        """
        Return addition of `BasePandasDataset` and `other`, element-wise (binary operator `add`).
        """
        # All the work is delegated to the shared binary-operation path.
        binary_kwargs = {"axis": axis, "level": level, "fill_value": fill_value}
        return self._binary_op("add", other, **binary_kwargs)

    def aggregate(
        self, func=None, axis=0, *args, **kwargs
    ) -> DataFrame | Series | Scalar:  # noqa: PR01, RT01, D200
        """
        Aggregate using one or more operations over the specified axis.
        """
        axis = self._get_axis_number(axis)

        if axis == 0:
            outcome = self._aggregate(func, *args, _axis=axis, **kwargs)
            if outcome is not None:
                return outcome
        # TODO: handle case when axis == 1
        # Reached for axis 1, or when the index-axis helper returned None:
        # fall back to `apply`, which is not given the `is_transform` flag.
        kwargs.pop("is_transform", None)
        return self.apply(func, axis=axis, args=args, **kwargs)

    agg: DataFrame | Series | Scalar = aggregate

    def _aggregate(self, func, *args, **kwargs):
        """
        Dispatch an aggregation according to the type of `func`.

        Parameters
        ----------
        func : function, str, list or dict
            Function to use for aggregating the data.
        *args : list
            Positional arguments to pass to func.
        **kwargs : dict
            Keyword arguments to pass to func. The internal `_axis` and `_level`
            entries are consumed here and not forwarded as such.

        Returns
        -------
        scalar or BasePandasDataset

        See Also
        --------
        aggregate : Aggregate along any axis.
        """
        target_axis = kwargs.pop("_axis", 0)
        kwargs.pop("_level", None)

        # Dictionaries have complex behavior because they can be renamed here,
        # so they (and a missing `func`) are handed to pandas with the keyword
        # arguments otherwise untouched.
        if func is None or isinstance(func, dict):
            return self._default_to_pandas("agg", func, *args, **kwargs)

        # Neither of the remaining paths is given the `is_transform` flag.
        kwargs.pop("is_transform", None)
        if isinstance(func, str):
            return self._string_function(func, *args, **kwargs)
        return self.apply(func, axis=target_axis, args=args, **kwargs)

    def _string_function(self, func, *args, **kwargs):
        """
        Run the operation that the string `func` names.

        The name is resolved on `self` first; if `self` has no such attribute
        but numpy does, the call is delegated to pandas ``agg``.

        Parameters
        ----------
        func : str
            Function name to call on `self`.
        *args : list
            Positional arguments to pass to func.
        **kwargs : dict
            Keyword arguments to pass to func.

        Returns
        -------
        object
            Function result.

        Raises
        ------
        ValueError
            If neither `self` nor numpy has an attribute named `func`.
        """
        assert isinstance(func, str)
        own_attr = getattr(self, func, None)
        if own_attr is not None:
            if callable(own_attr):
                return own_attr(*args, **kwargs)
            # A non-callable attribute is returned as is; it cannot take
            # arguments, apart from the `axis` and `_level` keywords which are
            # tolerated and ignored.
            assert len(args) == 0
            assert all(kwarg in ["axis", "_level"] for kwarg in kwargs)
            return own_attr
        if getattr(np, func, None) is not None:
            return self._default_to_pandas("agg", func, *args, **kwargs)
        raise ValueError("{} is an unknown string function".format(func))

    def _get_dtypes(self) -> list:
        """
        Collect the dtype(s) of this object into a list.

        Returns
        -------
        list
            Either a one-element list that contains `dtype` if object denotes a Series
            or a list that contains `dtypes` if object denotes a DataFrame.
        """
        # Objects without a `dtype` attribute are expected to expose `dtypes`.
        if not hasattr(self, "dtype"):
            return list(self.dtypes)
        return [self.dtype]

    def align(
        self,
        other,
        join="outer",
        axis=None,
        level=None,
        copy=None,
        fill_value=None,
        method=lib.no_default,
        limit=lib.no_default,
        fill_axis=lib.no_default,
        broadcast_axis=lib.no_default,
    ) -> tuple[Self, Self]:  # noqa: PR01, RT01, D200
        """
        Align two objects on their axes with the specified join method.
        """
        unset = lib.no_default
        own_name = type(self).__name__

        # Passing any of the fill-related keywords explicitly is deprecated.
        if any(arg is not unset for arg in (method, limit, fill_axis)):
            warnings.warn(
                "The 'method', 'limit', and 'fill_axis' keywords in "
                + f"{own_name}.align are deprecated and will be removed "
                + "in a future version. Call fillna directly on the returned objects "
                + "instead.",
                FutureWarning,
            )
        # Replace the "not given" marker with the value handed to the query compiler.
        fill_axis = 0 if fill_axis is unset else fill_axis
        method = None if method is unset else method
        limit = None if limit is unset else limit

        if broadcast_axis is unset:
            broadcast_axis = None
        else:
            # The keyword was passed explicitly (possibly as None): warn, and
            # suggest a replacement when a series is aligned with a dataframe.
            notice = (
                f"The 'broadcast_axis' keyword in {own_name}.align is "
                + "deprecated and will be removed in a future version."
            )
            if broadcast_axis is not None:
                if self.ndim == 1 and other.ndim == 2:
                    notice += (
                        " Use left = DataFrame({col: left for col in right.columns}, "
                        + "index=right.index) before calling `left.align(right)` instead."
                    )
                elif self.ndim == 2 and other.ndim == 1:
                    notice += (
                        " Use right = DataFrame({col: right for col in left.columns}, "
                        + "index=left.index) before calling `left.align(right)` instead"
                    )
            warnings.warn(notice, FutureWarning)

        aligned_left, aligned_right = self._query_compiler.align(
            other._query_compiler,
            join=join,
            axis=axis,
            level=level,
            copy=copy,
            fill_value=fill_value,
            method=method,
            limit=limit,
            fill_axis=fill_axis,
            broadcast_axis=broadcast_axis,
        )
        # Both results are wrapped with the class of `self`.
        wrap = self.__constructor__
        return wrap(query_compiler=aligned_left), wrap(query_compiler=aligned_right)

    @abc.abstractmethod
    def _reduce_dimension(self, query_compiler: BaseQueryCompiler) -> Series | Scalar:
        """
        Reduce the dimension of data from the `query_compiler`.

        Parameters
        ----------
        query_compiler : BaseQueryCompiler
            Query compiler to retrieve the data.

        Returns
        -------
        Series | Scalar
        """
        pass

    def all(
        self, axis=0, bool_only=False, skipna=True, **kwargs
    ) -> Self:  # noqa: PR01, RT01, D200
        """
        Check whether every element is True, optionally along one axis.

        With ``axis=None`` the data is first reduced along axis 0 and, if that
        still yields a dataset, reduced once more so that a scalar comes out.
        """
        validate_bool_kwarg(skipna, "skipna", none_allowed=False)

        if axis is None:
            if bool_only:
                raise ValueError("Axis must be 0 or 1 (got {})".format(axis))
            # Reduce to a scalar if axis is None.
            # FIXME: Judging by pandas docs `**kwargs` serves only compatibility
            # purpose and does not affect the result, we shouldn't pass them to the query compiler.
            first_pass_qc = self._query_compiler.all(
                axis=0,
                bool_only=bool_only,
                skipna=skipna,
                **kwargs,
            )
            reduced = self._reduce_dimension(first_pass_qc)
            if isinstance(reduced, BasePandasDataset):
                return reduced.all(
                    axis=axis, bool_only=bool_only, skipna=skipna, **kwargs
                )
            return reduced

        axis = self._get_axis_number(axis)
        if bool_only and axis == 0:
            # Objects exposing a single `dtype` (Series) do not support this flag.
            if hasattr(self, "dtype"):
                raise NotImplementedError(
                    "{}.{} does not implement numeric_only.".format(
                        type(self).__name__, "all"
                    )
                )
            # Keep only the boolean columns, then reduce those without the flag.
            boolean_columns = self.columns[self.dtypes == np.bool_]
            return self[boolean_columns].all(
                axis=axis, bool_only=False, skipna=skipna, **kwargs
            )

        reduction_qc = self._query_compiler.all(
            axis=axis, bool_only=bool_only, skipna=skipna, **kwargs
        )
        return self._reduce_dimension(reduction_qc)

    def any(
        self, *, axis=0, bool_only=False, skipna=True, **kwargs
    ) -> Self:  # noqa: PR01, RT01, D200
        """
        Return whether any element is True, potentially over an axis.

        With ``axis=None`` the data is first reduced along axis 0 and, if that
        still yields a dataset, reduced once more so that a scalar comes out.

        Raises
        ------
        NotImplementedError
            If `bool_only` is set for axis 0 on an object that exposes a
            single ``dtype`` attribute (a Series).
        ValueError
            If `bool_only` is set together with ``axis=None``.
        """
        validate_bool_kwarg(skipna, "skipna", none_allowed=False)
        if axis is not None:
            axis = self._get_axis_number(axis)
            if bool_only and axis == 0:
                if hasattr(self, "dtype"):
                    # Report the method that was actually called; this message
                    # previously named "all" by mistake.
                    raise NotImplementedError(
                        "{}.{} does not implement numeric_only.".format(
                            type(self).__name__, "any"
                        )
                    )
                # Restrict the computation to boolean columns, then reduce
                # those without the `bool_only` flag.
                data_for_compute = self[self.columns[self.dtypes == np.bool_]]
                return data_for_compute.any(
                    axis=axis, bool_only=False, skipna=skipna, **kwargs
                )
            return self._reduce_dimension(
                self._query_compiler.any(
                    axis=axis, bool_only=bool_only, skipna=skipna, **kwargs
                )
            )
        else:
            if bool_only:
                raise ValueError("Axis must be 0 or 1 (got {})".format(axis))
            # Reduce to a scalar if axis is None.
            result = self._reduce_dimension(
                self._query_compiler.any(
                    axis=0,
                    bool_only=bool_only,
                    skipna=skipna,
                    **kwargs,
                )
            )
            # A dataset result still has a dimension left to collapse.
            if isinstance(result, BasePandasDataset):
                return result.any(
                    axis=axis, bool_only=bool_only, skipna=skipna, **kwargs
                )
            return result

    def apply(
        self,
        func,
        axis,
        raw,
        result_type,
        args,
        **kwds,
    ) -> BaseQueryCompiler:  # noqa: PR01, RT01, D200
        """
        Run `func` along an axis of the `BasePandasDataset`.

        String functions are dispatched through ``_string_function``; every
        other kind of `func` is handed to the query compiler's ``apply``.
        """

        def error_raiser(msg, exception):
            """Raise `msg` using the exception type pandas raises for it."""
            # HACK: to concord with pandas error types, every TypeError is
            # replaced with an AssertionError.
            if exception is TypeError:
                exception = AssertionError
            raise exception(msg)

        self._validate_function(func, on_invalid=error_raiser)
        axis = self._get_axis_number(axis)

        if isinstance(func, str):
            # if axis != 1 function can be bounded to the Series, which doesn't
            # support axis parameter
            if axis == 1:
                kwds["axis"] = axis
            outcome = self._string_function(func, *args, **kwds)
            # Dataset results are unwrapped to their query compiler.
            if isinstance(outcome, BasePandasDataset):
                return outcome._query_compiler
            return outcome

        if isinstance(func, dict):
            column_count = self._query_compiler.get_axis_len(1)
            if column_count != len(set(self.columns)):
                warnings.warn(
                    "duplicate column names not supported with apply().",
                    FutureWarning,
                    stacklevel=2,
                )

        return self._query_compiler.apply(
            func,
            axis,
            args=args,
            raw=raw,
            result_type=result_type,
            **kwds,
        )

    def asfreq(
        self, freq, method=None, how=None, normalize=False, fill_value=None
    ) -> Self:  # noqa: PR01, RT01, D200
        """
        Convert the time series to the given frequency.
        """
        # The conversion is done by the query compiler; the result is re-wrapped.
        converted_qc = self._query_compiler.asfreq(
            freq=freq,
            method=method,
            how=how,
            normalize=normalize,
            fill_value=fill_value,
        )
        return self.__constructor__(query_compiler=converted_qc)

    def asof(self, where, subset=None) -> Self:  # noqa: PR01, RT01, D200
        """
        Return the last row(s) without any NaNs before `where`.

        A scalar `where` produces a squeezed result; a list-like `where`
        produces one row per requested label.
        """
        scalar = not is_list_like(where)
        if isinstance(where, pandas.Index):
            # Prevent accidental mutation of original:
            where = where.copy()
        else:
            where = pandas.Index([where] if scalar else where)

        # `subset` is only relevant for DataFrames.
        data = self if subset is None else self[subset]

        # For each requested label, look up the latest label whose row has no NaNs.
        valid_labels = data.dropna().index
        matched = pandas.Index([valid_labels.asof(label) for label in where])

        result = self.reindex(matched)
        result.index = where

        # A scalar request is squeezed down one dimension.
        return result.squeeze() if scalar else result

    def astype(
        self, dtype, copy=None, errors="raise"
    ) -> Self:  # noqa: PR01, RT01, D200
        """
        Convert the data of a Modin object to the dtype(s) given by `dtype`.

        When ``copy`` is falsy and the already materialized dtypes equal the
        requested ones, the object itself is returned without any cast.
        """
        copy = True if copy is None else copy

        # dtype can be a series, a dict, or a scalar. A series is turned into
        # a dict before it reaches the query compiler.
        if isinstance(dtype, (pd.Series, pandas.Series)):
            if not dtype.index.is_unique:
                raise ValueError("cannot reindex on an axis with duplicate labels")
            dtype = dict(dtype.items())

        # If we got a series or dict originally, dtype is a dict now. Its keys
        # must be column names.
        if isinstance(dtype, dict):
            # avoid materializing columns in lazy mode. the query compiler
            # will handle errors where dtype dict includes keys that are not
            # in columns.
            labels_available = not self._query_compiler.lazy_column_labels
            if (
                labels_available
                and not set(dtype).issubset(self._query_compiler.columns)
                and errors == "raise"
            ):
                raise KeyError(
                    "Only a column name can be used for the key in "
                    + "a dtype mappings argument."
                )

        if not copy:
            # Copying can only be skipped when the current dtypes are known
            # and already equal to the requested ones.
            if not self._query_compiler.frame_has_materialized_dtypes:
                copy = True
            else:
                current_dtypes = self._query_compiler.dtypes
                if isinstance(dtype, dict):
                    for column, wanted in dtype.items():
                        if wanted != current_dtypes[column]:
                            copy = True
                            break
                elif not (current_dtypes == dtype).all():
                    copy = True

        if not copy:
            return self
        cast_qc = self._query_compiler.astype(dtype, errors=errors)
        return self._create_or_update_from_compiler(cast_qc)

    @property
    def at(self, axis=None) -> _LocIndexer:  # noqa: PR01, RT01, D200
        """
        Access a single value by its row/column label pair.
        """
        # The indexer class is imported when the property is read, not at
        # module import time.
        from .indexing import _LocIndexer

        label_indexer = _LocIndexer(self)
        return label_indexer

    def at_time(self, time, asof=False, axis=None) -> Self:  # noqa: PR01, RT01, D200
        """
        Select the values recorded at a particular time of day (e.g., 9:30AM).

        Implemented as ``between_time`` with `time` as both endpoints, both
        of them inclusive.
        """
        if asof:
            # pandas raises NotImplementedError for asof=True, so we do, too.
            raise NotImplementedError("'asof' argument is not supported")
        window = {
            "start_time": time,
            "end_time": time,
            "inclusive": "both",
            "axis": axis,
        }
        return self.between_time(**window)

    @_inherit_docstrings(
        pandas.DataFrame.between_time, apilink="pandas.DataFrame.between_time"
    )
    def between_time(
        self,
        start_time,
        end_time,
        inclusive="both",
        axis=None,
    ) -> Self:  # noqa: PR01, RT01, D200
        # No docstring is written here on purpose: it is supplied by the
        # `_inherit_docstrings` decorator above.
        to_time = pandas.core.tools.times.to_time
        # Both endpoints are converted with pandas' `to_time` before they are
        # passed to the query compiler.
        start = to_time(start_time)
        end = to_time(end_time)
        axis_number = self._get_axis_number(axis)
        selected_qc = self._query_compiler.between_time(
            start_time=start,
            end_time=end,
            inclusive=inclusive,
            axis=axis_number,
        )
        return self._create_or_update_from_compiler(selected_qc)

    def _deprecate_downcast(self, downcast, method_name: str):  # noqa: GL08
        if downcast is not lib.no_default:
            warnings.warn(
                f"The 'downcast' keyword in {method_name} is deprecated and "
                + "will be removed in a future version. Use "
                + "res.infer_objects(copy=False) to infer non-object dtype, or "
                + "pd.to_numeric with the 'downcast' keyword to downcast numeric "
                + "results.",
                FutureWarning,
            )
        else:
            downcast = None
        return downcast

    def bfill(
        self,
        *,
        axis=None,
        inplace=False,
        limit=None,
        limit_area=None,
        downcast=lib.no_default,
    ) -> Self:  # noqa: PR01, RT01, D200
        """
        Fill missing values backward; same as `DataFrame.fillna` with ``method='bfill'``.
        """
        if limit_area is not None:
            # `limit_area` is not handled natively, so the call falls back to pandas.
            fallback_kwargs = {
                "reason": "'limit_area' parameter isn't supported",
                "axis": axis,
                "inplace": inplace,
                "limit": limit,
                "limit_area": limit_area,
                "downcast": downcast,
            }
            return self._default_to_pandas("bfill", **fallback_kwargs)

        downcast = self._deprecate_downcast(downcast, "bfill")
        with warnings.catch_warnings():
            # Silence the "fillna with 'method'" FutureWarning while delegating.
            warnings.filterwarnings(
                "ignore", ".*fillna with 'method' is deprecated", category=FutureWarning
            )
            filled = self.fillna(
                method="bfill",
                axis=axis,
                limit=limit,
                downcast=downcast,
                inplace=inplace,
            )
        return filled

    def backfill(
        self, *, axis=None, inplace=False, limit=None, downcast=lib.no_default
    ) -> Self:  # noqa: PR01, RT01, D200
        """
        Deprecated alias of `DataFrame.bfill`.

        Emits its own ``FutureWarning`` and then calls ``bfill`` with every
        further ``FutureWarning`` suppressed.
        """
        deprecation_note = "DataFrame.backfill/Series.backfill is deprecated. Use DataFrame.bfill/Series.bfill instead"
        warnings.warn(deprecation_note, FutureWarning)
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=FutureWarning)
            filled = self.bfill(
                axis=axis, inplace=inplace, limit=limit, downcast=downcast
            )
        return filled

    def bool(self) -> bool:  # noqa: RT01, D200
        """
        Return the truth value of a `BasePandasDataset` holding exactly one element.

        Always emits a deprecation ``FutureWarning``; raises ``ValueError``
        unless the shape is ``(1,)`` or ``(1, 1)``.
        """
        warnings.warn(
            f"{type(self).__name__}.bool is now deprecated and will be removed "
            + "in future version of pandas",
            FutureWarning,
        )
        shape = self.shape
        if shape == (1,) or shape == (1, 1):
            # Exactly one element: let pandas compute the truth value.
            return self._to_pandas().bool()
        raise ValueError(
            """The PandasObject does not have exactly
                                1 element. Return the bool of a single
                                element PandasObject. The truth value is
                                ambiguous. Use a.empty, a.item(), a.any()
                                or a.all()."""
        )

    def clip(
        self, lower=None, upper=None, *, axis=None, inplace=False, **kwargs
    ) -> Self:  # noqa: PR01, RT01, D200
        """
        Limit values to the given lower and/or upper threshold(s).
        """
        # validate inputs
        if axis is not None:
            axis = self._get_axis_number(axis)
        self._validate_dtypes(numeric_only=True)
        inplace = validate_bool_kwarg(inplace, "inplace")
        axis = numpy_compat.function.validate_clip_with_axis(axis, (), kwargs)

        # any np.nan bounds are treated as None
        lower, upper = [
            None if bound is not None and np.any(np.isnan(bound)) else bound
            for bound in (lower, upper)
        ]

        # As soon as one bound is list-like, both go through `_validate_other`.
        if is_list_like(lower) or is_list_like(upper):
            lower = self._validate_other(lower, axis)
            upper = self._validate_other(upper, axis)

        # FIXME: Judging by pandas docs `*args` and `**kwargs` serves only compatibility
        # purpose and does not affect the result, we shouldn't pass them to the query compiler.
        clipped_qc = self._query_compiler.clip(
            lower=lower, upper=upper, axis=axis, **kwargs
        )
        return self._create_or_update_from_compiler(clipped_qc, inplace)

    def combine(
        self, other, func, fill_value=None, **kwargs
    ) -> Self:  # noqa: PR01, RT01, D200
        """
        Combine this `BasePandasDataset` with `other` using `func`.
        """
        # Dispatched through the generic binary-operation helper.
        combined = self._binary_op(
            "combine", other, _axis=0, func=func, fill_value=fill_value, **kwargs
        )
        return combined

    def combine_first(self, other) -> Self:  # noqa: PR01, RT01, D200
        """
        Fill null elements with the value found at the same location in `other`.
        """
        # Dispatched through the generic binary-operation helper.
        merged = self._binary_op("combine_first", other, _axis=0)
        return merged

    def copy(self, deep=True) -> Self:  # noqa: PR01, RT01, D200
        """
        Create a copy of this object.

        A deep copy wraps a copy of the query compiler. A shallow copy wraps
        the very same query compiler and is registered through ``_add_sibling``.
        """
        if not deep:
            shallow = self.__constructor__(query_compiler=self._query_compiler)
            self._add_sibling(shallow)
            return shallow
        return self.__constructor__(query_compiler=self._query_compiler.copy())

    def count(
        self, axis=0, numeric_only=False
    ) -> Series | Scalar:  # noqa: PR01, RT01, D200
        """
        Count the non-NA cells of the `BasePandasDataset`.
        """
        axis = self._get_axis_number(axis)
        # select_dtypes is only implemented on DataFrames, but the numeric_only
        # flag will always be set to false by the Series frontend
        if numeric_only:
            frame = self.select_dtypes([np.number, np.bool_])
        else:
            frame = self

        counted_qc = frame._query_compiler.count(axis=axis, numeric_only=numeric_only)
        return frame._reduce_dimension(counted_qc)

    def cummax(
        self, axis=None, skipna=True, *args, **kwargs
    ) -> Self:  # noqa: PR01, RT01, D200
        """
        Compute the cumulative maximum along an axis of the `BasePandasDataset`.
        """
        axis = self._get_axis_number(axis)
        # Dtypes are only validated as numeric for axis 1.
        if axis == 1:
            self._validate_dtypes(numeric_only=True)
        # FIXME: Judging by pandas docs `*args` and `**kwargs` serves only compatibility
        # purpose and does not affect the result, we shouldn't pass them to the query compiler.
        running_qc = self._query_compiler.cummax(
            fold_axis=axis, axis=axis, skipna=skipna, **kwargs
        )
        return self.__constructor__(query_compiler=running_qc)

    def cummin(
        self, axis=None, skipna=True, *args, **kwargs
    ) -> Self:  # noqa: PR01, RT01, D200
        """
        Compute the cumulative minimum along an axis of the `BasePandasDataset`.
        """
        axis = self._get_axis_number(axis)
        # Dtypes are only validated as numeric for axis 1.
        if axis == 1:
            self._validate_dtypes(numeric_only=True)
        # FIXME: Judging by pandas docs `*args` and `**kwargs` serves only compatibility
        # purpose and does not affect the result, we shouldn't pass them to the query compiler.
        running_qc = self._query_compiler.cummin(
            fold_axis=axis, axis=axis, skipna=skipna, **kwargs
        )
        return self.__constructor__(query_compiler=running_qc)

    def cumprod(
        self, axis=None, skipna=True, *args, **kwargs
    ) -> Self:  # noqa: PR01, RT01, D200
        """
        Compute the cumulative product along an axis of the `BasePandasDataset`.
        """
        axis = self._get_axis_number(axis)
        # Unlike cummax/cummin, dtypes are validated as numeric for every axis.
        self._validate_dtypes(numeric_only=True)
        # FIXME: Judging by pandas docs `**kwargs` serves only compatibility
        # purpose and does not affect the result, we shouldn't pass them to the query compiler.
        running_qc = self._query_compiler.cumprod(
            fold_axis=axis, axis=axis, skipna=skipna, **kwargs
        )
        return self.__constructor__(query_compiler=running_qc)

    def cumsum(
        self, axis=None, skipna=True, *args, **kwargs
    ) -> Self:  # noqa: PR01, RT01, D200
        """
        Compute the cumulative sum along an axis of the `BasePandasDataset`.
        """
        axis = self._get_axis_number(axis)
        # Unlike cummax/cummin, dtypes are validated as numeric for every axis.
        self._validate_dtypes(numeric_only=True)
        # FIXME: Judging by pandas docs `*args` and `**kwargs` serves only compatibility
        # purpose and does not affect the result, we shouldn't pass them to the query compiler.
        running_qc = self._query_compiler.cumsum(
            fold_axis=axis, axis=axis, skipna=skipna, **kwargs
        )
        return self.__constructor__(query_compiler=running_qc)

    def describe(
        self,
        percentiles=None,
        include=None,
        exclude=None,
    ) -> Self:  # noqa: PR01, RT01, D200
        """
        Generate descriptive statistics.
        """
        # copied from pandas.core.describe.describe_ndframe
        percentiles = _refine_percentiles(percentiles)
        data = self
        if self._is_dataframe:
            # include/exclude arguments are ignored for Series
            if (include is None) and (exclude is None):
                # when some numerics are found, keep only numerics
                default_include: list[npt.DTypeLike] = [np.number]
                default_include.append("datetime")
                data = self.select_dtypes(include=default_include)
                if len(data.columns) == 0:
                    data = self
            elif include == "all":
                if exclude is not None:
                    msg = "exclude must be None when include is 'all'"
                    raise ValueError(msg)
                data = self
            else:
                data = self.select_dtypes(
                    include=include,
                    exclude=exclude,
                )
        if data.empty:
            # Match pandas error from concatenting empty list of series descriptions.
            raise ValueError("No objects to concatenate")
        return self.__constructor__(
            query_compiler=data._query_compiler.describe(percentiles=percentiles)
        )

    def diff(self, periods=1, axis=0) -> Self:  # noqa: PR01, RT01, D200
        """
        First discrete difference of element.
        """
        # Attempting to match pandas error behavior here
        if not isinstance(periods, int):
            raise ValueError(f"periods must be an int. got {type(periods)} instead")

        # Attempting to match pandas error behavior here
        for dtype in self._get_dtypes():
            if not (is_numeric_dtype(dtype) or lib.is_np_dtype(dtype, "mM")):
                raise TypeError(f"unsupported operand type for -: got {dtype}")

        axis = self._get_axis_number(axis)
        return self.__constructor__(
            query_compiler=self._query_compiler.diff(axis=axis, periods=periods)
        )

    def drop(
        self,
        labels=None,
        *,
        axis=0,
        index=None,
        columns=None,
        level=None,
        inplace=False,
        errors="raise",
    ) -> Self:  # noqa: PR01, RT01, D200
        """
        Remove the specified labels from the `BasePandasDataset`.

        Labels are given either as `labels` plus `axis`, or through `index` /
        `columns`. A `level` request is not handled natively and falls back
        to pandas.
        """
        # TODO implement level
        if level is not None:
            return self._default_to_pandas(
                "drop",
                labels=labels,
                axis=axis,
                index=index,
                columns=columns,
                level=level,
                inplace=inplace,
                errors=errors,
            )

        inplace = validate_bool_kwarg(inplace, "inplace")

        # Normalize the two calling conventions into an {axis name: labels} dict.
        axes_given = index is not None or columns is not None
        if labels is not None:
            if axes_given:
                raise ValueError("Cannot specify both 'labels' and 'index'/'columns'")
            axes = {pandas.DataFrame._get_axis_name(axis): labels}
        elif axes_given:
            axes = {"index": index}
            if self.ndim == 2:
                axes["columns"] = columns
        else:
            raise ValueError(
                "Need to specify at least one of 'labels', 'index' or 'columns'"
            )

        for axis_name in ("index", "columns"):
            requested = axes.get(axis_name)
            if requested is None:
                # Nothing to drop along this axis.
                axes[axis_name] = None
                continue
            if not is_list_like(requested):
                requested = axes[axis_name] = [requested]
            # In case of lazy execution we should bypass these error checking components
            # because they can force the materialization of the row or column labels.
            if axis_name == "index":
                labels_are_lazy = self._query_compiler.lazy_row_labels
            else:
                labels_are_lazy = self._query_compiler.lazy_column_labels
            if labels_are_lazy:
                continue
            if errors == "raise":
                non_existent = pandas.Index(requested).difference(
                    getattr(self, axis_name)
                )
                if len(non_existent):
                    raise KeyError(f"labels {non_existent} not contained in axis")
            else:
                # Silently keep only the labels that actually exist.
                present = [obj for obj in requested if obj in getattr(self, axis_name)]
                # If the length is zero, we will just do nothing
                axes[axis_name] = present if len(present) else None

        new_query_compiler = self._query_compiler.drop(
            index=axes["index"], columns=axes["columns"], errors=errors
        )
        return self._create_or_update_from_compiler(new_query_compiler, inplace)

    def dropna(
        self,
        *,
        axis: Axis = 0,
        how: str | lib.NoDefault = lib.no_default,
        thresh: int | lib.NoDefault = lib.no_default,
        subset: IndexLabel = None,
        inplace: bool = False,
        ignore_index: bool = False,
    ) -> Self:  # noqa: PR01, RT01, D200
        """
        Drop missing values.

        Arguments are validated here; the removal itself is done by the query
        compiler. With `ignore_index` the result gets a fresh ``RangeIndex``.
        """
        inplace = validate_bool_kwarg(inplace, "inplace")

        if is_list_like(axis):
            raise TypeError("supplying multiple axes to axis is no longer supported.")
        axis = self._get_axis_number(axis)

        if how is not None and how not in ["any", "all", lib.no_default]:
            raise ValueError("invalid how option: %s" % how)
        if how is None and thresh is None:
            raise TypeError("must specify how or thresh")

        if subset is not None:
            # `subset` names labels on the axis opposite to the one being dropped.
            opposite_labels = self.index if axis == 1 else self.columns
            unknown = opposite_labels.get_indexer_for(subset) == -1
            if unknown.any():
                raise KeyError(list(np.compress(unknown, subset)))

        new_query_compiler = self._query_compiler.dropna(
            axis=axis, how=how, thresh=thresh, subset=subset
        )
        if ignore_index:
            remaining_rows = len(new_query_compiler.index)
            new_query_compiler.index = pandas.RangeIndex(stop=remaining_rows)
        return self._create_or_update_from_compiler(new_query_compiler, inplace)

    def droplevel(self, level, axis=0) -> Self:  # noqa: PR01, RT01, D200
        """
        Return a `BasePandasDataset` without the requested index / column level(s).

        For axis 0 the index is reset into columns, the columns of the
        unwanted levels are dropped, and the remaining levels are set as the
        index again. For axis 1 the column labels are replaced directly.
        """
        axis = self._get_axis_number(axis)
        result = self.copy()
        if axis != 0:
            result.columns = self.columns.droplevel(level)
            return result

        remaining_levels = result.index.names.copy()
        # Translate positional level references into level names.
        if is_integer(level):
            level = remaining_levels[level]
        elif is_list_like(level):
            level = [
                remaining_levels[item] if is_integer(item) else item for item in level
            ]

        several_levels = is_list_like(level)
        dropped_levels = level if several_levels else [level]
        for dropped in dropped_levels:
            remaining_levels.remove(dropped)

        if len(result.columns.names) > 1:
            # In this case, we are dealing with a MultiIndex column, so we need to
            # be careful when dropping the additional index column.
            drop_labels = [(dropped, "") for dropped in dropped_levels]
        else:
            drop_labels = level
        flattened = result.reset_index().drop(columns=drop_labels)
        return flattened.set_index(remaining_levels)

    def drop_duplicates(
        self, keep="first", inplace=False, **kwargs
    ) -> Self:  # noqa: PR01, RT01, D200
        """
        Return the `BasePandasDataset` with duplicate rows removed.

        ``ignore_index`` and ``subset`` are read from `kwargs`. With
        ``inplace=True`` the object is updated and nothing is returned.
        """
        inplace = validate_bool_kwarg(inplace, "inplace")
        ignore_index = kwargs.get("ignore_index", False)
        subset = kwargs.get("subset", None)

        if subset is not None:
            # Normalize `subset` to a list of column labels.
            if not is_list_like(subset):
                subset = [subset]
            elif not isinstance(subset, list):
                subset = list(subset)
            unknown_columns = pandas.Index(subset).difference(self.columns)
            if len(unknown_columns) > 0:
                raise KeyError(unknown_columns)

        deduplicated_qc = self._query_compiler.unique(
            keep=keep, ignore_index=ignore_index, subset=subset
        )
        result = self.__constructor__(query_compiler=deduplicated_qc)
        if not inplace:
            return result
        self._update_inplace(result._query_compiler)

    def eq(self, other, axis="columns", level=None) -> Self:  # noqa: PR01, RT01, D200
        """
        Compare `BasePandasDataset` and `other` for equality, element-wise (binary operator `eq`).
        """
        # The result dtype is requested as boolean from the binary-op helper.
        comparison = self._binary_op(
            "eq", other, axis=axis, level=level, dtypes=np.bool_
        )
        return comparison

    def explode(
        self, column, ignore_index: bool = False
    ) -> Self:  # noqa: PR01, RT01, D200
        """
        Turn each element of a list-like into a row of its own.
        """
        exploded_qc = self._query_compiler.explode(column)
        exploded = self.__constructor__(query_compiler=exploded_qc)
        # `ignore_index` is applied afterwards by resetting the index.
        return exploded.reset_index(drop=True) if ignore_index else exploded

    def ewm(
        self,
        com: float | None = None,
        span: float | None = None,
        halflife: float | TimedeltaConvertibleTypes | None = None,
        alpha: float | None = None,
        min_periods: int | None = 0,
        adjust: bool = True,
        ignore_na: bool = False,
        axis: Axis = lib.no_default,
        times: str | np.ndarray | BasePandasDataset | None = None,
        method: str = "single",
    ) -> pandas.core.window.ewm.ExponentialMovingWindow:  # noqa: PR01, RT01, D200
        """
        Provide exponentially weighted (EW) calculations.

        Every argument is forwarded unchanged to the pandas fallback.
        """
        ewm_options = {
            "com": com,
            "span": span,
            "halflife": halflife,
            "alpha": alpha,
            "min_periods": min_periods,
            "adjust": adjust,
            "ignore_na": ignore_na,
            "axis": axis,
            "times": times,
            "method": method,
        }
        return self._default_to_pandas("ewm", **ewm_options)

    def expanding(
        self, min_periods=1, axis=lib.no_default, method="single"
    ) -> Expanding:  # noqa: PR01, RT01, D200
        """
        Create an expanding-window object over this dataset.

        Passing `axis` explicitly is deprecated and emits a ``FutureWarning``;
        when it is omitted, axis 0 is used.
        """
        from .window import Expanding

        if axis is lib.no_default:
            axis = 0
        else:
            axis = self._get_axis_number(axis)
            name = "expanding"
            qualified_name = f"{type(self).__name__}.{name}"
            if axis == 1:
                deprecation_note = (
                    f"Support for axis=1 in {qualified_name} is "
                    + "deprecated and will be removed in a future version. "
                    + f"Use obj.T.{name}(...) instead"
                )
            else:
                deprecation_note = (
                    f"The 'axis' keyword in {qualified_name} is "
                    + "deprecated and will be removed in a future version. "
                    + "Call the method without the axis keyword instead."
                )
            warnings.warn(deprecation_note, FutureWarning)

        return Expanding(
            self,
            min_periods=min_periods,
            axis=axis,
            method=method,
        )

    def ffill(
        self,
        *,
        axis=None,
        inplace=False,
        limit=None,
        limit_area=None,
        downcast=lib.no_default,
    ) -> Self | None:  # noqa: PR01, RT01, D200
        """
        Forward-fill missing values; same as `fillna` with ``method='ffill'``.
        """
        if limit_area is None:
            fill_kwargs = {
                "method": "ffill",
                "axis": axis,
                "limit": limit,
                # Resolved before warnings are muted, so any warning raised by
                # the downcast helper is still visible to the caller.
                "downcast": self._deprecate_downcast(downcast, "ffill"),
                "inplace": inplace,
            }
            with warnings.catch_warnings():
                # `fillna` warns when `method` is passed; that warning is
                # about `fillna`, not `ffill`, so it is hidden here.
                warnings.filterwarnings(
                    "ignore",
                    ".*fillna with 'method' is deprecated",
                    category=FutureWarning,
                )
                return self.fillna(**fill_kwargs)

        # `limit_area` is not handled by the `fillna` path above.
        return self._default_to_pandas(
            "ffill",
            reason="'limit_area' parameter isn't supported",
            axis=axis,
            inplace=inplace,
            limit=limit,
            limit_area=limit_area,
            downcast=downcast,
        )

    def pad(
        self, *, axis=None, inplace=False, limit=None, downcast=lib.no_default
    ) -> Self | None:  # noqa: PR01, RT01, D200
        """
        Deprecated alias that forwards to `ffill`.
        """
        deprecation_message = "DataFrame.pad/Series.pad is deprecated. Use DataFrame.ffill/Series.ffill instead"
        warnings.warn(deprecation_message, FutureWarning)

        forwarded = {
            "axis": axis,
            "inplace": inplace,
            "limit": limit,
            "downcast": downcast,
        }
        with warnings.catch_warnings():
            # The caller has already been warned once; silence every further
            # FutureWarning raised while delegating.
            warnings.filterwarnings("ignore", category=FutureWarning)
            return self.ffill(**forwarded)

    def fillna(
        self,
        squeeze_self,
        squeeze_value,
        value=None,
        method=None,
        axis=None,
        inplace=False,
        limit=None,
        downcast=lib.no_default,
    ) -> Self | None:
        """
        Fill NA/NaN values using the specified method.

        Parameters
        ----------
        squeeze_self : bool
            If True then self contains a Series object, if False then self contains
            a DataFrame object.
        squeeze_value : bool
            If True then value contains a Series object, if False then value contains
            a DataFrame object.
        value : scalar, dict, Series, or DataFrame, default: None
            Value to use to fill holes (e.g. 0), alternately a
            dict/Series/DataFrame of values specifying which value to use for
            each index (for a Series) or column (for a DataFrame).  Values not
            in the dict/Series/DataFrame will not be filled. This value cannot
            be a list.
        method : {'backfill', 'bfill', 'pad', 'ffill', None}, default: None
            Method to use for filling holes in reindexed Series
            pad / ffill: propagate last valid observation forward to next valid
            backfill / bfill: use next valid observation to fill gap.
            Passing any non-None value emits a ``FutureWarning``.
        axis : {None, 0, 1}, default: None
            Axis along which to fill missing values.
        inplace : bool, default: False
            If True, fill in-place. Note: this will modify any
            other views on this object (e.g., a no-copy slice for a column in a
            DataFrame).
        limit : int, default: None
            If method is specified, this is the maximum number of consecutive
            NaN values to forward/backward fill. In other words, if there is
            a gap with more than this number of consecutive NaNs, it will only
            be partially filled. If method is not specified, this is the
            maximum number of entries along the entire axis where NaNs will be
            filled. Must be greater than 0 if not None.
        downcast : dict, default: lib.no_default
            A dict of item->dtype of what to downcast if possible,
            or the string 'infer' which will try to downcast to an appropriate
            equal type (e.g. float64 to int64 if possible). The value is passed
            through ``self._deprecate_downcast`` before use.

        Returns
        -------
        Series, DataFrame or None
            Object with missing values filled or None if ``inplace=True``.

        Raises
        ------
        TypeError
            If `value` is a list or a tuple.
        ValueError
            If neither or both of `value` and `method` are given, if `method`
            is not one of the accepted names, or if `limit` is not a positive
            integer.
        """
        # The deprecation warning is emitted before any validation, so it is
        # shown even when the call goes on to raise.
        if method is not None:
            warnings.warn(
                f"{type(self).__name__}.fillna with 'method' is deprecated and "
                + "will raise in a future version. Use obj.ffill() or obj.bfill() "
                + "instead.",
                FutureWarning,
            )
        downcast = self._deprecate_downcast(downcast, "fillna")
        inplace = validate_bool_kwarg(inplace, "inplace")
        axis = self._get_axis_number(axis)
        # Argument validation; the order of these checks decides which error
        # is reported when several constraints are violated at once.
        if isinstance(value, (list, tuple)):
            raise TypeError(
                '"value" parameter must be a scalar or dict, but '
                + f'you passed a "{type(value).__name__}"'
            )
        if value is None and method is None:
            raise ValueError("must specify a fill method or value")
        if value is not None and method is not None:
            raise ValueError("cannot specify both a fill method and value")
        if method is not None and method not in ["backfill", "bfill", "pad", "ffill"]:
            expecting = "pad (ffill) or backfill (bfill)"
            msg = "Invalid fill method. Expecting {expecting}. Got {method}".format(
                expecting=expecting, method=method
            )
            raise ValueError(msg)
        if limit is not None:
            if not isinstance(limit, int):
                raise ValueError("Limit must be an integer")
            elif limit <= 0:
                raise ValueError("Limit must be greater than 0")

        # Hand the query compiler of a dataset-like `value` to the backend
        # rather than the wrapper object itself.
        if isinstance(value, BasePandasDataset):
            value = value._query_compiler

        # The compiler is always asked for a new result (`inplace=False`);
        # the in-place update, if requested, is applied by the call below.
        new_query_compiler = self._query_compiler.fillna(
            squeeze_self=squeeze_self,
            squeeze_value=squeeze_value,
            value=value,
            method=method,
            axis=axis,
            inplace=False,
            limit=limit,
            downcast=downcast,
        )
        return self._create_or_update_from_compiler(new_query_compiler, inplace)

    def filter(
        self, items=None, like=None, regex=None, axis=None
    ) -> Self:  # noqa: PR01, RT01, D200
        """
        Select rows or columns whose labels match `items`, `like` or `regex`.
        """
        given = count_not_none(items, like, regex)
        if given > 1:
            raise TypeError(
                "Keyword arguments `items`, `like`, or `regex` are mutually exclusive"
            )
        if given == 0:
            raise TypeError("Must pass either `items`, `like`, or `regex`")

        # "columns" is the default info axis for dataframes.
        axis = self._get_axis_number("columns" if axis is None else axis)
        labels = self.columns if axis else self.index

        if items is not None:
            selected = labels.isin(items)
        else:
            if like is not None:

                def matches(label):
                    return like in str(label)

            else:
                pattern = re.compile(regex)

                def matches(label):
                    return pattern.search(str(label)) is not None

            selected = labels.map(matches).tolist()

        if axis:
            # Column selection goes through the matching labels.
            return self[self.columns[selected]]
        # Row selection uses the boolean mask directly.
        return self[selected]

    def first(self, offset) -> Self | None:  # noqa: PR01, RT01, D200
        """
        Select the leading periods of time series data covered by a date offset.
        """
        deprecation_message = (
            "first is deprecated and will be removed in a future version. "
            + "Please create a mask and filter using `.loc` instead"
        )
        warnings.warn(deprecation_message, FutureWarning)

        # Normalise `offset` with `to_offset` before querying the compiler.
        date_offset = to_offset(offset)
        selected = self._query_compiler.first(offset=date_offset)
        return self._create_or_update_from_compiler(selected)

    def first_valid_index(self) -> int:  # noqa: RT01, D200
        """
        Return the index of the first non-NA value, or None when there is none.
        """
        compiler = self._query_compiler
        return compiler.first_valid_index()

    def floordiv(
        self, other, axis="columns", level=None, fill_value=None
    ) -> Self:  # noqa: PR01, RT01, D200
        """
        Element-wise integer division of `BasePandasDataset` by `other` (binary operator `floordiv`).
        """
        op_kwargs = {"axis": axis, "level": level, "fill_value": fill_value}
        return self._binary_op("floordiv", other, **op_kwargs)

    def ge(self, other, axis="columns", level=None) -> Self:  # noqa: PR01, RT01, D200
        """
        Element-wise ``>=`` comparison of `BasePandasDataset` with `other` (binary operator `ge`).
        """
        # The result dtype is declared as boolean up front.
        compare_kwargs = {"axis": axis, "level": level, "dtypes": np.bool_}
        return self._binary_op("ge", other, **compare_kwargs)

    def get(
        self, key, default=None
    ) -> DataFrame | Series | Scalar:  # noqa: PR01, RT01, D200
        """
        Look `key` up via ``__getitem__`` and fall back to `default` on failure.
        """
        # Match pandas behavior: a failed lookup yields `default`, not an error.
        try:
            found = self.__getitem__(key)
        except (KeyError, ValueError, IndexError):
            return default
        return found

    def gt(self, other, axis="columns", level=None) -> Self:  # noqa: PR01, RT01, D200
        """
        Element-wise ``>`` comparison of `BasePandasDataset` with `other` (binary operator `gt`).
        """
        # The result dtype is declared as boolean up front.
        compare_kwargs = {"axis": axis, "level": level, "dtypes": np.bool_}
        return self._binary_op("gt", other, **compare_kwargs)

    def head(self, n=5) -> Self:  # noqa: PR01, RT01, D200
        """
        Return the leading `n` rows, selected positionally through `iloc`.
        """
        leading_rows = slice(None, n)
        return self.iloc[leading_rows]

    @property
    def iat(self, axis=None) -> _iLocIndexer:  # noqa: PR01, RT01, D200
        """
        Access a single value for a row/column pair by integer position.
        """
        from .indexing import _iLocIndexer

        # Served by the same indexer class that backs `iloc`.
        positional_indexer = _iLocIndexer(self)
        return positional_indexer

    def idxmax(
        self, axis=0, skipna=True, numeric_only=False
    ) -> Self:  # noqa: PR01, RT01, D200
        """
        Return the index of the first maximum along the requested axis.
        """
        axis_number = self._get_axis_number(axis)
        positions = self._query_compiler.idxmax(
            axis=axis_number, skipna=skipna, numeric_only=numeric_only
        )
        return self._reduce_dimension(positions)

    def idxmin(
        self, axis=0, skipna=True, numeric_only=False
    ) -> Self:  # noqa: PR01, RT01, D200
        """
        Return the index of the first minimum along the requested axis.
        """
        axis_number = self._get_axis_number(axis)
        positions = self._query_compiler.idxmin(
            axis=axis_number, skipna=skipna, numeric_only=numeric_only
        )
        return self._reduce_dimension(positions)

    def infer_objects(self, copy=None) -> Self:  # noqa: PR01, RT01, D200
        """
        Try to infer better dtypes for object columns.
        """
        inferred = self._query_compiler.infer_objects()
        # `copy=None` produces a new object; otherwise the update happens
        # in place exactly when `copy` is falsy.
        if copy is None:
            update_inplace = False
        else:
            update_inplace = not copy
        return self._create_or_update_from_compiler(inferred, inplace=update_inplace)

    def convert_dtypes(
        self,
        infer_objects: bool = True,
        convert_string: bool = True,
        convert_integer: bool = True,
        convert_boolean: bool = True,
        convert_floating: bool = True,
        dtype_backend: DtypeBackend = "numpy_nullable",
    ) -> Self:  # noqa: PR01, RT01, D200
        """
        Convert columns to the best possible dtypes that support ``pd.NA``.
        """
        # Every option is forwarded unchanged to the query compiler.
        conversion_options = {
            "infer_objects": infer_objects,
            "convert_string": convert_string,
            "convert_integer": convert_integer,
            "convert_boolean": convert_boolean,
            "convert_floating": convert_floating,
            "dtype_backend": dtype_backend,
        }
        converted = self._query_compiler.convert_dtypes(**conversion_options)
        return self.__constructor__(query_compiler=converted)

    def isin(self, values) -> Self:  # noqa: PR01, RT01, D200
        """
        Test whether each element of `BasePandasDataset` is contained in `values`.
        """
        from .series import Series

        # The Series check has to happen before `values` is unwrapped below.
        values_are_series = isinstance(values, Series)
        # Objects carrying a query compiler are replaced by that compiler;
        # anything else is passed through as given.
        lookup = getattr(values, "_query_compiler", values)
        membership = self._query_compiler.isin(
            values=lookup, ignore_indices=values_are_series
        )
        return self.__constructor__(query_compiler=membership)

    def isna(self) -> Self:  # noqa: RT01, D200
        """
        Flag the missing values of this dataset.
        """
        missing_mask = self._query_compiler.isna()
        return self.__constructor__(query_compiler=missing_mask)

    # Alias: `isnull` is bound to the very same function object as `isna`.
    isnull: Self = isna

    @property
    def iloc(self) -> _iLocIndexer:  # noqa: RT01, D200
        """
        Indexer for purely integer-location based selection by position.
        """
        from .indexing import _iLocIndexer

        positional_indexer = _iLocIndexer(self)
        return positional_indexer

    @_inherit_docstrings(pandas.DataFrame.kurt, apilink="pandas.DataFrame.kurt")
    def kurt(self, axis=0, skipna=True, numeric_only=False, **kwargs) -> Series | float:
        # No docstring here on purpose: the decorator above is given
        # `pandas.DataFrame.kurt` as its documentation source.
        reducer = self._stat_operation
        return reducer("kurt", axis, skipna, numeric_only, **kwargs)

    # Alias: `kurtosis` is bound to the very same function object as `kurt`.
    kurtosis: Series | float = kurt

    def last(self, offset) -> Self:  # noqa: PR01, RT01, D200
        """
        Select the trailing periods of time series data covered by a date offset.
        """
        deprecation_message = (
            "last is deprecated and will be removed in a future version. "
            + "Please create a mask and filter using `.loc` instead"
        )
        warnings.warn(deprecation_message, FutureWarning)

        # Normalise `offset` with `to_offset` before querying the compiler.
        date_offset = to_offset(offset)
        selected = self._query_compiler.last(offset=date_offset)
        return self._create_or_update_from_compiler(selected)

    def last_valid_index(self) -> int:  # noqa: RT01, D200
        """
        Return the index of the last non-NA value, or None when there is none.
        """
        compiler = self._query_compiler
        return compiler.last_valid_index()

    def le(self, other, axis="columns", level=None) -> Self:  # noqa: PR01, RT01, D200
        """
        Element-wise ``<=`` comparison of `BasePandasDataset` with `other` (binary operator `le`).
        """
        # The result dtype is declared as boolean up front.
        compare_kwargs = {"axis": axis, "level": level, "dtypes": np.bool_}
        return self._binary_op("le", other, **compare_kwargs)

    def lt(self, other, axis="columns", level=None) -> Self:  # noqa: PR01, RT01, D200
        """
        Element-wise ``<`` comparison of `BasePandasDataset` with `other` (binary operator `lt`).
        """
        # The result dtype is declared as boolean up front.
        compare_kwargs = {"axis": axis, "level": level, "dtypes": np.bool_}
        return self._binary_op("lt", other, **compare_kwargs)

    @property
    def loc(self) -> _LocIndexer:  # noqa: RT01, D200
        """
        Indexer for selecting rows and columns by label(s) or a boolean array.
        """
        from .indexing import _LocIndexer

        label_indexer = _LocIndexer(self)
        return label_indexer

    def mask(
        self,
        cond,
        other=lib.no_default,
        *,
        inplace: bool = False,
        axis: Optional[Axis] = None,
        level: Optional[Level] = None,
    ) -> Self | None:  # noqa: PR01, RT01, D200
        """
        Replace the values for which `cond` is True.
        """
        # The compiler always builds a new result (`inplace=False`); the
        # caller's `inplace` choice is applied afterwards.
        masked = self._query_compiler.mask(
            cond, other=other, inplace=False, axis=axis, level=level
        )
        return self._create_or_update_from_compiler(masked, inplace=inplace)

    def max(
        self,
        axis: Axis = 0,
        skipna=True,
        numeric_only=False,
        **kwargs,
    ) -> Series | None:  # noqa: PR01, RT01, D200
        """
        Compute the maximum of the values over the requested axis.
        """
        validate_bool_kwarg(skipna, "skipna", none_allowed=False)
        # Remember whether the caller passed `axis=None` before normalising it.
        reduce_twice = axis is None
        axis_number = self._get_axis_number(axis)
        frame = self._validate_dtypes_min_max(axis_number, numeric_only)
        reduced = frame._reduce_dimension(
            frame._query_compiler.max(
                axis=axis_number,
                skipna=skipna,
                numeric_only=numeric_only,
                **kwargs,
            )
        )
        if not reduce_twice:
            return reduced
        # `axis=None`: apply a second reduction along axis 0 to the first result.
        return reduced._reduce_dimension(
            reduced._query_compiler.max(
                axis=0,
                skipna=skipna,
                numeric_only=False,
                **kwargs,
            )
        )

    def min(
        self,
        axis: Axis = 0,
        skipna: bool = True,
        numeric_only=False,
        **kwargs,
    ) -> Series | None:  # noqa: PR01, RT01, D200
        """
        Compute the minimum of the values over the requested axis.
        """
        validate_bool_kwarg(skipna, "skipna", none_allowed=False)
        # Remember whether the caller passed `axis=None` before normalising it.
        reduce_twice = axis is None
        axis_number = self._get_axis_number(axis)
        frame = self._validate_dtypes_min_max(axis_number, numeric_only)
        reduced = frame._reduce_dimension(
            frame._query_compiler.min(
                axis=axis_number,
                skipna=skipna,
                numeric_only=numeric_only,
                **kwargs,
            )
        )
        if not reduce_twice:
            return reduced
        # `axis=None`: apply a second reduction along axis 0 to the first result.
        return reduced._reduce_dimension(
            reduced._query_compiler.min(
                axis=0,
                skipna=skipna,
                numeric_only=False,
                **kwargs,
            )
        )

    def _stat_operation(
        self,
        op_name: str,
        axis: Optional[Union[int, str]],
        skipna: bool,
        numeric_only: Optional[bool] = False,
        **kwargs,
    ):
        """
        Run a named statistical reduction through the query compiler.

        Parameters
        ----------
        op_name : str
            Name of the query compiler method to call.
        axis : int, str or None
            Axis to reduce over. ``None`` is forwarded to the query compiler as is.
        skipna : bool
            Exclude NA/null values when computing the result.
        numeric_only : bool, default: False
            If truthy, the reduction runs on ``self._get_numeric_data(...)``;
            otherwise dtypes are checked with ``self._validate_dtypes`` and the
            reduction runs on `self`.
        **kwargs : dict
            Additional keyword arguments to pass to `op_name`.

        Returns
        -------
        object
            ``self._reduce_dimension(result)`` when the query compiler returns
            an object of its own type, otherwise the raw result (scalar case).
        """
        axis = None if axis is None else self._get_axis_number(axis)
        validate_bool_kwarg(skipna, "skipna", none_allowed=False)

        # Reject keyword arguments that only exist for numpy compatibility.
        validators = numpy_compat.function
        if op_name == "median":
            validators.validate_median((), kwargs)
        elif op_name in ("sem", "var", "std"):
            # `ddof` is a genuine argument of these reductions, so it is
            # excluded from the compatibility check.
            without_ddof = {key: val for key, val in kwargs.items() if key != "ddof"}
            validators.validate_stat_ddof_func((), without_ddof, fname=op_name)
        else:
            validators.validate_stat_func((), kwargs, fname=op_name)

        if numeric_only:
            data = self._get_numeric_data(0 if axis is None else axis)
        else:
            self._validate_dtypes(numeric_only=True)
            data = self

        reducer = getattr(data._query_compiler, op_name)
        result_qc = reducer(
            axis=axis,
            skipna=skipna,
            numeric_only=numeric_only,
            **kwargs,
        )
        if isinstance(result_qc, type(self._query_compiler)):
            return self._reduce_dimension(result_qc)
        # scalar case
        return result_qc

    def memory_usage(
        self, index=True, deep=False
    ) -> Series | None:  # noqa: PR01, RT01, D200
        """
        Report the memory usage of the `BasePandasDataset`.
        """
        usage = self._query_compiler.memory_usage(index=index, deep=deep)
        return self._reduce_dimension(usage)

    def mod(
        self, other, axis="columns", level=None, fill_value=None
    ) -> Self:  # noqa: PR01, RT01, D200
        """
        Element-wise modulo of `BasePandasDataset` and `other` (binary operator `mod`).
        """
        op_kwargs = {"axis": axis, "level": level, "fill_value": fill_value}
        return self._binary_op("mod", other, **op_kwargs)

    def mode(
        self, axis=0, numeric_only=False, dropna=True
    ) -> Self:  # noqa: PR01, RT01, D200
        """
        Compute the mode(s) of each element along the selected axis.
        """
        axis_number = self._get_axis_number(axis)
        modes = self._query_compiler.mode(
            axis=axis_number, numeric_only=numeric_only, dropna=dropna
        )
        return self.__constructor__(query_compiler=modes)

    def mul(
        self, other, axis="columns", level=None, fill_value=None
    ) -> Self:  # noqa: PR01, RT01, D200
        """
        Element-wise multiplication of `BasePandasDataset` and `other` (binary operator `mul`).
        """
        op_kwargs = {"axis": axis, "level": level, "fill_value": fill_value}
        return self._binary_op("mul", other, **op_kwargs)

    # Alias: `multiply` is bound to the very same function object as `mul`.
    multiply: Self = mul

    def ne(self, other, axis="columns", level=None) -> Self:  # noqa: PR01, RT01, D200
        """
        Element-wise ``!=`` comparison of `BasePandasDataset` with `other` (binary operator `ne`).
        """
        # The result dtype is declared as boolean up front.
        compare_kwargs = {"axis": axis, "level": level, "dtypes": np.bool_}
        return self._binary_op("ne", other, **compare_kwargs)

    def notna(self) -> Self:  # noqa: RT01, D200
        """
        Flag the existing (non-missing) values of this dataset.
        """
        present_mask = self._query_compiler.notna()
        return self.__constructor__(query_compiler=present_mask)

    # Alias: `notnull` is bound to the very same function object as `notna`.
    notnull: Self = notna

    def nunique(self, axis=0, dropna=True) -> Series | int:  # noqa: PR01, RT01, D200
        """
        Count the unique elements in the `BasePandasDataset`.
        """
        axis_number = self._get_axis_number(axis)
        counts = self._query_compiler.nunique(axis=axis_number, dropna=dropna)
        return self._reduce_dimension(counts)

    def pct_change(
        self,
        periods=1,
        fill_method=lib.no_default,
        limit=lib.no_default,
        freq=None,
        **kwargs,
    ) -> Self:  # noqa: PR01, RT01, D200
        """
        Percentage change between the current and a prior element.
        """
        # First deprecation: an explicit non-None `fill_method`, or any
        # explicit `limit`, triggers a FutureWarning.
        if fill_method not in (lib.no_default, None) or limit is not lib.no_default:
            warnings.warn(
                "The 'fill_method' keyword being not None and the 'limit' keyword in "
                + f"{type(self).__name__}.pct_change are deprecated and will be removed "
                + "in a future version. Either fill in any non-leading NA values prior "
                + "to calling pct_change or specify 'fill_method=None' to not fill NA "
                + "values.",
                FutureWarning,
            )
        # Second deprecation: when `fill_method` was left at its default, warn
        # only if the data actually contains missing values, then fall back to
        # the legacy default of "pad".
        if fill_method is lib.no_default:
            if self.isna().values.any():
                warnings.warn(
                    "The default fill_method='pad' in "
                    + f"{type(self).__name__}.pct_change is deprecated and will be "
                    + "removed in a future version. Call ffill before calling "
                    + "pct_change to retain current behavior and silence this warning.",
                    FutureWarning,
                )
            fill_method = "pad"
        # Replace the sentinel with None before handing `limit` to the compiler.
        if limit is lib.no_default:
            limit = None

        # Attempting to match pandas error behavior here
        if not isinstance(periods, int):
            raise ValueError(f"periods must be an int. got {type(periods)} instead")

        # Attempting to match pandas error behavior here
        # (every dtype must be numeric; the first offending one is reported)
        for dtype in self._get_dtypes():
            if not is_numeric_dtype(dtype):
                raise TypeError(f"unsupported operand type for /: got {dtype}")

        return self.__constructor__(
            query_compiler=self._query_compiler.pct_change(
                periods=periods,
                fill_method=fill_method,
                limit=limit,
                freq=freq,
                **kwargs,
            )
        )

    def pipe(
        self, func: Callable[..., T] | tuple[Callable[..., T], str], *args, **kwargs
    ) -> T:  # noqa: PR01, RT01, D200
        """
        Apply a chainable function that expects a `BasePandasDataset`.
        """
        # Inside a method body the bare name `pipe` is not looked up in the
        # class namespace, so this calls the free function of the same name
        # rather than recursing into this method.
        piped = pipe(self, func, *args, **kwargs)
        return piped

    def pop(self, item) -> Series | Scalar:  # noqa: PR01, RT01, D200
        """
        Remove `item` from the frame and return it. Raise KeyError if not found.
        """
        # Read first so that a missing key fails before anything is deleted.
        popped = self[item]
        del self[item]
        return popped

    def pow(
        self, other, axis="columns", level=None, fill_value=None
    ) -> Self:  # noqa: PR01, RT01, D200
        """
        Element-wise exponential power of `BasePandasDataset` and `other` (binary operator `pow`).
        """
        op_kwargs = {"axis": axis, "level": level, "fill_value": fill_value}
        return self._binary_op("pow", other, **op_kwargs)

    def quantile(
        self, q, axis, numeric_only, interpolation, method
    ) -> DataFrame | Series | Scalar:  # noqa: PR01, RT01, D200
        """
        Return values at the given quantile over requested axis.
        """
        axis = self._get_axis_number(axis)

        def check_dtype(t):
            return is_numeric_dtype(t) or lib.is_np_dtype(t, "mM")

        numeric_only_df = self
        if not numeric_only:
            # If not numeric_only and columns, then check all columns are either
            # numeric, timestamp, or timedelta
            if not axis and not all(check_dtype(t) for t in self._get_dtypes()):
                raise TypeError("can't multiply sequence by non-int of type 'float'")
            # If over rows, then make sure that all dtypes are equal for not
            # numeric_only
            elif axis:
                for i in range(1, len(self._get_dtypes())):
                    pre_dtype = self._get_dtypes()[i - 1]
                    curr_dtype = self._get_dtypes()[i]
                    if not is_dtype_equal(pre_dtype, curr_dtype):
                        raise TypeError(
                            "Cannot compare type '{0}' with type '{1}'".format(
                                pre_dtype, curr_dtype
                            )
                        )
        else:
            numeric_only_df = self.drop(
                columns=[
                    i for i in self.dtypes.index if not is_numeric_dtype(self.dtypes[i])
                ]
            )

        # check that all qs are between 0 and 1
        validate_percentile(q)
        axis = numeric_only_df._get_axis_number(axis)
        if isinstance(q, (pandas.Series, np.ndarray, pandas.Index, list, tuple)):
            return numeric_only_df.__constructor__(
                query_compiler=numeric_only_df._query_compiler.quantile_for_list_of_values(
                    q=q,
                    axis=axis,
                    # `numeric_only=True` has already been processed by using `self.drop` function
                    numeric_only=False,
                    interpolation=interpolation,
                    method=method,
                )
            )
        else:
            result = numeric_only_df._reduce_dimension(
                numeric_only_df._query_compiler.quantile_for_single_value(
                    q=q,
                    axis=axis,
                    # `numeric_only=True` has already been processed by using `self.drop` function
                    numeric_only=False,
                    interpolation=interpolation,
                    method=method,
                )
            )
            if isinstance(result, BasePandasDataset):
                result.name = q
            return result

    @_inherit_docstrings(pandas.DataFrame.rank, apilink="pandas.DataFrame.rank")
    def rank(
        self,
        axis=0,
        method: str = "average",
        numeric_only=False,
        na_option: str = "keep",
        ascending: bool = True,
        pct: bool = False,
    ) -> Self:
        # No docstring here on purpose: the decorator above is given
        # `pandas.DataFrame.rank` as its documentation source.
        if axis is None:
            # `axis=None` is rejected here, before the axis is normalised.
            owner = type(self).__name__
            raise ValueError(f"No axis named None for object type {owner}")
        rank_options = {
            "axis": self._get_axis_number(axis),
            "method": method,
            "numeric_only": numeric_only,
            "na_option": na_option,
            "ascending": ascending,
            "pct": pct,
        }
        ranked = self._query_compiler.rank(**rank_options)
        return self.__constructor__(query_compiler=ranked)

    def _copy_index_metadata(self, source, destination):  # noqa: PR01, RT01, D200
        """
        Copy the `name` / `names` attributes from `source` onto `destination` inplace.
        """
        # An attribute is copied only when both objects expose it.
        for attribute in ("name", "names"):
            if hasattr(source, attribute) and hasattr(destination, attribute):
                setattr(destination, attribute, getattr(source, attribute))
        return destination

    def _ensure_index(self, index_like, axis=0):  # noqa: PR01, RT01, D200
        """
        Build an index from an index-like object, preferring a MultiIndex for tuples.
        """
        # A MultiIndex is attempted only when this axis already has one and
        # the input is a non-empty, non-Index list-like starting with a tuple.
        if not self._query_compiler.has_multiindex(axis=axis):
            return ensure_index(index_like)
        if isinstance(index_like, pandas.Index) or not is_list_like(index_like):
            return ensure_index(index_like)
        if len(index_like) == 0 or not isinstance(index_like[0], tuple):
            return ensure_index(index_like)

        try:
            multi_index = pandas.MultiIndex.from_tuples(index_like)
        except TypeError:
            # not all tuples
            return ensure_index(index_like)
        return multi_index

    def reindex(
        self,
        index=None,
        columns=None,
        copy=True,
        **kwargs,
    ) -> Self:  # noqa: PR01, RT01, D200
        """
        Conform `BasePandasDataset` to new index with optional filling logic.

        Each axis is reindexed only when new labels are given and they are not
        a ``pandas.Index`` equal to the current labels of that axis. The
        remaining `kwargs` are forwarded to the query compiler for both axes.
        """
        query_compiler = self._query_compiler

        if index is not None:
            index_unchanged = isinstance(index, pandas.Index) and index.equals(
                self.index
            )
            if not index_unchanged:
                query_compiler = query_compiler.reindex(axis=0, labels=index, **kwargs)

        if columns is not None:
            # The type check must look at `columns` itself: only a
            # `pandas.Index` is guaranteed to provide `.equals`, so list-like
            # column labels always go through a real reindex.
            columns_unchanged = isinstance(columns, pandas.Index) and columns.equals(
                self.columns
            )
            if not columns_unchanged:
                query_compiler = query_compiler.reindex(
                    axis=1, labels=columns, **kwargs
                )

        # `copy=None` behaves like `copy=True`; a falsy `copy` updates in place.
        return self._create_or_update_from_compiler(
            query_compiler, inplace=False if copy is None else not copy
        )

    def rename_axis(
        self,
        mapper=lib.no_default,
        *,
        index=lib.no_default,
        columns=lib.no_default,
        axis=0,
        copy=None,
        inplace=False,
    ) -> DataFrame | Series | None:  # noqa: PR01, RT01, D200
        """
        Set the name of the axis for the index or columns.

        Either `mapper` (together with `axis`) or `index`/`columns` drives the
        rename. A dict-like or callable `mapper` is rejected with
        ``ValueError``. With `index`/`columns`, nothing is returned when
        `inplace` is true.
        """
        # Per-axis specifications, looked up by axis name in the loop below.
        axes = {"index": index, "columns": columns}

        if copy is None:
            copy = True

        if axis is not None:
            axis = self._get_axis_number(axis)

        inplace = validate_bool_kwarg(inplace, "inplace")

        if mapper is not lib.no_default:
            # Use v0.23 behavior if a scalar or list
            non_mapper = is_scalar(mapper) or (
                is_list_like(mapper) and not is_dict_like(mapper)
            )
            if non_mapper:
                return self._set_axis_name(mapper, axis=axis, inplace=inplace)
            else:
                raise ValueError("Use `.rename` to alter labels with a mapper.")
        else:
            # Use new behavior.  Means that index and/or columns is specified
            result = self if inplace else self.copy(deep=copy)

            # NOTE: the loop variable reuses the name `axis`, overwriting the
            # parameter; the parameter is not needed on this branch.
            for axis in range(self.ndim):
                v = axes.get(pandas.DataFrame._get_axis_name(axis))
                if v is lib.no_default:
                    continue
                non_mapper = is_scalar(v) or (is_list_like(v) and not is_dict_like(v))
                if non_mapper:
                    # Scalars and plain lists are used as the new names directly.
                    newnames = v
                else:

                    def _get_rename_function(mapper):
                        # Dict-like mappers translate known names and leave the
                        # rest untouched; anything else is used as a callable.
                        if isinstance(mapper, (dict, BasePandasDataset)):

                            def f(x):
                                if x in mapper:
                                    return mapper[x]
                                else:
                                    return x

                        else:
                            f = mapper

                        return f

                    f = _get_rename_function(v)
                    # The current names are read from `self`, then mapped one by one.
                    curnames = self.index.names if axis == 0 else self.columns.names
                    newnames = [f(name) for name in curnames]
                result._set_axis_name(newnames, axis=axis, inplace=True)
            if not inplace:
                return result

    def reorder_levels(self, order, axis=0) -> Self:  # noqa: PR01, RT01, D200
        """
        Rearrange index levels using input order.

        The labels of the selected axis are reordered with their own
        `reorder_levels` and then applied through `set_axis`.
        """
        axis_number = self._get_axis_number(axis)
        reordered = self._get_axis(axis_number).reorder_levels(order)
        return self.set_axis(reordered, axis=axis_number)

    def resample(
        self,
        rule,
        axis: Axis = lib.no_default,
        closed: Optional[str] = None,
        label: Optional[str] = None,
        convention: str = lib.no_default,
        kind: Optional[str] = lib.no_default,
        on: Level = None,
        level: Level = None,
        origin: str | TimestampConvertibleTypes = "start_day",
        offset: Optional[TimedeltaConvertibleTypes] = None,
        group_keys=False,
    ) -> Resampler:  # noqa: PR01, RT01, D200
        """
        Resample time-series data.

        Passing `axis` explicitly emits a ``FutureWarning``; when it is
        omitted, axis 0 is used. All arguments are forwarded to `Resampler`.
        """
        from .resample import Resampler

        if axis is lib.no_default:
            axis = 0
        else:
            axis = self._get_axis_number(axis)
            if axis == 1:
                message = (
                    "DataFrame.resample with axis=1 is deprecated. Do "
                    "`frame.T.resample(...)` without axis instead."
                )
            else:
                message = (
                    f"The 'axis' keyword in {type(self).__name__}.resample is "
                    "deprecated and will be removed in a future version."
                )
            warnings.warn(message, FutureWarning)

        resampler_kwargs = {
            "dataframe": self,
            "rule": rule,
            "axis": axis,
            "closed": closed,
            "label": label,
            "convention": convention,
            "kind": kind,
            "on": on,
            "level": level,
            "origin": origin,
            "offset": offset,
            "group_keys": group_keys,
        }
        return Resampler(**resampler_kwargs)

    def reset_index(
        self,
        level: IndexLabel = None,
        *,
        drop: bool = False,
        inplace: bool = False,
        col_level: Hashable = 0,
        col_fill: Hashable = "",
        allow_duplicates=lib.no_default,
        names: Hashable | Sequence[Hashable] = None,
    ) -> DataFrame | Series | None:  # noqa: PR01, RT01, D200
        """
        Reset the index, or a level of it.

        Raises ``ValueError`` when the index would be inserted as a column
        but both "level_0" and "index" columns already exist. That check is
        skipped for lazy labels and for a multi-index. The actual work is
        done by the query compiler's `reset_index`.
        """
        inplace = validate_bool_kwarg(inplace, "inplace")
        # Error checking for matching pandas. Pandas does not allow you to
        # insert a dropped index into a DataFrame if these columns already
        # exist.
        if not drop:
            labels_are_lazy = (
                self._query_compiler.lazy_column_labels
                or self._query_compiler.lazy_row_labels
            )
            if (
                not labels_are_lazy
                and not self._query_compiler.has_multiindex()
                and "level_0" in self.columns
                and "index" in self.columns
            ):
                raise ValueError("cannot insert level_0, already exists")
        reset_kwargs = {
            "drop": drop,
            "level": level,
            "col_level": col_level,
            "col_fill": col_fill,
            "allow_duplicates": allow_duplicates,
            "names": names,
        }
        reset_qc = self._query_compiler.reset_index(**reset_kwargs)
        return self._create_or_update_from_compiler(reset_qc, inplace)

    def radd(
        self, other, axis="columns", level=None, fill_value=None
    ) -> Self:  # noqa: PR01, RT01, D200
        """
        Return addition of `BasePandasDataset` and `other`, element-wise (binary operator `radd`).

        Delegates to `_binary_op` with the operation name ``"radd"``.
        """
        op_kwargs = {"axis": axis, "level": level, "fill_value": fill_value}
        return self._binary_op("radd", other, **op_kwargs)

    def rfloordiv(
        self, other, axis="columns", level=None, fill_value=None
    ) -> Self:  # noqa: PR01, RT01, D200
        """
        Get integer division of `BasePandasDataset` and `other`, element-wise (binary operator `rfloordiv`).

        Delegates to `_binary_op` with the operation name ``"rfloordiv"``.
        """
        op_kwargs = {"axis": axis, "level": level, "fill_value": fill_value}
        return self._binary_op("rfloordiv", other, **op_kwargs)

    def rmod(
        self, other, axis="columns", level=None, fill_value=None
    ) -> Self:  # noqa: PR01, RT01, D200
        """
        Get modulo of `BasePandasDataset` and `other`, element-wise (binary operator `rmod`).

        Delegates to `_binary_op` with the operation name ``"rmod"``.
        """
        op_kwargs = {"axis": axis, "level": level, "fill_value": fill_value}
        return self._binary_op("rmod", other, **op_kwargs)

    def rmul(
        self, other, axis="columns", level=None, fill_value=None
    ) -> Self:  # noqa: PR01, RT01, D200
        """
        Get multiplication of `BasePandasDataset` and `other`, element-wise (binary operator `rmul`).

        Delegates to `_binary_op` with the operation name ``"rmul"``.
        """
        op_kwargs = {"axis": axis, "level": level, "fill_value": fill_value}
        return self._binary_op("rmul", other, **op_kwargs)

    def rolling(
        self,
        window,
        min_periods: int | None = None,
        center: bool = False,
        win_type: str | None = None,
        on: str | None = None,
        axis: Axis = lib.no_default,
        closed: str | None = None,
        step: int | None = None,
        method: str = "single",
    ) -> Rolling | Window:  # noqa: PR01, RT01, D200
        """
        Provide rolling window calculations.

        Passing `axis` explicitly emits a ``FutureWarning``; when it is
        omitted, axis 0 is used. A `Window` is built when `win_type` is
        given, otherwise a `Rolling`; both receive the same arguments.
        """
        if axis is lib.no_default:
            axis = 0
        else:
            axis = self._get_axis_number(axis)
            method_name = "rolling"
            owner = type(self).__name__
            if axis == 1:
                message = (
                    f"Support for axis=1 in {owner}.{method_name} is "
                    "deprecated and will be removed in a future version. "
                    f"Use obj.T.{method_name}(...) instead"
                )
            else:
                message = (
                    f"The 'axis' keyword in {owner}.{method_name} is "
                    "deprecated and will be removed in a future version. "
                    "Call the method without the axis keyword instead."
                )
            warnings.warn(message, FutureWarning)

        window_kwargs = {
            "window": window,
            "min_periods": min_periods,
            "center": center,
            "win_type": win_type,
            "on": on,
            "axis": axis,
            "closed": closed,
            "step": step,
            "method": method,
        }
        if win_type is not None:
            from .window import Window

            return Window(self, **window_kwargs)

        from .window import Rolling

        return Rolling(self, **window_kwargs)

    def round(self, decimals=0, *args, **kwargs) -> Self:  # noqa: PR01, RT01, D200
        """
        Round a `BasePandasDataset` to a variable number of decimal places.

        `*args` is accepted but not used; `**kwargs` is forwarded to the
        query compiler's `round`.
        """
        # FIXME: Judging by pandas docs `*args` and `**kwargs` serves only compatibility
        # purpose and does not affect the result, we shouldn't pass them to the query compiler.
        rounded_qc = self._query_compiler.round(decimals=decimals, **kwargs)
        return self.__constructor__(query_compiler=rounded_qc)

    def rpow(
        self, other, axis="columns", level=None, fill_value=None
    ) -> Self:  # noqa: PR01, RT01, D200
        """
        Get exponential power of `BasePandasDataset` and `other`, element-wise (binary operator `rpow`).

        Delegates to `_binary_op` with the operation name ``"rpow"``.
        """
        op_kwargs = {"axis": axis, "level": level, "fill_value": fill_value}
        return self._binary_op("rpow", other, **op_kwargs)

    def rsub(
        self, other, axis="columns", level=None, fill_value=None
    ) -> Self:  # noqa: PR01, RT01, D200
        """
        Get subtraction of `BasePandasDataset` and `other`, element-wise (binary operator `rsub`).

        Delegates to `_binary_op` with the operation name ``"rsub"``.
        """
        op_kwargs = {"axis": axis, "level": level, "fill_value": fill_value}
        return self._binary_op("rsub", other, **op_kwargs)

    def rtruediv(
        self, other, axis="columns", level=None, fill_value=None
    ) -> Self:  # noqa: PR01, RT01, D200
        """
        Get floating division of `BasePandasDataset` and `other`, element-wise (binary operator `rtruediv`).

        Delegates to `_binary_op` with the operation name ``"rtruediv"``.
        """
        op_kwargs = {"axis": axis, "level": level, "fill_value": fill_value}
        return self._binary_op("rtruediv", other, **op_kwargs)

    rdiv: Self = rtruediv

    def sample(
        self,
        n: int | None = None,
        frac: float | None = None,
        replace: bool = False,
        weights=None,
        random_state: RandomState | None = None,
        axis: Axis | None = None,
        ignore_index: bool = False,
    ) -> Self:  # noqa: PR01, RT01, D200
        """
        Return a random sample of items from an axis of object.

        Parameters
        ----------
        n : int, optional
            Number of items to draw. Defaults to 1 when neither `n` nor
            `frac` is given. Mutually exclusive with `frac`.
        frac : float, optional
            Fraction of the axis length to draw; converted to a count with
            ``int(round(frac * axis_length))``.
        replace : bool, default: False
            Forwarded to the NumPy ``choice`` call.
        weights : str, list-like or BasePandasDataset, optional
            Sampling weights. A string selects a column (row sampling only).
            NaN weights are treated as 0 and the weights are renormalized
            to sum to 1.
        random_state : int or numpy.random.RandomState, optional
            Seed or generator used for drawing.
        axis : Axis, optional
            Axis to sample from.
        ignore_index : bool, default: False
            If True, the row index of the result is reset through the query
            compiler's ``reset_index(drop=True)``.

        Returns
        -------
        BasePandasDataset
            New object built from the sampled rows or columns.

        Raises
        ------
        ValueError
            On invalid `n`/`frac`/`weights`/`random_state` combinations.
        KeyError
            If `weights` is a string that is not a valid column.
        """
        axis = self._get_axis_number(axis)
        if axis:
            axis_labels = self.columns
            axis_length = len(axis_labels)
        else:
            # Getting rows requires indices instead of labels. RangeIndex provides this.
            axis_labels = pandas.RangeIndex(len(self))
            axis_length = len(axis_labels)
        if weights is not None:
            # Index of the weights Series should correspond to the index of the
            # Dataframe in order to sample
            if isinstance(weights, BasePandasDataset):
                weights = weights.reindex(self._get_axis(axis))
            # If weights arg is a string, the weights used for sampling will
            # the be values in the column corresponding to that string
            if isinstance(weights, str):
                if axis == 0:
                    try:
                        weights = self[weights]
                    except KeyError:
                        raise KeyError("String passed to weights not a valid column")
                else:
                    raise ValueError(
                        "Strings can only be passed to "
                        + "weights when sampling from rows on "
                        + "a DataFrame"
                    )
            weights = pandas.Series(weights, dtype="float64")

            if len(weights) != axis_length:
                raise ValueError(
                    "Weights and axis to be sampled must be of same length"
                )
            if (weights == np.inf).any() or (weights == -np.inf).any():
                raise ValueError("weight vector may not include `inf` values")
            if (weights < 0).any():
                raise ValueError("weight vector many not include negative values")
            # weights cannot be NaN when sampling, so we must set all nan
            # values to 0
            weights = weights.fillna(0)
            # If passed in weights are not equal to 1, renormalize them
            # otherwise numpy sampling function will error
            weights_sum = weights.sum()
            if weights_sum != 1:
                if weights_sum != 0:
                    weights = weights / weights_sum
                else:
                    raise ValueError("Invalid weights: weights sum to zero")
            weights = weights.values

        if n is None and frac is None:
            # default to n = 1 if n and frac are both None (in accordance with
            # pandas specification)
            n = 1
        elif n is not None and frac is None and n % 1 != 0:
            # n must be an integer
            raise ValueError("Only integers accepted as `n` values")
        elif n is None and frac is not None:
            # compute the number of samples based on frac
            n = int(round(frac * axis_length))
        elif n is not None and frac is not None:
            # Pandas specification does not allow both n and frac to be passed
            # in
            raise ValueError("Please enter a value for `frac` OR `n`, not both")
        if n < 0:
            raise ValueError(
                "A negative number of rows requested. Please provide positive value."
            )
        if n == 0:
            # This returns an empty object, and since it is a weird edge case that
            # doesn't need to be distributed, we default to pandas for n=0.
            # We don't need frac to be set to anything since n is already 0.
            return self._default_to_pandas(
                "sample",
                n=n,
                frac=None,
                replace=replace,
                weights=weights,
                random_state=random_state,
                axis=axis,
                ignore_index=ignore_index,
            )
        if random_state is not None:
            # Get a random number generator depending on the type of
            # random_state that is passed in
            if isinstance(random_state, int):
                random_num_gen = np.random.RandomState(random_state)
            elif isinstance(random_state, np.random.RandomState):
                random_num_gen = random_state
            else:
                # random_state must be an int or a numpy RandomState object
                raise ValueError(
                    "Please enter an `int` OR a "
                    + "np.random.RandomState for random_state"
                )
            # choose random numbers and then get corresponding labels from
            # chosen axis
            sample_indices = random_num_gen.choice(
                np.arange(0, axis_length), size=n, replace=replace, p=weights
            )
            samples = axis_labels[sample_indices]
        else:
            # randomly select labels from chosen axis
            samples = np.random.choice(
                a=axis_labels, size=n, replace=replace, p=weights
            )
        if axis:
            query_compiler = self._query_compiler.getitem_column_array(samples)
        else:
            query_compiler = self._query_compiler.getitem_row_array(samples)
        if ignore_index:
            # Previously `ignore_index` was only honoured on the n == 0 path;
            # relabel the row index here so the main path respects it too.
            query_compiler = query_compiler.reset_index(drop=True)
        return self.__constructor__(query_compiler=query_compiler)

    def sem(
        self,
        axis: Axis = 0,
        skipna: bool = True,
        ddof: int = 1,
        numeric_only=False,
        **kwargs,
    ) -> Series | float:  # noqa: PR01, RT01, D200
        """
        Return unbiased standard error of the mean over requested axis.

        Delegates to `_stat_operation` with the operation name ``"sem"``;
        `ddof` is forwarded as a keyword argument.
        """
        stat_kwargs = {"ddof": ddof, **kwargs}
        return self._stat_operation(
            "sem", axis, skipna, numeric_only, **stat_kwargs
        )

    def mean(
        self,
        axis: Axis = 0,
        skipna=True,
        numeric_only=False,
        **kwargs,
    ) -> Series | float:  # noqa: PR01, RT01, D200
        """
        Return the mean of the values over the requested axis.

        Delegates to `_stat_operation` with the operation name ``"mean"``.
        """
        stat_args = ("mean", axis, skipna, numeric_only)
        return self._stat_operation(*stat_args, **kwargs)

    def median(
        self,
        axis: Axis = 0,
        skipna=True,
        numeric_only=False,
        **kwargs,
    ) -> Series | float:  # noqa: PR01, RT01, D200
        """
        Return the median of the values over the requested axis.

        Delegates to `_stat_operation` with the operation name ``"median"``.
        """
        stat_args = ("median", axis, skipna, numeric_only)
        return self._stat_operation(*stat_args, **kwargs)

    def set_axis(
        self,
        labels,
        *,
        axis: Axis = 0,
        copy=None,
    ) -> Self:  # noqa: PR01, RT01, D200
        """
        Assign desired index to given axis.

        `labels` is assigned to the attribute named after `axis`. The
        assignment happens on a copy unless `copy` is explicitly falsy,
        in which case `self` is modified and returned.
        """
        make_copy = True if copy is None else copy
        target = self.copy() if make_copy else self
        axis_attribute = pandas.DataFrame._get_axis_name(axis)
        setattr(target, axis_attribute, labels)
        return target

    def set_flags(
        self, *, copy: bool = False, allows_duplicate_labels: Optional[bool] = None
    ) -> Self:  # noqa: PR01, RT01, D200
        """
        Return a new `BasePandasDataset` with updated flags.

        Defaults to pandas, using ``pandas.DataFrame.set_flags`` as the
        operation.
        """
        flag_kwargs = {
            "copy": copy,
            "allows_duplicate_labels": allows_duplicate_labels,
        }
        return self._default_to_pandas(pandas.DataFrame.set_flags, **flag_kwargs)

    @property
    def flags(self):
        """
        Get the ``flags`` attribute of the object by defaulting to pandas.
        """
        # The callable receives the pandas object produced by
        # `_default_to_pandas` and reads `flags` from it.
        return self._default_to_pandas(lambda df: df.flags)

    def shift(
        self,
        periods: int = 1,
        freq=None,
        axis: Axis = 0,
        fill_value: Hashable = lib.no_default,
        suffix=None,
    ) -> Self | DataFrame:  # noqa: PR01, RT01, D200
        """
        Shift index by desired number of periods with an optional time `freq`.

        A truthy `suffix` defaults to pandas. Passing both `freq` and
        `fill_value` raises ``ValueError``. A zero `periods` returns a
        copy; everything else goes through the query compiler's `shift`.
        """
        if suffix:
            return self._default_to_pandas(
                lambda df: df.shift(
                    periods=periods,
                    freq=freq,
                    axis=axis,
                    fill_value=fill_value,
                    suffix=suffix,
                )
            )

        has_fill_value = fill_value is not lib.no_default
        if freq is not None and has_fill_value:
            owner = type(self).__name__
            raise ValueError(
                "Cannot pass both 'freq' and 'fill_value' to " + f"{owner}.shift"
            )

        # Check obvious case first
        if periods == 0:
            return self.copy()

        shifted_qc = self._query_compiler.shift(periods, freq, axis, fill_value)
        return self._create_or_update_from_compiler(
            new_query_compiler=shifted_qc, inplace=False
        )

    def skew(
        self,
        axis: Axis = 0,
        skipna: bool = True,
        numeric_only=False,
        **kwargs,
    ) -> Series | float:  # noqa: PR01, RT01, D200
        """
        Return unbiased skew over requested axis.

        Delegates to `_stat_operation` with the operation name ``"skew"``.
        """
        stat_args = ("skew", axis, skipna, numeric_only)
        return self._stat_operation(*stat_args, **kwargs)

    def sort_index(
        self,
        *,
        axis=0,
        level=None,
        ascending=True,
        inplace=False,
        kind="quicksort",
        na_position="last",
        sort_remaining=True,
        ignore_index: bool = False,
        key: Optional[IndexKeyFunc] = None,
    ) -> Self | None:  # noqa: PR01, RT01, D200
        """
        Sort object by labels (along an axis).

        ``ascending=None`` raises ``ValueError``. Otherwise the sort is
        performed by the query compiler's `sort_index` and the result is
        applied through `_create_or_update_from_compiler`.
        """
        # pandas throws this exception. See pandas issue #39434
        if ascending is None:
            raise ValueError(
                "the `axis` parameter is not supported in the pandas implementation of argsort()"
            )
        axis = self._get_axis_number(axis)
        inplace = validate_bool_kwarg(inplace, "inplace")
        sort_kwargs = {
            "axis": axis,
            "level": level,
            "ascending": ascending,
            "inplace": inplace,
            "kind": kind,
            "na_position": na_position,
            "sort_remaining": sort_remaining,
            "ignore_index": ignore_index,
            "key": key,
        }
        sorted_qc = self._query_compiler.sort_index(**sort_kwargs)
        return self._create_or_update_from_compiler(sorted_qc, inplace)

    def sort_values(
        self,
        by,
        *,
        axis=0,
        ascending=True,
        inplace: bool = False,
        kind="quicksort",
        na_position="last",
        ignore_index: bool = False,
        key: Optional[IndexKeyFunc] = None,
    ) -> Self | None:  # noqa: PR01, RT01, D200
        """
        Sort by the values along either axis.

        Axis 0 uses the query compiler's `sort_rows_by_column_values`, any
        other axis uses `sort_columns_by_row_values`; both receive the same
        arguments.
        """
        axis = self._get_axis_number(axis)
        inplace = validate_bool_kwarg(inplace, "inplace")
        ascending = validate_ascending(ascending)
        if axis == 0:
            sorter = self._query_compiler.sort_rows_by_column_values
        else:
            sorter = self._query_compiler.sort_columns_by_row_values
        sorted_qc = sorter(
            by,
            ascending=ascending,
            kind=kind,
            na_position=na_position,
            ignore_index=ignore_index,
            key=key,
        )
        return self._create_or_update_from_compiler(sorted_qc, inplace)

    def std(
        self,
        axis: Axis = 0,
        skipna: bool = True,
        ddof: int = 1,
        numeric_only=False,
        **kwargs,
    ) -> Series | float:  # noqa: PR01, RT01, D200
        """
        Return sample standard deviation over requested axis.

        Delegates to `_stat_operation` with the operation name ``"std"``;
        `ddof` is forwarded as a keyword argument.
        """
        stat_kwargs = {"ddof": ddof, **kwargs}
        return self._stat_operation(
            "std", axis, skipna, numeric_only, **stat_kwargs
        )

    def sub(
        self, other, axis="columns", level=None, fill_value=None
    ) -> Self:  # noqa: PR01, RT01, D200
        """
        Get subtraction of `BasePandasDataset` and `other`, element-wise (binary operator `sub`).

        Delegates to `_binary_op` with the operation name ``"sub"``.
        """
        op_kwargs = {"axis": axis, "level": level, "fill_value": fill_value}
        return self._binary_op("sub", other, **op_kwargs)

    subtract: Self = sub

    def swapaxes(self, axis1, axis2, copy=None) -> Self:  # noqa: PR01, RT01, D200
        """
        Interchange axes and swap values axes appropriately.

        Different axes yield the transpose. Equal axes yield a copy, or
        `self` itself when `copy` is explicitly falsy.
        """
        make_copy = True if copy is None else copy
        first = self._get_axis_number(axis1)
        second = self._get_axis_number(axis2)
        if first != second:
            return self.transpose()
        return self.copy() if make_copy else self

    def swaplevel(self, i=-2, j=-1, axis=0) -> Self:  # noqa: PR01, RT01, D200
        """
        Swap levels `i` and `j` in a `MultiIndex`.

        The labels of the selected axis are swapped and applied through
        `set_axis`.
        """
        axis_number = self._get_axis_number(axis)
        if axis_number == 0:
            labels = self.index
        else:
            labels = self.columns
        swapped = labels.swaplevel(i, j)
        return self.set_axis(swapped, axis=axis_number)

    def tail(self, n=5) -> Self:  # noqa: PR01, RT01, D200
        """
        Return the last `n` rows.

        ``n == 0`` is handled separately because ``-0 == 0`` would make the
        slice ``[-n:]`` select everything instead of nothing.
        """
        selection = slice(-n, None) if n != 0 else slice(len(self), None)
        return self.iloc[selection]

    def take(self, indices, axis=0, **kwargs) -> Self:  # noqa: PR01, RT01, D200
        """
        Return the elements in the given *positional* indices along an axis.

        The selection is made through `iloc`: `indices` select rows for
        axis 0, and columns (with all rows) otherwise. `**kwargs` is
        accepted but not used.
        """
        if self._get_axis_number(axis) == 0:
            return self.iloc[indices]
        return self.iloc[slice(None), indices]

    def to_clipboard(
        self, excel=True, sep=None, **kwargs
    ):  # pragma: no cover  # noqa: PR01, RT01, D200
        """
        Copy object to the system clipboard.

        Defaults to pandas, forwarding every argument to ``to_clipboard``.
        """
        clipboard_kwargs = {"excel": excel, "sep": sep, **kwargs}
        return self._default_to_pandas("to_clipboard", **clipboard_kwargs)

    @expanduser_path_arg("path_or_buf")
    def to_csv(
        self,
        path_or_buf=None,
        sep=",",
        na_rep="",
        float_format=None,
        columns=None,
        header=True,
        index=True,
        index_label=None,
        mode="w",
        encoding=None,
        compression="infer",
        quoting=None,
        quotechar='"',
        lineterminator=None,
        chunksize=None,
        date_format=None,
        doublequote=True,
        escapechar=None,
        decimal=".",
        errors: str = "strict",
        storage_options: StorageOptions = None,
    ) -> str | None:  # pragma: no cover
        """
        Write object to a comma-separated values (csv) file.

        The query compiler and all arguments are handed to
        ``FactoryDispatcher.to_csv``, whose result is returned.
        """
        from modin.core.execution.dispatching.factories.dispatcher import (
            FactoryDispatcher,
        )

        csv_kwargs = {
            "path_or_buf": path_or_buf,
            "sep": sep,
            "na_rep": na_rep,
            "float_format": float_format,
            "columns": columns,
            "header": header,
            "index": index,
            "index_label": index_label,
            "mode": mode,
            "encoding": encoding,
            "compression": compression,
            "quoting": quoting,
            "quotechar": quotechar,
            "lineterminator": lineterminator,
            "chunksize": chunksize,
            "date_format": date_format,
            "doublequote": doublequote,
            "escapechar": escapechar,
            "decimal": decimal,
            "errors": errors,
            "storage_options": storage_options,
        }
        return FactoryDispatcher.to_csv(self._query_compiler, **csv_kwargs)

    @expanduser_path_arg("excel_writer")
    def to_excel(
        self,
        excel_writer,
        sheet_name="Sheet1",
        na_rep="",
        float_format=None,
        columns=None,
        header=True,
        index=True,
        index_label=None,
        startrow=0,
        startcol=0,
        engine=None,
        merge_cells=True,
        inf_rep="inf",
        freeze_panes=None,
        storage_options: StorageOptions = None,
        engine_kwargs=None,
    ) -> None:  # pragma: no cover  # noqa: PR01, RT01, D200
        """
        Write object to an Excel sheet.

        Defaults to pandas: `excel_writer` is forwarded positionally and
        every other argument by keyword to ``to_excel``.
        """
        excel_kwargs = {
            "sheet_name": sheet_name,
            "na_rep": na_rep,
            "float_format": float_format,
            "columns": columns,
            "header": header,
            "index": index,
            "index_label": index_label,
            "startrow": startrow,
            "startcol": startcol,
            "engine": engine,
            "merge_cells": merge_cells,
            "inf_rep": inf_rep,
            "freeze_panes": freeze_panes,
            "storage_options": storage_options,
            "engine_kwargs": engine_kwargs,
        }
        return self._default_to_pandas("to_excel", excel_writer, **excel_kwargs)

    def to_dict(self, orient="dict", into=dict, index=True) -> dict:
        """
        Convert the `BasePandasDataset` to a dictionary.

        The conversion is performed by the query compiler's
        `dataframe_to_dict`, which receives `orient`, `into` and `index`
        positionally.
        """
        query_compiler = self._query_compiler
        return query_compiler.dataframe_to_dict(orient, into, index)

    @expanduser_path_arg("path_or_buf")
    def to_hdf(
        self,
        path_or_buf,
        key: str,
        mode: Literal["a", "w", "r+"] = "a",
        complevel: int | None = None,
        complib: Literal["zlib", "lzo", "bzip2", "blosc"] | None = None,
        append: bool = False,
        format: Literal["fixed", "table"] | None = None,
        index: bool = True,
        min_itemsize: int | dict[str, int] | None = None,
        nan_rep=None,
        dropna: bool | None = None,
        data_columns: Literal[True] | list[str] | None = None,
        errors: str = "strict",
        encoding: str = "UTF-8",
    ) -> None:  # pragma: no cover  # noqa: PR01, RT01, D200
        """
        Write the contained data to an HDF5 file using HDFStore.

        Defaults to pandas: `path_or_buf` is forwarded positionally and
        every other argument by keyword to ``to_hdf``.
        """
        hdf_kwargs = {
            "key": key,
            "mode": mode,
            "complevel": complevel,
            "complib": complib,
            "append": append,
            "format": format,
            "index": index,
            "min_itemsize": min_itemsize,
            "nan_rep": nan_rep,
            "dropna": dropna,
            "data_columns": data_columns,
            "errors": errors,
            "encoding": encoding,
        }
        return self._default_to_pandas("to_hdf", path_or_buf, **hdf_kwargs)

    @expanduser_path_arg("path_or_buf")
    def to_json(
        self,
        path_or_buf=None,
        orient=None,
        date_format=None,
        double_precision=10,
        force_ascii=True,
        date_unit="ms",
        default_handler=None,
        lines=False,
        compression="infer",
        index=None,
        indent=None,
        storage_options: StorageOptions = None,
        mode="w",
    ) -> str | None:  # pragma: no cover  # noqa: PR01, RT01, D200
        """
        Convert the object to a JSON string.

        The query compiler, `path_or_buf` (positionally) and the remaining
        arguments (by keyword) are handed to ``FactoryDispatcher.to_json``.
        """
        from modin.core.execution.dispatching.factories.dispatcher import (
            FactoryDispatcher,
        )

        json_kwargs = {
            "orient": orient,
            "date_format": date_format,
            "double_precision": double_precision,
            "force_ascii": force_ascii,
            "date_unit": date_unit,
            "default_handler": default_handler,
            "lines": lines,
            "compression": compression,
            "index": index,
            "indent": indent,
            "storage_options": storage_options,
            "mode": mode,
        }
        return FactoryDispatcher.to_json(
            self._query_compiler, path_or_buf, **json_kwargs
        )

    @expanduser_path_arg("buf")
    def to_latex(
        self,
        buf=None,
        columns=None,
        header=True,
        index=True,
        na_rep="NaN",
        formatters=None,
        float_format=None,
        sparsify=None,
        index_names=True,
        bold_rows=False,
        column_format=None,
        longtable=None,
        escape=None,
        encoding=None,
        decimal=".",
        multicolumn=None,
        multicolumn_format=None,
        multirow=None,
        caption=None,
        label=None,
        position=None,
    ) -> str | None:  # pragma: no cover  # noqa: PR01, RT01, D200
        """
        Render object to a LaTeX tabular, longtable, or nested table.

        Defaults to pandas, forwarding every argument by keyword to
        ``to_latex``.
        """
        latex_kwargs = {
            "buf": buf,
            "columns": columns,
            "header": header,
            "index": index,
            "na_rep": na_rep,
            "formatters": formatters,
            "float_format": float_format,
            "sparsify": sparsify,
            "index_names": index_names,
            "bold_rows": bold_rows,
            "column_format": column_format,
            "longtable": longtable,
            "escape": escape,
            "encoding": encoding,
            "decimal": decimal,
            "multicolumn": multicolumn,
            "multicolumn_format": multicolumn_format,
            "multirow": multirow,
            "caption": caption,
            "label": label,
            "position": position,
        }
        return self._default_to_pandas("to_latex", **latex_kwargs)

    @expanduser_path_arg("buf")
    def to_markdown(
        self,
        buf=None,
        mode: str = "wt",
        index: bool = True,
        storage_options: StorageOptions = None,
        **kwargs,
    ) -> str:  # noqa: PR01, RT01, D200
        """
        Print `BasePandasDataset` in Markdown-friendly format.

        Defaults to pandas, forwarding every argument (including
        `**kwargs`) by keyword to ``to_markdown``.
        """
        markdown_kwargs = {
            "buf": buf,
            "mode": mode,
            "index": index,
            "storage_options": storage_options,
            **kwargs,
        }
        return self._default_to_pandas("to_markdown", **markdown_kwargs)

    @expanduser_path_arg("path")
    def to_pickle(
        self,
        path,
        compression: CompressionOptions = "infer",
        protocol: int = pkl.HIGHEST_PROTOCOL,
        storage_options: StorageOptions = None,
    ) -> None:  # pragma: no cover  # noqa: PR01, D200
        """
        Pickle (serialize) object to file.

        The work is done by the module-level ``modin.pandas.to_pickle``,
        which receives this object and `path` positionally.
        """
        from modin.pandas import to_pickle

        pickle_kwargs = {
            "compression": compression,
            "protocol": protocol,
            "storage_options": storage_options,
        }
        to_pickle(self, path, **pickle_kwargs)

    def _to_bare_numpy(
        self, dtype=None, copy=False, na_value=lib.no_default
    ):  # noqa: PR01, RT01, D200
        """
        Convert the `BasePandasDataset` to a NumPy array.

        The conversion is performed by the query compiler's `to_numpy`.
        """
        numpy_kwargs = {"dtype": dtype, "copy": copy, "na_value": na_value}
        return self._query_compiler.to_numpy(**numpy_kwargs)

    def to_numpy(
        self, dtype=None, copy=False, na_value=lib.no_default
    ) -> np.ndarray:  # noqa: PR01, RT01, D200
        """
        Convert the `BasePandasDataset` to a NumPy array or a Modin wrapper for NumPy array.

        When the ``ModinNumpy`` config option is enabled, a Modin array is
        built from this object and `copy` (`dtype` and `na_value` are not
        forwarded on that path). Otherwise `_to_bare_numpy` is used.
        """
        from modin.config import ModinNumpy

        if not ModinNumpy.get():
            return self._to_bare_numpy(dtype=dtype, copy=copy, na_value=na_value)

        from ..numpy.arr import array

        return array(self, copy=copy)

    # TODO(williamma12): When this gets implemented, have the series one call this.
    def to_period(
        self, freq=None, axis=0, copy=None
    ) -> Self:  # pragma: no cover  # noqa: PR01, RT01, D200
        """
        Convert `BasePandasDataset` from DatetimeIndex to PeriodIndex.

        Defaults to pandas, forwarding every argument by keyword to
        ``to_period``.
        """
        period_kwargs = {"freq": freq, "axis": axis, "copy": copy}
        return self._default_to_pandas("to_period", **period_kwargs)

    @expanduser_path_arg("buf")
    def to_string(
        self,
        buf=None,
        columns=None,
        col_space=None,
        header=True,
        index=True,
        na_rep="NaN",
        formatters=None,
        float_format=None,
        sparsify=None,
        index_names=True,
        justify=None,
        max_rows=None,
        min_rows=None,
        max_cols=None,
        show_dimensions=False,
        decimal=".",
        line_width=None,
        max_colwidth=None,
        encoding=None,
    ) -> str | None:  # noqa: PR01, RT01, D200
        """
        Render a `BasePandasDataset` to a console-friendly tabular output.
        """
        # Every formatting option is forwarded unchanged to the pandas
        # implementation. `min_rows` has to be part of that set: it is accepted
        # in the signature, so dropping it here would silently ignore the
        # caller's value.
        return self._default_to_pandas(
            "to_string",
            buf=buf,
            columns=columns,
            col_space=col_space,
            header=header,
            index=index,
            na_rep=na_rep,
            formatters=formatters,
            float_format=float_format,
            sparsify=sparsify,
            index_names=index_names,
            justify=justify,
            max_rows=max_rows,
            min_rows=min_rows,
            max_cols=max_cols,
            show_dimensions=show_dimensions,
            decimal=decimal,
            line_width=line_width,
            max_colwidth=max_colwidth,
            encoding=encoding,
        )

    def to_sql(
        self,
        name,
        con,
        schema=None,
        if_exists="fail",
        index=True,
        index_label=None,
        chunksize=None,
        dtype=None,
        method=None,
    ) -> int | None:  # noqa: PR01, D200
        """
        Write records stored in a `BasePandasDataset` to a SQL database.
        """
        query_compiler = self._query_compiler
        if index:
            # The index is written by turning it into regular leading column(s)
            # of the frame being exported.
            query_compiler = query_compiler.reset_index()
            if index_label is not None:
                if not is_list_like(index_label):
                    index_label = [index_label]
                # Rename the leading column(s) produced by `reset_index` and
                # keep the remaining column labels as they are.
                relabeled_columns = list(index_label)
                relabeled_columns.extend(query_compiler.columns[len(index_label) :])
                query_compiler.columns = relabeled_columns
            # The index now lives in the data, so the writer must not emit it again.
            index = False

        from modin.core.execution.dispatching.factories.dispatcher import (
            FactoryDispatcher,
        )

        writer_options = {
            "name": name,
            "con": con,
            "schema": schema,
            "if_exists": if_exists,
            "index": index,
            "index_label": index_label,
            "chunksize": chunksize,
            "dtype": dtype,
            "method": method,
        }
        FactoryDispatcher.to_sql(query_compiler, **writer_options)

    # TODO(williamma12): When this gets implemented, have the series one call this.
    def to_timestamp(
        self, freq=None, how="start", axis=0, copy=None
    ) -> Self:  # noqa: PR01, RT01, D200
        """
        Cast to DatetimeIndex of timestamps, at *beginning* of period.
        """
        # No native implementation here: hand everything to the pandas fallback.
        timestamp_options = {"freq": freq, "how": how, "axis": axis, "copy": copy}
        return self._default_to_pandas("to_timestamp", **timestamp_options)

    def to_xarray(self):  # noqa: PR01, RT01, D200
        """
        Return an xarray object built from the `BasePandasDataset` (pandas fallback).
        """
        pandas_method = "to_xarray"
        return self._default_to_pandas(pandas_method)

    def truediv(
        self, other, axis="columns", level=None, fill_value=None
    ) -> Self:  # noqa: PR01, RT01, D200
        """
        Get floating division of `BasePandasDataset` and `other`, element-wise (binary operator `truediv`).
        """
        alignment_options = {"axis": axis, "level": level, "fill_value": fill_value}
        return self._binary_op("truediv", other, **alignment_options)

    # `div` and `divide` are aliases bound to the same function object as
    # `truediv`, so all three names share one implementation.
    div: Self = truediv
    divide: Self = truediv

    def truncate(
        self, before=None, after=None, axis=None, copy=None
    ) -> Self:  # noqa: PR01, RT01, D200
        """
        Truncate a `BasePandasDataset` before and after some index value.
        """
        axis = self._get_axis_number(axis)
        axis_labels = self._get_axis(axis)

        # Label-based bounds are only meaningful on a monotonic axis.
        is_sorted = (
            axis_labels.is_monotonic_increasing or axis_labels.is_monotonic_decreasing
        )
        if not is_sorted:
            raise ValueError("truncate requires a sorted index")

        both_bounds_given = before is not None and after is not None
        if both_bounds_given and before > after:
            raise ValueError(f"Truncate: {after} must be after {before}")

        # Translate the label bounds into a positional slice along `axis`.
        start, stop = axis_labels.slice_locs(before, after)
        positions = slice(start, stop)
        if axis == 0:
            return self.iloc[positions]
        return self.iloc[slice(None), positions]

    def transform(
        self, func, axis=0, *args, **kwargs
    ) -> Self:  # noqa: PR01, RT01, D200
        """
        Call ``func`` on self producing a `BasePandasDataset` with the same axis shape as self.
        """
        # Flag the aggregation call as a transform for the downstream machinery.
        kwargs["is_transform"] = True
        self._validate_function(func)
        try:
            result = self.agg(func, *args, axis=axis, **kwargs)
        except (TypeError, pandas.errors.SpecificationError):
            # These error types are surfaced to the caller unchanged.
            raise
        except Exception as err:
            raise ValueError("Transform function failed") from err

        # A valid transform yields a Series/DataFrame-like object whose index
        # is the same as the input's.
        frame_like = getattr(result, "_pandas_class", None) in (
            pandas.Series,
            pandas.DataFrame,
        )
        if not frame_like or not result.index.equals(self.index):
            raise ValueError("Function did not transform")
        return result

    def tz_convert(
        self, tz, axis=0, level=None, copy=None
    ) -> Self:  # noqa: PR01, RT01, D200
        """
        Convert tz-aware axis to target time zone.
        """
        # An unspecified `copy` is treated as a request for a new object.
        copy = True if copy is None else copy
        converted_compiler = self._query_compiler.tz_convert(
            tz, axis=self._get_axis_number(axis), level=level, copy=copy
        )
        # `copy=False` means the result replaces the data of this object.
        return self._create_or_update_from_compiler(
            converted_compiler, inplace=(not copy)
        )

    def tz_localize(
        self, tz, axis=0, level=None, copy=None, ambiguous="raise", nonexistent="raise"
    ) -> Self:  # noqa: PR01, RT01, D200
        """
        Localize tz-naive index of a `BasePandasDataset` to target time zone.
        """
        # An unspecified `copy` is treated as a request for a new object.
        copy = True if copy is None else copy
        localize_options = {
            "axis": self._get_axis_number(axis),
            "level": level,
            "copy": copy,
            "ambiguous": ambiguous,
            "nonexistent": nonexistent,
        }
        localized_compiler = self._query_compiler.tz_localize(tz, **localize_options)
        # `copy=False` means the result replaces the data of this object.
        return self._create_or_update_from_compiler(
            localized_compiler, inplace=(not copy)
        )

    def interpolate(
        self,
        method="linear",
        *,
        axis=0,
        limit=None,
        inplace=False,
        limit_direction: Optional[str] = None,
        limit_area=None,
        downcast=lib.no_default,
        **kwargs,
    ) -> Self:  # noqa: PR01, RT01, D200
        # `lib.no_default` marks "argument not supplied"; any explicit value
        # (even None) triggers the deprecation warning below.
        if downcast is lib.no_default:
            downcast = None
        else:
            warnings.warn(
                f"The 'downcast' keyword in {type(self).__name__}.interpolate "
                "is deprecated and will be removed in a future version. "
                "Call result.infer_objects(copy=False) on the result instead.",
                FutureWarning,
            )

        # The query compiler is always asked for a new result (`inplace=False`);
        # the caller's `inplace` choice is applied when wrapping that result.
        interpolated_compiler = self._query_compiler.interpolate(
            method=method,
            axis=axis,
            limit=limit,
            inplace=False,
            limit_direction=limit_direction,
            limit_area=limit_area,
            downcast=downcast,
            **kwargs,
        )
        return self._create_or_update_from_compiler(
            interpolated_compiler, inplace=inplace
        )

    # TODO: uncomment the following lines once issue #3331 is closed
    # @prepend_to_notes(
    #     """
    #     In comparison with pandas, Modin's ``value_counts`` returns Series with ``MultiIndex``
    #     only if multiple columns were passed via the `subset` parameter, otherwise, the resulted
    #     Series's index will be a regular single dimensional ``Index``.
    #     """
    # )
    @_inherit_docstrings(
        pandas.DataFrame.value_counts, apilink="pandas.DataFrame.value_counts"
    )
    def value_counts(
        self,
        subset: Sequence[Hashable] | None = None,
        normalize: bool = False,
        sort: bool = True,
        ascending: bool = False,
        dropna: bool = True,
    ) -> Series:
        # NOTE(review): no docstring here on purpose - presumably it is supplied
        # by the `_inherit_docstrings` decorator above; confirm before adding one.

        # Without an explicit subset, count over every column.
        group_keys = self._query_compiler.columns if subset is None else subset

        # Counting is a group-by on the keys followed by a size computation;
        # the "keys will be sorted anyway" warning is irrelevant here.
        with warnings.catch_warnings():
            warnings.filterwarnings(
                "ignore",
                message=".*groupby keys will be sorted anyway.*",
                category=UserWarning,
            )
            grouped = self.groupby(
                by=group_keys, dropna=dropna, observed=True, sort=False
            )
            counts = grouped.size()

        if sort:
            # The sort below addresses the data by name, so give the counts a
            # name when they do not have one.
            if counts.name is None:
                counts.name = 0
            sorted_compiler = counts._query_compiler.sort_rows_by_column_values(
                columns=counts.name,
                ascending=ascending,
            )
            counts = self._create_or_update_from_compiler(sorted_compiler)
            # Wrapping the compiler may produce a one-column frame; reduce it
            # back to a one-dimensional object.
            if isinstance(counts, pd.DataFrame):
                counts = counts.squeeze(axis=1)

        if normalize:
            counts = counts / counts.sum()
        # TODO: uncomment when strict compatibility mode is implemented:
        # https://github.com/modin-project/modin/issues/3411
        # if STRICT_COMPABILITY and not isinstance(counted_values.index, MultiIndex):
        #     counted_values.index = pandas.MultiIndex.from_arrays(
        #         [counted_values.index], names=counted_values.index.names
        #     )
        # https://pandas.pydata.org/pandas-docs/version/2.0/whatsnew/v2.0.0.html#value-counts-sets-the-resulting-name-to-count
        counts.name = "proportion" if normalize else "count"
        return counts

    def var(
        self,
        axis: Axis = 0,
        skipna: bool = True,
        ddof: int = 1,
        numeric_only=False,
        **kwargs,
    ) -> Series | float:  # noqa: PR01, RT01, D200
        """
        Return unbiased variance over requested axis.
        """
        # `ddof` travels as a keyword together with any extra keyword arguments.
        extra_options = {"ddof": ddof, **kwargs}
        return self._stat_operation(
            "var", axis, skipna, numeric_only, **extra_options
        )

    def __abs__(self) -> Self:
        """
        Return a `BasePandasDataset` with absolute numeric value of each element.

        Returns
        -------
        BasePandasDataset
            Object containing the absolute value of each element.
        """
        return self.abs()

    # `&` is a logical/bitwise AND, i.e. a conjunction (not a union); the
    # documented operation name mirrors the "disjunction" wording used for `|`.
    @_doc_binary_op(
        operation="conjunction", bin_op="and", right="other", **_doc_binary_op_kwargs
    )
    def __and__(self, other) -> Self:
        return self._binary_op("__and__", other, axis=0)

    # Reflected variant of `&`, documented with the same operation name.
    @_doc_binary_op(
        operation="conjunction", bin_op="rand", right="other", **_doc_binary_op_kwargs
    )
    def __rand__(self, other) -> Self:
        return self._binary_op("__rand__", other, axis=0)

    def __array__(
        self, dtype: npt.DTypeLike | None = None, copy: bool | None = None
    ) -> np.ndarray:
        """
        Return the values as a NumPy array.

        Parameters
        ----------
        dtype : str or np.dtype, optional
            The dtype of returned array.
        copy : bool, default: None
            This parameter has no effect; the method always returns a copy of
            the data.

        Returns
        -------
        arr : np.ndarray
            NumPy representation of Modin object.
        """
        return self._to_bare_numpy(dtype)

    def __copy__(self, deep=True) -> Self:
        """
        Return the copy of the `BasePandasDataset`.

        Parameters
        ----------
        deep : bool, default: True
            Whether the copy should be deep or not.

        Returns
        -------
        BasePandasDataset
        """
        return self.copy(deep=deep)

    def __deepcopy__(self, memo=None) -> Self:
        """
        Return the deep copy of the `BasePandasDataset`.

        Parameters
        ----------
        memo : Any, optional
           Deprecated parameter.

        Returns
        -------
        BasePandasDataset
        """
        return self.copy(deep=True)

    @_doc_binary_op(
        operation="equality comparison",
        bin_op="eq",
        right="other",
        **_doc_binary_op_kwargs,
    )
    def __eq__(self, other) -> Self:
        # The `==` operator is a thin wrapper over the named `eq` method.
        comparison = self.eq(other)
        return comparison

    def __finalize__(self, other, method=None, **kwargs) -> Self:
        """
        Propagate metadata from `other` to `self`.

        Parameters
        ----------
        other : BasePandasDataset
            The object from which to get the attributes that we are going
            to propagate.
        method : str, optional
            A passed method name providing context on where `__finalize__`
            was called.
        **kwargs : dict
            Additional keywords arguments to be passed to `__finalize__`.

        Returns
        -------
        BasePandasDataset
        """
        return self._default_to_pandas("__finalize__", other, method=method, **kwargs)

    @_doc_binary_op(
        operation="greater than or equal comparison",
        bin_op="ge",
        right="right",
        **_doc_binary_op_kwargs,
    )
    def __ge__(self, right) -> Self:
        # The `>=` operator is a thin wrapper over the named `ge` method.
        comparison = self.ge(right)
        return comparison

    def __getitem__(self, key) -> Self:
        """
        Retrieve dataset according to `key`.

        Parameters
        ----------
        key : callable, scalar, slice, str or tuple
            The global row index to retrieve data from.

        Returns
        -------
        BasePandasDataset
            Located dataset.
        """
        if not self._query_compiler.lazy_row_count and len(self) == 0:
            return self._default_to_pandas("__getitem__", key)
        # see if we can slice the rows
        # This lets us reuse code in pandas to error check
        indexer = None
        if isinstance(key, slice):
            indexer = self.index._convert_slice_indexer(key, kind="getitem")
        if indexer is not None:
            return self._getitem_slice(indexer)
        else:
            return self._getitem(key)

    def xs(
        self,
        key,
        axis=0,
        level=None,
        drop_level: bool = True,
    ) -> Self:  # noqa: PR01, RT01, D200
        """
        Return cross-section from the Series/DataFrame.
        """
        axis = self._get_axis_number(axis)
        # Labels of the axis the cross-section is taken along.
        labels = self.columns if axis else self.index

        if isinstance(key, list):
            # deprecated in pandas, to be removed in 2.0
            warnings.warn(
                "Passing lists as key for xs is deprecated and will be removed in a "
                + "future version. Pass key as a tuple instead.",
                FutureWarning,
            )

        # Explicit `level`: only valid on a MultiIndex. The key is resolved
        # against the requested level(s) and the selection is positional.
        if level is not None:
            if not isinstance(labels, pandas.MultiIndex):
                raise TypeError("Index must be a MultiIndex")
            loc, new_ax = labels.get_loc_level(key, level=level, drop_level=drop_level)

            # create the tuple of the indexer
            _indexer = [slice(None)] * self.ndim
            _indexer[axis] = loc
            indexer = tuple(_indexer)

            result = self.iloc[indexer]
            # Replace the labels of the sliced axis with the ones returned by
            # `get_loc_level` above.
            setattr(result, self._pandas_class._get_axis_name(axis), new_ax)
            return result

        if axis == 1:
            # Column cross-section with dropped level is plain `__getitem__`.
            if drop_level:
                return self[key]
            index = self.columns
        else:
            index = self.index

        new_index = None
        if isinstance(index, pandas.MultiIndex):
            loc, new_index = index._get_loc_level(key, level=0)
            if not drop_level:
                # Keep the original (un-dropped) labels for the selected
                # positions; an integer `loc` is widened to a one-element slice.
                if is_integer(loc):
                    new_index = index[loc : loc + 1]
                else:
                    new_index = index[loc]
        else:
            loc = index.get_loc(key)

            if isinstance(loc, np.ndarray):
                # A boolean mask is converted to integer positions first.
                if loc.dtype == np.bool_:
                    (loc,) = loc.nonzero()
                # Note: pandas uses self._take_with_is_copy here
                return self.take(loc, axis=axis)

            if not is_scalar(loc):
                new_index = index[loc]

        if is_scalar(loc) and axis == 0:
            # In this case loc should be an integer
            if self.ndim == 1:
                # if we encounter an array-like and we only have 1 dim
                # that means that there are list/ndarrays inside the Series!
                # so just return them (pandas GH 6394)
                return self.iloc[loc]

            result = self.iloc[loc]
        elif is_scalar(loc):
            # Scalar position on the column axis: select it as a one-element
            # slice rather than by the bare integer.
            result = self.iloc[:, slice(loc, loc + 1)]
        elif axis == 1:
            result = self.iloc[:, loc]
        else:
            # Non-scalar row selection: the labels computed above are applied
            # to the result.
            result = self.iloc[loc]
            if new_index is None:
                raise RuntimeError(
                    "`new_index` variable shouldn't be equal to None here, something went wrong."
                )
            result.index = new_index

        # Note: pandas does result._set_is_copy here
        return result

    # Setting `__hash__` to None marks instances as unhashable: calling
    # `hash()` on them raises TypeError.
    __hash__ = None

    def _setitem_slice(self, key: slice, value) -> None:
        """
        Set rows specified by `key` slice with `value`.

        Parameters
        ----------
        key : location or index-based slice
            Key that points rows to modify.
        value : object
            Value to assing to the rows.
        """
        indexer = self.index._convert_slice_indexer(key, kind="getitem")
        self.iloc[indexer] = value

    def _getitem_slice(self, key: slice) -> Self:
        """
        Retrieve the rows selected by the `key` slice.

        Parameters
        ----------
        key : location or index-based slice
            Key that points to rows to retrieve.

        Returns
        -------
        modin.pandas.BasePandasDataset
            Selected rows.
        """
        # Avoid triggering shape computation for lazy executions: the length
        # is only supplied when the row count is not lazy.
        known_length = None if self._query_compiler.lazy_row_count else len(self)
        if is_full_grab_slice(key, sequence_len=known_length):
            # The slice selects everything, so a copy is returned instead of
            # going through positional indexing.
            return self.copy()
        return self.iloc[key]

    @_doc_binary_op(
        operation="greater than comparison",
        bin_op="gt",
        right="right",
        **_doc_binary_op_kwargs,
    )
    def __gt__(self, right) -> Self:
        # The `>` operator is a thin wrapper over the named `gt` method.
        comparison = self.gt(right)
        return comparison

    def __invert__(self) -> Self:
        """
        Apply bitwise inverse to each element of the `BasePandasDataset`.

        Returns
        -------
        BasePandasDataset
            New BasePandasDataset containing bitwise inverse to each value.
        """
        if not all(is_bool_dtype(d) or is_integer_dtype(d) for d in self._get_dtypes()):
            raise TypeError(
                "bad operand type for unary ~: '{}'".format(
                    next(
                        d
                        for d in self._get_dtypes()
                        if not (is_bool_dtype(d) or is_integer_dtype(d))
                    )
                )
            )
        return self.__constructor__(query_compiler=self._query_compiler.invert())

    @_doc_binary_op(
        operation="less than or equal comparison",
        bin_op="le",
        right="right",
        **_doc_binary_op_kwargs,
    )
    def __le__(self, right) -> Self:
        # The `<=` operator is a thin wrapper over the named `le` method.
        comparison = self.le(right)
        return comparison

    def __len__(self) -> int:
        """
        Return length of info axis.

        Returns
        -------
        int
        """
        return self._query_compiler.get_axis_len(0)

    @_doc_binary_op(
        operation="less than comparison",
        bin_op="lt",
        right="right",
        **_doc_binary_op_kwargs,
    )
    def __lt__(self, right) -> Self:
        # The `<` operator is a thin wrapper over the named `lt` method.
        comparison = self.lt(right)
        return comparison

    def __matmul__(self, other) -> Self | np.ndarray | Scalar:
        """
        Compute the matrix multiplication between the `BasePandasDataset` and `other`.

        Parameters
        ----------
        other : BasePandasDataset or array-like
            The other object to compute the matrix product with.

        Returns
        -------
        BasePandasDataset, np.ndarray or scalar
        """
        return self.dot(other)

    @_doc_binary_op(
        operation="not equal comparison",
        bin_op="ne",
        right="other",
        **_doc_binary_op_kwargs,
    )
    def __ne__(self, other) -> Self:
        # The `!=` operator is a thin wrapper over the named `ne` method.
        comparison = self.ne(other)
        return comparison

    def __neg__(self) -> Self:
        """
        Change the sign for every value of self.

        Returns
        -------
        BasePandasDataset
        """
        self._validate_dtypes(numeric_only=True)
        return self.__constructor__(query_compiler=self._query_compiler.negative())

    def __nonzero__(self):
        """
        Evaluate `BasePandasDataset` as boolean object.

        Raises
        ------
        ValueError
            Always since truth value for self is ambiguous.
        """
        raise ValueError(
            f"The truth value of a {self.__class__.__name__} is ambiguous. "
            + "Use a.empty, a.bool(), a.item(), a.any() or a.all()."
        )

    # `bool(obj)` looks up `__bool__`; bind it to the same function as
    # `__nonzero__` so truth-testing raises the same ValueError.
    __bool__ = __nonzero__

    @_doc_binary_op(
        operation="disjunction",
        bin_op="or",
        right="other",
        **_doc_binary_op_kwargs,
    )
    def __or__(self, other) -> Self:
        # The `|` operator is dispatched as a binary operation along axis 0.
        operator_name = "__or__"
        return self._binary_op(operator_name, other, axis=0)

    @_doc_binary_op(
        operation="disjunction",
        bin_op="ror",
        right="other",
        **_doc_binary_op_kwargs,
    )
    def __ror__(self, other) -> Self:
        # The reflected `|` operator is dispatched as a binary operation along axis 0.
        operator_name = "__ror__"
        return self._binary_op(operator_name, other, axis=0)

    def __sizeof__(self) -> int:
        """
        Generate the total memory usage for an `BasePandasDataset`.

        Returns
        -------
        int
        """
        return self._query_compiler.sizeof()

    def __str__(self) -> str:  # pragma: no cover
        """
        Return str(self).

        Returns
        -------
        str
        """
        return repr(self)

    @_doc_binary_op(
        operation="exclusive disjunction",
        bin_op="xor",
        right="other",
        **_doc_binary_op_kwargs,
    )
    def __xor__(self, other) -> Self:
        # The `^` operator is dispatched as a binary operation along axis 0.
        operator_name = "__xor__"
        return self._binary_op(operator_name, other, axis=0)

    @_doc_binary_op(
        operation="exclusive disjunction",
        bin_op="rxor",
        right="other",
        **_doc_binary_op_kwargs,
    )
    def __rxor__(self, other) -> Self:
        # The reflected `^` operator is dispatched as a binary operation along axis 0.
        operator_name = "__rxor__"
        return self._binary_op(operator_name, other, axis=0)

    @property
    def size(self) -> int:  # noqa: RT01, D200
        """
        Return an int representing the number of elements in this `BasePandasDataset` object.
        """
        # Element count = number of index labels times number of column labels.
        compiler = self._query_compiler
        row_count = len(compiler.index)
        column_count = len(compiler.columns)
        return row_count * column_count

    @property
    def values(self) -> np.ndarray:  # noqa: RT01, D200
        """
        Return a NumPy representation of the `BasePandasDataset`.
        """
        # Same result as calling `to_numpy()` with its default arguments.
        numpy_view = self.to_numpy()
        return numpy_view

    def _repartition(self, axis: Optional[int] = None) -> Self:
        """
        Repartitioning Modin objects to get ideal partitions inside.

        Allows to improve performance where the query compiler can't improve
        yet by doing implicit repartitioning.

        Parameters
        ----------
        axis : {0, 1, None}, optional
            The axis along which the repartitioning occurs.
            `None` is used for repartitioning along both axes.

        Returns
        -------
        DataFrame or Series
            The repartitioned dataframe or series, depending on the original type.
        """
        allowed_axis_values = (0, 1, None)
        if axis not in allowed_axis_values:
            raise ValueError(
                f"Passed `axis` parameter: {axis}, but should be one of {allowed_axis_values}"
            )
        return self.__constructor__(
            query_compiler=self._query_compiler.repartition(axis=axis)
        )

    @disable_logging
    def __getattribute__(self, item) -> Any:
        """
        Return item from the `BasePandasDataset`.

        Registered extensions are consulted first; otherwise the regular
        attribute is looked up. For callables on an empty object the returned
        value is a wrapper that routes the call through the pandas fallback.

        Parameters
        ----------
        item : hashable
            Item to get.

        Returns
        -------
        Any
        """
        # NOTE that to get an attribute, python calls __getattribute__() first and
        # then falls back to __getattr__() if the former raises an AttributeError.

        # Names listed in `EXTENSION_NO_LOOKUP` skip the extension lookup.
        if item not in EXTENSION_NO_LOOKUP:
            # `__class__` here is the implicit reference to the class this
            # method is defined in, not `type(self)`.
            extensions_result = self._getattribute__from_extension_impl(
                item, __class__._extensions
            )
            # NOTE(review): `sentinel` presumably signals "no extension
            # registered for this name" - confirm against the helper.
            if extensions_result is not sentinel:
                return extensions_result

        attr = super().__getattribute__(item)
        # The emptiness check below is skipped for names in `_DEFAULT_BEHAVIOUR`
        # and whenever the query compiler reports a lazy shape.
        if item not in _DEFAULT_BEHAVIOUR and not self._query_compiler.lazy_shape:
            # We default to pandas on empty DataFrames. This avoids a large amount of
            # pain in underlying implementation and returns a result immediately rather
            # than dealing with the edge cases that empty DataFrames have.
            if callable(attr) and self.empty and hasattr(self._pandas_class, item):

                def default_handler(*args, **kwargs):
                    return self._default_to_pandas(item, *args, **kwargs)

                return default_handler
        return attr

    def __array_ufunc__(
        self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any
    ) -> DataFrame | Series | Any:
        """
        Apply the `ufunc` to the `BasePandasDataset`.

        Parameters
        ----------
        ufunc : np.ufunc
            The NumPy ufunc to apply.
        method : str
            The method to apply.
        *inputs : tuple
            The inputs to the ufunc.
        **kwargs : dict
            Additional keyword arguments.

        Returns
        -------
        BasePandasDataset
            The result of the ufunc applied to the `BasePandasDataset`.
        """
        return self._query_compiler.do_array_ufunc_implementation(
            self, ufunc, method, *inputs, **kwargs
        )

    def __array_function__(
        self,
        func: np.func,
        types: tuple,
        args: tuple,
        kwargs: dict,
    ) -> DataFrame | Series | Any:
        """
        Apply `func` to the `BasePandasDataset`.

        This function implements NEP18-style dispatch for certain NumPy functions:
        https://numpy.org/neps/nep-0018-array-function-protocol.html#nep18

        By default, this function will transparently call __array__, followed by __array_function__
        on the returned NumPy array. We implement this function to prevent bugs with the extension
        system when another backend overrides this method.

        Parameters
        ----------
        func : np.func
            The NumPy func to apply.
        types : tuple
            The types of the args.
        args : tuple
            The args to the func.
        kwargs : dict
            Additional keyword arguments.

        Returns
        -------
        DataFrame | Series | Any
            The result of applying the function to this dataset. By default, it will return
            a NumPy array.
        """
        return self._query_compiler.do_array_function_implementation(
            self, func, types, args, kwargs
        )

    # Namespace for additional Modin functions that are not available in pandas;
    # exposed as the `modin` attribute through a `CachedAccessor` around `ModinAPI`.
    modin: ModinAPI = CachedAccessor("modin", ModinAPI)

    @disable_logging
    def is_backend_pinned(self) -> bool:
        """
        Tell whether this object's data is pinned to a particular backend.

        Returns
        -------
        bool
            True if the data is pinned.
        """
        pinned_state = self._pinned
        return pinned_state

    def _set_backend_pinned(
        self, pinned: bool, inplace: bool = False
    ) -> Optional[Self]:
        """
        Update whether this object's data is pinned to a particular backend.

        Parameters
        ----------
        pinned : bool
            Whether the data is pinned.

        inplace : bool, default: False
            Whether to update the object in place.

        Returns
        -------
        Optional[Self]
            The object with the new pin state, if `inplace` is False. Otherwise, None.
        """
        change = (self.is_backend_pinned() and not pinned) or (
            not self.is_backend_pinned() and pinned
        )
        if inplace:
            self._pinned = pinned
            return None
        else:
            if change:
                new_obj = self.__constructor__(query_compiler=self._query_compiler)
                new_obj._pinned = pinned
                return new_obj
            return self

    @doc(SET_BACKEND_DOC, class_name=__qualname__)
    def set_backend(
        self, backend: str, inplace: bool = False, *, switch_operation: str = None
    ) -> Optional[Self]:
        # TODO(https://github.com/modin-project/modin/issues/7467): refactor
        # to avoid this cyclic import in most places we do I/O, e.g. in
        # modin/pandas/io.py
        from modin.core.execution.dispatching.factories.dispatcher import (
            FactoryDispatcher,
        )

        progress_split_count = 2
        progress_iter = iter(range(progress_split_count))
        self_backend = self.get_backend()
        normalized_backend = Backend.normalize(backend)
        if normalized_backend != self_backend:
            max_rows, max_cols = self._query_compiler._max_shape()
            # Format the transfer string to be relatively short, but informative. Each
            # backend is given an allowable width of 10 and the shape integers use the
            # general format to use scientific notation when needed.
            std_field_length = 10
            operation_str = switch_operation
            self_backend_str = self_backend
            normalized_backend_str = normalized_backend
            if switch_operation is None:
                operation_str = ""
            # Provide the switch_operation; and specifically only the method, so
            # DataFrame.merge would become "merge"
            operation_str = operation_str.split(".")[-1]
            # truncate all strings to the field length if needed
            if len(operation_str) > 15:
                operation_str = operation_str[: 15 - 3] + "..."
            if len(self_backend_str) > std_field_length:
                self_backend_str = self_backend_str[: std_field_length - 3] + "..."
            if len(normalized_backend_str) > std_field_length:
                normalized_backend_str = (
                    normalized_backend_str[: std_field_length - 3] + "..."
                )

            # format the estimated max shape
            max_shape_str = f"({max_rows:.0g}, {max_cols:.0g})"
            desc = (
                f"Transfer: {self_backend_str:>10.10} → {normalized_backend_str:<10.10} "
                + f" | {operation_str:^15.15} ≃ {max_shape_str:<10}"
            )

            if ShowBackendSwitchProgress.get():
                try:
                    from tqdm.auto import trange

                    progress_iter = iter(
                        trange(
                            progress_split_count, desc=desc, bar_format="{desc} [{bar}]"
                        )
                    )
                except ImportError:
                    # Fallback to simple print statement when tqdm is not available.
                    # Print to stderr to match tqdm's behavior.

                    print(desc, file=sys.stderr)  # noqa: T201
            else:
                # Use a dummy progress iterator with no side effects if we do
                # not want to show the progress.
                progress_iter = iter(range(progress_split_count))
        else:
            return None if inplace else self
        # If tqdm is imported and a conversion is necessary, then display a progress bar.
        # Otherwise, use fallback print statements.
        next(progress_iter)

        # Attempt to transfer data based on the following preference order.
        # 1. The `self._query_compiler.move_to()`, if implemented.
        # 2. Otherwise, tries the other `query_compiler`'s `move_from()` method.
        # 3. If both methods return `NotImplemented`, it falls back to materializing
        #    as a pandas DataFrame, and then creates a new `query_compiler` on the
        #    specified backend using `from_pandas`.
        query_compiler = self._query_compiler.move_to(backend)
        if query_compiler is NotImplemented:
            query_compiler = FactoryDispatcher._get_prepared_factory_for_backend(
                backend
            ).io_cls.query_compiler_cls.move_from(
                self._query_compiler,
            )
        if query_compiler is NotImplemented:
            pandas_self = self._query_compiler.to_pandas()
            next(progress_iter)
            query_compiler = FactoryDispatcher.from_pandas(
                df=pandas_self, backend=backend
            )
        else:
            next(progress_iter)
        try:
            next(progress_iter)
        except StopIteration:
            # Last call to next informs tqdm that the operation is done
            pass
        if inplace:
            self._update_inplace(query_compiler)
            # Always unpin after an explicit set_backend operation
            self._pinned = False
            return None
        else:
            return self.__constructor__(query_compiler=query_compiler)

    # Alias: ``move_to`` is bound to the very same function object as ``set_backend``.
    move_to = set_backend

    @doc(GET_BACKEND_DOC, class_name=__qualname__)
    @disable_logging
    def get_backend(self) -> str:
        # No inline docstring on purpose: the ``@doc`` decorator above is handed
        # GET_BACKEND_DOC as the documentation template for this method.
        compiler = self._query_compiler
        return compiler.get_backend()

    @disable_logging
    def __setattr__(self, key: str, value: Any) -> None:
        """
        Assign `value` to the attribute `key` of this `BasePandasDataset`.

        Parameters
        ----------
        key : str
            Name of the attribute to set.
        value : Any
            Value to store under that name.

        Returns
        -------
        None
        """
        # An extension property is only accessible if the backend supports it;
        # it handles the assignment only when it also defines ``__set__``.
        ext = self._get_extension(key, __class__._extensions)
        if ext is sentinel or not hasattr(ext, "__set__"):
            # No usable extension: fall back to regular attribute assignment.
            return super().__setattr__(key, value)
        return ext.__set__(self, value)

    @disable_logging
    def __delattr__(self, name) -> None:
        """
        Remove the attribute `name` from this `BasePandasDataset`.

        Parameters
        ----------
        name : str
            Name of the attribute to delete.

        Returns
        -------
        None
        """
        # An extension property is only accessible if the backend supports it;
        # it handles the deletion only when it also defines ``__delete__``.
        ext = self._get_extension(name, __class__._extensions)
        if ext is sentinel or not hasattr(ext, "__delete__"):
            # No usable extension: fall back to regular attribute deletion.
            return super().__delattr__(name)
        return ext.__delete__(self)

    @disable_logging
    @_inherit_docstrings(QueryCompilerCaster._get_query_compiler)
    def _get_query_compiler(self):
        # No inline docstring: it is taken from ``QueryCompilerCaster`` by the
        # decorator above. A missing ``_query_compiler`` attribute is reported
        # as None instead of raising.
        try:
            return self._query_compiler
        except AttributeError:
            return None


================================================
FILE: modin/pandas/dataframe.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses ``DataFrame`` class, that is distributed version of ``pandas.DataFrame``."""

from __future__ import annotations

import datetime
import functools
import itertools
import os
import re
import sys
import warnings
from typing import (
    IO,
    TYPE_CHECKING,
    Any,
    Hashable,
    Iterable,
    Iterator,
    Optional,
    Sequence,
    Union,
)

import numpy as np
import pandas
from pandas import Categorical
from pandas._libs import lib
from pandas._typing import (
    CompressionOptions,
    FilePath,
    IndexLabel,
    Scalar,
    StorageOptions,
    WriteBuffer,
)
from pandas.core.common import apply_if_callable, get_cython_func
from pandas.core.dtypes.common import (
    infer_dtype_from_object,
    is_dict_like,
    is_list_like,
    is_numeric_dtype,
)
from pandas.core.indexes.frozen import FrozenList
from pandas.io.formats.info import DataFrameInfo
from pandas.util._decorators import doc
from pandas.util._validators import validate_bool_kwarg

from modin.config import PersistentPickle
from modin.core.storage_formats.pandas.query_compiler_caster import (
    EXTENSION_DICT_TYPE,
    EXTENSION_NO_LOOKUP,
)
from modin.error_message import ErrorMessage
from modin.logging import disable_logging
from modin.pandas.io import from_non_pandas, from_pandas, to_pandas
from modin.utils import (
    MODIN_UNNAMED_SERIES_LABEL,
    _inherit_docstrings,
    expanduser_path_arg,
    hashable,
    import_optional_dependency,
    sentinel,
    try_cast_to_pandas,
)

from .accessor import CachedAccessor, SparseFrameAccessor
from .base import _ATTRS_NO_LOOKUP, BasePandasDataset
from .groupby import DataFrameGroupBy
from .iterator import PartitionIterator
from .series import Series
from .utils import (
    GET_BACKEND_DOC,
    SET_BACKEND_DOC,
    SET_DATAFRAME_ATTRIBUTE_WARNING,
    _doc_binary_op,
    cast_function_modin2pandas,
)

if TYPE_CHECKING:
    from typing_extensions import Self

    from modin.core.storage_formats import BaseQueryCompiler


@_inherit_docstrings(
    pandas.DataFrame, excluded=[pandas.DataFrame.__init__], apilink="pandas.DataFrame"
)
class DataFrame(BasePandasDataset):
    """
    Modin distributed representation of ``pandas.DataFrame``.

    Internally, the data can be divided into partitions along both columns and rows
    in order to parallelize computations and utilize the user's hardware as much as possible.

    Inherit common for ``DataFrame``-s and ``Series`` functionality from the
    `BasePandasDataset` class.

    Parameters
    ----------
    data : DataFrame, Series, pandas.DataFrame, ndarray, Iterable or dict, optional
        Dict can contain ``Series``, arrays, constants, dataclass or list-like objects.
        If data is a dict, column order follows insertion-order.
    index : Index or array-like, optional
        Index to use for resulting frame. Will default to ``RangeIndex`` if no
        indexing information part of input data and no index provided.
    columns : Index or array-like, optional
        Column labels to use for resulting frame. Will default to
        ``RangeIndex`` if no column labels are provided.
    dtype : str, np.dtype, or pandas.ExtensionDtype, optional
        Data type to force. Only a single dtype is allowed. If None, infer.
    copy : bool, default: False
        Copy data from inputs. Only affects ``pandas.DataFrame`` / 2d ndarray input.
    query_compiler : BaseQueryCompiler, optional
        A query compiler object to create the ``DataFrame`` from.

    Notes
    -----
    ``DataFrame`` can be created either from passed `data` or `query_compiler`. If both
    parameters are provided, the data source is prioritized in the following order:

    1) Modin ``DataFrame`` or ``Series`` passed with `data` parameter.
    2) Query compiler from the `query_compiler` parameter.
    3) Various pandas/NumPy/Python data structures passed with `data` parameter.

    The last option is less desirable since importing such data structures is very
    inefficient; please use previously created Modin structures from the first two
    options or import data using highly efficient Modin IO tools (for example
    ``pd.read_csv``).
    """

    # The pandas class this Modin class is the distributed counterpart of.
    _pandas_class = pandas.DataFrame
    # Extension registry for this class, created as ``EXTENSION_DICT_TYPE(dict)``.
    # NOTE(review): presumably keyed by backend, then by attribute name - confirm
    # against ``modin.core.storage_formats.pandas.query_compiler_caster``.
    _extensions: EXTENSION_DICT_TYPE = EXTENSION_DICT_TYPE(dict)

    def __init__(
        self,
        data=None,
        index=None,
        columns=None,
        dtype=None,
        copy=None,
        query_compiler: BaseQueryCompiler = None,
    ) -> None:
        # Build the frame either from `data` or from an existing `query_compiler`.
        # The branches below are tried in order and the first matching one wins:
        #   1. `data` is a Modin DataFrame/Series,
        #   2. `data` is a Modin NumPy array,
        #   3. no `query_compiler` was given (pandas/NumPy/Python `data`),
        #   4. otherwise the given `query_compiler` is adopted as-is.
        # NOTE(review): imported locally, presumably to avoid a circular import
        # with ``modin.numpy`` - confirm.
        from modin.numpy import array

        # Siblings are other dataframes that share the same query compiler. We
        # use this list to update inplace when there is a shallow copy.
        self._siblings = []
        if isinstance(data, (DataFrame, Series)):
            # Modin input: start from a copy of its query compiler.
            self._query_compiler = data._query_compiler.copy()
            # Every requested index label must already be present in `data`.
            if index is not None and any(i not in data.index for i in index):
                raise NotImplementedError(
                    "Passing non-existant columns or index values to constructor not"
                    + " yet implemented."
                )
            if isinstance(data, Series):
                # We set the column name if it is not in the provided Series
                if data.name is None:
                    self.columns = [0] if columns is None else columns
                # If the columns provided are not in the named Series, pandas clears
                # the DataFrame and sets columns to the columns provided.
                elif columns is not None and data.name not in columns:
                    self._query_compiler = from_pandas(
                        pandas.DataFrame(columns=columns)
                    )._query_compiler
                # A row selection is taken from the original Series and replaces
                # whichever query compiler was assigned above.
                if index is not None:
                    self._query_compiler = data.loc[index]._query_compiler
            elif columns is None and index is None:
                # DataFrame input with no selection: register this object as a
                # sibling of `data` (see the `_siblings` note above).
                data._add_sibling(self)
            else:
                # DataFrame input with a selection: validate the requested columns,
                # then select with `.loc`, using a full slice for an axis that
                # was not specified.
                if columns is not None and any(i not in data.columns for i in columns):
                    raise NotImplementedError(
                        "Passing non-existant columns or index values to constructor not"
                        + " yet implemented."
                    )
                if index is None:
                    index = slice(None)
                if columns is None:
                    columns = slice(None)
                self._query_compiler = data.loc[index, columns]._query_compiler
        elif isinstance(data, array):
            # Modin NumPy array input: start from a copy of its query compiler.
            self._query_compiler = data._query_compiler.copy()
            # Only an explicit ``copy=False`` links this frame to the array.
            if copy is not None and not copy:
                data._add_sibling(self)
            if columns is not None and not isinstance(columns, pandas.Index):
                columns = pandas.Index(columns)
            # Apply the requested labels and dtype on top of the copied data.
            if columns is not None:
                obj_with_new_columns = self.set_axis(columns, axis=1, copy=False)
                self._update_inplace(obj_with_new_columns._query_compiler)
            if index is not None:
                obj_with_new_index = self.set_axis(index, axis=0, copy=False)
                self._update_inplace(obj_with_new_index._query_compiler)
            if dtype is not None:
                casted_obj = self.astype(dtype, copy=False)
                self._query_compiler = casted_obj._query_compiler
        # Check type of data and use appropriate constructor
        elif query_compiler is None:
            # If `from_non_pandas` produced a frame, use it directly; a None
            # result falls through to the pandas-based construction below.
            distributed_frame = from_non_pandas(data, index, columns, dtype)
            if distributed_frame is not None:
                self._query_compiler = distributed_frame._query_compiler
                return

            if isinstance(data, pandas.Index):
                # A pandas Index is handed to ``pandas.DataFrame`` unchanged.
                pass
            elif (
                is_list_like(data)
                and not is_dict_like(data)
                and not isinstance(data, np.ndarray)
            ):
                # Convert Modin Series elements to pandas, then try to rebuild a
                # container of the original type (keeping its dtype, if any);
                # fall back to the plain list when that constructor rejects it.
                old_dtype = getattr(data, "dtype", None)
                values = [
                    obj._to_pandas() if isinstance(obj, Series) else obj for obj in data
                ]
                try:
                    data = type(data)(values, dtype=old_dtype)
                except TypeError:
                    data = values
            elif is_dict_like(data) and not isinstance(
                data, (pandas.Series, Series, pandas.DataFrame, DataFrame)
            ):
                # Keep only the entries whose key is among the requested columns.
                if columns is not None:
                    data = {key: value for key, value in data.items() if key in columns}

                # All values are Modin Series: build the frame with Modin's
                # `concat` along axis 1, apply dtype/index/columns on the query
                # compiler, and return early.
                if len(data) and all(isinstance(v, Series) for v in data.values()):
                    from .general import concat

                    new_qc = concat(
                        data.values(), axis=1, keys=data.keys()
                    )._query_compiler

                    if dtype is not None:
                        new_qc = new_qc.astype({col: dtype for col in new_qc.columns})
                    if index is not None:
                        new_qc = new_qc.reindex(axis=0, labels=index)
                    if columns is not None:
                        new_qc = new_qc.reindex(axis=1, labels=columns)

                    self._query_compiler = new_qc
                    return

                # Otherwise convert any Modin Series values to pandas.
                data = {
                    k: v._to_pandas() if isinstance(v, Series) else v
                    for k, v in data.items()
                }
            # Let pandas build the frame, then distribute it.
            pandas_df = pandas.DataFrame(
                data=data, index=index, columns=columns, dtype=dtype, copy=copy
            )
            # Warn when the frame being distributed has a million or more cells.
            if pandas_df.size >= 1_000_000:
                warnings.warn(
                    "Distributing {} object. This may take some time.".format(
                        type(data)
                    )
                )
            self._query_compiler = from_pandas(pandas_df)._query_compiler
        else:
            # No usable `data` branch matched: adopt the given query compiler.
            self._query_compiler = query_compiler

    def __repr__(self) -> str:
        """
        Return a string representation for a particular ``DataFrame``.

        Returns
        -------
        str
        """
        num_rows = pandas.get_option("display.max_rows") or len(self)
        num_cols = pandas.get_option(
            "display.max_columns"
        ) or self._query_compiler.get_axis_len(1)
        result = repr(self._build_repr_df(num_rows, num_cols))
        if len(self) > num_rows or self._query_compiler.get_axis_len(1) > num_cols:
            # The split here is so that we don't repr pandas row lengths.
            return result.rsplit("\n\n", 1)[0] + "\n\n[{0} rows x {1} columns]".format(
                *self.shape
            )
        else:
            return result

    def _repr_html_(self) -> str:  # pragma: no cover
        """
        Build the HTML representation of this ``DataFrame``.

        Returns
        -------
        str
        """
        # Fall back to 60 rows / 20 columns when the display options are falsy.
        max_rows = pandas.get_option("display.max_rows")
        row_limit = max_rows if max_rows else 60
        max_cols = pandas.get_option("display.max_columns")
        col_limit = max_cols if max_cols else 20

        # We use pandas _repr_html_ to get a string of the HTML representation
        # of the dataframe.
        html = self._build_repr_df(row_limit, col_limit)._repr_html_()
        if (
            len(self) <= row_limit
            and self._query_compiler.get_axis_len(1) <= col_limit
        ):
            return html

        # We split so that we insert our correct dataframe dimensions.
        head = html.split("<p>")[0]
        return head + "<p>{0} rows x {1} columns</p>\n</div>".format(*self.shape)

    def _get_columns(self) -> pandas.Index:
        """
        Get the columns for this ``DataFrame``.

        Returns
        -------
        pandas.Index
            The union of all indexes across the partitions.
        """
        return self._query_compiler.columns

    def _set_columns(self, new_columns) -> None:
        """
        Set the columns for this ``DataFrame``.

        Parameters
        ----------
        new_columns : list-like, Index
            The new index to set.
        """
        self._query_compiler.columns = new_columns

    # Public ``columns`` attribute: reads go through ``_get_columns`` and
    # assignments through ``_set_columns``.
    columns: pandas.Index = property(_get_columns, _set_columns)

    @property
    def ndim(self) -> int:  # noqa: RT01, D200
        """
        Return the number of dimensions of the underlying data, by definition 2.
        """
        # Constant: does not inspect the data at all.
        return 2

    def drop_duplicates(
        self, subset=None, *, keep="first", inplace=False, ignore_index=False
    ) -> Union[DataFrame, None]:  # noqa: PR01, RT01, D200
        """
        Return ``DataFrame`` with duplicate rows removed.
        """
        # Thin wrapper: the work is done by the parent class implementation.
        parent = super(DataFrame, self)
        return parent.drop_duplicates(
            subset=subset,
            keep=keep,
            inplace=inplace,
            ignore_index=ignore_index,
        )

    @property
    def dtypes(self) -> pandas.Series:  # noqa: RT01, D200
        """
        Return the dtypes in the ``DataFrame``.
        """
        return self._query_compiler.dtypes

    def duplicated(self, subset=None, keep="first") -> Series:  # noqa: PR01, RT01, D200
        """
        Return boolean ``Series`` denoting duplicate rows.
        """
        # Restrict the comparison to `subset` when one is given.
        if subset is not None:
            frame = self[subset]
        else:
            frame = self
        duplicated_qc = frame._query_compiler.duplicated(keep=keep)
        return self._reduce_dimension(duplicated_qc)

    @property
    def empty(self) -> bool:  # noqa: RT01, D200
        """
        Indicate whether ``DataFrame`` is empty.
        """
        return self._query_compiler.get_axis_len(1) == 0 or len(self) == 0

    @property
    def axes(self) -> list[pandas.Index]:  # noqa: RT01, D200
        """
        Return a list representing the axes of the ``DataFrame``.
        """
        return [self.index, self.columns]

    @property
    def shape(self) -> tuple[int, int]:  # noqa: RT01, D200
        """
        Return a tuple representing the dimensionality of the ``DataFrame``.
        """
        return len(self), self._query_compiler.get_axis_len(1)

    def add_prefix(self, prefix, axis=None) -> DataFrame:  # noqa: PR01, RT01, D200
        """
        Prefix labels with string `prefix`.
        """
        # With no axis given the column labels (axis 1) are prefixed.
        if axis is None:
            axis_num = 1
        else:
            axis_num = self._get_axis_number(axis)
        build = self.__constructor__
        prefixed_qc = self._query_compiler.add_prefix(prefix, axis_num)
        return build(query_compiler=prefixed_qc)

    def add_suffix(self, suffix, axis=None) -> DataFrame:  # noqa: PR01, RT01, D200
        """
        Suffix labels with string `suffix`.
        """
        # With no axis given the column labels (axis 1) are suffixed.
        if axis is None:
            axis_num = 1
        else:
            axis_num = self._get_axis_number(axis)
        build = self.__constructor__
        suffixed_qc = self._query_compiler.add_suffix(suffix, axis_num)
        return build(query_compiler=suffixed_qc)

    def map(self, func, na_action: Optional[str] = None, **kwargs) -> DataFrame:
        if not callable(func):
            raise ValueError("'{0}' object is not callable".format(type(func)))
        return self.__constructor__(
            query_compiler=self._query_compiler.map(func, na_action=na_action, **kwargs)
        )

    def applymap(self, func, na_action: Optional[str] = None, **kwargs) -> DataFrame:
        # Deprecated spelling of ``map``: warn, then delegate unchanged.
        deprecation_message = (
            "DataFrame.applymap has been deprecated. Use DataFrame.map instead."
        )
        warnings.warn(deprecation_message, FutureWarning)
        return self.map(func, na_action=na_action, **kwargs)

    def apply(
        self,
        func,
        axis=0,
        raw=False,
        result_type=None,
        args=(),
        by_row="compat",
        engine="python",
        engine_kwargs=None,
        **kwargs,
    ) -> Union[DataFrame, Series]:  # noqa: PR01, RT01, D200
        """
        Apply a function along an axis of the ``DataFrame``.
        """
        # Non-default `by_row` / `engine` / `engine_kwargs` are handed to pandas.
        uses_default_engine = (
            by_row == "compat" and engine == "python" and not engine_kwargs
        )
        if not uses_default_engine:
            # TODO: add test
            return self._default_to_pandas(
                pandas.DataFrame.apply,
                func=func,
                axis=axis,
                raw=raw,
                result_type=result_type,
                args=args,
                by_row=by_row,
                engine=engine,
                engine_kwargs=engine_kwargs,
                **kwargs,
            )

        pandas_func = cast_function_modin2pandas(func)
        axis_num = self._get_axis_number(axis)
        applied = super(DataFrame, self).apply(
            pandas_func,
            axis=axis_num,
            raw=raw,
            result_type=result_type,
            args=args,
            **kwargs,
        )
        # Anything that is not a query compiler of our own type is returned
        # untouched (a scalar was returned).
        if not isinstance(applied, type(self._query_compiler)):
            return applied

        if result_type == "reduce":
            return Series(query_compiler=applied)
        if result_type == "broadcast":
            return DataFrame(query_compiler=applied)

        # This also handles 'result_type == "expand"' since it makes the output type
        # depend on the `func` result (Series for a scalar, DataFrame for list-like):
        # a result whose either axis is the unnamed-series label is a Series.
        reduced_index = pandas.Index([MODIN_UNNAMED_SERIES_LABEL])
        looks_reduced = applied.get_axis(axis_num).equals(
            reduced_index
        ) or applied.get_axis(axis_num ^ 1).equals(reduced_index)
        result_cls = Series if looks_reduced else DataFrame
        return result_cls(query_compiler=applied)

    def groupby(
        self,
        by=None,
        axis=lib.no_default,
        level=None,
        as_index=True,
        sort=True,
        group_keys=True,
        observed=lib.no_default,
        dropna: bool = True,
    ):  # noqa: PR01, RT01, D200
        """
        Group ``DataFrame`` using a mapper or by a ``Series`` of columns.
        """
        # Passing `axis` explicitly is deprecated either way; the warning text
        # differs for axis=1 and for any other explicit value.
        if axis is not lib.no_default:
            axis = self._get_axis_number(axis)
            if axis == 1:
                warnings.warn(
                    "DataFrame.groupby with axis=1 is deprecated. Do "
                    + "`frame.T.groupby(...)` without axis instead.",
                    FutureWarning,
                )
            else:
                warnings.warn(
                    "The 'axis' keyword in DataFrame.groupby is deprecated and "
                    + "will be removed in a future version.",
                    FutureWarning,
                )
        else:
            axis = 0

        # At this point `axis` is already either the result of `_get_axis_number`
        # or the literal 0; it is normalized once more here.
        axis = self._get_axis_number(axis)
        idx_name = None
        # Drop here indicates whether or not to drop the data column before doing the
        # groupby. The typical pandas behavior is to drop when the data came from this
        # dataframe. When a string, Series directly from this dataframe, or list of
        # strings is passed in, the data used for the groupby is dropped before the
        # groupby takes place.
        drop = False

        # A one-element list-like `by` (that is not a Series) is unwrapped to its
        # single element; the flag records that it was originally given as a list.
        return_tuple_when_iterating = False
        if (
            not isinstance(by, (pandas.Series, Series))
            and is_list_like(by)
            and len(by) == 1
        ):
            by = by[0]
            return_tuple_when_iterating = True

        if callable(by):
            # A callable is evaluated on the index labels.
            by = self.index.map(by)
        elif hashable(by) and not isinstance(by, (pandas.Grouper, FrozenList)):
            drop = by in self.columns
            idx_name = by
            if by is not None and by in self._query_compiler.get_index_names(axis):
                # In this case we pass the string value of the name through to the
                # partitions. This is more efficient than broadcasting the values.
                level, by = by, None
            elif level is None:
                # Otherwise, when no `level` was given, group by the query
                # compiler of the selected column.
                by = self.__getitem__(by)._query_compiler
        elif isinstance(by, Series):
            # Only drop the grouping data when the Series' parent is this frame.
            drop = by._parent is self
            idx_name = by.name
            by = by._query_compiler
        elif isinstance(by, pandas.Grouper):
            # Drop when the Grouper's key is contained in this frame.
            drop = by.key in self
        elif is_list_like(by):
            # fastpath for multi column groupby
            # Taken only for axis=0 when every element is a hashable contained in
            # this frame, a Modin Series, a Grouper whose key is contained in this
            # frame, or a list-like as long as the grouped axis.
            if axis == 0 and all(
                (
                    (hashable(o) and (o in self))
                    or isinstance(o, Series)
                    or (isinstance(o, pandas.Grouper) and o.key in self)
                    or (is_list_like(o) and len(o) == len(self._get_axis(axis)))
                )
                for o in by
            ):
                # `has_external` becomes True as soon as one key is not simply a
                # label of this frame (a Grouper, a Series with another parent,
                # or raw list-like values).
                has_external = False
                processed_by = []

                for current_by in by:
                    if isinstance(current_by, pandas.Grouper):
                        processed_by.append(current_by)
                        has_external = True
                    elif hashable(current_by):
                        processed_by.append(current_by)
                    elif isinstance(current_by, Series):
                        if current_by._parent is self:
                            # A Series of this frame is replaced by its name.
                            processed_by.append(current_by.name)
                        else:
                            processed_by.append(current_by._query_compiler)
                            has_external = True
                    else:
                        has_external = True
                        processed_by.append(current_by)

                by = processed_by

                # All keys are labels of this frame: group by the query compiler
                # of exactly those columns.
                if not has_external:
                    by = self[processed_by]._query_compiler

                drop = True
            else:
                mismatch = len(by) != len(self._get_axis(axis))
                if mismatch and all(
                    hashable(obj)
                    and (
                        obj in self or obj in self._query_compiler.get_index_names(axis)
                    )
                    for obj in by
                ):
                    # In the future, we will need to add logic to handle this, but for now
                    # we default to pandas in this case.
                    pass
                elif mismatch and any(
                    hashable(obj) and obj not in self.columns for obj in by
                ):
                    # Raise for the first name that is not contained in this frame.
                    names = [o.name if isinstance(o, Series) else o for o in by]
                    raise KeyError(next(x for x in names if x not in self))
        return DataFrameGroupBy(
            self,
            by,
            axis,
            level,
            as_index,
            sort,
            group_keys,
            idx_name,
            observed=observed,
            drop=drop,
            dropna=dropna,
            return_tuple_when_iterating=return_tuple_when_iterating,
            backend_pinned=self.is_backend_pinned(),
        )

    def keys(self) -> pandas.Index:  # noqa: RT01, D200
        """
        Get columns of the ``DataFrame``.
        """
        column_labels = self.columns
        return column_labels

    def transpose(self, copy=False, *args) -> DataFrame:  # noqa: PR01, RT01, D200
        """
        Transpose index and columns.
        """
        # FIXME: Judging by pandas docs `*args` serves only compatibility purpose
        # and does not affect the result, we shouldn't pass it to the query compiler.
        # `copy` is accepted but not used by this method.
        build = self.__constructor__
        transposed_qc = self._query_compiler.transpose(*args)
        return build(query_compiler=transposed_qc)

    # To enable dynamic backend switching, we must use a `def` so the lookup of `self.transpose`
    # is performed dynamically, whereas declaring `T = property(transpose)` makes it always use
    # the originally-defined version without the switching wrapper.
    @property
    def T(self) -> DataFrame:
        return self.transpose()

    def add(
        self, other, axis="columns", level=None, fill_value=None
    ) -> DataFrame:  # noqa: PR01, RT01, D200
        """
        Get addition of ``DataFrame`` and `other`, element-wise (binary operator `add`).
        """
        # A Modin Series operand has to be broadcast against the frame.
        other_is_series = isinstance(other, Series)
        return self._binary_op(
            "add",
            other,
            axis=axis,
            level=level,
            fill_value=fill_value,
            broadcast=other_is_series,
        )

    def assign(self, **kwargs) -> DataFrame:  # noqa: PR01, RT01, D200
        """
        Assign new columns to a ``DataFrame``.
        """
        # Work on a copy so the caller's frame is left untouched.
        result = self.copy()
        for name, value in kwargs.items():
            # Callables are evaluated against the frame built so far, so they
            # can see columns assigned by earlier keywords.
            result[name] = value(result) if callable(value) else value
        return result

    def boxplot(
        self,
        column=None,
        by=None,
        ax=None,
        fontsize=None,
        rot=0,
        grid=True,
        figsize=None,
        layout=None,
        return_type=None,
        backend=None,
        **kwargs,
    ):  # noqa: PR01, RT01, D200
        """
        Make a box plot from ``DataFrame`` columns.
        """
        # Plotting is delegated to pandas: convert the frame, then forward
        # every argument unchanged.
        pandas_frame = to_pandas(self)
        return pandas_frame.boxplot(
            column=column,
            by=by,
            ax=ax,
            fontsize=fontsize,
            rot=rot,
            grid=grid,
            figsize=figsize,
            layout=layout,
            return_type=return_type,
            backend=backend,
            **kwargs,
        )

    def combine(
        self, other, func, fill_value=None, overwrite=True
    ) -> DataFrame:  # noqa: PR01, RT01, D200
        """
        Perform column-wise combine with another ``DataFrame``.
        """
        # Thin wrapper: the work is done by the parent class implementation.
        parent = super(DataFrame, self)
        return parent.combine(
            other,
            func,
            fill_value=fill_value,
            overwrite=overwrite,
        )

    def compare(
        self,
        other,
        align_axis=1,
        keep_shape: bool = False,
        keep_equal: bool = False,
        result_names=("self", "other"),
    ) -> DataFrame:  # noqa: PR01, RT01, D200
        """
        Compare to another ``DataFrame`` and show the differences.
        """
        # Only Modin DataFrames can be compared with each other.
        if not isinstance(other, DataFrame):
            raise TypeError(f"Cannot compare DataFrame to {type(other)}")
        validated_other = self._validate_other(other, 0, compare_index=True)
        build = self.__constructor__
        compared_qc = self._query_compiler.compare(
            validated_other,
            align_axis=align_axis,
            keep_shape=keep_shape,
            keep_equal=keep_equal,
            result_names=result_names,
        )
        return build(query_compiler=compared_qc)

    def corr(
        self, method="pearson", min_periods=1, numeric_only=False
    ) -> DataFrame:  # noqa: PR01, RT01, D200
        """
        Compute pairwise correlation of columns, excluding NA/null values.
        """
        build = self.__constructor__
        corr_qc = self._query_compiler.corr(
            method=method,
            min_periods=min_periods,
            numeric_only=numeric_only,
        )
        return build(query_compiler=corr_qc)

    def corrwith(
        self, other, axis=0, drop=False, method="pearson", numeric_only=False
    ) -> Series:  # noqa: PR01, RT01, D200
        """
        Compute pairwise correlation.
        """
        # Only Modin Series / DataFrame operands are accepted.
        if not isinstance(other, (Series, DataFrame)):
            raise TypeError(f"unsupported type: {type(other)}")
        # NOTE(review): the annotation says ``Series`` but the result is built with
        # ``self.__constructor__``; other Series-returning methods in this class
        # (``duplicated``, ``dot``) use ``self._reduce_dimension`` - confirm which
        # is intended.
        build = self.__constructor__
        corrwith_qc = self._query_compiler.corrwith(
            other=other._query_compiler,
            axis=axis,
            drop=drop,
            method=method,
            numeric_only=numeric_only,
        )
        return build(query_compiler=corrwith_qc)

    def cov(
        self, min_periods=None, ddof: Optional[int] = 1, numeric_only=False
    ) -> DataFrame:  # noqa: PR01, RT01, D200
        """
        Compute pairwise covariance of columns, excluding NA/null values.
        """
        cov_df = self
        if numeric_only:
            # Fetch the dtypes once and iterate (label, dtype) pairs: this avoids
            # recomputing `self.dtypes` per column and a label-based lookup that
            # returns several dtypes at once when column labels are duplicated.
            non_numeric = [
                label
                for label, dtype in self.dtypes.items()
                if not is_numeric_dtype(dtype)
            ]
            cov_df = self.drop(columns=non_numeric)

        if min_periods is not None and min_periods > len(cov_df):
            # Not enough rows for any pair of columns: the result is all-NaN.
            # Label both axes with the column labels, as pandas does, instead of
            # returning a frame with default integer labels.
            labels = cov_df.columns
            result = np.full((len(labels), len(labels)), np.nan)
            return cov_df.__constructor__(result, index=labels, columns=labels)

        return cov_df.__constructor__(
            query_compiler=cov_df._query_compiler.cov(
                min_periods=min_periods, ddof=ddof
            )
        )

    def dot(self, other) -> Union[DataFrame, Series]:  # noqa: PR01, RT01, D200
        """
        Compute the matrix multiplication between the ``DataFrame`` and `other`.
        """
        if isinstance(other, BasePandasDataset):
            # Modin operand: our columns and its index must line up exactly,
            # i.e. their union may not be larger than either of them.
            labels = self.columns.union(other.index)
            misaligned = len(labels) > self._query_compiler.get_axis_len(1) or len(
                labels
            ) > len(other)
            if misaligned:
                raise ValueError("Matrices are not aligned")

            other_qc = other.reindex(index=labels)._query_compiler
            other_is_frame = isinstance(other, DataFrame)
            # Anything that is not a DataFrame operand is squeezed.
            product_qc = self._query_compiler.dot(
                other_qc, squeeze_self=False, squeeze_other=not other_is_frame
            )
            if other_is_frame:
                return self.__constructor__(query_compiler=product_qc)
            return self._reduce_dimension(query_compiler=product_qc)

        # Any other operand is treated as array data.
        values = np.asarray(other)
        if self.shape[1] != values.shape[0]:
            raise ValueError(
                "Dot product shape mismatch, {} vs {}".format(self.shape, values.shape)
            )

        product_qc = self._query_compiler.dot(values, squeeze_self=False)
        # A 2-D (or higher) operand keeps the result a frame; 1-D reduces it.
        if values.ndim > 1:
            return self.__constructor__(query_compiler=product_qc)
        return self._reduce_dimension(query_compiler=product_qc)

    def eq(
        self, other, axis="columns", level=None
    ) -> DataFrame:  # noqa: PR01, RT01, D200
        """
        Perform equality comparison of ``DataFrame`` and `other` (binary operator `eq`).
        """
        # `broadcast` is set only when `other` is a Series.
        broadcast = isinstance(other, Series)
        return self._binary_op(
            "eq", other, axis=axis, level=level, broadcast=broadcast
        )

    def equals(self, other) -> bool:  # noqa: PR01, RT01, D200
        """
        Test whether two objects contain the same elements.
        """
        if isinstance(other, pandas.DataFrame):
            # Wrap plain pandas input so both operands expose a query compiler.
            other = self.__constructor__(other)

        # Cheap structural checks first: type, then row labels, then column labels.
        if type(self) is not type(other):
            return False
        if not self.index.equals(other.index):
            return False
        if not self.columns.equals(other.columns):
            return False

        elementwise = self.__constructor__(
            query_compiler=self._query_compiler.equals(other._query_compiler)
        )
        return elementwise.all(axis=None)

    def _update_var_dicts_in_kwargs(self, expr, kwargs) -> None:
        """
        Copy variables with "@" prefix in `local_dict` and `global_dict` keys of kwargs.

        Parameters
        ----------
        expr : str
            The expression string to search variables with "@" prefix.
        kwargs : dict
            See the documentation for eval() for complete details on the keyword arguments accepted by query().
        """
        if "@" not in expr:
            return

        frame = sys._getframe()
        caller = frame
        try:
            # TODO(https://github.com/modin-project/modin/issues/4478): fix this
            # The namespaces are read from the frame six levels above this one.
            for _ in range(6):
                caller = caller.f_back
            f_locals = caller.f_locals
            f_globals = caller.f_globals
        finally:
            # Drop the frame references so they are not kept alive by this call.
            del frame, caller

        referenced = set(re.findall(r"@([\w]+)", expr))
        local_dict = {name: f_locals[name] for name in referenced if name in f_locals}
        global_dict = {
            name: f_globals[name] for name in referenced if name in f_globals
        }

        # Explicitly supplied dictionaries take precedence over captured values.
        if local_dict:
            local_dict.update(kwargs.get("local_dict") or {})
            kwargs["local_dict"] = local_dict
        if global_dict:
            global_dict.update(kwargs.get("global_dict") or {})
            kwargs["global_dict"] = global_dict

    def eval(self, expr, inplace=False, **kwargs):  # noqa: PR01, RT01, D200
        """
        Evaluate a string describing operations on ``DataFrame`` columns.
        """
        from modin.core.computation.eval import _check_engine
        from modin.core.computation.eval import eval as _eval

        # Must be called directly from here: the helper looks up "@" variables
        # at a fixed number of frames above itself.
        self._update_var_dicts_in_kwargs(expr, kwargs)
        inplace = validate_bool_kwarg(inplace, "inplace")

        engine = _check_engine(kwargs.get("engine", None))
        if engine == "numexpr":
            # on numexpr engine, pandas.eval returns np.array if input is not of pandas
            # type, so we can't use pandas eval [1]. Even if we could, pandas eval seems
            # to convert all the data to numpy and then do the numexpr add, which is
            # slow for modin. The user would not really be getting the benefit of
            # numexpr.
            # [1] https://github.com/pandas-dev/pandas/blob/934eebb532cf50e872f40638a788000be6e4dda4/pandas/core/computation/align.py#L78
            return self._default_to_pandas(
                pandas.DataFrame.eval, expr, inplace=inplace, **kwargs
            )

        # Bump `level` by one before delegating to `_eval`.
        kwargs["level"] = kwargs.pop("level", 0) + 1
        index_resolvers = self._get_index_resolvers()
        column_resolvers = self._get_cleaned_column_resolvers()
        kwargs.setdefault("target", self)
        # Caller-provided resolvers come first, then columns, then index.
        kwargs["resolvers"] = tuple(kwargs.get("resolvers", ())) + (
            column_resolvers,
            index_resolvers,
        )

        return _eval(expr, inplace=inplace, **kwargs)

    def fillna(
        self,
        value=None,
        *,
        method=None,
        axis=None,
        inplace=False,
        limit=None,
        downcast=lib.no_default,
    ) -> Union[DataFrame, None]:  # noqa: PR01, RT01, D200
        """
        Fill NA/NaN values using the specified method.
        """
        # Arguments forwarded untouched to the shared base implementation.
        passthrough = {
            "value": value,
            "method": method,
            "axis": axis,
            "inplace": inplace,
            "limit": limit,
            "downcast": downcast,
        }
        # `squeeze_self` is always False here; `squeeze_value` flags a Series
        # fill value.
        return super(DataFrame, self).fillna(
            squeeze_self=False,
            squeeze_value=isinstance(value, Series),
            **passthrough,
        )

    def floordiv(
        self, other, axis="columns", level=None, fill_value=None
    ) -> DataFrame:  # noqa: PR01, RT01, D200
        """
        Get integer division of ``DataFrame`` and `other`, element-wise (binary operator `floordiv`).
        """
        # `broadcast` is set only when `other` is a Series.
        broadcast = isinstance(other, Series)
        return self._binary_op(
            "floordiv",
            other,
            axis=axis,
            level=level,
            fill_value=fill_value,
            broadcast=broadcast,
        )

    @classmethod
    def from_dict(
        cls, data, orient="columns", dtype=None, columns=None
    ) -> DataFrame:  # pragma: no cover # noqa: PR01, RT01, D200
        """
        Construct ``DataFrame`` from dict of array-like or dicts.
        """
        ErrorMessage.default_to_pandas("`from_dict`")
        # Build the frame with pandas first, then convert it to Modin.
        pandas_frame = pandas.DataFrame.from_dict(
            data, orient=orient, dtype=dtype, columns=columns
        )
        return from_pandas(pandas_frame)

    @classmethod
    def from_records(
        cls,
        data,
        index=None,
        exclude=None,
        columns=None,
        coerce_float=False,
        nrows=None,
    ) -> DataFrame:  # pragma: no cover # noqa: PR01, RT01, D200
        """
        Convert structured or record ndarray to ``DataFrame``.
        """
        ErrorMessage.default_to_pandas("`from_records`")
        # Build the frame with pandas first, then convert it to Modin.
        pandas_frame = pandas.DataFrame.from_records(
            data,
            index=index,
            exclude=exclude,
            columns=columns,
            coerce_float=coerce_float,
            nrows=nrows,
        )
        return from_pandas(pandas_frame)

    def ge(
        self, other, axis="columns", level=None
    ) -> DataFrame:  # noqa: PR01, RT01, D200
        """
        Get greater than or equal comparison of ``DataFrame`` and `other`, element-wise (binary operator `ge`).
        """
        # `broadcast` is set only when `other` is a Series.
        broadcast = isinstance(other, Series)
        return self._binary_op(
            "ge", other, axis=axis, level=level, broadcast=broadcast
        )

    def gt(
        self, other, axis="columns", level=None
    ) -> DataFrame:  # noqa: PR01, RT01, D200
        """
        Get greater than comparison of ``DataFrame`` and `other`, element-wise (binary operator `gt`).
        """
        # `broadcast` is set only when `other` is a Series.
        broadcast = isinstance(other, Series)
        return self._binary_op(
            "gt", other, axis=axis, level=level, broadcast=broadcast
        )

    def hist(
        data,
        column: IndexLabel | None = None,
        by=None,
        grid: bool = True,
        xlabelsize: int | None = None,
        xrot: float | None = None,
        ylabelsize: int | None = None,
        yrot: float | None = None,
        ax=None,
        sharex: bool = False,
        sharey: bool = False,
        figsize: tuple[int, int] | None = None,
        layout: tuple[int, int] | None = None,
        bins: int | Sequence[int] = 10,
        backend: str | None = None,
        legend: bool = False,
        **kwargs,
    ):  # pragma: no cover # noqa: PR01, RT01, D200
        """
        Make a histogram of the ``DataFrame``.
        """
        # The instance is named `data` rather than `self` in this signature.
        # Every named option is forwarded as-is to the pandas implementation.
        named_options = {
            "column": column,
            "by": by,
            "grid": grid,
            "xlabelsize": xlabelsize,
            "xrot": xrot,
            "ylabelsize": ylabelsize,
            "yrot": yrot,
            "ax": ax,
            "sharex": sharex,
            "sharey": sharey,
            "figsize": figsize,
            "layout": layout,
            "bins": bins,
            "backend": backend,
            "legend": legend,
        }
        return data._default_to_pandas(
            pandas.DataFrame.hist, **named_options, **kwargs
        )

    def info(
        self,
        verbose: Optional[bool] = None,
        buf: Optional[IO[str]] = None,
        max_cols: Optional[int] = None,
        memory_usage: Optional[Union[bool, str]] = None,
        show_counts: Optional[bool] = None,
    ) -> None:  # noqa: PR01, D200
        """
        Print a concise summary of the ``DataFrame``.
        """
        # Build the summary object, then render it with the display options.
        DataFrameInfo(data=self, memory_usage=memory_usage).render(
            buf=buf,
            max_cols=max_cols,
            verbose=verbose,
            show_counts=show_counts,
        )

    def insert(
        self, loc, column, value, allow_duplicates=lib.no_default
    ) -> None:  # noqa: PR01, D200
        """
        Insert column into ``DataFrame`` at specified location.
        """
        from modin.numpy import array

        # Two-dimensional input is accepted only if it holds exactly one column,
        # in which case it is squeezed to one dimension.
        if (
            isinstance(value, (DataFrame, pandas.DataFrame))
            or isinstance(value, (array, np.ndarray))
            and len(value.shape) > 1
        ):
            if isinstance(value, (array, np.ndarray)) and value.shape[1] != 1:
                raise ValueError(
                    f"Expected a 1D array, got an array with shape {value.shape}"
                )
            elif (
                isinstance(value, (DataFrame, pandas.DataFrame)) and value.shape[1] != 1
            ):
                raise ValueError(
                    "Expected a one-dimensional object, got a DataFrame with "
                    + f"{len(value.columns)} columns instead."
                )
            value = value.squeeze(axis=1)
        if not self._query_compiler.lazy_row_count and len(self) == 0:
            # The frame has no rows, so the new index is taken from `value`.
            # ``hasattr(value, "index")`` is not a usable test for that: lists,
            # tuples and strings expose an ``index`` *method*, which previously
            # slipped through and failed on ``value.index.copy()``. Require a
            # real pandas Index and convert everything else to a Series.
            if not isinstance(getattr(value, "index", None), pandas.Index):
                try:
                    value = pandas.Series(value)
                except (TypeError, ValueError, IndexError):
                    raise ValueError(
                        "Cannot insert into a DataFrame with no defined index "
                        + "and a value that cannot be converted to a "
                        + "Series"
                    )
            new_index = value.index.copy()
            new_columns = self.columns.insert(loc, column)
            new_query_compiler = self.__constructor__(
                value, index=new_index, columns=new_columns
            )._query_compiler
        elif self._query_compiler.get_axis_len(1) == 0 and loc == 0:
            # Rows but no columns: build a single-column frame on our index.
            new_index = self.index
            new_query_compiler = self.__constructor__(
                data=value,
                columns=[column],
                index=None if len(new_index) == 0 else new_index,
            )._query_compiler
        else:
            if (
                is_list_like(value)
                and not isinstance(value, (pandas.Series, Series))
                and len(value) != len(self)
            ):
                raise ValueError(
                    "Length of values ({}) does not match length of index ({})".format(
                        len(value), len(self)
                    )
                )
            if allow_duplicates is not True and column in self.columns:
                raise ValueError(f"cannot insert {column}, already exists")
            columns_len = self._query_compiler.get_axis_len(1)
            if not -columns_len <= loc <= columns_len:
                raise IndexError(
                    f"index {loc} is out of bounds for axis 0 with size {columns_len}"
                )
            elif loc < 0:
                raise ValueError("unbounded slice")
            # Modin containers are handed to the backend as query compilers.
            if isinstance(value, (Series, array)):
                value = value._query_compiler
            new_query_compiler = self._query_compiler.insert(loc, column, value)

        self._update_inplace(new_query_compiler=new_query_compiler)

    def isna(self) -> DataFrame:
        """
        Detect missing values.

        Returns
        -------
        DataFrame
            The result of detecting missing values.
        """
        # Delegates to the shared base-class implementation.
        missing_mask = super(DataFrame, self).isna()
        return missing_mask

    def isnull(self) -> DataFrame:
        """
        Detect missing values.

        Returns
        -------
        DataFrame
            The result of detecting missing values.
        """
        # Delegates to the shared base-class implementation.
        missing_mask = super(DataFrame, self).isnull()
        return missing_mask

    def iterrows(self) -> Iterable[tuple[Hashable, Series]]:  # noqa: D200
        """
        Iterate over ``DataFrame`` rows as (index, ``Series``) pairs.
        """

        def iterrow_builder(row):
            """Pair the given row with its name."""
            return row.name, row

        yield from PartitionIterator(self, 0, iterrow_builder)

    def items(self) -> Iterable[tuple[Hashable, Series]]:  # noqa: D200
        """
        Iterate over (column name, ``Series``) pairs.
        """

        def items_builder(column):
            """Pair the given column with its name."""
            return column.name, column

        yield from PartitionIterator(self, 1, items_builder)

    def itertuples(
        self, index=True, name="Pandas"
    ) -> Iterable[tuple[Any, ...]]:  # noqa: PR01, D200
        """
        Iterate over ``DataFrame`` rows as ``namedtuple``-s.
        """

        def itertuples_builder(row):
            """Return the ``namedtuple`` for the given row."""
            # Turn the row into a one-row pandas frame and take the single
            # tuple pandas produces for it.
            single_row_frame = row._to_pandas().to_frame().T
            return next(single_row_frame.itertuples(index=index, name=name))

        yield from PartitionIterator(self, 0, itertuples_builder)

    def join(
        self,
        other,
        on=None,
        how="left",
        lsuffix="",
        rsuffix="",
        sort=False,
        validate=None,
    ) -> DataFrame:  # noqa: PR01, RT01, D200
        """
        Join columns of another ``DataFrame``.
        """
        if on is not None and not isinstance(other, (Series, DataFrame)):
            raise ValueError(
                "Joining multiple DataFrames only supported for joining on index"
            )
        if validate is not None:
            # Any `validate` request is handled by pandas itself.
            return self._default_to_pandas(
                pandas.DataFrame.join,
                other,
                on=on,
                how=how,
                lsuffix=lsuffix,
                rsuffix=rsuffix,
                sort=sort,
                validate=validate,
            )

        if isinstance(other, Series):
            if other.name is None:
                raise ValueError("Other Series must have a name")
            other = self.__constructor__(other)
        if on is not None or how == "cross":
            joined_qc = self._query_compiler.join(
                other._query_compiler,
                on=on,
                how=how,
                lsuffix=lsuffix,
                rsuffix=rsuffix,
                sort=sort,
                validate=validate,
            )
            return self.__constructor__(query_compiler=joined_qc)

        # Joining the empty DataFrames with either index or columns is
        # fast. It gives us proper error checking for the edge cases that
        # would otherwise require a lot more logic.
        left_stub = pandas.DataFrame(columns=self.columns)
        if isinstance(other, DataFrame):
            right_stub = pandas.DataFrame(columns=other.columns)
            other = [other]
        else:
            right_stub = [pandas.DataFrame(columns=obj.columns) for obj in other]
        new_columns = left_stub.join(
            right_stub, lsuffix=lsuffix, rsuffix=rsuffix
        ).columns

        # The join on index is carried out as a column-wise concatenation; the
        # column labels computed above are then applied to the result.
        new_frame = self.__constructor__(
            query_compiler=self._query_compiler.concat(
                1, [obj._query_compiler for obj in other], join=how, sort=sort
            )
        )
        new_frame.columns = new_columns
        return new_frame

    def isetitem(self, loc, value) -> None:
        # No native implementation: the call is forwarded to
        # ``pandas.DataFrame.isetitem`` through the default-to-pandas path.
        pandas_method = pandas.DataFrame.isetitem
        return self._default_to_pandas(pandas_method, loc=loc, value=value)

    def le(
        self, other, axis="columns", level=None
    ) -> DataFrame:  # noqa: PR01, RT01, D200
        """
        Get less than or equal comparison of ``DataFrame`` and `other`, element-wise (binary operator `le`).
        """
        # `broadcast` is set only when `other` is a Series.
        broadcast = isinstance(other, Series)
        return self._binary_op(
            "le", other, axis=axis, level=level, broadcast=broadcast
        )

    def lt(
        self, other, axis="columns", level=None
    ) -> DataFrame:  # noqa: PR01, RT01, D200
        """
        Get less than comparison of ``DataFrame`` and `other`, element-wise (binary operator `lt`).
        """
        # `broadcast` is set only when `other` is a Series.
        broadcast = isinstance(other, Series)
        return self._binary_op(
            "lt", other, axis=axis, level=level, broadcast=broadcast
        )

    def melt(
        self,
        id_vars=None,
        value_vars=None,
        var_name=None,
        value_name="value",
        col_level=None,
        ignore_index=True,
    ) -> DataFrame:  # noqa: PR01, RT01, D200
        """
        Unpivot a ``DataFrame`` from wide to long format, optionally leaving identifiers set.
        """
        # Normalize `id_vars` to a list-like: nothing -> [], scalar -> [scalar].
        if id_vars is None:
            id_vars = []
        elif not is_list_like(id_vars):
            id_vars = [id_vars]

        # By default every column that is not an identifier gets unpivoted.
        if value_vars is None:
            value_vars = self.columns.drop(id_vars)

        # The variable column is named after the columns axis, else "variable".
        if var_name is None:
            var_name = self._query_compiler.get_index_name(axis=1)
            if var_name is None:
                var_name = "variable"

        melted_qc = self._query_compiler.melt(
            id_vars=id_vars,
            value_vars=value_vars,
            var_name=var_name,
            value_name=value_name,
            col_level=col_level,
            ignore_index=ignore_index,
        )
        return self.__constructor__(query_compiler=melted_qc)

    def merge(
        self,
        right,
        how="inner",
        on=None,
        left_on=None,
        right_on=None,
        left_index=False,
        right_index=False,
        sort=False,
        suffixes=("_x", "_y"),
        copy=None,
        indicator=False,
        validate=None,
    ) -> DataFrame:  # noqa: PR01, RT01, D200
        """
        Merge ``DataFrame`` or named ``Series`` objects with a database-style join.
        """
        copy = True if copy is None else copy

        # A named Series is merged as a one-column frame.
        if isinstance(right, Series):
            if right.name is None:
                raise ValueError("Cannot merge a Series without a name")
            right = right.to_frame()
        if not isinstance(right, DataFrame):
            raise TypeError(
                f"Can only merge Series or DataFrame objects, a {type(right)} was passed"
            )

        # If we are joining on the index and we are using
        # default parameters we can map this to a join
        if left_index and right_index and not indicator:
            left_suffix, right_suffix = suffixes[0], suffixes[1]
            return self.join(
                right, how=how, lsuffix=left_suffix, rsuffix=right_suffix, sort=sort
            )

        merged_qc = self._query_compiler.merge(
            right._query_compiler,
            how=how,
            on=on,
            left_on=left_on,
            right_on=right_on,
            left_index=left_index,
            right_index=right_index,
            sort=sort,
            suffixes=suffixes,
            copy=copy,
            indicator=indicator,
            validate=validate,
        )
        return self.__constructor__(query_compiler=merged_qc)

    def mod(
        self, other, axis="columns", level=None, fill_value=None
    ) -> DataFrame:  # noqa: PR01, RT01, D200
        """
        Get modulo of ``DataFrame`` and `other`, element-wise (binary operator `mod`).
        """
        # `broadcast` is set only when `other` is a Series.
        broadcast = isinstance(other, Series)
        return self._binary_op(
            "mod",
            other,
            axis=axis,
            level=level,
            fill_value=fill_value,
            broadcast=broadcast,
        )

    def mul(
        self, other, axis="columns", level=None, fill_value=None
    ) -> DataFrame:  # noqa: PR01, RT01, D200
        """
        Get multiplication of ``DataFrame`` and `other`, element-wise (binary operator `mul`).
        """
        # `broadcast` is set only when `other` is a Series.
        broadcast = isinstance(other, Series)
        return self._binary_op(
            "mul",
            other,
            axis=axis,
            level=level,
            fill_value=fill_value,
            broadcast=broadcast,
        )

    multiply = mul

    def rmul(
        self, other, axis="columns", level=None, fill_value=None
    ) -> DataFrame:  # noqa: PR01, RT01, D200
        """
        Get multiplication of ``DataFrame`` and `other`, element-wise (binary operator `rmul`).
        """
        # `broadcast` is set only when `other` is a Series.
        broadcast = isinstance(other, Series)
        return self._binary_op(
            "rmul",
            other,
            axis=axis,
            level=level,
            fill_value=fill_value,
            broadcast=broadcast,
        )

    def ne(
        self, other, axis="columns", level=None
    ) -> DataFrame:  # noqa: PR01, RT01, D200
        """
        Get not equal comparison of ``DataFrame`` and `other`, element-wise (binary operator `ne`).
        """
        # `broadcast` is set only when `other` is a Series.
        broadcast = isinstance(other, Series)
        return self._binary_op(
            "ne", other, axis=axis, level=level, broadcast=broadcast
        )

    def nlargest(self, n, columns, keep="first") -> DataFrame:  # noqa: PR01, RT01, D200
        """
        Return the first `n` rows ordered by `columns` in descending order.
        """
        # The selection itself is done by the query compiler.
        largest_qc = self._query_compiler.nlargest(n, columns, keep)
        return self.__constructor__(query_compiler=largest_qc)

    def nsmallest(
        self, n, columns, keep="first"
    ) -> DataFrame:  # noqa: PR01, RT01, D200
        """
        Return the first `n` rows ordered by `columns` in ascending order.
        """
        # The selection itself is done by the query compiler.
        smallest_qc = self._query_compiler.nsmallest(n=n, columns=columns, keep=keep)
        return self.__constructor__(query_compiler=smallest_qc)

    def unstack(
        self, level=-1, fill_value=None, sort=True
    ) -> Union[DataFrame, Series]:  # noqa: PR01, RT01, D200
        """
        Pivot a level of the (necessarily hierarchical) index labels.
        """
        if not sort:
            # TODO: it should be easy to add support for sort == False
            return self._default_to_pandas(
                pandas.DataFrame.unstack, level=level, fill_value=fill_value, sort=sort
            )

        # This ensures that non-pandas MultiIndex objects are caught.
        is_multiindex = len(self.index.names) > 1
        # The result is reduced to one dimension when the index is flat or when
        # every index level is unstacked at once.
        reduces_dimension = not is_multiindex or (
            is_list_like(level) and len(level) == self.index.nlevels
        )

        unstacked_qc = self._query_compiler.unstack(level, fill_value)
        if reduces_dimension:
            return self._reduce_dimension(query_compiler=unstacked_qc)
        return self.__constructor__(query_compiler=unstacked_qc)

    def pivot(
        self, *, columns, index=lib.no_default, values=lib.no_default
    ) -> DataFrame:  # noqa: PR01, RT01, D200
        """
        Return reshaped ``DataFrame`` organized by given index / column values.
        """
        if index is lib.no_default:
            index = None
        if values is lib.no_default:
            values = None

        # if values is not specified, it should be the remaining columns not in
        # index or columns
        if values is None:
            # `index`/`columns` may each be a single label or a list of labels.
            # Wrap scalars first: ``label in "foo"`` is a substring test and
            # ``label in 0`` raises TypeError, neither of which is the intended
            # "is this one of the chosen labels" check.
            excluded = []
            for labels in (index, columns):
                if labels is None:
                    continue
                excluded.extend(labels if is_list_like(labels) else [labels])
            values = [label for label in self.columns if label not in excluded]

        return self.__constructor__(
            query_compiler=self._query_compiler.pivot(
                index=index, columns=columns, values=values
            )
        )

    def pivot_table(
        self,
        values=None,
        index=None,
        columns=None,
        aggfunc="mean",
        fill_value=None,
        margins=False,
        dropna=True,
        margins_name="All",
        observed=lib.no_default,
        sort=True,
    ) -> DataFrame:  # noqa: PR01, RT01, D200
        """
        Create a spreadsheet-style pivot table as a ``DataFrame``.
        """
        # Convert callable to a string aggregation name if possible
        if hashable(aggfunc):
            aggfunc = get_cython_func(aggfunc) or aggfunc

        pivoted_qc = self._query_compiler.pivot_table(
            index=index,
            values=values,
            columns=columns,
            aggfunc=aggfunc,
            fill_value=fill_value,
            margins=margins,
            dropna=dropna,
            margins_name=margins_name,
            observed=observed,
            sort=sort,
        )
        return self.__constructor__(query_compiler=pivoted_qc)

    @property
    def plot(
        self,
        x=None,
        y=None,
        kind="line",
        ax=None,
        subplots=False,
        sharex=None,
        sharey=False,
        layout=None,
        figsize=None,
        use_index=True,
        title=None,
        grid=None,
        legend=True,
        style=None,
        logx=False,
        logy=False,
        loglog=False,
        xticks=None,
        yticks=None,
        xlim=None,
        ylim=None,
        rot=None,
        fontsize=None,
        colormap=None,
        table=False,
        yerr=None,
        xerr=None,
        secondary_y=False,
        sort_columns=False,
        **kwargs,
    ):  # noqa: PR01, RT01, D200
        """
        Make plots of ``DataFrame``.
        """
        # None of the declared parameters are used by the body: the frame is
        # converted to pandas and its `plot` attribute is returned.
        pandas_frame = self._to_pandas()
        return pandas_frame.plot

    def pow(
        self, other, axis="columns", level=None, fill_value=None
    ) -> DataFrame:  # noqa: PR01, RT01, D200
        """
        Get exponential power of ``DataFrame`` and `other`, element-wise (binary operator `pow`).
        """
        if isinstance(other, Series):
            # Series operands are handled through the default-to-pandas path.
            return self._default_to_pandas(
                "pow", other, axis=axis, level=level, fill_value=fill_value
            )
        # `other` is not a Series at this point, so `broadcast` is always False.
        return self._binary_op(
            "pow",
            other,
            axis=axis,
            level=level,
            fill_value=fill_value,
            broadcast=False,
        )

    def prod(
        self,
        axis=0,
        skipna=True,
        numeric_only=False,
        min_count=0,
        **kwargs,
    ):  # noqa: PR01, RT01, D200
        """
        Return the product of the values over the requested axis.
        """
        validate_bool_kwarg(skipna, "skipna", none_allowed=False)
        axis = self._get_axis_number(axis)

        # Labels along the axis being reduced, and labels of the result.
        reduced_labels = self.columns if axis else self.index
        can_short_circuit = (
            skipna is not False
            and numeric_only is False
            and min_count > len(reduced_labels)
            # This fast path is only suitable for the default backend
            and self._query_compiler.get_pandas_backend() is None
        )
        if can_short_circuit:
            # `min_count` exceeds the number of values along the axis, so every
            # entry of the result is NaN.
            result_labels = self.index if axis else self.columns
            # >>> pd.DataFrame([1,2,3,4], dtype="int64[pyarrow]").prod(min_count=10)
            # 0    <NA>
            # dtype: int64[pyarrow]
            return Series(
                [np.nan] * len(result_labels),
                index=result_labels,
                dtype=pandas.api.types.pandas_dtype("float64"),
            )

        data = self._validate_dtypes_prod_mean(axis, numeric_only, ignore_axis=True)
        # A `min_count` above 1 uses the dedicated query-compiler method.
        if min_count > 1:
            reducer = data._query_compiler.prod_min_count
        else:
            reducer = data._query_compiler.prod
        return data._reduce_dimension(
            reducer(
                axis=axis,
                skipna=skipna,
                numeric_only=numeric_only,
                min_count=min_count,
                **kwargs,
            )
        )

    product = prod

    def quantile(
        self,
        q=0.5,
        axis=0,
        numeric_only=False,
        interpolation="linear",
        method="single",
    ) -> Union[DataFrame, Series]:
        # Thin override: every argument is forwarded unchanged to the shared
        # base-class implementation.
        forwarded = {
            "q": q,
            "axis": axis,
            "numeric_only": numeric_only,
            "interpolation": interpolation,
            "method": method,
        }
        return super(DataFrame, self).quantile(**forwarded)

    # methods and fields we need to use pandas.DataFrame.query
    _AXIS_ORDERS = ["index", "columns"]
    _get_index_resolvers = pandas.DataFrame._get_index_resolvers

    def _get_axis_resolvers(self, axis: str) -> dict:  # noqa: GL08
        # forked from pandas because we only want to update the index if there's more
        # than one level of the index.
        # index or columns
        axis_index = getattr(self, axis)
        prefix = axis[0]
        has_multiple_levels = axis_index.nlevels > 1
        resolvers = {}

        for position, name in enumerate(axis_index.names):
            if name is None:
                # prefix with 'i' or 'c' depending on the input axis
                # e.g., you must do ilevel_0 for the 0th level of an unnamed
                # multiiindex
                key, level = f"{prefix}level_{position}", position
            else:
                key = level = name

            level_series = axis_index.get_level_values(level).to_series()
            if has_multiple_levels:
                level_series.index = axis_index
            resolvers[key] = level_series

        # put the index/columns itself in the dict
        # NOTE(review): the threshold here is "more than two levels", unlike the
        # "more than one level" test above -- confirm this is intentional.
        resolvers[axis] = (
            axis_index if axis_index.nlevels > 2 else axis_index.to_series()
        )
        return resolvers

    def _get_cleaned_column_resolvers(self) -> dict[Hashable, Series]:  # noqa: RT01
        """
        Return the special character free column resolvers of a dataframe.

        Column names with special characters are 'cleaned up' so that they can
        be referred to by backtick quoting.
        Used in `DataFrame.eval`.

        Notes
        -----
        Copied from pandas.
        """
        from modin.core.computation.parsing import clean_column_name

        resolvers = {}
        for label, column in self.items():
            # Columns labelled with an integer are left out.
            if isinstance(label, int):
                continue
            resolvers[clean_column_name(label)] = column
        return resolvers

    def query(
        self, expr, inplace=False, **kwargs
    ) -> Union[DataFrame, None]:  # noqa: PR01, RT01, D200
        """
        Query the columns of a ``DataFrame`` with a boolean expression.
        """
        # These two helpers are defined elsewhere; note that they run before the
        # `isinstance(expr, str)` check below, so they see `expr` unvalidated.
        self._update_var_dicts_in_kwargs(expr, kwargs)
        self._validate_eval_query(expr, **kwargs)
        inplace = validate_bool_kwarg(inplace, "inplace")
        if not isinstance(expr, str):
            msg = f"expr must be a string to be evaluated, {type(expr)} given"
            raise ValueError(msg)
        # HACK: this condition kind of breaks the idea of backend agnostic API as all queries
        # _should_ work fine for all of the engines using `pandas.DataFrame.query(...)` approach.
        # However, at this point we know that we can execute simple queries way more efficiently
        # using the QC's API directly in case of pandas backend. Ideally, we have to make it work
        # with the 'pandas.query' approach the same as good the direct QC call is. But investigating
        # and fixing the root cause of the perf difference appears to be much more complicated
        # than putting this hack here. Hopefully, we'll get rid of it soon:
        # https://github.com/modin-project/modin/issues/6499
        try:
            # Fast path: let the query compiler evaluate the expression directly.
            new_query_compiler = self._query_compiler.rowwise_query(expr, **kwargs)
        except NotImplementedError:
            # a non row-wise query was passed, falling back to the
            # implementation forked from pandas.DataFrame.query. This
            # implementation will effectively evaluate the condition at the
            # modin.pandas API level, so that e.g. we interpret
            # df.query("col > 0") as df.loc[df.col > 0]
            kwargs["target"] = None
            res = self.eval(expr, **kwargs)

            try:
                result = self.loc[res]
            except ValueError:
                # when res is multi-dimensional loc raises, but this is
                # sometimes a valid query.
                result = self[res]

            new_query_compiler = result._query_compiler
        # Both paths end with a query compiler; `inplace` decides whether it
        # replaces this frame's state or is wrapped into a new object.
        return self._create_or_update_from_compiler(new_query_compiler, inplace)

    def rename(
        self,
        mapper=None,
        index=None,
        columns=None,
        axis=None,
        copy=None,
        inplace=False,
        level=None,
        errors="ignore",
    ) -> Union[DataFrame, None]:  # noqa: PR01, RT01, D200
        """
        Alter axes labels.
        """
        inplace = validate_bool_kwarg(inplace, "inplace")
        if mapper is None and index is None and columns is None:
            raise TypeError("must pass an index to rename")
        # We have to do this with the args because of how rename handles kwargs. It
        # doesn't ignore None values passed in, so we have to filter them ourselves.
        # NOTE: `locals()` is snapshotted here on purpose, before any further local
        # is created, so that `kwargs` contains only the call arguments.
        args = locals()
        kwargs = {k: v for k, v in args.items() if v is not None and k != "self"}
        # `inplace` must be False for the throwaway pandas frames built below: we
        # read the renamed labels off the object that `rename` returns.
        kwargs["inplace"] = False
        # `kwargs` already captured the caller's original `axis`; from here on the
        # local `axis` is the resolved axis number.
        axis = self._get_axis_number(axis)
        if index is not None or (mapper is not None and axis == 0):
            # Let pandas compute the new row labels on an empty frame that only
            # carries this frame's index.
            new_index = pandas.DataFrame(index=self.index).rename(**kwargs).index
        else:
            new_index = None
        if columns is not None or (mapper is not None and axis == 1):
            # Same trick for the column labels.
            new_columns = (
                pandas.DataFrame(columns=self.columns).rename(**kwargs).columns
            )
        else:
            new_columns = None

        if inplace:
            obj = self
        else:
            obj = self.copy()
        if new_index is not None:
            obj.index = new_index
        if new_columns is not None:
            obj.columns = new_columns

        # In-place calls fall off the end and implicitly return None.
        if not inplace:
            return obj

    def reindex(
        self,
        labels=None,
        *,
        index=None,
        columns=None,
        axis=None,
        method=None,
        copy=None,
        level=None,
        fill_value=np.nan,
        limit=None,
        tolerance=None,
    ) -> DataFrame:  # noqa: PR01, RT01, D200
        # `labels` is shorthand for whichever of `index` / `columns` the resolved
        # axis points at; when given, it overrides that keyword. The axis is
        # resolved unconditionally so an invalid `axis` is always reported.
        axis_number = self._get_axis_number(axis)
        if labels is not None:
            if axis_number == 0:
                index = labels
            else:
                columns = labels

        # `labels` and `axis` have been folded into `index` / `columns`, so only
        # the remaining keywords are forwarded to the parent implementation.
        forwarded = {
            "index": index,
            "columns": columns,
            "method": method,
            "copy": copy,
            "level": level,
            "fill_value": fill_value,
            "limit": limit,
            "tolerance": tolerance,
        }
        return super(DataFrame, self).reindex(**forwarded)

    def replace(
        self,
        to_replace=None,
        value=lib.no_default,
        *,
        inplace: bool = False,
        limit=None,
        regex: bool = False,
        method: str | lib.NoDefault = lib.no_default,
    ) -> Union[DataFrame, None]:  # noqa: PR01, RT01, D200
        """
        Replace values given in `to_replace` with `value`.
        """
        inplace = validate_bool_kwarg(inplace, "inplace")
        # The query compiler is always asked for a new compiler (`inplace=False`);
        # the caller's `inplace` choice is applied afterwards, at the frame level.
        new_query_compiler = self._query_compiler.replace(
            to_replace=to_replace,
            value=value,
            inplace=False,
            limit=limit,
            regex=regex,
            method=method,
        )
        return self._create_or_update_from_compiler(new_query_compiler, inplace)

    def rfloordiv(
        self, other, axis="columns", level=None, fill_value=None
    ) -> DataFrame:  # noqa: PR01, RT01, D200
        """
        Get integer division of ``DataFrame`` and `other`, element-wise (binary operator `rfloordiv`).

        Thin wrapper over ``_binary_op``; the ``broadcast`` flag is set only
        when `other` is a ``Series``.
        """
        other_is_series = isinstance(other, Series)
        return self._binary_op(
            "rfloordiv",
            other,
            axis=axis,
            level=level,
            fill_value=fill_value,
            broadcast=other_is_series,
        )

    def radd(
        self, other, axis="columns", level=None, fill_value=None
    ) -> DataFrame:  # noqa: PR01, RT01, D200
        """
        Get addition of ``DataFrame`` and `other`, element-wise (binary operator `radd`).

        Thin wrapper over ``_binary_op``; the ``broadcast`` flag is set only
        when `other` is a ``Series``.
        """
        other_is_series = isinstance(other, Series)
        return self._binary_op(
            "radd",
            other,
            axis=axis,
            level=level,
            fill_value=fill_value,
            broadcast=other_is_series,
        )

    def rmod(
        self, other, axis="columns", level=None, fill_value=None
    ) -> DataFrame:  # noqa: PR01, RT01, D200
        """
        Get modulo of ``DataFrame`` and `other`, element-wise (binary operator `rmod`).

        Thin wrapper over ``_binary_op``; the ``broadcast`` flag is set only
        when `other` is a ``Series``.
        """
        other_is_series = isinstance(other, Series)
        return self._binary_op(
            "rmod",
            other,
            axis=axis,
            level=level,
            fill_value=fill_value,
            broadcast=other_is_series,
        )

    def rpow(
        self, other, axis="columns", level=None, fill_value=None
    ) -> DataFrame:  # noqa: PR01, RT01, D200
        """
        Get exponential power of ``DataFrame`` and `other`, element-wise (binary operator `rpow`).

        A ``Series`` operand is routed through ``_default_to_pandas``; every
        other operand goes through ``_binary_op``.
        """
        shared_kwargs = {"axis": axis, "level": level, "fill_value": fill_value}
        if isinstance(other, Series):
            return self._default_to_pandas("rpow", other, **shared_kwargs)
        # `other` is known not to be a Series at this point, so the broadcast
        # flag (``isinstance(other, Series)``) can only be False.
        return self._binary_op("rpow", other, broadcast=False, **shared_kwargs)

    def rsub(
        self, other, axis="columns", level=None, fill_value=None
    ) -> DataFrame:  # noqa: PR01, RT01, D200
        """
        Get subtraction of ``DataFrame`` and `other`, element-wise (binary operator `rsub`).

        Thin wrapper over ``_binary_op``; the ``broadcast`` flag is set only
        when `other` is a ``Series``.
        """
        other_is_series = isinstance(other, Series)
        return self._binary_op(
            "rsub",
            other,
            axis=axis,
            level=level,
            fill_value=fill_value,
            broadcast=other_is_series,
        )

    def rtruediv(
        self, other, axis="columns", level=None, fill_value=None
    ) -> DataFrame:  # noqa: PR01, RT01, D200
        """
        Get floating division of ``DataFrame`` and `other`, element-wise (binary operator `rtruediv`).

        Thin wrapper over ``_binary_op``; the ``broadcast`` flag is set only
        when `other` is a ``Series``.
        """
        other_is_series = isinstance(other, Series)
        return self._binary_op(
            "rtruediv",
            other,
            axis=axis,
            level=level,
            fill_value=fill_value,
            broadcast=other_is_series,
        )

    # Alias: ``rdiv`` is bound to the very same method object as ``rtruediv``.
    rdiv = rtruediv

    def select_dtypes(
        self, include=None, exclude=None
    ) -> DataFrame:  # noqa: PR01, RT01, D200
        """
        Return a subset of the ``DataFrame``'s columns based on the column dtypes.

        A column is kept when its dtype's scalar type is a subclass of at least
        one `include` entry (if any were given) and of no `exclude` entry.
        """
        # Run the selection on an empty pandas frame purely for its argument
        # validation (both None, overlapping selections, string dtypes).
        pandas.DataFrame().select_dtypes(include, exclude)

        # Normalise each selection to a list-like: wrap a lone truthy value,
        # turn None into an empty list, leave everything else untouched.
        if include and not is_list_like(include):
            include = [include]
        elif include is None:
            include = []
        if exclude and not is_list_like(exclude):
            exclude = [exclude]
        elif exclude is None:
            exclude = []

        # De-duplicate both selections first, then resolve them to dtype classes.
        raw_include, raw_exclude = set(include), set(exclude)
        include = {infer_dtype_from_object(item) for item in raw_include}
        exclude = {infer_dtype_from_object(item) for item in raw_exclude}

        # An empty selection places no constraint, hence the `not bool(...)` default.
        include_these = pandas.Series(not bool(include), index=self.columns)
        exclude_these = pandas.Series(not bool(exclude), index=self.columns)

        for column, dtype in self.dtypes.items():
            scalar_type = dtype.type
            if include:  # checks for the case of empty include or exclude
                include_these[column] = any(
                    issubclass(scalar_type, candidate) for candidate in include
                )
            if exclude:
                exclude_these[column] = not any(
                    issubclass(scalar_type, candidate) for candidate in exclude
                )

        keep_mask = include_these & exclude_these
        # Positions of the columns that failed the selection and must be dropped.
        rejected = [
            position for position, keep in enumerate(keep_mask.values) if not keep
        ]
        return self.drop(columns=self.columns[rejected], inplace=False)

    def set_index(
        self, keys, *, drop=True, append=False, inplace=False, verify_integrity=False
    ) -> Union[DataFrame, None]:  # noqa: PR01, RT01, D200
        """
        Set the ``DataFrame`` index using existing columns.
        """
        inplace = validate_bool_kwarg(inplace, "inplace")
        # Work on a list of keys from here on, even for a single key.
        if not isinstance(keys, list):
            keys = [keys]

        # Path 1: at least one key is array-like data rather than a column label.
        if any(
            isinstance(col, (pandas.Index, Series, np.ndarray, list, Iterator))
            for col in keys
        ):
            if inplace:
                frame = self
            else:
                frame = self.copy()
            if drop:
                # List-like keys are kept as-is; any other key is popped out of
                # `frame`, and the popped object is used as the key instead.
                keys = [k if is_list_like(k) else frame.pop(k) for k in keys]
            keys = try_cast_to_pandas(keys)
            # These are single-threaded objects, so we might as well let pandas do the
            # calculation so that it matches.
            frame.index = (
                pandas.DataFrame(index=self.index)
                .set_index(keys, append=append, verify_integrity=verify_integrity)
                .index
            )
            if not inplace:
                return frame
            else:
                return

        # Path 2: every key is expected to be a column label.
        missing = []
        for col in keys:
            # everything else gets tried as a key;
            # see https://github.com/pandas-dev/pandas/issues/24969
            try:
                found = col in self.columns
            except TypeError as err:
                raise TypeError(
                    'The parameter "keys" may be a column key, one-dimensional '
                    + "array, or a list containing only valid column keys and "
                    + f"one-dimensional arrays. Received column of type {type(col)}"
                ) from err
            else:
                if not found:
                    missing.append(col)
        # If the missing column is a "primitive", return the errors.
        # Otherwise we let the query compiler figure out what to do with
        # the keys
        # (only the first missing key is inspected; "primitive" here means the
        # object has no `__dict__` attribute)
        if missing and not hasattr(missing[0], "__dict__"):
            # The keys are a primitive type
            raise KeyError(f"None of {missing} are in the columns")

        new_query_compiler = self._query_compiler.set_index_from_columns(
            keys, drop=drop, append=append
        )

        # Uniqueness is verified on the resulting index, before `self` is updated.
        if verify_integrity and not new_query_compiler.index.is_unique:
            duplicates = new_query_compiler.index[
                new_query_compiler.index.duplicated()
            ].unique()
            raise ValueError(f"Index has duplicate keys: {duplicates}")

        return self._create_or_update_from_compiler(new_query_compiler, inplace=inplace)

    # Registers the ``sparse`` accessor, backed by ``SparseFrameAccessor``.
    sparse = CachedAccessor("sparse", SparseFrameAccessor)

    def squeeze(
        self, axis=None
    ) -> Union[DataFrame, Series, Scalar]:  # noqa: PR01, RT01, D200
        """
        Squeeze 1 dimensional axis objects into scalars.

        With no `axis`, a frame that has a single column or a single row is
        handed to ``Series.squeeze``. With an explicit `axis`, only that axis
        is collapsed, and only if its length is 1. In every other case a copy
        of the frame is returned.
        """
        if axis is not None:
            axis = self._get_axis_number(axis)

        if axis is None:
            single_column = self._query_compiler.get_axis_len(1) == 1
            if single_column or len(self) == 1:
                return Series(query_compiler=self._query_compiler).squeeze()
        elif axis == 1:
            if self._query_compiler.get_axis_len(1) == 1:
                # Tag the compiler as column-shaped before wrapping it in a Series.
                self._query_compiler._shape_hint = "column"
                return Series(query_compiler=self._query_compiler)
        elif axis == 0:
            if len(self) == 1:
                # A single row becomes a column once the frame is transposed.
                transposed_qc = self.T._query_compiler
                transposed_qc._shape_hint = "column"
                return Series(query_compiler=transposed_qc)
        return self.copy()

    def stack(
        self, level=-1, dropna=lib.no_default, sort=lib.no_default, future_stack=False
    ) -> Union[DataFrame, Series]:  # noqa: PR01, RT01, D200
        """
        Stack the prescribed level(s) from columns to index.

        The ``future_stack`` implementation is delegated to pandas. Otherwise
        the query compiler does the stacking and the result is reduced in
        dimension when the columns are single-level or all column levels are
        stacked.
        """
        if future_stack:
            return self._default_to_pandas(
                pandas.DataFrame.stack,
                level=level,
                dropna=dropna,
                sort=sort,
                future_stack=future_stack,
            )

        # FutureWarnings only needed if future_stack == True
        dropna = True if dropna is lib.no_default else dropna
        sort = True if sort is lib.no_default else sort

        # This ensures that non-pandas MultiIndex objects are caught.
        is_multiindex = len(self.columns.names) > 1
        stacks_every_level = (
            is_multiindex
            and is_list_like(level)
            and len(level) == self.columns.nlevels
        )
        # Pick the wrapper first, then run the stack, so the order of attribute
        # access and computation is unchanged.
        if not is_multiindex or stacks_every_level:
            wrap = self._reduce_dimension
        else:
            wrap = self.__constructor__
        return wrap(query_compiler=self._query_compiler.stack(level, dropna, sort))

    def sub(
        self, other, axis="columns", level=None, fill_value=None
    ) -> DataFrame:  # noqa: PR01, RT01, D200
        """
        Get subtraction of ``DataFrame`` and `other`, element-wise (binary operator `sub`).

        Thin wrapper over ``_binary_op``; the ``broadcast`` flag is set only
        when `other` is a ``Series``.
        """
        other_is_series = isinstance(other, Series)
        return self._binary_op(
            "sub",
            other,
            axis=axis,
            level=level,
            fill_value=fill_value,
            broadcast=other_is_series,
        )

    # Alias: ``subtract`` is bound to the very same method object as ``sub``.
    subtract = sub

    def sum(
        self,
        axis=0,
        skipna=True,
        numeric_only=False,
        min_count=0,
        **kwargs,
    ) -> Series:  # noqa: PR01, RT01, D200
        """
        Return the sum of the values over the requested axis.
        """
        # NOTE: the docstring has to be the first statement of the body; it used
        # to sit after this call, which left the method without a `__doc__`.
        validate_bool_kwarg(skipna, "skipna", none_allowed=False)
        axis = self._get_axis_number(axis)
        axis_to_apply = self.columns if axis else self.index
        if (
            skipna is not False
            and numeric_only is False
            and min_count > len(axis_to_apply)
            # This fast path is only suitable for the default backend
            and self._query_compiler.get_pandas_backend() is None
        ):
            # `min_count` exceeds the length of the axis tested above, so the
            # answer is all-NaN and can be built without touching the data.
            new_index = self.columns if not axis else self.index
            return Series(
                [np.nan] * len(new_index),
                index=new_index,
                dtype=pandas.api.types.pandas_dtype("float64"),
            )

        # We cannot add datetime types, so if we are summing a column with
        # dtype datetime64 and cannot ignore non-numeric types, we must throw a
        # TypeError.
        if numeric_only is False and any(
            dtype == pandas.api.types.pandas_dtype("datetime64[ns]")
            for dtype in self.dtypes
        ):
            raise TypeError(
                "'DatetimeArray' with dtype datetime64[ns] does not support reduction 'sum'"
            )

        data = self._get_numeric_data(axis) if numeric_only else self

        # `min_count` above 1 is served by a dedicated query-compiler method;
        # 0 and 1 go through the regular `sum`.
        if min_count > 1:
            return data._reduce_dimension(
                data._query_compiler.sum_min_count(
                    axis=axis,
                    skipna=skipna,
                    numeric_only=numeric_only,
                    min_count=min_count,
                    **kwargs,
                )
            )
        return data._reduce_dimension(
            data._query_compiler.sum(
                axis=axis,
                skipna=skipna,
                numeric_only=numeric_only,
                min_count=min_count,
                **kwargs,
            )
        )

    @expanduser_path_arg("path")
    def to_feather(
        self, path, **kwargs
    ) -> None:  # pragma: no cover # noqa: PR01, RT01, D200
        """
        Write a ``DataFrame`` to the binary Feather format.
        """
        # No native implementation: hand `path` and all keyword arguments to
        # `pandas.DataFrame.to_feather` through the default-to-pandas path.
        return self._default_to_pandas(pandas.DataFrame.to_feather, path, **kwargs)

    def to_gbq(
        self,
        destination_table,
        project_id=None,
        chunksize=None,
        reauth=False,
        if_exists="fail",
        auth_local_webserver=True,
        table_schema=None,
        location=None,
        progress_bar=True,
        credentials=None,
    ) -> None:  # pragma: no cover # noqa: PR01, RT01, D200
        """
        Write a ``DataFrame`` to a Google BigQuery table.
        """
        # No native implementation: every argument is forwarded unchanged to
        # `pandas.DataFrame.to_gbq` through the default-to-pandas path.
        return self._default_to_pandas(
            pandas.DataFrame.to_gbq,
            destination_table,
            project_id=project_id,
            chunksize=chunksize,
            reauth=reauth,
            if_exists=if_exists,
            auth_local_webserver=auth_local_webserver,
            table_schema=table_schema,
            location=location,
            progress_bar=progress_bar,
            credentials=credentials,
        )

    @expanduser_path_arg("path")
    def to_orc(
        self, path=None, *, engine="pyarrow", index=None, engine_kwargs=None
    ) -> Union[bytes, None]:
        # No native implementation: every argument is forwarded unchanged, by
        # keyword, to `pandas.DataFrame.to_orc` through the default-to-pandas path.
        return self._default_to_pandas(
            pandas.DataFrame.to_orc,
            path=path,
            engine=engine,
            index=index,
            engine_kwargs=engine_kwargs,
        )

    @expanduser_path_arg("buf")
    def to_html(
        self,
        buf=None,
        columns=None,
        col_space=None,
        header=True,
        index=True,
        na_rep="NaN",
        formatters=None,
        float_format=None,
        sparsify=None,
        index_names=True,
        justify=None,
        max_rows=None,
        max_cols=None,
        show_dimensions=False,
        decimal=".",
        bold_rows=True,
        classes=None,
        escape=True,
        notebook=False,
        border=None,
        table_id=None,
        render_links=False,
        encoding=None,
    ) -> Union[str, None]:  # noqa: PR01, RT01, D200
        """
        Render a ``DataFrame`` as an HTML table.
        """
        # No native implementation: every argument is forwarded unchanged to
        # `pandas.DataFrame.to_html` through the default-to-pandas path.
        return self._default_to_pandas(
            pandas.DataFrame.to_html,
            buf=buf,
            columns=columns,
            col_space=col_space,
            header=header,
            index=index,
            na_rep=na_rep,
            formatters=formatters,
            float_format=float_format,
            sparsify=sparsify,
            index_names=index_names,
            justify=justify,
            max_rows=max_rows,
            max_cols=max_cols,
            show_dimensions=show_dimensions,
            decimal=decimal,
            bold_rows=bold_rows,
            classes=classes,
            escape=escape,
            notebook=notebook,
            border=border,
            table_id=table_id,
            render_links=render_links,
            # Forward the caller's value; this used to be hard-coded to None,
            # which silently discarded any requested encoding.
            encoding=encoding,
        )

    @expanduser_path_arg("path")
    def to_parquet(
        self,
        path=None,
        engine="auto",
        compression="snappy",
        index=None,
        partition_cols=None,
        storage_options: StorageOptions = None,
        **kwargs,
    ) -> Union[bytes, None]:
        # Imported at call time rather than at module level
        # (NOTE(review): presumably to avoid a circular import; confirm).
        from modin.core.execution.dispatching.factories.dispatcher import (
            FactoryDispatcher,
        )

        # The dispatcher receives this frame's query compiler plus every
        # argument, unchanged.
        return FactoryDispatcher.to_parquet(
            self._query_compiler,
            path=path,
            engine=engine,
            compression=compression,
            index=index,
            partition_cols=partition_cols,
            storage_options=storage_options,
            **kwargs,
        )

    def to_period(
        self, freq=None, axis=0, copy=None
    ) -> DataFrame:  # pragma: no cover # noqa: PR01, RT01, D200
        """
        Convert ``DataFrame`` from ``DatetimeIndex`` to ``PeriodIndex``.
        """
        # Pure delegation to the `to_period` implementation that follows
        # ``DataFrame`` in the MRO.
        return super(DataFrame, self).to_period(freq=freq, axis=axis, copy=copy)

    def to_records(
        self, index=True, column_dtypes=None, index_dtypes=None
    ) -> np.rec.recarray:  # noqa: PR01, RT01, D200
        """
        Convert ``DataFrame`` to a NumPy record array.
        """
        # No native implementation: every argument is forwarded unchanged to
        # `pandas.DataFrame.to_records` through the default-to-pandas path.
        return self._default_to_pandas(
            pandas.DataFrame.to_records,
            index=index,
            column_dtypes=column_dtypes,
            index_dtypes=index_dtypes,
        )

    @expanduser_path_arg("path")
    def to_stata(
        self,
        path: FilePath | WriteBuffer[bytes],
        *,
        convert_dates: dict[Hashable, str] | None = None,
        write_index: bool = True,
        byteorder: str | None = None,
        time_stamp: datetime.datetime | None = None,
        data_label: str | None = None,
        variable_labels: dict[Hashable, str] | None = None,
        version: int | None = 114,
        convert_strl: Sequence[Hashable] | None = None,
        compression: CompressionOptions = "infer",
        storage_options: StorageOptions = None,
        value_labels: dict[Hashable, dict[float | int, str]] | None = None,
    ) -> None:
        # No native implementation: `path` is passed positionally and every
        # other argument by keyword, all unchanged, to
        # `pandas.DataFrame.to_stata` through the default-to-pandas path.
        return self._default_to_pandas(
            pandas.DataFrame.to_stata,
            path,
            convert_dates=convert_dates,
            write_index=write_index,
            byteorder=byteorder,
            time_stamp=time_stamp,
            data_label=data_label,
            variable_labels=variable_labels,
            version=version,
            convert_strl=convert_strl,
            compression=compression,
            storage_options=storage_options,
            value_labels=value_labels,
        )

    @expanduser_path_arg("path_or_buffer")
    def to_xml(
        self,
        path_or_buffer=None,
        index=True,
        root_name="data",
        row_name="row",
        na_rep=None,
        attr_cols=None,
        elem_cols=None,
        namespaces=None,
        prefix=None,
        encoding="utf-8",
        xml_declaration=True,
        pretty_print=True,
        parser="lxml",
        stylesheet=None,
        compression="infer",
        storage_options=None,
    ) -> Union[str, None]:
        # Imported at call time rather than at module level
        # (NOTE(review): presumably to avoid a circular import; confirm).
        from modin.core.execution.dispatching.factories.dispatcher import (
            FactoryDispatcher,
        )

        # The dispatcher receives this frame's query compiler plus every
        # argument, unchanged.
        return FactoryDispatcher.to_xml(
            self._query_compiler,
            path_or_buffer=path_or_buffer,
            index=index,
            root_name=root_name,
            row_name=row_name,
            na_rep=na_rep,
            attr_cols=attr_cols,
            elem_cols=elem_cols,
            namespaces=namespaces,
            prefix=prefix,
            encoding=encoding,
            xml_declaration=xml_declaration,
            pretty_print=pretty_print,
            parser=parser,
            stylesheet=stylesheet,
            compression=compression,
            storage_options=storage_options,
        )

    def to_timestamp(
        self, freq=None, how="start", axis=0, copy=None
    ) -> DataFrame:  # noqa: PR01, RT01, D200
        """
        Cast to DatetimeIndex of timestamps, at *beginning* of period.
        """
        # Pure delegation to the `to_timestamp` implementation that follows
        # ``DataFrame`` in the MRO.
        return super(DataFrame, self).to_timestamp(
            freq=freq, how=how, axis=axis, copy=copy
        )

    def truediv(
        self, other, axis="columns", level=None, fill_value=None
    ) -> DataFrame:  # noqa: PR01, RT01, D200
        """
        Get floating division of ``DataFrame`` and `other`, element-wise (binary operator `truediv`).

        Thin wrapper over ``_binary_op``; the ``broadcast`` flag is set only
        when `other` is a ``Series``.
        """
        other_is_series = isinstance(other, Series)
        return self._binary_op(
            "truediv",
            other,
            axis=axis,
            level=level,
            fill_value=fill_value,
            broadcast=other_is_series,
        )

    # Aliases: ``div`` and ``divide`` are both bound to the ``truediv`` method object.
    div = divide = truediv

    def update(
        self, other, join="left", overwrite=True, filter_func=None, errors="ignore"
    ) -> None:  # noqa: PR01, RT01, D200
        """
        Modify in place using non-NA values from another ``DataFrame``.

        Anything that is not already a ``DataFrame`` is first wrapped with this
        frame's constructor. The frame is updated in place; nothing is returned.
        """
        if isinstance(other, DataFrame):
            other_frame = other
        else:
            other_frame = self.__constructor__(other)

        updated_qc = self._query_compiler.df_update(
            other_frame._query_compiler,
            join=join,
            overwrite=overwrite,
            filter_func=filter_func,
            errors=errors,
        )
        self._update_inplace(new_query_compiler=updated_qc)

    def where(
        self,
        cond,
        other=np.nan,
        *,
        inplace=False,
        axis=None,
        level=None,
    ) -> Union[DataFrame, None]:  # noqa: PR01, RT01, D200
        """
        Replace values where the condition is False.
        """
        inplace = validate_bool_kwarg(inplace, "inplace")
        if isinstance(other, Series) and axis is None:
            raise ValueError("Must specify axis=0 or 1")
        if level is not None:
            # Level-aware calls are delegated to pandas; frame operands are
            # converted to pandas objects first.
            if isinstance(other, DataFrame):
                other = other._query_compiler.to_pandas()
            if isinstance(cond, DataFrame):
                cond = cond._query_compiler.to_pandas()
            # `_default_to_pandas` returns the resulting frame, whereas
            # `_create_or_update_from_compiler` takes a query compiler, so the
            # compiler has to be unwrapped from the result before handing it over.
            new_query_compiler = self._default_to_pandas(
                pandas.DataFrame.where,
                cond,
                other=other,
                inplace=False,
                axis=axis,
                level=level,
            )._query_compiler
            return self._create_or_update_from_compiler(new_query_compiler, inplace)
        cond = cond(self) if callable(cond) else cond

        if not isinstance(cond, DataFrame):
            # Array-like conditions must match this frame's shape exactly; they
            # are then labelled with this frame's index and columns.
            if not hasattr(cond, "shape"):
                cond = np.asanyarray(cond)
            if cond.shape != self.shape:
                raise ValueError("Array conditional must be same shape as self")
            cond = self.__constructor__(cond, index=self.index, columns=self.columns)
        if isinstance(other, DataFrame):
            other = other._query_compiler
        else:
            # Only infer the axis number when ``other`` will be made into a
            # series. When ``other`` is a dataframe, axis=None has a meaning
            # distinct from 0 and 1, e.g. at pandas 1.4.3:
            #
            # import pandas as pd
            # df = pd.DataFrame([[1,2], [3, 4]], index=[1, 0])
            # cond = pd.DataFrame([[True,False], [False, True]], columns=[1, 0])
            # other = pd.DataFrame([[5,6], [7,8]], columns=[1, 0])
            #
            # print(df.where(cond, other, axis=None))
            # 0  1
            # 1  1  7
            # 0  6  4
            #
            # print(df.where(cond, other, axis=0))
            #
            # 0  1
            # 1  1  8
            # 0  5  4
            #
            # print(df.where(cond, other, axis=1))
            #
            # 0  1
            # 1  1  5
            # 0  8  4

            # _get_axis_number interprets lib.no_default as None, but where doesn't
            # accept lib.no_default.
            if axis == lib.no_default:
                raise ValueError(
                    "No axis named NoDefault.no_default for object type DataFrame"
                )
            axis = self._get_axis_number(axis)
            if isinstance(other, Series):
                # Align the series with the labels of the axis it applies to.
                other = other.reindex(
                    self.index if axis == 0 else self.columns
                )._query_compiler
                if other._shape_hint is None:
                    # To make the query compiler recognizable as a Series at lower levels
                    other._shape_hint = "column"
            elif is_list_like(other):
                index = self.index if axis == 0 else self.columns
                other = pandas.Series(other, index=index)
        query_compiler = self._query_compiler.where(
            cond._query_compiler, other, axis=axis, level=level
        )
        return self._create_or_update_from_compiler(query_compiler, inplace)

    def _getitem_column(self, key) -> Series:
        """
        Get column specified by `key`.

        Parameters
        ----------
        key : hashable
            Key that points to column to retrieve.

        Returns
        -------
        Series
            Selected column.
        """
        if key not in self.keys():
            raise KeyError("{}".format(key))
        s = self.__constructor__(
            query_compiler=self._query_compiler.getitem_column_array([key])
        ).squeeze(axis=1)
        if isinstance(s, Series):
            s._parent = self
            s._parent_axis = 1
        return s

    @disable_logging
    def __getattribute__(self, item: str) -> Any:
        """
        Return attribute `item`, giving registered extensions the first chance.

        Unless `item` is listed in ``EXTENSION_NO_LOOKUP``, the class-level
        ``_extensions`` registry is consulted first; if that lookup yields
        anything other than ``sentinel``, it is returned. Otherwise the regular
        attribute lookup of the parent class is used.

        Parameters
        ----------
        item : str
            Item to get.

        Returns
        -------
        Any
        """
        # NOTE that to get an attribute, python calls __getattribute__() first and
        # then falls back to __getattr__() if the former raises an AttributeError.

        if item not in EXTENSION_NO_LOOKUP:
            # `__class__` here is the class this method is defined in, not
            # `type(self)`.
            extensions_result = self._getattribute__from_extension_impl(
                item, __class__._extensions
            )
            if extensions_result is not sentinel:
                return extensions_result
        return super().__getattribute__(item)

    @disable_logging
    def __getattr__(self, key) -> Any:
        """
        Resolve `key` as a column label once normal attribute lookup has failed.

        Parameters
        ----------
        key : hashable
            Key to get.

        Returns
        -------
        Any

        Raises
        ------
        AttributeError
            If `key` is in ``_ATTRS_NO_LOOKUP`` or is not one of the columns.

        Notes
        -----
        Python only calls this method after `__getattribute__` has raised an
        ``AttributeError``, so only the column lookup is left to try here.
        """
        # NOTE that to get an attribute, python calls __getattribute__() first and
        # then falls back to __getattr__() if the former raises an AttributeError.
        is_column_lookup = key not in _ATTRS_NO_LOOKUP and key in self.columns
        if not is_column_lookup:
            raise AttributeError(f"'DataFrame' object has no attribute '{key}'")
        return self[key]

    def __setattr__(self, key, value) -> None:
        """
        Set attribute `value` identified by `key`.

        Depending on `key`, the value ends up as a regular object attribute, is
        handed to a registered extension, or is assigned to an existing column.

        Parameters
        ----------
        key : hashable
            Key to set.
        value : Any
            Value to set.

        Returns
        -------
        None
        """
        # While we let users assign to a column labeled "x" with "df.x" , there
        # are some attributes that we should assume are NOT column names and
        # therefore should follow the default Python object assignment
        # behavior. These are:
        # - anything in self.__dict__. This includes any attributes that the
        #   user has added to the dataframe with,  e.g., `df.c = 3`, and
        #   any attribute that Modin has added to the frame, e.g.
        #   `_query_compiler` and `_siblings`
        # - `_query_compiler`, which Modin initializes before it appears in
        #   __dict__
        # - `_siblings`, which Modin initializes before it appears in __dict__
        if key in ("_query_compiler", "_siblings") or key in self.__dict__:
            pass
        # A registered extension for this name takes over the assignment.
        elif self._get_extension(key, __class__._extensions) is not sentinel:
            return self._get_extension(key, __class__._extensions).__set__(self, value)
        # we have to check for the key in `dir(self)` first in order not to trigger columns computation
        elif key not in dir(self) and key in self:
            self.__setitem__(key, value)
            # Note: return immediately so we don't keep this `key` as dataframe state.
            # `__getattr__` will return the columns not present in `dir(self)`, so we do not need
            # to manually track this state in the `dir`.
            return
        # A list-like assigned to any name other than `index`/`columns` only
        # triggers a warning; the assignment below still happens.
        elif is_list_like(value) and key not in ["index", "columns"]:
            warnings.warn(
                SET_DATAFRAME_ATTRIBUTE_WARNING,
                UserWarning,
            )
        super().__setattr__(key, value)

    def __setitem__(self, key, value) -> None:
        """
        Set `value` for the item(s) identified by `key`.

        Parameters
        ----------
        key : Any
            Key to set.
        value : Any
            Value to set.

        Returns
        -------
        None
        """
        # Slice keys are delegated to a dedicated helper.
        if isinstance(key, slice):
            return self._setitem_slice(key, value)

        # A hashable key that is not an existing column label: add a new column.
        if hashable(key) and key not in self.columns:
            if isinstance(value, Series) and self._query_compiler.get_axis_len(1) == 0:
                # The frame has no columns yet, so the Series' query compiler is adopted as is.
                # Note: column information is lost when assigning a query compiler
                prev_index = self.columns
                self._query_compiler = value._query_compiler.copy()
                # Now that the data is appended, we need to update the column name for
                # that column to `key`, otherwise the name could be incorrect.
                self.columns = prev_index.insert(0, key)
                return
            # Do new column assignment after error checks and possible value modifications
            self.insert(
                loc=self._query_compiler.get_axis_len(1), column=key, value=value
            )
            return

        if not hashable(key):
            # DataFrame / ndarray keys are applied through `mask`.
            if isinstance(key, DataFrame) or isinstance(key, np.ndarray):
                if isinstance(key, np.ndarray):
                    if key.shape != self.shape:
                        raise ValueError("Array must be same shape as DataFrame")
                    key = self.__constructor__(key, columns=self.columns)
                return self.mask(key, value, inplace=True)

            # A list/Index of labels that all exist already: overwrite those columns.
            if isinstance(key, (list, pandas.Index)) and all(
                (x in self.columns for x in key)
            ):
                if is_list_like(value):
                    if not (hasattr(value, "shape") and hasattr(value, "ndim")):
                        value = np.array(value)
                    # The last dimension of `value` must provide one entry per key.
                    if len(key) != value.shape[-1]:
                        raise ValueError("Columns must be same length as key")
                if isinstance(value, type(self)):
                    # importing here to avoid circular import
                    from .general import concat

                    if not value.columns.equals(pandas.Index(key)):
                        # we only need to change the labels, so shallow copy here
                        value = value.copy(deep=False)
                        value.columns = key

                    # here we iterate over every column in the 'self' frame, then check if it's in the 'key'
                    # and so has to be taken from either from the 'value' or from the 'self'. After that,
                    # we concatenate those mixed column chunks and get a dataframe with updated columns
                    to_concat = []
                    # columns to take for this chunk
                    to_take = []
                    # whether columns in this chunk are in the 'key' and has to be taken from the 'value'
                    get_cols_from_value = False
                    # an object to take columns from for this chunk
                    src_obj = self
                    for col in self.columns:
                        # The source flips between 'self' and 'value': flush the current chunk.
                        if (col in key) != get_cols_from_value:
                            if len(to_take):
                                to_concat.append(src_obj[to_take])
                            to_take = [col]
                            get_cols_from_value = not get_cols_from_value
                            src_obj = value if get_cols_from_value else self
                        else:
                            to_take.append(col)
                    # Flush the trailing chunk.
                    if len(to_take):
                        to_concat.append(src_obj[to_take])

                    new_qc = concat(to_concat, axis=1)._query_compiler
                else:
                    # Non-DataFrame values are written at the positions of the keyed columns.
                    new_qc = self._query_compiler.write_items(
                        slice(None),
                        self.columns.get_indexer_for(key),
                        value,
                        need_columns_reindex=False,
                    )
                self._update_inplace(new_qc)
                # self.loc[:, key] = value
                return
            elif (
                isinstance(key, list)
                and isinstance(value, type(self))
                # Mixed case is more complicated, it's defaulting to pandas for now
                and all((x not in self.columns for x in key))
            ):
                # A list of labels that are all new, with a DataFrame value:
                # the value's columns are inserted after the existing ones.
                if len(key) != len(value.columns):
                    raise ValueError("Columns must be same length as key")

                # Aligning the value's columns with the key
                if not np.array_equal(value.columns, key):
                    value = value.set_axis(key, axis=1)

                new_qc = self._query_compiler.insert_item(
                    axis=1,
                    loc=self._query_compiler.get_axis_len(1),
                    value=value._query_compiler,
                    how="left",
                )
                self._update_inplace(new_qc)
                return

            # Every other unhashable key is handled by defaulting to pandas.
            def setitem_unhashable_key(df, value):
                df[key] = value
                return df

            return self._update_inplace(
                self._default_to_pandas(setitem_unhashable_key, value)._query_compiler
            )
        # From here on `key` is a hashable label of an existing column.
        if is_list_like(value):
            if isinstance(value, (pandas.DataFrame, DataFrame)):
                # Only the first column of a DataFrame value is used.
                value = value[value.columns[0]].values
            elif isinstance(value, np.ndarray):
                assert (
                    len(value.shape) < 3
                ), "Shape of new values must be compatible with manager shape"
                # Flatten the array and, for a non-empty frame, cut it to the frame's length.
                value = value.T.reshape(-1)
                if len(self) > 0:
                    value = value[: len(self)]
            if not isinstance(value, (Series, Categorical, np.ndarray, list, range)):
                value = list(value)

        # A Modin Series is passed to the query compiler as its own query compiler.
        if isinstance(value, Series):
            value = value._query_compiler
        self._update_inplace(self._query_compiler.setitem(axis=0, key=key, value=value))

    def __iter__(self) -> Iterable[Hashable]:
        """
        Iterate over the info axis (the column labels).

        Returns
        -------
        iterable
            Iterator over the column names.
        """
        column_labels = self.columns
        return iter(column_labels)

    def __contains__(self, key) -> bool:
        """
        Tell whether `key` is one of the ``DataFrame.columns`` labels.

        Parameters
        ----------
        key : hashable
            Key to look for among the columns.

        Returns
        -------
        bool
        """
        column_labels = self.columns
        return column_labels.__contains__(key)

    def __round__(self, decimals=0) -> DataFrame:
        """
        Round each value in a ``DataFrame`` to the given number of decimals.

        Parameters
        ----------
        decimals : int, default: 0
            Number of decimal places to round to.

        Returns
        -------
        DataFrame
        """
        # Delegates to the public `round` method.
        rounded = self.round(decimals)
        return rounded

    def __delitem__(self, key) -> None:
        """
        Delete the column identified by the `key` label.

        Parameters
        ----------
        key : hashable
            Key to delete.
        """
        if key in self:
            reduced_qc = self._query_compiler.delitem(key)
            self._update_inplace(new_query_compiler=reduced_qc)
            return
        raise KeyError(key)

    @_doc_binary_op(
        operation="integer division and modulo",
        bin_op="divmod",
        returns="tuple of two DataFrames",
    )
    def __divmod__(self, right) -> tuple[DataFrame, DataFrame]:
        # Not implemented natively: runs `pandas.DataFrame.__divmod__` through
        # `_default_to_pandas`.
        return self._default_to_pandas(pandas.DataFrame.__divmod__, right)

    @_doc_binary_op(
        operation="integer division and modulo",
        bin_op="divmod",
        right="left",
        returns="tuple of two DataFrames",
    )
    def __rdivmod__(self, left) -> tuple[DataFrame, DataFrame]:
        # Reflected variant of `__divmod__`: runs `pandas.DataFrame.__rdivmod__`
        # through `_default_to_pandas`.
        return self._default_to_pandas(pandas.DataFrame.__rdivmod__, left)

    # Bind the arithmetic dunder methods to the named operator implementations.
    # The in-place variants (`__iadd__`, `__imul__`, ...) are bound to the same
    # functions as the regular operators.
    __add__ = add
    __iadd__ = add  # pragma: no cover
    __radd__ = radd
    __mul__ = mul
    __imul__ = mul  # pragma: no cover
    __rmul__ = rmul
    __pow__ = pow
    __ipow__ = pow  # pragma: no cover
    __rpow__ = rpow
    __sub__ = sub
    __isub__ = sub  # pragma: no cover
    __rsub__ = rsub
    __floordiv__ = floordiv
    __ifloordiv__ = floordiv  # pragma: no cover
    __rfloordiv__ = rfloordiv
    __truediv__ = truediv
    __itruediv__ = truediv  # pragma: no cover
    __rtruediv__ = rtruediv
    __mod__ = mod
    __imod__ = mod  # pragma: no cover
    __rmod__ = rmod
    __rdiv__ = rdiv

    def __dataframe__(self, nan_as_null: bool = False, allow_copy: bool = True):
        """
        Get a Modin DataFrame that implements the dataframe exchange protocol.

        See more about the protocol in https://data-apis.org/dataframe-protocol/latest/index.html.

        Parameters
        ----------
        nan_as_null : bool, default: False
            A keyword intended for the consumer to tell the producer
            to overwrite null values in the data with ``NaN`` (or ``NaT``).
            This currently has no effect; once support for nullable extension
            dtypes is added, this value should be propagated to columns.
        allow_copy : bool, default: True
            A keyword that defines whether or not the library is allowed
            to make a copy of the data. For example, copying data would be necessary
            if a library supports strided buffers, given that this protocol
            specifies contiguous buffers. Currently, if the flag is set to ``False``
            and a copy is needed, a ``RuntimeError`` will be raised.

        Returns
        -------
        ProtocolDataframe
            A dataframe object following the dataframe protocol specification.
        """
        # Both flags are forwarded unchanged to the query compiler.
        exchange_options = {"nan_as_null": nan_as_null, "allow_copy": allow_copy}
        return self._query_compiler.to_interchange_dataframe(**exchange_options)

    def __dataframe_consortium_standard__(
        self, *, api_version: str | None = None
    ):  # noqa: PR01, RT01
        """
        Provide entry point to the Consortium DataFrame Standard API.

        This is developed and maintained outside of Modin.
        Please report any issues to https://github.com/data-apis/dataframe-api-compat.
        """
        # The compatibility package is an optional dependency and is imported on demand.
        compat_package = import_optional_dependency(
            "dataframe_api_compat", "implementation"
        )
        to_standard = (
            compat_package.modin_standard.convert_to_standard_compliant_dataframe
        )
        return to_standard(self, api_version=api_version)

    @property
    def attrs(self) -> dict:  # noqa: RT01, D200
        """
        Return dictionary of global attributes of this dataset.
        """

        # A named function (rather than a property object) is handed to
        # `_default_to_pandas`; it reads `attrs` from the pandas object.
        def attrs(df):
            return df.attrs

        return self._default_to_pandas(attrs)

    @property
    def style(self):  # noqa: RT01, D200
        """
        Return a Styler object.
        """

        # The property access is wrapped in a named function before it is
        # handed to `_default_to_pandas`.
        def style(df):
            """Define __name__ attr because properties do not have it."""
            return df.style

        return self._default_to_pandas(style)

    def reindex_like(
        self: DataFrame,
        other,
        method=None,
        copy: Optional[bool] = None,
        limit=None,
        tolerance=None,
    ) -> DataFrame:
        # The pandas docs describe this as
        # "Same as calling .reindex(index=other.index, columns=other.columns,...).":
        # https://pandas.pydata.org/pandas-docs/version/1.4/reference/api/pandas.DataFrame.reindex_like.html
        reindex_kwargs = dict(
            index=other.index,
            columns=other.columns,
            method=method,
            # An unspecified `copy` is treated as True.
            copy=True if copy is None else copy,
            limit=limit,
            tolerance=tolerance,
        )
        return self.reindex(**reindex_kwargs)

    def _create_or_update_from_compiler(
        self, new_query_compiler, inplace=False
    ) -> Union[DataFrame, None]:
        """
        Build a new ``DataFrame`` from `new_query_compiler` or adopt it in place.

        Parameters
        ----------
        new_query_compiler : PandasQueryCompiler
            QueryCompiler to use to manage the data.
        inplace : bool, default: False
            Whether or not to perform update or creation inplace.

        Returns
        -------
        DataFrame or None
            None if update was done, ``DataFrame`` otherwise.
        """
        # The new compiler has to be an instance of one of the base classes of
        # the current compiler's class.
        accepted_bases = self._query_compiler.__class__.__bases__
        assert isinstance(
            new_query_compiler, accepted_bases
        ), "Invalid Query Compiler object: {}".format(type(new_query_compiler))
        if inplace:
            self._update_inplace(new_query_compiler=new_query_compiler)
            return None
        return self.__constructor__(query_compiler=new_query_compiler)

    def _get_numeric_data(self, axis: int) -> DataFrame:
        """
        Grab only numeric data from ``DataFrame``.

        Parameters
        ----------
        axis : {0, 1}
            Axis to inspect on having numeric types only.

        Returns
        -------
        DataFrame
            ``DataFrame`` with numeric data.
        """
        # Pandas ignores `numeric_only` if `axis` is 1, but we do have to drop
        # non-numeric columns if `axis` is 0.
        if axis != 0:
            return self
        # `dtypes` is read once and every label is paired with its own dtype.
        # Looking a label up with `dtypes[label]` instead returns a Series when
        # column labels are duplicated; `is_numeric_dtype` then inspects that
        # Series (object dtype) and reports numeric columns as non-numeric.
        non_numeric_labels = [
            label
            for label, dtype in self.dtypes.items()
            if not is_numeric_dtype(dtype)
        ]
        # NOTE(review): `drop` is presumably label-based, so a label shared by a
        # numeric and a non-numeric column still removes both - confirm.
        return self.drop(columns=non_numeric_labels)

    def _validate_dtypes(self, numeric_only=False) -> None:
        """
        Check that all the dtypes are the same.

        Parameters
        ----------
        numeric_only : bool, default: False
            Whether or not to allow only numeric data.
            If True and non-numeric data is found, exception
            will be raised.
        """
        dtypes = self._query_compiler.get_dtypes_set()
        # Any member of the set serves as the reference dtype. The default keeps
        # an empty set (nothing to validate) from raising ``StopIteration``.
        reference_dtype = next(iter(dtypes), None)
        for current in dtypes:
            if numeric_only and not is_numeric_dtype(current):
                raise TypeError("{0} is not a numeric data type".format(current))
            elif not numeric_only and current != reference_dtype:
                raise TypeError(
                    "Cannot compare type '{0}' with type '{1}'".format(
                        current, reference_dtype
                    )
                )

    def _validate_dtypes_min_max(self, axis, numeric_only) -> DataFrame:
        """
        Validate data dtype for `min` and `max` methods.

        Parameters
        ----------
        axis : {0, 1}
            Axis to validate over.
        numeric_only : bool
            Whether or not to allow only numeric data.
            If True and non-numeric data is found, exception
            will be raised.

        Returns
        -------
        DataFrame
        """
        # Comparing numeric with non-numeric values makes no sense, so a frame
        # mixing both must raise a TypeError. `numeric_only` is tested against
        # False explicitly because None means "default to True if the operation
        # fails with mixed dtypes".
        if axis and numeric_only is False:
            if any(not is_numeric_dtype(dtype) for dtype in self.dtypes):
                raise TypeError("Cannot compare Numeric and Non-Numeric Types")

        if numeric_only:
            return self._get_numeric_data(axis)
        return self

    def _validate_dtypes_prod_mean(
        self, axis, numeric_only, ignore_axis=False
    ) -> DataFrame:
        """
        Validate data dtype for `prod` and `mean` methods.

        Parameters
        ----------
        axis : {0, 1}
            Axis to validate over.
        numeric_only : bool
            Whether or not to allow only numeric data.
            If True and non-numeric data is found, exception
            will be raised.
        ignore_axis : bool, default: False
            Whether or not to ignore `axis` parameter.

        Returns
        -------
        DataFrame
        """
        # Operations between numeric and non-numeric values make no sense, so a
        # frame mixing both must raise a TypeError. `numeric_only` is tested
        # against False explicitly because None means "default to True if the
        # operation fails with mixed dtypes".
        check_requested = (axis or ignore_axis) and numeric_only is False
        if check_requested and any(
            not is_numeric_dtype(dtype) for dtype in self.dtypes
        ):
            raise TypeError("Cannot operate on Numeric and Non-Numeric Types")

        if numeric_only:
            return self._get_numeric_data(axis)
        return self

    def _to_pandas(self) -> pandas.DataFrame:
        """
        Convert Modin ``DataFrame`` to pandas ``DataFrame``.

        Recommended conversion method: `dataframe.modin.to_pandas()`.

        Returns
        -------
        pandas.DataFrame
        """
        # The conversion itself is performed by the query compiler.
        query_compiler = self._query_compiler
        return query_compiler.to_pandas()

    def _validate_eval_query(self, expr, **kwargs) -> None:
        """
        Validate the arguments of ``eval`` and ``query`` functions.

        Parameters
        ----------
        expr : str
            The expression to evaluate. This string cannot contain any
            Python statements, only Python expressions.
        **kwargs : dict
            Optional arguments of ``eval`` and ``query`` functions.
        """
        # Local import: only this method needs `re`.
        import re

        # Only string expressions are validated here.
        if not isinstance(expr, str):
            return
        if expr == "":
            raise ValueError("expr cannot be an empty string")

        # Look for `not` as a standalone word. A plain substring test would
        # also reject identifiers that merely contain these letters, such as a
        # column named `notes` or `annotation`.
        if kwargs.get("parser") == "python" and re.search(r"\bnot\b", expr):
            ErrorMessage.not_implemented(
                "'Not' nodes are not implemented."
            )  # pragma: no cover

    def _reduce_dimension(self, query_compiler: BaseQueryCompiler) -> Series:
        """
        Wrap the data of `query_compiler` into a ``Series``.

        Parameters
        ----------
        query_compiler : BaseQueryCompiler
            Query compiler to retrieve the data.

        Returns
        -------
        Series
        """
        reduced = Series(query_compiler=query_compiler)
        return reduced

    def _set_axis_name(self, name, axis=0, inplace=False) -> Union[DataFrame, None]:
        """
        Alter the name or names of the axis.

        Parameters
        ----------
        name : str or list of str
            Name for the Index, or list of names for the MultiIndex.
        axis : str or int, default: 0
            The axis to set the label.
            0 or 'index' for the index, 1 or 'columns' for the columns.
        inplace : bool, default: False
            Whether to modify `self` directly or return a copy.

        Returns
        -------
        DataFrame or None
        """
        axis_number = self._get_axis_number(axis)
        # Work on `self` for an in-place update, on a copy otherwise.
        target = self if inplace else self.copy()
        labels_attr = "index" if axis_number == 0 else "columns"
        renamed_labels = getattr(target, labels_attr).set_names(name)
        setattr(target, labels_attr, renamed_labels)
        # Nothing is returned after an in-place update.
        return None if inplace else target

    def _to_datetime(self, **kwargs) -> Series:
        """
        Convert `self` to datetime.

        Parameters
        ----------
        **kwargs : dict
            Optional arguments to use during query compiler's
            `to_datetime` invocation.

        Returns
        -------
        Series of datetime64 dtype
        """
        # Convert through the query compiler, then wrap the result via
        # `_reduce_dimension`.
        converted_qc = self._query_compiler.to_datetime(**kwargs)
        return self._reduce_dimension(query_compiler=converted_qc)

    def _getitem(self, key) -> Union[DataFrame, Series]:
        """
        Get the data specified by `key` for this ``DataFrame``.

        Parameters
        ----------
        key : callable, Series, DataFrame, np.ndarray, pandas.Index or list
            Data identifiers to retrieve.

        Returns
        -------
        Series or DataFrame
            Retrieved data.
        """
        key = apply_if_callable(key, self)
        is_mi_columns = self._query_compiler.has_multiindex(axis=1)
        # Shortcut if key is an actual column. The listed errors are swallowed
        # and the generic dispatch below takes over.
        try:
            if key in self.columns and not is_mi_columns:
                return self._getitem_column(key)
        except (KeyError, ValueError, TypeError):
            pass

        if isinstance(key, Series):
            selected_qc = self._query_compiler.getitem_array(key._query_compiler)
            return self.__constructor__(query_compiler=selected_qc)
        if isinstance(key, (np.ndarray, pandas.Index, list)):
            selected_qc = self._query_compiler.getitem_array(key)
            return self.__constructor__(query_compiler=selected_qc)
        if isinstance(key, DataFrame):
            return self.where(key)
        if is_mi_columns:
            # MultiIndex columns default to pandas.
            return self._default_to_pandas(pandas.DataFrame.__getitem__, key)
        return self._getitem_column(key)

    # Persistence support methods - BEGIN
    @classmethod
    def _inflate_light(cls, query_compiler, source_pid) -> DataFrame:
        """
        Re-creates the object from previously-serialized lightweight representation.

        The method is used for faster but not disk-storable persistence.

        Parameters
        ----------
        query_compiler : BaseQueryCompiler
            Query compiler to use for object re-creation.
        source_pid : int
            Determines whether a Modin or pandas object needs to be created.
            Modin objects are created only on the main process.

        Returns
        -------
        DataFrame
            New ``DataFrame`` based on the `query_compiler`.
        """
        in_source_process = os.getpid() == source_pid
        if in_source_process:
            return cls(query_compiler=query_compiler)
        # The current logic does not involve creating Modin objects
        # and manipulation with them in worker processes
        return query_compiler.to_pandas()

    @classmethod
    def _inflate_full(cls, pandas_df, source_pid) -> DataFrame:
        """
        Re-creates the object from previously-serialized disk-storable representation.

        Parameters
        ----------
        pandas_df : pandas.DataFrame
            Data to use for object re-creation.
        source_pid : int
            Determines whether a Modin or pandas object needs to be created.
            Modin objects are created only on the main process.

        Returns
        -------
        DataFrame
            New ``DataFrame`` based on the `pandas_df`.
        """
        in_source_process = os.getpid() == source_pid
        if in_source_process:
            return cls(data=from_pandas(pandas_df))
        # The current logic does not involve creating Modin objects
        # and manipulation with them in worker processes
        return pandas_df

    def __reduce__(self):
        # Pickle support: returns a `(callable, args)` pair that re-creates the frame.
        self._query_compiler.finalize()
        pid = os.getpid()
        needs_full_copy = (
            PersistentPickle.get()
            or not self._query_compiler.support_materialization_in_worker_process()
        )
        if needs_full_copy:
            # Serialize the data as a pandas DataFrame.
            return self._inflate_full, (self._to_pandas(), pid)
        # Otherwise serialize only the query compiler.
        return self._inflate_light, (self._query_compiler, pid)

    # Persistence support methods - END

    @doc(SET_BACKEND_DOC, class_name=__qualname__)
    def set_backend(
        self,
        backend: str,
        inplace: bool = False,
        *,
        switch_operation: Optional[str] = None,
    ) -> Optional[Self]:
        # The `doc(SET_BACKEND_DOC, ...)` decorator is applied to this method;
        # the body only forwards all arguments to the parent implementation.
        return super().set_backend(
            backend=backend, inplace=inplace, switch_operation=switch_operation
        )

    # `move_to` is an alias of `set_backend`.
    move_to = set_backend

    @doc(GET_BACKEND_DOC, class_name=__qualname__)
    @disable_logging
    def get_backend(self) -> str:
        # The `doc(GET_BACKEND_DOC, ...)` decorator is applied to this method;
        # the body only forwards to the parent implementation.
        return super().get_backend()

    @disable_logging
    def __delattr__(self, name: str) -> None:
        """
        Delete attribute `name`.

        Parameters
        ----------
        name : str
            Name of the attribute to delete.

        Returns
        -------
        None
        """
        registered = self._get_extension(name, __class__._extensions)
        if registered is sentinel:
            # Not an extension: regular attribute deletion.
            return super().__delattr__(name)
        # Extensions handle deletion through their own `__delete__`.
        return registered.__delete__(self)

    @disable_logging
    @_inherit_docstrings(BasePandasDataset._copy_into)
    def _copy_into(self, other: DataFrame) -> None:
        # Share this frame's state with `other`; the objects are assigned by
        # reference, not copied.
        for state_attr in ("_query_compiler", "_siblings"):
            setattr(other, state_attr, getattr(self, state_attr))


================================================
FILE: modin/pandas/errors/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.


"""The module is needed to allow the following import `import modin.pandas.errors`."""

from pandas.errors import *  # noqa: F403, F401
from pandas.errors import __all__  # noqa: F401


================================================
FILE: modin/pandas/general.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Implement pandas general API."""

from __future__ import annotations

import warnings
from typing import Hashable, Iterable, Mapping, Optional, Union

import numpy as np
import pandas
from pandas._libs.lib import NoDefault, no_default
from pandas._typing import ArrayLike, DtypeBackend, Scalar, npt
from pandas.core.dtypes.common import is_list_like

from modin.core.storage_formats import BaseQueryCompiler
from modin.core.storage_formats.pandas.query_compiler_caster import (
    wrap_free_function_in_argument_caster,
)
from modin.logging import enable_logging
from modin.pandas.io import to_pandas
from modin.utils import _inherit_docstrings, _maybe_warn_on_default

from .base import BasePandasDataset
from .dataframe import DataFrame
from .series import Series


@enable_logging
def _isna(
    obj,
) -> bool | npt.NDArray[np.bool_] | Series | DataFrame:  # noqa: PR01, RT01, D200
    """
    Detect missing values for an array-like object.
    """
    # Anything that is not a Modin object is handled by pandas.
    if not isinstance(obj, BasePandasDataset):
        return pandas.isna(obj)
    return obj.isna()


# `isna` and `isnull` are both built from `_isna`: each is wrapped by the
# argument caster under its own name and then passed through the same
# docstring-inheriting decorator.
_inherit_isna_docstring = _inherit_docstrings(pandas.isnull, apilink="pandas.isna")

isna = _inherit_isna_docstring(wrap_free_function_in_argument_caster("isna")(_isna))

isnull = _inherit_isna_docstring(wrap_free_function_in_argument_caster("isnull")(_isna))


@enable_logging
def _notna(
    obj,
) -> bool | npt.NDArray[np.bool_] | Series | DataFrame:  # noqa: PR01, RT01, D200
    """
    Detect non-missing values for an array-like object.
    """
    # Anything that is not a Modin object is handled by pandas.
    if not isinstance(obj, BasePandasDataset):
        return pandas.notna(obj)
    return obj.notna()


# `notnull` and `notna` are both built from `_notna`: each is wrapped by the
# argument caster under its own name and then passed through the same
# docstring-inheriting decorator.
_inherit_notna_docstring = _inherit_docstrings(pandas.notna, apilink="pandas.notna")

notnull = _inherit_notna_docstring(
    wrap_free_function_in_argument_caster("notnull")(_notna)
)

notna = _inherit_notna_docstring(wrap_free_function_in_argument_caster("notna")(_notna))


@_inherit_docstrings(pandas.merge, apilink="pandas.merge")
@enable_logging
@wrap_free_function_in_argument_caster("merge")
def merge(
    left,
    right,
    how: str = "inner",
    on=None,
    left_on=None,
    right_on=None,
    left_index: bool = False,
    right_index: bool = False,
    sort: bool = False,
    suffixes=("_x", "_y"),
    copy: Optional[bool] = None,
    indicator: bool = False,
    validate=None,
) -> DataFrame:  # noqa: PR01, RT01, D200
    """
    Merge DataFrame or named Series objects with a database-style join.
    """
    # A named Series is promoted to a one-column frame; an unnamed one is rejected.
    if isinstance(left, Series):
        if left.name is None:
            raise ValueError("Cannot merge a Series without a name")
        left = left.to_frame()

    if not isinstance(left, DataFrame):
        raise TypeError(
            f"Can only merge Series or DataFrame objects, a {type(left)} was passed"
        )

    # All remaining options are forwarded unchanged to `DataFrame.merge`.
    merge_options = dict(
        how=how,
        on=on,
        left_on=left_on,
        right_on=right_on,
        left_index=left_index,
        right_index=right_index,
        sort=sort,
        suffixes=suffixes,
        copy=copy,
        indicator=indicator,
        validate=validate,
    )
    return left.merge(right, **merge_options)


@_inherit_docstrings(pandas.merge_ordered, apilink="pandas.merge_ordered")
@enable_logging
@wrap_free_function_in_argument_caster("merge_ordered")
def merge_ordered(
    left,
    right,
    on=None,
    left_on=None,
    right_on=None,
    left_by=None,
    right_by=None,
    fill_method=None,
    suffixes=("_x", "_y"),
    how: str = "outer",
) -> DataFrame:  # noqa: PR01, RT01, D200
    """
    Perform a merge for ordered data with optional filling/interpolation.
    """
    # Both operands must be Modin objects; `left` is checked before `right`.
    accepted_types = (Series, DataFrame)
    for candidate in (left, right):
        if isinstance(candidate, accepted_types):
            continue
        raise TypeError(
            f"Can only merge Series or DataFrame objects, a {type(candidate)} was passed"
        )

    # The merge itself is performed by the left operand's query compiler.
    merged_qc = left._query_compiler.merge_ordered(
        right._query_compiler,
        on=on,
        left_on=left_on,
        right_on=right_on,
        left_by=left_by,
        right_by=right_by,
        fill_method=fill_method,
        suffixes=suffixes,
        how=how,
    )
    return DataFrame(query_compiler=merged_qc)


@_inherit_docstrings(pandas.merge_asof, apilink="pandas.merge_asof")
@enable_logging
@wrap_free_function_in_argument_caster("merge_asof")
def merge_asof(
    left,
    right,
    on=None,
    left_on=None,
    right_on=None,
    left_index: bool = False,
    right_index: bool = False,
    by=None,
    left_by=None,
    right_by=None,
    suffixes=("_x", "_y"),
    tolerance=None,
    allow_exact_matches: bool = True,
    direction: str = "backward",
) -> DataFrame:  # noqa: PR01, RT01, D200
    """
    Perform a merge by key distance.
    """
    if not isinstance(left, DataFrame):
        # The operand that failed the check is `left`, so that is the type
        # that has to be reported to the caller.
        raise ValueError(
            "can not merge DataFrame with instance of type {}".format(type(left))
        )
    left._query_compiler._maybe_warn_on_default(message="`merge_asof`")

    # As of Pandas 1.2 these should raise an error; before that it did
    # something likely random:
    if (
        (on and (left_index or right_index))
        or (left_on and left_index)
        or (right_on and right_index)
    ):
        raise ValueError("Can't combine left/right_index with left/right_on or on.")

    # `on` is a shorthand that sets the same key for both sides.
    if on is not None:
        if left_on is not None or right_on is not None:
            raise ValueError("If 'on' is set, 'left_on' and 'right_on' can't be set.")
        left_on = on
        right_on = on

    # `by` is a shorthand that sets the same group keys for both sides.
    if by is not None:
        if left_by is not None or right_by is not None:
            raise ValueError("Can't have both 'by' and 'left_by' or 'right_by'")
        left_by = right_by = by

    # Each side needs a join key: either a column label or its index.
    if left_on is None and not left_index:
        raise ValueError("Must pass on, left_on, or left_index=True")

    if right_on is None and not right_index:
        raise ValueError("Must pass on, right_on, or right_index=True")

    return DataFrame(
        query_compiler=left._query_compiler.merge_asof(
            right._query_compiler,
            left_on,
            right_on,
            left_index,
            right_index,
            left_by,
            right_by,
            suffixes,
            tolerance,
            allow_exact_matches,
            direction,
        )
    )


@_inherit_docstrings(pandas.pivot_table, apilink="pandas.pivot_table")
@enable_logging
@wrap_free_function_in_argument_caster("pivot_table")
def pivot_table(
    data,
    values=None,
    index=None,
    columns=None,
    aggfunc="mean",
    fill_value=None,
    margins=False,
    dropna=True,
    margins_name="All",
    observed=no_default,
    sort=True,
) -> DataFrame:
    # Only Modin DataFrames are accepted; the actual work is done by the
    # `pivot_table` method of `data`, which receives every option by keyword.
    if isinstance(data, DataFrame):
        pivot_options = {
            "values": values,
            "index": index,
            "columns": columns,
            "aggfunc": aggfunc,
            "fill_value": fill_value,
            "margins": margins,
            "dropna": dropna,
            "margins_name": margins_name,
            "observed": observed,
            "sort": sort,
        }
        return data.pivot_table(**pivot_options)

    raise ValueError(
        "can not create pivot table with instance of type {}".format(type(data))
    )


@_inherit_docstrings(pandas.pivot, apilink="pandas.pivot")
@enable_logging
@wrap_free_function_in_argument_caster("pivot")
def pivot(
    data, *, columns, index=no_default, values=no_default
) -> DataFrame:  # noqa: PR01, RT01, D200
    """
    Return reshaped DataFrame organized by given index / column values.
    """
    # Anything that is not a Modin DataFrame is rejected up front; otherwise
    # the call is forwarded to the `pivot` method of `data`.
    if isinstance(data, DataFrame):
        return data.pivot(index=index, columns=columns, values=values)
    raise ValueError("can not pivot with instance of type {}".format(type(data)))


@_inherit_docstrings(pandas.to_numeric, apilink="pandas.to_numeric")
@enable_logging
@wrap_free_function_in_argument_caster("to_numeric")
def to_numeric(
    arg,
    errors="raise",
    downcast=None,
    dtype_backend: Union[DtypeBackend, NoDefault] = no_default,
) -> Scalar | np.ndarray | Series:  # noqa: PR01, RT01, D200
    """
    Convert argument to a numeric type.
    """
    conversion_options = {
        "errors": errors,
        "downcast": downcast,
        "dtype_backend": dtype_backend,
    }
    # Modin Series use their own implementation; every other input is handed
    # to pandas unchanged.
    if isinstance(arg, Series):
        return arg._to_numeric(**conversion_options)
    return pandas.to_numeric(arg, **conversion_options)


@_inherit_docstrings(pandas.qcut, apilink="pandas.qcut")
@enable_logging
@wrap_free_function_in_argument_caster("qcut")
def qcut(
    x, q, labels=None, retbins=False, precision=3, duplicates="raise"
):  # noqa: PR01, RT01, D200
    """
    Quantile-based discretization function.
    """
    # A Modin Series has its own implementation; any other input goes to pandas.
    if isinstance(x, Series):
        return x._qcut(
            q,
            labels=labels,
            retbins=retbins,
            precision=precision,
            duplicates=duplicates,
        )
    return pandas.qcut(
        x,
        q,
        labels=labels,
        retbins=retbins,
        precision=precision,
        duplicates=duplicates,
    )


@_inherit_docstrings(pandas.cut, apilink="pandas.cut")
@enable_logging
@wrap_free_function_in_argument_caster("cut")
def cut(
    x,
    bins,
    right: bool = True,
    labels=None,
    retbins: bool = False,
    precision: int = 3,
    include_lowest: bool = False,
    duplicates: str = "raise",
    ordered: bool = True,
):
    if isinstance(x, DataFrame):
        raise ValueError("Input array must be 1 dimensional")

    # The same keyword options are used by both the pandas fallback and the
    # query compiler implementation.
    cut_options = {
        "right": right,
        "labels": labels,
        "retbins": retbins,
        "precision": precision,
        "include_lowest": include_lowest,
        "duplicates": duplicates,
        "ordered": ordered,
    }

    if not isinstance(x, Series):
        # Inputs that are not Modin Series are processed by pandas directly.
        _maybe_warn_on_default(
            reason=f"pd.cut is not supported on objects of type {type(x)}"
        )
        return pandas.cut(x, bins, **cut_options)

    qc_type = type(x._query_compiler)

    def _wrap_in_series_object(qc_result):
        # Query compilers become Series, tuples/lists are converted element by
        # element (always yielding a tuple), anything else passes through.
        if isinstance(qc_result, qc_type):
            return Series(query_compiler=qc_result)
        if isinstance(qc_result, (tuple, list)):
            return tuple(_wrap_in_series_object(item) for item in qc_result)
        return qc_result

    raw_result = x._query_compiler.cut(bins, **cut_options)
    return _wrap_in_series_object(raw_result)


@_inherit_docstrings(pandas.unique, apilink="pandas.unique")
@enable_logging
@wrap_free_function_in_argument_caster("unique")
def unique(values) -> ArrayLike:  # noqa: PR01, RT01, D200
    """
    Return unique values based on a hash table.
    """
    # Wrap the input in a Series and let its `unique` method do the work.
    as_series = Series(values)
    return as_series.unique()


# Adding docstring since pandas docs don't have web section for this function.
@enable_logging
@wrap_free_function_in_argument_caster("value_counts")
def value_counts(
    values, sort=True, ascending=False, normalize=False, bins=None, dropna=True
) -> Series:
    """
    Compute a histogram of the counts of non-null values.

    Deprecated: a ``FutureWarning`` is emitted on every call; the computation
    itself is delegated to ``Series(values).value_counts``.

    Parameters
    ----------
    values : ndarray (1-d)
        Values to perform computation.
    sort : bool, default: True
        Sort by values.
    ascending : bool, default: False
        Sort in ascending order.
    normalize : bool, default: False
        If True then compute a relative histogram.
    bins : integer, optional
        Rather than count values, group them into half-open bins,
        convenience for pd.cut, only works with numeric data.
    dropna : bool, default: True
        Don't include counts of NaN.

    Returns
    -------
    Series
    """
    deprecation_message = (
        "pandas.value_counts is deprecated and will be removed in a "
        + "future version. Use pd.Series(obj).value_counts() instead."
    )
    warnings.warn(deprecation_message, FutureWarning)

    counting_options = {
        "sort": sort,
        "ascending": ascending,
        "normalize": normalize,
        "bins": bins,
        "dropna": dropna,
    }
    return Series(values).value_counts(**counting_options)


@_inherit_docstrings(pandas.concat, apilink="pandas.concat")
@enable_logging
@wrap_free_function_in_argument_caster(name="concat")
def concat(
    objs: "Iterable[DataFrame | Series] | Mapping[Hashable, DataFrame | Series]",
    *,
    axis=0,
    join="outer",
    ignore_index: bool = False,
    keys=None,
    levels=None,
    names=None,
    verify_integrity: bool = False,
    sort: bool = False,
    copy: Optional[bool] = None,
) -> DataFrame | Series:  # noqa: PR01, RT01, D200
    """
    Concatenate Modin objects along a particular axis.
    """
    if isinstance(objs, (pandas.Series, Series, DataFrame, str, pandas.DataFrame)):
        raise TypeError(
            "first argument must be an iterable of pandas "
            + "objects, you passed an object of type "
            + f'"{type(objs).__name__}"'
        )
    axis = pandas.DataFrame()._get_axis_number(axis)
    if isinstance(objs, dict):
        input_list_of_objs = list(objs.values())
    else:
        input_list_of_objs = list(objs)
    if len(input_list_of_objs) == 0:
        raise ValueError("No objects to concatenate")

    # `None` entries are dropped; the remaining objects are kept under their
    # own name because `list_of_objs` is filtered and rebound further below.
    non_null_objs = [obj for obj in input_list_of_objs if obj is not None]

    if len(non_null_objs) == 0:
        raise ValueError("All objects passed were None")
    list_of_objs = non_null_objs

    # `None` was filtered out above, so it is a safe "nothing found" marker.
    invalid_obj = next(
        (
            obj
            for obj in list_of_objs
            if not isinstance(obj, (pandas.Series, Series, pandas.DataFrame, DataFrame))
        ),
        None,
    )
    if invalid_obj is not None:
        # Interpolate the offending type into the message instead of passing
        # the template and the type to the exception as two separate args.
        raise ValueError(
            f'cannot concatenate object of type "{type(invalid_obj)}"; only '
            + "modin.pandas.Series "
            + "and modin.pandas.DataFrame objs are "
            + "valid"
        )
    all_series = all(isinstance(obj, Series) for obj in list_of_objs)
    if all_series and axis == 0:
        # Only Modin Series stacked along the rows: the result stays a Series.
        return Series(
            query_compiler=list_of_objs[0]._query_compiler.concat(
                axis,
                [o._query_compiler for o in list_of_objs[1:]],
                join=join,
                join_axes=None,
                ignore_index=ignore_index,
                keys=None,
                levels=None,
                names=None,
                verify_integrity=False,
                copy=True,
                sort=sort,
            )
        )
    if join == "outer":
        # Filter out empties
        list_of_objs = [
            obj
            for obj in list_of_objs
            if (
                isinstance(obj, (Series, pandas.Series))
                or (isinstance(obj, DataFrame) and obj._query_compiler.lazy_shape)
                or sum(obj.shape) > 0
            )
        ]
    elif join != "inner":
        raise ValueError(
            "Only can inner (intersect) or outer (union) join the other axis"
        )
    # From here on `list_of_objs` holds query compilers, not frames.
    list_of_objs = [
        (
            obj._query_compiler
            if isinstance(obj, DataFrame)
            else DataFrame(obj)._query_compiler
        )
        for obj in list_of_objs
    ]
    if keys is None and isinstance(objs, dict):
        keys = list(objs.keys())
    if keys is not None:
        if all_series:
            new_idx = keys
        else:
            list_of_objs = [
                list_of_objs[i] for i in range(min(len(list_of_objs), len(keys)))
            ]
            new_idx_labels = {
                k: v.index if axis == 0 else v.columns
                for k, v in zip(keys, list_of_objs)
            }
            # Prepend the key to every label of the corresponding object.
            tuples = [
                (k, *o) if isinstance(o, tuple) else (k, o)
                for k, obj in new_idx_labels.items()
                for o in obj
            ]
            new_idx = pandas.MultiIndex.from_tuples(tuples)
            if names is not None:
                new_idx.names = names
            else:
                old_name = _determine_name(list_of_objs, axis)
                if old_name is not None:
                    new_idx.names = [None] + old_name
    else:
        new_idx = None

    if len(list_of_objs) == 0:
        # Every object was filtered out as empty: return an empty frame that
        # carries the combined index. The non-None inputs are used so that a
        # `None` entry cannot be dereferenced here.
        return DataFrame(
            index=non_null_objs[0].index.append([f.index for f in non_null_objs[1:]])
        )

    # `keys`/`names` are not forwarded to the query compiler; they are applied
    # to the result through `new_idx` below.
    new_query_compiler = list_of_objs[0].concat(
        axis,
        list_of_objs[1:],
        join=join,
        join_axes=None,
        ignore_index=ignore_index,
        keys=None,
        levels=None,
        names=None,
        verify_integrity=False,
        copy=True,
        sort=sort,
    )
    result_df = DataFrame(query_compiler=new_query_compiler)
    if new_idx is not None:
        if axis == 0:
            result_df.index = new_idx
        else:
            result_df.columns = new_idx
    return result_df


@_inherit_docstrings(pandas.to_datetime, apilink="pandas.to_datetime")
@enable_logging
@wrap_free_function_in_argument_caster("to_datetime")
def to_datetime(
    arg,
    errors="raise",
    dayfirst=False,
    yearfirst=False,
    utc=False,
    format=None,
    exact=no_default,
    unit=None,
    infer_datetime_format=no_default,
    origin="unix",
    cache=True,
) -> Scalar | ArrayLike | Series | DataFrame:  # noqa: PR01, RT01, D200
    """
    Convert argument to datetime.
    """
    conversion_options = {
        "errors": errors,
        "dayfirst": dayfirst,
        "yearfirst": yearfirst,
        "utc": utc,
        "format": format,
        "exact": exact,
        "unit": unit,
        "infer_datetime_format": infer_datetime_format,
        "origin": origin,
        "cache": cache,
    }
    # Objects exposing `_to_datetime` convert themselves; everything else is
    # handed to pandas with the very same options.
    if hasattr(arg, "_to_datetime"):
        return arg._to_datetime(**conversion_options)
    return pandas.to_datetime(arg, **conversion_options)


@_inherit_docstrings(pandas.get_dummies, apilink="pandas.get_dummies")
@enable_logging
@wrap_free_function_in_argument_caster("get_dummies")
def get_dummies(
    data,
    prefix=None,
    prefix_sep="_",
    dummy_na=False,
    columns=None,
    sparse=False,
    drop_first=False,
    dtype=None,
) -> DataFrame:  # noqa: PR01, RT01, D200
    """
    Convert categorical variable into dummy/indicator variables.
    """
    if sparse:
        raise NotImplementedError(
            "SparseArray is not implemented. "
            + "To contribute to Modin, please visit "
            + "github.com/modin-project/modin."
        )

    if isinstance(data, DataFrame):
        # Modin DataFrames are encoded by their query compiler.
        dummies_qc = data._query_compiler.get_dummies(
            columns,
            prefix=prefix,
            prefix_sep=prefix_sep,
            dummy_na=dummy_na,
            drop_first=drop_first,
            dtype=dtype,
        )
        return DataFrame(query_compiler=dummies_qc)

    # Any other input is encoded by pandas; a Modin Series is converted to its
    # pandas counterpart first.
    _maybe_warn_on_default("`get_dummies` on non-DataFrame")
    pandas_input = data._to_pandas() if isinstance(data, Series) else data
    pandas_dummies = pandas.get_dummies(
        pandas_input,
        prefix=prefix,
        prefix_sep=prefix_sep,
        dummy_na=dummy_na,
        columns=columns,
        sparse=sparse,
        drop_first=drop_first,
        dtype=dtype,
    )
    return DataFrame(pandas_dummies)


@_inherit_docstrings(pandas.melt, apilink="pandas.melt")
@enable_logging
@wrap_free_function_in_argument_caster("melt")
def melt(
    frame,
    id_vars=None,
    value_vars=None,
    var_name=None,
    value_name="value",
    col_level=None,
    ignore_index: bool = True,
) -> DataFrame:  # noqa: PR01, RT01, D200
    """
    Unpivot a DataFrame from wide to long format, optionally leaving identifiers set.
    """
    # Thin wrapper: every option is forwarded by keyword to `frame.melt`.
    melt_options = {
        "id_vars": id_vars,
        "value_vars": value_vars,
        "var_name": var_name,
        "value_name": value_name,
        "col_level": col_level,
        "ignore_index": ignore_index,
    }
    return frame.melt(**melt_options)


@_inherit_docstrings(pandas.crosstab, apilink="pandas.crosstab")
@enable_logging
@wrap_free_function_in_argument_caster("crosstab")
def crosstab(
    index,
    columns,
    values=None,
    rownames=None,
    colnames=None,
    aggfunc=None,
    margins=False,
    margins_name: str = "All",
    dropna: bool = True,
    normalize=False,
) -> DataFrame:  # noqa: PR01, RT01, D200
    """
    Compute a simple cross tabulation of two (or more) factors.
    """
    # The tabulation is computed by pandas and the result is wrapped into a
    # Modin DataFrame afterwards.
    _maybe_warn_on_default("`crosstab`")
    return DataFrame(
        pandas.crosstab(
            index,
            columns,
            values=values,
            rownames=rownames,
            colnames=colnames,
            aggfunc=aggfunc,
            margins=margins,
            margins_name=margins_name,
            dropna=dropna,
            normalize=normalize,
        )
    )


# Adding docstring since pandas docs don't have web section for this function.
@enable_logging
@wrap_free_function_in_argument_caster("lreshape")
def lreshape(data: DataFrame, groups, dropna=True) -> DataFrame:
    """
    Reshape wide-format data to long. Generalized inverse of ``DataFrame.pivot``.

    Each key of `groups` names a new column and each value lists the old
    columns that are "melted" under that new name. The reshape itself is
    performed by pandas on a pandas copy of `data`.

    Parameters
    ----------
    data : DataFrame
        The wide-format DataFrame.
    groups : dict
        Dictionary in the form: `{new_name : list_of_columns}`.
    dropna : bool, default: True
        Whether include columns whose entries are all NaN or not.

    Returns
    -------
    DataFrame
        Reshaped DataFrame.
    """
    if isinstance(data, DataFrame):
        data._query_compiler._maybe_warn_on_default(message="`lreshape`")
        reshaped = pandas.lreshape(to_pandas(data), groups, dropna=dropna)
        return DataFrame(reshaped)
    raise ValueError("can not lreshape with instance of type {}".format(type(data)))


@_inherit_docstrings(pandas.wide_to_long, apilink="pandas.wide_to_long")
@enable_logging
@wrap_free_function_in_argument_caster("wide_to_long")
def wide_to_long(
    df: DataFrame, stubnames, i, j, sep: str = "", suffix: str = r"\d+"
) -> DataFrame:  # noqa: PR01, RT01, D200
    """
    Unpivot a DataFrame from wide to long format.
    """
    # Only Modin DataFrames are accepted; the reshape is done by the query
    # compiler of `df`.
    if isinstance(df, DataFrame):
        long_qc = df._query_compiler.wide_to_long(
            stubnames=stubnames,
            i=i,
            j=j,
            sep=sep,
            suffix=suffix,
        )
        return DataFrame(query_compiler=long_qc)
    raise ValueError(
        "can not wide_to_long with instance of type {}".format(type(df))
    )


@wrap_free_function_in_argument_caster("_determine_name")
def _determine_name(objs: Iterable[BaseQueryCompiler], axis: Union[int, str]):
    """
    Determine names of index after concatenation along passed axis.

    Parameters
    ----------
    objs : iterable of QueryCompilers
        Objects to concatenate.
    axis : int or str
        The axis to concatenate along.

    Returns
    -------
    list with single element
        Computed index name, `None` if it could not be determined.
    """
    axis = pandas.DataFrame()._get_axis_number(axis)

    def get_names(obj):
        return obj.columns.names if axis else obj.index.names

    names = np.array([get_names(obj) for obj in objs])

    # saving old name, only if index names of all objs are the same
    if np.all(names == names[0]):
        # we must do this check to avoid this calls `list(str_like_name)`
        return list(names[0]) if is_list_like(names[0]) else [names[0]]
    else:
        return None


@_inherit_docstrings(pandas.to_timedelta, apilink="pandas.to_timedelta")
@enable_logging
@wrap_free_function_in_argument_caster("to_timedelta")
def to_timedelta(
    arg, unit=None, errors="raise"
) -> Scalar | pandas.Index | Series:  # noqa: PR01, RT01, D200
    """
    Convert argument to timedelta.

    Accepts str, timedelta, list-like or Series for arg parameter.
    Returns a Series if and only if arg is provided as a Series.
    """
    # A Modin Series is converted by its query compiler; every other input is
    # handed to pandas unchanged.
    if isinstance(arg, Series):
        query_compiler = arg._query_compiler.to_timedelta(unit=unit, errors=errors)
        return Series(query_compiler=query_compiler)
    return pandas.to_timedelta(arg, unit=unit, errors=errors)


================================================
FILE: modin/pandas/groupby.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Implement GroupBy public API as pandas does."""

from __future__ import annotations

import warnings
from collections.abc import Iterable
from functools import cached_property
from types import BuiltinFunctionType
from typing import TYPE_CHECKING, Any, Hashable, Optional, Union

import numpy as np
import pandas
import pandas.core.common as com
import pandas.core.groupby
from pandas._libs import lib
from pandas.api.types import is_scalar
from pandas.core.apply import reconstruct_func
from pandas.core.dtypes.common import (
    is_datetime64_any_dtype,
    is_integer,
    is_list_like,
    is_numeric_dtype,
)
from pandas.errors import SpecificationError
from typing_extensions import Self

from modin.core.dataframe.algebra.default2pandas.groupby import GroupBy
from modin.core.storage_formats.base.query_compiler import BaseQueryCompiler
from modin.core.storage_formats.pandas.query_compiler_caster import (
    EXTENSION_DICT_TYPE,
    EXTENSION_NO_LOOKUP,
    QueryCompilerCaster,
    visit_nested_args,
)
from modin.error_message import ErrorMessage
from modin.logging import ClassLogger, disable_logging
from modin.pandas.utils import cast_function_modin2pandas
from modin.utils import (
    MODIN_UNNAMED_SERIES_LABEL,
    _inherit_docstrings,
    hashable,
    sentinel,
    try_cast_to_pandas,
    wrap_into_list,
    wrap_udf_function,
)

from .series import Series
from .utils import is_label
from .window import RollingGroupby

if TYPE_CHECKING:
    from modin.pandas import DataFrame

# Attribute names that `DataFrameGroupBy.__getattribute__` always returns
# unchanged. For any other name, a callable attribute may be swapped for a
# handler that defaults to pandas when the grouped frame is empty.
_DEFAULT_BEHAVIOUR = EXTENSION_NO_LOOKUP | {
    "__class__",
    "__getitem__",
    "__init__",
    "__iter__",
    "_as_index",
    "_axis",
    "_by",
    "_check_index",
    "_columns",
    "_compute_index_grouped",
    "_default_to_pandas",
    "_df",
    "_drop",
    "_idx_name",
    "_index",
    "_internal_by",
    "_is_multi_by",
    "_iter",
    "_kwargs",
    "_level",
    "_pandas_class",
    "_query_compiler",
    "_sort",
    "_wrap_aggregation",
}

# Attribute names for which `DataFrameGroupBy.__getattribute__` skips the
# extension lookup. The same set is passed as `default_behavior_attributes`
# to `_getattr__from_extension_impl` in `DataFrameGroupBy.__getattr__`.
GROUPBY_EXTENSION_NO_LOOKUP = EXTENSION_NO_LOOKUP | {
    "_axis",
    "_idx_name",
    "_df",
    "_query_compiler",
    "_columns",
    "_by",
    "_drop",
    "_return_tuple_when_iterating",
    "_is_multi_by",
    "_level",
    "_kwargs",
    "_get_query_compiler",
}


@_inherit_docstrings(pandas.core.groupby.DataFrameGroupBy)
class DataFrameGroupBy(ClassLogger, QueryCompilerCaster):  # noqa: GL08
    # pandas counterpart of this class; `__getattribute__` checks it with
    # `hasattr` before substituting a handler that defaults to pandas.
    _pandas_class = pandas.core.groupby.DataFrameGroupBy
    # Class-level default; `__init__` stores the per-instance value taken from
    # the `return_tuple_when_iterating` keyword.
    _return_tuple_when_iterating = False
    # The grouped object and its query compiler, both assigned in `__init__`.
    _df: Union[DataFrame, Series]
    _query_compiler: BaseQueryCompiler
    # Extension registry handed to the extension helpers used by
    # `__getattr__`, `__getattribute__` and `__setattr__`.
    _extensions: EXTENSION_DICT_TYPE = EXTENSION_DICT_TYPE(dict)

    def __init__(
        self,
        df: Union[DataFrame, Series],
        by,
        axis,
        level,
        as_index,
        sort,
        group_keys,
        idx_name,
        drop,
        backend_pinned: bool,
        **kwargs,
    ):
        # NOTE: attribute assignments go through the overridden `__setattr__`,
        # so their order is kept stable on purpose.
        self._axis = axis
        self._idx_name = idx_name
        self._df = df
        self._query_compiler = self._df._query_compiler
        self._columns = self._query_compiler.columns
        self._by = by
        self._drop = drop
        # When providing a list of columns of length one to DataFrame.groupby(),
        # the keys that are returned by iterating over the resulting DataFrameGroupBy
        # object will now be tuples of length one (pandas#GH47761)
        self._return_tuple_when_iterating = kwargs.pop(
            "return_tuple_when_iterating", False
        )
        # Whether the backend of this groupby object has been pinned.
        self._backend_pinned = backend_pinned

        qc_type = type(self._query_compiler)
        by_is_qc = isinstance(by, qc_type)
        if by_is_qc:
            # Grouping by a query compiler: "multi" means several columns.
            is_multi_by = len(by.columns) > 1
        elif level is None and is_list_like(by):
            # Grouping by a list-like along the rows: "multi" means every
            # entry is an existing column label, a query compiler or itself
            # list-like.
            is_multi_by = axis == 0 and all(
                (hashable(obj) and obj in self._query_compiler.columns)
                or isinstance(obj, qc_type)
                or is_list_like(obj)
                for obj in self._by
            )
        else:
            is_multi_by = False
        # This tells us whether or not there are multiple columns/rows in the groupby
        self._is_multi_by = is_multi_by
        self._level = level
        # The named groupby options come first; any remaining keyword
        # arguments are merged on top of them.
        self._kwargs = {
            "level": level,
            "sort": sort,
            "as_index": as_index,
            "group_keys": group_keys,
            **kwargs,
        }

    @disable_logging
    @_inherit_docstrings(QueryCompilerCaster._get_query_compiler)
    def _get_query_compiler(self) -> Optional[BaseQueryCompiler]:
        # `_df` may not have been assigned yet; report `None` in that case
        # instead of failing on the attribute access.
        return self._df._query_compiler if hasattr(self, "_df") else None

    @disable_logging
    @_inherit_docstrings(QueryCompilerCaster.get_backend)
    def get_backend(self) -> str:
        # Report the backend of the grouped object stored in `self._df`.
        return self._df.get_backend()

    @disable_logging
    def set_backend(
        self,
        backend: str,
        inplace: bool = False,
        *,
        switch_operation: Optional[str] = None,
    ) -> Optional[Self]:
        """
        Switch the data behind this groupby object to another backend.

        Parameters
        ----------
        backend : str
            Name of the target backend.
        inplace : bool, default: False
            Whether to modify this object instead of building a new one.
        switch_operation : str, optional
            Name of the operation that triggered the backend switch.

        Returns
        -------
        DataFrameGroupBy or None
            A new groupby object on the target backend when `inplace=False`;
            `None` when `inplace=True` (this object is updated instead).

        Notes
        -----
        With `inplace=True` the parent objects themselves (the DataFrame/Series
        this groupby was created from and any DataFrames/Series in the `by`
        list) are moved to the target backend. With `inplace=False` the
        returned groupby is built from new copies of the parents that live on
        the target backend, and the original parents are left unchanged.
        """

        def switch_backend_of(value: Any) -> Any:
            # `_by` and `_df` may hold QueryCompilerCaster objects as well as
            # bare BaseQueryCompiler objects; both kinds have to be moved.
            if isinstance(value, QueryCompilerCaster):
                switched = value.set_backend(
                    backend=backend, inplace=inplace, switch_operation=switch_operation
                )
                return value if inplace else switched
            if not isinstance(value, BaseQueryCompiler):
                return value
            # Query compilers do not implement set_backend() themselves, so
            # one is wrapped in a DataFrame for the switch. The import is
            # local because it is cyclic.
            from modin.pandas import DataFrame

            wrapper = DataFrame(query_compiler=value)
            return wrapper.set_backend(backend=backend, inplace=False)._query_compiler

        new_by = visit_nested_args([self._by], switch_backend_of)[0]
        new_df = visit_nested_args([self._df], switch_backend_of)[0]

        if inplace:
            self._df = new_df
            self._query_compiler = new_df._query_compiler
            self._by = new_by
            return None

        # as_index, sort, group_keys and level live in `self._kwargs` too, but
        # they are passed explicitly below, so they must not be repeated among
        # the extra keyword arguments.
        explicit_options = ("as_index", "sort", "group_keys", "level")
        extra_options = {
            name: option
            for name, option in self._kwargs.items()
            if name not in explicit_options
        }
        return type(self)(
            df=new_df,
            by=new_by,
            axis=self._axis,
            level=self._level,
            as_index=self._as_index,
            sort=self._sort,
            group_keys=self._kwargs["group_keys"],
            idx_name=self._idx_name,
            drop=self._drop,
            backend_pinned=self._backend_pinned,
            **extra_options,
        )

    @_inherit_docstrings(QueryCompilerCaster.is_backend_pinned)
    def is_backend_pinned(self) -> bool:
        # The flag is stored by `__init__` and updated by `_set_backend_pinned`.
        return self._backend_pinned

    @_inherit_docstrings(QueryCompilerCaster._set_backend_pinned)
    def _set_backend_pinned(self, pinned: bool, inplace: bool) -> Optional[Self]:
        if not inplace:
            # Build a copy of this groupby object carrying the new status.
            pinned_copy = self._override(backend_pinned=pinned)
            # Set the status once more on the copy: the automatic pinning
            # logic in query_compiler_caster.py might have overridden it.
            pinned_copy._backend_pinned = pinned
            return pinned_copy
        self._backend_pinned = pinned
        return None

    @disable_logging
    @_inherit_docstrings(QueryCompilerCaster._copy_into)
    def _copy_into(self, other: Self) -> None:
        # TODO(https://github.com/modin-project/modin/issues/7544): implement
        # this method to support automatic pre-operation backend switch for
        # groupby methods.
        ErrorMessage.not_implemented()

    def _override(self, **kwargs):
        """
        Build a groupby object like this one with some parameters replaced.

        Parameters
        ----------
        **kwargs : dict
            Parameters to override.

        Returns
        -------
        DataFrameGroupBy
            A groupby object with new parameters.
        """
        # Start from the current state of this object ...
        constructor_args = dict(
            df=self._df,
            by=self._by,
            axis=self._axis,
            idx_name=self._idx_name,
            drop=self._drop,
            backend_pinned=self._backend_pinned,
            **self._kwargs,
        )
        # ... and let the caller's values take precedence.
        for name, replacement in kwargs.items():
            constructor_args[name] = replacement
        groupby_cls = type(self)
        return groupby_cls(**constructor_args)

    @disable_logging
    def __getattr__(self, key):
        """
        Resolve an attribute through extensions first, then through column names.

        Parameters
        ----------
        key : str
            Attribute name.

        Returns
        -------
        The value of the attribute.
        """
        try:
            return self._getattr__from_extension_impl(
                key=key,
                default_behavior_attributes=GROUPBY_EXTENSION_NO_LOOKUP,
                extensions=__class__._extensions,
            )
        except AttributeError as err:
            # "_columns" is excluded explicitly so that `self._columns` is not
            # consulted while resolving "_columns" itself.
            names_a_column = key != "_columns" and key in self._columns
            if not names_a_column:
                raise err
            return self.__getitem__(key)

    @disable_logging
    def __getattribute__(self, item: str) -> Any:
        """
        Resolve any attribute access made on an object of this class.

        This method is overridden
            1) to default to pandas for empty dataframes on non-lazy engines.
            2) to get non-method extensions (e.g. properties)

        Parameters
        ----------
        item : str
            The name of the attribute to access.

        Returns
        -------
        Any
            The value of the attribute.
        """
        if item not in GROUPBY_EXTENSION_NO_LOOKUP:
            from_extension = self._getattribute__from_extension_impl(
                item, __class__._extensions
            )
            if from_extension is not sentinel:
                return from_extension

        attr = super().__getattribute__(item)
        # We default to pandas on empty DataFrames. This avoids a large amount of
        # pain in underlying implementation and returns a result immediately rather
        # than dealing with the edge cases that empty DataFrames have.
        defaults_to_pandas = (
            item not in _DEFAULT_BEHAVIOUR
            and not self._query_compiler.lazy_shape
            and callable(attr)
            and self._df.empty
            and hasattr(self._pandas_class, item)
        )
        if not defaults_to_pandas:
            return attr

        def default_handler(*args, **kwargs):
            return self._default_to_pandas(item, *args, **kwargs)

        return default_handler

    @disable_logging
    def __setattr__(self, key: str, value) -> None:
        """
        Assign a value to an attribute of the object.

        The assignment is routed to an extension when one is registered
        under `key` and defines ``__set__``.

        Parameters
        ----------
        key : str
            The name of the attribute to set.
        value : Any
            The value to set the attribute to.

        Returns
        -------
        None
        """
        # An extension property is only accessible if the backend supports it.
        extension = self._get_extension(key, __class__._extensions)
        if extension is sentinel or not hasattr(extension, "__set__"):
            return super().__setattr__(key, value)
        return extension.__set__(self, value)

    @disable_logging
    def __delattr__(self, name: str) -> None:
        """
        Remove an attribute from the object.

        The deletion is routed to an extension when one is registered
        under `name` and defines ``__delete__``.

        Parameters
        ----------
        name : str
            The name of the attribute to delete.

        Returns
        -------
        None
        """
        # An extension property is only accessible if the backend supports it.
        extension = self._get_extension(name, __class__._extensions)
        if extension is sentinel or not hasattr(extension, "__delete__"):
            return super().__delattr__(name)
        return extension.__delete__(self)

    @property
    def ngroups(self):  # noqa: GL08
        # The number of groups is whatever `len()` reports for this object.
        group_count = len(self)
        return group_count

    def skew(self, axis=lib.no_default, skipna=True, numeric_only=False, **kwargs):
        # default behaviour for aggregations; for the reference see
        # `_op_via_apply` func in pandas==2.0.2
        if axis is None or axis is lib.no_default:
            axis = self._axis

        # Only the `axis=0` + `skipna=True` combination goes through the query
        # compiler; every other combination is executed via `_default_to_pandas`.
        if axis == 0 and skipna:
            return self._wrap_aggregation(
                type(self._query_compiler).groupby_skew,
                agg_kwargs=kwargs,
                numeric_only=numeric_only,
            )

        def _pandas_skew(df):
            return df.skew(
                axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs
            )

        return self._default_to_pandas(_pandas_skew)

    def ffill(self, limit=None):
        # Tell the user which pandas issue the `fillna`-based implementation
        # can be affected by.
        notice = (
            ".ffill() is implemented using .fillna() in Modin, "
            "which can be impacted by pandas bug https://github.com/pandas-dev/pandas/issues/43412 "
            "on dataframes with duplicated indices"
        )
        ErrorMessage.single_warning(notice)
        with warnings.catch_warnings():
            # Suppress the FutureWarning about `fillna` with 'method' while
            # the call below runs.
            warnings.filterwarnings(
                "ignore",
                category=FutureWarning,
                message=".*fillna with 'method' is deprecated.*",
            )
            filled = self.fillna(method="ffill", limit=limit)
        return filled

    def sem(self, ddof=1, numeric_only=False):
        # Dispatch to the query compiler's `groupby_sem`; `ddof` is the only
        # aggregation keyword argument that gets forwarded.
        qc_method = type(self._query_compiler).groupby_sem
        return self._wrap_aggregation(
            qc_method,
            agg_kwargs={"ddof": ddof},
            numeric_only=numeric_only,
        )

    def sample(self, n=None, frac=None, replace=False, weights=None, random_state=None):
        # Executed through `_default_to_pandas`: every sampling option is
        # forwarded unchanged, by keyword.
        sample_kwargs = {
            "n": n,
            "frac": frac,
            "replace": replace,
            "weights": weights,
            "random_state": random_state,
        }

        def _pandas_sample(df):
            return df.sample(**sample_kwargs)

        return self._default_to_pandas(_pandas_sample)

    def ewm(self, *args, **kwargs):
        # Executed through `_default_to_pandas` with the arguments passed as is.
        def _pandas_ewm(df):
            return df.ewm(*args, **kwargs)

        return self._default_to_pandas(_pandas_ewm)

    def value_counts(
        self,
        subset=None,
        normalize: bool = False,
        sort: bool = True,
        ascending: bool = False,
        dropna: bool = True,
    ):
        # Executed through `_default_to_pandas`: all options are forwarded
        # unchanged, by keyword.
        counting_kwargs = {
            "subset": subset,
            "normalize": normalize,
            "sort": sort,
            "ascending": ascending,
            "dropna": dropna,
        }

        def _pandas_value_counts(df):
            return df.value_counts(**counting_kwargs)

        return self._default_to_pandas(_pandas_value_counts)

    def mean(self, numeric_only=False, engine=None, engine_kwargs=None):
        # A non-cython engine combined with explicit engine kwargs is executed
        # via `_default_to_pandas`; anything else goes through the query compiler.
        via_pandas = engine not in ("cython", None) and engine_kwargs is not None
        if not via_pandas:
            aggregated = self._wrap_aggregation(
                type(self._query_compiler).groupby_mean,
                agg_kwargs={"numeric_only": numeric_only},
                numeric_only=numeric_only,
            )
            return self._check_index(aggregated)

        def _pandas_mean(df):
            return df.mean(
                numeric_only=numeric_only,
                engine=engine,
                engine_kwargs=engine_kwargs,
            )

        return self._default_to_pandas(_pandas_mean)

    def any(self, skipna=True):
        # Dispatch to the query compiler's `groupby_any` without restricting
        # the aggregation to numeric columns.
        qc_method = type(self._query_compiler).groupby_any
        return self._wrap_aggregation(
            qc_method,
            numeric_only=False,
            agg_kwargs={"skipna": skipna},
        )

    @property
    def plot(self):  # pragma: no cover
        # The `plot` attribute is read from the object that
        # `_default_to_pandas` hands to the callback.
        def _plot_accessor(df):
            return df.plot

        return self._default_to_pandas(_plot_accessor)

    def ohlc(self):
        from .dataframe import DataFrame

        # The query compiler is told whether the grouped object is a DataFrame.
        grouping_a_frame = isinstance(self._df, DataFrame)
        ohlc_qc = self._query_compiler.groupby_ohlc(
            by=self._by,
            axis=self._axis,
            groupby_kwargs=self._kwargs,
            agg_args=[],
            agg_kwargs={},
            is_df=grouping_a_frame,
        )
        return DataFrame(query_compiler=ohlc_qc)

    def __bytes__(self):
        """
        Get a python2-style byte string representation of the groupby object.

        Returns
        -------
        bytearray
            Byte array representation of `self`.

        Notes
        -----
        Deprecated and removed in pandas and will be likely removed in Modin.
        """

        def _to_bytes(df):
            return df.__bytes__()

        return self._default_to_pandas(_to_bytes)

    @cached_property
    def groups(self):
        # Computed by `_compute_index_grouped` in non-numerical mode;
        # `cached_property` stores the result after the first access.
        grouped_index = self._compute_index_grouped(numerical=False)
        return grouped_index

    def min(self, numeric_only=False, min_count=-1, engine=None, engine_kwargs=None):
        # A non-cython engine combined with explicit engine kwargs is executed
        # via `_default_to_pandas`; anything else goes through the query compiler.
        via_pandas = engine not in ("cython", None) and engine_kwargs is not None
        if not via_pandas:
            return self._wrap_aggregation(
                type(self._query_compiler).groupby_min,
                agg_kwargs={"min_count": min_count},
                numeric_only=numeric_only,
            )

        def _pandas_min(df):
            return df.min(
                numeric_only=numeric_only,
                min_count=min_count,
                engine=engine,
                engine_kwargs=engine_kwargs,
            )

        return self._default_to_pandas(_pandas_min)

    def max(self, numeric_only=False, min_count=-1, engine=None, engine_kwargs=None):
        # A non-cython engine combined with explicit engine kwargs is executed
        # via `_default_to_pandas`; anything else goes through the query compiler.
        via_pandas = engine not in ("cython", None) and engine_kwargs is not None
        if not via_pandas:
            return self._wrap_aggregation(
                type(self._query_compiler).groupby_max,
                agg_kwargs={"min_count": min_count},
                numeric_only=numeric_only,
            )

        def _pandas_max(df):
            return df.max(
                numeric_only=numeric_only,
                min_count=min_count,
                engine=engine,
                engine_kwargs=engine_kwargs,
            )

        return self._default_to_pandas(_pandas_max)

    def idxmax(self, axis=lib.no_default, skipna=True, numeric_only=False):
        # Any explicitly passed `axis` triggers a deprecation warning.
        axis_passed = axis is not lib.no_default
        if axis_passed:
            self._deprecate_axis(axis, "idxmax")
        # default behaviour for aggregations; for the reference see
        # `_op_via_apply` func in pandas==2.0.2
        if axis is None or not axis_passed:
            axis = self._axis
        agg_kwargs = {"axis": axis, "skipna": skipna}
        return self._wrap_aggregation(
            type(self._query_compiler).groupby_idxmax,
            agg_kwargs=agg_kwargs,
            numeric_only=numeric_only,
        )

    def idxmin(self, axis=lib.no_default, skipna=True, numeric_only=False):
        # Any explicitly passed `axis` triggers a deprecation warning.
        axis_passed = axis is not lib.no_default
        if axis_passed:
            self._deprecate_axis(axis, "idxmin")
        # default behaviour for aggregations; for the reference see
        # `_op_via_apply` func in pandas==2.0.2
        if axis is None or not axis_passed:
            axis = self._axis
        agg_kwargs = {"axis": axis, "skipna": skipna}
        return self._wrap_aggregation(
            type(self._query_compiler).groupby_idxmin,
            agg_kwargs=agg_kwargs,
            numeric_only=numeric_only,
        )

    @property
    def ndim(self):
        """
        Get the number of dimensions, which is 2.

        Returns
        -------
        int
            Returns 2.

        Notes
        -----
        Deprecated and removed in pandas and will be likely removed in Modin.
        """
        # ndim is always 2 for DataFrames
        frame_ndim = 2
        return frame_ndim

    def shift(
        self,
        periods=1,
        freq=None,
        axis=lib.no_default,
        fill_value=lib.no_default,
        suffix=None,
    ):
        # A truthy `suffix` sends the whole call through `_default_to_pandas`
        # with the arguments exactly as they were received.
        if suffix:
            return self._default_to_pandas(
                lambda df: df.shift(
                    periods=periods,
                    freq=freq,
                    axis=axis,
                    fill_value=fill_value,
                    suffix=suffix,
                )
            )
        # Normalize `axis` to its number; an explicitly passed axis also
        # triggers a deprecation warning, a missing one means axis 0.
        if axis is not lib.no_default:
            axis = self._df._get_axis_number(axis)
            self._deprecate_axis(axis, "shift")
        else:
            axis = 0

        def _shift(data, periods, freq, axis, fill_value, is_set_nan_rows=True):
            # Shifts `data` as a whole (without grouping). When `is_set_nan_rows`
            # is set, `by` is a query compiler whose columns are all present in
            # `data` and `by` contains at least one NaN, the rows having NaN in
            # any of the `by` columns are excluded from the result.
            from .dataframe import DataFrame

            result = data.shift(periods, freq, axis, fill_value)

            if (
                is_set_nan_rows
                and isinstance(self._by, BaseQueryCompiler)
                and (
                    # Check using `issubset` is effective only in case of MultiIndex
                    set(self._by.columns).issubset(list(data.columns))
                    if isinstance(self._by.columns, pandas.MultiIndex)
                    else len(
                        self._by.columns.unique()
                        .sort_values()
                        .difference(data.columns.unique().sort_values())
                    )
                    == 0
                )
                and DataFrame(query_compiler=self._by.isna()).any(axis=None)
            ):
                mask_nan_rows = data[self._by.columns].isna().any(axis=1)
                result = result.loc[~mask_nan_rows]
            return result

        # Case 1: shifting along columns of a row-wise groupby without `freq`
        # is done directly on the source object.
        if freq is None and axis == 1 and self._axis == 0:
            result = _shift(self._df, periods, freq, axis, fill_value)
        # Case 2: `freq`-based shift along rows with `by` given as a query
        # compiler: shift the source object, drop the rows with NaN in the `by`
        # columns and then order the result according to the `sort` setting.
        elif (
            freq is not None
            and axis == 0
            and self._axis == 0
            and isinstance(self._by, BaseQueryCompiler)
        ):
            result = _shift(
                self._df, periods, freq, axis, fill_value, is_set_nan_rows=False
            )
            result = result.dropna(subset=self._by.columns)
            if self._sort:
                result = result.sort_values(list(self._by.columns), axis=axis)
            else:
                result = result.sort_index()
        # Case 3: everything else is dispatched to the query compiler's
        # `groupby_shift`.
        else:
            result = self._wrap_aggregation(
                type(self._query_compiler).groupby_shift,
                numeric_only=False,
                agg_kwargs=dict(
                    periods=periods, freq=freq, axis=axis, fill_value=fill_value
                ),
            )
        return result

    def nth(self, n, dropna=None):
        # TODO: what we really should do is create a GroupByNthSelector to mimic
        # pandas behavior and then implement some of these methods there.
        # Adapted error checking from pandas
        if dropna:
            if not is_integer(n):
                raise ValueError("dropna option only supported for an integer argument")

            allowed_dropna = ("any", "all")
            if dropna not in allowed_dropna:
                # Note: when agg-ing picker doesn't raise this, just returns NaN
                raise ValueError(
                    "For a DataFrame or Series groupby.nth, dropna must be "
                    "either None, 'any' or 'all', "
                    f"(was passed {dropna})."
                )

        selected = self._wrap_aggregation(
            type(self._query_compiler).groupby_nth,
            numeric_only=False,
            agg_kwargs={"n": n, "dropna": dropna},
        )
        return self._check_index(selected)

    def cumsum(self, axis=lib.no_default, *args, **kwargs):
        # A missing `axis` means axis 0; an explicit one is converted to its
        # number and reported as deprecated.
        if axis is lib.no_default:
            axis = 0
        else:
            axis = self._df._get_axis_number(axis)
            self._deprecate_axis(axis, "cumsum")
        agg_kwargs = {"axis": axis, **kwargs}
        return self._wrap_aggregation(
            type(self._query_compiler).groupby_cumsum,
            agg_args=args,
            agg_kwargs=agg_kwargs,
        )

    @cached_property
    def indices(self):
        # Computed by `_compute_index_grouped` in numerical mode;
        # `cached_property` stores the result after the first access.
        grouped_positions = self._compute_index_grouped(numerical=True)
        return grouped_positions

    @_inherit_docstrings(pandas.core.groupby.DataFrameGroupBy.pct_change)
    def pct_change(
        self,
        periods=1,
        fill_method=lib.no_default,
        limit=lib.no_default,
        freq=None,
        axis=lib.no_default,
    ):
        from .dataframe import DataFrame

        cls_name = type(self).__name__

        # Passing a non-None `fill_method` or any `limit` is deprecated.
        if fill_method not in (lib.no_default, None) or limit is not lib.no_default:
            warnings.warn(
                "The 'fill_method' keyword being not None and the 'limit' keyword in "
                f"{cls_name}.pct_change are deprecated and will be removed "
                "in a future version. Either fill in any non-leading NA values prior "
                "to calling pct_change or specify 'fill_method=None' to not fill NA "
                "values.",
                FutureWarning,
            )
        # Without an explicit `fill_method` the "ffill" default applies; the
        # deprecation warning is only raised if some group has missing values.
        if fill_method is lib.no_default:
            has_missing = any(group.isna().values.any() for _, group in self)
            if has_missing:
                warnings.warn(
                    "The default fill_method='ffill' in "
                    f"{cls_name}.pct_change is deprecated and will be "
                    "removed in a future version. Call ffill before calling "
                    "pct_change to retain current behavior and silence this warning.",
                    FutureWarning,
                )
            fill_method = "ffill"
        limit = None if limit is lib.no_default else limit

        if axis is lib.no_default:
            axis = 0
        else:
            axis = self._df._get_axis_number(axis)
            self._deprecate_axis(axis, "pct_change")

        # Should check for API level errors
        # Attempting to match pandas error behavior here
        if not isinstance(periods, int):
            raise TypeError(f"periods must be an int. got {type(periods)} instead")

        if isinstance(self._df, Series):
            series_dtype = self._df.dtypes
            if not is_numeric_dtype(series_dtype):
                raise TypeError(
                    f"unsupported operand type for -: got {self._df.dtypes}"
                )
        elif isinstance(self._df, DataFrame) and axis == 0:
            by_is_qc = isinstance(self._by, BaseQueryCompiler)
            for col, dtype in self._df.dtypes.items():
                if is_numeric_dtype(dtype):
                    continue
                # can't calculate change on non-numeric columns, so check for
                # non-numeric columns that are not included in the `by`
                if by_is_qc and col in self._by.columns:
                    continue
                raise TypeError(f"unsupported operand type for -: got {dtype}")

        agg_kwargs = {
            "periods": periods,
            "fill_method": fill_method,
            "limit": limit,
            "freq": freq,
            "axis": axis,
        }
        return self._wrap_aggregation(
            type(self._query_compiler).groupby_pct_change,
            agg_kwargs=agg_kwargs,
        )

    def filter(self, func, dropna=True, *args, **kwargs):
        # `dropna` is forwarded positionally on purpose: in the pandas signature
        # it sits between `func` and `*args`, so passing it by keyword while also
        # forwarding extra positional arguments makes the first of them collide
        # with it ("got multiple values for argument 'dropna'").
        return self._default_to_pandas(
            lambda df: df.filter(func, dropna, *args, **kwargs)
        )

    def _deprecate_axis(self, axis: int, name: str) -> None:  # noqa: GL08
        # Two different messages: one for `axis=1`, which suggests operating on
        # the un-grouped object, and one for any other explicitly passed axis.
        cls_name = type(self).__name__
        if axis == 1:
            message = (
                f"{cls_name}.{name} with axis=1 is deprecated and "
                "will be removed in a future version. Operate on the un-grouped "
                "DataFrame instead"
            )
        else:
            message = (
                f"The 'axis' keyword in {cls_name}.{name} is deprecated "
                "and will be removed in a future version. "
                "Call without passing 'axis' instead."
            )
        warnings.warn(message, FutureWarning)

    def cummax(self, axis=lib.no_default, numeric_only=False, **kwargs):
        # A missing `axis` means axis 0; an explicit one is converted to its
        # number and reported as deprecated.
        if axis is lib.no_default:
            axis = 0
        else:
            axis = self._df._get_axis_number(axis)
            self._deprecate_axis(axis, "cummax")
        agg_kwargs = {"axis": axis, **kwargs}
        return self._wrap_aggregation(
            type(self._query_compiler).groupby_cummax,
            agg_kwargs=agg_kwargs,
            numeric_only=numeric_only,
        )

    def apply(self, func, *args, include_groups=True, **kwargs):
        func = cast_function_modin2pandas(func)
        # Builtin functions are used as is, everything else gets wrapped
        # with `wrap_udf_function`.
        is_builtin = isinstance(func, BuiltinFunctionType)
        if not is_builtin:
            func = wrap_udf_function(func)

        agg_kwargs = dict(kwargs, include_groups=include_groups)
        apply_res = self._wrap_aggregation(
            qc_method=type(self._query_compiler).groupby_agg,
            numeric_only=False,
            agg_func=func,
            agg_args=args,
            agg_kwargs=agg_kwargs,
            how="group_wise",
        )
        if isinstance(apply_res, Series):
            return self._check_index(apply_res)

        # A frame whose only column carries the "unnamed series" label
        # is squeezed along the columns axis.
        reduced_index = pandas.Index([MODIN_UNNAMED_SERIES_LABEL])
        if apply_res.columns.equals(reduced_index):
            apply_res = apply_res.squeeze(axis=1)
        return self._check_index(apply_res)

    @property
    def dtypes(self):
        if self._axis == 1:
            raise ValueError("Cannot call dtypes on groupby with axis=1")
        # Accessing `dtypes` on a groupby object is deprecated, warn on each access.
        cls_name = type(self).__name__
        warnings.warn(
            f"{cls_name}.dtypes is deprecated and will be removed in "
            "a future version. Check the dtypes on the base object instead",
            FutureWarning,
        )
        grouped_dtypes = self._wrap_aggregation(
            type(self._query_compiler).groupby_dtypes,
            numeric_only=False,
        )
        return self._check_index(grouped_dtypes)

    def first(self, numeric_only=False, min_count=-1, skipna=True):
        # Dispatch to the query compiler's `groupby_first`, forwarding
        # `min_count` and `skipna` as aggregation keyword arguments.
        agg_kwargs = {"min_count": min_count, "skipna": skipna}
        return self._wrap_aggregation(
            type(self._query_compiler).groupby_first,
            agg_kwargs=agg_kwargs,
            numeric_only=numeric_only,
        )

    def last(self, numeric_only=False, min_count=-1, skipna=True):
        # Dispatch to the query compiler's `groupby_last`, forwarding
        # `min_count` and `skipna` as aggregation keyword arguments.
        agg_kwargs = {"min_count": min_count, "skipna": skipna}
        return self._wrap_aggregation(
            type(self._query_compiler).groupby_last,
            agg_kwargs=agg_kwargs,
            numeric_only=numeric_only,
        )

    @cached_property
    def _internal_by(self) -> tuple[Hashable]:
        """
        Collect the components of 'by' that are column labels of the source frame.

        Returns
        -------
        tuple of labels
        """
        # Nothing is collected unless `drop` is set.
        if not self._drop:
            return tuple()

        grouping = self._by
        if is_list_like(grouping):
            # Strings are taken as they are, Groupers contribute their key,
            # any other kind of component is skipped.
            picked = []
            for component in grouping:
                if isinstance(component, pandas.Grouper):
                    picked.append(component.key)
                elif isinstance(component, str):
                    picked.append(component)
            return tuple(picked)

        if isinstance(grouping, pandas.Grouper):
            return (grouping.key,)

        ErrorMessage.catch_bugs_and_request_email(
            failure_condition=not isinstance(grouping, BaseQueryCompiler),
            extra_log=f"When 'drop' is True, 'by' must be either list-like, Grouper, or a QueryCompiler, met: {type(grouping)}.",
        )
        return tuple(grouping.columns)

    def __getitem__(self, key):
        """
        Select a subset of columns of the grouped object.

        Parameters
        ----------
        key : list or str
            Names of columns to use as subset of original object.

        Returns
        -------
        DataFrameGroupBy or SeriesGroupBy
            Result of indexing operation.

        Raises
        ------
        NotImplementedError
            Column lookups on GroupBy with arbitrary Series in by is not yet supported.
        """
        # Parameters shared by both kinds of the resulting groupby object
        shared_kwargs = dict(self._kwargs)
        shared_kwargs.update(by=self._by, axis=self._axis, idx_name=self._idx_name)

        # The rules of type deduction for the resulted object is the following:
        #   1. If `key` is a list-like or `as_index is False`, then the resulted object is a DataFrameGroupBy
        #   2. Otherwise, the resulted object is SeriesGroupBy
        #   3. Result type does not depend on the `by` origin
        # Examples:
        #   - drop: any, as_index: any, __getitem__(key: list_like) -> DataFrameGroupBy
        #   - drop: any, as_index: False, __getitem__(key: any) -> DataFrameGroupBy
        #   - drop: any, as_index: True, __getitem__(key: label) -> SeriesGroupBy
        if is_list_like(key):
            make_dataframe = True
        else:
            make_dataframe = not self._as_index
            if make_dataframe:
                key = [key]

        if not make_dataframe:
            if (
                self._is_multi_by
                and isinstance(self._by, list)
                and not all(hashable(label) and label in self._df for label in self._by)
            ):
                raise NotImplementedError(
                    "Column lookups on GroupBy with arbitrary Series in by"
                    " is not yet supported."
                )
            return SeriesGroupBy(
                self._df[key],
                drop=False,
                backend_pinned=self._backend_pinned,
                **shared_kwargs,
            )

        overlap = frozenset(self._internal_by).intersection(key)
        if overlap:
            ErrorMessage.mismatch_with_pandas(
                operation="GroupBy.__getitem__",
                message=(
                    "intersection of the selection and 'by' columns is not yet supported, "
                    "to achieve the desired result rewrite the original code from:\n"
                    "df.groupby('by_column')['by_column']\n"
                    "to the:\n"
                    "df.groupby(df['by_column'].copy())['by_column']"
                ),
            )
        # The 'by' labels go first and are followed by the requested ones.
        # A dictionary is used for de-duplication because, unlike a set, it
        # preserves the insertion order.
        ordered_labels = dict.fromkeys([*self._internal_by, *key])
        source_columns = self._df.columns
        key = [label for label in ordered_labels if label in source_columns]
        return DataFrameGroupBy(
            self._df[key],
            drop=self._drop,
            backend_pinned=self._backend_pinned,
            **shared_kwargs,
        )

    def cummin(self, axis=lib.no_default, numeric_only=False, **kwargs):
        # A missing `axis` means axis 0; an explicit one is converted to its
        # number and reported as deprecated.
        if axis is lib.no_default:
            axis = 0
        else:
            axis = self._df._get_axis_number(axis)
            self._deprecate_axis(axis, "cummin")
        agg_kwargs = {"axis": axis, **kwargs}
        return self._wrap_aggregation(
            type(self._query_compiler).groupby_cummin,
            agg_kwargs=agg_kwargs,
            numeric_only=numeric_only,
        )

    def bfill(self, limit=None):
        # Tell the user which pandas issue the `fillna`-based implementation
        # can be affected by.
        notice = (
            ".bfill() is implemented using .fillna() in Modin, "
            "which can be impacted by pandas bug https://github.com/pandas-dev/pandas/issues/43412 "
            "on dataframes with duplicated indices"
        )
        ErrorMessage.single_warning(notice)
        with warnings.catch_warnings():
            # Suppress the FutureWarning about `fillna` with 'method' while
            # the call below runs.
            warnings.filterwarnings(
                "ignore",
                category=FutureWarning,
                message=".*fillna with 'method' is deprecated.*",
            )
            filled = self.fillna(method="bfill", limit=limit)
        return filled

    def prod(self, numeric_only=False, min_count=0):
        # Dispatch to the query compiler's `groupby_prod`; `min_count` is the
        # only aggregation keyword argument that gets forwarded.
        qc_method = type(self._query_compiler).groupby_prod
        return self._wrap_aggregation(
            qc_method,
            agg_kwargs={"min_count": min_count},
            numeric_only=numeric_only,
        )

    def std(self, ddof=1, engine=None, engine_kwargs=None, numeric_only=False):
        # A non-cython engine combined with explicit engine kwargs is executed
        # via `_default_to_pandas`; anything else goes through the query compiler.
        via_pandas = engine not in ("cython", None) and engine_kwargs is not None
        if not via_pandas:
            return self._wrap_aggregation(
                type(self._query_compiler).groupby_std,
                agg_kwargs={"ddof": ddof},
                numeric_only=numeric_only,
            )

        def _pandas_std(df):
            return df.std(
                ddof=ddof,
                engine=engine,
                engine_kwargs=engine_kwargs,
                numeric_only=numeric_only,
            )

        return self._default_to_pandas(_pandas_std)

    def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs):
        # A non-cython engine combined with explicit engine kwargs is executed
        # via `_default_to_pandas` with the arguments exactly as received.
        if engine not in ("cython", None) and engine_kwargs is not None:
            return self._default_to_pandas(
                lambda df: df.aggregate(
                    func, *args, engine=engine, engine_kwargs=engine_kwargs, **kwargs
                )
            )
        if self._axis != 0:
            # This is not implemented in pandas,
            # so we throw a different message
            raise NotImplementedError("axis other than 0 is not supported")

        # A builtin callable whose name matches an attribute of this object is
        # replaced with that name (with a warning), so it is handled by the
        # string branch below.
        if (
            callable(func)
            and isinstance(func, BuiltinFunctionType)
            and func.__name__ in dir(self)
        ):
            func_name = func.__name__
            warnings.warn(
                f"The provided callable {func} is currently using "
                + f"{type(self).__name__}.{func_name}. In a future version of pandas, "
                + "the provided callable will be used directly. To keep current "
                + f"behavior pass the string {func_name} instead.",
                category=FutureWarning,
            )
            func = func_name

        # Stays `None` unless the dictionary/keyword branch below defines
        # a relabeling function to post-process the result.
        do_relabel = None
        if isinstance(func, dict) or func is None:
            # the order from `reconstruct_func` cannot be used correctly if there
            # is more than one columnar partition, since for correct use all columns
            # must be available within one partition.
            old_kwargs = dict(kwargs)
            relabeling_required, func_dict, new_columns, _ = reconstruct_func(
                func, **kwargs
            )

            if relabeling_required:

                def do_relabel(obj_to_relabel):  # noqa: F811
                    # Reorders the columns of the aggregation result to follow the
                    # order of the keyword arguments and assigns `new_columns`
                    # as the labels.

                    # unwrap nested labels into one level tuple
                    result_labels = [None] * len(old_kwargs)
                    for idx, labels in enumerate(old_kwargs.values()):
                        if is_scalar(labels) or callable(labels):
                            result_labels[idx] = (
                                labels if not callable(labels) else labels.__name__
                            )
                            continue
                        new_elem = []
                        for label in labels:
                            if is_scalar(label) or callable(label):
                                new_elem.append(
                                    label if not callable(label) else label.__name__
                                )
                            else:
                                new_elem.extend(label)
                        result_labels[idx] = tuple(new_elem)

                    new_order = obj_to_relabel.columns.get_indexer(result_labels)
                    new_columns_idx = pandas.Index(new_columns)
                    if not self._as_index:
                        # The leading columns that are not covered by `new_columns`
                        # are treated as the 'by' columns: they keep their positions
                        # and their labels are prepended to the new ones.
                        nby_cols = len(obj_to_relabel.columns) - len(new_columns_idx)
                        new_order = np.concatenate([np.arange(nby_cols), new_order])
                        by_cols = obj_to_relabel.columns[:nby_cols]
                        if by_cols.nlevels != new_columns_idx.nlevels:
                            # Drop the levels holding only an empty string so the
                            # number of levels can match the new labels.
                            by_cols = by_cols.remove_unused_levels()
                            empty_levels = [
                                i
                                for i, level in enumerate(by_cols.levels)
                                if len(level) == 1 and level[0] == ""
                            ]
                            by_cols = by_cols.droplevel(empty_levels)
                        new_columns_idx = by_cols.append(new_columns_idx)
                    result = obj_to_relabel.iloc[:, new_order]
                    result.columns = new_columns_idx
                    return result

            if any(isinstance(fn, list) for fn in func_dict.values()):
                # multicolumn case
                # putting functions in a `list` allows to achieve multicolumn in each partition
                func_dict = {
                    col: fn if isinstance(fn, list) else [fn]
                    for col, fn in func_dict.items()
                }
            if (
                relabeling_required
                and not self._as_index
                and any(col in func_dict for col in self._internal_by)
            ):
                ErrorMessage.mismatch_with_pandas(
                    operation="GroupBy.aggregate(**dictionary_renaming_aggregation)",
                    message=(
                        "intersection of the columns to aggregate and 'by' is not yet supported when 'as_index=False', "
                        + "columns with group names of the intersection will not be presented in the result. "
                        + "To achieve the desired result rewrite the original code from:\n"
                        + "df.groupby('by_column', as_index=False).agg(agg_func=('by_column', agg_func))\n"
                        + "to the:\n"
                        + "df.groupby('by_column').agg(agg_func=('by_column', agg_func)).reset_index()"
                    ),
                )

            # Every key of the function dictionary has to be a column of the
            # source frame.
            if any(i not in self._df.columns for i in func_dict.keys()):
                raise SpecificationError("nested renamer is not supported")
            if func is None:
                # The keyword arguments were the aggregation specification itself,
                # so there is nothing left to pass to the aggregation functions.
                kwargs = {}
            func = func_dict
        elif is_list_like(func):
            # for list-list aggregation pandas always puts
            # groups as index in the result, ignoring as_index,
            # so we have to reset it to default value
            res = self._override(as_index=True)._wrap_aggregation(
                qc_method=type(self._query_compiler).groupby_agg,
                numeric_only=False,
                agg_func=func,
                agg_args=args,
                agg_kwargs=kwargs,
                how="axis_wise",
            )
            if not self._kwargs["as_index"]:
                res.reset_index(inplace=True)
            return res
        elif callable(func):
            return self._check_index(
                self._wrap_aggregation(
                    qc_method=type(self._query_compiler).groupby_agg,
                    numeric_only=False,
                    agg_func=func,
                    agg_args=args,
                    agg_kwargs=kwargs,
                    how="axis_wise",
                )
            )
        elif isinstance(func, str):
            # Using "getattr" here masks possible AttributeError which we throw
            # in __getattr__, so we should call __getattr__ directly instead.
            agg_func = self.__getattr__(func)
            if callable(agg_func):
                return agg_func(*args, **kwargs)

        # Reached by the dictionary/keyword branch, by a string that did not
        # resolve to a callable and by any `func` not matched above.
        result = self._wrap_aggregation(
            qc_method=type(self._query_compiler).groupby_agg,
            numeric_only=False,
            agg_func=func,
            agg_args=args,
            agg_kwargs=kwargs,
            how="axis_wise",
        )
        return do_relabel(result) if do_relabel else result

    agg = aggregate

    def rank(
        self,
        method="average",
        ascending=True,
        na_option="keep",
        pct=False,
        axis=lib.no_default,
    ):
        # Reject unknown NaN placement policies before anything is dispatched.
        if na_option not in {"keep", "top", "bottom"}:
            raise ValueError("na_option must be one of 'keep', 'top', or 'bottom'")

        if axis is lib.no_default:
            axis = 0
        else:
            # An explicitly passed axis is normalized to its number and then
            # handed to the deprecation helper.
            axis = self._df._get_axis_number(axis)
            self._deprecate_axis(axis, "rank")

        rank_kwargs = {
            "method": method,
            "ascending": ascending,
            "na_option": na_option,
            "pct": pct,
            "axis": axis,
        }
        return self._wrap_aggregation(
            type(self._query_compiler).groupby_rank,
            agg_kwargs=rank_kwargs,
            numeric_only=False,
        )

    @property
    def corrwith(self):
        # The attribute is read off the groupby object that the
        # default-to-pandas path hands to the callback.
        def fetch_corrwith(df):
            return df.corrwith

        return self._default_to_pandas(fetch_corrwith)

    def var(self, ddof=1, engine=None, engine_kwargs=None, numeric_only=False):
        # Only a non-cython engine combined with explicit `engine_kwargs` is
        # routed through pandas; every other call uses the query compiler.
        non_default_engine = engine not in ("cython", None)
        if non_default_engine and engine_kwargs is not None:

            def pandas_var(df):
                return df.var(
                    ddof=ddof,
                    engine=engine,
                    engine_kwargs=engine_kwargs,
                    numeric_only=numeric_only,
                )

            return self._default_to_pandas(pandas_var)

        return self._wrap_aggregation(
            type(self._query_compiler).groupby_var,
            agg_kwargs={"ddof": ddof},
            numeric_only=numeric_only,
        )

    def get_group(self, name, obj=None):
        # Group over `obj` when one is supplied, otherwise over the object this
        # groupby was built on; `as_index` is overridden to True either way.
        source = self._df if obj is None else obj
        work_object = self._override(df=source, as_index=True)

        selected = work_object._wrap_aggregation(
            qc_method=type(work_object._query_compiler).groupby_get_group,
            numeric_only=False,
            agg_kwargs={"name": name},
        )
        return work_object._check_index(selected)

    def __len__(self):  # noqa: GL08
        return len(self.indices)

    def all(self, skipna=True):
        # The query compiler method is looked up on the class so that it can
        # be called with an explicitly chosen compiler.
        qc_cls = type(self._query_compiler)
        return self._wrap_aggregation(
            qc_cls.groupby_all,
            numeric_only=False,
            agg_kwargs={"skipna": skipna},
        )

    def size(self):
        if self._axis == 1:
            # Column-wise grouping: regroup the first column of the transposed
            # object along axis 0 and take the sizes of that groupby instead.
            first_column_of_transposed = self._df.T.iloc[:, [0]]
            transposed_groupby = DataFrameGroupBy(
                first_column_of_transposed,
                self._by,
                0,
                drop=self._drop,
                idx_name=self._idx_name,
                backend_pinned=self._backend_pinned,
                **self._kwargs,
            )
            return transposed_groupby.size()

        sizes = self._wrap_aggregation(
            type(self._query_compiler).groupby_size,
            numeric_only=False,
        )
        if not isinstance(sizes, Series):
            sizes = sizes.squeeze(axis=1)

        # The squeeze above may still leave a frame behind, so the type is
        # inspected a second time.
        still_frame = not isinstance(sizes, Series)
        if still_frame and not self._kwargs.get("as_index"):
            if MODIN_UNNAMED_SERIES_LABEL in sizes.columns:
                sizes = sizes.rename(columns={MODIN_UNNAMED_SERIES_LABEL: "index"})
        elif isinstance(self._df, Series):
            sizes.name = self._df.name
        return sizes

    def sum(self, numeric_only=False, min_count=0, engine=None, engine_kwargs=None):
        # Only a non-cython engine combined with explicit `engine_kwargs` is
        # routed through pandas; every other call uses the query compiler.
        non_default_engine = engine not in ("cython", None)
        if non_default_engine and engine_kwargs is not None:

            def pandas_sum(df):
                return df.sum(
                    numeric_only=numeric_only,
                    min_count=min_count,
                    engine=engine,
                    engine_kwargs=engine_kwargs,
                )

            return self._default_to_pandas(pandas_sum)

        return self._wrap_aggregation(
            type(self._query_compiler).groupby_sum,
            agg_kwargs={"min_count": min_count},
            numeric_only=numeric_only,
        )

    def describe(self, percentiles=None, include=None, exclude=None):
        # All three options are forwarded unchanged to the pandas groupby.
        describe_kwargs = {
            "percentiles": percentiles,
            "include": include,
            "exclude": exclude,
        }

        def pandas_describe(df):
            return df.describe(**describe_kwargs)

        return self._default_to_pandas(pandas_describe)

    def boxplot(
        self,
        grouped,
        subplots=True,
        column=None,
        fontsize=None,
        rot=0,
        grid=True,
        ax=None,
        figsize=None,
        layout=None,
        sharex=False,
        sharey=True,
        backend=None,
        **kwargs,
    ):
        # Named options are collected once; extra keyword arguments follow
        # them in the call made on the pandas groupby object.
        named_options = {
            "subplots": subplots,
            "column": column,
            "fontsize": fontsize,
            "rot": rot,
            "grid": grid,
            "ax": ax,
            "figsize": figsize,
            "layout": layout,
            "sharex": sharex,
            "sharey": sharey,
            "backend": backend,
        }

        def draw(df):
            return df.boxplot(grouped, **named_options, **kwargs)

        return self._default_to_pandas(draw)

    def ngroup(self, ascending=True):
        numbered = self._wrap_aggregation(
            type(self._query_compiler).groupby_ngroup,
            numeric_only=False,
            agg_kwargs={"ascending": ascending},
        )
        # The result should always be a Series with name None and type int64,
        # so anything that is not a Series is squeezed along the columns.
        if isinstance(numbered, Series):
            return numbered
        return numbered.squeeze(axis=1)

    def nunique(self, dropna=True):
        unique_counts = self._wrap_aggregation(
            type(self._query_compiler).groupby_nunique,
            numeric_only=False,
            agg_kwargs={"dropna": dropna},
        )
        # `_check_index` may reset the index of the aggregated result.
        return self._check_index(unique_counts)

    def resample(self, rule, *args, include_groups=True, **kwargs):
        # Resampling is delegated to the pandas groupby with every argument
        # forwarded as received.
        def pandas_resample(df):
            return df.resample(rule, *args, include_groups=include_groups, **kwargs)

        return self._default_to_pandas(pandas_resample)

    def median(self, numeric_only=False):
        medians = self._wrap_aggregation(
            type(self._query_compiler).groupby_median,
            numeric_only=numeric_only,
        )
        # `_check_index` may reset the index of the aggregated result.
        return self._check_index(medians)

    def head(self, n=5):
        # groupby().head()/.tail() ignore as_index, so override it to True
        work_object = self._override(as_index=True)

        leading_rows = work_object._wrap_aggregation(
            type(work_object._query_compiler).groupby_head,
            agg_kwargs={"n": n},
            numeric_only=False,
        )
        return work_object._check_index(leading_rows)

    def cumprod(self, axis=lib.no_default, *args, **kwargs):
        if axis is lib.no_default:
            axis = 0
        else:
            # An explicitly passed axis is normalized to its number and then
            # handed to the deprecation helper.
            axis = self._df._get_axis_number(axis)
            self._deprecate_axis(axis, "cumprod")

        cumprod_kwargs = {"axis": axis, **kwargs}
        return self._wrap_aggregation(
            type(self._query_compiler).groupby_cumprod,
            agg_args=args,
            agg_kwargs=cumprod_kwargs,
        )

    def __iter__(self):
        return self._iter.__iter__()

    def cov(self, min_periods=None, ddof=1, numeric_only=False):
        cov_kwargs = {"min_periods": min_periods, "ddof": ddof}
        return self._wrap_aggregation(
            type(self._query_compiler).groupby_cov,
            agg_kwargs=cov_kwargs,
            numeric_only=numeric_only,
        )

    def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs):
        # Only a non-cython engine combined with explicit `engine_kwargs` is
        # routed through pandas; every other call uses the query compiler.
        non_default_engine = engine not in ("cython", None)
        if non_default_engine and engine_kwargs is not None:

            def pandas_transform(df):
                return df.transform(
                    func, *args, engine=engine, engine_kwargs=engine_kwargs, **kwargs
                )

            return self._default_to_pandas(pandas_transform)

        qc_cls = type(self._query_compiler)
        return self._wrap_aggregation(
            qc_method=qc_cls.groupby_agg,
            numeric_only=False,
            agg_func=func,
            agg_args=args,
            agg_kwargs=kwargs,
            how="transform",
        )

    def corr(self, method="pearson", min_periods=1, numeric_only=False):
        corr_kwargs = {"method": method, "min_periods": min_periods}
        return self._wrap_aggregation(
            type(self._query_compiler).groupby_corr,
            agg_kwargs=corr_kwargs,
            numeric_only=numeric_only,
        )

    def fillna(
        self,
        value=None,
        method=None,
        axis=lib.no_default,
        inplace=False,
        limit=None,
        downcast=lib.no_default,
    ):
        axis_was_passed = axis is not lib.no_default
        if axis_was_passed:
            self._deprecate_axis(axis, "fillna")

        deprecation_text = (
            f"{type(self).__name__}.fillna is deprecated and will be removed "
            + "in a future version. Use obj.ffill(), obj.bfill(), "
            + "or obj.nearest() instead."
        )
        warnings.warn(deprecation_text, FutureWarning)

        # default behaviour for aggregations; for the reference see
        # `_op_via_apply` func in pandas==2.0.2
        if axis is None or not axis_was_passed:
            axis = self._axis

        # The fill runs on a sibling groupby that differs from this one only
        # in having `as_index` forced to True; `self._kwargs` is not mutated.
        forced_index_kwargs = {**self._kwargs, "as_index": True}
        work_object = type(self)(
            df=self._df,
            by=self._by,
            axis=self._axis,
            idx_name=self._idx_name,
            drop=self._drop,
            backend_pinned=self._backend_pinned,
            **forced_index_kwargs,
        )
        fill_kwargs = {
            "value": value,
            "method": method,
            "axis": axis,
            "inplace": inplace,
            "limit": limit,
            "downcast": downcast,
        }
        return work_object._wrap_aggregation(
            type(self._query_compiler).groupby_fillna,
            agg_kwargs=fill_kwargs,
            numeric_only=False,
        )

    def count(self):
        qc_cls = type(self._query_compiler)
        return self._wrap_aggregation(qc_cls.groupby_count, numeric_only=False)

    def pipe(self, func, *args, **kwargs):
        # This groupby object itself is what gets handed to `com.pipe`.
        piped = com.pipe(self, func, *args, **kwargs)
        return piped

    def cumcount(self, ascending=True):
        counted = self._wrap_aggregation(
            type(self._query_compiler).groupby_cumcount,
            numeric_only=False,
            agg_kwargs={"ascending": ascending},
        )
        # The result should always be a Series with name None and type int64,
        # so anything that is not a Series is squeezed along the columns.
        if isinstance(counted, Series):
            return counted
        return counted.squeeze(axis=1)

    def tail(self, n=5):
        # groupby().head()/.tail() ignore as_index, so override it to True
        work_object = self._override(as_index=True)

        trailing_rows = work_object._wrap_aggregation(
            type(work_object._query_compiler).groupby_tail,
            agg_kwargs={"n": n},
            numeric_only=False,
        )
        return work_object._check_index(trailing_rows)

    # expanding and rolling are unique cases and need to likely be handled
    # separately. They do not appear to be commonly used.
    def expanding(self, *args, **kwargs):
        def pandas_expanding(df):
            return df.expanding(*args, **kwargs)

        return self._default_to_pandas(pandas_expanding)

    def rolling(self, *args, **kwargs):
        # The rolling object is built around this groupby, not around the data.
        rolling_groupby = RollingGroupby(self, *args, **kwargs)
        return rolling_groupby

    def hist(
        self,
        column=None,
        by=None,
        grid=True,
        xlabelsize=None,
        xrot=None,
        ylabelsize=None,
        yrot=None,
        ax=None,
        sharex=False,
        sharey=False,
        figsize=None,
        layout=None,
        bins=10,
        backend=None,
        legend=False,
        **kwargs,
    ):
        # Named options are collected once; extra keyword arguments follow
        # them in the call made on the pandas groupby object.
        named_options = {
            "column": column,
            "by": by,
            "grid": grid,
            "xlabelsize": xlabelsize,
            "xrot": xrot,
            "ylabelsize": ylabelsize,
            "yrot": yrot,
            "ax": ax,
            "sharex": sharex,
            "sharey": sharey,
            "figsize": figsize,
            "layout": layout,
            "bins": bins,
            "backend": backend,
            "legend": legend,
        }

        def draw(df):
            return df.hist(**named_options, **kwargs)

        return self._default_to_pandas(draw)

    def quantile(self, q=0.5, interpolation="linear", numeric_only=False):
        # TODO: handle list-like cases properly
        if is_list_like(q):
            # Several quantiles at once are delegated to pandas.
            # `numeric_only` has to be forwarded too, otherwise the caller's
            # choice would be silently ignored on this path only.
            return self._default_to_pandas(
                lambda df: df.quantile(
                    q=q, interpolation=interpolation, numeric_only=numeric_only
                )
            )

        # A single quantile goes through the query compiler; `_check_index`
        # may reset the index of the aggregated result.
        return self._check_index(
            self._wrap_aggregation(
                type(self._query_compiler).groupby_quantile,
                numeric_only=numeric_only,
                agg_kwargs=dict(q=q, interpolation=interpolation),
            )
        )

    def diff(self, periods=1, axis=lib.no_default):
        from .dataframe import DataFrame

        if axis is not lib.no_default:
            # An explicitly passed axis is normalized to its number and then
            # handed to the deprecation helper.
            axis = self._df._get_axis_number(axis)
            self._deprecate_axis(axis, "diff")
        else:
            axis = 0

        # Should check for API level errors
        # Attempting to match pandas error behavior here
        if not isinstance(periods, int):
            raise TypeError(f"periods must be an int. got {type(periods)} instead")

        def supports_subtraction(dtype):
            # Numeric and datetime-like data can be subtracted. The same rule
            # is applied to a Series and to every DataFrame column, so that
            # datetime data is not rejected for a Series while being accepted
            # for a DataFrame.
            return is_numeric_dtype(dtype) or is_datetime64_any_dtype(dtype)

        if isinstance(self._df, Series):
            if not supports_subtraction(self._df.dtypes):
                raise TypeError(
                    f"unsupported operand type for -: got {self._df.dtypes}"
                )
        elif isinstance(self._df, DataFrame) and axis == 0:
            by_is_query_compiler = isinstance(self._by, BaseQueryCompiler)
            for col, dtype in self._df.dtypes.items():
                # can't calculate diff on non-subtractable columns, so check
                # for such columns that are not included in the `by`
                if supports_subtraction(dtype):
                    continue
                if by_is_query_compiler and col in self._by.columns:
                    continue
                raise TypeError(f"unsupported operand type for -: got {dtype}")

        return self._wrap_aggregation(
            type(self._query_compiler).groupby_diff,
            agg_kwargs=dict(
                periods=periods,
                axis=axis,
            ),
        )

    def take(self, indices, axis=lib.no_default, **kwargs):
        # `axis` is forwarded exactly as received, including the sentinel.
        def pandas_take(df):
            return df.take(indices, axis=axis, **kwargs)

        return self._default_to_pandas(pandas_take)

    @property
    def _index(self):
        """
        Get index value.

        Returns
        -------
        pandas.Index
            Index value.
        """
        return self._query_compiler.index

    @property
    def _sort(self):
        """
        Get sort parameter value.

        Returns
        -------
        bool
            Value of sort parameter used to create DataFrameGroupBy object.
        """
        return self._kwargs.get("sort")

    @property
    def _as_index(self):
        """
        Get as_index parameter value.

        Returns
        -------
        bool
            Value of as_index parameter used to create DataFrameGroupBy object.
        """
        return self._kwargs.get("as_index")

    @property
    def _iter(self):
        """
        Build a lazy sequence of (group_id, DataFrame) pairs for iteration.

        Returns
        -------
        generator
            Generator expression yielding one tuple per group; each group's
            frame is only built when the generator reaches it.
        """
        from .dataframe import DataFrame

        indices = self.indices
        group_ids = indices.keys()

        # Rows are selected when grouping along axis 0, columns (by position)
        # otherwise.
        if self._axis == 0:

            def select(positions):
                return self._query_compiler.getitem_row_array(positions)

        else:

            def select(positions):
                return self._query_compiler.getitem_column_array(
                    positions, numeric=True
                )

        ordered_ids = sorted(group_ids) if self._sort else group_ids
        return (
            (
                (key,) if self._return_tuple_when_iterating else key,
                DataFrame(query_compiler=select(indices[key])),
            )
            for key in ordered_ids
        )

    def _compute_index_grouped(self, numerical=False):
        """
        Construct an index of group IDs.

        The computation is carried out with pure pandas, so a default-to-pandas
        warning is emitted on every call.

        Parameters
        ----------
        numerical : bool, default: False
            Whether the group indices should be positional (True) or label-based (False).

        Returns
        -------
        dict
            A dict of {group name -> group indices} values.

        See Also
        --------
        pandas.core.groupby.GroupBy.groups
        """
        # We end up using pure pandas to compute group indices, so raising a warning
        ErrorMessage.default_to_pandas("Group indices computation")

        # Splitting level-by and column-by since we serialize them in different ways
        by = None
        level = []
        if self._level is not None:
            # Grouping on levels only: `by` stays None, `level` becomes a list.
            level = self._level
            if not isinstance(level, list):
                level = [level]
        elif isinstance(self._by, list):
            # Hashable items that match an index name of the grouping axis are
            # treated as levels; everything else stays in `by`.
            by = []
            for o in self._by:
                if hashable(o) and o in self._query_compiler.get_index_names(
                    self._axis
                ):
                    level.append(o)
                else:
                    by.append(o)
        else:
            by = self._by

        # A mix of `by` and levels is handled the same way as a multi-column `by`.
        is_multi_by = self._is_multi_by or (by is not None and len(level) > 0)
        # `dropna` param is the only one that matters for the group indices result
        dropna = self._kwargs.get("dropna", True)

        if isinstance(self._by, BaseQueryCompiler) and is_multi_by:
            # A query compiler `by` is replaced with the list of its column labels.
            by = list(self._by.columns)

        if is_multi_by:
            # Because we are doing a collect (to_pandas) here and then groupby, we
            # end up using pandas implementation. Add the warning so the user is
            # aware.
            ErrorMessage.catch_bugs_and_request_email(self._axis == 1)
            if isinstance(by, list) and all(
                is_label(self._df, o, self._axis) for o in by
            ):
                # Every `by` item is a label of the frame, so only those
                # columns are converted to pandas.
                pandas_df = self._df._query_compiler.getitem_column_array(
                    by
                ).to_pandas()
            else:
                # Otherwise both `by` and the whole frame are converted.
                by = try_cast_to_pandas(by, squeeze=True)
                pandas_df = self._df._to_pandas()
            # NOTE(review): `wrap_into_list` presumably merges `by` and `level`
            # into one flat list of grouping keys -- confirm against the helper.
            by = wrap_into_list(by, level)
            groupby_obj = pandas_df.groupby(by=by, dropna=dropna)
            return groupby_obj.indices if numerical else groupby_obj.groups
        else:
            if isinstance(self._by, type(self._query_compiler)):
                # A query compiler `by` is converted into an array of its values.
                by = self._by.to_pandas().squeeze().values
            elif self._by is None:
                # Grouping on levels only: keep the requested levels of the
                # axis index (matched by name or by position) and drop the rest.
                index = self._query_compiler.get_axis(self._axis)
                levels_to_drop = [
                    i
                    for i, name in enumerate(index.names)
                    if name not in level and i not in level
                ]
                by = index.droplevel(levels_to_drop)
                if isinstance(by, pandas.MultiIndex):
                    # Put the remaining levels into the order given in `level`.
                    by = by.reorder_levels(level)
            else:
                by = self._by
            axis_labels = self._query_compiler.get_axis(self._axis)
            if numerical:
                # Since we want positional indices of the groups, we want to group
                # on a `RangeIndex`, not on the actual index labels
                axis_labels = pandas.RangeIndex(len(axis_labels))
            # `pandas.Index.groupby` doesn't take any parameters except `by`.
            # Have to convert an Index to a Series to be able to process `dropna=False`:
            if dropna:
                return axis_labels.groupby(by)
            else:
                groupby_obj = axis_labels.to_series().groupby(by, dropna=dropna)
                return groupby_obj.indices if numerical else groupby_obj.groups

    def _wrap_aggregation(
        self,
        qc_method,
        numeric_only=False,
        agg_args=None,
        agg_kwargs=None,
        **kwargs,
    ):
        """
        Perform common metadata transformations and apply groupby functions.

        Parameters
        ----------
        qc_method : callable
            The query compiler method to call.
        numeric_only : {None, True, False}, default: None
            Specifies whether to aggregate non numeric columns:
                - True: include only numeric columns (including categories that holds a numeric dtype)
                - False: include all columns
                - None: infer the parameter, ``False`` if there are no numeric types in the frame,
                  ``True`` otherwise.
        agg_args : list-like, optional
            Positional arguments to pass to the aggregation function.
        agg_kwargs : dict-like, optional
            Keyword arguments to pass to the aggregation function.
        **kwargs : dict
            Keyword arguments to pass to the specified query compiler's method.

        Returns
        -------
        DataFrame or Series
            Returns the same type as `self._df`.
        """
        agg_args = tuple() if agg_args is None else agg_args
        agg_kwargs = dict() if agg_kwargs is None else agg_kwargs

        if numeric_only and self.ndim == 2:
            by_cols = self._internal_by
            mask_cols = [
                col
                for col, dtype in self._query_compiler.dtypes.items()
                if (is_numeric_dtype(dtype) or col in by_cols)
            ]
            groupby_qc = self._query_compiler.getitem_column_array(mask_cols)
        else:
            groupby_qc = self._query_compiler

        return type(self._df)(
            query_compiler=qc_method(
                groupby_qc,
                by=self._by,
                axis=self._axis,
                groupby_kwargs=self._kwargs,
                agg_args=agg_args,
                agg_kwargs=agg_kwargs,
                drop=self._drop,
                **kwargs,
            )
        )

    def _check_index(self, result):
        """
        Check the result of groupby aggregation on the need of resetting index.

        Parameters
        ----------
        result : DataFrame
            Group by aggregation result.

        Returns
        -------
        DataFrame
        """
        if self._by is None and not self._as_index:
            # This is a workaround to align behavior with pandas. In this case pandas
            # resets index, but Modin doesn't do that. More details are in https://github.com/modin-project/modin/issues/3716.
            result.reset_index(drop=True, inplace=True)

        return result

    def _default_to_pandas(self, f, *args, **kwargs):
        """
        Execute function `f` in default-to-pandas way.

        Parameters
        ----------
        f : callable or str
            Either a callable that receives the pandas groupby object, or the
            name of an attribute to look up (and call, if callable) on it.
        *args : list
            Extra positional arguments to pass to `f`.
        **kwargs : dict
            Extra keyword arguments to pass to `f`.

        Returns
        -------
        modin.pandas.DataFrame
            A new Modin DataFrame with the result of the pandas function.
        """
        by_is_query_compiler = isinstance(self._by, type(self._query_compiler))
        if by_is_query_compiler and len(self._by.columns) == 1:
            if self._drop:
                by = self._by.columns[0]
            else:
                by = self._by.to_pandas().squeeze()
        elif by_is_query_compiler and self._drop:
            # converting QC 'by' to a list of column labels only if this 'by'
            # comes from the self (if drop is True)
            by = list(self._by.columns)
        else:
            by = self._by

        by = try_cast_to_pandas(by, squeeze=True)
        # Since 'by' may be a 2D query compiler holding columns to group by,
        # to_pandas will also produce a pandas DataFrame containing them.
        # So splitting 2D 'by' into a list of 1D Series using 'GroupBy.validate_by':
        by = GroupBy.validate_by(by)

        def groupby_on_multiple_columns(df, *args, **kwargs):
            groupby_obj = df.groupby(by=by, axis=self._axis, **self._kwargs)

            if callable(f):
                return f(groupby_obj, *args, **kwargs)

            # Anything that is not callable is expected to be an attribute name.
            ErrorMessage.catch_bugs_and_request_email(
                failure_condition=not isinstance(f, str)
            )
            attribute = getattr(groupby_obj, f)
            return attribute(*args, **kwargs) if callable(attribute) else attribute

        return self._df._default_to_pandas(groupby_on_multiple_columns, *args, **kwargs)


@_inherit_docstrings(pandas.core.groupby.SeriesGroupBy)
class SeriesGroupBy(DataFrameGroupBy):  # noqa: GL08
    _pandas_class = pandas.core.groupby.SeriesGroupBy
    _extensions: EXTENSION_DICT_TYPE = EXTENSION_DICT_TYPE(dict)

    @disable_logging
    def __getattribute__(self, item: str) -> Any:
        """
        Look up an attribute, giving registered extensions the first chance.

        Python calls this method for every attribute access. It is overridden
        so that extension attributes can be returned.

        Parameters
        ----------
        item : str
            Attribute name.

        Returns
        -------
        Any
            The value of the attribute.
        """
        # Names excluded from extension lookup go straight to the default path.
        if item in GROUPBY_EXTENSION_NO_LOOKUP:
            return super().__getattribute__(item)

        extensions_result = self._getattribute__from_extension_impl(
            item, __class__._extensions
        )
        # `sentinel` signals that no extension provided this attribute.
        if extensions_result is sentinel:
            return super().__getattribute__(item)
        return extensions_result

    @_inherit_docstrings(QueryCompilerCaster._getattr__from_extension_impl)
    def __getattr__(self, key: str) -> Any:
        # Extensions registered on this exact class are consulted, not those
        # of whatever subclass `self` might belong to.
        own_extensions = __class__._extensions
        return self._getattr__from_extension_impl(
            key=key,
            default_behavior_attributes=GROUPBY_EXTENSION_NO_LOOKUP,
            extensions=own_extensions,
        )

    @disable_logging
    def __setattr__(self, key: str, value: Any) -> None:
        """
        Assign an attribute, honoring settable extension attributes.

        Parameters
        ----------
        key : str
            Attribute name.
        value : Any
            Value to set the attribute to.

        Returns
        -------
        None
        """
        # An extension property is only accessible if the backend supports it.
        extension = self._get_extension(key, __class__._extensions)
        if extension is sentinel or not hasattr(extension, "__set__"):
            # No extension, or one that cannot be assigned to: regular path.
            return super().__setattr__(key, value)
        return extension.__set__(self, value)

    @disable_logging
    def __delattr__(self, name: str) -> None:
        """
        Remove an attribute, honoring deletable extension attributes.

        Parameters
        ----------
        name : str
            Attribute name.

        Returns
        -------
        None
        """
        # An extension property is only accessible if the backend supports it.
        extension = self._get_extension(name, __class__._extensions)
        if extension is sentinel or not hasattr(extension, "__delete__"):
            # No extension, or one that cannot be deleted: regular path.
            return super().__delattr__(name)
        return extension.__delete__(self)

    @property
    def ndim(self):
        """
        Return the number of dimensions, which is always 1 here.

        Returns
        -------
        int
            Returns 1.

        Notes
        -----
        Deprecated and removed in pandas and will be likely removed in Modin.
        """
        series_ndim = 1  # ndim is always 1 for Series
        return series_ndim

    @property
    def _iter(self):
        """
        Build a lazy sequence of (group_id, Series) pairs for iteration.

        Returns
        -------
        generator
            Generator expression yielding one tuple per group; each group's
            Series is only built when the generator reaches it.
        """
        indices = self.indices
        group_ids = indices.keys()

        # Rows are selected when grouping along axis 0, columns (by position)
        # otherwise.
        if self._axis == 0:

            def select(positions):
                return self._query_compiler.getitem_row_array(positions)

        else:

            def select(positions):
                return self._query_compiler.getitem_column_array(
                    positions, numeric=True
                )

        ordered_ids = sorted(group_ids) if self._sort else group_ids
        return (
            (key, Series(query_compiler=select(indices[key])))
            for key in ordered_ids
        )

    def _try_get_str_func(self, fn):
        """
        Try to convert a groupby aggregation function to a string or list of such.

        Parameters
        ----------
        fn : callable, str, or Iterable

        Returns
        -------
        str, list
            If `fn` is a callable, return its name, otherwise return `fn` itself.
            If `fn` is a string, return it. If `fn` is an Iterable, return a list
            of _try_get_str_func applied to each element of `fn`.
        """
        if not isinstance(fn, str) and isinstance(fn, Iterable):
            return [self._try_get_str_func(f) for f in fn]
        return fn.__name__ if callable(fn) else fn

    def value_counts(
        self,
        normalize: bool = False,
        sort: bool = True,
        ascending: bool = False,
        bins=None,
        dropna: bool = True,
    ):  # noqa: GL08
        counting_options = {
            "normalize": normalize,
            "sort": sort,
            "ascending": ascending,
            "bins": bins,
            "dropna": dropna,
        }

        def pandas_value_counts(ser):
            return ser.value_counts(**counting_options)

        return self._default_to_pandas(pandas_value_counts)

    def corr(self, other, method="pearson", min_periods=None):
        corr_kwargs = {"other": other, "method": method, "min_periods": min_periods}
        return self._wrap_aggregation(
            type(self._query_compiler).groupby_corr,
            agg_kwargs=corr_kwargs,
        )

    def cov(self, other, min_periods=None, ddof=1):
        cov_kwargs = {"other": other, "min_periods": min_periods, "ddof": ddof}
        return self._wrap_aggregation(
            type(self._query_compiler).groupby_cov,
            agg_kwargs=cov_kwargs,
        )

    def describe(self, percentiles=None, include=None, exclude=None):
        # All three options are forwarded unchanged to the pandas groupby.
        describe_kwargs = {
            "percentiles": percentiles,
            "include": include,
            "exclude": exclude,
        }

        def pandas_describe(df):
            return df.describe(**describe_kwargs)

        return self._default_to_pandas(pandas_describe)

    def apply(self, func, *args, **kwargs):
        # Pure delegation: the parent class implementation does all the work.
        parent_apply = super().apply
        return parent_apply(func, *args, **kwargs)

    def idxmax(self, axis=lib.no_default, skipna=True):
        if axis is lib.no_default:
            # No axis was given explicitly: fall back to axis 0.
            axis = 0
        else:
            # An explicit axis is normalized to its number and then reported
            # through the `_deprecate_axis` hook.
            axis = self._df._get_axis_number(axis)
            self._deprecate_axis(axis, "idxmax")
        idxmax_kwargs = {"axis": axis, "skipna": skipna}
        return self._wrap_aggregation(
            type(self._query_compiler).groupby_idxmax,
            agg_kwargs=idxmax_kwargs,
        )

    def idxmin(self, axis=lib.no_default, skipna=True):
        if axis is lib.no_default:
            # No axis was given explicitly: fall back to axis 0.
            axis = 0
        else:
            # An explicit axis is normalized to its number and then reported
            # through the `_deprecate_axis` hook.
            axis = self._df._get_axis_number(axis)
            self._deprecate_axis(axis, "idxmin")
        idxmin_kwargs = {"axis": axis, "skipna": skipna}
        return self._wrap_aggregation(
            type(self._query_compiler).groupby_idxmin,
            agg_kwargs=idxmin_kwargs,
        )

    def hist(
        self,
        by=None,
        ax=None,
        grid=True,
        xlabelsize=None,
        xrot=None,
        ylabelsize=None,
        yrot=None,
        figsize=None,
        bins=10,
        backend=None,
        legend=False,
        **kwargs,
    ):
        # The plotting call is handed over to pandas via `_default_to_pandas`;
        # named options and any extra keyword arguments are forwarded unchanged.
        hist_options = dict(
            by=by,
            ax=ax,
            grid=grid,
            xlabelsize=xlabelsize,
            xrot=xrot,
            ylabelsize=ylabelsize,
            yrot=yrot,
            figsize=figsize,
            bins=bins,
            backend=backend,
            legend=legend,
            **kwargs,
        )
        return self._default_to_pandas(lambda df: df.hist(**hist_options))

    @property
    def is_monotonic_decreasing(self):
        # Read the attribute of the same name from the pandas object by going
        # through the `_default_to_pandas` fallback.
        result = self._default_to_pandas(lambda ser: ser.is_monotonic_decreasing)
        return result

    @property
    def is_monotonic_increasing(self):
        # Read the attribute of the same name from the pandas object by going
        # through the `_default_to_pandas` fallback.
        result = self._default_to_pandas(lambda ser: ser.is_monotonic_increasing)
        return result

    @property
    def dtype(self):
        # Read `dtype` from the pandas object by going through the
        # `_default_to_pandas` fallback.
        result = self._default_to_pandas(lambda ser: ser.dtype)
        return result

    def unique(self):
        # Aggregate with the query compiler's `groupby_unique` (non-numeric
        # data included), then post-process the result with `_check_index`.
        aggregated = self._wrap_aggregation(
            type(self._query_compiler).groupby_unique,
            numeric_only=False,
        )
        return self._check_index(aggregated)

    def nlargest(self, n=5, keep="first"):
        # Aggregate with the query compiler's `groupby_nlargest` on numeric
        # data only, then post-process the result with `_check_index`.
        nlargest_kwargs = {"n": n, "keep": keep}
        aggregated = self._wrap_aggregation(
            type(self._query_compiler).groupby_nlargest,
            agg_kwargs=nlargest_kwargs,
            numeric_only=True,
        )
        return self._check_index(aggregated)

    def nsmallest(self, n=5, keep="first"):
        # Aggregate with the query compiler's `groupby_nsmallest` on numeric
        # data only, then post-process the result with `_check_index`.
        nsmallest_kwargs = {"n": n, "keep": keep}
        aggregated = self._wrap_aggregation(
            type(self._query_compiler).groupby_nsmallest,
            agg_kwargs=nsmallest_kwargs,
            numeric_only=True,
        )
        return self._check_index(aggregated)

    def _validate_func_kwargs(self, kwargs: dict):
        """
        Validate types of user-provided "named aggregation" kwargs.

        Parameters
        ----------
        kwargs : dict

        Returns
        -------
        columns : List[str]
            List of user-provided keys.
        funcs : List[Union[str, callable[...,Any]]]
            List of user-provided aggfuncs.

        Raises
        ------
        `TypeError` is raised if aggfunc is not `str` or callable.

        Notes
        -----
        Copied from pandas.
        """
        columns = list(kwargs)
        funcs = []
        for col_func in kwargs.values():
            if not (isinstance(col_func, str) or callable(col_func)):
                raise TypeError(
                    f"func is expected but received {type(col_func).__name__} in **kwargs."
                )
            funcs.append(col_func)
        if not columns:
            raise TypeError("Must provide 'func' or named aggregation **kwargs.")
        return columns, funcs

    def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs):
        # Without an explicit `func` the keyword arguments are interpreted as
        # "named aggregation": keys become result labels, values are the aggfuncs.
        func_is_none = func is None
        if func_is_none:
            columns, func = self._validate_func_kwargs(kwargs)
            kwargs = {}

        # The special handling below only applies when no engine options are given.
        engine_default = engine is None and engine_kwargs is None

        if engine_default and isinstance(func, dict):
            raise SpecificationError("nested renamer is not supported")

        if not (engine_default and is_list_like(func)):
            return super().aggregate(
                func, *args, engine=engine, engine_kwargs=engine_kwargs, **kwargs
            )

        from .dataframe import DataFrame

        agg_qc = self._query_compiler.groupby_agg(
            by=self._by,
            agg_func=func,
            axis=self._axis,
            groupby_kwargs=self._kwargs,
            agg_args=args,
            agg_kwargs=kwargs,
        )
        result = DataFrame(query_compiler=agg_qc)
        # query compiler always gives result a multiindex on the axis with the
        # function names, but series always gets a regular index on the columns
        # because there is no need to identify which original column's aggregation
        # the new column represents. alternatively we could give the query compiler
        # a hint that it's for a series, not a dataframe.
        labels = columns if func_is_none else self._try_get_str_func(func)
        return result.set_axis(labels=labels, axis=1, copy=False)

    # ``agg`` is an alias bound to the very same function object as ``aggregate``.
    agg = aggregate


================================================
FILE: modin/pandas/indexing.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

# noqa: MD02
"""
Details about how Indexing Helper Class works.

_LocationIndexerBase provides the method framework for __getitem__
  and __setitem__ that works with Modin DataFrame's internal index. The base
  class's __{get,set}item__ takes in partitions & idx_in_partition data
  and performs the lookup/item write.

_LocIndexer and _iLocIndexer are responsible for indexer-specific logic and
  lookup computation. Loc takes care of enlarging the DataFrame. Both indexers
  take care of translating pandas's lookup to Modin DataFrame's internal
  lookup.

An illustration is available at
https://github.com/ray-project/ray/pull/1955#issuecomment-386781826
"""

from __future__ import annotations

import itertools
from typing import TYPE_CHECKING, Optional, Union

import numpy as np
import pandas
from pandas.api.types import is_bool, is_list_like
from pandas.core.dtypes.common import is_bool_dtype, is_integer, is_integer_dtype
from pandas.core.indexing import IndexingError

from modin.core.storage_formats.pandas.query_compiler_caster import (
    EXTENSION_DICT_TYPE,
    QueryCompilerCaster,
)
from modin.error_message import ErrorMessage
from modin.logging import ClassLogger, disable_logging
from modin.utils import _inherit_docstrings

from .dataframe import DataFrame
from .series import Series
from .utils import is_scalar

if TYPE_CHECKING:
    from typing_extensions import Self

    from modin.core.storage_formats import BaseQueryCompiler


def is_slice(x):
    """
    Tell whether the given object is a ``slice``.

    Parameters
    ----------
    x : object
        Object to inspect.

    Returns
    -------
    bool
        True for a ``slice`` instance, False for anything else.
    """
    return isinstance(x, slice)


def compute_sliced_len(slc, sequence_len):
    """
    Compute how many elements a slice selects from a sequence of known length.

    Parameters
    ----------
    slc : slice
        Slice object.
    sequence_len : int
        Length of the sequence the slice will be applied to.

    Returns
    -------
    int
        Number of elements left after applying `slc`.
    """
    # `slice.indices` resolves the slice against the given length; the
    # equivalent range then knows its own length.
    start, stop, step = slc.indices(sequence_len)
    return len(range(start, stop, step))


def is_2d(x):
    """
    Tell whether the given object is list-like or a slice.

    Parameters
    ----------
    x : object
        Object to inspect.

    Returns
    -------
    bool
        `True` if `x` is list-like or a slice, `False` otherwise.
    """
    if is_list_like(x):
        return True
    return is_slice(x)


def is_tuple(x):
    """
    Tell whether the given object is a ``tuple``.

    Parameters
    ----------
    x : object
        Object to inspect.

    Returns
    -------
    bool
        True for a ``tuple`` instance (subclasses included), False otherwise.
    """
    return isinstance(x, tuple)


def is_boolean_array(x):
    """
    Tell whether the given object holds only booleans.

    Parameters
    ----------
    x : object
        Object to inspect.

    Returns
    -------
    bool
        True if argument is an array of bool, False otherwise.
    """
    # One-dimensional containers expose a single dtype to test.
    if isinstance(x, (np.ndarray, Series, pandas.Series, pandas.Index)):
        return is_bool_dtype(x.dtype)
    # Frames qualify only when every column dtype is boolean.
    if isinstance(x, (DataFrame, pandas.DataFrame)):
        return all(is_bool_dtype(column_dtype) for column_dtype in x.dtypes)
    # Anything else has to be list-like and is checked element by element.
    if not is_list_like(x):
        return False
    return all(is_bool(element) for element in x)


def is_integer_array(x):
    """
    Tell whether the given object holds only integers.

    Parameters
    ----------
    x : object
        Object to inspect.

    Returns
    -------
    bool
        True if argument is an array of integers, False otherwise.
    """
    # One-dimensional containers expose a single dtype to test.
    if isinstance(x, (np.ndarray, Series, pandas.Series, pandas.Index)):
        return is_integer_dtype(x.dtype)
    # Frames qualify only when every column dtype is an integer dtype.
    if isinstance(x, (DataFrame, pandas.DataFrame)):
        return all(is_integer_dtype(column_dtype) for column_dtype in x.dtypes)
    # Anything else has to be list-like and is checked element by element.
    if not is_list_like(x):
        return False
    return all(is_integer(element) for element in x)


def is_integer_slice(x):
    """
    Tell whether the given object is a slice with integer-only positions.

    Parameters
    ----------
    x : object
        Object to inspect.

    Returns
    -------
    bool
        True if `x` is a slice whose start, stop and step are each either
        ``None`` or an integer, False otherwise.
    """
    if not is_slice(x):
        return False
    # A single position that is neither None nor an integer disqualifies the slice.
    return all(
        position is None or is_integer(position)
        for position in (x.start, x.stop, x.step)
    )


def is_range_like(obj):
    """
    Tell whether the object is range-like.

    A range-like object is iterable and carries the description of a range:
    start and stop positions, and a step. Examples of range-like
    objects are: Python range, pandas.RangeIndex.

    Parameters
    ----------
    obj : object

    Returns
    -------
    bool
    """
    required_attributes = ("__iter__", "start", "stop", "step")
    return all(hasattr(obj, attribute) for attribute in required_attributes)


def boolean_mask_to_numeric(indexer):
    """
    Turn a boolean mask into the positions of its ``True`` entries.

    Parameters
    ----------
    indexer : list-like of booleans

    Returns
    -------
    np.ndarray of ints
        Numerical positions of ``True`` elements in the passed `indexer`.
    """
    if not isinstance(indexer, (np.ndarray, Series, pandas.Series)):
        # It's faster to build the resulting numpy array from the reduced amount of data via
        # `compress` iterator than convert non-numpy-like `indexer` to numpy and apply `np.where`.
        # `itertools.compress` masks `data` with the `selectors` mask,
        # works about ~10% faster than a pure list comprehension
        true_positions = itertools.compress(
            data=range(len(indexer)), selectors=indexer
        )
        return np.fromiter(true_positions, dtype=np.int64)
    return np.where(indexer)[0]


# Error text listing the key types accepted by location based indexing.
# NOTE(review): not used within this section; presumably raised by the iloc
# indexer for unsupported keys -- confirm against its usage.
_ILOC_INT_ONLY_ERROR = """
Location based indexing can only have [integer, integer slice (START point is
INCLUDED, END point is EXCLUDED), listlike of integers, boolean array] types.
"""

# Error text used when an indexing key contains more than one Ellipsis entry.
_one_ellipsis_message = "indexer may only contain one '...' entry"


def _compute_ndim(row_loc, col_loc):
    """
    Derive the dimensionality of an indexing result from its locators.

    Parameters
    ----------
    row_loc : list or scalar
        Row locator.
    col_loc : list or scalar
        Column locator.

    Returns
    -------
    {0, 1, 2}
        Number of dimensions in located dataset.
    """
    # Tuples are treated like scalars here.
    row_is_scalar = is_scalar(row_loc) or is_tuple(row_loc)
    col_is_scalar = is_scalar(col_loc) or is_tuple(col_loc)

    # Both locators scalar -> single value.
    if row_is_scalar and col_is_scalar:
        return 0
    # Exactly one locator scalar -> one-dimensional result.
    if row_is_scalar or col_is_scalar:
        return 1
    # Neither locator scalar -> two-dimensional result.
    return 2


class _LocationIndexerBase(QueryCompilerCaster, ClassLogger):
    """
    Base class for location indexer like loc and iloc.

    It stores the indexed object together with its query compiler and provides
    the helpers shared by the concrete indexers: key parsing, conversion of a
    query compiler view to a result object, positional assignment and backend
    (pin / switch) management.

    Parameters
    ----------
    modin_df : Union[DataFrame, Series]
        DataFrame to operate on.
    """

    # The object being indexed.
    df: Union[DataFrame, Series]
    # Query compiler of `df`; refreshed after every positional assignment.
    qc: BaseQueryCompiler
    _extensions: EXTENSION_DICT_TYPE = EXTENSION_DICT_TYPE(dict)

    def is_backend_pinned(self) -> bool:
        """
        Get whether this object's data is pinned to a particular backend.

        Returns
        -------
        bool
            True if the data is pinned.
        """
        return self.df.is_backend_pinned()

    def _set_backend_pinned(self, pinned: bool, inplace: bool = False):
        """
        Update whether this object's data is pinned to a particular backend.

        Parameters
        ----------
        pinned : bool
            Whether the data is pinned.

        inplace : bool, default: False
            Whether to update the object in place.

        Returns
        -------
        Optional[Self]
            The object with the new pin state, if `inplace` is False. Otherwise, None.
        """
        change = (self.is_backend_pinned() and not pinned) or (
            not self.is_backend_pinned() and pinned
        )
        if not change:
            return None if inplace else self
        # Build an indexer over an object that carries the new pin state, then
        # either return it or copy its state into this indexer.
        result = type(self)(self.df._set_backend_pinned(pinned))
        if inplace:
            result._copy_into(self)
            return None
        return result

    @disable_logging
    @_inherit_docstrings(QueryCompilerCaster.set_backend)
    def set_backend(
        self, backend, inplace: bool = False, *, switch_operation: Optional[str] = None
    ) -> Optional[Self]:
        result = type(self)(
            self.df.set_backend(backend, switch_operation=switch_operation)
        )
        if inplace:
            result._copy_into(self)
            return None
        return result

    @disable_logging
    @_inherit_docstrings(QueryCompilerCaster._get_query_compiler)
    def _get_query_compiler(self):
        # `qc` may not be set yet (e.g. before `__init__` has run), hence `getattr`.
        return getattr(self, "qc", None)

    @disable_logging
    @_inherit_docstrings(QueryCompilerCaster.get_backend)
    def get_backend(self):
        return self.qc.get_backend()

    @disable_logging
    @_inherit_docstrings(QueryCompilerCaster._copy_into)
    def _copy_into(self, other: Self):
        other.qc = self.df._query_compiler
        other.df._update_inplace(new_query_compiler=self.df._query_compiler)
        # The pin state must be applied in place: without ``inplace=True`` the
        # call hands back an object with the requested state (see how
        # ``_set_backend_pinned`` above consumes its return value) and leaves
        # ``other.df`` untouched, so the update would be silently lost.
        other.df._set_backend_pinned(self.is_backend_pinned(), inplace=True)
        return None

    def __init__(self, modin_df: Union[DataFrame, Series]):
        # TODO(https://github.com/modin-project/modin/issues/7513): Do not keep
        # both `df` and `qc`.
        self.df = modin_df
        self.qc = modin_df._query_compiler

    def _validate_key_length(self, key: tuple) -> tuple:  # noqa: GL08
        # Implementation copied from pandas.
        # A key may have at most `ndim` entries; a single leading Ellipsis is
        # stripped before the (recursive) re-check.
        if len(key) > self.df.ndim:
            if key[0] is Ellipsis:
                # e.g. Series.iloc[..., 3] reduces to just Series.iloc[3]
                key = key[1:]
                if Ellipsis in key:
                    raise IndexingError(_one_ellipsis_message)
                return self._validate_key_length(key)
            raise IndexingError("Too many indexers")
        return key

    def __getitem__(self, key):  # pragma: no cover
        """
        Retrieve dataset according to `key`.

        Parameters
        ----------
        key : callable, scalar, or tuple
            The global row index to retrieve data from.

        Returns
        -------
        modin.pandas.DataFrame or modin.pandas.Series
            Located dataset.

        See Also
        --------
        pandas.DataFrame.loc
        """
        raise NotImplementedError("Implemented by subclasses")

    def __setitem__(self, key, item):  # pragma: no cover
        """
        Assign `item` value to dataset located by `key`.

        Parameters
        ----------
        key : callable or tuple
            The global row numbers to assign data to.
        item : modin.pandas.DataFrame, modin.pandas.Series or scalar
            Value that should be assigned to located dataset.

        See Also
        --------
        pandas.DataFrame.iloc
        """
        raise NotImplementedError("Implemented by subclasses")

    def _get_pandas_object_from_qc_view(
        self,
        qc_view,
        row_multiindex_full_lookup: bool,
        col_multiindex_full_lookup: bool,
        row_scalar: bool,
        col_scalar: bool,
        ndim: int,
    ):
        """
        Convert the query compiler view to the appropriate pandas object.

        Parameters
        ----------
        qc_view : BaseQueryCompiler
            Query compiler to convert.
        row_multiindex_full_lookup : bool
            See _multiindex_possibly_contains_key.__doc__.
        col_multiindex_full_lookup : bool
            See _multiindex_possibly_contains_key.__doc__.
        row_scalar : bool
            Whether indexer for rows is scalar.
        col_scalar : bool
            Whether indexer for columns is scalar.
        ndim : {0, 1, 2}
            Number of dimensions in dataset to be retrieved.

        Returns
        -------
        modin.pandas.DataFrame or modin.pandas.Series
            The pandas object with the data from the query compiler view.

        Notes
        -----
        Usage of `slice(None)` as a lookup is a hack to pass information about
        full-axis grab without computing actual indices that triggers lazy computations.
        Ideally, this API should get rid of using slices as indexers and either use a
        common ``Indexer`` object or range and ``np.ndarray`` only.
        """
        # Two-dimensional results and non-scalar row lookups on a Series need
        # no squeezing: wrap the view as is.
        if ndim == 2:
            return self.df.__constructor__(query_compiler=qc_view)
        if isinstance(self.df, Series) and not row_scalar:
            return self.df.__constructor__(query_compiler=qc_view)

        if isinstance(self.df, Series):
            axis = 0
        elif ndim == 0:
            axis = None
        else:
            # We are in the case where ndim == 1
            # The axis we squeeze on depends on whether we are looking for an exact
            # value or a subset of rows and columns. Knowing if we have a full MultiIndex
            # lookup or scalar lookup can help us figure out whether we need to squeeze
            # on the row or column index.
            axis = (
                None
                if (col_scalar and row_scalar)
                or (row_multiindex_full_lookup and col_multiindex_full_lookup)
                else 1 if col_scalar or col_multiindex_full_lookup else 0
            )

        res_df = self.df.__constructor__(query_compiler=qc_view)
        return res_df.squeeze(axis=axis)

    def _setitem_positional(self, row_lookup, col_lookup, item, axis=None):
        """
        Assign `item` value to located dataset.

        Parameters
        ----------
        row_lookup : slice or scalar
            The global row index to write item to.
        col_lookup : slice or scalar
            The global col index to write item to.
        item : DataFrame, Series or scalar
            The new item needs to be set. It can be any shape that's
            broadcast-able to the product of the lookup tables.
        axis : {None, 0, 1}, default: None
            If not None, it means that whole axis is used to assign a value.
            0 means assign to whole column, 1 means assign to whole row.
            If None, it means that partial assignment is done on both axes.
        """
        # Convert slices to indices for the purposes of application.
        # TODO (devin-petersohn): Apply to slice without conversion to list
        if isinstance(row_lookup, slice):
            row_lookup = range(len(self.qc.index))[row_lookup]
        if isinstance(col_lookup, slice):
            col_lookup = range(len(self.qc.columns))[col_lookup]
        # This is True when we dealing with assignment of a full column. This case
        # should be handled in a fastpath with `df[col] = item`.
        if axis == 0:
            assert len(col_lookup) == 1
            self.df[self.df.columns[col_lookup][0]] = item
        # This is True when we are assigning to a full row. We want to reuse the setitem
        # mechanism to operate along only one axis for performance reasons.
        elif axis == 1:
            if hasattr(item, "_query_compiler"):
                if isinstance(item, DataFrame):
                    item = item.squeeze(axis=0)
                item = item._query_compiler
            assert len(row_lookup) == 1
            new_qc = self.qc.setitem(1, self.qc.index[row_lookup[0]], item)
            self.df._create_or_update_from_compiler(new_qc, inplace=True)
        # Assignment to both axes.
        else:
            new_qc = self.qc.write_items(row_lookup, col_lookup, item)
            self.df._create_or_update_from_compiler(new_qc, inplace=True)

        # The assignment replaced the query compiler of `df`; keep `qc` in sync.
        self.qc = self.df._query_compiler

    def _determine_setitem_axis(self, row_lookup, col_lookup, row_scalar, col_scalar):
        """
        Determine an axis along which we should do an assignment.

        Parameters
        ----------
        row_lookup : slice or list
            Indexer for rows.
        col_lookup : slice or list
            Indexer for columns.
        row_scalar : bool
            Whether indexer for rows is scalar or not.
        col_scalar : bool
            Whether indexer for columns is scalar or not.

        Returns
        -------
        int or None
            None if this will be a both axis assignment, number of axis to assign in other cases.

        Notes
        -----
        axis = 0: column assignment df[col] = item
        axis = 1: row assignment df.loc[row] = item
        axis = None: assignment along both axes
        """
        # For a single-cell object the lookup lengths cannot tell the cases
        # apart, so the decision is made from the scalar flags only.
        if self.df.shape == (1, 1):
            return None if not (row_scalar ^ col_scalar) else 1 if row_scalar else 0

        def get_axis(axis):
            return self.qc.index if axis == 0 else self.qc.columns

        # Number of positions selected along each axis (slices are resolved
        # against the length of the corresponding axis).
        row_lookup_len, col_lookup_len = [
            (
                len(lookup)
                if not isinstance(lookup, slice)
                else compute_sliced_len(lookup, len(get_axis(i)))
            )
            for i, lookup in enumerate([row_lookup, col_lookup])
        ]

        if col_lookup_len == 1 and row_lookup_len == 1:
            axis = None
        elif (
            row_lookup_len == len(self.qc.index)
            and col_lookup_len == 1
            and isinstance(self.df, DataFrame)
        ):
            axis = 0
        elif col_lookup_len == len(self.qc.columns) and row_lookup_len == 1:
            axis = 1
        else:
            axis = None
        return axis

    def _parse_row_and_column_locators(self, tup):
        """
        Unpack the user input for getitem and setitem and compute ndim.

        loc[a] -> ([a], :), 1D
        loc[[a,b]] -> ([a,b], :),
        loc[a,b] -> ([a], [b]), 0D

        Parameters
        ----------
        tup : tuple
            User input to unpack.

        Returns
        -------
        row_loc : scalar or list
            Row locator(s) as a scalar or List.
        col_list : scalar or list
            Column locator(s) as a scalar or List.
        ndim : {0, 1, 2}
            Number of dimensions of located dataset.

        Raises
        ------
        IndexingError
            If `tup` is a tuple with more than two entries.
        """
        # A missing locator means "take the whole axis".
        row_loc, col_loc = slice(None), slice(None)

        if is_tuple(tup):
            row_loc = tup[0]
            if len(tup) == 2:
                col_loc = tup[1]
            if len(tup) > 2:
                raise IndexingError("Too many indexers")
        else:
            row_loc = tup

        # Callable locators are evaluated against the indexed object.
        row_loc = row_loc(self.df) if callable(row_loc) else row_loc
        col_loc = col_loc(self.df) if callable(col_loc) else col_loc
        return row_loc, col_loc, _compute_ndim(row_loc, col_loc)

    # HACK: This method bypasses regular ``loc/iloc.__getitem__`` flow in order to ensure better
    # performance in the case of boolean masking. The only purpose of this method is to compensate
    # for a lack of backend's indexing API, there is no Query Compiler method allowing masking
    # along both axis when any of the indexers is a boolean. That's why rows and columns masking
    # phases are separate in this case.
    # TODO: Remove this method and handle this case naturally via ``loc/iloc.__getitem__`` flow
    # when QC API would support both-axis masking with boolean indexers.
    def _handle_boolean_masking(self, row_loc, col_loc):
        """
        Retrieve dataset according to the boolean mask for rows and an indexer for columns.

        In comparison with the regular ``loc/iloc.__getitem__`` flow this method efficiently
        masks rows with a Modin Series boolean mask without materializing it (if the selected
        execution implements such masking).

        Parameters
        ----------
        row_loc : modin.pandas.Series of bool dtype
            Boolean mask to index rows with.
        col_loc : object
            An indexer along column axis.

        Returns
        -------
        modin.pandas.DataFrame or modin.pandas.Series
            Located dataset.
        """
        ErrorMessage.catch_bugs_and_request_email(
            failure_condition=not isinstance(row_loc, Series),
            extra_log=f"Only ``modin.pandas.Series`` boolean masks are acceptable, got: {type(row_loc)}",
        )
        masked_df = self.df.__constructor__(
            query_compiler=self.qc.getitem_array(row_loc._query_compiler)
        )
        if isinstance(masked_df, Series):
            # A Series has no column axis to index any further.
            assert col_loc == slice(None)
            return masked_df
        # Passing `slice(None)` as a row indexer since we've just applied it
        return type(self)(masked_df)[(slice(None), col_loc)]

    def _multiindex_possibly_contains_key(self, axis, key):
        """
        Determine if a MultiIndex row/column possibly contains a key.

        Check to see if the current DataFrame has a MultiIndex row/column and if it does,
        check to see if the key is potentially a full key-lookup such that the number of
        levels match up with the length of the tuple key.

        Parameters
        ----------
        axis : {0, 1}
            0 for row, 1 for column.
        key : Any
            Lookup key for MultiIndex row/column.

        Returns
        -------
        bool
            If the MultiIndex possibly contains the given key.

        Notes
        -----
        This function only returns False if we have a partial key lookup. It's
        possible that this function returns True for a key that does NOT exist
        since we only check the length of the `key` tuple to match the number
        of levels in the MultiIndex row/column.
        """
        if not self.qc.has_multiindex(axis=axis):
            return False

        multiindex = self.df.index if axis == 0 else self.df.columns
        return isinstance(key, tuple) and len(key) == len(multiindex.levels)


class _LocIndexer(_LocationIndexerBase):
    """
    An indexer for modin_df.loc[] functionality.

    Parameters
    ----------
    modin_df : Union[DataFrame, Series]
        DataFrame to operate on.
    """

    _extensions: EXTENSION_DICT_TYPE = EXTENSION_DICT_TYPE(dict)

    def __getitem__(self, key):
        """
        Retrieve dataset according to `key`.

        Parameters
        ----------
        key : callable, scalar, or tuple
            The global row index to retrieve data from.

        Returns
        -------
        modin.pandas.DataFrame or modin.pandas.Series
            Located dataset.

        See Also
        --------
        pandas.DataFrame.loc
        """
        frame = self.df
        if frame.empty:
            # Empty objects are delegated to pandas as a whole.
            return frame._default_to_pandas(lambda df: df.loc[key])
        if isinstance(key, tuple):
            key = self._validate_key_length(key)

        # A 2-tuple of scalars on a row MultiIndex is ambiguous: it may be
        # loc[('level_one_key', 'level_two_key')] or
        # loc['level_one_key', 'column_name']. Both can be valid at once, and
        # pandas seems to prefer the first reading when it works, so it is
        # attempted first here as well.
        two_scalars_on_multiindex = (
            isinstance(key, tuple)
            and len(key) == 2
            and all(map(is_scalar, key))
            and self.qc.has_multiindex(axis=0)
        )
        if two_scalars_on_multiindex:
            row_only = self._parse_row_and_column_locators((key, slice(None)))
            try:
                return self._helper_for__getitem__(key, *row_only)
            except KeyError:
                # Not a full row key; fall back to the (row, column) reading.
                pass

        parsed = self._parse_row_and_column_locators(key)
        return self._helper_for__getitem__(key, *parsed)

    def _helper_for__getitem__(self, key, row_loc, col_loc, ndim):
        """
        Retrieve dataset according to `key`, row_loc, and col_loc.

        Parameters
        ----------
        key : callable, scalar, or tuple
            The global row index to retrieve data from.
        row_loc : callable, scalar, or slice
            Row locator(s) as a scalar or List.
        col_loc : callable, scalar, or slice
            Column locator(s) as a scalar or List.
        ndim : int
            The number of dimensions of the returned object.

        Returns
        -------
        modin.pandas.DataFrame or modin.pandas.Series
            Located dataset.
        """
        row_scalar = is_scalar(row_loc)
        col_scalar = is_scalar(col_loc)

        # The thought process here is that we should check to see that we have a full key lookup
        # for a MultiIndex DataFrame. If that's the case, then we should not drop any levels
        # since our resulting intermediate dataframe will have dropped these for us already.
        # Thus, we need to make sure we don't try to drop these levels again. The logic here is
        # kind of hacked together. Ideally, we should handle this properly in the lower-level
        # implementations, but this will have to be engineered properly later.
        row_multiindex_full_lookup = self._multiindex_possibly_contains_key(
            axis=0, key=row_loc
        )
        col_multiindex_full_lookup = self._multiindex_possibly_contains_key(
            axis=1, key=col_loc
        )
        levels_already_dropped = (
            row_multiindex_full_lookup or col_multiindex_full_lookup
        )

        # A boolean ``Series`` row locator is handed to the dedicated masking helper.
        if isinstance(row_loc, Series) and is_boolean_array(row_loc):
            return self._handle_boolean_masking(row_loc, col_loc)

        qc_view = self.qc.take_2d_labels(row_loc, col_loc)
        result = self._get_pandas_object_from_qc_view(
            qc_view,
            row_multiindex_full_lookup,
            col_multiindex_full_lookup,
            row_scalar,
            col_scalar,
            ndim,
        )

        # Record the object this Series was located from.
        if isinstance(result, Series):
            result._parent = self.df
            result._parent_axis = 0

        # Normalize scalar locators to one-element lists so that the level checks
        # below can index them positionally; slices are left untouched.
        col_loc_as_list = [col_loc] if col_scalar else col_loc
        row_loc_as_list = [row_loc] if row_scalar else row_loc
        # Pandas drops the levels that are in the `loc`, so we have to as well.
        if (
            isinstance(result, (Series, DataFrame))
            and result._query_compiler.has_multiindex()
            and not levels_already_dropped
        ):
            # For a Series result the column locator is tried first: when every
            # element of it is found in the matching level of the result's index,
            # those leading levels are dropped.
            if (
                isinstance(result, Series)
                and not isinstance(col_loc_as_list, slice)
                and all(
                    col_loc_as_list[i] in result.index.levels[i]
                    for i in range(len(col_loc_as_list))
                )
            ):
                result.index = result.index.droplevel(list(range(len(col_loc_as_list))))
            # Otherwise the same check is made with the row locator, provided none
            # of its elements is itself a slice.
            elif not isinstance(row_loc_as_list, slice) and all(
                not isinstance(row_loc_as_list[i], slice)
                and row_loc_as_list[i] in result.index.levels[i]
                for i in range(len(row_loc_as_list))
            ):
                result.index = result.index.droplevel(list(range(len(row_loc_as_list))))
        # Same treatment for the column axis of a DataFrame result.
        if (
            isinstance(result, DataFrame)
            and not isinstance(col_loc_as_list, slice)
            and not levels_already_dropped
            and result._query_compiler.has_multiindex(axis=1)
            and all(
                col_loc_as_list[i] in result.columns.levels[i]
                for i in range(len(col_loc_as_list))
            )
        ):
            result.columns = result.columns.droplevel(list(range(len(col_loc_as_list))))
        # This is done for cases where the index passed in has other state, like a
        # frequency in the case of DateTimeIndex.
        if (
            row_loc is not None
            and isinstance(col_loc, slice)
            and col_loc == slice(None)
            and isinstance(key, pandas.Index)
        ):
            result.index = key
        return result

    def __setitem__(self, key, item):
        """
        Assign `item` value to dataset located by `key`.

        Parameters
        ----------
        key : callable or tuple
            The global row index to assign data to.
        item : modin.pandas.DataFrame, modin.pandas.Series or scalar
            Value that should be assigned to located dataset.

        See Also
        --------
        pandas.DataFrame.loc
        """
        if self.df.empty:
            # Empty objects are updated through pandas and the result is adopted.

            def _loc(df):
                df.loc[key] = item
                return df

            assigned = self.df._default_to_pandas(_loc)
            self.df._update_inplace(new_query_compiler=assigned._query_compiler)
            self.qc = self.df._query_compiler
            return

        row_loc, col_loc, ndims = self._parse_row_and_column_locators(key)
        append_axis = self._check_missing_loc(row_loc, col_loc)
        if ndims < 1 or append_axis is None:
            # Nothing to enlarge: every targeted label already exists.
            self._set_item_existing_loc(row_loc, col_loc, item)
        elif append_axis:
            # Appending at least one new column
            new_columns = [col_loc] if is_scalar(col_loc) else col_loc
            self._setitem_with_new_columns(row_loc, new_columns, item)
        else:
            # Appending at most one new row
            if is_scalar(row_loc) or len(row_loc) == 1:
                grown_index = self.qc.index.insert(len(self.qc.index), row_loc)
                self.qc = self.qc.reindex(labels=grown_index, axis=0, fill_value=0)
                self.df._update_inplace(new_query_compiler=self.qc)
            self._set_item_existing_loc(row_loc, col_loc, item)
        # Keep the indexer's compiler in sync with the (possibly replaced) one.
        self.qc = self.df._query_compiler

    def _setitem_with_new_columns(self, row_loc, col_loc, item):
        """
        Assign `item` at `row_loc`/`col_loc`, creating any column labels that are missing.

        Parameters
        ----------
        row_loc : scalar, slice, list, array or tuple
            Row locator.
        col_loc : list, array or tuple
            Columns locator.
        item : modin.pandas.DataFrame, modin.pandas.Series or scalar
            Value that should be assigned to located dataset.

        Raises
        ------
        ValueError
            If a list-like `item` does not provide one value per targeted column.
        """
        if is_list_like(item) and not isinstance(item, (DataFrame, Series)):
            item = np.array(item)
            # A 1-D value can only fill a single column; otherwise the last
            # dimension has to match the number of targeted columns.
            width = 1 if len(item.shape) == 1 else item.shape[-1]
            if width != len(col_loc):
                raise ValueError(
                    "Must have equal len keys and value when setting with an iterable"
                )

        already_present = np.isin(col_loc, self.qc.columns.values)
        if not all(already_present):
            # In this case we have some new cols and some old ones
            columns = self.qc.columns
            for label, present in zip(col_loc, already_present):
                if not present:
                    columns = columns.insert(len(columns), label)
            self.qc = self.qc.reindex(labels=columns, axis=1, fill_value=np.nan)
            self.df._update_inplace(new_query_compiler=self.qc)
        self._set_item_existing_loc(row_loc, np.array(col_loc), item)
        self.qc = self.df._query_compiler

    def _set_item_existing_loc(self, row_loc, col_loc, item):
        """
        Assign `item` to rows and columns that already exist in the dataset.

        Parameters
        ----------
        row_loc : scalar, slice, list, array or tuple
            Row locator.
        col_loc : scalar, slice, list, array or tuple
            Columns locator.
        item : modin.pandas.DataFrame, modin.pandas.Series or scalar
            Value that should be assigned to located dataset.
        """
        scalar_into_mask = (
            isinstance(row_loc, Series)
            and is_boolean_array(row_loc)
            and is_scalar(item)
        )
        if scalar_into_mask:
            # A scalar written through a boolean ``Series`` mask is delegated
            # to the query compiler's ``setitem_bool``.
            updated_qc = self.df._query_compiler.setitem_bool(
                row_loc._query_compiler, col_loc, item
            )
            self.df._update_inplace(updated_qc)
            self.qc = self.df._query_compiler
            return

        row_lookup, col_lookup = self.qc.get_positions_from_labels(row_loc, col_loc)
        if isinstance(item, np.ndarray) and is_boolean_array(row_loc):
            # fix for 'test_loc_series'; np.log(Series) returns nd.array instead
            # of Series as it was before (`Series.__array_wrap__` is removed)
            # otherwise incompatible shapes are obtained
            item = item.take(row_lookup)

        setitem_axis = self._determine_setitem_axis(
            row_lookup, col_lookup, is_scalar(row_loc), is_scalar(col_loc)
        )
        self._setitem_positional(row_lookup, col_lookup, item, axis=setitem_axis)

    def _check_missing_loc(self, row_loc, col_loc):
        """
        Help `__setitem__` compute whether an axis needs appending.

        Parameters
        ----------
        row_loc : scalar, slice, list, array or tuple
            Row locator.
        col_loc : scalar, slice, list, array or tuple
            Columns locator.

        Returns
        -------
        int or None :
            0 if new row, 1 if new column, None if neither.
        """
        if is_scalar(row_loc):
            return 0 if row_loc not in self.qc.index else None
        elif isinstance(row_loc, list):
            missing_labels = self._compute_enlarge_labels(
                pandas.Index(row_loc), self.qc.index
            )
            if len(missing_labels) > 1:
                # We cast to list to copy pandas' error:
                # In pandas, we get: KeyError: [a, b,...] not in index
                # If we don't convert to list we get: KeyError: [a b ...] not in index
                raise KeyError("{} not in index".format(list(missing_labels)))
        if (
            not (is_list_like(row_loc) or isinstance(row_loc, slice))
            and row_loc not in self.qc.index
        ):
            return 0
        if (
            isinstance(col_loc, list)
            and len(pandas.Index(col_loc).difference(self.qc.columns)) >= 1
        ):
            return 1
        if is_scalar(col_loc) and col_loc not in self.qc.columns:
            return 1
        return None

    def _compute_enlarge_labels(self, locator, base_index):
        """
        Help to _enlarge_axis, compute common labels and extra labels.

        Parameters
        ----------
        locator : pandas.Index
            Index from locator.
        base_index : pandas.Index
            Current index.

        Returns
        -------
        nan_labels : pandas.Index
            The labels that need to be added.
        """
        # base_index_type can be pd.Index or pd.DatetimeIndex
        # depending on user input and pandas behavior
        # See issue #2264
        base_as_index = pandas.Index(list(base_index))
        locator_as_index = pandas.Index(list(locator))

        if locator_as_index.inferred_type == "boolean":
            if len(locator_as_index) != len(base_as_index):
                raise ValueError(
                    f"Item wrong length {len(locator_as_index)} instead of {len(base_as_index)}!"
                )
            common_labels = base_as_index[locator_as_index]
            nan_labels = pandas.Index([])
        else:
            common_labels = locator_as_index.intersection(base_as_index)
            nan_labels = locator_as_index.difference(base_as_index)

        if len(common_labels) == 0:
            raise KeyError(
                "None of [{labels}] are in the [{base_index_name}]".format(
                    labels=list(locator_as_index), base_index_name=base_as_index
                )
            )
        return nan_labels


class _iLocIndexer(_LocationIndexerBase):
    """
    An indexer for modin_df.iloc[] functionality.

    Parameters
    ----------
    modin_df : Union[DataFrame, Series]
        DataFrame to operate on.
    """

    _extensions: EXTENSION_DICT_TYPE = EXTENSION_DICT_TYPE(dict)

    def __getitem__(self, key):
        """
        Retrieve dataset according to `key`.

        Parameters
        ----------
        key : callable or tuple
            The global row numbers to retrieve data from.

        Returns
        -------
        DataFrame or Series
            Located dataset.

        See Also
        --------
        pandas.DataFrame.iloc
        """
        if self.df.empty:
            # Empty objects are delegated to pandas as a whole.
            return self.df._default_to_pandas(lambda df: df.iloc[key])

        def _full_slice_to_none(lookup):
            # The positional take below is given ``None`` instead of a slice;
            # any slice other than ``slice(None)`` reaching this point is
            # reported as an internal error.
            if not isinstance(lookup, slice):
                return lookup
            ErrorMessage.catch_bugs_and_request_email(
                failure_condition=lookup != slice(None),
                extra_log=f"Only None-slices are acceptable as a slice argument in masking, got: {lookup}",
            )
            return None

        if isinstance(key, tuple):
            key = self._validate_key_length(key)
        row_loc, col_loc, ndim = self._parse_row_and_column_locators(key)
        row_scalar, col_scalar = is_scalar(row_loc), is_scalar(col_loc)
        for locator in (row_loc, col_loc):
            self._check_dtypes(locator)

        if isinstance(row_loc, Series) and is_boolean_array(row_loc):
            return self._handle_boolean_masking(row_loc, col_loc)

        row_lookup, col_lookup = self._compute_lookup(row_loc, col_loc)
        row_lookup = _full_slice_to_none(row_lookup)
        col_lookup = _full_slice_to_none(col_lookup)

        result = self._get_pandas_object_from_qc_view(
            self.qc.take_2d_positional(row_lookup, col_lookup),
            row_multiindex_full_lookup=False,
            col_multiindex_full_lookup=False,
            row_scalar=row_scalar,
            col_scalar=col_scalar,
            ndim=ndim,
        )

        if isinstance(result, Series):
            # Record the object this Series was located from.
            result._parent = self.df
            result._parent_axis = 0
        return result

    def __setitem__(self, key, item):
        """
        Assign `item` value to dataset located by `key`.

        Parameters
        ----------
        key : callable or tuple
            The global row numbers to assign data to.
        item : modin.pandas.DataFrame, modin.pandas.Series or scalar
            Value that should be assigned to located dataset.

        See Also
        --------
        pandas.DataFrame.iloc
        """
        if self.df.empty:
            # Empty objects are updated through pandas and the result is adopted.

            def _iloc(df):
                df.iloc[key] = item
                return df

            assigned = self.df._default_to_pandas(_iloc)
            self.df._update_inplace(new_query_compiler=assigned._query_compiler)
            self.qc = self.df._query_compiler
            return

        row_loc, col_loc, _ = self._parse_row_and_column_locators(key)
        row_scalar, col_scalar = is_scalar(row_loc), is_scalar(col_loc)
        for locator in (row_loc, col_loc):
            self._check_dtypes(locator)

        row_lookup, col_lookup = self._compute_lookup(row_loc, col_loc)
        setitem_axis = self._determine_setitem_axis(
            row_lookup, col_lookup, row_scalar, col_scalar
        )
        self._setitem_positional(row_lookup, col_lookup, item, axis=setitem_axis)

    def _compute_lookup(self, row_loc, col_loc):
        """
        Compute index and column labels from index and column integer locators.

        Parameters
        ----------
        row_loc : slice, list, array or tuple
            Row locator.
        col_loc : slice, list, array or tuple
            Columns locator.

        Returns
        -------
        row_lookup : slice(None) if full axis grab, pandas.RangeIndex if repetition is detected, numpy.ndarray otherwise
            List of index labels.
        col_lookup : slice(None) if full axis grab, pandas.RangeIndex if repetition is detected, numpy.ndarray otherwise
            List of columns labels.

        Notes
        -----
        Usage of `slice(None)` as a resulting lookup is a hack to pass information about
        full-axis grab without computing actual indices that triggers lazy computations.
        Ideally, this API should get rid of using slices as indexers and either use a
        common ``Indexer`` object or range and ``np.ndarray`` only.

        The two lookups are returned together as a two-element list, rows first.
        """
        lookups = []
        for axis, axis_loc in enumerate((row_loc, col_loc)):
            # A scalar locator is treated as a one-element array.
            if is_scalar(axis_loc):
                axis_loc = np.array([axis_loc])
            if isinstance(axis_loc, slice):
                # A full slice is passed through untouched (see Notes); any other
                # slice is resolved against the axis length into a ``RangeIndex``.
                axis_lookup = (
                    axis_loc
                    if axis_loc == slice(None)
                    else pandas.RangeIndex(
                        *axis_loc.indices(len(self.qc.get_axis(axis)))
                    )
                )
            elif is_range_like(axis_loc):
                axis_lookup = pandas.RangeIndex(
                    axis_loc.start, axis_loc.stop, axis_loc.step
                )
            elif is_boolean_array(axis_loc):
                axis_lookup = boolean_mask_to_numeric(axis_loc)
            else:
                if isinstance(axis_loc, pandas.Index):
                    axis_loc = axis_loc.values
                elif is_list_like(axis_loc) and not isinstance(axis_loc, np.ndarray):
                    # `Index.__getitem__` works much faster with numpy arrays than with python lists,
                    # so although we spend some time here on converting to numpy, the
                    # `Index.__getitem__` speedup outweighs that conversion cost.
                    axis_loc = np.array(axis_loc, dtype=np.int64)
                # Relatively fast check that allows us to not trigger `self.qc.get_axis()` computation
                # when there are no negative indices, since then they do not depend on the axis length.
                if isinstance(axis_loc, np.ndarray) and not (axis_loc < 0).any():
                    axis_lookup = axis_loc
                else:
                    # Indexing a ``RangeIndex`` of the axis length with the locator
                    # yields the lookup (this is the path taken for negative indices).
                    axis_lookup = pandas.RangeIndex(len(self.qc.get_axis(axis)))[
                        axis_loc
                    ]

            # Non-range ``pandas.Index`` results are unwrapped to their underlying values.
            if isinstance(axis_lookup, pandas.Index) and not is_range_like(axis_lookup):
                axis_lookup = axis_lookup.values
            lookups.append(axis_lookup)
        return lookups

    def _check_dtypes(self, locator):
        """
        Validate that `locator` is an acceptable positional indexer.

        Accepted forms are an integer scalar, an integer slice, an integer
        list/array or an array of booleans.

        Parameters
        ----------
        locator : scalar, list, slice or array
            Object to check.

        Raises
        ------
        ValueError
            If check fails.
        """
        verdicts = [
            is_integer(locator),
            is_integer_slice(locator),
            is_integer_array(locator),
            is_boolean_array(locator),
        ]
        if any(verdicts):
            return
        raise ValueError(_ILOC_INT_ONLY_ERROR)


================================================
FILE: modin/pandas/io.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
Implement I/O public API as pandas does.

Almost all docstrings for public and magic methods should be inherited from pandas
for better maintainability.
Manually add documentation for methods which are not presented in pandas.
"""

from __future__ import annotations

import csv
import inspect
import pathlib
import pickle
from typing import (
    IO,
    TYPE_CHECKING,
    Any,
    AnyStr,
    Callable,
    Dict,
    Hashable,
    Iterable,
    Iterator,
    List,
    Literal,
    Optional,
    Pattern,
    Sequence,
    Union,
)

import numpy as np
import pandas
from pandas._libs.lib import NoDefault, no_default
from pandas._typing import (
    CompressionOptions,
    ConvertersArg,
    CSVEngine,
    DtypeArg,
    DtypeBackend,
    FilePath,
    IndexLabel,
    IntStrT,
    ParseDatesArg,
    ReadBuffer,
    ReadCsvBuffer,
    StorageOptions,
    XMLParsers,
)
from pandas.io.parsers import TextFileReader
from pandas.io.parsers.readers import _c_parser_defaults

from modin.config import ModinNumpy
from modin.core.dataframe.base.interchange.dataframe_protocol.dataframe import (
    ProtocolDataframe,
)
from modin.core.storage_formats.pandas.query_compiler_caster import (
    wrap_free_function_in_argument_caster,
)
from modin.logging import ClassLogger, enable_logging
from modin.utils import (
    SupportsPrivateToNumPy,
    SupportsPublicToNumPy,
    SupportsPublicToPandas,
    _inherit_docstrings,
    _maybe_warn_on_default,
    classproperty,
    expanduser_path_arg,
)

# below logic is to handle circular imports without errors
if TYPE_CHECKING:
    from .dataframe import DataFrame
    from .series import Series


class ModinObjects:
    """Provide access to Modin classes that are imported only on first use."""

    # Cached ``modin.pandas.DataFrame`` class; ``None`` until first requested.
    _dataframe = None

    @classproperty
    def DataFrame(cls):
        """Get ``modin.pandas.DataFrame`` class."""
        if cls._dataframe is not None:
            return cls._dataframe

        # Imported here rather than at module level to avoid a circular import.
        from .dataframe import DataFrame

        cls._dataframe = DataFrame
        return cls._dataframe


def _read(**kwargs):
    """
    Read csv file from local disk.

    Parameters
    ----------
    **kwargs : dict
        Keyword arguments in pandas.read_csv.

    Returns
    -------
    modin.pandas.DataFrame
    """
    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher

    # ``squeeze`` is consumed here and never forwarded to the dispatcher.
    squeeze = kwargs.pop("squeeze", False)
    parsed = FactoryDispatcher.read_csv(**kwargs)

    if not isinstance(parsed, TextFileReader):
        frame = ModinObjects.DataFrame(query_compiler=parsed)
        return frame.squeeze(axis=1) if squeeze else frame

    # This happens when `read_csv` returns a TextFileReader object for iterating through:
    # its ``read`` is wrapped so that every chunk comes back as a Modin DataFrame.
    reader = parsed.read
    parsed.read = lambda *args, **kwargs: ModinObjects.DataFrame(
        query_compiler=reader(*args, **kwargs)
    )
    return parsed


@_inherit_docstrings(pandas.read_xml, apilink="pandas.read_xml")
@enable_logging
@wrap_free_function_in_argument_caster("read_xml")
@expanduser_path_arg("path_or_buffer")
def read_xml(
    path_or_buffer: FilePath | ReadBuffer[bytes] | ReadBuffer[str],
    *,
    xpath: str = "./*",
    namespaces: dict[str, str] | None = None,
    elems_only: bool = False,
    attrs_only: bool = False,
    names: Sequence[str] | None = None,
    dtype: DtypeArg | None = None,
    converters: ConvertersArg | None = None,
    parse_dates: ParseDatesArg | None = None,
    encoding: str | None = "utf-8",
    parser: XMLParsers = "lxml",
    stylesheet: FilePath | ReadBuffer[bytes] | ReadBuffer[str] | None = None,
    iterparse: dict[str, list[str]] | None = None,
    compression: CompressionOptions = "infer",
    storage_options: StorageOptions = None,
    dtype_backend: Union[DtypeBackend, NoDefault] = no_default,
) -> DataFrame:
    # No docstring on purpose: it is inherited from ``pandas.read_xml`` by the
    # ``_inherit_docstrings`` decorator above.
    _maybe_warn_on_default("read_xml")
    # Take a *copy* of the frame locals while they still hold nothing but the
    # call arguments. The mapping returned by ``inspect.getargvalues`` can be a
    # live view of the frame (a write-through proxy on Python 3.13+, or a dict
    # re-synchronised by a tracer/debugger on older versions); unpacking the
    # live view would also forward any local bound afterwards (including the
    # ``kwargs`` name itself) to pandas as unexpected keyword arguments.
    kwargs = dict(inspect.getargvalues(inspect.currentframe()).locals)
    # The data is read by pandas itself and then wrapped into a Modin DataFrame.
    return ModinObjects.DataFrame(pandas.read_xml(**kwargs))


@_inherit_docstrings(pandas.read_csv, apilink="pandas.read_csv")
@enable_logging
@wrap_free_function_in_argument_caster("read_csv")
@expanduser_path_arg("filepath_or_buffer")
def read_csv(
    filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str],
    *,
    sep: str | None | NoDefault = no_default,
    delimiter: str | None | NoDefault = None,
    # Column and Index Locations and Names
    header: int | Sequence[int] | None | Literal["infer"] = "infer",
    names: Sequence[Hashable] | None | NoDefault = no_default,
    index_col: IndexLabel | Literal[False] | None = None,
    usecols=None,
    # General Parsing Configuration
    dtype: DtypeArg | None = None,
    engine: CSVEngine | None = None,
    converters=None,
    true_values=None,
    false_values=None,
    skipinitialspace: bool = False,
    skiprows=None,
    skipfooter: int = 0,
    nrows: int | None = None,
    # NA and Missing Data Handling
    na_values=None,
    keep_default_na: bool = True,
    na_filter: bool = True,
    verbose: bool = no_default,
    skip_blank_lines: bool = True,
    # Datetime Handling
    parse_dates=None,
    infer_datetime_format: bool = no_default,
    keep_date_col: bool = no_default,
    date_parser=no_default,
    date_format=None,
    dayfirst: bool = False,
    cache_dates: bool = True,
    # Iteration
    iterator: bool = False,
    chunksize: int | None = None,
    # Quoting, Compression, and File Format
    compression: CompressionOptions = "infer",
    thousands: str | None = None,
    decimal: str = ".",
    lineterminator: str | None = None,
    quotechar: str = '"',
    quoting: int = csv.QUOTE_MINIMAL,
    doublequote: bool = True,
    escapechar: str | None = None,
    comment: str | None = None,
    encoding: str | None = None,
    encoding_errors: str | None = "strict",
    dialect: str | csv.Dialect | None = None,
    # Error Handling
    on_bad_lines="error",
    # Internal
    delim_whitespace: bool = no_default,
    low_memory=_c_parser_defaults["low_memory"],
    memory_map: bool = False,
    float_precision: Literal["high", "legacy"] | None = None,
    storage_options: StorageOptions = None,
    dtype_backend: Union[DtypeBackend, NoDefault] = no_default,
) -> DataFrame | TextFileReader:
    # ISSUE #2408: only the arguments that the installed ``pandas.read_csv``
    # accepts are forwarded, taken from this call's frame locals.
    accepted = set(inspect.signature(pandas.read_csv).parameters)
    call_locals = inspect.getargvalues(inspect.currentframe()).locals
    kwargs = {
        name: value for name, value in call_locals.items() if name in accepted
    }
    return _read(**kwargs)


@_inherit_docstrings(pandas.read_table, apilink="pandas.read_table")
@enable_logging
@wrap_free_function_in_argument_caster("read_table")
@expanduser_path_arg("filepath_or_buffer")
def read_table(
    filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str],
    *,
    sep: str | None | NoDefault = no_default,
    delimiter: str | None | NoDefault = None,
    # Column and Index Locations and Names
    header: int | Sequence[int] | None | Literal["infer"] = "infer",
    names: Sequence[Hashable] | None | NoDefault = no_default,
    index_col: IndexLabel | Literal[False] | None = None,
    usecols=None,
    # General Parsing Configuration
    dtype: DtypeArg | None = None,
    engine: CSVEngine | None = None,
    converters=None,
    true_values=None,
    false_values=None,
    skipinitialspace: bool = False,
    skiprows=None,
    skipfooter: int = 0,
    nrows: int | None = None,
    # NA and Missing Data Handling
    na_values=None,
    keep_default_na: bool = True,
    na_filter: bool = True,
    verbose: bool = no_default,
    skip_blank_lines: bool = True,
    # Datetime Handling
    parse_dates=False,
    infer_datetime_format: bool = no_default,
    keep_date_col: bool = no_default,
    date_parser=no_default,
    date_format: str = None,
    dayfirst: bool = False,
    cache_dates: bool = True,
    # Iteration
    iterator: bool = False,
    chunksize: int | None = None,
    # Quoting, Compression, and File Format
    compression: CompressionOptions = "infer",
    thousands: str | None = None,
    decimal: str = ".",
    lineterminator: str | None = None,
    quotechar: str = '"',
    quoting: int = csv.QUOTE_MINIMAL,
    doublequote: bool = True,
    escapechar: str | None = None,
    comment: str | None = None,
    encoding: str | None = None,
    encoding_errors: str | None = "strict",
    dialect: str | csv.Dialect | None = None,
    # Error Handling
    on_bad_lines="error",
    # Internal
    delim_whitespace: bool = no_default,
    low_memory=_c_parser_defaults["low_memory"],
    memory_map: bool = False,
    float_precision: str | None = None,
    storage_options: StorageOptions = None,
    dtype_backend: Union[DtypeBackend, NoDefault] = no_default,
) -> DataFrame | TextFileReader:
    # ISSUE #2408: only the arguments that the installed ``pandas.read_table``
    # accepts are forwarded, taken from this call's frame locals.
    accepted = set(inspect.signature(pandas.read_table).parameters)
    call_locals = inspect.getargvalues(inspect.currentframe()).locals
    # An unspecified (or ``False``) separator becomes a tab before forwarding.
    effective_sep = call_locals.get("sep", sep)
    if effective_sep is False or effective_sep is no_default:
        call_locals["sep"] = "\t"
    kwargs = {
        name: value for name, value in call_locals.items() if name in accepted
    }
    return _read(**kwargs)


@_inherit_docstrings(pandas.read_parquet, apilink="pandas.read_parquet")
@enable_logging
@wrap_free_function_in_argument_caster("read_parquet")
@expanduser_path_arg("path")
def read_parquet(
    path,
    engine: str = "auto",
    columns: list[str] | None = None,
    storage_options: StorageOptions = None,
    use_nullable_dtypes: bool = no_default,
    dtype_backend=no_default,
    filesystem=None,
    filters=None,
    **kwargs,
) -> DataFrame:
    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher

    # An explicit ``dtype_backend`` is rejected up front for fastparquet.
    backend_requested = dtype_backend is not no_default
    if engine == "fastparquet" and backend_requested:
        raise ValueError(
            "The 'dtype_backend' argument is not supported for the fastparquet engine"
        )

    frame_cls = ModinObjects.DataFrame
    # Every argument is forwarded by keyword to the dispatcher, which returns
    # the object wrapped below as ``query_compiler``.
    compiler = FactoryDispatcher.read_parquet(
        path=path,
        engine=engine,
        columns=columns,
        storage_options=storage_options,
        use_nullable_dtypes=use_nullable_dtypes,
        dtype_backend=dtype_backend,
        filesystem=filesystem,
        filters=filters,
        **kwargs,
    )
    return frame_cls(query_compiler=compiler)


@_inherit_docstrings(pandas.read_json, apilink="pandas.read_json")
@enable_logging
@wrap_free_function_in_argument_caster("read_json")
@expanduser_path_arg("path_or_buf")
def read_json(
    path_or_buf,
    *,
    orient: str | None = None,
    typ: Literal["frame", "series"] = "frame",
    dtype: DtypeArg | None = None,
    convert_axes=None,
    convert_dates: bool | list[str] = True,
    keep_default_dates: bool = True,
    precise_float: bool = False,
    date_unit: str | None = None,
    encoding: str | None = None,
    encoding_errors: str | None = "strict",
    lines: bool = False,
    chunksize: int | None = None,
    compression: CompressionOptions = "infer",
    nrows: int | None = None,
    storage_options: StorageOptions = None,
    dtype_backend: Union[DtypeBackend, NoDefault] = no_default,
    engine="ujson",
) -> DataFrame | Series | pandas.io.json._json.JsonReader:
    # Snapshot the call arguments before any other local is bound: the
    # captured mapping is the frame's locals, so anything bound earlier would
    # be forwarded to the dispatcher as well.
    _, _, _, kwargs = inspect.getargvalues(inspect.currentframe())

    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher

    json_qc = FactoryDispatcher.read_json(**kwargs)
    return ModinObjects.DataFrame(query_compiler=json_qc)


@_inherit_docstrings(pandas.read_gbq, apilink="pandas.read_gbq")
@enable_logging
@wrap_free_function_in_argument_caster("read_gbq")
def read_gbq(
    query: str,
    project_id: str | None = None,
    index_col: str | None = None,
    col_order: list[str] | None = None,
    reauth: bool = False,
    auth_local_webserver: bool = True,
    dialect: str | None = None,
    location: str | None = None,
    configuration: dict[str, Any] | None = None,
    credentials=None,
    use_bqstorage_api: bool | None = None,
    max_results: int | None = None,
    progress_bar_type: str | None = None,
) -> DataFrame:
    # Must stay the first statement: the mapping captured here is the set of
    # locals bound at this point, i.e. exactly the call arguments.
    _, _, _, kwargs = inspect.getargvalues(inspect.currentframe())
    # Fold a nested ``kwargs`` entry, if one was captured, into the top level.
    # NOTE(review): this signature declares no ``**kwargs`` - confirm whether
    # the merge is still needed.
    nested = kwargs.pop("kwargs", {})
    kwargs.update(nested)

    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher

    gbq_qc = FactoryDispatcher.read_gbq(**kwargs)
    return ModinObjects.DataFrame(query_compiler=gbq_qc)


@_inherit_docstrings(pandas.read_html, apilink="pandas.read_html")
@enable_logging
@wrap_free_function_in_argument_caster("read_html")
@expanduser_path_arg("io")
def read_html(
    io,
    *,
    match: str | Pattern = ".+",
    flavor: str | None = None,
    header: int | Sequence[int] | None = None,
    index_col: int | Sequence[int] | None = None,
    skiprows: int | Sequence[int] | slice | None = None,
    attrs: dict[str, str] | None = None,
    parse_dates: bool = False,
    thousands: str | None = ",",
    encoding: str | None = None,
    decimal: str = ".",
    converters: dict | None = None,
    na_values: Iterable[object] | None = None,
    keep_default_na: bool = True,
    displayed_only: bool = True,
    extract_links: Literal[None, "header", "footer", "body", "all"] = None,
    dtype_backend: Union[DtypeBackend, NoDefault] = no_default,
    storage_options: StorageOptions = None,
) -> list[DataFrame]:  # noqa: PR01, RT01, D200
    """
    Read HTML tables into a ``DataFrame`` object.
    """
    # Capture the call arguments first, before any other local exists.
    _, _, _, kwargs = inspect.getargvalues(inspect.currentframe())

    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher

    # The dispatcher hands back an iterable of query compilers; each one is
    # wrapped into its own Modin DataFrame.
    return [
        ModinObjects.DataFrame(query_compiler=table_qc)
        for table_qc in FactoryDispatcher.read_html(**kwargs)
    ]


@_inherit_docstrings(pandas.read_clipboard, apilink="pandas.read_clipboard")
@enable_logging
@wrap_free_function_in_argument_caster("read_clipboard")
def read_clipboard(
    sep=r"\s+",
    dtype_backend: Union[DtypeBackend, NoDefault] = no_default,
    **kwargs,
) -> DataFrame:  # pragma: no cover  # noqa: PR01, RT01, D200
    """
    Read text from clipboard and pass to read_csv.
    """
    # Capture every call argument (the ``**kwargs`` dict shows up as a single
    # nested ``"kwargs"`` entry), then flatten that entry into the top level.
    _, _, _, kwargs = inspect.getargvalues(inspect.currentframe())
    extra_options = kwargs.pop("kwargs", {})
    kwargs.update(extra_options)

    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher

    clipboard_qc = FactoryDispatcher.read_clipboard(**kwargs)
    return ModinObjects.DataFrame(query_compiler=clipboard_qc)


@_inherit_docstrings(pandas.read_excel, apilink="pandas.read_excel")
@enable_logging
@wrap_free_function_in_argument_caster("read_excel")
@expanduser_path_arg("io")
def read_excel(
    io,
    sheet_name: str | int | list[IntStrT] | None = 0,
    *,
    header: int | Sequence[int] | None = 0,
    names: list[str] | None = None,
    index_col: int | Sequence[int] | None = None,
    usecols: (
        int | str | Sequence[int] | Sequence[str] | Callable[[str], bool] | None
    ) = None,
    dtype: DtypeArg | None = None,
    engine: Literal[("xlrd", "openpyxl", "odf", "pyxlsb")] | None = None,
    converters: dict[str, Callable] | dict[int, Callable] | None = None,
    true_values: Iterable[Hashable] | None = None,
    false_values: Iterable[Hashable] | None = None,
    skiprows: Sequence[int] | int | Callable[[int], object] | None = None,
    nrows: int | None = None,
    na_values=None,
    keep_default_na: bool = True,
    na_filter: bool = True,
    verbose: bool = False,
    parse_dates: list | dict | bool = False,
    date_parser: Union[Callable, NoDefault] = no_default,
    date_format=None,
    thousands: str | None = None,
    decimal: str = ".",
    comment: str | None = None,
    skipfooter: int = 0,
    storage_options: StorageOptions = None,
    dtype_backend: Union[DtypeBackend, NoDefault] = no_default,
    engine_kwargs: Optional[dict] = None,
) -> DataFrame | dict[IntStrT, DataFrame]:
    # Capture the call arguments first, before any other local exists.
    _, _, _, kwargs = inspect.getargvalues(inspect.currentframe())

    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher

    result = FactoryDispatcher.read_excel(**kwargs)
    if not isinstance(result, dict):
        return ModinObjects.DataFrame(query_compiler=result)

    # A dict result is rebuilt as a mapping of the same type whose values are
    # Modin DataFrames instead of query compilers.
    frames = type(result)()
    for sheet_key, sheet_qc in result.items():
        frames[sheet_key] = ModinObjects.DataFrame(query_compiler=sheet_qc)
    return frames


@_inherit_docstrings(pandas.read_hdf, apilink="pandas.read_hdf")
@enable_logging
@wrap_free_function_in_argument_caster("read_hdf")
@expanduser_path_arg("path_or_buf")
def read_hdf(
    path_or_buf,
    key=None,
    mode: str = "r",
    errors: str = "strict",
    where=None,
    start: Optional[int] = None,
    stop: Optional[int] = None,
    columns=None,
    iterator=False,
    chunksize: Optional[int] = None,
    **kwargs,
):  # noqa: PR01, RT01, D200
    """
    Read data from the store into DataFrame.
    """
    # Capture every call argument (the ``**kwargs`` dict shows up as a single
    # nested ``"kwargs"`` entry), then flatten that entry into the top level.
    _, _, _, kwargs = inspect.getargvalues(inspect.currentframe())
    extra_options = kwargs.pop("kwargs", {})
    kwargs.update(extra_options)

    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher

    hdf_qc = FactoryDispatcher.read_hdf(**kwargs)
    return ModinObjects.DataFrame(query_compiler=hdf_qc)


@_inherit_docstrings(pandas.read_feather, apilink="pandas.read_feather")
@enable_logging
@wrap_free_function_in_argument_caster("read_feather")
@expanduser_path_arg("path")
def read_feather(
    path,
    columns: Sequence[Hashable] | None = None,
    use_threads: bool = True,
    storage_options: StorageOptions = None,
    dtype_backend: Union[DtypeBackend, NoDefault] = no_default,
) -> DataFrame:
    # Capture the call arguments first, before any other local exists.
    _, _, _, kwargs = inspect.getargvalues(inspect.currentframe())

    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher

    feather_qc = FactoryDispatcher.read_feather(**kwargs)
    return ModinObjects.DataFrame(query_compiler=feather_qc)


@_inherit_docstrings(pandas.read_stata)
@enable_logging
@wrap_free_function_in_argument_caster("read_stata")
@expanduser_path_arg("filepath_or_buffer")
def read_stata(
    filepath_or_buffer,
    *,
    convert_dates: bool = True,
    convert_categoricals: bool = True,
    index_col: str | None = None,
    convert_missing: bool = False,
    preserve_dtypes: bool = True,
    columns: Sequence[str] | None = None,
    order_categoricals: bool = True,
    chunksize: int | None = None,
    iterator: bool = False,
    compression: CompressionOptions = "infer",
    storage_options: StorageOptions = None,
) -> DataFrame | pandas.io.stata.StataReader:
    # Capture the call arguments first, before any other local exists.
    _, _, _, kwargs = inspect.getargvalues(inspect.currentframe())

    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher

    stata_qc = FactoryDispatcher.read_stata(**kwargs)
    return ModinObjects.DataFrame(query_compiler=stata_qc)


@_inherit_docstrings(pandas.read_sas, apilink="pandas.read_sas")
@enable_logging
@wrap_free_function_in_argument_caster("read_sas")
@expanduser_path_arg("filepath_or_buffer")
def read_sas(
    filepath_or_buffer,
    *,
    format: str | None = None,
    index: Hashable | None = None,
    encoding: str | None = None,
    chunksize: int | None = None,
    iterator: bool = False,
    compression: CompressionOptions = "infer",
) -> DataFrame | pandas.io.sas.sasreader.ReaderBase:  # noqa: PR01, RT01, D200
    """
    Read SAS files stored as either XPORT or SAS7BDAT format files.
    """
    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher

    # Each argument is forwarded by keyword, unchanged.
    sas_qc = FactoryDispatcher.read_sas(
        filepath_or_buffer=filepath_or_buffer,
        format=format,
        index=index,
        encoding=encoding,
        chunksize=chunksize,
        iterator=iterator,
        compression=compression,
    )
    return ModinObjects.DataFrame(query_compiler=sas_qc)


@_inherit_docstrings(pandas.read_pickle, apilink="pandas.read_pickle")
@enable_logging
@wrap_free_function_in_argument_caster("read_pickle")
@expanduser_path_arg("filepath_or_buffer")
def read_pickle(
    filepath_or_buffer,
    compression: CompressionOptions = "infer",
    storage_options: StorageOptions = None,
) -> DataFrame | Series:
    # Capture the call arguments first, before any other local exists.
    _, _, _, kwargs = inspect.getargvalues(inspect.currentframe())

    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher

    pickle_qc = FactoryDispatcher.read_pickle(**kwargs)
    return ModinObjects.DataFrame(query_compiler=pickle_qc)


@_inherit_docstrings(pandas.read_sql, apilink="pandas.read_sql")
@enable_logging
@wrap_free_function_in_argument_caster("read_sql")
def read_sql(
    sql,
    con,
    index_col=None,
    coerce_float=True,
    params=None,
    parse_dates=None,
    columns=None,
    chunksize=None,
    dtype_backend: Union[DtypeBackend, NoDefault] = no_default,
    dtype=None,
) -> DataFrame | Iterator[DataFrame]:  # noqa: PR01, RT01, D200
    """
    Read SQL query or database table into a DataFrame.
    """
    # Capture the call arguments first, before any other local exists.
    _, _, _, kwargs = inspect.getargvalues(inspect.currentframe())

    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher

    # Without a chunk size the read goes through the dispatcher in one piece.
    if kwargs.get("chunksize") is None:
        return ModinObjects.DataFrame(
            query_compiler=FactoryDispatcher.read_sql(**kwargs)
        )

    # Chunked reads are done by pandas itself; every pandas chunk it yields is
    # converted lazily into a Modin DataFrame.
    _maybe_warn_on_default("Parameters provided [chunksize]")
    pandas_chunks = pandas.read_sql(**kwargs)
    return (
        ModinObjects.DataFrame(query_compiler=FactoryDispatcher.from_pandas(chunk))
        for chunk in pandas_chunks
    )


@_inherit_docstrings(pandas.read_fwf, apilink="pandas.read_fwf")
@enable_logging
@wrap_free_function_in_argument_caster("read_fwf")
@expanduser_path_arg("filepath_or_buffer")
def read_fwf(
    filepath_or_buffer: Union[str, pathlib.Path, IO[AnyStr]],
    *,
    colspecs="infer",
    widths=None,
    infer_nrows=100,
    dtype_backend: Union[DtypeBackend, NoDefault] = no_default,
    iterator: bool = False,
    chunksize: Optional[int] = None,
    **kwds,
) -> DataFrame | TextFileReader:  # noqa: PR01, RT01, D200
    """
    Read a table of fixed-width formatted lines into DataFrame.
    """
    # Capture the call arguments before anything else is bound locally. The
    # captured mapping is the frame's locals, so taking it after the imports
    # below would forward ``parser_defaults`` and ``FactoryDispatcher`` to the
    # backend as if they were reader options.
    _, _, _, kwargs = inspect.getargvalues(inspect.currentframe())

    from pandas.io.parsers.base_parser import parser_defaults

    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher

    # Flatten the ``**kwds`` entry into the top level of the captured mapping.
    kwargs.update(kwargs.pop("kwds", {}))
    # Start from the pandas parser defaults and let the call arguments win.
    target_kwargs = parser_defaults.copy()
    target_kwargs.update(kwargs)
    pd_obj = FactoryDispatcher.read_fwf(**target_kwargs)
    # When `read_fwf` returns a TextFileReader object for iterating through
    if isinstance(pd_obj, TextFileReader):
        # Patch ``read`` so that each result it produces is wrapped into a
        # Modin DataFrame before being handed to the caller.
        reader = pd_obj.read
        pd_obj.read = lambda *args, **kwargs: ModinObjects.DataFrame(
            query_compiler=reader(*args, **kwargs)
        )
        return pd_obj
    return ModinObjects.DataFrame(query_compiler=pd_obj)


@_inherit_docstrings(pandas.read_sql_table, apilink="pandas.read_sql_table")
@enable_logging
@wrap_free_function_in_argument_caster("read_sql_table")
def read_sql_table(
    table_name,
    con,
    schema=None,
    index_col=None,
    coerce_float=True,
    parse_dates=None,
    columns=None,
    chunksize=None,
    dtype_backend: Union[DtypeBackend, NoDefault] = no_default,
) -> DataFrame | Iterator[DataFrame]:  # noqa: PR01, RT01, D200
    """
    Read SQL database table into a DataFrame.
    """
    # Capture the call arguments first, before any other local exists.
    _, _, _, kwargs = inspect.getargvalues(inspect.currentframe())

    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher

    table_qc = FactoryDispatcher.read_sql_table(**kwargs)
    return ModinObjects.DataFrame(query_compiler=table_qc)


@_inherit_docstrings(pandas.read_sql_query, apilink="pandas.read_sql_query")
@enable_logging
@wrap_free_function_in_argument_caster("read_sql_query")
def read_sql_query(
    sql,
    con,
    index_col: str | list[str] | None = None,
    coerce_float: bool = True,
    params: list[str] | dict[str, str] | None = None,
    parse_dates: list[str] | dict[str, str] | None = None,
    chunksize: int | None = None,
    dtype: DtypeArg | None = None,
    dtype_backend: Union[DtypeBackend, NoDefault] = no_default,
) -> DataFrame | Iterator[DataFrame]:
    # Capture the call arguments first, before any other local exists.
    _, _, _, kwargs = inspect.getargvalues(inspect.currentframe())

    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher

    query_qc = FactoryDispatcher.read_sql_query(**kwargs)
    return ModinObjects.DataFrame(query_compiler=query_qc)


@_inherit_docstrings(pandas.to_pickle)
@enable_logging
@wrap_free_function_in_argument_caster("to_pickle")
@expanduser_path_arg("filepath_or_buffer")
def to_pickle(
    obj: Any,
    filepath_or_buffer,
    compression: CompressionOptions = "infer",
    protocol: int = pickle.HIGHEST_PROTOCOL,
    storage_options: StorageOptions = None,
) -> None:
    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher

    # A Modin DataFrame is pickled through its query compiler; any other
    # object is handed to the dispatcher as it is.
    payload = obj._query_compiler if isinstance(obj, ModinObjects.DataFrame) else obj
    return FactoryDispatcher.to_pickle(
        payload,
        filepath_or_buffer=filepath_or_buffer,
        compression=compression,
        protocol=protocol,
        storage_options=storage_options,
    )


@_inherit_docstrings(pandas.read_spss, apilink="pandas.read_spss")
@enable_logging
@wrap_free_function_in_argument_caster("read_spss")
@expanduser_path_arg("path")
def read_spss(
    path: Union[str, pathlib.Path],
    usecols: Optional[Sequence[str]] = None,
    convert_categoricals: bool = True,
    dtype_backend: Union[DtypeBackend, NoDefault] = no_default,
) -> DataFrame:  # noqa: PR01, RT01, D200
    """
    Load an SPSS file from the file path, returning a DataFrame.
    """
    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher

    # Each argument is forwarded by keyword, unchanged.
    spss_qc = FactoryDispatcher.read_spss(
        path=path,
        usecols=usecols,
        convert_categoricals=convert_categoricals,
        dtype_backend=dtype_backend,
    )
    return ModinObjects.DataFrame(query_compiler=spss_qc)


@_inherit_docstrings(pandas.json_normalize, apilink="pandas.json_normalize")
@enable_logging
@wrap_free_function_in_argument_caster("json_normalize")
def json_normalize(
    data: Union[Dict, List[Dict]],
    record_path: Optional[Union[str, List]] = None,
    meta: Optional[Union[str, List[Union[str, List[str]]]]] = None,
    meta_prefix: Optional[str] = None,
    record_prefix: Optional[str] = None,
    errors: Optional[str] = "raise",
    sep: str = ".",
    max_level: Optional[int] = None,
) -> DataFrame:  # noqa: PR01, RT01, D200
    """
    Normalize semi-structured JSON data into a flat table.
    """
    _maybe_warn_on_default("json_normalize")
    # The normalization itself is done by pandas; the resulting pandas frame
    # is then handed to the Modin DataFrame constructor.
    flattened = pandas.json_normalize(
        data,
        record_path=record_path,
        meta=meta,
        meta_prefix=meta_prefix,
        record_prefix=record_prefix,
        errors=errors,
        sep=sep,
        max_level=max_level,
    )
    return ModinObjects.DataFrame(flattened)


@_inherit_docstrings(pandas.read_orc, apilink="pandas.read_orc")
@enable_logging
@wrap_free_function_in_argument_caster("read_orc")
@expanduser_path_arg("path")
def read_orc(
    path,
    columns: Optional[List[str]] = None,
    dtype_backend: Union[DtypeBackend, NoDefault] = no_default,
    filesystem=None,
    **kwargs,
) -> DataFrame:  # noqa: PR01, RT01, D200
    """
    Load an ORC object from the file path, returning a DataFrame.
    """
    _maybe_warn_on_default("read_orc")
    # The file is read by pandas; the resulting pandas frame is then handed
    # to the Modin DataFrame constructor.
    orc_frame = pandas.read_orc(
        path,
        columns=columns,
        dtype_backend=dtype_backend,
        filesystem=filesystem,
        **kwargs,
    )
    return ModinObjects.DataFrame(orc_frame)


@_inherit_docstrings(pandas.HDFStore)
class HDFStore(ClassLogger, pandas.HDFStore):  # noqa: PR01, D200
    """
    Dict-like IO interface for storing pandas objects in PyTables.
    """

    # When true, pandas DataFrames returned by wrapped methods are converted
    # into Modin DataFrames.
    _return_modin_dataframe = True

    def __getattribute__(self, item):
        attribute = super(HDFStore, self).__getattribute__(item)
        # ``__init__``/``__class__`` and non-callable attributes are returned
        # untouched; every other callable is replaced by the wrapper below.
        if item in ("__init__", "__class__") or not callable(attribute):
            return attribute

        def return_handler(*args, **kwargs):
            """
            Call the underlying method with pandas arguments.

            Returns
            -------
            A Modin DataFrame in place of a pandas DataFrame, or the same
            return type as pandas.HDFStore.

            Notes
            -----
            Every Modin DataFrame found among the positional and keyword
            arguments is converted to a pandas DataFrame before the call.
            Currently, pytables does not accept Modin DataFrame objects, so
            we must convert to pandas.
            """
            # We don't want to constantly be giving this error message for
            # internal methods.
            if not item.startswith("_"):
                _maybe_warn_on_default("`{}`".format(item))

            def as_pandas(value):
                # Only Modin DataFrames are converted; anything else passes
                # through unchanged.
                if isinstance(value, ModinObjects.DataFrame):
                    return to_pandas(value)
                return value

            pandas_args = [as_pandas(arg) for arg in args]
            pandas_kwargs = {name: as_pandas(value) for name, value in kwargs.items()}
            # The attribute is looked up again at call time rather than
            # reusing the one resolved when the wrapper was created.
            target = super(HDFStore, self).__getattribute__(item)
            result = target(*pandas_args, **pandas_kwargs)
            if self._return_modin_dataframe and isinstance(result, pandas.DataFrame):
                return ModinObjects.DataFrame(result)
            return result

        # The caller receives the wrapper in place of the original method.
        return return_handler


@_inherit_docstrings(pandas.ExcelFile)
class ExcelFile(ClassLogger, pandas.ExcelFile):  # noqa: PR01, D200
    """
    Class for parsing tabular excel sheets into DataFrame objects.
    """

    # While false, callable attributes are wrapped by ``__getattribute__``;
    # once true, attribute access is left exactly as pandas defines it.
    _behave_like_pandas = False

    def _set_pandas_mode(self):  # noqa
        # disable Modin behavior to be able to pass object to `pandas.read_excel`
        # otherwise, Modin objects may be passed to the pandas context, resulting
        # in undefined behavior
        self._behave_like_pandas = True

    def __getattribute__(self, item):
        # The mode switch and its flag bypass all wrapping logic.
        if item in ("_set_pandas_mode", "_behave_like_pandas"):
            return object.__getattribute__(self, item)

        attribute = super(ExcelFile, self).__getattribute__(item)
        # Return the attribute untouched in pandas mode, for
        # ``__init__``/``__class__`` and for anything that is not callable.
        if (
            self._behave_like_pandas
            or item in ("__init__", "__class__")
            or not callable(attribute)
        ):
            return attribute

        def return_handler(*args, **kwargs):
            """
            Call the underlying method with pandas arguments.

            Returns
            -------
            A Modin DataFrame in place of a pandas DataFrame, or the same
            return type as pandas.ExcelFile.

            Notes
            -----
            Every Modin DataFrame found among the positional and keyword
            arguments is converted to a pandas DataFrame before the call.
            """
            # We don't want to constantly be giving this error message for
            # internal methods.
            if not item.startswith("_"):
                _maybe_warn_on_default("`{}`".format(item))

            def as_pandas(value):
                # Only Modin DataFrames are converted; anything else passes
                # through unchanged.
                if isinstance(value, ModinObjects.DataFrame):
                    return to_pandas(value)
                return value

            pandas_args = [as_pandas(arg) for arg in args]
            pandas_kwargs = {name: as_pandas(value) for name, value in kwargs.items()}
            # The attribute is looked up again at call time rather than
            # reusing the one resolved when the wrapper was created.
            target = super(ExcelFile, self).__getattribute__(item)
            result = target(*pandas_args, **pandas_kwargs)
            if isinstance(result, pandas.DataFrame):
                return ModinObjects.DataFrame(result)
            return result

        # The caller receives the wrapper in place of the original method.
        return return_handler


@wrap_free_function_in_argument_caster("from_non_pandas")
def from_non_pandas(df, index, columns, dtype) -> DataFrame | None:
    """
    Build a Modin DataFrame out of a non-pandas DataFrame.

    Parameters
    ----------
    df : object
        Non-pandas DataFrame.
    index : object
        Index for non-pandas DataFrame.
    columns : object
        Columns for non-pandas DataFrame.
    dtype : type
        Data type to force.

    Returns
    -------
    modin.pandas.DataFrame or None
        Converted DataFrame, or ``None`` when the dispatcher produced no
        query compiler for the input.
    """
    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher

    converted_qc = FactoryDispatcher.from_non_pandas(df, index, columns, dtype)
    # ``None`` from the dispatcher is passed straight back to the caller.
    if converted_qc is None:
        return None
    return ModinObjects.DataFrame(query_compiler=converted_qc)


@wrap_free_function_in_argument_caster("from_pandas")
def from_pandas(df) -> DataFrame:
    """
    Build a Modin DataFrame out of a pandas DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        The pandas DataFrame to convert.

    Returns
    -------
    modin.pandas.DataFrame
        A new Modin DataFrame object.
    """
    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher

    pandas_qc = FactoryDispatcher.from_pandas(df)
    return ModinObjects.DataFrame(query_compiler=pandas_qc)


@wrap_free_function_in_argument_caster("from_arrow")
def from_arrow(at) -> DataFrame:
    """
    Build a Modin DataFrame out of an Arrow Table.

    Parameters
    ----------
    at : Arrow Table
        The Arrow Table to convert from.

    Returns
    -------
    DataFrame
        A new Modin DataFrame object.
    """
    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher

    arrow_qc = FactoryDispatcher.from_arrow(at)
    return ModinObjects.DataFrame(query_compiler=arrow_qc)


@wrap_free_function_in_argument_caster("from_dataframe")
def from_dataframe(df: ProtocolDataframe) -> DataFrame:
    """
    Build a Modin DataFrame out of an object implementing the dataframe interchange protocol.

    See more about the protocol in https://data-apis.org/dataframe-protocol/latest/index.html.

    Parameters
    ----------
    df : ProtocolDataframe
        An object supporting the dataframe interchange protocol.

    Returns
    -------
    DataFrame
        A new Modin DataFrame object.
    """
    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher

    interchange_qc = FactoryDispatcher.from_interchange_dataframe(df)
    return ModinObjects.DataFrame(query_compiler=interchange_qc)


@wrap_free_function_in_argument_caster("from_ray")
def from_ray(ray_obj) -> DataFrame:
    """
    Build a Modin DataFrame out of a Ray Dataset.

    Parameters
    ----------
    ray_obj : ray.data.Dataset
        The Ray Dataset to convert from.

    Returns
    -------
    DataFrame
        A new Modin DataFrame object.

    Notes
    -----
    Ray Dataset can only be converted to Modin DataFrame if Modin uses a Ray engine.
    """
    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher

    ray_qc = FactoryDispatcher.from_ray(ray_obj)
    return ModinObjects.DataFrame(query_compiler=ray_qc)


@wrap_free_function_in_argument_caster("from_dask")
def from_dask(dask_obj) -> DataFrame:
    """
    Build a Modin DataFrame out of a Dask DataFrame.

    Parameters
    ----------
    dask_obj : dask.dataframe.DataFrame
        The Dask DataFrame to convert from.

    Returns
    -------
    DataFrame
        A new Modin DataFrame object.

    Notes
    -----
    Dask DataFrame can only be converted to Modin DataFrame if Modin uses a Dask engine.
    """
    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher

    dask_qc = FactoryDispatcher.from_dask(dask_obj)
    return ModinObjects.DataFrame(query_compiler=dask_qc)


@wrap_free_function_in_argument_caster("from_map")
def from_map(func, iterable, *args, **kwargs) -> DataFrame:
    """
    Create a Modin DataFrame from map function applied to an iterable object.

    This method will construct a Modin DataFrame split by row partitions.
    The number of row partitions matches the number of elements in the iterable object.

    Parameters
    ----------
    func : callable
        Function to map across the iterable object.
    iterable : Iterable
        An iterable object.
    *args : tuple
        Positional arguments to pass in `func`.
    **kwargs : dict
        Keyword arguments to pass in `func`.

    Returns
    -------
    DataFrame
        A new Modin DataFrame object.
    """
    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher

    # Keyword arguments must be unpacked with ``**``: a single ``*`` would
    # pass the keyword *names* as extra positional arguments and silently
    # drop their values.
    return ModinObjects.DataFrame(
        query_compiler=FactoryDispatcher.from_map(func, iterable, *args, **kwargs)
    )


@wrap_free_function_in_argument_caster("to_pandas")
def to_pandas(modin_obj: SupportsPublicToPandas) -> pandas.DataFrame | pandas.Series:
    """
    Convert a Modin DataFrame/Series to a pandas DataFrame/Series.

    Parameters
    ----------
    modin_obj : modin.DataFrame, modin.Series
        The Modin DataFrame/Series to convert.

    Returns
    -------
    pandas.DataFrame or pandas.Series
        Converted object with type depending on input.
    """
    # The conversion is delegated to the object's own ``_to_pandas`` method.
    return modin_obj._to_pandas()


@wrap_free_function_in_argument_caster("to_numpy")
def to_numpy(
    modin_obj: Union[SupportsPrivateToNumPy, SupportsPublicToNumPy],
) -> np.ndarray:
    """
    Turn a Modin object into a NumPy array.

    Parameters
    ----------
    modin_obj : modin.DataFrame, modin.Series, modin.numpy.array
        The Modin distributed object to convert.

    Returns
    -------
    numpy.array
        Converted object with type depending on input.
    """
    # Objects exposing the private converter are handled by it directly.
    if isinstance(modin_obj, SupportsPrivateToNumPy):
        return modin_obj._to_numpy()

    # Otherwise go through the public ``to_numpy``; when the ``ModinNumpy``
    # option is on, its result is converted once more via ``_to_numpy``.
    converted = modin_obj.to_numpy()
    return converted._to_numpy() if ModinNumpy.get() else converted


@wrap_free_function_in_argument_caster("to_ray")
def to_ray(modin_obj):
    """
    Turn a Modin DataFrame/Series into a Ray Dataset.

    Parameters
    ----------
    modin_obj : modin.pandas.DataFrame, modin.pandas.Series
        The DataFrame/Series to convert.

    Returns
    -------
    ray.data.Dataset
        Converted object with type depending on input.

    Notes
    -----
    Modin DataFrame/Series can only be converted to a Ray Dataset if Modin uses a Ray engine.
    """
    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher

    # The dispatcher result is returned as is, without any Modin wrapping.
    ray_dataset = FactoryDispatcher.to_ray(modin_obj)
    return ray_dataset


@wrap_free_function_in_argument_caster("to_dask")
def to_dask(modin_obj):
    """
    Turn a Modin DataFrame/Series into a Dask DataFrame/Series.

    Parameters
    ----------
    modin_obj : modin.pandas.DataFrame, modin.pandas.Series
        The Modin DataFrame/Series to convert.

    Returns
    -------
    dask.dataframe.DataFrame or dask.dataframe.Series
        Converted object with type depending on input.

    Notes
    -----
    Modin DataFrame/Series can only be converted to a Dask DataFrame/Series if Modin uses a Dask engine.
    """
    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher

    # The dispatcher result is returned as is, without any Modin wrapping.
    dask_object = FactoryDispatcher.to_dask(modin_obj)
    return dask_object


# Names exported by a star-import of this module.
# NOTE(review): ``from_ray``, ``from_dask``, ``from_map``, ``to_ray`` and
# ``to_dask`` are defined in this module but are not listed here - confirm
# whether leaving them out is intentional.
__all__ = [
    "ExcelFile",
    "HDFStore",
    "json_normalize",
    "read_clipboard",
    "read_csv",
    "read_excel",
    "read_feather",
    "read_fwf",
    "read_gbq",
    "read_hdf",
    "read_html",
    "read_json",
    "read_orc",
    "read_parquet",
    "read_pickle",
    "read_sas",
    "read_spss",
    "read_sql",
    "read_sql_query",
    "read_sql_table",
    "read_stata",
    "read_table",
    "read_xml",
    "from_non_pandas",
    "from_pandas",
    "from_arrow",
    "from_dataframe",
    "to_pickle",
    "to_pandas",
    "to_numpy",
]


================================================
FILE: modin/pandas/iterator.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Place to define the Modin iterator."""

from __future__ import annotations

from collections.abc import Iterator
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from modin.pandas import DataFrame


class PartitionIterator(Iterator):
    """
    Iterator that walks a dataframe one row or one column at a time.

    Parameters
    ----------
    df : modin.pandas.DataFrame
        The dataframe to iterate over.
    axis : {0, 1}
        Axis to iterate over: falsy values step through rows, truthy values
        step through columns.
    func : callable
        The function applied to every selected row/column; its result is what
        the iterator yields.
    """

    df: DataFrame

    def __init__(self, df: DataFrame, axis, func):
        self.df = df
        self.axis = axis
        # Every key is a 2-tuple suitable for ``df.iloc``: a position on the
        # iterated axis paired with a full slice on the other one.
        if axis:
            self.index_iter = zip(
                iter(slice(None) for _ in range(len(self.df.columns))),
                range(len(self.df.columns)),
            )
        else:
            self.index_iter = zip(
                range(len(self.df.index)),
                iter(slice(None) for _ in range(len(self.df.index))),
            )
        self.func = func

    def __iter__(self):
        """
        Implement iterator interface.

        Returns
        -------
        PartitionIterator
            The iterator itself.
        """
        return self

    def __next__(self):
        """
        Advance to the next row/column and apply `func` to it.

        Returns
        -------
        object
            Whatever `func` returns for the selected slice of the dataframe.
        """
        # ``next`` raises StopIteration once all positions were visited, which
        # is what terminates the iteration.
        position = next(self.index_iter)
        selected = self.df.iloc[position]
        return self.func(selected)


================================================
FILE: modin/pandas/plotting.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Implement pandas plotting API."""

from pandas import plotting as pdplot

from modin.logging import ClassLogger
from modin.pandas.io import to_pandas
from modin.utils import instancer

from .dataframe import DataFrame


@instancer
class Plotting(ClassLogger):
    """Wrapper of pandas plotting module."""

    def __dir__(self):
        """
        Enable tab completion of plotting library.

        Returns
        -------
        list
            Names available in ``pandas.plotting``.
        """
        return dir(pdplot)

    def __getattribute__(self, item):
        """
        Resolve `item` against ``pandas.plotting`` before falling back to `self`.

        Parameters
        ----------
        item : str
            Attribute to look for.

        Returns
        -------
        object
            If `item` exists in ``pandas.plotting`` and is callable, a wrapper is
            returned that converts Modin DataFrame arguments to pandas and then
            calls ``pandas.plotting.<item>``.
            If `item` exists in ``pandas.plotting`` but is not callable, it is
            returned unchanged.
            Otherwise the attribute is looked up on `self`.
        """
        # Not a pandas.plotting name: regular attribute lookup on this object.
        if not hasattr(pdplot, item):
            return object.__getattribute__(self, item)

        target = getattr(pdplot, item)
        if not callable(target):
            return target

        def wrap_func(*args, **kwargs):
            """Convert Modin DataFrames to pandas then call the function."""
            pandas_args = tuple(
                to_pandas(arg) if isinstance(arg, DataFrame) else arg for arg in args
            )
            pandas_kwargs = {
                kwd: to_pandas(val) if isinstance(val, DataFrame) else val
                for kwd, val in kwargs.items()
            }
            return target(*pandas_args, **pandas_kwargs)

        return wrap_func


================================================
FILE: modin/pandas/resample.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Implement Resampler public API."""

from __future__ import annotations

from typing import TYPE_CHECKING, Optional, Union

import numpy as np
import pandas
import pandas.core.resample
from pandas._libs import lib
from pandas.core.dtypes.common import is_list_like

from modin.logging import ClassLogger
from modin.pandas.utils import cast_function_modin2pandas
from modin.utils import _inherit_docstrings

if TYPE_CHECKING:
    from modin.core.storage_formats import BaseQueryCompiler
    from modin.pandas import DataFrame, Series


@_inherit_docstrings(pandas.core.resample.Resampler)
class Resampler(ClassLogger):
    # The object being resampled and its query compiler; every public method
    # forwards `resample_kwargs` plus its own arguments to a `resample_*`
    # method of that query compiler.
    _dataframe: Union[DataFrame, Series]
    _query_compiler: BaseQueryCompiler

    def __init__(
        self,
        dataframe: Union[DataFrame, Series],
        rule,
        axis=0,
        closed=None,
        label=None,
        convention="start",
        kind=None,
        on=None,
        level=None,
        origin="start_day",
        offset=None,
        group_keys=lib.no_default,
    ):
        self._dataframe = dataframe
        self._query_compiler = dataframe._query_compiler
        self.axis = self._dataframe._get_axis_number(axis)
        # The arguments are stored exactly as received (including the raw
        # `axis`, not the normalized `self.axis`) so they can be replayed.
        self.resample_kwargs = dict(
            rule=rule,
            axis=axis,
            closed=closed,
            label=label,
            convention=convention,
            kind=kind,
            on=on,
            level=level,
            origin=origin,
            offset=offset,
            group_keys=group_keys,
        )
        self.__groups = self._get_groups()

    def _get_groups(self):
        """
        Build the groupby object that corresponds to the resampling arguments.

        Returns
        -------
        PandasGroupby
            Groups as specified by resampling arguments.
        """
        params = self.resample_kwargs
        # Grouping always happens along rows, so transpose for axis=1.
        source = self._dataframe.T if self.axis != 0 else self._dataframe
        convention = params["convention"]
        if convention is lib.no_default:
            convention = "start"
        grouper = pandas.Grouper(
            key=params["on"],
            freq=params["rule"],
            closed=params["closed"],
            label=params["label"],
            convention=convention,
            level=params["level"],
            origin=params["origin"],
            offset=params["offset"],
        )
        return source.groupby(grouper, group_keys=params["group_keys"])

    def __getitem__(self, key):
        """
        Get ``Resampler`` based on `key` columns of original dataframe.

        Parameters
        ----------
        key : str or list
            String or list of selections.

        Returns
        -------
        Resampler
            New ``Resampler`` of the same type, built with the same resampling
            arguments on the `key` subset of the original object.

        Raises
        ------
        KeyError
            If `key` (or any of its elements) is not present in the original object.
        """
        from .series import Series

        list_like_types = (
            list,
            tuple,
            Series,
            pandas.Series,
            pandas.Index,
            np.ndarray,
        )
        if isinstance(key, list_like_types):
            columns = self._dataframe.columns
            if len(columns.intersection(key)) != len(set(key)):
                missing = sorted(set(key).difference(columns))
                raise KeyError(f"Columns not found: {str(missing)[1:-1]}")
            selection = list(key)
        else:
            if key not in self._dataframe:
                raise KeyError(f"Column not found: {key}")
            selection = key

        return type(self)(self._dataframe[selection], **self.resample_kwargs)

    @property
    def groups(self):
        # Computed by resampling with pandas and reading `.groups` off the result.
        return self._query_compiler.default_to_pandas(
            lambda df: pandas.DataFrame.resample(df, **self.resample_kwargs).groups
        )

    @property
    def indices(self):
        # Computed by resampling with pandas and reading `.indices` off the result.
        return self._query_compiler.default_to_pandas(
            lambda df: pandas.DataFrame.resample(df, **self.resample_kwargs).indices
        )

    def get_group(self, name, obj=None):
        new_qc = self._query_compiler.resample_get_group(
            self.resample_kwargs, name, obj
        )
        return self._dataframe.__constructor__(query_compiler=new_qc)

    def apply(self, func, *args, **kwargs):
        from .dataframe import DataFrame

        func = cast_function_modin2pandas(func)
        is_frame = isinstance(self._dataframe, DataFrame)
        if is_frame:
            qc_method = self._query_compiler.resample_app_df
        else:
            qc_method = self._query_compiler.resample_app_ser

        result = DataFrame(
            query_compiler=qc_method(self.resample_kwargs, func, *args, **kwargs)
        )
        if is_frame or is_list_like(func):
            return result
        # Non-DataFrame input with a single function: reduce the result frame.
        if len(result.index) == 1:
            return result.iloc[0]
        return result.squeeze()

    def aggregate(self, func, *args, **kwargs):
        from .dataframe import DataFrame

        is_frame = isinstance(self._dataframe, DataFrame)
        if is_frame:
            qc_method = self._query_compiler.resample_agg_df
        else:
            qc_method = self._query_compiler.resample_agg_ser

        result = DataFrame(
            query_compiler=qc_method(self.resample_kwargs, func, *args, **kwargs)
        )
        if is_frame or is_list_like(func):
            return result
        # Non-DataFrame input with a single function: reduce the result frame.
        if len(result.index) == 1:
            return result.iloc[0]
        return result.squeeze()

    def transform(self, arg, *args, **kwargs):
        new_qc = self._query_compiler.resample_transform(
            self.resample_kwargs, arg, *args, **kwargs
        )
        return self._dataframe.__constructor__(query_compiler=new_qc)

    def pipe(self, func, *args, **kwargs):
        new_qc = self._query_compiler.resample_pipe(
            self.resample_kwargs, func, *args, **kwargs
        )
        return self._dataframe.__constructor__(query_compiler=new_qc)

    def ffill(self, limit=None):
        return self.fillna("ffill", limit=limit)

    def bfill(self, limit=None):
        return self.fillna("bfill", limit=limit)

    def nearest(self, limit=None):
        new_qc = self._query_compiler.resample_nearest(self.resample_kwargs, limit)
        return self._dataframe.__constructor__(query_compiler=new_qc)

    def fillna(self, method, limit=None):
        new_qc = self._query_compiler.resample_fillna(
            self.resample_kwargs, method, limit
        )
        return self._dataframe.__constructor__(query_compiler=new_qc)

    def asfreq(self, fill_value=None):
        new_qc = self._query_compiler.resample_asfreq(
            self.resample_kwargs, fill_value
        )
        return self._dataframe.__constructor__(query_compiler=new_qc)

    def interpolate(
        self,
        method="linear",
        *,
        axis=0,
        limit=None,
        inplace=False,
        limit_direction: Optional[str] = None,
        limit_area=None,
        downcast=lib.no_default,
        **kwargs,
    ):
        new_qc = self._query_compiler.resample_interpolate(
            self.resample_kwargs,
            method,
            axis=axis,
            limit=limit,
            inplace=inplace,
            limit_direction=limit_direction,
            limit_area=limit_area,
            downcast=downcast,
            **kwargs,
        )
        return self._dataframe.__constructor__(query_compiler=new_qc)

    def count(self):
        new_qc = self._query_compiler.resample_count(self.resample_kwargs)
        return self._dataframe.__constructor__(query_compiler=new_qc)

    def nunique(self, *args, **kwargs):
        new_qc = self._query_compiler.resample_nunique(
            self.resample_kwargs, *args, **kwargs
        )
        return self._dataframe.__constructor__(query_compiler=new_qc)

    def first(self, *args, **kwargs):
        new_qc = self._query_compiler.resample_first(
            self.resample_kwargs, *args, **kwargs
        )
        return self._dataframe.__constructor__(query_compiler=new_qc)

    def last(self, *args, **kwargs):
        new_qc = self._query_compiler.resample_last(
            self.resample_kwargs, *args, **kwargs
        )
        return self._dataframe.__constructor__(query_compiler=new_qc)

    def max(self, *args, **kwargs):
        new_qc = self._query_compiler.resample_max(
            self.resample_kwargs, *args, **kwargs
        )
        return self._dataframe.__constructor__(query_compiler=new_qc)

    def mean(self, *args, **kwargs):
        new_qc = self._query_compiler.resample_mean(
            self.resample_kwargs, *args, **kwargs
        )
        return self._dataframe.__constructor__(query_compiler=new_qc)

    def median(self, *args, **kwargs):
        new_qc = self._query_compiler.resample_median(
            self.resample_kwargs, *args, **kwargs
        )
        return self._dataframe.__constructor__(query_compiler=new_qc)

    def min(self, *args, **kwargs):
        new_qc = self._query_compiler.resample_min(
            self.resample_kwargs, *args, **kwargs
        )
        return self._dataframe.__constructor__(query_compiler=new_qc)

    def ohlc(self, *args, **kwargs):
        from .dataframe import DataFrame

        # The result is always wrapped in a DataFrame; only the query compiler
        # entry point depends on the type of the resampled object.
        if isinstance(self._dataframe, DataFrame):
            ohlc_impl = self._query_compiler.resample_ohlc_df
        else:
            ohlc_impl = self._query_compiler.resample_ohlc_ser
        return DataFrame(
            query_compiler=ohlc_impl(self.resample_kwargs, *args, **kwargs)
        )

    def prod(self, min_count=0, *args, **kwargs):
        new_qc = self._query_compiler.resample_prod(
            self.resample_kwargs, *args, min_count=min_count, **kwargs
        )
        return self._dataframe.__constructor__(query_compiler=new_qc)

    def size(self):
        from .series import Series

        sizes = Series(
            query_compiler=self._query_compiler.resample_size(self.resample_kwargs)
        )
        if isinstance(self._dataframe, Series):
            return sizes
        # If input is a DataFrame, rename output Series to None
        return sizes.rename(None)

    def sem(self, *args, **kwargs):
        new_qc = self._query_compiler.resample_sem(
            self.resample_kwargs, *args, **kwargs
        )
        return self._dataframe.__constructor__(query_compiler=new_qc)

    def std(self, ddof=1, *args, **kwargs):
        new_qc = self._query_compiler.resample_std(
            self.resample_kwargs, *args, ddof=ddof, **kwargs
        )
        return self._dataframe.__constructor__(query_compiler=new_qc)

    def sum(self, min_count=0, *args, **kwargs):
        new_qc = self._query_compiler.resample_sum(
            self.resample_kwargs, *args, min_count=min_count, **kwargs
        )
        return self._dataframe.__constructor__(query_compiler=new_qc)

    def var(self, ddof=1, *args, **kwargs):
        new_qc = self._query_compiler.resample_var(
            self.resample_kwargs, *args, ddof=ddof, **kwargs
        )
        return self._dataframe.__constructor__(query_compiler=new_qc)

    def quantile(self, q=0.5, **kwargs):
        new_qc = self._query_compiler.resample_quantile(
            self.resample_kwargs, q, **kwargs
        )
        return self._dataframe.__constructor__(query_compiler=new_qc)


================================================
FILE: modin/pandas/series.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses `Series` class, that is distributed version of `pandas.Series`."""

from __future__ import annotations

import os
import warnings
from typing import IO, TYPE_CHECKING, Any, Hashable, Iterable, Optional, Union

import numpy as np
import pandas
from pandas._libs import lib
from pandas._typing import (
    ArrayLike,
    Axis,
    DtypeObj,
    IndexKeyFunc,
    Scalar,
    Sequence,
    StorageOptions,
)
from pandas.api.types import is_integer
from pandas.core.arrays import ExtensionArray
from pandas.core.common import apply_if_callable, is_bool_indexer
from pandas.core.dtypes.common import is_dict_like, is_list_like
from pandas.core.series import _coerce_method
from pandas.io.formats.info import SeriesInfo
from pandas.util._decorators import doc
from pandas.util._validators import validate_bool_kwarg

from modin.config import PersistentPickle
from modin.core.storage_formats.pandas.query_compiler_caster import (
    EXTENSION_DICT_TYPE,
    EXTENSION_NO_LOOKUP,
)
from modin.logging import disable_logging
from modin.pandas.io import from_pandas, to_pandas
from modin.utils import (
    MODIN_UNNAMED_SERIES_LABEL,
    _inherit_docstrings,
    import_optional_dependency,
    sentinel,
)

from .accessor import CachedAccessor, SparseAccessor
from .base import _ATTRS_NO_LOOKUP, BasePandasDataset
from .iterator import PartitionIterator
from .series_utils import (
    CategoryMethods,
    DatetimeProperties,
    ListAccessor,
    StringMethods,
    StructAccessor,
)
from .utils import (
    GET_BACKEND_DOC,
    SET_BACKEND_DOC,
    _doc_binary_op,
    cast_function_modin2pandas,
    is_scalar,
)

if TYPE_CHECKING:
    import numpy.typing as npt
    from typing_extensions import Self

    from modin.core.storage_formats import BaseQueryCompiler

    from .dataframe import DataFrame


@_inherit_docstrings(
    pandas.Series, excluded=[pandas.Series.__init__], apilink="pandas.Series"
)
class Series(BasePandasDataset):
    """
    Modin distributed representation of `pandas.Series`.

    Internally, the data can be divided into partitions in order to parallelize
    computations and utilize the user's hardware as much as possible.

    Inherit common for DataFrames and Series functionality from the
    `BasePandasDataset` class.

    Parameters
    ----------
    data : modin.pandas.Series, array-like, Iterable, dict, or scalar value, optional
        Contains data stored in Series. If data is a dict, argument order is
        maintained.
    index : array-like or Index (1d), optional
        Values must be hashable and have the same length as `data`.
    dtype : str, np.dtype, or pandas.ExtensionDtype, optional
        Data type for the output Series. If not specified, this will be
        inferred from `data`.
    name : str, optional
        The name to give to the Series.
    copy : bool, default: False
        Copy input data.
    fastpath : bool, default: False
        `pandas` internal parameter.
    query_compiler : BaseQueryCompiler, optional
        A query compiler object to create the Series from.
    """

    _pandas_class = pandas.Series
    __array_priority__ = pandas.Series.__array_priority__

    _extensions: EXTENSION_DICT_TYPE = EXTENSION_DICT_TYPE(dict)

    def __init__(
        self,
        data=None,
        index=None,
        dtype=None,
        name=None,
        copy=None,
        fastpath=lib.no_default,
        query_compiler: BaseQueryCompiler = None,
    ) -> None:
        from modin.numpy import array

        # Siblings are other dataframes that share the same query compiler. We
        # use this list to update inplace when there is a shallow copy.
        self._siblings = []

        # Input is another Series of this type: start from a copy of its query
        # compiler, or from the `index` selection when one is requested.
        if isinstance(data, type(self)):
            query_compiler = data._query_compiler.copy()
            if index is not None:
                if not all(label in data.index for label in index):
                    raise NotImplementedError(
                        "Passing non-existent columns or index values to constructor "
                        "not yet implemented."
                    )
                query_compiler = data.loc[index]._query_compiler

        # Input is a modin.numpy array: 2-D arrays are rejected, then the
        # requested index/dtype are applied to a copy of its query compiler.
        if isinstance(data, array):
            if data._ndim == 2:
                raise ValueError("Data must be 1-dimensional")
            query_compiler = data._query_compiler.copy()
            if index is not None:
                query_compiler.index = index
            if dtype is not None:
                query_compiler = query_compiler.astype(
                    dict.fromkeys(query_compiler.columns, dtype)
                )
            if name is None:
                query_compiler.columns = pandas.Index([MODIN_UNNAMED_SERIES_LABEL])

        # No query compiler so far: build the object with pandas and import it.
        if query_compiler is None:
            # Defaulting to pandas
            if name is None:
                if isinstance(data, pandas.Series) and data.name is not None:
                    name = data.name
                else:
                    name = MODIN_UNNAMED_SERIES_LABEL

            pandas_series = pandas.Series(
                data=data,
                index=index,
                dtype=dtype,
                name=name,
                copy=copy,
                fastpath=fastpath,
            )
            pandas_df = pandas.DataFrame(pandas_series)
            if pandas_df.size >= 2_500_000:
                warnings.warn(
                    f"Distributing {type(data)} object. This may take some time."
                )
            query_compiler = from_pandas(pandas_df)._query_compiler

        self._query_compiler = query_compiler.columnarize()
        if name is not None:
            self.name = name

    def _get_name(self) -> Hashable:
        """
        Get the value of the `name` property.

        The name is read from the first column label of the query compiler; the
        placeholder label used for unnamed Series is reported as ``None``.

        Returns
        -------
        hashable
        """
        label = self._query_compiler.columns[0]
        return None if label == MODIN_UNNAMED_SERIES_LABEL else label

    def _set_name(self, name: Hashable) -> None:
        """
        Set the value of the `name` property.

        The name is stored as the single column label of the query compiler.

        Parameters
        ----------
        name : hashable
            Name value to set. ``None`` is replaced by the placeholder label for
            unnamed Series; a tuple is stored as a one-element ``MultiIndex``.
        """
        label = MODIN_UNNAMED_SERIES_LABEL if name is None else name
        self._query_compiler.columns = (
            pandas.MultiIndex.from_tuples(tuples=[label])
            if isinstance(label, tuple)
            else [label]
        )

    # `name` is a property backed by the `_get_name` / `_set_name` accessors above.
    name: Hashable = property(_get_name, _set_name)
    _parent = None
    # Parent axis denotes axis that was used to select series in a parent dataframe.
    # If _parent_axis == 0, then it means that index axis was used via df.loc[row]
    # indexing operations and assignments should be done to rows of parent.
    # If _parent_axis == 1 it means that column axis was used via df[column] and assignments
    # should be done to columns of parent.
    _parent_axis = 0

    @_doc_binary_op(operation="addition", bin_op="add")
    def __add__(self, right) -> Series:
        # `self + right` delegates to the named method `add`.
        return self.add(right)

    @_doc_binary_op(operation="addition", bin_op="radd", right="left")
    def __radd__(self, left) -> Series:
        # Reflected form (`left + self`) delegates to the named method `radd`.
        return self.radd(left)

    @_doc_binary_op(operation="conjunction", bin_op="and", right="other")
    def __and__(self, other) -> Series:
        # `self & other` is a logical AND (conjunction), matching the
        # "disjunction" wording used for `__or__`.
        # Lists, NumPy arrays and pandas Series operands are handled by the
        # pandas implementation directly.
        if isinstance(other, (list, np.ndarray, pandas.Series)):
            return self._default_to_pandas(pandas.Series.__and__, other)
        # Otherwise prepare both operands and use the parent implementation.
        new_self, new_other = self._prepare_inter_op(other)
        return super(Series, new_self).__and__(new_other)

    @_doc_binary_op(operation="conjunction", bin_op="and", right="other")
    def __rand__(self, other) -> Series:
        # Reflected `other & self`: a logical AND (conjunction), matching the
        # "disjunction" wording used for `__ror__`.
        # Lists, NumPy arrays and pandas Series operands are handled by the
        # pandas implementation directly.
        if isinstance(other, (list, np.ndarray, pandas.Series)):
            return self._default_to_pandas(pandas.Series.__rand__, other)
        # Otherwise prepare both operands and use the parent implementation.
        new_self, new_other = self._prepare_inter_op(other)
        return super(Series, new_self).__rand__(new_other)

    # add `_inherit_docstrings` decorator to force method link addition.
    @_inherit_docstrings(pandas.Series.__array__, apilink="pandas.Series.__array__")
    def __array__(
        self, dtype: npt.DTypeLike | None = None, copy: bool | None = None
    ) -> np.ndarray:
        # The parent implementation's result is flattened before being returned.
        # NOTE(review): `copy` is accepted but not forwarded to the parent
        # call -- confirm this is intentional.
        return super(Series, self).__array__(dtype).flatten()

    def __column_consortium_standard__(
        self, *, api_version: str | None = None
    ):  # noqa: PR01, RT01
        """
        Provide entry point to the Consortium DataFrame Standard API.

        This is developed and maintained outside of Modin.
        Please report any issues to https://github.com/data-apis/dataframe-api-compat.
        """
        compat_module = import_optional_dependency(
            "dataframe_api_compat", "implementation"
        )
        to_standard_column = (
            compat_module.modin_standard.convert_to_standard_compliant_column
        )
        return to_standard_column(self, api_version=api_version)

    def __contains__(self, key: Hashable) -> bool:
        """
        Check if `key` in the `Series.index`.

        Membership is tested against the index labels, not against the values.

        Parameters
        ----------
        key : hashable
            Key to check the presence in the index.

        Returns
        -------
        bool
            True if `key` is one of the index labels, False otherwise.
        """
        return key in self.index

    def __copy__(self, deep: bool = True) -> Series:
        """
        Return the copy of the Series.

        Delegates to ``self.copy(deep=deep)``.

        Parameters
        ----------
        deep : bool, default: True
            Whether the copy should be deep or not.

        Returns
        -------
        Series
            The copied Series.
        """
        return self.copy(deep=deep)

    def __deepcopy__(self, memo=None) -> Series:
        """
        Return the deep copy of the Series.

        Delegates to ``self.copy(deep=True)``.

        Parameters
        ----------
        memo : Any, optional
           Accepted for signature compatibility; it is not used here.

        Returns
        -------
        Series
            The deep-copied Series.
        """
        return self.copy(deep=True)

    def __delitem__(self, key: Hashable) -> None:
        """
        Delete item identified by `key` label.

        The Series is modified in place.

        Parameters
        ----------
        key : hashable
            Key to delete.

        Raises
        ------
        KeyError
            If `key` is not among ``self.keys()``.
        """
        # Check explicitly so that a missing label always surfaces as KeyError(key).
        if key not in self.keys():
            raise KeyError(key)
        self.drop(labels=key, inplace=True)

    @_doc_binary_op(
        operation="integer division and modulo",
        bin_op="divmod",
        returns="tuple of two Series",
    )
    def __divmod__(self, right) -> tuple[Series, Series]:
        # `divmod(self, right)` delegates to the named method `divmod`.
        return self.divmod(right)

    @_doc_binary_op(
        operation="integer division and modulo",
        bin_op="divmod",
        right="left",
        returns="tuple of two Series",
    )
    def __rdivmod__(self, left) -> tuple[Series, Series]:
        # Reflected form (`divmod(left, self)`) delegates to the named method `rdivmod`.
        return self.rdivmod(left)

    @_doc_binary_op(operation="integer division", bin_op="floordiv")
    def __floordiv__(self, right) -> Series:
        # `self // right` delegates to the named method `floordiv`.
        return self.floordiv(right)

    @_doc_binary_op(operation="integer division", bin_op="floordiv")
    def __rfloordiv__(self, right) -> Series:
        # Reflected form: despite its name, `right` is the left-hand operand of
        # `right // self`; the call delegates to the named method `rfloordiv`.
        return self.rfloordiv(right)

    @disable_logging
    def __getattribute__(self, key: str) -> Any:
        """
        Get attribute identified by `key`.

        Unless `key` is listed in ``EXTENSION_NO_LOOKUP``, the extensions
        registered for this class are consulted first; if none matches, the
        regular attribute lookup of the parent class is used.

        Parameters
        ----------
        key : str
            Key to get.

        Returns
        -------
        Any
            The attribute.
        """
        # NOTE that to get an attribute, python calls __getattribute__() first and
        # then falls back to __getattr__() if the former raises an AttributeError.
        if key not in EXTENSION_NO_LOOKUP:
            # `__class__` is the class this method is defined in, so it is that
            # class's `_extensions` mapping which gets searched.
            extensions_result = self._getattribute__from_extension_impl(
                key, __class__._extensions
            )
            # Anything other than `sentinel` is treated as the resolved attribute.
            if extensions_result is not sentinel:
                return extensions_result

        return super().__getattribute__(key)

    @disable_logging
    def __getattr__(self, key: Hashable) -> Any:
        """
        Return item identified by `key`.

        Parameters
        ----------
        key : hashable
            Key to get.

        Returns
        -------
        Any

        Notes
        -----
        This is only reached after `__getattribute__` failed. In that case
        `key` is looked up among the index labels, which makes ``series.label``
        behave like ``series["label"]``.
        """
        # NOTE that to get an attribute, python calls __getattribute__() first and
        # then falls back to __getattr__() if the former raises an AttributeError.
        if key in _ATTRS_NO_LOOKUP or key not in self._query_compiler.index:
            raise AttributeError(f"'Series' object has no attribute '{key}'")
        return self[key]

    # `float(series)` / `int(series)` are built with the `_coerce_method`
    # helper imported from `pandas.core.series`.
    __float__ = _coerce_method(float)
    __int__ = _coerce_method(int)

    def __iter__(self):
        """
        Return an iterator of the values.

        The Series is converted to pandas first and that object is iterated.

        Returns
        -------
        iterable
        """
        pandas_series = self._to_pandas()
        return iter(pandas_series)

    @_doc_binary_op(operation="modulo", bin_op="mod")
    def __mod__(self, right) -> Series:
        # `self % right` delegates to the named method `mod`.
        return self.mod(right)

    @_doc_binary_op(operation="modulo", bin_op="mod", right="left")
    def __rmod__(self, left) -> Series:
        # Reflected form (`left % self`) delegates to the named method `rmod`.
        return self.rmod(left)

    @_doc_binary_op(operation="multiplication", bin_op="mul")
    def __mul__(self, right) -> Series:
        # `self * right` delegates to the named method `mul`.
        return self.mul(right)

    @_doc_binary_op(operation="multiplication", bin_op="mul", right="left")
    def __rmul__(self, left) -> Series:
        # Reflected form (`left * self`) delegates to the named method `rmul`.
        return self.rmul(left)

    @_doc_binary_op(operation="disjunction", bin_op="or", right="other")
    def __or__(self, other) -> Series:
        pandas_handled_types = (list, np.ndarray, pandas.Series)
        if not isinstance(other, pandas_handled_types):
            # Prepare both operands, then use the parent implementation.
            prepared_self, prepared_other = self._prepare_inter_op(other)
            return super(Series, prepared_self).__or__(prepared_other)
        # Lists, NumPy arrays and pandas Series go through pandas directly.
        return self._default_to_pandas(pandas.Series.__or__, other)

    @_doc_binary_op(operation="disjunction", bin_op="or", right="other")
    def __ror__(self, other) -> Series:
        # Lists, NumPy arrays and pandas Series go through the
        # default-to-pandas path; every other operand is prepared with
        # `_prepare_inter_op` and handed to the parent implementation.
        if not isinstance(other, (list, np.ndarray, pandas.Series)):
            lhs, rhs = self._prepare_inter_op(other)
            return super(Series, lhs).__ror__(rhs)
        return self._default_to_pandas(pandas.Series.__ror__, other)

    @_doc_binary_op(operation="exclusive or", bin_op="xor", right="other")
    def __xor__(self, other) -> Series:
        # Lists, NumPy arrays and pandas Series go through the
        # default-to-pandas path; every other operand is prepared with
        # `_prepare_inter_op` and handed to the parent implementation.
        if not isinstance(other, (list, np.ndarray, pandas.Series)):
            lhs, rhs = self._prepare_inter_op(other)
            return super(Series, lhs).__xor__(rhs)
        return self._default_to_pandas(pandas.Series.__xor__, other)

    @_doc_binary_op(operation="exclusive or", bin_op="xor", right="other")
    def __rxor__(self, other) -> Series:
        # Lists, NumPy arrays and pandas Series go through the
        # default-to-pandas path; every other operand is prepared with
        # `_prepare_inter_op` and handed to the parent implementation.
        if not isinstance(other, (list, np.ndarray, pandas.Series)):
            lhs, rhs = self._prepare_inter_op(other)
            return super(Series, lhs).__rxor__(rhs)
        return self._default_to_pandas(pandas.Series.__rxor__, other)

    @_doc_binary_op(operation="exponential power", bin_op="pow")
    def __pow__(self, right) -> Series:
        # ``self ** right``: delegate to the named method `pow`.
        return self.pow(right)

    @_doc_binary_op(operation="exponential power", bin_op="pow", right="left")
    def __rpow__(self, left) -> Series:
        # ``left ** self``: delegate to the reflected named method `rpow`.
        return self.rpow(left)

    def __repr__(self) -> str:
        """
        Return a string representation for a particular Series.

        The body of the text comes from ``repr()`` of the object returned by
        ``_build_repr_df(num_rows, num_cols)``; its trailing footer line(s) are
        cut off and replaced by a footer rebuilt from this Series' own
        frequency, name, length and dtype.

        Returns
        -------
        str
        """
        # Fall back to 60 rows / 20 columns when the pandas display options
        # are falsy (e.g. ``None``).
        num_rows = pandas.get_option("display.max_rows") or 60
        num_cols = pandas.get_option("display.max_columns") or 20
        temp_df = self._build_repr_df(num_rows, num_cols)
        # A non-empty DataFrame is reduced to its first column so that
        # ``repr`` below formats a one-dimensional object.
        if isinstance(temp_df, pandas.DataFrame) and not temp_df.empty:
            temp_df = temp_df.iloc[:, 0]
        temp_str = repr(temp_df)
        # Footer pieces; each one is either empty or ends with ", ".
        freq_str = (
            "Freq: {}, ".format(self.index.freqstr)
            if isinstance(self.index, pandas.DatetimeIndex)
            else ""
        )
        if self.name is not None:
            name_str = "Name: {}, ".format(str(self.name))
        else:
            name_str = ""
        # The length is only shown when the Series has more rows than the
        # display limit.
        if len(self) > num_rows:
            len_str = "Length: {}, ".format(len(self))
        else:
            len_str = ""
        # For empty data the dtype comes from this Series and carries the ")"
        # that closes the "Series([], ..." text returned below; otherwise the
        # text after the last "dtype: " of the pandas repr is reused.
        dtype_str = "dtype: {}".format(
            str(self.dtype) + ")"
            if temp_df.empty
            else temp_str.rsplit("dtype: ", 1)[-1]
        )
        if len(self) == 0:
            return "Series([], {}{}{}".format(freq_str, name_str, dtype_str)
        # Number of trailing lines of the pandas repr to drop before the
        # rebuilt footer is appended. Two lines are dropped for a named
        # categorical Series -- presumably because pandas prints an extra
        # footer line for categories; TODO confirm.
        maxsplit = 1
        if (
            isinstance(temp_df, pandas.Series)
            and temp_df.name is not None
            and isinstance(temp_df.dtype, pandas.CategoricalDtype)
        ):
            maxsplit = 2
        return temp_str.rsplit("\n", maxsplit)[0] + "\n{}{}{}{}".format(
            freq_str, name_str, len_str, dtype_str
        )

    def __round__(self, decimals=0) -> Series:
        """
        Round every value of the Series to `decimals` decimal places.

        Parameters
        ----------
        decimals : int, default: 0
            Number of decimal places to round to.

        Returns
        -------
        Series
            Result built from the rounded query compiler.
        """
        rounded_qc = self._query_compiler.round(decimals=decimals)
        return self._create_or_update_from_compiler(rounded_qc)

    def __setitem__(self, key: Hashable, value: Any) -> None:
        """
        Assign `value` to the location(s) of the Series selected by `key`.

        Parameters
        ----------
        key : hashable
            Key to set. Slices are handled by ``_setitem_slice``; any other
            key is assigned through ``.loc``.
        value : Any
            Value to set.
        """
        if not isinstance(key, slice):
            self.loc[key] = value
            return
        self._setitem_slice(key, value)

    @disable_logging
    def __setattr__(self, name: str, value: Any) -> None:
        """
        Set attribute `name` to `value`.

        A registered extension that defines ``__set__`` takes precedence over
        the regular attribute assignment.

        Parameters
        ----------
        name : str
            Name of the attribute to set.
        value : Any
            Value to set.

        Returns
        -------
        None
        """
        # An extension property is only accessible if the backend supports it.
        extension = self._get_extension(name, __class__._extensions)
        if extension is sentinel or not hasattr(extension, "__set__"):
            # No usable extension setter: use the regular assignment.
            return super().__setattr__(name, value)
        return extension.__set__(self, value)

    @disable_logging
    def __delattr__(self, name) -> None:
        """
        Delete attribute `name`.

        A registered extension that defines ``__delete__`` takes precedence
        over the regular attribute deletion.

        Parameters
        ----------
        name : str
            Name of the attribute to delete.

        Returns
        -------
        None
        """
        # An extension property is only accessible if the backend supports it.
        extension = self._get_extension(name, __class__._extensions)
        if extension is sentinel or not hasattr(extension, "__delete__"):
            # No usable extension deleter: use the regular deletion.
            return super().__delattr__(name)
        return extension.__delete__(self)

    @_doc_binary_op(operation="subtraction", bin_op="sub")
    def __sub__(self, right) -> Series:
        # ``self - right``: delegate to the named method `sub`.
        return self.sub(right)

    @_doc_binary_op(operation="subtraction", bin_op="sub", right="left")
    def __rsub__(self, left) -> Series:
        # ``left - self``: delegate to the reflected named method `rsub`.
        return self.rsub(left)

    @_doc_binary_op(operation="floating division", bin_op="truediv")
    def __truediv__(self, right) -> Series:
        # ``self / right``: delegate to the named method `truediv`.
        return self.truediv(right)

    @_doc_binary_op(operation="floating division", bin_op="truediv", right="left")
    def __rtruediv__(self, left) -> Series:
        # ``left / self``: delegate to the reflected named method `rtruediv`.
        return self.rtruediv(left)

    # The augmented-assignment operators (``+=``, ``*=``, ``**=``, ``-=``,
    # ``/=``) are bound to the very same functions as the regular binary
    # operators.
    __iadd__ = __add__
    __imul__ = __mul__
    __ipow__ = __pow__
    __isub__ = __sub__
    __itruediv__ = __truediv__

    @property
    def values(self):  # noqa: RT01, D200
        """
        Return Series as ndarray or ndarray-like depending on the dtype.
        """
        import modin.pandas as pd

        dtype = self.dtype
        is_categorical = isinstance(dtype, pd.CategoricalDtype)
        is_extension = isinstance(dtype, pandas.core.dtypes.dtypes.ExtensionDtype)
        # Extension dtypes other than categorical are resolved by pandas.
        if is_extension and not is_categorical:
            return self._default_to_pandas("values")

        data = self.to_numpy()
        if not is_categorical:
            return data

        from modin.config import ModinNumpy

        # With ModinNumpy enabled the array is first converted through its
        # `_to_numpy()` method before the Categorical is built.
        if ModinNumpy.get():
            data = data._to_numpy()
        return pd.Categorical(data, dtype=dtype)

    def __arrow_array__(self, type=None):  # noqa: GL08
        # pandas.Series itself does not implement this method (true for
        # version 2.2.*), but pyarrow supports it. Emulating that behavior
        # here lets pyarrow work with modin.pandas.Series.
        import pyarrow

        pandas_series = self._to_pandas()
        return pyarrow.array(pandas_series, type=type)

    def add(
        self, other, level=None, fill_value=None, axis=0
    ) -> Series:  # noqa: PR01, RT01, D200
        """
        Return Addition of series and other, element-wise (binary operator add).
        """
        # Both operands are prepared first; the parent implementation is then
        # invoked on the prepared left operand.
        lhs, rhs = self._prepare_inter_op(other)
        parent = super(Series, lhs)
        return parent.add(rhs, level=level, fill_value=fill_value, axis=axis)

    def radd(
        self, other, level=None, fill_value=None, axis=0
    ) -> Series:  # noqa: PR01, RT01, D200
        """
        Return Addition of series and other, element-wise (binary operator radd).
        """
        # Both operands are prepared first; the parent implementation is then
        # invoked on the prepared left operand.
        lhs, rhs = self._prepare_inter_op(other)
        parent = super(Series, lhs)
        return parent.radd(rhs, level=level, fill_value=fill_value, axis=axis)

    def add_prefix(
        self, prefix, axis=None
    ) -> Union[DataFrame, Series]:  # noqa: PR01, RT01, D200
        """
        Prefix labels with string `prefix`.
        """
        # A missing axis means axis 0; anything else is normalized to a number.
        if axis is None:
            axis = 0
        else:
            axis = self._get_axis_number(axis)
        prefixed_qc = self._query_compiler.add_prefix(prefix, axis=axis)
        return self.__constructor__(query_compiler=prefixed_qc)

    def add_suffix(
        self, suffix, axis=None
    ) -> Union[DataFrame, Series]:  # noqa: PR01, RT01, D200
        """
        Suffix labels with string `suffix`.
        """
        # A missing axis means axis 0; anything else is normalized to a number.
        if axis is None:
            axis = 0
        else:
            axis = self._get_axis_number(axis)
        suffixed_qc = self._query_compiler.add_suffix(suffix, axis=axis)
        return self.__constructor__(query_compiler=suffixed_qc)

    def aggregate(
        self, func=None, axis=0, *args, **kwargs
    ) -> Union[Series, Scalar]:  # noqa: PR01, RT01, D200
        """
        Aggregate using one or more operations over the specified axis.
        """

        def error_raiser(msg, exception):
            """Raise `exception` with `msg`, substituting AssertionError for TypeError."""
            # HACK: pandas reports these failures as AssertionError, so a
            # TypeError is swapped for it before raising.
            if exception is TypeError:
                exception = AssertionError
            raise exception(msg)

        # Validate `func` up front, then let the parent class aggregate.
        self._validate_function(func, on_invalid=error_raiser)
        parent = super(Series, self)
        return parent.aggregate(func, axis, *args, **kwargs)

    agg = aggregate

    def apply(
        self, func, convert_dtype=lib.no_default, args=(), by_row="compat", **kwargs
    ) -> Union[DataFrame, Series]:  # noqa: PR01, RT01, D200
        """
        Invoke function on values of Series.

        Notes
        -----
        * Any `by_row` other than ``"compat"`` is executed by pandas through
          the default-to-pandas path.
        * Passing `convert_dtype` explicitly emits a ``FutureWarning``; on the
          ``by_row="compat"`` path the value is not used afterwards in this
          method.
        * The kind of the result (DataFrame, Series or something else) is
          probed by running `func` on a pandas Series built from at most the
          first row of this Series.
        """
        if by_row != "compat":
            # TODO: add test
            return self._default_to_pandas(
                pandas.Series.apply,
                func=func,
                convert_dtype=convert_dtype,
                args=args,
                by_row=by_row,
                **kwargs,
            )

        if convert_dtype is lib.no_default:
            convert_dtype = True
        else:
            warnings.warn(
                "the convert_dtype parameter is deprecated and will be removed in a "
                + "future version.  Do ``ser.astype(object).apply()`` "
                + "instead if you want ``convert_dtype=False``.",
                FutureWarning,
            )

        func = cast_function_modin2pandas(func)
        self._validate_function(func)
        # apply and aggregate have slightly different behaviors, so we have to use
        # each one separately to determine the correct return type. In the case of
        # `agg`, the axis is set, but it is not required for the computation, so we use
        # it to determine which function to run.
        # NOTE: "axis" is removed from `kwargs` here, so it is never forwarded
        # to `func` below.
        if kwargs.pop("axis", None) is not None:
            apply_func = "agg"
        else:
            apply_func = "apply"

        # This is the simplest way to determine the return type, but there are checks
        # in pandas that verify that some results are created. This is a challenge for
        # empty DataFrames, but fortunately they only happen when the `func` type is
        # a list or a dictionary, which means that the return type won't change from
        # type(self), so we catch that error and use `type(self).__name__` for the return
        # type.
        # We create a "dummy" `Series` to do the error checking and determining
        # the return type.
        try:
            return_type = type(
                getattr(
                    pandas.Series(self[:1].values, index=self.index[:1]), apply_func
                )(func, *args, **kwargs)
            ).__name__
        except Exception:
            return_type = type(self).__name__
        if (
            isinstance(func, str)
            or is_list_like(func)
            or return_type not in ["DataFrame", "Series"]
        ):
            # use the explicit non-Compat parent to avoid infinite recursion
            result = super(Series, self).apply(
                func,
                axis=0,
                raw=False,
                result_type=None,
                args=args,
                **kwargs,
            )
        else:
            # handle ufuncs and lambdas
            # NOTE(review): `and` binds tighter than `or`, so this condition
            # reads ``kwargs or (args and not isinstance(func, np.ufunc))``.
            if kwargs or args and not isinstance(func, np.ufunc):

                def f(x):
                    return func(x, *args, **kwargs)

            else:
                f = func
            with np.errstate(all="ignore"):
                # A bare ufunc is called on the whole Series and its result is
                # returned directly, skipping the wrapping step below.
                if isinstance(f, np.ufunc):
                    return f(self)

                # The return_type is only a DataFrame when we have a function
                # return a Series object. This is a very particular case that
                # has to be handled by the underlying pandas.Series apply
                # function and not our default map call.
                if return_type == "DataFrame":
                    result = self._query_compiler.apply_on_series(f)
                else:
                    result = self.map(f)._query_compiler

        # Wrap the query compiler in the container type found by the probe.
        if return_type == "DataFrame":
            from .dataframe import DataFrame

            result = DataFrame(query_compiler=result)
        elif return_type == "Series":
            result = self.__constructor__(query_compiler=result)
            # Drop a result name that equals the first index label.
            if result.name == self.index[0]:
                result.name = None
        elif isinstance(result, type(self._query_compiler)):
            # sometimes result can be not a query_compiler, but scalar (for example
            # for sum or count functions)
            return result.to_pandas().squeeze()
        return result

    def transform(
        self, func, axis=0, *args, **kwargs
    ) -> Series:  # noqa: PR01, RT01, D200
        """
        Call ``func`` on self producing a `BasePandasDataset` with the same axis shape as self.

        When `func` is a list, repeated entries are dropped (the first
        occurrence is kept, order is preserved) before delegating to the
        parent implementation. An empty list is passed through unchanged so
        that the parent implementation reports the problem, instead of this
        method failing with an ``IndexError``.
        """
        if isinstance(func, list):
            # drop nonunique functions to align with pandas behavior instead of getting
            # "pandas.errors.SpecificationError: Function names must be unique..."
            # Example:
            # >>> pandas.Series([0., 1., 4.]).transform(["sqrt", "sqrt"])
            # sqrt
            # 0   0.0
            # 1   1.0
            # 2   2.0
            # A list is used (not a set/dict) because entries may be unhashable.
            unique_func = []
            for one_func in func:
                if one_func not in unique_func:
                    unique_func.append(one_func)
            func = unique_func
        return super(Series, self).transform(func, axis, *args, **kwargs)

    def argmax(
        self, axis=None, skipna=True, *args, **kwargs
    ) -> int:  # noqa: PR01, RT01, D200
        """
        Return int position of the largest value in the Series.

        The position is obtained as the ``idxmax`` label of a copy whose index
        has been reset with ``reset_index(drop=True)``. ``-1`` is returned
        when that lookup yields ``pandas.NA`` or NaN.
        """
        result = self.reset_index(drop=True).idxmax(
            axis=axis, skipna=skipna, *args, **kwargs
        )
        # The identity check must come first: ``np.isnan(pandas.NA)`` yields
        # ``pandas.NA``, whose truth value is ambiguous and raises TypeError.
        if result is pandas.NA or np.isnan(result):
            result = -1
        return result

    def argmin(
        self, axis=None, skipna=True, *args, **kwargs
    ) -> int:  # noqa: PR01, RT01, D200
        """
        Return int position of the smallest value in the Series.

        The position is obtained as the ``idxmin`` label of a copy whose index
        has been reset with ``reset_index(drop=True)``. ``-1`` is returned
        when that lookup yields ``pandas.NA`` or NaN.
        """
        result = self.reset_index(drop=True).idxmin(
            axis=axis, skipna=skipna, *args, **kwargs
        )
        # The identity check must come first: ``np.isnan(pandas.NA)`` yields
        # ``pandas.NA``, whose truth value is ambiguous and raises TypeError.
        if result is pandas.NA or np.isnan(result):
            result = -1
        return result

    def argsort(
        self, axis=0, kind="quicksort", order=None, stable=None
    ) -> Series:  # noqa: PR01, RT01, D200
        """
        Return the integer indices that would sort the Series values.
        """
        # 'stable' parameter has no effect in Pandas and is only accepted
        # for compatibility with NumPy, so we're not passing it forward on purpose
        sorted_positions_qc = self._query_compiler.argsort(
            axis=axis, kind=kind, order=order
        )
        return self.__constructor__(query_compiler=sorted_positions_qc)

    def autocorr(self, lag=1) -> float:  # noqa: PR01, RT01, D200
        """
        Compute the lag-N autocorrelation.
        """
        # Correlate the Series with a copy of itself shifted by `lag`.
        shifted = self.shift(lag)
        return self.corr(shifted)

    def between(
        self, left, right, inclusive: str = "both"
    ) -> Series:  # noqa: PR01, RT01, D200
        """
        Return boolean Series equivalent to left <= series <= right.
        """
        # 'pandas.Series.between()' only uses public Series' API,
        # so passing a Modin Series there is safe
        pandas_between = pandas.Series.between
        return pandas_between(self, left, right, inclusive=inclusive)

    def combine(self, other, func, fill_value=None) -> Series:  # noqa: PR01, RT01, D200
        """
        Combine the Series with a Series or scalar according to `func`.
        """

        def combine_pair(s1, s2):
            # Callback run by the parent implementation on a pair of objects;
            # it applies their own `combine` with the user-supplied arguments.
            return s1.combine(s2, func, fill_value=fill_value)

        return super(Series, self).combine(other, combine_pair)

    def compare(
        self,
        other: Series,
        align_axis: Union[str, int] = 1,
        keep_shape: bool = False,
        keep_equal: bool = False,
        result_names: tuple = ("self", "other"),
    ) -> Union[DataFrame, Series]:  # noqa: PR01, RT01, D200
        """
        Compare to another Series and show the differences.

        The comparison is carried out on the single-column frames of both
        operands. When the differences are aligned on the columns, the
        resulting columns are labelled with `result_names`.

        Raises
        ------
        TypeError
            If `other` is not a Modin Series.
        """
        if not isinstance(other, Series):
            raise TypeError(f"Cannot compare Series to {type(other)}")
        result = self.to_frame().compare(
            other.to_frame(),
            align_axis=align_axis,
            keep_shape=keep_shape,
            keep_equal=keep_equal,
            result_names=result_names,
        )
        if align_axis == "columns" or align_axis == 1:
            # Pandas.DataFrame.Compare returns a dataframe with a multidimensional index object as the
            # columns so we have to change column object back. The labels come
            # from `result_names` so that custom names are honoured rather than
            # being replaced by the defaults.
            result.columns = pandas.Index(list(result_names))
        else:
            result = result.squeeze().rename(None)
        return result

    def corr(
        self, other, method="pearson", min_periods=None
    ) -> float:  # noqa: PR01, RT01, D200
        """
        Compute correlation with `other` Series, excluding missing values.

        The ``"pearson"`` method is computed here from Series arithmetic; any
        other `method` is delegated to ``series_corr`` of the query compiler.
        NaN is returned when the aligned data is empty or has fewer than
        `min_periods` valid pairs.
        """
        if method == "pearson":
            # Keep only the labels present in both operands.
            this, other = self.align(other, join="inner", copy=False)
            this = self.__constructor__(this)
            other = self.__constructor__(other)

            if len(this) == 0:
                return np.nan
            if len(this) != len(other):
                raise ValueError("Operands must have same size")

            if min_periods is None:
                min_periods = 1

            # Drop every position where either operand is missing.
            valid = this.notna() & other.notna()
            if not valid.all():
                this = this[valid]
                other = other[valid]
            if len(this) < min_periods:
                return np.nan

            # Center both operands around their means.
            this = this.astype(dtype="float64")
            other = other.astype(dtype="float64")
            this -= this.mean()
            other -= other.mean()

            # Sum of products of the centered values, divided by (n - 1).
            other = other.__constructor__(query_compiler=other._query_compiler.conj())
            result = this * other / (len(this) - 1)
            result = np.array([result.sum()])

            # Square roots of the sums of squared centered values over (n - 1).
            stddev_this = ((this * this) / (len(this) - 1)).sum()
            stddev_other = ((other * other) / (len(other) - 1)).sum()

            stddev_this = np.array([np.sqrt(stddev_this)])
            stddev_other = np.array([np.sqrt(stddev_other)])

            result /= stddev_this * stddev_other

            # Clamp the real (and, for complex results, imaginary) part to
            # [-1, 1] in place.
            np.clip(result.real, -1, 1, out=result.real)
            if np.iscomplexobj(result):
                np.clip(result.imag, -1, 1, out=result.imag)
            return result[0]

        return self._query_compiler.series_corr(
            other=other, method=method, min_periods=min_periods
        )

    def count(self) -> int:  # noqa: PR01, RT01, D200
        """
        Return number of non-NA/null observations in the Series.
        """
        # Delegates to the parent-class implementation without extra arguments.
        return super(Series, self).count()

    def cov(
        self, other, min_periods=None, ddof: Optional[int] = 1
    ) -> float:  # noqa: PR01, RT01, D200
        """
        Compute covariance with Series, excluding missing values.

        NaN is returned when the aligned data is empty or has fewer than
        `min_periods` valid pairs. The sum of products of the centered values
        is divided by ``n - ddof``.
        """
        # Keep only the labels present in both operands.
        this, other = self.align(other, join="inner", copy=False)
        this = self.__constructor__(this)
        other = self.__constructor__(other)
        if len(this) == 0:
            return np.nan

        if len(this) != len(other):
            raise ValueError("Operands must have same size")

        if min_periods is None:
            min_periods = 1

        # Drop every position where either operand is missing.
        valid = this.notna() & other.notna()
        if not valid.all():
            this = this[valid]
            other = other[valid]

        if len(this) < min_periods:
            return np.nan

        this = this.astype(dtype="float64")
        other = other.astype(dtype="float64")

        # Center both operands around their means.
        this -= this.mean()
        other -= other.mean()

        other = other.__constructor__(query_compiler=other._query_compiler.conj())
        result = this * other / (len(this) - ddof)
        result = result.sum()
        return result

    def describe(
        self,
        percentiles=None,
        include=None,
        exclude=None,
    ) -> Union[DataFrame, Series]:  # noqa: PR01, RT01, D200
        """
        Generate descriptive statistics.

        `include` and `exclude` are accepted but not used: ``None`` is always
        forwarded for both.
        """
        # Pandas ignores the `include` and `exclude` for Series for some reason.
        return super(Series, self).describe(
            percentiles=percentiles,
            include=None,
            exclude=None,
        )

    def diff(self, periods=1) -> Series:  # noqa: PR01, RT01, D200
        """
        First discrete difference of element.
        """
        # The parent implementation takes an axis; it is fixed to 0 here.
        return super(Series, self).diff(periods=periods, axis=0)

    def divmod(
        self, other, level=None, fill_value=None, axis=0
    ) -> tuple[Series, Series]:  # noqa: PR01, RT01, D200
        """
        Return Integer division and modulo of series and `other`, element-wise (binary operator `divmod`).
        """
        # The query compiler returns a pair of compilers (quotient, remainder);
        # each one is wrapped separately.
        quotient_qc, remainder_qc = self._query_compiler.divmod(
            other=other, level=level, fill_value=fill_value, axis=axis
        )
        quotient = self.__constructor__(query_compiler=quotient_qc)
        remainder = self.__constructor__(query_compiler=remainder_qc)
        return quotient, remainder

    def dot(self, other) -> Union[Series, np.ndarray]:  # noqa: PR01, RT01, D200
        """
        Compute the dot product between the Series and the columns of `other`.
        """
        if isinstance(other, BasePandasDataset):
            # The union of both indexes may not be longer than either operand.
            common = self.index.union(other.index)
            if len(common) > len(self) or len(common) > len(other):
                raise ValueError("Matrices are not aligned")

            other_qc = other.reindex(index=common)._query_compiler
            other_is_series = isinstance(other, Series)
            product_qc = self._query_compiler.dot(
                other_qc, squeeze_self=True, squeeze_other=other_is_series
            )
            # Series x Series is reduced by `_reduce_dimension`; otherwise the
            # product is wrapped in a new Series.
            if other_is_series:
                return self._reduce_dimension(query_compiler=product_qc)
            return self.__constructor__(query_compiler=product_qc)

        # Anything else is treated as array-like.
        other = np.asarray(other)
        if self.shape[0] != other.shape[0]:
            raise ValueError(
                "Dot product shape mismatch, {} vs {}".format(self.shape, other.shape)
            )

        product_qc = self._query_compiler.dot(other, squeeze_self=True)
        if len(other.shape) > 1:
            return product_qc.to_numpy().squeeze()
        return self._reduce_dimension(query_compiler=product_qc)

    def drop_duplicates(
        self, *, keep="first", inplace=False, ignore_index=False
    ) -> Union[Series, None]:  # noqa: PR01, RT01, D200
        """
        Return Series with duplicate values removed.
        """
        # All three keyword-only options are forwarded unchanged to the parent
        # implementation.
        return super(Series, self).drop_duplicates(
            keep=keep, inplace=inplace, ignore_index=ignore_index
        )

    def dropna(
        self, *, axis=0, inplace=False, how=None, ignore_index=False
    ) -> Union[Series, None]:  # noqa: PR01, RT01, D200
        """
        Return a new Series with missing values removed.
        """
        # NOTE: `how` is accepted in the signature but is not forwarded to the
        # parent implementation.
        return super(Series, self).dropna(
            axis=axis, inplace=inplace, ignore_index=ignore_index
        )

    def duplicated(self, keep="first") -> Series:  # noqa: PR01, RT01, D200
        """
        Indicate duplicate Series values.
        """
        original_name = self.name
        flags = self.to_frame().duplicated(keep=keep)
        # DataFrame.duplicated drops the name, so it is restored manually
        # whenever this Series has one.
        if original_name is None:
            return flags
        flags.name = original_name
        return flags

    def eq(
        self, other, level=None, fill_value=None, axis=0
    ) -> Series:  # noqa: PR01, RT01, D200
        """
        Return Equal to of series and `other`, element-wise (binary operator `eq`).
        """
        lhs, rhs = self._prepare_inter_op(other)
        # `squeeze_other` reflects whether the ORIGINAL operand was a Series.
        other_is_series = isinstance(other, Series)
        return lhs._binary_op(
            "eq",
            rhs,
            level=level,
            fill_value=fill_value,
            axis=axis,
            squeeze_other=other_is_series,
        )

    def equals(self, other) -> bool:  # noqa: PR01, RT01, D200
        """
        Test whether two objects contain the same elements.
        """
        if isinstance(other, pandas.Series):
            # Copy into a Modin Series to simplify logic below
            other = self.__constructor__(other)

        # Objects of a different type or with a different index are never equal.
        same_type = type(self) is type(other)
        if not (same_type and self.index.equals(other.index)):
            return False

        # Both operands temporarily share one name while the query compilers
        # are compared; the original names are always restored afterwards.
        saved_names = (self.name, other.name)
        try:
            self.name = "temp_name_for_equals_op"
            other.name = "temp_name_for_equals_op"
            # this function should return only scalar
            comparison_qc = self._query_compiler.equals(other._query_compiler)
            res = self.__constructor__(query_compiler=comparison_qc)
        finally:
            self.name, other.name = saved_names
        return res.all()

    def explode(self, ignore_index: bool = False) -> Series:  # noqa: PR01, RT01, D200
        """
        Transform each element of a list-like to a row.
        """
        # The parent implementation needs a column label; an unnamed Series
        # uses the placeholder label instead of its (missing) name.
        if self.name is None:
            column = MODIN_UNNAMED_SERIES_LABEL
        else:
            column = self.name
        return super(Series, self).explode(column, ignore_index=ignore_index)

    def factorize(self, sort=False, use_na_sentinel=True):  # noqa: PR01, RT01, D200
        """
        Encode the object as an enumerated type or categorical variable.
        """
        # Executed by pandas through the default-to-pandas path.
        options = {"sort": sort, "use_na_sentinel": use_na_sentinel}
        return self._default_to_pandas(pandas.Series.factorize, **options)

    def case_when(self, caselist) -> Series:  # noqa: PR01, RT01, D200
        """
        Replace values where the conditions are True.
        """
        own_type = type(self)
        # Rebuild every case as a tuple in which objects of this Series' type
        # are replaced by their query compilers; other items stay untouched.
        unwrapped_cases = []
        for case_tuple in caselist:
            unwrapped = []
            for data in case_tuple:
                if isinstance(data, own_type):
                    data = data._query_compiler
                unwrapped.append(data)
            unwrapped_cases.append(tuple(unwrapped))
        result_qc = self._query_compiler.case_when(caselist=unwrapped_cases)
        return self.__constructor__(query_compiler=result_qc)

    def fillna(
        self,
        value=None,
        *,
        method=None,
        axis=None,
        inplace=False,
        limit=None,
        downcast=lib.no_default,
    ) -> Union[Series, None]:  # noqa: PR01, RT01, D200
        """
        Fill NaNs inside of a Series object.
        """
        value_is_series = isinstance(value, Series)
        # Among Modin datasets only a Series is an acceptable `value`.
        if isinstance(value, BasePandasDataset) and not value_is_series:
            raise TypeError(
                '"value" parameter must be a scalar, dict or Series, but '
                + f'you passed a "{type(value).__name__}"'
            )
        parent = super(Series, self)
        return parent.fillna(
            squeeze_self=True,
            squeeze_value=value_is_series,
            value=value,
            method=method,
            axis=axis,
            inplace=inplace,
            limit=limit,
            downcast=downcast,
        )

    def floordiv(
        self, other, level=None, fill_value=None, axis=0
    ) -> Series:  # noqa: PR01, RT01, D200
        """
        Get Integer division of series and `other`, element-wise (binary operator `floordiv`).
        """
        # Both operands are prepared first; the parent implementation is then
        # invoked on the prepared left operand.
        lhs, rhs = self._prepare_inter_op(other)
        parent = super(Series, lhs)
        return parent.floordiv(rhs, level=level, fill_value=fill_value, axis=axis)

    def ge(
        self, other, level=None, fill_value=None, axis=0
    ) -> Series:  # noqa: PR01, RT01, D200
        """
        Return greater than or equal to of series and `other`, element-wise (binary operator `ge`).
        """
        lhs, rhs = self._prepare_inter_op(other)
        # `squeeze_other` reflects whether the ORIGINAL operand was a Series.
        other_is_series = isinstance(other, Series)
        return lhs._binary_op(
            "ge",
            rhs,
            level=level,
            fill_value=fill_value,
            axis=axis,
            squeeze_other=other_is_series,
        )

    def groupby(
        self,
        by=None,
        axis=0,
        level=None,
        as_index=True,
        sort=True,
        group_keys=True,
        observed=lib.no_default,
        dropna: bool = True,
    ):  # noqa: PR01, RT01, D200
        """
        Group Series using a mapper or by a Series of columns.
        """
        from .groupby import SeriesGroupBy

        if not as_index:
            raise TypeError("as_index=False only valid with DataFrame")

        if isinstance(by, Series):
            # SeriesGroupBy expects a query compiler object if it is available
            grouper = by._query_compiler
        elif callable(by):
            # A callable is evaluated against the index.
            grouper = by(self.index)
        else:
            if by is None and level is None:
                raise TypeError("You have to supply one of 'by' and 'level'")
            grouper = by

        pinned = self.is_backend_pinned()
        return SeriesGroupBy(
            self,
            grouper,
            axis,
            level,
            as_index,
            sort,
            group_keys,
            idx_name=None,
            observed=observed,
            drop=False,
            dropna=dropna,
            backend_pinned=pinned,
        )

    def gt(
        self, other, level=None, fill_value=None, axis=0
    ) -> Series:  # noqa: PR01, RT01, D200
        """
        Return greater than of series and `other`, element-wise (binary operator `gt`).
        """
        lhs, rhs = self._prepare_inter_op(other)
        # `squeeze_other` reflects whether the ORIGINAL operand was a Series.
        other_is_series = isinstance(other, Series)
        return lhs._binary_op(
            "gt",
            rhs,
            level=level,
            fill_value=fill_value,
            axis=axis,
            squeeze_other=other_is_series,
        )

    def hist(
        self,
        by=None,
        ax=None,
        grid: bool = True,
        xlabelsize: int | None = None,
        xrot: float | None = None,
        ylabelsize: int | None = None,
        yrot: float | None = None,
        figsize: tuple[int, int] | None = None,
        bins: int | Sequence[int] = 10,
        backend: str | None = None,
        legend: bool = False,
        **kwargs,
    ):  # noqa: PR01, RT01, D200
        """
        Draw histogram of the input series using matplotlib.
        """
        # Every named option is forwarded by keyword, followed by the extra
        # keyword arguments; plotting is executed by pandas.
        options = dict(
            by=by,
            ax=ax,
            grid=grid,
            xlabelsize=xlabelsize,
            xrot=xrot,
            ylabelsize=ylabelsize,
            yrot=yrot,
            figsize=figsize,
            bins=bins,
            backend=backend,
            legend=legend,
        )
        return self._default_to_pandas(pandas.Series.hist, **options, **kwargs)

    def idxmax(
        self, axis=0, skipna=True, *args, **kwargs
    ) -> Hashable:  # noqa: PR01, RT01, D200
        """
        Return the row label of the maximum value.
        """
        # Thin wrapper: every argument is forwarded to the parent implementation.
        return super(Series, self).idxmax(axis=axis, skipna=skipna, *args, **kwargs)

    def idxmin(
        self, axis=0, skipna=True, *args, **kwargs
    ) -> Hashable:  # noqa: PR01, RT01, D200
        """
        Return the row label of the minimum value.
        """
        # Thin wrapper: every argument is forwarded to the parent implementation.
        return super(Series, self).idxmin(axis=axis, skipna=skipna, *args, **kwargs)

    def info(
        self,
        verbose: bool | None = None,
        buf: IO[str] | None = None,
        max_cols: int | None = None,
        memory_usage: bool | str | None = None,
        show_counts: bool = True,
    ) -> None:
        # `memory_usage` configures the SeriesInfo object itself; the remaining
        # options are passed to its `render` call.
        renderer = SeriesInfo(self, memory_usage)
        return renderer.render(
            buf=buf,
            max_cols=max_cols,
            verbose=verbose,
            show_counts=show_counts,
        )

    def isna(self) -> Series:
        """
        Detect missing values.

        Returns
        -------
        The result of detecting missing values, as produced by the parent class.
        """
        parent = super(Series, self)
        return parent.isna()

    def isnull(self) -> Series:
        """
        Detect missing values.

        Returns
        -------
        The result of detecting missing values, as produced by the parent class.
        """
        parent = super(Series, self)
        return parent.isnull()

    def item(self) -> Scalar:  # noqa: RT01, D200
        """
        Return the first element of the underlying data as a Python scalar.
        """
        # Use positional access: `self[0]` is a *label* lookup and fails (or picks
        # the wrong element) whenever the index does not contain the label 0 at
        # the first position.
        return self.iloc[0]

    def items(self) -> Iterable[tuple[Hashable, Any]]:  # noqa: D200
        """
        Lazily iterate over (index, value) tuples.
        """

        def to_pair(piece):
            # Each piece handed over by the iterator contributes its `name`
            # and its squeezed content as one tuple.
            return piece.name, piece.squeeze()

        yield from PartitionIterator(self.to_frame(), 0, to_pair)

    def keys(self) -> pandas.Index:  # noqa: RT01, D200
        """
        Return the index of the Series (alias for the `index` attribute).
        """
        index = self.index
        return index

    def le(
        self, other, level=None, fill_value=None, axis=0
    ) -> Series:  # noqa: PR01, RT01, D200
        """
        Compare the series with `other` element-wise (binary operator `le`).
        """
        # Remember whether the caller handed us a Series: the check is made on the
        # original argument, not on what `_prepare_inter_op` returns.
        other_is_series = isinstance(other, Series)
        lhs, rhs = self._prepare_inter_op(other)
        return lhs._binary_op(
            "le",
            rhs,
            level=level,
            fill_value=fill_value,
            axis=axis,
            squeeze_other=other_is_series,
        )

    def lt(
        self, other, level=None, fill_value=None, axis=0
    ) -> Series:  # noqa: PR01, RT01, D200
        """
        Compare the series with `other` element-wise (binary operator `lt`).
        """
        # Remember whether the caller handed us a Series: the check is made on the
        # original argument, not on what `_prepare_inter_op` returns.
        other_is_series = isinstance(other, Series)
        lhs, rhs = self._prepare_inter_op(other)
        return lhs._binary_op(
            "lt",
            rhs,
            level=level,
            fill_value=fill_value,
            axis=axis,
            squeeze_other=other_is_series,
        )

    def map(self, arg, na_action=None) -> Series:  # noqa: PR01, RT01, D200
        """
        Map values of Series according to input correspondence.
        """
        if isinstance(arg, type(self)):
            # HACK: if we don't cast to pandas, then the execution engine will try to
            # propagate the distributed Series to workers and most likely would have
            # some performance problems.
            # TODO: A better way of doing so could be passing this `arg` as a query compiler
            # and broadcast accordingly.
            arg = arg._to_pandas()

        # A non-callable object that exposes `.get` (e.g. a dict-like mapper) is
        # wrapped in a function that looks each value up and falls back to NaN
        # when the lookup finds nothing. `arg` is rebound to that function.
        if not callable(arg) and hasattr(arg, "get"):
            mapper = arg

            def arg(s):
                return mapper.get(s, np.nan)

        # `arg` is applied to a value unless `na_action` is set AND
        # `pandas.isnull(value)` is exactly `True`; in that case the value is
        # passed through unchanged.
        return self.__constructor__(
            query_compiler=self._query_compiler.map(
                lambda s: (
                    arg(s) if pandas.isnull(s) is not True or na_action is None else s
                )
            )
        )

    def sem(
        self,
        axis: Optional[Axis] = None,
        skipna: bool = True,
        ddof: int = 1,
        numeric_only=False,
        **kwargs,
    ) -> Union[float, Series]:  # noqa: PR01, RT01, D200
        """
        Return unbiased standard error of the mean over requested axis.
        """
        # `ddof` travels as a keyword ahead of any extra keyword arguments.
        stat_options = {"ddof": ddof, **kwargs}
        parent = super(Series, self)
        return parent._stat_operation(
            "sem", axis, skipna, numeric_only, **stat_options
        )

    def std(
        self,
        axis: Optional[Axis] = None,
        skipna: bool = True,
        ddof: int = 1,
        numeric_only=False,
        **kwargs,
    ) -> Union[float, Series]:  # noqa: PR01, RT01, D200
        """
        Return sample standard deviation over requested axis.
        """
        # `ddof` travels as a keyword ahead of any extra keyword arguments.
        stat_options = {"ddof": ddof, **kwargs}
        parent = super(Series, self)
        return parent._stat_operation(
            "std", axis, skipna, numeric_only, **stat_options
        )

    def var(
        self,
        axis: Optional[Axis] = None,
        skipna: bool = True,
        ddof: int = 1,
        numeric_only=False,
        **kwargs,
    ) -> Union[float, Series]:  # noqa: PR01, RT01, D200
        """
        Return unbiased variance over requested axis.
        """
        # `ddof` travels as a keyword ahead of any extra keyword arguments.
        stat_options = {"ddof": ddof, **kwargs}
        parent = super(Series, self)
        return parent._stat_operation(
            "var", axis, skipna, numeric_only, **stat_options
        )

    def memory_usage(self, index=True, deep=False) -> int:  # noqa: PR01, RT01, D200
        """
        Return the memory usage of the Series.
        """
        # The parent implementation returns an object with a `.sum()`; the
        # Series-level answer is that total.
        breakdown = super(Series, self).memory_usage(index=index, deep=deep)
        return breakdown.sum()

    def mod(
        self, other, level=None, fill_value=None, axis=0
    ) -> Series:  # noqa: PR01, RT01, D200
        """
        Return modulo of series and `other`, element-wise (binary operator `mod`).
        """
        lhs, rhs = self._prepare_inter_op(other)
        parent_op = super(Series, lhs).mod
        return parent_op(rhs, level=level, fill_value=fill_value, axis=axis)

    def mode(self, dropna=True) -> Series:  # noqa: PR01, RT01, D200
        """
        Return the mode(s) of the Series.
        """
        # The parent implementation is always called with `numeric_only=False`.
        parent = super(Series, self)
        return parent.mode(numeric_only=False, dropna=dropna)

    def mul(
        self, other, level=None, fill_value=None, axis=0
    ) -> Series:  # noqa: PR01, RT01, D200
        """
        Return multiplication of series and `other`, element-wise (binary operator `mul`).
        """
        lhs, rhs = self._prepare_inter_op(other)
        parent_op = super(Series, lhs).mul
        return parent_op(rhs, level=level, fill_value=fill_value, axis=axis)

    # `multiply` is bound to the same function object as `mul`.
    multiply = mul

    def rmul(
        self, other, level=None, fill_value=None, axis=0
    ) -> Series:  # noqa: PR01, RT01, D200
        """
        Return multiplication of series and `other`, element-wise (binary operator `rmul`).
        """
        lhs, rhs = self._prepare_inter_op(other)
        parent_op = super(Series, lhs).rmul
        return parent_op(rhs, level=level, fill_value=fill_value, axis=axis)

    def ne(
        self, other, level=None, fill_value=None, axis=0
    ) -> Series:  # noqa: PR01, RT01, D200
        """
        Compare the series with `other` element-wise (binary operator `ne`).
        """
        # Remember whether the caller handed us a Series: the check is made on the
        # original argument, not on what `_prepare_inter_op` returns.
        other_is_series = isinstance(other, Series)
        lhs, rhs = self._prepare_inter_op(other)
        return lhs._binary_op(
            "ne",
            rhs,
            level=level,
            fill_value=fill_value,
            axis=axis,
            squeeze_other=other_is_series,
        )

    def nlargest(self, n=5, keep="first") -> Series:  # noqa: PR01, RT01, D200
        """
        Return the largest `n` elements.
        """
        if len(self._query_compiler.columns) == 0:
            # pandas returns empty series when requested largest/smallest from empty series
            return self.__constructor__(data=[], dtype=float)
        # Build the result through `__constructor__` (as `nsmallest` does) rather
        # than hard-coding `Series`, so both methods construct results the same way.
        return self.__constructor__(
            query_compiler=self._query_compiler.nlargest(
                n=n, columns=self.name, keep=keep
            )
        )

    def nsmallest(self, n=5, keep="first") -> Series:  # noqa: PR01, RT01, D200
        """
        Return the smallest `n` elements.
        """
        if len(self._query_compiler.columns) != 0:
            smallest_qc = self._query_compiler.nsmallest(
                n=n, columns=self.name, keep=keep
            )
            return self.__constructor__(query_compiler=smallest_qc)
        # pandas returns empty series when requested largest/smallest from empty series
        return self.__constructor__(data=[], dtype=float)

    def shift(
        self,
        periods=1,
        freq=None,
        axis=0,
        fill_value=lib.no_default,
        suffix=None,
    ) -> Series:  # noqa: PR01, RT01, D200
        """
        Shift index by desired number of periods with an optional time `freq`.

        Raises
        ------
        ValueError
            If both `freq` and `fill_value` are given, or if `axis` equals 1.
        """
        # pandas 2.1.0 ignores suffix parameter (https://github.com/pandas-dev/pandas/issues/54806)
        if freq is not None and fill_value is not lib.no_default:
            raise ValueError(
                f"Cannot pass both 'freq' and 'fill_value' to {type(self).__name__}.shift"
            )
        if axis == 1:
            raise ValueError(
                f"No axis named {axis} for object type {type(self).__name__}"
            )
        # Anchor `super` at `Series` explicitly: `super(type(self), self)` resolves
        # back to this very method when `self` is an instance of a subclass,
        # which recurses without end.
        return super(Series, self).shift(
            periods=periods, freq=freq, axis=axis, fill_value=fill_value
        )

    def unstack(
        self, level=-1, fill_value=None, sort=True
    ) -> DataFrame:  # noqa: PR01, RT01, D200
        """
        Unstack, also known as pivot, Series with MultiIndex to produce DataFrame.
        """
        from .dataframe import DataFrame

        if not sort:
            # TODO: it should be easy to add support for sort == False
            return self._default_to_pandas(
                pandas.Series.unstack, level=level, fill_value=fill_value, sort=sort
            )

        # We can't unstack a Series object, if we don't have a MultiIndex.
        if len(self.index.names) <= 1:
            raise ValueError(
                f"index must be a MultiIndex to unstack, {type(self.index)} was passed"
            )

        unstacked_qc = self._query_compiler.unstack(level, fill_value)
        frame = DataFrame(query_compiler=unstacked_qc)
        # When the resulting columns have more than one level, the outermost
        # one is dropped.
        if frame.columns.nlevels > 1:
            return frame.droplevel(0, axis=1)
        return frame

    @property
    def plot(
        self,
        kind="line",
        ax=None,
        figsize=None,
        use_index=True,
        title=None,
        grid=None,
        legend=False,
        style=None,
        logx=False,
        logy=False,
        loglog=False,
        xticks=None,
        yticks=None,
        xlim=None,
        ylim=None,
        rot=None,
        fontsize=None,
        colormap=None,
        table=False,
        yerr=None,
        xerr=None,
        label=None,
        secondary_y=False,
        **kwds,
    ):  # noqa: PR01, RT01, D200
        """
        Make plot of Series.
        """
        # None of the declared parameters are read here: the Series is converted
        # to pandas and that object's `plot` attribute is handed back as-is.
        as_pandas = self._to_pandas()
        return as_pandas.plot

    def pow(
        self, other, level=None, fill_value=None, axis=0
    ) -> Series:  # noqa: PR01, RT01, D200
        """
        Return exponential power of series and `other`, element-wise (binary operator `pow`).
        """
        lhs, rhs = self._prepare_inter_op(other)
        parent_op = super(Series, lhs).pow
        return parent_op(rhs, level=level, fill_value=fill_value, axis=axis)

    @_inherit_docstrings(pandas.Series.prod, apilink="pandas.Series.prod")
    def prod(
        self,
        axis=None,
        skipna=True,
        numeric_only=False,
        min_count=0,
        **kwargs,
    ) -> Scalar:
        validate_bool_kwarg(skipna, "skipna", none_allowed=False)
        axis = self._get_axis_number(axis)

        # More required values than there are labels along the reduced axis:
        # the answer is NaN without touching the data.
        labels = self.columns if axis else self.index
        if min_count > len(labels):
            return np.nan

        data = self._validate_dtypes_prod_mean(axis, numeric_only, ignore_axis=True)
        # `min_count > 1` goes through the dedicated `prod_min_count` query
        # compiler method; everything else uses plain `prod`.
        reducer_name = "prod_min_count" if min_count > 1 else "prod"
        reducer = getattr(data._query_compiler, reducer_name)
        return data._reduce_dimension(
            reducer(
                axis=axis,
                skipna=skipna,
                numeric_only=numeric_only,
                min_count=min_count,
                **kwargs,
            )
        )

    # `product` is bound to the same function object as `prod`.
    product = prod

    def ravel(self, order="C") -> ArrayLike:  # noqa: PR01, RT01, D200
        """
        Return the flattened underlying data as an ndarray.
        """
        flat = self._query_compiler.to_numpy().flatten(order=order)
        if not isinstance(self.dtype, pandas.CategoricalDtype):
            return flat
        # For a categorical dtype the flattened values are wrapped in a
        # `pandas.Categorical` carrying the Series' dtype.
        return pandas.Categorical(flat, dtype=self.dtype)

    @_inherit_docstrings(pandas.Series.reindex, apilink="pandas.Series.reindex")
    def reindex(
        self,
        index=None,
        *,
        axis: Axis = None,
        method: str = None,
        copy: Optional[bool] = None,
        level=None,
        fill_value=None,
        limit: int = None,
        tolerance=None,
    ) -> Series:  # noqa: PR01, RT01, D200
        # A missing `fill_value` is replaced by NaN before delegating; `axis` is
        # accepted but not forwarded, and `columns` is always `None`.
        effective_fill = np.nan if fill_value is None else fill_value
        parent = super(Series, self)
        return parent.reindex(
            index=index,
            columns=None,
            method=method,
            level=level,
            copy=copy,
            limit=limit,
            tolerance=tolerance,
            fill_value=effective_fill,
        )

    def rename_axis(
        self,
        mapper=lib.no_default,
        *,
        index=lib.no_default,
        axis=0,
        copy=True,
        inplace=False,
    ) -> Union[Series, None]:  # noqa: PR01, RT01, D200
        """
        Set the name of the axis for the index or columns.
        """
        # Every argument is forwarded unchanged to the parent implementation.
        forwarded = {
            "mapper": mapper,
            "index": index,
            "axis": axis,
            "copy": copy,
            "inplace": inplace,
        }
        return super().rename_axis(**forwarded)

    def _set_axis_name(self, name, axis=0, inplace=False) -> Union[Series, None]:
        """
        Alter the name of the axis.

        Parameters
        ----------
        name : str
            Name for the Series.
        axis : str or int, default: 0
            The axis to set the label.
            Only 0 is valid for Series.
        inplace : bool, default: False
            Whether to modify `self` directly or return a copy.

        Returns
        -------
        Series or None
        """
        self._get_axis_number(axis)  # raises ValueError if not 0
        renamed = self if inplace else self.copy()
        renamed.index = renamed.index.set_names(name)
        return None if inplace else renamed

    def rename(
        self,
        index=None,
        *,
        axis=None,
        copy=None,
        inplace=False,
        level=None,
        errors="ignore",
    ) -> Union[Series, None]:  # noqa: PR01, RT01, D200
        """
        Alter Series index labels or name.

        A scalar or a non-dict list-like `index` replaces the Series name; anything
        else is treated as a mapping/function applied to the index labels.
        `axis`, `copy`, `level` and `errors` are accepted but not used here.

        Returns
        -------
        Series or None
            The renamed Series, or ``None`` when `inplace` is true.
        """
        non_mapping = is_scalar(index) or (
            is_list_like(index) and not is_dict_like(index)
        )
        if non_mapping:
            target = self if inplace else self.copy()
            target.name = index
            return None if inplace else target

        from .dataframe import DataFrame

        result = DataFrame(self.copy()).rename(index=index).squeeze(axis=1)
        result.name = self.name
        if not inplace:
            return result
        # Honour `inplace` for label mappings too: previously a new object was
        # returned and `self` was left untouched.
        self._update_inplace(new_query_compiler=result._query_compiler)
        return None

    def repeat(self, repeats, axis=None) -> Series:  # noqa: PR01, RT01, D200
        """
        Repeat elements of a Series.
        """
        # A zero repeat count, given either as the int 0 or as a one-element
        # list-like holding 0, produces a Series built with no arguments.
        scalar_zero = isinstance(repeats, int) and repeats == 0
        single_zero = (
            not scalar_zero
            and is_list_like(repeats)
            and len(repeats) == 1
            and repeats[0] == 0
        )
        if scalar_zero or single_zero:
            return self.__constructor__()

        repeated_qc = self._query_compiler.repeat(repeats)
        return self.__constructor__(query_compiler=repeated_qc)

    def reset_index(
        self,
        level=None,
        *,
        drop=False,
        name=lib.no_default,
        inplace=False,
        allow_duplicates=False,
    ) -> Union[DataFrame, Series, None]:  # noqa: PR01, RT01, D200
        """
        Generate a new Series with the index reset.
        """
        if name is lib.no_default:
            # For backwards compatibility, keep columns as [0] instead of
            #  [None] when self.name is None
            name = self.name if self.name is not None else 0

        if drop and level is None:
            # Dropping the whole index: just install a fresh RangeIndex.
            positional_index = pandas.RangeIndex(len(self))
            if inplace:
                self.index = positional_index
                return None
            fresh = self.copy()
            fresh.index = positional_index
            return fresh

        if inplace and not drop:
            raise TypeError(
                "Cannot reset_index inplace on a Series to create a DataFrame"
            )

        from .dataframe import DataFrame

        named = self.copy()
        named.name = name
        # Here `query_compiler` is passed instead of `named` to avoid unnecessary `copy()`
        # inside `DataFrame` constructor
        frame = DataFrame(query_compiler=named._query_compiler)
        return frame.reset_index(
            level=level,
            drop=drop,
            inplace=inplace,
            col_level=0,
            col_fill="",
            allow_duplicates=allow_duplicates,
            names=None,
        )

    def rdivmod(
        self, other, level=None, fill_value=None, axis=0
    ) -> tuple[Series, Series]:  # noqa: PR01, RT01, D200
        """
        Return integer division and modulo of series and `other`, element-wise (binary operator `rdivmod`).

        Returns
        -------
        tuple of Series
            ``(division, modulo)``, each built from the corresponding query
            compiler returned by ``_query_compiler.rdivmod``.
        """
        division_qc, modulo_qc = self._query_compiler.rdivmod(
            other=other, level=level, fill_value=fill_value, axis=axis
        )
        division = self.__constructor__(query_compiler=division_qc)
        modulo = self.__constructor__(query_compiler=modulo_qc)
        return division, modulo

    def rfloordiv(
        self, other, level=None, fill_value=None, axis=0
    ) -> Series:  # noqa: PR01, RT01, D200
        """
        Return integer division of series and `other`, element-wise (binary operator `rfloordiv`).
        """
        lhs, rhs = self._prepare_inter_op(other)
        parent_op = super(Series, lhs).rfloordiv
        return parent_op(rhs, level=level, fill_value=fill_value, axis=axis)

    def rmod(
        self, other, level=None, fill_value=None, axis=0
    ) -> Series:  # noqa: PR01, RT01, D200
        """
        Return modulo of series and `other`, element-wise (binary operator `rmod`).
        """
        lhs, rhs = self._prepare_inter_op(other)
        parent_op = super(Series, lhs).rmod
        return parent_op(rhs, level=level, fill_value=fill_value, axis=axis)

    def rpow(
        self, other, level=None, fill_value=None, axis=0
    ) -> Series:  # noqa: PR01, RT01, D200
        """
        Return exponential power of series and `other`, element-wise (binary operator `rpow`).
        """
        lhs, rhs = self._prepare_inter_op(other)
        parent_op = super(Series, lhs).rpow
        return parent_op(rhs, level=level, fill_value=fill_value, axis=axis)

    def rsub(
        self, other, level=None, fill_value=None, axis=0
    ) -> Series:  # noqa: PR01, RT01, D200
        """
        Return subtraction of series and `other`, element-wise (binary operator `rsub`).
        """
        lhs, rhs = self._prepare_inter_op(other)
        parent_op = super(Series, lhs).rsub
        return parent_op(rhs, level=level, fill_value=fill_value, axis=axis)

    def rtruediv(
        self, other, level=None, fill_value=None, axis=0
    ) -> Series:  # noqa: PR01, RT01, D200
        """
        Return floating division of series and `other`, element-wise (binary operator `rtruediv`).
        """
        lhs, rhs = self._prepare_inter_op(other)
        parent_op = super(Series, lhs).rtruediv
        return parent_op(rhs, level=level, fill_value=fill_value, axis=axis)

    # `rdiv` is bound to the same function object as `rtruediv`.
    rdiv = rtruediv

    def quantile(
        self, q=0.5, interpolation="linear"
    ) -> Union[float, Series]:  # noqa: PR01, RT01, D200
        """
        Return value at the given quantile.
        """
        # Only `q` and `interpolation` come from the caller; the remaining
        # arguments of the parent implementation are fixed here.
        fixed_options = {"axis": 0, "numeric_only": False, "method": "single"}
        parent = super(Series, self)
        return parent.quantile(q=q, interpolation=interpolation, **fixed_options)

    def reorder_levels(self, order) -> Series:  # noqa: PR01, RT01, D200
        """
        Rearrange index levels using input order (delegates to the parent class).
        """
        parent = super(Series, self)
        return parent.reorder_levels(order)

    def replace(
        self,
        to_replace=None,
        value=lib.no_default,
        *,
        inplace=False,
        limit=None,
        regex=False,
        method: str | lib.NoDefault = lib.no_default,
    ) -> Series:  # noqa: PR01, RT01, D200
        """
        Replace values given in `to_replace` with `value`.
        """
        inplace = validate_bool_kwarg(inplace, "inplace")
        # The query compiler is always asked for a new object (`inplace=False`);
        # whether `self` is updated is decided afterwards from `inplace`.
        replace_options = dict(
            to_replace=to_replace,
            value=value,
            inplace=False,
            limit=limit,
            regex=regex,
            method=method,
        )
        replaced_qc = self._query_compiler.replace(**replace_options)
        return self._create_or_update_from_compiler(replaced_qc, inplace)

    def searchsorted(
        self, value, side="left", sorter=None
    ) -> Union[npt.NDArray[np.intp], np.intp]:  # noqa: PR01, RT01, D200
        """
        Find indices where elements should be inserted to maintain order.
        """
        compiler = self._query_compiler
        if sorter is not None:
            # `iloc` method works slowly (https://github.com/modin-project/modin/issues/1903),
            # so _default_to_pandas is used for now
            # searchsorted_qc = self.iloc[sorter].reset_index(drop=True)._query_compiler
            # sorter = None
            return self._default_to_pandas(
                pandas.Series.searchsorted, value, side=side, sorter=sorter
            )

        # searchsorted should return item number irrespective of Series index, so
        # Series.index is always set to pandas.RangeIndex, which can be easily processed
        # on the query_compiler level
        if not isinstance(compiler.index, pandas.RangeIndex):
            compiler = compiler.reset_index(drop=True)

        positions_qc = compiler.searchsorted(value=value, side=side, sorter=sorter)
        positions = self.__constructor__(query_compiler=positions_qc).squeeze()

        # matching Pandas output
        if not is_scalar(value) and not is_list_like(positions):
            return np.array([positions])
        if isinstance(positions, type(self)):
            return positions.to_numpy()
        return positions

    def sort_values(
        self,
        *,
        axis=0,
        ascending=True,
        inplace=False,
        kind="quicksort",
        na_position="last",
        ignore_index: bool = False,
        key: Optional[IndexKeyFunc] = None,
    ) -> Union[Series, None]:  # noqa: PR01, RT01, D200
        """
        Sort by the values.
        """
        from .dataframe import DataFrame

        # When we convert to a DataFrame, the name is automatically converted to 0 if it
        # is None, so we do this to avoid a KeyError.
        sort_column = 0 if self.name is None else self.name

        # The sort itself always produces a new frame; `inplace` is only applied
        # at the very end, when the result is written back or returned.
        frame = DataFrame(self.copy())
        sorted_frame = frame.sort_values(
            by=sort_column,
            ascending=ascending,
            inplace=False,
            kind=kind,
            na_position=na_position,
            ignore_index=ignore_index,
            key=key,
        )
        sorted_series = sorted_frame.squeeze(axis=1)
        sorted_series.name = self.name
        return self._create_or_update_from_compiler(
            sorted_series._query_compiler, inplace=inplace
        )

    # Accessor namespaces: each attribute below is a `CachedAccessor` created
    # from a name and the class implementing that namespace.
    # NOTE(review): `str` and `list` rebind the builtin names for any code that
    # runs directly in the class body after this point — confirm nothing below
    # relies on the builtins at class-body level.
    cat = CachedAccessor("cat", CategoryMethods)
    sparse = CachedAccessor("sparse", SparseAccessor)
    str = CachedAccessor("str", StringMethods)
    dt = CachedAccessor("dt", DatetimeProperties)
    list = CachedAccessor("list", ListAccessor)
    struct = CachedAccessor("struct", StructAccessor)

    def squeeze(self, axis=None) -> Union[Series, Scalar]:  # noqa: PR01, RT01, D200
        """
        Squeeze 1 dimensional axis objects into scalars.
        """
        if axis is not None:
            # Validate `axis`
            pandas.Series._get_axis_number(axis)
        # Only a length-1 Series is reduced; any other length yields a copy.
        if len(self) != 1:
            return self.copy()
        return self._reduce_dimension(self._query_compiler)

    def sub(
        self, other, level=None, fill_value=None, axis=0
    ) -> Series:  # noqa: PR01, RT01, D200
        """
        Return subtraction of Series and `other`, element-wise (binary operator `sub`).
        """
        lhs, rhs = self._prepare_inter_op(other)
        parent_op = super(Series, lhs).sub
        return parent_op(rhs, level=level, fill_value=fill_value, axis=axis)

    # `subtract` is bound to the same function object as `sub`.
    subtract = sub

    def sum(
        self,
        axis=None,
        skipna=True,
        numeric_only=False,
        min_count=0,
        **kwargs,
    ) -> Scalar:  # noqa: PR01, RT01, D200
        """
        Return the sum of the values.
        """
        validate_bool_kwarg(skipna, "skipna", none_allowed=False)
        axis = self._get_axis_number(axis)

        # More required values than there are labels along the reduced axis:
        # the answer is NaN without touching the data.
        labels = self.columns if axis else self.index
        if min_count > len(labels):
            return np.nan

        data = self._validate_dtypes_prod_mean(axis, numeric_only, ignore_axis=False)
        # `min_count > 1` goes through the dedicated `sum_min_count` query
        # compiler method; everything else uses plain `sum`.
        reducer_name = "sum_min_count" if min_count > 1 else "sum"
        reducer = getattr(data._query_compiler, reducer_name)
        return data._reduce_dimension(
            reducer(
                axis=axis,
                skipna=skipna,
                numeric_only=numeric_only,
                min_count=min_count,
                **kwargs,
            )
        )

    def swaplevel(self, i=-2, j=-1, copy=None) -> Series:  # noqa: PR01, RT01, D200
        """
        Swap levels `i` and `j` in a `MultiIndex`.
        """
        # `copy=None` behaves like `copy=True`.
        wants_copy = copy is None or copy
        target = self.copy() if wants_copy else self
        return super(Series, target).swaplevel(i, j, axis=0)

    def take(self, indices, axis=0, **kwargs) -> Series:  # noqa: PR01, RT01, D200
        """
        Return the elements in the given positional indices along an axis.
        """
        parent = super(Series, self)
        return parent.take(indices, axis=axis, **kwargs)

    def to_dict(self, into=dict) -> dict:  # pragma: no cover # noqa: PR01, RT01, D200
        """
        Convert Series to {label -> value} dict or dict-like object.
        """
        # The conversion is carried out by the query compiler; `into` is passed
        # through positionally.
        compiler = self._query_compiler
        return compiler.series_to_dict(into)

    def to_frame(
        self, name: Hashable = lib.no_default
    ) -> DataFrame:  # noqa: PR01, RT01, D200
        """
        Convert Series to DataFrame.
        """
        from .dataframe import DataFrame

        series_copy = self.copy()
        # `None` is treated exactly like "no name supplied": the copy keeps its
        # current name in both cases.
        name_given = name is not None and name is not lib.no_default
        if name_given:
            series_copy.name = name

        return DataFrame(series_copy)

    def to_json(
        self,
        path_or_buf=None,
        orient=None,
        date_format=None,
        double_precision=10,
        force_ascii=True,
        date_unit="ms",
        default_handler=None,
        lines=False,
        compression="infer",
        index=None,
        indent=None,
        storage_options: StorageOptions = None,
        mode="w",
    ) -> str | None:
        from modin.core.execution.dispatching.factories.dispatcher import (
            FactoryDispatcher,
        )

        # Every option is forwarded unchanged; the query compiler and
        # `path_or_buf` go first as positional arguments.
        json_options = dict(
            orient=orient,
            date_format=date_format,
            double_precision=double_precision,
            force_ascii=force_ascii,
            date_unit=date_unit,
            default_handler=default_handler,
            lines=lines,
            compression=compression,
            index=index,
            indent=indent,
            storage_options=storage_options,
            mode=mode,
        )
        return FactoryDispatcher.to_json_series(
            self._query_compiler, path_or_buf, **json_options
        )

    def to_list(self) -> list:  # noqa: RT01, D200
        """
        Return a list of the values, as produced by the query compiler.
        """
        compiler = self._query_compiler
        return compiler.to_list()

    def to_numpy(
        self, dtype=None, copy=False, na_value=lib.no_default, **kwargs
    ) -> np.ndarray:  # noqa: PR01, RT01, D200
        """
        Return the NumPy ndarray representing the values in this Series or Index.
        """
        from modin.config import ModinNumpy

        if ModinNumpy.get():
            # With the `ModinNumpy` option enabled a modin numpy `array` is
            # built from this Series; only `copy` is forwarded on this path.
            from ..numpy.arr import array

            return array(self, copy=copy)

        parent_result = super(Series, self).to_numpy(
            dtype=dtype,
            copy=copy,
            na_value=na_value,
        )
        return parent_result.flatten()

    # `tolist` is bound to the same function object as `to_list`.
    tolist = to_list

    # TODO(williamma12): When we implement to_timestamp, have this call the version
    # in base.py
    def to_period(self, freq=None, copy=None) -> Series:  # noqa: PR01, RT01, D200
        """
        Cast to PeriodArray/Index at a particular frequency.
        """
        # The operation is named by string and executed via `_default_to_pandas`.
        options = {"freq": freq, "copy": copy}
        return self._default_to_pandas("to_period", **options)

    def to_string(
        self,
        buf=None,
        na_rep="NaN",
        float_format=None,
        header=True,
        index=True,
        length=False,
        dtype=False,
        name=False,
        max_rows=None,
        min_rows=None,
    ) -> Union[str, None]:  # noqa: PR01, RT01, D200
        """
        Render a string representation of the Series.

        All arguments are forwarded to `pandas.Series.to_string` through
        `_default_to_pandas`.
        """
        return self._default_to_pandas(
            pandas.Series.to_string,
            buf=buf,
            na_rep=na_rep,
            float_format=float_format,
            header=header,
            index=index,
            length=length,
            dtype=dtype,
            name=name,
            max_rows=max_rows,
            # `min_rows` used to be accepted but silently dropped.
            min_rows=min_rows,
        )

    # TODO(williamma12): When we implement to_timestamp, have this call the version
    # in base.py
    def to_timestamp(
        self, freq=None, how="start", copy=None
    ) -> Series:  # noqa: PR01, RT01, D200
        """
        Cast to DatetimeIndex of Timestamps, at beginning of period.
        """
        # The operation is named by string and executed via `_default_to_pandas`.
        options = {"freq": freq, "how": how, "copy": copy}
        return self._default_to_pandas("to_timestamp", **options)

    def transpose(self, *args, **kwargs) -> Series:  # noqa: PR01, RT01, D200
        """
        Return the transpose, which is by definition `self`.
        """
        # Positional and keyword arguments are accepted and ignored.
        transposed = self
        return transposed

    # To enable dynamic backend switching, we must use a `def` so the lookup of `self.transpose`
    # is performed dynamically, whereas declaring `T = property(transpose)` makes it always use
    # the originally-defined version without the switching wrapper.
    @property
    def T(self) -> Series:
        # Look `transpose` up on the instance at call time and return its result.
        transposed = self.transpose()
        return transposed

    def truediv(
        self, other, level=None, fill_value=None, axis=0
    ) -> Series:  # noqa: PR01, RT01, D200
        """
        Return floating division of series and `other`, element-wise (binary operator `truediv`).
        """
        lhs, rhs = self._prepare_inter_op(other)
        parent_op = super(Series, lhs).truediv
        return parent_op(rhs, level=level, fill_value=fill_value, axis=axis)

    # `div` and `divide` are both bound to the same function object as `truediv`.
    div = divide = truediv

    def unique(self) -> ArrayLike:  # noqa: RT01, D200
        """
        Return unique values of Series object.
        """
        # `values` can't be used here because it performs unnecessary conversion,
        # after which the result type does not match the pandas
        unique_series = self.__constructor__(
            query_compiler=self._query_compiler.unique()
        )
        as_pandas = unique_series.modin.to_pandas()
        return as_pandas._values

    def update(self, other) -> None:  # noqa: PR01, D200
        """
        Modify Series in place using values from passed Series.
        """
        # Anything that is not already a Series is first wrapped through
        # `__constructor__`.
        source = other if isinstance(other, Series) else self.__constructor__(other)
        updated_qc = self._query_compiler.series_update(source._query_compiler)
        self._update_inplace(new_query_compiler=updated_qc)

    def value_counts(
        self, normalize=False, sort=True, ascending=False, bins=None, dropna=True
    ) -> Series:  # noqa: PR01, RT01, D200
        """
        Return a Series holding the counts of unique values.
        """
        if bins is None:
            # Regular path: the base class counts the values of this series.
            return super(Series, self).value_counts(
                subset=self,
                normalize=normalize,
                sort=sort,
                ascending=ascending,
                dropna=dropna,
            )
        # Potentially we could implement `cut` function from pandas API, which
        # bins values into intervals, and then we can just count them as regular values.
        # TODO #1333: new_self = self.__constructor__(pd.cut(self, bins, include_lowest=True), dtype="interval")
        return self._default_to_pandas(
            pandas.Series.value_counts,
            normalize=normalize,
            sort=sort,
            ascending=ascending,
            bins=bins,
            dropna=dropna,
        )

    def view(self, dtype=None) -> Series:  # noqa: PR01, RT01, D200
        """
        Create a new view of the Series.
        """
        viewed_compiler = self._query_compiler.series_view(dtype=dtype)
        return self.__constructor__(query_compiler=viewed_compiler)

    def where(
        self,
        cond,
        other=np.nan,
        *,
        inplace=False,
        axis=None,
        level=None,
    ) -> Union[Series, None]:  # noqa: PR01, RT01, D200
        """
        Replace values where the condition is False.
        """
        # TODO: probably need to remove this conversion to pandas
        replacement = to_pandas(other) if isinstance(other, Series) else other
        # TODO: add error checking like for dataframe where, then forward to
        # same query compiler method
        pandas_kwargs = {
            "other": replacement,
            "inplace": inplace,
            "axis": axis,
            "level": level,
        }
        return self._default_to_pandas(pandas.Series.where, cond, **pandas_kwargs)

    @property
    def attrs(self) -> dict:  # noqa: RT01, D200
        """
        Return the dictionary of global attributes of this dataset.
        """

        # The helper keeps the name `attrs`; it is handed to `_default_to_pandas`
        # as the operation to run on the pandas object.
        def attrs(pandas_obj):
            return pandas_obj.attrs

        return self._default_to_pandas(attrs)

    @property
    def array(self) -> ExtensionArray:  # noqa: RT01, D200
        """
        Return the ExtensionArray of the data backing this Series or Index.
        """

        # The helper keeps the name `array`; it is handed to `_default_to_pandas`
        # as the operation to run on the pandas object.
        def array(pandas_obj):
            return pandas_obj.array

        return self._default_to_pandas(array)

    @property
    def axes(self) -> list[pandas.Index]:  # noqa: RT01, D200
        """
        Return a one-element list holding the row axis labels.
        """
        row_labels = self.index
        return [row_labels]

    @property
    def dtype(self) -> DtypeObj:  # noqa: RT01, D200
        """
        Return the dtype object of the underlying data.
        """
        # The query compiler reports dtypes as a collection; squeeze it down.
        compiler_dtypes = self._query_compiler.dtypes
        return compiler_dtypes.squeeze()

    # `dtypes` is an alias of the `dtype` property.
    dtypes = dtype

    @property
    def empty(self) -> bool:  # noqa: RT01, D200
        """
        Indicate whether the Series has no elements.
        """
        n_rows = len(self)
        return n_rows == 0

    @property
    def hasnans(self) -> bool:  # noqa: RT01, D200
        """
        Return True if the Series contains any missing values.
        """
        missing_count = self.isna().sum()
        return missing_count > 0

    @property
    def is_monotonic_increasing(self) -> bool:  # noqa: RT01, D200
        """
        Return True if the values in the Series are monotonically increasing.
        """
        monotonic_compiler = self._query_compiler.is_monotonic_increasing()
        return self._reduce_dimension(monotonic_compiler)

    @property
    def is_monotonic_decreasing(self) -> bool:  # noqa: RT01, D200
        """
        Return True if the values in the Series are monotonically decreasing.
        """
        monotonic_compiler = self._query_compiler.is_monotonic_decreasing()
        return self._reduce_dimension(monotonic_compiler)

    @property
    def is_unique(self) -> bool:  # noqa: RT01, D200
        """
        Return True if all values in the Series are unique.
        """
        # Missing values are counted as values of their own (dropna=False).
        distinct_count = self.nunique(dropna=False)
        return distinct_count == len(self)

    @property
    def nbytes(self) -> int:  # noqa: RT01, D200
        """
        Return the number of bytes in the underlying data.
        """
        # The index is excluded from the reported memory usage.
        data_bytes = self.memory_usage(index=False)
        return data_bytes

    @property
    def ndim(self) -> int:  # noqa: RT01, D200
        """
        Return the number of dimensions of the underlying data, which is always 1.
        """
        return 1

    def nunique(self, dropna=True) -> int:  # noqa: PR01, RT01, D200
        """
        Return the number of unique elements in the object.
        """
        base_view = super(Series, self)
        return base_view.nunique(dropna=dropna)

    @property
    def shape(self) -> tuple[int]:  # noqa: RT01, D200
        """
        Return a one-element tuple with the length of the underlying data.
        """
        n_rows = len(self)
        return (n_rows,)

    def reindex_like(
        self,
        other,
        method=None,
        copy: Optional[bool] = None,
        limit=None,
        tolerance=None,
    ) -> Series:
        # docs say "Same as calling .reindex(index=other.index, columns=other.columns,...).":
        # https://pandas.pydata.org/pandas-docs/version/1.4/reference/api/pandas.Series.reindex_like.html
        # A Series has no columns, so only the index of `other` is forwarded.
        reindex_options = {
            "method": method,
            "copy": copy,
            "limit": limit,
            "tolerance": tolerance,
        }
        return self.reindex(index=other.index, **reindex_options)

    def _to_pandas(self) -> pandas.Series:
        """
        Materialize this Modin Series as a pandas Series.

        Recommended conversion method: `series.modin.to_pandas()`.

        Returns
        -------
        pandas.Series
        """
        # The query compiler yields a frame; the data lives in its first column.
        frame = self._query_compiler.to_pandas()
        first_label = frame.columns[0]
        result = frame[first_label]
        # The placeholder label stands for "no name" and must not reach the caller.
        if self._query_compiler.columns[0] == MODIN_UNNAMED_SERIES_LABEL:
            result.name = None
        return result

    def _to_datetime(self, **kwargs) -> Series:
        """
        Convert `self` to datetime.

        Parameters
        ----------
        **kwargs : dict
            Optional arguments forwarded to the query compiler's
            `to_datetime` invocation.

        Returns
        -------
        Series
            Series of datetime64 dtype.
        """
        converted_compiler = self._query_compiler.to_datetime(**kwargs)
        return self.__constructor__(query_compiler=converted_compiler)

    def _to_numeric(self, **kwargs) -> Series:
        """
        Convert `self` to numeric.

        Parameters
        ----------
        **kwargs : dict
            Optional arguments forwarded to the query compiler's
            `to_numeric` invocation.

        Returns
        -------
        Series
            Series of numeric dtype.
        """
        converted_compiler = self._query_compiler.to_numeric(**kwargs)
        return self.__constructor__(query_compiler=converted_compiler)

    def _qcut(self, q, **kwargs):  # noqa: PR01, RT01, D200
        """
        Quantile-based discretization function.
        """
        # `pandas.qcut` is applied to the pandas version of this series.
        discretize = pandas.qcut
        return self._default_to_pandas(discretize, q, **kwargs)

    def _reduce_dimension(self, query_compiler) -> Series | Scalar:
        """
        Try to reduce the dimension of data from the `query_compiler`.

        Parameters
        ----------
        query_compiler : BaseQueryCompiler
            Query compiler to retrieve the data.

        Returns
        -------
        pandas.Series or scalar
            Result of squeezing the materialized pandas object.
        """
        materialized = query_compiler.to_pandas()
        return materialized.squeeze()

    def _validate_dtypes_prod_mean(
        self, axis, numeric_only, ignore_axis=False
    ) -> Series:
        """
        Validate data dtype for `prod` and `mean` methods.

        Parameters
        ----------
        axis : {0, 1}
            Axis to validate over.
        numeric_only : bool
            Whether or not to allow only numeric data.
            If True and non-numeric data is found, exception
            will be raised.
        ignore_axis : bool, default: False
            Whether or not to ignore `axis` parameter.

        Returns
        -------
        Series

        Notes
        -----
        Actually returns unmodified `self` object,
        added for compatibility with Modin DataFrame.
        """
        return self

    def _validate_dtypes_min_max(self, axis, numeric_only) -> Series:
        """
        Validate data dtype for `min` and `max` methods.

        Parameters
        ----------
        axis : {0, 1}
            Axis to validate over.
        numeric_only : bool
            Whether or not to allow only numeric data.
            If True and non-numeric data is found, exception.

        Returns
        -------
        Series

        Notes
        -----
        Actually returns unmodified `self` object,
        added for compatibility with Modin DataFrame.
        """
        return self

    def _validate_dtypes(self, numeric_only=False) -> None:
        """
        Check that all the dtypes are the same.

        Parameters
        ----------
        numeric_only : bool, default: False
            Whether or not to allow only numeric data.
            If True and non-numeric data is found, exception
            will be raised.

        Notes
        -----
        Actually does nothing, added for compatibility with Modin DataFrame.
        """
        pass

    def _get_numeric_data(self, axis: int) -> Series:
        """
        Grab only numeric data from Series.

        Parameters
        ----------
        axis : {0, 1}
            Axis to inspect on having numeric types only.

        Returns
        -------
        Series

        Notes
        -----
        `numeric_only` parameter is not supported by Series, so this method
        does not do anything. The method is added for compatibility with Modin DataFrame.
        """
        return self

    def _update_inplace(self, new_query_compiler) -> None:
        """
        Update the current Series in-place using `new_query_compiler`.

        Parameters
        ----------
        new_query_compiler : BaseQueryCompiler
            QueryCompiler to use to manage the data.
        """
        super(Series, self)._update_inplace(new_query_compiler=new_query_compiler)
        parent = self._parent
        if parent is None:
            return
        # Propagate changes back to parent so that column in dataframe had the same contents
        if self._parent_axis == 0:
            # Written back through `.loc` under this series' name.
            parent.loc[self.name] = self
        else:
            # Written back through item assignment under this series' name.
            parent[self.name] = self

    def _create_or_update_from_compiler(
        self, new_query_compiler, inplace=False
    ) -> Union[Series, None]:
        """
        Return or update a Series with given `new_query_compiler`.

        Parameters
        ----------
        new_query_compiler : PandasQueryCompiler
            QueryCompiler to use to manage the data.
        inplace : bool, default: False
            Whether or not to perform update or creation inplace.

        Returns
        -------
        Series, DataFrame or None
            None if the update was done in place. Otherwise a Series when the
            compiler is series-like, and a DataFrame when it is not.
        """
        # The new compiler must be an instance of the current compiler's type,
        # or its exact type must be one of that type's direct bases.
        assert (
            isinstance(new_query_compiler, type(self._query_compiler))
            or type(new_query_compiler) in self._query_compiler.__class__.__bases__
        ), "Invalid Query Compiler object: {}".format(type(new_query_compiler))
        if inplace:
            self._update_inplace(new_query_compiler=new_query_compiler)
            return None
        if new_query_compiler.is_series_like():
            return self.__constructor__(query_compiler=new_query_compiler)
        # This can happen with things like `reset_index` where we can add columns.
        from .dataframe import DataFrame

        return DataFrame(query_compiler=new_query_compiler)

    def _prepare_inter_op(self, other) -> tuple[Series, Series]:
        """
        Prepare `self` and `other` for further interaction.

        Parameters
        ----------
        other : Series or scalar value
            Another object `self` should interact with.

        Returns
        -------
        Series
            Prepared `self`.
        Series
            Prepared `other`.
        """
        if not isinstance(other, Series):
            # Non-Series operands need no preparation.
            return self, other
        needs_rename = self.name != other.name
        # NB: if we don't need a rename, do the interaction with shallow
        # copies so that we preserve obj.index._id. It's fine to work
        # with shallow copies because we'll discard the copies but keep
        # the result after the interaction operation. We can't do a rename
        # on shallow copies because we'll mutate the original objects.
        prepared_self = self.copy(deep=needs_rename)
        prepared_other = other.copy(deep=needs_rename)
        if needs_rename:
            prepared_self.name = prepared_other.name = MODIN_UNNAMED_SERIES_LABEL
        return prepared_self, prepared_other

    def _getitem(self, key) -> Union[Series, Scalar]:
        """
        Get the data specified by `key` for this Series.

        The key is resolved in this order: a callable is applied to `self`,
        a boolean indexer selects rows by mask, a tuple defaults to pandas,
        and anything else is treated as index labels when every element is
        found in `self.keys()`, or as row positions otherwise.

        Parameters
        ----------
        key : Any
            Column id to retrieve from Series.

        Returns
        -------
        Series or scalar
            Retrieved data.

        Raises
        ------
        KeyError
            If the resolved row positions are not all integers.
        """
        key = apply_if_callable(key, self)
        if isinstance(key, Series) and key.dtype == np.bool_:
            # This ends up being significantly faster than looping through and getting
            # each item individually.
            key = key._to_pandas()
        if is_bool_indexer(key):
            # Turn the boolean mask into row positions by indexing a RangeIndex
            # of the same length as this series.
            return self.__constructor__(
                query_compiler=self._query_compiler.getitem_row_array(
                    pandas.RangeIndex(len(self))[key]
                )
            )
        # TODO: More efficiently handle `tuple` case for `Series.__getitem__`
        if isinstance(key, tuple):
            return self._default_to_pandas(pandas.Series.__getitem__, key)

        # A non-list-like key is wrapped in a list so the lookup below is uniform;
        # `reduce_dimension` records that the result must be reduced afterwards.
        if not is_list_like(key):
            reduce_dimension = True
            key = [key]
        else:
            reduce_dimension = False
        # The check for whether or not `key` is in `keys()` will throw a TypeError
        # if the object is not hashable. When that happens, we just assume the
        # key is a list-like of row positions.
        try:
            is_indexer = all(k in self.keys() for k in key)
        except TypeError:
            is_indexer = False
        # Labels are translated to positions through the index; otherwise the
        # key is used as positions directly.
        row_positions = self.index.get_indexer_for(key) if is_indexer else key
        if not all(is_integer(x) for x in row_positions):
            # Report the original single key rather than the one-element list.
            raise KeyError(key[0] if reduce_dimension else key)
        result = self._query_compiler.getitem_row_array(row_positions)

        if reduce_dimension:
            return self._reduce_dimension(result)
        return self.__constructor__(query_compiler=result)

    def _repartition(self) -> Series:
        """
        Repartition the Series to get ideal partitions inside.

        Allows to improve performance where the query compiler can't improve
        yet by doing implicit repartitioning.

        Returns
        -------
        Series
            The repartitioned Series.
        """
        # The base implementation is always invoked with `axis=0` for a Series.
        repartitioned = super()._repartition(axis=0)
        return repartitioned

    # Persistence support methods - BEGIN
    @classmethod
    def _inflate_light(cls, query_compiler, name, source_pid) -> Series:
        """
        Re-creates the object from previously-serialized lightweight representation.

        The method is used for faster but not disk-storable persistence.

        Parameters
        ----------
        query_compiler : BaseQueryCompiler
            Query compiler to use for object re-creation.
        name : str
            The name to give to the new object.
        source_pid : int
            Determines whether a Modin or pandas object needs to be created.
            Modin objects are created only on the main process.

        Returns
        -------
        Series or pandas.Series
            A Modin Series based on the `query_compiler` when called in the
            process identified by `source_pid`, a pandas Series otherwise.
        """
        if os.getpid() != source_pid:
            # at the query compiler layer, `to_pandas` always returns a DataFrame,
            # even if it stores a Series, as a single-column DataFrame.
            # Squeeze the column axis unconditionally so that a named series
            # also comes back as a pandas Series rather than a DataFrame.
            res = query_compiler.to_pandas().squeeze(axis=1)
            # The placeholder label stands for "no name".
            if res.name == MODIN_UNNAMED_SERIES_LABEL:
                res.name = None
            return res
        # The current logic does not involve creating Modin objects
        # and manipulation with them in worker processes
        return cls(query_compiler=query_compiler, name=name)

    @classmethod
    def _inflate_full(cls, pandas_series, source_pid) -> Series:
        """
        Re-creates the object from previously-serialized disk-storable representation.

        Parameters
        ----------
        pandas_series : pandas.Series
            Data to use for object re-creation.
        source_pid : int
            Determines whether a Modin or pandas object needs to be created.
            Modin objects are created only on the main process.

        Returns
        -------
        Series or pandas.Series
            A Modin Series built from `pandas_series` when called in the process
            identified by `source_pid`, `pandas_series` itself otherwise.
        """
        in_source_process = os.getpid() == source_pid
        if in_source_process:
            return cls(data=pandas_series)
        # The current logic does not involve creating Modin objects
        # and manipulation with them in worker processes
        return pandas_series

    def __reduce__(self):
        # Pickling support: returns a (callable, args) pair that rebuilds the object.
        self._query_compiler.finalize()
        pid = os.getpid()
        # The full (pandas-based) form is used when persistent pickling is
        # requested or the query compiler can't be materialized in a worker.
        needs_full_form = (
            PersistentPickle.get()
            or not self._query_compiler.support_materialization_in_worker_process()
        )
        if needs_full_form:
            return self._inflate_full, (self._to_pandas(), pid)
        return self._inflate_light, (self._query_compiler, self.name, pid)

    # Persistence support methods - END

    @doc(SET_BACKEND_DOC, class_name=__qualname__)
    def set_backend(
        self,
        backend: str,
        inplace: bool = False,
        *,
        switch_operation: Optional[str] = None,
    ) -> Optional[Self]:
        # A series which is moved, potentially without its parent, needs to
        # have its parent reset. This is aligned with CoW chained assignment
        # semantics as well, but it is a little different from existing modin
        # semantics. This is why we only do this for hybrid and inplace
        # modification.
        leaves_parent_backend = (
            inplace
            and self._parent is not None
            and backend != self._parent.get_backend()
        )
        if leaves_parent_backend:
            self._parent = None
        return super().set_backend(
            backend=backend, inplace=inplace, switch_operation=switch_operation
        )

    # `move_to` is an alias of `set_backend`.
    move_to = set_backend

    @doc(GET_BACKEND_DOC, class_name=__qualname__)
    @disable_logging
    def get_backend(self) -> str:
        # Thin override: the base implementation does the actual lookup.
        backend_name = super().get_backend()
        return backend_name

    @disable_logging
    @_inherit_docstrings(BasePandasDataset._copy_into)
    def _copy_into(self, other: Series):
        # Share this object's siblings and query compiler with `other`;
        # nothing is deep-copied here.
        other._query_compiler, other._siblings = self._query_compiler, self._siblings
        return None


================================================
FILE: modin/pandas/series_utils.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
Implement Series's accessors public API as pandas does.

Accessors: `Series.cat`, `Series.str`, `Series.dt`, `Series.list`, `Series.struct`
"""

from __future__ import annotations

import re
from functools import cached_property
from typing import TYPE_CHECKING

import numpy as np
import pandas
from pandas._libs import lib

from modin.logging import ClassLogger
from modin.utils import _inherit_docstrings

if TYPE_CHECKING:
    from datetime import tzinfo

    from pandas._typing import npt

    from modin.core.storage_formats import BaseQueryCompiler
    from modin.pandas import Series


@_inherit_docstrings(pandas.core.arrays.arrow.ListAccessor)
class ListAccessor(ClassLogger):
    # Series this accessor was created for, and that series' query compiler.
    _series: Series
    _query_compiler: BaseQueryCompiler

    def __init__(self, data: Series = None):
        self._series = data
        self._query_compiler = data._query_compiler

    @cached_property
    def _Series(self) -> Series:  # noqa: GL08
        # to avoid cyclic import
        from .series import Series as series_cls

        return series_cls

    def flatten(self):
        flattened = self._query_compiler.list_flatten()
        return self._Series(query_compiler=flattened)

    def len(self):
        lengths = self._query_compiler.list_len()
        return self._Series(query_compiler=lengths)

    def __getitem__(self, key):
        selected = self._query_compiler.list__getitem__(key=key)
        return self._Series(query_compiler=selected)


@_inherit_docstrings(pandas.core.arrays.arrow.StructAccessor)
class StructAccessor(ClassLogger):
    # Series this accessor was created for, and that series' query compiler.
    _series: Series
    _query_compiler: BaseQueryCompiler

    def __init__(self, data: Series = None):
        self._series = data
        self._query_compiler = data._query_compiler

    @cached_property
    def _Series(self) -> Series:  # noqa: GL08
        # to avoid cyclic import
        from modin.pandas.series import Series as series_cls

        return series_cls

    @property
    def dtypes(self):
        dtypes_compiler = self._query_compiler.struct_dtypes()
        return self._Series(query_compiler=dtypes_compiler)

    def field(self, name_or_index):
        field_compiler = self._query_compiler.struct_field(
            name_or_index=name_or_index
        )
        return self._Series(query_compiler=field_compiler)

    def explode(self):
        # Unlike the other members, the exploded result is wrapped in a DataFrame.
        from modin.pandas.dataframe import DataFrame

        exploded = self._query_compiler.struct_explode()
        return DataFrame(query_compiler=exploded)


@_inherit_docstrings(pandas.core.arrays.categorical.CategoricalAccessor)
class CategoryMethods(ClassLogger):
    # Series this accessor was created for, and that series' query compiler.
    _series: Series
    _query_compiler: BaseQueryCompiler

    def __init__(self, data: Series):
        self._series = data
        self._query_compiler = data._query_compiler

    @cached_property
    def _Series(self) -> Series:  # noqa: GL08
        # to avoid cyclic import
        from modin.pandas.series import Series as series_cls

        return series_cls

    @property
    def categories(self):
        categorical_dtype = self._series.dtype
        return categorical_dtype.categories

    @categories.setter
    def categories(self, categories):
        # NOTE(review): the value returned by `_default_to_pandas` is discarded
        # here — confirm that the assignment is meant to reach the Modin series.
        def set_categories(series, categories):
            series.cat.categories = categories

        self._series._default_to_pandas(set_categories, categories=categories)

    @property
    def ordered(self):
        categorical_dtype = self._series.dtype
        return categorical_dtype.ordered

    @property
    def codes(self):
        codes_compiler = self._query_compiler.cat_codes()
        return self._Series(query_compiler=codes_compiler)

    def rename_categories(self, new_categories):
        pandas_op = pandas.Series.cat.rename_categories
        return self._default_to_pandas(pandas_op, new_categories)

    def reorder_categories(self, new_categories, ordered=None):
        pandas_op = pandas.Series.cat.reorder_categories
        return self._default_to_pandas(pandas_op, new_categories, ordered=ordered)

    def add_categories(self, new_categories):
        pandas_op = pandas.Series.cat.add_categories
        return self._default_to_pandas(pandas_op, new_categories)

    def remove_categories(self, removals):
        pandas_op = pandas.Series.cat.remove_categories
        return self._default_to_pandas(pandas_op, removals)

    def remove_unused_categories(self):
        pandas_op = pandas.Series.cat.remove_unused_categories
        return self._default_to_pandas(pandas_op)

    def set_categories(self, new_categories, ordered=None, rename=False):
        pandas_op = pandas.Series.cat.set_categories
        return self._default_to_pandas(
            pandas_op, new_categories, ordered=ordered, rename=rename
        )

    def as_ordered(self):
        pandas_op = pandas.Series.cat.as_ordered
        return self._default_to_pandas(pandas_op)

    def as_unordered(self):
        pandas_op = pandas.Series.cat.as_unordered
        return self._default_to_pandas(pandas_op)

    def _default_to_pandas(self, op, *args, **kwargs):
        """
        Convert `self` to pandas type and call a pandas cat.`op` on it.

        Parameters
        ----------
        op : callable
            Function invoked with the `cat` accessor of the pandas series
            as its first argument.
        *args : list
            Additional positional arguments to be passed in `op`.
        **kwargs : dict
            Additional keywords arguments to be passed in `op`.

        Returns
        -------
        object
            Result of operation.
        """
        owner = self._series
        return owner._default_to_pandas(
            lambda series: op(series.cat, *args, **kwargs)
        )


@_inherit_docstrings(pandas.core.strings.accessor.StringMethods)
class StringMethods(ClassLogger):
    _series: Series
    _query_compiler: BaseQueryCompiler

    def __init__(self, data: Series):
        # NOTE: the dtype of `data` is not validated here.
        query_compiler = data._query_compiler
        self._series = data
        self._query_compiler = query_compiler

    @cached_property
    def _Series(self) -> Series:  # noqa: GL08
        # to avoid cyclic import
        from .series import Series as series_cls

        return series_cls

    def casefold(self):
        folded = self._query_compiler.str_casefold()
        return self._Series(query_compiler=folded)

    def cat(self, others=None, sep=None, na_rep=None, join="left"):
        # A Modin Series operand is converted to pandas before the call.
        pandas_others = (
            others._to_pandas() if isinstance(others, self._Series) else others
        )
        compiler_result = self._query_compiler.str_cat(
            others=pandas_others, sep=sep, na_rep=na_rep, join=join
        )
        # if others is None, result is a string. otherwise, it's a series.
        if pandas_others is None:
            return compiler_result.to_pandas().squeeze()
        return self._Series(query_compiler=compiler_result)

    def decode(self, encoding, errors="strict", dtype=None):
        decoded = self._query_compiler.str_decode(encoding, errors, dtype)
        return self._Series(query_compiler=decoded)

    def split(self, pat=None, *, n=-1, expand=False, regex=None):
        if not expand:
            # Without expansion the result stays a Series.
            unexpanded = self._query_compiler.str_split(
                pat=pat, n=n, expand=expand, regex=regex
            )
            return self._Series(query_compiler=unexpanded)

        # With expansion the result is wrapped in a DataFrame.
        from .dataframe import DataFrame

        expanded = self._query_compiler.str_split(
            pat=pat, n=n, expand=True, regex=regex
        )
        return DataFrame(query_compiler=expanded)

    def rsplit(self, pat=None, *, n=-1, expand=False):
        # `None` is allowed as a pattern; any other falsy pattern is rejected.
        if pat is not None and not pat:
            raise ValueError("rsplit() requires a non-empty pattern match.")

        if not expand:
            # Without expansion the result stays a Series.
            unexpanded = self._query_compiler.str_rsplit(pat=pat, n=n, expand=expand)
            return self._Series(query_compiler=unexpanded)

        # With expansion the result is wrapped in a DataFrame.
        from .dataframe import DataFrame

        expanded = self._query_compiler.str_rsplit(pat=pat, n=n, expand=True)
        return DataFrame(query_compiler=expanded)

    def get(self, i):
        extracted = self._query_compiler.str_get(i)
        return self._Series(query_compiler=extracted)

    def join(self, sep):
        # A missing separator is rejected up front with an AttributeError.
        if sep is None:
            raise AttributeError("'NoneType' object has no attribute 'join'")
        joined = self._query_compiler.str_join(sep)
        return self._Series(query_compiler=joined)

    def get_dummies(self, sep="|"):
        # pandas' `Series.str.get_dummies` returns a DataFrame of dummy
        # variables, so the result is wrapped in a DataFrame, not a Series.
        from .dataframe import DataFrame

        return DataFrame(query_compiler=self._query_compiler.str_get_dummies(sep))

    def contains(self, pat, case=True, flags=0, na=lib.no_default, regex=True):
        if not case and pat is None:
            raise AttributeError("'NoneType' object has no attribute 'upper'")
        # The "not given" sentinel is forwarded to the query compiler as None.
        fill_na = None if na is lib.no_default else na
        matched = self._query_compiler.str_contains(
            pat, case=case, flags=flags, na=fill_na, regex=regex
        )
        return self._Series(query_compiler=matched)

    def replace(self, pat, repl, n=-1, case=None, flags=0, regex=False):
        # The replacement has to be either a string or a callable.
        if not isinstance(repl, str) and not callable(repl):
            raise TypeError("repl must be a string or callable")
        replaced = self._query_compiler.str_replace(
            pat, repl, n=n, case=case, flags=flags, regex=regex
        )
        return self._Series(query_compiler=replaced)

    def pad(self, width, side="left", fillchar=" "):
        # Exactly one fill character is required.
        if len(fillchar) != 1:
            raise TypeError("fillchar must be a character, not str")
        padded = self._query_compiler.str_pad(width, side=side, fillchar=fillchar)
        return self._Series(query_compiler=padded)

    def center(self, width, fillchar=" "):
        # Exactly one fill character is required.
        if len(fillchar) != 1:
            raise TypeError("fillchar must be a character, not str")
        centered = self._query_compiler.str_center(width, fillchar=fillchar)
        return self._Series(query_compiler=centered)

    def ljust(self, width, fillchar=" "):
        # Exactly one fill character is required.
        if len(fillchar) != 1:
            raise TypeError("fillchar must be a character, not str")
        justified = self._query_compiler.str_ljust(width, fillchar=fillchar)
        return self._Series(query_compiler=justified)

    def rjust(self, width, fillchar=" "):
        # Exactly one fill character is required.
        if len(fillchar) != 1:
            raise TypeError("fillchar must be a character, not str")
        justified = self._query_compiler.str_rjust(width, fillchar=fillchar)
        return self._Series(query_compiler=justified)

    def zfill(self, width):
        zero_filled = self._query_compiler.str_zfill(width)
        return self._Series(query_compiler=zero_filled)

    def wrap(self, width, **kwargs):
        # Only strictly positive widths are accepted.
        if width <= 0:
            raise ValueError("invalid width {} (must be > 0)".format(width))
        wrapped = self._query_compiler.str_wrap(width, **kwargs)
        return self._Series(query_compiler=wrapped)

    def slice(self, start=None, stop=None, step=None):
        # A zero step is rejected before the query compiler is involved.
        if step == 0:
            raise ValueError("slice step cannot be zero")
        sliced = self._query_compiler.str_slice(start=start, stop=stop, step=step)
        return self._Series(query_compiler=sliced)

    def slice_replace(self, start=None, stop=None, repl=None):
        replaced = self._query_compiler.str_slice_replace(
            start=start, stop=stop, repl=repl
        )
        return self._Series(query_compiler=replaced)

    def count(self, pat, flags=0):
        # Only a string or a compiled regular expression is accepted as a pattern.
        pattern_types = (str, re.Pattern)
        if not isinstance(pat, pattern_types):
            raise TypeError("first argument must be string or compiled pattern")
        counted = self._query_compiler.str_count(pat, flags=flags)
        return self._Series(query_compiler=counted)

    def startswith(self, pat, na=lib.no_default):
        # The "not given" sentinel is forwarded to the query compiler as None.
        fill_na = None if na is lib.no_default else na
        matched = self._query_compiler.str_startswith(pat, na=fill_na)
        return self._Series(query_compiler=matched)

    def encode(self, encoding, errors="strict"):
        encoded = self._query_compiler.str_encode(encoding, errors)
        return self._Series(query_compiler=encoded)

    def endswith(self, pat, na=lib.no_default):
        # The "not given" sentinel is forwarded to the query compiler as None.
        fill_na = None if na is lib.no_default else na
        matched = self._query_compiler.str_endswith(pat, na=fill_na)
        return self._Series(query_compiler=matched)

    def findall(self, pat, flags=0):
        # Only a string or a compiled regular expression is accepted as a pattern.
        pattern_types = (str, re.Pattern)
        if not isinstance(pat, pattern_types):
            raise TypeError("first argument must be string or compiled pattern")
        found = self._query_compiler.str_findall(pat, flags=flags)
        return self._Series(query_compiler=found)

    def fullmatch(self, pat, case=True, flags=0, na=lib.no_default):
        # Validate the pattern type first, then translate the "not given"
        # sentinel for ``na`` to ``None`` before delegating.
        pattern_ok = isinstance(pat, (str, re.Pattern))
        if not pattern_ok:
            raise TypeError("first argument must be string or compiled pattern")
        fill_value = None if na is lib.no_default else na
        matched_qc = self._query_compiler.str_fullmatch(
            pat, case=case, flags=flags, na=fill_value
        )
        return self._Series(query_compiler=matched_qc)

    def match(self, pat, case=True, flags=0, na=lib.no_default):
        # Validate the pattern type first, then translate the "not given"
        # sentinel for ``na`` to ``None`` before delegating.
        pattern_ok = isinstance(pat, (str, re.Pattern))
        if not pattern_ok:
            raise TypeError("first argument must be string or compiled pattern")
        fill_value = None if na is lib.no_default else na
        matched_qc = self._query_compiler.str_match(
            pat, case=case, flags=flags, na=fill_value
        )
        return self._Series(query_compiler=matched_qc)

    def extract(self, pat, flags=0, expand=True):
        # The extraction itself is done by the query compiler; this method
        # only decides which frontend container wraps the result.
        query_compiler = self._query_compiler.str_extract(
            pat, flags=flags, expand=expand
        )
        # Imported here rather than at module level (the original does the
        # same; presumably to avoid a cyclic import — confirm).
        from .dataframe import DataFrame

        # A Series is returned only for ``expand=False`` with at most one
        # capture group.  The group count must be computed with the same
        # ``flags`` the extraction used: under ``re.VERBOSE`` a ``#`` starts a
        # comment, so "(a) # (b)" has one group with the flag and two without.
        if expand or re.compile(pat, flags=flags).groups > 1:
            return DataFrame(query_compiler=query_compiler)
        return self._Series(query_compiler=query_compiler)

    def extractall(self, pat, flags=0):
        # ``pat`` and ``flags`` are forwarded positionally to ``str_extractall``.
        # NOTE(review): pandas documents ``Series.str.extractall`` as returning
        # a DataFrame, while the result here is wrapped with ``self._Series``
        # — confirm this is intended.
        extracted_qc = self._query_compiler.str_extractall(pat, flags)
        return self._Series(query_compiler=extracted_qc)

    def len(self):
        # Delegates to the query compiler's ``str_len`` and wraps the result.
        lengths_qc = self._query_compiler.str_len()
        return self._Series(query_compiler=lengths_qc)

    def strip(self, to_strip=None):
        # ``to_strip`` is forwarded by keyword to ``str_strip``.
        stripped_qc = self._query_compiler.str_strip(to_strip=to_strip)
        return self._Series(query_compiler=stripped_qc)

    def rstrip(self, to_strip=None):
        # ``to_strip`` is forwarded by keyword to ``str_rstrip``.
        stripped_qc = self._query_compiler.str_rstrip(to_strip=to_strip)
        return self._Series(query_compiler=stripped_qc)

    def lstrip(self, to_strip=None):
        # ``to_strip`` is forwarded by keyword to ``str_lstrip``.
        stripped_qc = self._query_compiler.str_lstrip(to_strip=to_strip)
        return self._Series(query_compiler=stripped_qc)

    def partition(self, sep=" ", expand=True):
        # ``None`` is allowed through; any other separator must be non-empty.
        if sep is not None and len(sep) == 0:
            raise ValueError("empty separator")

        from .dataframe import DataFrame

        # ``expand`` selects the frontend container for the result.
        container = DataFrame if expand else self._Series
        partitioned_qc = self._query_compiler.str_partition(sep=sep, expand=expand)
        return container(query_compiler=partitioned_qc)

    def removeprefix(self, prefix):
        # Delegates to ``str_removeprefix`` and wraps the result.
        trimmed_qc = self._query_compiler.str_removeprefix(prefix)
        return self._Series(query_compiler=trimmed_qc)

    def removesuffix(self, suffix):
        # Delegates to ``str_removesuffix`` and wraps the result.
        trimmed_qc = self._query_compiler.str_removesuffix(suffix)
        return self._Series(query_compiler=trimmed_qc)

    def repeat(self, repeats):
        # Delegates to ``str_repeat`` and wraps the result.
        repeated_qc = self._query_compiler.str_repeat(repeats)
        return self._Series(query_compiler=repeated_qc)

    def rpartition(self, sep=" ", expand=True):
        # ``None`` is allowed through; any other separator must be non-empty.
        if sep is not None and len(sep) == 0:
            raise ValueError("empty separator")

        from .dataframe import DataFrame

        # ``expand`` selects the frontend container for the result.
        container = DataFrame if expand else self._Series
        partitioned_qc = self._query_compiler.str_rpartition(sep=sep, expand=expand)
        return container(query_compiler=partitioned_qc)

    def lower(self):
        # Delegates to ``str_lower`` and wraps the result.
        lowered_qc = self._query_compiler.str_lower()
        return self._Series(query_compiler=lowered_qc)

    def upper(self):
        # Delegates to ``str_upper`` and wraps the result.
        uppered_qc = self._query_compiler.str_upper()
        return self._Series(query_compiler=uppered_qc)

    def title(self):
        # Delegates to ``str_title`` and wraps the result.
        titled_qc = self._query_compiler.str_title()
        return self._Series(query_compiler=titled_qc)

    def find(self, sub, start=0, end=None):
        # Only plain ``str`` substrings are accepted; anything else is
        # rejected before the query compiler is touched.
        if isinstance(sub, str):
            located_qc = self._query_compiler.str_find(sub, start=start, end=end)
            return self._Series(query_compiler=located_qc)
        offending_type = type(sub).__name__
        raise TypeError("expected a string object, not {0}".format(offending_type))

    def rfind(self, sub, start=0, end=None):
        # Only plain ``str`` substrings are accepted; anything else is
        # rejected before the query compiler is touched.
        if isinstance(sub, str):
            located_qc = self._query_compiler.str_rfind(sub, start=start, end=end)
            return self._Series(query_compiler=located_qc)
        offending_type = type(sub).__name__
        raise TypeError("expected a string object, not {0}".format(offending_type))

    def index(self, sub, start=0, end=None):
        # Only plain ``str`` substrings are accepted; anything else is
        # rejected before the query compiler is touched.
        if isinstance(sub, str):
            located_qc = self._query_compiler.str_index(sub, start=start, end=end)
            return self._Series(query_compiler=located_qc)
        offending_type = type(sub).__name__
        raise TypeError("expected a string object, not {0}".format(offending_type))

    def rindex(self, sub, start=0, end=None):
        # Only plain ``str`` substrings are accepted; anything else is
        # rejected before the query compiler is touched.
        if isinstance(sub, str):
            located_qc = self._query_compiler.str_rindex(sub, start=start, end=end)
            return self._Series(query_compiler=located_qc)
        offending_type = type(sub).__name__
        raise TypeError("expected a string object, not {0}".format(offending_type))

    def capitalize(self):
        # Delegates to ``str_capitalize`` and wraps the result.
        capitalized_qc = self._query_compiler.str_capitalize()
        return self._Series(query_compiler=capitalized_qc)

    def swapcase(self):
        # Delegates to ``str_swapcase`` and wraps the result.
        swapped_qc = self._query_compiler.str_swapcase()
        return self._Series(query_compiler=swapped_qc)

    def normalize(self, form):
        # ``form`` is forwarded positionally to ``str_normalize``.
        normalized_qc = self._query_compiler.str_normalize(form)
        return self._Series(query_compiler=normalized_qc)

    def translate(self, table):
        # ``table`` is forwarded positionally to ``str_translate``.
        translated_qc = self._query_compiler.str_translate(table)
        return self._Series(query_compiler=translated_qc)

    def isalnum(self):
        # Delegates to ``str_isalnum`` and wraps the result.
        checked_qc = self._query_compiler.str_isalnum()
        return self._Series(query_compiler=checked_qc)

    def isalpha(self):
        # Delegates to ``str_isalpha`` and wraps the result.
        checked_qc = self._query_compiler.str_isalpha()
        return self._Series(query_compiler=checked_qc)

    def isdigit(self):
        # Delegates to ``str_isdigit`` and wraps the result.
        checked_qc = self._query_compiler.str_isdigit()
        return self._Series(query_compiler=checked_qc)

    def isspace(self):
        # Delegates to ``str_isspace`` and wraps the result.
        checked_qc = self._query_compiler.str_isspace()
        return self._Series(query_compiler=checked_qc)

    def islower(self):
        # Delegates to ``str_islower`` and wraps the result.
        checked_qc = self._query_compiler.str_islower()
        return self._Series(query_compiler=checked_qc)

    def isupper(self):
        # Delegates to ``str_isupper`` and wraps the result.
        checked_qc = self._query_compiler.str_isupper()
        return self._Series(query_compiler=checked_qc)

    def istitle(self):
        # Delegates to ``str_istitle`` and wraps the result.
        checked_qc = self._query_compiler.str_istitle()
        return self._Series(query_compiler=checked_qc)

    def isnumeric(self):
        # Delegates to ``str_isnumeric`` and wraps the result.
        checked_qc = self._query_compiler.str_isnumeric()
        return self._Series(query_compiler=checked_qc)

    def isdecimal(self):
        # Delegates to ``str_isdecimal`` and wraps the result.
        checked_qc = self._query_compiler.str_isdecimal()
        return self._Series(query_compiler=checked_qc)

    def __getitem__(self, key):  # noqa: GL08
        return self._Series(query_compiler=self._query_compiler.str___getitem__(key))

    def _default_to_pandas(self, op, *args, **kwargs):
        """
        Convert `self` to pandas type and call a pandas str.`op` on it.

        Parameters
        ----------
        op : callable
            Function to invoke; it is called as ``op(series.str, *args, **kwargs)``
            on the series handed over by ``self._series._default_to_pandas``.
        *args : list
            Additional positional arguments to be passed in `op`.
        **kwargs : dict
            Additional keywords arguments to be passed in `op`.

        Returns
        -------
        object
            Result of operation.
        """
        # The wrapped series' own ``_default_to_pandas`` drives the call; the
        # lambda only redirects it to that series' ``.str`` accessor.
        return self._series._default_to_pandas(
            lambda series: op(series.str, *args, **kwargs)
        )


@_inherit_docstrings(pandas.core.indexes.accessors.CombinedDatetimelikeProperties)
class DatetimeProperties(ClassLogger):  # noqa: GL08
    # Datetime-like accessor built around a Series. Almost every member asks
    # the wrapped query compiler for the matching ``dt_*`` result and wraps
    # it in a new frontend object. Members deliberately carry no docstrings
    # of their own: the decorator above is given the pandas accessor class
    # (presumably as the docstring source — confirm).
    _series: Series
    _query_compiler: BaseQueryCompiler

    def __init__(self, data: Series):
        self._series = data
        self._query_compiler = data._query_compiler

    @cached_property
    def _Series(self) -> Series:  # noqa: GL08
        # Imported lazily to avoid a cyclic import.
        from .series import Series as series_cls

        return series_cls

    # ---- date / time components -------------------------------------------

    @property
    def date(self):
        result_qc = self._query_compiler.dt_date()
        return self._Series(query_compiler=result_qc)

    @property
    def time(self):
        result_qc = self._query_compiler.dt_time()
        return self._Series(query_compiler=result_qc)

    @property
    def timetz(self):
        result_qc = self._query_compiler.dt_timetz()
        return self._Series(query_compiler=result_qc)

    @property
    def year(self):
        result_qc = self._query_compiler.dt_year()
        return self._Series(query_compiler=result_qc)

    @property
    def month(self):
        result_qc = self._query_compiler.dt_month()
        return self._Series(query_compiler=result_qc)

    @property
    def day(self):
        result_qc = self._query_compiler.dt_day()
        return self._Series(query_compiler=result_qc)

    @property
    def hour(self):
        result_qc = self._query_compiler.dt_hour()
        return self._Series(query_compiler=result_qc)

    @property
    def minute(self):
        result_qc = self._query_compiler.dt_minute()
        return self._Series(query_compiler=result_qc)

    @property
    def second(self):
        result_qc = self._query_compiler.dt_second()
        return self._Series(query_compiler=result_qc)

    @property
    def microsecond(self):
        result_qc = self._query_compiler.dt_microsecond()
        return self._Series(query_compiler=result_qc)

    @property
    def nanosecond(self):
        result_qc = self._query_compiler.dt_nanosecond()
        return self._Series(query_compiler=result_qc)

    @property
    def dayofweek(self):
        result_qc = self._query_compiler.dt_dayofweek()
        return self._Series(query_compiler=result_qc)

    # Alias sharing the very same property object.
    day_of_week = dayofweek

    @property
    def weekday(self):
        result_qc = self._query_compiler.dt_weekday()
        return self._Series(query_compiler=result_qc)

    @property
    def dayofyear(self):
        result_qc = self._query_compiler.dt_dayofyear()
        return self._Series(query_compiler=result_qc)

    # Alias sharing the very same property object.
    day_of_year = dayofyear

    @property
    def quarter(self):
        result_qc = self._query_compiler.dt_quarter()
        return self._Series(query_compiler=result_qc)

    # ---- calendar-boundary flags ------------------------------------------

    @property
    def is_month_start(self):
        result_qc = self._query_compiler.dt_is_month_start()
        return self._Series(query_compiler=result_qc)

    @property
    def is_month_end(self):
        result_qc = self._query_compiler.dt_is_month_end()
        return self._Series(query_compiler=result_qc)

    @property
    def is_quarter_start(self):
        result_qc = self._query_compiler.dt_is_quarter_start()
        return self._Series(query_compiler=result_qc)

    @property
    def is_quarter_end(self):
        result_qc = self._query_compiler.dt_is_quarter_end()
        return self._Series(query_compiler=result_qc)

    @property
    def is_year_start(self):
        result_qc = self._query_compiler.dt_is_year_start()
        return self._Series(query_compiler=result_qc)

    @property
    def is_year_end(self):
        result_qc = self._query_compiler.dt_is_year_end()
        return self._Series(query_compiler=result_qc)

    @property
    def is_leap_year(self):
        result_qc = self._query_compiler.dt_is_leap_year()
        return self._Series(query_compiler=result_qc)

    @property
    def daysinmonth(self):
        result_qc = self._query_compiler.dt_daysinmonth()
        return self._Series(query_compiler=result_qc)

    @property
    def days_in_month(self):
        result_qc = self._query_compiler.dt_days_in_month()
        return self._Series(query_compiler=result_qc)

    # ---- metadata ---------------------------------------------------------

    @property
    def tz(self) -> "tzinfo | None":
        series_dtype = self._series.dtype
        # For a plain numpy dtype ``None`` is returned; any other dtype is
        # asked for its ``tz`` attribute.
        return None if isinstance(series_dtype, np.dtype) else series_dtype.tz

    @property
    def freq(self):  # noqa: GL08
        freq_frame = self._query_compiler.dt_freq().to_pandas()
        return freq_frame.squeeze()

    @property
    def unit(self):  # noqa: GL08
        unit_series = self._Series(query_compiler=self._query_compiler.dt_unit())
        # ``iloc[0]`` turns the one-element result into a scalar.
        return unit_series.iloc[0]

    # ---- conversions and transformations -----------------------------------

    def as_unit(self, *args, **kwargs):  # noqa: GL08
        result_qc = self._query_compiler.dt_as_unit(*args, **kwargs)
        return self._Series(query_compiler=result_qc)

    def to_period(self, *args, **kwargs):
        result_qc = self._query_compiler.dt_to_period(*args, **kwargs)
        return self._Series(query_compiler=result_qc)

    def asfreq(self, *args, **kwargs):
        result_qc = self._query_compiler.dt_asfreq(*args, **kwargs)
        return self._Series(query_compiler=result_qc)

    def to_pydatetime(self):
        as_series = self._Series(query_compiler=self._query_compiler.dt_to_pydatetime())
        return as_series.to_numpy()

    def tz_localize(self, *args, **kwargs):
        result_qc = self._query_compiler.dt_tz_localize(*args, **kwargs)
        return self._Series(query_compiler=result_qc)

    def tz_convert(self, *args, **kwargs):
        result_qc = self._query_compiler.dt_tz_convert(*args, **kwargs)
        return self._Series(query_compiler=result_qc)

    def normalize(self, *args, **kwargs):
        result_qc = self._query_compiler.dt_normalize(*args, **kwargs)
        return self._Series(query_compiler=result_qc)

    def strftime(self, *args, **kwargs):
        result_qc = self._query_compiler.dt_strftime(*args, **kwargs)
        return self._Series(query_compiler=result_qc)

    def round(self, *args, **kwargs):
        result_qc = self._query_compiler.dt_round(*args, **kwargs)
        return self._Series(query_compiler=result_qc)

    def floor(self, *args, **kwargs):
        result_qc = self._query_compiler.dt_floor(*args, **kwargs)
        return self._Series(query_compiler=result_qc)

    def ceil(self, *args, **kwargs):
        result_qc = self._query_compiler.dt_ceil(*args, **kwargs)
        return self._Series(query_compiler=result_qc)

    def month_name(self, *args, **kwargs):
        result_qc = self._query_compiler.dt_month_name(*args, **kwargs)
        return self._Series(query_compiler=result_qc)

    def day_name(self, *args, **kwargs):
        result_qc = self._query_compiler.dt_day_name(*args, **kwargs)
        return self._Series(query_compiler=result_qc)

    # ---- timedelta-style members ------------------------------------------

    def total_seconds(self, *args, **kwargs):
        result_qc = self._query_compiler.dt_total_seconds(*args, **kwargs)
        return self._Series(query_compiler=result_qc)

    def to_pytimedelta(self) -> "npt.NDArray[np.object_]":
        as_array = self._query_compiler.dt_to_pytimedelta().to_numpy()
        # Only the first column of the 2-D array is returned.
        return as_array[:, 0]

    @property
    def seconds(self):
        result_qc = self._query_compiler.dt_seconds()
        return self._Series(query_compiler=result_qc)

    @property
    def days(self):
        result_qc = self._query_compiler.dt_days()
        return self._Series(query_compiler=result_qc)

    @property
    def microseconds(self):
        result_qc = self._query_compiler.dt_microseconds()
        return self._Series(query_compiler=result_qc)

    @property
    def nanoseconds(self):
        result_qc = self._query_compiler.dt_nanoseconds()
        return self._Series(query_compiler=result_qc)

    @property
    def components(self):
        from .dataframe import DataFrame as frame_cls

        return frame_cls(query_compiler=self._query_compiler.dt_components())

    def isocalendar(self):
        from .dataframe import DataFrame as frame_cls

        return frame_cls(query_compiler=self._query_compiler.dt_isocalendar())

    # ---- period-style members ---------------------------------------------

    @property
    def qyear(self):  # noqa: GL08
        result_qc = self._query_compiler.dt_qyear()
        return self._Series(query_compiler=result_qc)

    @property
    def start_time(self):
        result_qc = self._query_compiler.dt_start_time()
        return self._Series(query_compiler=result_qc)

    @property
    def end_time(self):
        result_qc = self._query_compiler.dt_end_time()
        return self._Series(query_compiler=result_qc)

    def to_timestamp(self, *args, **kwargs):
        result_qc = self._query_compiler.dt_to_timestamp(*args, **kwargs)
        return self._Series(query_compiler=result_qc)


================================================
FILE: modin/pandas/testing/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
Public testing utility functions.
"""

from __future__ import annotations

from typing import Literal

from pandas._libs import lib
from pandas.testing import assert_extension_array_equal
from pandas.testing import assert_frame_equal as pd_assert_frame_equal
from pandas.testing import assert_index_equal
from pandas.testing import assert_series_equal as pd_assert_series_equal

from modin.utils import _inherit_docstrings, try_cast_to_pandas


@_inherit_docstrings(pd_assert_frame_equal, apilink="pandas.testing.assert_frame_equal")
def assert_frame_equal(
    left,
    right,
    check_dtype: bool | Literal["equiv"] = True,
    check_index_type: bool | Literal["equiv"] = "equiv",
    check_column_type: bool | Literal["equiv"] = "equiv",
    check_frame_type: bool = True,
    check_names: bool = True,
    by_blocks: bool = False,
    check_exact: bool | lib.NoDefault = lib.no_default,
    check_datetimelike_compat: bool = False,
    check_categorical: bool = True,
    check_like: bool = False,
    check_freq: bool = True,
    check_flags: bool = True,
    rtol: float | lib.NoDefault = lib.no_default,
    atol: float | lib.NoDefault = lib.no_default,
    obj: str = "DataFrame",
) -> None:
    # Both operands go through ``try_cast_to_pandas`` first; every comparison
    # option is then handed to pandas' own assertion unchanged, by keyword.
    pandas_left = try_cast_to_pandas(left)
    pandas_right = try_cast_to_pandas(right)
    comparison_options = dict(
        check_dtype=check_dtype,
        check_index_type=check_index_type,
        check_column_type=check_column_type,
        check_frame_type=check_frame_type,
        check_names=check_names,
        by_blocks=by_blocks,
        check_exact=check_exact,
        check_datetimelike_compat=check_datetimelike_compat,
        check_categorical=check_categorical,
        check_like=check_like,
        check_freq=check_freq,
        check_flags=check_flags,
        rtol=rtol,
        atol=atol,
        obj=obj,
    )
    pd_assert_frame_equal(pandas_left, pandas_right, **comparison_options)


@_inherit_docstrings(
    pd_assert_series_equal, apilink="pandas.testing.assert_series_equal"
)
def assert_series_equal(
    left,
    right,
    check_dtype: bool | Literal["equiv"] = True,
    check_index_type: bool | Literal["equiv"] = "equiv",
    check_series_type: bool = True,
    check_names: bool = True,
    check_exact: bool | lib.NoDefault = lib.no_default,
    check_datetimelike_compat: bool = False,
    check_categorical: bool = True,
    check_category_order: bool = True,
    check_freq: bool = True,
    check_flags: bool = True,
    rtol: float | lib.NoDefault = lib.no_default,
    atol: float | lib.NoDefault = lib.no_default,
    obj: str = "Series",
    *,
    check_index: bool = True,
    check_like: bool = False,
) -> None:
    # Both operands go through ``try_cast_to_pandas`` first; every comparison
    # option is then handed to pandas' own assertion unchanged, by keyword.
    pandas_left = try_cast_to_pandas(left)
    pandas_right = try_cast_to_pandas(right)
    comparison_options = dict(
        check_dtype=check_dtype,
        check_index_type=check_index_type,
        check_series_type=check_series_type,
        check_names=check_names,
        check_exact=check_exact,
        check_datetimelike_compat=check_datetimelike_compat,
        check_categorical=check_categorical,
        check_category_order=check_category_order,
        check_freq=check_freq,
        check_flags=check_flags,
        rtol=rtol,
        atol=atol,
        obj=obj,
        check_index=check_index,
        check_like=check_like,
    )
    pd_assert_series_equal(pandas_left, pandas_right, **comparison_options)


# Public names of this module: the two wrappers defined above plus the two
# helpers imported directly from ``pandas.testing``.
__all__ = [
    "assert_extension_array_equal",
    "assert_frame_equal",
    "assert_series_equal",
    "assert_index_equal",
]


================================================
FILE: modin/pandas/utils.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Implement utils for pandas component."""

from __future__ import annotations

from typing import Any, Iterator, Optional, Tuple

import numpy as np
import pandas
from pandas._typing import AggFuncType, AggFuncTypeBase, AggFuncTypeDict, IndexLabel
from pandas.util._decorators import doc

from modin.utils import hashable

# Docstring template for binary operations. Placeholders to fill in:
# ``operation``, ``left``, ``right``, ``bin_op``, ``right_type`` and ``returns``.
_doc_binary_operation = """
Return {operation} of {left} and `{right}` (binary operator `{bin_op}`).

Parameters
----------
{right} : {right_type}
    The second operand to perform computation.

Returns
-------
{returns}
"""

# Message text explaining that assigning a new attribute name does not create
# a column; it points at the pandas attribute-access documentation.
SET_DATAFRAME_ATTRIBUTE_WARNING = (
    "Modin doesn't allow columns to be created via a new attribute name - see "
    + "https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access"
)


# Docstring template for a "get backend" method; ``class_name`` is the only
# placeholder.
GET_BACKEND_DOC = """
Get the backend for this ``{class_name}``.

Returns
-------
str
    The name of the backend.
"""

# Docstring template for a "set backend" method; ``class_name`` is the only
# placeholder and appears several times in the text.
SET_BACKEND_DOC = """
Move the data in this ``{class_name}`` from its current backend to the given one.

Further operations on this ``{class_name}`` will use the new backend instead of
the current one.

Parameters
----------
backend : str
    The name of the backend to set.
inplace : bool, default: False
    Whether to modify this ``{class_name}`` in place.
switch_operation : Optional[str], default: None
    The name of the operation that triggered the set_backend call.
    Internal argument used for displaying progress bar information.

Returns
-------
``{class_name}`` or None
    If ``inplace`` is False, returns a new instance of the ``{class_name}``
    with the given backend. If ``inplace`` is ``True``, returns None.

Notes
-----
This method will attempt to use the starting and new backend's move_from or move_to
methods if the backends implement them. Otherwise, it will

    1) convert the data in this ``{class_name}`` to a pandas DataFrame in this
       Python process
    2) load the data from pandas to the new backend.

Either step may be slow and/or memory-intensive, especially if this
``{class_name}``'s data is large, or one or both of the backends do not store
their data locally.
"""


def cast_function_modin2pandas(func):
    """
    Swap a Modin-defined callable for the pandas attribute of the same name.

    Parameters
    ----------
    func : object
        Anything; only callables whose ``__module__`` is one of the Modin
        frontend modules checked below are replaced.

    Returns
    -------
    object
        The same-named attribute of ``pandas.Series`` or ``pandas.DataFrame``
        when `func` is replaced, otherwise `func` itself.
    """
    if not callable(func):
        return func

    origin = getattr(func, "__module__", None)
    if origin == "modin.pandas.series":
        return getattr(pandas.Series, func.__name__)
    if origin in ("modin.pandas.dataframe", "modin.pandas.base"):
        # FIXME: when the method is defined in `modin.pandas.base` file, then the
        # type cannot be determined, in general there may be an error, but at the
        # moment it is better.
        return getattr(pandas.DataFrame, func.__name__)
    return func


def is_scalar(obj):
    """
    Tell whether `obj` is a scalar without touching Modin frames.

    Behaves like pandas' ``is_scalar``, except that ``BasePandasDataset``
    objects are answered directly. Handing such an object to the pandas
    version makes it access a missing attribute, which causes an index scan
    and triggers execution for lazy frames.

    Parameters
    ----------
    obj : object
        Object to check.

    Returns
    -------
    bool
        True if given object is scalar and False otherwise.
    """
    from pandas.api.types import is_scalar as pandas_is_scalar

    from .base import BasePandasDataset

    if isinstance(obj, BasePandasDataset):
        # Modin datasets are never scalars; skip the pandas check entirely.
        return False
    return pandas_is_scalar(obj)


def get_pandas_backend(dtypes: pandas.Series) -> str | None:
    """
    Pick the backend name implied by `dtypes`.

    Parameters
    ----------
    dtypes : pandas.Series
        DataFrame dtypes.

    Returns
    -------
    str | None
        ``"pyarrow"`` if at least one dtype is a ``pandas.ArrowDtype``,
        otherwise ``None``.
    """
    for dtype in dtypes:
        if isinstance(dtype, pandas.ArrowDtype):
            return "pyarrow"
    return None


def is_full_grab_slice(slc, sequence_len=None):
    """
    Tell whether `slc` selects an entire sequence.

    Parameters
    ----------
    slc : slice
        Slice object to check.
    sequence_len : int, optional
        Length of the sequence to index with the passed `slc`.
        Without it there is no way to judge whether ``slc.stop`` reaches the
        end of the sequence, so only slices with ``.stop is None`` count as
        a full-grab.

    Returns
    -------
    bool
    """
    assert isinstance(slc, slice), "slice object required"
    # Must begin at the first element ...
    if slc.start not in (None, 0):
        return False
    # ... and visit every element.
    if slc.step not in (None, 1):
        return False
    # An open end always reaches the last element.
    if slc.stop is None:
        return True
    # A bounded end can only be judged against a known length.
    if sequence_len is None:
        return False
    return slc.stop >= sequence_len


def from_modin_frame_to_mi(df, sortorder=None, names=None):
    """
    Build a pandas.MultiIndex out of a DataFrame.

    Parameters
    ----------
    df : DataFrame
        DataFrame to be converted to pandas.MultiIndex.
    sortorder : int, default: None
        Level of sortedness (must be lexicographically sorted by that
        level).
    names : list-like, optional
        If no names are provided, use the column names, or tuple of column
        names if the columns is a MultiIndex. If a sequence, overwrite
        names with the given sequence.

    Returns
    -------
    pandas.MultiIndex
        The pandas.MultiIndex representation of the given DataFrame.
    """
    from .dataframe import DataFrame

    if not isinstance(df, DataFrame):
        # Not a Modin frame: hand it straight to the saved pandas function.
        return _original_pandas_MultiIndex_from_frame(df, sortorder, names)

    from modin.error_message import ErrorMessage

    # Report the fallback to pandas, then convert the Modin frame.
    ErrorMessage.default_to_pandas("`MultiIndex.from_frame`")
    pandas_frame = df._to_pandas()
    return _original_pandas_MultiIndex_from_frame(pandas_frame, sortorder, names)


def is_label(obj, label, axis=0):
    """
    Tell whether `obj` has a column or index level named `label`.

    Parameters
    ----------
    obj : modin.pandas.DataFrame, modin.pandas.Series or modin.core.storage_formats.base.BaseQueryCompiler
        Object to check.
    label : object
        Label name to check.
    axis : {0, 1}, default: 0
        Axis to search for `label` along.

    Returns
    -------
    bool
        True if check is successful, False otherwise.
    """
    # Frontend objects expose their query compiler; anything else is used as
    # the query compiler directly.
    qc = getattr(obj, "_query_compiler", obj)
    label_is_hashable = hashable(label)
    if not label_is_hashable:
        return label_is_hashable
    # Look among the labels of the opposite axis first, then among the index
    # level names of `axis`.
    if label in qc.get_axis(axis ^ 1):
        return True
    return label in qc.get_index_names(axis)


def check_both_not_none(option1, option2):
    """
    Tell whether neither `option1` nor `option2` is None.

    Parameters
    ----------
    option1 : Any
        First object to check if not None.
    option2 : Any
        Second object to check if not None.

    Returns
    -------
    bool
        True if both option1 and option2 are not None, False otherwise.
    """
    return option1 is not None and option2 is not None


def broadcast_item(
    obj,
    row_lookup,
    col_lookup,
    item,
    need_columns_reindex: bool = True,
    sort_lookups_and_item: bool = True,
):
    """
    Use NumPy to broadcast or reshape item with reindexing.

    Parameters
    ----------
    obj : DataFrame or Series or query compiler
        The object containing the necessary information about the axes.
    row_lookup : slice or list-like
        The global row index to locate inside of `item`.
    col_lookup : range, array, list or slice
        The global col index to locate inside of `item`.
    item : DataFrame, Series, or query_compiler
        Value that should be broadcast to a new shape of `to_shape`.
    need_columns_reindex : bool, default: True
        In the case of assigning columns to a dataframe (broadcasting is
        part of the flow), reindexing is not needed.
    sort_lookups_and_item : bool, default: True
        If set, sort the lookups in ascending order and the item to match. This is necessary to
        ensure writes across multiple partitions are ordered correctly when the lookups are unsorted.

    Returns
    -------
    (np.ndarray, Optional[Series], array-like, array-like)
        * np.ndarray - `item` after it was broadcasted to `to_shape`.
        * Series - item's dtypes.
        * array-like - sorted version of `row_lookup` (may or may not be the same reference)
        * array-like - sorted version of `col_lookup` (may or may not be the same reference)

    Raises
    ------
    ValueError
        1) If `row_lookup` or `col_lookup` contains values missing in
        DataFrame/Series index or columns correspondingly.
        2) If `item` cannot be broadcast from its own shape to `to_shape`.

    Notes
    -----
    NumPy is memory efficient, there shouldn't be performance issue.

    Slice lookups are resolved against the length of the matching axis of
    `obj`, so open-ended slices (``step``/``start``/``stop`` left as ``None``)
    are handled the same way ``obj.index[slice]`` handles them.
    """
    # It is valid to pass a DataFrame or Series to __setitem__ that is larger than
    # the target the user is trying to overwrite.

    from .dataframe import DataFrame
    from .series import Series

    new_row_len = (
        len(obj.index[row_lookup]) if isinstance(row_lookup, slice) else len(row_lookup)
    )
    new_col_len = (
        len(obj.columns[col_lookup])
        if isinstance(col_lookup, slice)
        else len(col_lookup)
    )
    to_shape = new_row_len, new_col_len

    dtypes = None
    if isinstance(item, (pandas.Series, pandas.DataFrame, Series, DataFrame)):
        # convert indices in lookups to names, as pandas reindex expects them to be so
        axes_to_reindex = {}
        index_values = obj.index[row_lookup]
        if not index_values.equals(item.index):
            axes_to_reindex["index"] = index_values
        if need_columns_reindex and isinstance(item, (pandas.DataFrame, DataFrame)):
            column_values = obj.columns[col_lookup]
            if not column_values.equals(item.columns):
                axes_to_reindex["columns"] = column_values
        # New value for columns/index make that reindex add NaN values
        if axes_to_reindex:
            item = item.reindex(**axes_to_reindex)

        dtypes = item.dtypes
        if not isinstance(dtypes, pandas.Series):
            dtypes = pandas.Series([dtypes])

    try:
        # Cast to numpy drop information about heterogeneous types (cast to common)
        # TODO: we shouldn't do that, maybe there should be the if branch
        item = np.array(item)

        def sort_index(lookup: Any, axis: int) -> Tuple[np.ndarray, np.ndarray]:
            """
            Return the argsort and sorted version of the lookup index.

            Values in the lookup are guaranteed by the indexing frontend to be non-negative.

            The sort operation must be stable to ensure proper behavior for iloc set, which
            will use the last item encountered if two items share an index.

            `axis` (0 for rows, 1 for columns) is only consulted for slice
            lookups, whose open bounds need the axis length to be resolved.
            """
            if isinstance(lookup, slice):
                # Directly calling np.array(slice(...)) does not work, so the
                # slice is expanded to a range first. ``slice.indices`` fills
                # in ``None`` bounds/step from the axis length; substituting 0
                # for them would yield a zero step or cut off position 0 of a
                # descending slice.
                axis_len = len(obj.index if axis == 0 else obj.columns)
                lookup = range(*lookup.indices(axis_len))
            argsort_index = np.argsort(lookup, kind="stable")
            return argsort_index, np.array(lookup)[argsort_index]

        def should_avoid_sort(lookup: Any) -> bool:
            """Tell whether `lookup` can be used as is, without sorting."""
            if not sort_lookups_and_item:
                return True
            if isinstance(lookup, slice):
                # A missing step means +1, i.e. the slice is already ascending
                # (this also covers the empty slice ``slice(None)``).
                return lookup.step is None or lookup.step > 0
            if isinstance(lookup, (range, pandas.RangeIndex)):
                return lookup.step > 0
            return False

        # Fast path to avoid sorting for ascending range/RangeIndex/slice lookups,
        # which are already sorted
        avoid_row_lookup_sort = should_avoid_sort(row_lookup)
        avoid_col_lookup_sort = should_avoid_sort(col_lookup)
        # Sort both the columns and rows if necessary
        if item.ndim >= 2:
            if avoid_row_lookup_sort:
                if not avoid_col_lookup_sort:
                    col_argsort, col_lookup = sort_index(col_lookup, axis=1)
                    item = item[:, col_argsort]
            elif avoid_col_lookup_sort:
                row_argsort, row_lookup = sort_index(row_lookup, axis=0)
                item = item[row_argsort, :]
            else:
                row_argsort, row_lookup = sort_index(row_lookup, axis=0)
                col_argsort, col_lookup = sort_index(col_lookup, axis=1)
                # Use np.ix_ to handle broadcasting errors
                item = item[np.ix_(row_argsort, col_argsort)]
        elif not avoid_row_lookup_sort:
            # Item is 1D, so only sort row indexer
            row_argsort, row_lookup = sort_index(row_lookup, axis=0)
            item = item[row_argsort]
        if dtypes is None:
            # ``new_col_len`` rather than ``len(col_lookup)``: the lookup may
            # still be a slice here, which has no ``len``.
            dtypes = pandas.Series([item.dtype] * new_col_len)
        if np.prod(to_shape) == np.prod(item.shape):
            return item.reshape(to_shape), dtypes, row_lookup, col_lookup
        else:
            return np.broadcast_to(item, to_shape), dtypes, row_lookup, col_lookup
    except ValueError:
        from_shape = np.array(item).shape
        raise ValueError(
            f"could not broadcast input array from shape {from_shape} into shape "
            + f"{to_shape}"
        )


def _walk_aggregation_func(
    key: IndexLabel, value: AggFuncType, depth: int = 0
) -> Iterator[Tuple[IndexLabel, AggFuncTypeBase, Optional[str], bool]]:
    """
    Walk over a function from a dictionary-specified aggregation.

    Note: this function is not supposed to be called directly and
    is used by ``walk_aggregation_dict``.

    Parameters
    ----------
    key : IndexLabel
        A key in a dictionary-specified aggregation for the passed `value`.
        This means an index label to apply the `value` functions against.
    value : AggFuncType
        An aggregation function matching the `key`.
    depth : int, default: 0
        Specifies a nesting level for the `value` where ``depth=0`` is when
        you call the function on a raw dictionary value.

    Yields
    ------
    (col: IndexLabel, func: AggFuncTypeBase, func_name: Optional[str], col_renaming_required: bool)
        Yield an aggregation function with its metadata:
            - `col`: column name to apply the function.
            - `func`: aggregation function to apply to the column.
            - `func_name`: custom function name that was specified in the dict.
            - `col_renaming_required`: whether it's required to rename the
                `col` into ``(col, func_name)``.
    """
    col_renaming_required = bool(depth)

    if isinstance(value, (list, tuple)):
        if depth == 0:
            for val in value:
                yield from _walk_aggregation_func(key, val, depth + 1)
        elif depth == 1:
            if len(value) != 2:
                raise ValueError(
                    f"Incorrect rename format. Renamer must consist of exactly two elements, got: {len(value)}."
                )
            func_name, func = value
            yield key, func, func_name, col_renaming_required
        else:
            # pandas doesn't support this as well
            raise NotImplementedError("Nested renaming is not supported.")
    else:
        yield key, value, None, col_renaming_required


def walk_aggregation_dict(
    agg_dict: AggFuncTypeDict,
) -> Iterator[Tuple[IndexLabel, AggFuncTypeBase, Optional[str], bool]]:
    """
    Flatten an aggregation dictionary into a stream of single functions.

    Parameters
    ----------
    agg_dict : AggFuncTypeDict
        Mapping from an index label to the aggregation function(s) to
        apply against it. Entries are visited in the mapping's own order.

    Yields
    ------
    (col: IndexLabel, func: AggFuncTypeBase, func_name: Optional[str], col_renaming_required: bool)
        Yield an aggregation function with its metadata:
            - `col`: column name to apply the function.
            - `func`: aggregation function to apply to the column.
            - `func_name`: custom function name that was specified in the dict.
            - `col_renaming_required`: whether it's required to rename the
                `col` into ``(col, func_name)``.
    """
    # Each ``(key, value)`` pair is expanded by the per-value walker.
    for entry in agg_dict.items():
        yield from _walk_aggregation_func(*entry)


def _doc_binary_op(operation, bin_op, left="Series", right="right", returns="Series"):
    """
    Return callable documenting `Series` or `DataFrame` binary operator.

    Parameters
    ----------
    operation : str
        Operation name.
    bin_op : str
        Binary operation name.
    left : str, default: 'Series'
        The left object to document.
    right : str, default: 'right'
        The right operand name.
    returns : str, default: 'Series'
        Type of returns.

    Returns
    -------
    callable
    """
    if left == "Series":
        right_type = "Series or scalar value"
    elif left == "DataFrame":
        right_type = "DataFrame, Series or scalar value"
    elif left == "BasePandasDataset":
        right_type = "BasePandasDataset or scalar value"
    else:
        raise NotImplementedError(
            f"Only 'BasePandasDataset', `DataFrame` and 'Series' `left` are allowed, actually passed: {left}"
        )
    doc_op = doc(
        _doc_binary_operation,
        operation=operation,
        right=right,
        right_type=right_type,
        bin_op=bin_op,
        returns=returns,
        left=left,
    )

    return doc_op


# Keep a handle on the stock ``pandas.MultiIndex.from_frame`` so the original
# implementation stays reachable after it is replaced below.
_original_pandas_MultiIndex_from_frame = pandas.MultiIndex.from_frame
# Import-time monkey patch: from here on ``pandas.MultiIndex.from_frame``
# resolves to ``from_modin_frame_to_mi`` for every user of pandas in the process.
pandas.MultiIndex.from_frame = from_modin_frame_to_mi


================================================
FILE: modin/pandas/window.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Implement Window and Rolling public API."""

from __future__ import annotations

from typing import TYPE_CHECKING, Optional, Union

import pandas.core.window.rolling
from pandas.core.dtypes.common import is_list_like

from modin.error_message import ErrorMessage
from modin.logging import ClassLogger
from modin.pandas.utils import cast_function_modin2pandas
from modin.utils import _inherit_docstrings

if TYPE_CHECKING:
    from modin.core.storage_formats import BaseQueryCompiler
    from modin.pandas import DataFrame, Series


@_inherit_docstrings(pandas.core.window.rolling.Window)
class Window(ClassLogger):
    # NOTE: the class and its public methods carry no docstrings of their own;
    # presumably the decorator above supplies them from the pandas counterpart.

    # Object the window was created from; results are rebuilt with its constructor.
    _dataframe: Union[DataFrame, Series]
    # Query compiler of `_dataframe`, captured when the window is constructed.
    _query_compiler: BaseQueryCompiler

    def __init__(
        self,
        dataframe: Union[DataFrame, Series],
        window=None,
        min_periods=None,
        center=False,
        win_type=None,
        on=None,
        axis=0,
        closed=None,
        step=None,
        method="single",
    ):
        self._dataframe = dataframe
        self._query_compiler = dataframe._query_compiler
        self.axis = axis
        # Window configuration, handed as-is to every ``window_*`` query compiler call.
        self.window_kwargs = dict(
            window=window,
            min_periods=min_periods,
            center=center,
            win_type=win_type,
            on=on,
            axis=axis,
            closed=closed,
            step=step,
            method=method,
        )

    def mean(self, *args, **kwargs):
        result_qc = self._query_compiler.window_mean(
            self.axis, self.window_kwargs, *args, **kwargs
        )
        return self._dataframe.__constructor__(query_compiler=result_qc)

    def sum(self, *args, **kwargs):
        result_qc = self._query_compiler.window_sum(
            self.axis, self.window_kwargs, *args, **kwargs
        )
        return self._dataframe.__constructor__(query_compiler=result_qc)

    def var(self, ddof=1, *args, **kwargs):
        # `ddof` travels positionally, right after the window configuration.
        result_qc = self._query_compiler.window_var(
            self.axis, self.window_kwargs, ddof, *args, **kwargs
        )
        return self._dataframe.__constructor__(query_compiler=result_qc)

    def std(self, ddof=1, *args, **kwargs):
        # `ddof` travels positionally, right after the window configuration.
        result_qc = self._query_compiler.window_std(
            self.axis, self.window_kwargs, ddof, *args, **kwargs
        )
        return self._dataframe.__constructor__(query_compiler=result_qc)


@_inherit_docstrings(
    pandas.core.window.rolling.Rolling,
    excluded=[pandas.core.window.rolling.Rolling.__init__],
)
class Rolling(ClassLogger):
    # NOTE: the class and its public methods carry no docstrings of their own;
    # presumably the decorator above supplies them from the pandas counterpart.

    def __init__(
        self,
        dataframe,
        window=None,
        min_periods=None,
        center=False,
        win_type=None,
        on=None,
        axis=0,
        closed=None,
        step=None,
        method="single",
    ):
        # Reject the unsupported `step` before any state is stored.
        if step is not None:
            raise NotImplementedError("step parameter is not implemented yet.")
        self._dataframe = dataframe
        self._query_compiler = dataframe._query_compiler
        self.axis = axis
        # Window configuration, handed as-is to every ``rolling_*`` query compiler call.
        self.rolling_kwargs = dict(
            window=window,
            min_periods=min_periods,
            center=center,
            win_type=win_type,
            on=on,
            axis=axis,
            closed=closed,
            step=step,
            method=method,
        )

    def _call_qc_method(self, method_name, *args, **kwargs):
        """
        Dispatch a rolling aggregation to the matching query compiler method.

        Parameters
        ----------
        method_name : str
            Name of the aggregation; ``rolling_<method_name>`` is looked up
            on the query compiler.
        *args : tuple
            Positional arguments to pass to the query compiler method.
        **kwargs : dict
            Keyword arguments to pass to the query compiler method.

        Returns
        -------
        BaseQueryCompiler
            QueryCompiler holding the result of the aggregation.
        """
        return getattr(self._query_compiler, f"rolling_{method_name}")(
            self.axis, self.rolling_kwargs, *args, **kwargs
        )

    def _aggregate(self, method_name, *args, **kwargs):
        """
        Execute a rolling aggregation and wrap its result.

        Parameters
        ----------
        method_name : str
            Name of the aggregation.
        *args : tuple
            Positional arguments to pass to the aggregation.
        **kwargs : dict
            Keyword arguments to pass to the aggregation.

        Returns
        -------
        DataFrame or Series
            Result of the aggregation, built with the constructor of the
            object the rolling window was created from.
        """
        return self._dataframe.__constructor__(
            query_compiler=self._call_qc_method(method_name, *args, **kwargs)
        )

    def count(self):
        return self._aggregate("count")

    def sem(self, *args, **kwargs):
        return self._aggregate("sem", *args, **kwargs)

    def sum(self, *args, **kwargs):
        return self._aggregate("sum", *args, **kwargs)

    def mean(self, *args, **kwargs):
        return self._aggregate("mean", *args, **kwargs)

    def median(self, **kwargs):
        return self._aggregate("median", **kwargs)

    def var(self, ddof=1, *args, **kwargs):
        return self._aggregate("var", ddof, *args, **kwargs)

    def std(self, ddof=1, *args, **kwargs):
        return self._aggregate("std", ddof, *args, **kwargs)

    def min(self, *args, **kwargs):
        return self._aggregate("min", *args, **kwargs)

    def max(self, *args, **kwargs):
        return self._aggregate("max", *args, **kwargs)

    def corr(self, other=None, pairwise=None, *args, **kwargs):
        from .dataframe import DataFrame
        from .series import Series

        # A Modin operand is materialized as pandas; a Series is squeezed too.
        if isinstance(other, (DataFrame, Series)):
            pandas_other = other._query_compiler.to_pandas()
            if isinstance(other, Series):
                pandas_other = pandas_other.squeeze()
            other = pandas_other

        return self._aggregate("corr", other, pairwise, *args, **kwargs)

    def cov(self, other=None, pairwise=None, ddof: Optional[int] = 1, **kwargs):
        from .dataframe import DataFrame
        from .series import Series

        # A Modin operand is materialized as pandas; a Series is squeezed too.
        if isinstance(other, (DataFrame, Series)):
            pandas_other = other._query_compiler.to_pandas()
            if isinstance(other, Series):
                pandas_other = pandas_other.squeeze()
            other = pandas_other

        return self._aggregate("cov", other, pairwise, ddof, **kwargs)

    def skew(self, **kwargs):
        return self._aggregate("skew", **kwargs)

    def kurt(self, **kwargs):
        return self._aggregate("kurt", **kwargs)

    def apply(
        self,
        func,
        raw=False,
        engine="cython",
        engine_kwargs=None,
        args=None,
        kwargs=None,
    ):
        pandas_func = cast_function_modin2pandas(func)
        return self._aggregate(
            "apply", pandas_func, raw, engine, engine_kwargs, args, kwargs
        )

    def aggregate(
        self,
        func,
        *args,
        **kwargs,
    ):
        from .dataframe import DataFrame

        # The query compiler result is always wrapped in a DataFrame first and
        # reshaped afterwards depending on what the window was created from.
        result_qc = self._call_qc_method("aggregate", func, *args, **kwargs)
        result = DataFrame(query_compiler=result_qc)

        if isinstance(self._dataframe, DataFrame):
            return result
        if not (is_list_like(func) and result.columns.nlevels > 1):
            return result.squeeze()
        # List-like `func` with multi-level columns: drop one column level.
        result.columns = result.columns.droplevel()
        return result

    agg = aggregate

    def quantile(self, q, interpolation="linear", **kwargs):
        return self._aggregate("quantile", q, interpolation, **kwargs)

    def rank(
        self, method="average", ascending=True, pct=False, numeric_only=False, **kwargs
    ):
        return self._aggregate("rank", method, ascending, pct, numeric_only, **kwargs)


@_inherit_docstrings(Rolling)
class RollingGroupby(Rolling):
    # NOTE: public methods carry no docstrings of their own; presumably the
    # decorator above supplies them from ``Rolling``.

    def __init__(self, groupby_obj, *args, **kwargs):
        # Remember what the caller asked for; defaults to ``True`` when the
        # groupby kwargs don't mention `as_index`.
        self._as_index = groupby_obj._kwargs.get("as_index", True)
        # Internally the aggregation always runs on an ``as_index=True`` groupby;
        # `_aggregate` moves the group labels back into columns when required.
        self._groupby_obj = (
            groupby_obj if self._as_index else groupby_obj._override(as_index=True)
        )
        super().__init__(self._groupby_obj._df, *args, **kwargs)

    def sem(self, *args, **kwargs):
        # NOTE(review): the warning is emitted regardless of the actual `as_index`
        # value, although its text only talks about 'as_index=False' - confirm
        # whether that is intended.
        ErrorMessage.mismatch_with_pandas(
            operation="RollingGroupby.sem() when 'as_index=False'",
            message=(
                "The group columns won't be involved in the aggregation.\n"
                + "See this gh-issue for more information: https://github.com/modin-project/modin/issues/6291"
            ),
        )
        return super().sem(*args, **kwargs)

    def corr(self, other=None, pairwise=None, *args, **kwargs):
        # pandas behavior is that it always assumes that 'as_index=True' for the '.corr()' method
        # `other` and `pairwise` are passed positionally so that extra positional
        # arguments in `args` can't collide with them.
        return super().corr(other, pairwise, *args, as_index=True, **kwargs)

    def cov(self, other=None, pairwise=None, ddof: Optional[int] = 1, **kwargs):
        # pandas behavior is that it always assumes that 'as_index=True' for the '.cov()' method
        # `ddof` has to be forwarded explicitly, otherwise the parent's default
        # would silently replace the value supplied by the caller.
        return super().cov(
            other=other, pairwise=pairwise, ddof=ddof, as_index=True, **kwargs
        )

    def _aggregate(self, method_name, *args, as_index=None, **kwargs):
        """
        Run the specified rolling aggregation.

        Parameters
        ----------
        method_name : str
            Name of the aggregation.
        *args : tuple
            Positional arguments to pass to the aggregation.
        as_index : bool, optional
            Whether the result should have the group labels as index levels or as columns.
            If not specified the parameter value will be taken from groupby kwargs.
        **kwargs : dict
            Keyword arguments to pass to the aggregation.

        Returns
        -------
        DataFrame or Series
            Result of the aggregation.
        """
        res = self._groupby_obj._wrap_aggregation(
            qc_method=type(self._query_compiler).groupby_rolling,
            numeric_only=False,
            agg_args=args,
            agg_kwargs=kwargs,
            agg_func=method_name,
            rolling_kwargs=self.rolling_kwargs,
        )

        if as_index is None:
            as_index = self._as_index

        if not as_index:
            # The leading index levels (one per internal `by`) become regular columns.
            res = res.reset_index(
                level=list(range(len(self._groupby_obj._internal_by))),
                drop=False,
            )

        return res

    def _call_qc_method(self, method_name, *args, **kwargs):
        # Go through the groupby-aware `_aggregate` and unwrap its query compiler.
        return self._aggregate(method_name, *args, **kwargs)._query_compiler


@_inherit_docstrings(
    pandas.core.window.expanding.Expanding,
    excluded=[pandas.core.window.expanding.Expanding.__init__],
)
class Expanding(ClassLogger):
    # NOTE: the class and its public methods carry no docstrings of their own;
    # presumably the decorator above supplies them from the pandas counterpart.

    def __init__(self, dataframe, min_periods=1, axis=0, method="single"):
        self._dataframe = dataframe
        self._query_compiler = dataframe._query_compiler
        # Expanding-window configuration, forwarded as a list to every
        # ``expanding_*`` query compiler call.
        self.expanding_args = [min_periods, axis, method]
        self.axis = axis

    def aggregate(self, func, *args, **kwargs):
        from .dataframe import DataFrame

        # The query compiler result is always wrapped in a DataFrame first and
        # reshaped afterwards depending on what the window was created from.
        dataframe = DataFrame(
            query_compiler=self._query_compiler.expanding_aggregate(
                self.axis, self.expanding_args, func, *args, **kwargs
            )
        )
        if isinstance(self._dataframe, DataFrame):
            return dataframe
        elif is_list_like(func) and dataframe.columns.nlevels > 1:
            # Same guard as in ``Rolling.aggregate``: ``droplevel()`` raises on
            # single-level columns, so only drop a level when there is a spare one.
            dataframe.columns = dataframe.columns.droplevel()
            return dataframe
        else:
            return dataframe.squeeze()

    def sum(self, *args, **kwargs):
        return self._dataframe.__constructor__(
            query_compiler=self._query_compiler.expanding_sum(
                self.axis, self.expanding_args, *args, **kwargs
            )
        )

    def min(self, *args, **kwargs):
        return self._dataframe.__constructor__(
            query_compiler=self._query_compiler.expanding_min(
                self.axis, self.expanding_args, *args, **kwargs
            )
        )

    def max(self, *args, **kwargs):
        return self._dataframe.__constructor__(
            query_compiler=self._query_compiler.expanding_max(
                self.axis, self.expanding_args, *args, **kwargs
            )
        )

    def mean(self, *args, **kwargs):
        return self._dataframe.__constructor__(
            query_compiler=self._query_compiler.expanding_mean(
                self.axis, self.expanding_args, *args, **kwargs
            )
        )

    def median(self, numeric_only=False, engine=None, engine_kwargs=None, **kwargs):
        return self._dataframe.__constructor__(
            query_compiler=self._query_compiler.expanding_median(
                self.axis,
                self.expanding_args,
                numeric_only=numeric_only,
                engine=engine,
                engine_kwargs=engine_kwargs,
                **kwargs,
            )
        )

    def var(self, *args, **kwargs):
        return self._dataframe.__constructor__(
            query_compiler=self._query_compiler.expanding_var(
                self.axis, self.expanding_args, *args, **kwargs
            )
        )

    def std(self, *args, **kwargs):
        return self._dataframe.__constructor__(
            query_compiler=self._query_compiler.expanding_std(
                self.axis, self.expanding_args, *args, **kwargs
            )
        )

    def count(self, *args, **kwargs):
        return self._dataframe.__constructor__(
            query_compiler=self._query_compiler.expanding_count(
                self.axis, self.expanding_args, *args, **kwargs
            )
        )

    def cov(self, other=None, pairwise=None, ddof=1, numeric_only=False, **kwargs):
        from .dataframe import DataFrame
        from .series import Series

        # A Modin `other` is handed over as its query compiler; the two
        # ``squeeze_*`` flags record which operands are Series.
        return self._dataframe.__constructor__(
            query_compiler=self._query_compiler.expanding_cov(
                self.axis,
                self.expanding_args,
                squeeze_self=isinstance(self._dataframe, Series),
                squeeze_other=isinstance(other, Series),
                other=(
                    other._query_compiler
                    if isinstance(other, (Series, DataFrame))
                    else other
                ),
                pairwise=pairwise,
                ddof=ddof,
                numeric_only=numeric_only,
                **kwargs,
            )
        )

    def corr(self, other=None, pairwise=None, ddof=1, numeric_only=False, **kwargs):
        from .dataframe import DataFrame
        from .series import Series

        # A Modin `other` is handed over as its query compiler; the two
        # ``squeeze_*`` flags record which operands are Series.
        return self._dataframe.__constructor__(
            query_compiler=self._query_compiler.expanding_corr(
                self.axis,
                self.expanding_args,
                squeeze_self=isinstance(self._dataframe, Series),
                squeeze_other=isinstance(other, Series),
                other=(
                    other._query_compiler
                    if isinstance(other, (Series, DataFrame))
                    else other
                ),
                pairwise=pairwise,
                ddof=ddof,
                numeric_only=numeric_only,
                **kwargs,
            )
        )

    def sem(self, ddof=1, numeric_only=False, *args, **kwargs):
        # Positional unpacking is written before the keywords so the call reads
        # in the order the arguments are actually bound.
        return self._dataframe.__constructor__(
            query_compiler=self._query_compiler.expanding_sem(
                self.axis,
                self.expanding_args,
                *args,
                ddof=ddof,
                numeric_only=numeric_only,
                **kwargs,
            )
        )

    def skew(self, numeric_only=False, **kwargs):
        return self._dataframe.__constructor__(
            query_compiler=self._query_compiler.expanding_skew(
                self.axis, self.expanding_args, numeric_only=numeric_only, **kwargs
            )
        )

    def kurt(self, **kwargs):
        return self._dataframe.__constructor__(
            query_compiler=self._query_compiler.expanding_kurt(
                self.axis, self.expanding_args, **kwargs
            )
        )

    def quantile(self, q, interpolation="linear", **kwargs):
        return self._dataframe.__constructor__(
            query_compiler=self._query_compiler.expanding_quantile(
                self.axis, self.expanding_args, q, interpolation, **kwargs
            )
        )

    def rank(
        self, method="average", ascending=True, pct=False, numeric_only=False, **kwargs
    ):
        return self._dataframe.__constructor__(
            query_compiler=self._query_compiler.expanding_rank(
                self.axis,
                self.expanding_args,
                method,
                ascending,
                pct,
                numeric_only,
                **kwargs,
            )
        )


================================================
FILE: modin/polars/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

from modin.polars.dataframe import DataFrame
from modin.polars.series import Series

# Names exported by ``from modin.polars import *``.
__all__ = ["DataFrame", "Series"]


================================================
FILE: modin/polars/base.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Implement DataFrame/Series public API as polars does."""

from __future__ import annotations

from typing import TYPE_CHECKING, Any, Sequence

import polars

from modin.core.storage_formats import BaseQueryCompiler

if TYPE_CHECKING:
    import numpy as np

    from modin.polars import DataFrame, Series


class BasePolarsDataset:

    _query_compiler: BaseQueryCompiler

    @property
    def __constructor__(self):
        """
        DataFrame constructor.

        Returns:
            Constructor of the DataFrame
        """
        return type(self)

    def __eq__(self, other) -> "BasePolarsDataset":
        """
        Apply ``==`` by delegating to the query compiler's ``eq``.

        Args:
            other: Right operand. A ``BasePolarsDataset`` contributes its query
                compiler; anything else is forwarded unchanged.

        Returns:
            New dataset wrapping the query compiler returned by ``eq``.
        """
        rhs = other
        if isinstance(rhs, BasePolarsDataset):
            rhs = rhs._query_compiler
        result_qc = self._query_compiler.eq(rhs)
        return self.__constructor__(_query_compiler=result_qc)

    def __ne__(self, other) -> "BasePolarsDataset":
        """
        Apply ``!=`` by delegating to the query compiler's ``ne``.

        Args:
            other: Right operand. A ``BasePolarsDataset`` contributes its query
                compiler; anything else is forwarded unchanged.

        Returns:
            New dataset wrapping the query compiler returned by ``ne``.
        """
        rhs = other
        if isinstance(rhs, BasePolarsDataset):
            rhs = rhs._query_compiler
        result_qc = self._query_compiler.ne(rhs)
        return self.__constructor__(_query_compiler=result_qc)

    def __add__(self, other) -> "BasePolarsDataset":
        """
        Apply ``+`` by delegating to the query compiler's ``add``.

        Args:
            other: Right operand. A ``BasePolarsDataset`` contributes its query
                compiler; anything else is forwarded unchanged.

        Returns:
            New dataset wrapping the query compiler returned by ``add``.
        """
        rhs = other
        if isinstance(rhs, BasePolarsDataset):
            rhs = rhs._query_compiler
        result_qc = self._query_compiler.add(rhs)
        return self.__constructor__(_query_compiler=result_qc)

    def __sub__(self, other) -> "BasePolarsDataset":
        """
        Apply ``-`` by delegating to the query compiler's ``sub``.

        Args:
            other: Right operand. A ``BasePolarsDataset`` contributes its query
                compiler; anything else is forwarded unchanged.

        Returns:
            New dataset wrapping the query compiler returned by ``sub``.
        """
        rhs = other
        if isinstance(rhs, BasePolarsDataset):
            rhs = rhs._query_compiler
        result_qc = self._query_compiler.sub(rhs)
        return self.__constructor__(_query_compiler=result_qc)

    def __mul__(self, other) -> "BasePolarsDataset":
        """
        Apply ``*`` by delegating to the query compiler's ``mul``.

        Args:
            other: Right operand. A ``BasePolarsDataset`` contributes its query
                compiler; anything else is forwarded unchanged.

        Returns:
            New dataset wrapping the query compiler returned by ``mul``.
        """
        rhs = other
        if isinstance(rhs, BasePolarsDataset):
            rhs = rhs._query_compiler
        result_qc = self._query_compiler.mul(rhs)
        return self.__constructor__(_query_compiler=result_qc)

    def __truediv__(self, other) -> "BasePolarsDataset":
        """
        Apply ``/`` by delegating to the query compiler's ``truediv``.

        Args:
            other: Right operand. A ``BasePolarsDataset`` contributes its query
                compiler; anything else is forwarded unchanged.

        Returns:
            New dataset wrapping the query compiler returned by ``truediv``.
        """
        rhs = other
        if isinstance(rhs, BasePolarsDataset):
            rhs = rhs._query_compiler
        result_qc = self._query_compiler.truediv(rhs)
        return self.__constructor__(_query_compiler=result_qc)

    def __floordiv__(self, other) -> "BasePolarsDataset":
        """
        Apply ``//`` by delegating to the query compiler's ``floordiv``.

        Args:
            other: Right operand. A ``BasePolarsDataset`` contributes its query
                compiler; anything else is forwarded unchanged.

        Returns:
            New dataset wrapping the query compiler returned by ``floordiv``.
        """
        rhs = other
        if isinstance(rhs, BasePolarsDataset):
            rhs = rhs._query_compiler
        result_qc = self._query_compiler.floordiv(rhs)
        return self.__constructor__(_query_compiler=result_qc)

    def __mod__(self, other) -> "BasePolarsDataset":
        """
        Apply ``%`` by delegating to the query compiler's ``mod``.

        Args:
            other: Right operand. A ``BasePolarsDataset`` contributes its query
                compiler; anything else is forwarded unchanged.

        Returns:
            New dataset wrapping the query compiler returned by ``mod``.
        """
        rhs = other
        if isinstance(rhs, BasePolarsDataset):
            rhs = rhs._query_compiler
        result_qc = self._query_compiler.mod(rhs)
        return self.__constructor__(_query_compiler=result_qc)

    def __pow__(self, other) -> "BasePolarsDataset":
        """
        Apply ``**`` by delegating to the query compiler's ``pow``.

        Args:
            other: Right operand. A ``BasePolarsDataset`` contributes its query
                compiler; anything else is forwarded unchanged.

        Returns:
            New dataset wrapping the query compiler returned by ``pow``.
        """
        rhs = other
        if isinstance(rhs, BasePolarsDataset):
            rhs = rhs._query_compiler
        result_qc = self._query_compiler.pow(rhs)
        return self.__constructor__(_query_compiler=result_qc)

    def __and__(self, other) -> "BasePolarsDataset":
        """
        Apply ``&`` by delegating to the query compiler's ``__and__``.

        Args:
            other: Right operand. A ``BasePolarsDataset`` contributes its query
                compiler; anything else is forwarded unchanged.

        Returns:
            New dataset wrapping the query compiler returned by ``__and__``.
        """
        rhs = other
        if isinstance(rhs, BasePolarsDataset):
            rhs = rhs._query_compiler
        result_qc = self._query_compiler.__and__(rhs)
        return self.__constructor__(_query_compiler=result_qc)

    def __or__(self, other) -> "BasePolarsDataset":
        """
        Apply ``|`` by delegating to the query compiler's ``__or__``.

        Args:
            other: Right operand. A ``BasePolarsDataset`` contributes its query
                compiler; anything else is forwarded unchanged.

        Returns:
            New dataset wrapping the query compiler returned by ``__or__``.
        """
        rhs = other
        if isinstance(rhs, BasePolarsDataset):
            rhs = rhs._query_compiler
        result_qc = self._query_compiler.__or__(rhs)
        return self.__constructor__(_query_compiler=result_qc)

    def __xor__(self, other) -> "BasePolarsDataset":
        """
        Apply ``^`` by delegating to the query compiler's ``__xor__``.

        Args:
            other: Right operand. A ``BasePolarsDataset`` contributes its query
                compiler; anything else is forwarded unchanged.

        Returns:
            New dataset wrapping the query compiler returned by ``__xor__``.
        """
        rhs = other
        if isinstance(rhs, BasePolarsDataset):
            rhs = rhs._query_compiler
        result_qc = self._query_compiler.__xor__(rhs)
        return self.__constructor__(_query_compiler=result_qc)

    def __lt__(self, other) -> "BasePolarsDataset":
        """
        Apply ``<`` by delegating to the query compiler's ``lt``.

        Args:
            other: Right operand. A ``BasePolarsDataset`` contributes its query
                compiler; anything else is forwarded unchanged.

        Returns:
            New dataset wrapping the query compiler returned by ``lt``.
        """
        rhs = other
        if isinstance(rhs, BasePolarsDataset):
            rhs = rhs._query_compiler
        result_qc = self._query_compiler.lt(rhs)
        return self.__constructor__(_query_compiler=result_qc)

    def __le__(self, other) -> "BasePolarsDataset":
        """
        Apply ``<=`` by delegating to the query compiler's ``le``.

        Args:
            other: Right operand. A ``BasePolarsDataset`` contributes its query
                compiler; anything else is forwarded unchanged.

        Returns:
            New dataset wrapping the query compiler returned by ``le``.
        """
        rhs = other
        if isinstance(rhs, BasePolarsDataset):
            rhs = rhs._query_compiler
        result_qc = self._query_compiler.le(rhs)
        return self.__constructor__(_query_compiler=result_qc)

    def __gt__(self, other) -> "BasePolarsDataset":
        """
        Apply ``>`` by delegating to the query compiler's ``gt``.

        Args:
            other: Right operand. A ``BasePolarsDataset`` contributes its query
                compiler; anything else is forwarded unchanged.

        Returns:
            New dataset wrapping the query compiler returned by ``gt``.
        """
        rhs = other
        if isinstance(rhs, BasePolarsDataset):
            rhs = rhs._query_compiler
        result_qc = self._query_compiler.gt(rhs)
        return self.__constructor__(_query_compiler=result_qc)

    def __ge__(self, other) -> "BasePolarsDataset":
        """
        Apply ``>=`` by delegating to the query compiler's ``ge``.

        Args:
            other: Right operand. A ``BasePolarsDataset`` contributes its query
                compiler; anything else is forwarded unchanged.

        Returns:
            New dataset wrapping the query compiler returned by ``ge``.
        """
        rhs = other
        if isinstance(rhs, BasePolarsDataset):
            rhs = rhs._query_compiler
        result_qc = self._query_compiler.ge(rhs)
        return self.__constructor__(_query_compiler=result_qc)

    def __invert__(self) -> "BasePolarsDataset":
        """
        Apply ``~`` by delegating to the query compiler's ``invert``.

        Returns:
            New dataset wrapping the query compiler returned by ``invert``.
        """
        result_qc = self._query_compiler.invert()
        return self.__constructor__(_query_compiler=result_qc)

    def __neg__(self) -> "BasePolarsDataset":
        """
        Apply unary ``-`` by delegating to the query compiler's ``negative``.

        Returns:
            New dataset wrapping the query compiler returned by ``negative``.
        """
        result_qc = self._query_compiler.negative()
        return self.__constructor__(_query_compiler=result_qc)

    def __abs__(self) -> "BasePolarsDataset":
        """
        Apply ``abs()`` by delegating to the query compiler's ``abs``.

        Returns:
            New dataset wrapping the query compiler returned by ``abs``.
        """
        result_qc = self._query_compiler.abs()
        return self.__constructor__(_query_compiler=result_qc)

    def is_duplicated(self):
        """
        Flag the rows of the DataFrame that are duplicates.

        Uses the query compiler's ``duplicated`` with ``keep=False``.

        Returns:
            DataFrame with True for each duplicate row, and False for unique rows.
        """
        duplicate_mask = self._query_compiler.duplicated(keep=False)
        return self.__constructor__(_query_compiler=duplicate_mask)

    def is_empty(self) -> bool:
        """
        Determine whether the DataFrame is empty.

        Returns:
            True if the DataFrame is empty, False otherwise
        """
        return self.height == 0

    def is_unique(self):
        """
        Flag the rows of the DataFrame that are unique.

        Computed as the inverse of the query compiler's ``duplicated`` result
        obtained with ``keep=False``.

        Returns:
            DataFrame with True for each unique row, and False for duplicate rows.
        """
        duplicate_mask = self._query_compiler.duplicated(keep=False)
        unique_mask = duplicate_mask.invert()
        return self.__constructor__(_query_compiler=unique_mask)

    def n_chunks(self, strategy: str = "first") -> int | list[int]:
        """
        Placeholder for the chunk count query; not implemented yet.

        Args:
            strategy: Currently unused.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("not yet")

    def to_arrow(self):
        """
        Convert the DataFrame to Arrow format.

        The data is first materialized as a pandas object, then handed to
        polars, whose ``to_arrow`` produces the result.

        Returns:
            Arrow representation of the DataFrame.
        """
        pandas_obj = self._query_compiler.to_pandas()
        polars_obj = polars.from_pandas(pandas_obj)
        return polars_obj.to_arrow()

    def to_jax(self, device=None):
        """
        Convert the DataFrame to JAX format.

        The data is first materialized as a pandas object, then handed to
        polars, whose ``to_jax`` produces the result.

        Args:
            device: The device to use.

        Returns:
            JAX representation of the DataFrame.
        """
        pandas_obj = self._query_compiler.to_pandas()
        polars_obj = polars.from_pandas(pandas_obj)
        return polars_obj.to_jax(device=device)

    def to_numpy(
        self,
        *,
        writable: bool = False,
        allow_copy: bool = True,
        use_pyarrow: bool | None = None,
        zero_copy_only: bool | None = None,
    ) -> "np.ndarray":
        """
        Convert the DataFrame to a NumPy representation.

        The data is first materialized as a pandas object, then handed to
        polars, whose ``to_numpy`` receives all the options unchanged.

        Args:
            writable: Whether the NumPy array should be writable.
            allow_copy: Whether to allow copying the data.
            use_pyarrow: Whether to use PyArrow for conversion.
            zero_copy_only: Whether to use zero-copy conversion only.

        Returns:
            NumPy representation of the DataFrame.
        """
        pandas_obj = self._query_compiler.to_pandas()
        polars_obj = polars.from_pandas(pandas_obj)
        return polars_obj.to_numpy(
            writable=writable,
            allow_copy=allow_copy,
            use_pyarrow=use_pyarrow,
            zero_copy_only=zero_copy_only,
        )

    def to_torch(self):
        """
        Convert the DataFrame to PyTorch format.

        The data is first materialized as a pandas object, then handed to
        polars, whose ``to_torch`` produces the result.

        Returns:
            PyTorch representation of the DataFrame.
        """
        pandas_obj = self._query_compiler.to_pandas()
        polars_obj = polars.from_pandas(pandas_obj)
        return polars_obj.to_torch()

    def bottom_k(
        self,
        k: int,
        *,
        by,
        descending: bool | Sequence[bool] = False,
        nulls_last: bool | Sequence[bool] | None = None,
        maintain_order: bool | None = None,
    ) -> "BasePolarsDataset":
        """
        Placeholder for the bottom-k selection; not implemented yet.

        Args:
            k: Currently unused.
            by: Currently unused.
            descending: Currently unused.
            nulls_last: Currently unused.
            maintain_order: Currently unused.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("not yet")

    def cast(self, dtypes, *, strict: bool = True) -> "BasePolarsDataset":
        """
        Cast the DataFrame to the given dtypes.

        Args:
            dtypes: Dtypes to cast the DataFrame to; forwarded to the query
                compiler's ``astype``.
            strict: Whether to enforce strict casting. Currently ignored.

        Returns:
            DataFrame with the new dtypes.
        """
        # TODO: support strict
        recast_qc = self._query_compiler.astype(dtypes)
        return self.__constructor__(_query_compiler=recast_qc)

    def clone(self) -> "BasePolarsDataset":
        """
        Produce a copy of this dataset by delegating to ``self.copy()``.

        Returns:
            Cloned DataFrame.
        """
        duplicate = self.copy()
        return duplicate

    def drop_nulls(self, subset=None):
        """
        Remove rows that contain a null value.

        Args:
            subset: Columns to inspect for nulls, forwarded to ``dropna``.

        Returns:
            DataFrame with the rows with null values dropped.
        """
        # how="any": a single null among the inspected columns drops the row.
        without_nulls = self._query_compiler.dropna(subset=subset, how="any")
        return self.__constructor__(_query_compiler=without_nulls)

    def explode(self, columns: str, *more_columns: str) -> "BasePolarsDataset":
        """
        Explode the given columns to long format.

        Args:
            columns: Column to explode (passed through as-is when it is the
                only argument).
            more_columns: Additional columns; when present, all names are
                gathered into one list.

        Returns:
            DataFrame with the columns exploded.
        """
        targets = [columns, *more_columns] if more_columns else columns
        exploded = self._query_compiler.explode(targets)
        return self.__constructor__(_query_compiler=exploded)

    def extend(self, other: "BasePolarsDataset") -> "BasePolarsDataset":
        """
        Append the rows of ``other`` to this dataset in place.

        Args:
            other: DataFrame to extend with.

        Returns:
            This same object (returned for convenience); its query compiler is
            replaced by the row-wise concatenation.
        """
        appended = self._query_compiler.concat(axis=0, other=other._query_compiler)
        self._query_compiler = appended
        return self

    def fill_nan(self, value):
        """
        Fill NaN values with the given value.

        Args:
            value: Value to fill NaN values with.

        Returns:
            DataFrame with NaN values filled.
        """
        # TODO: Handle null values differently than nan.
        # Pass ``value`` by keyword, matching how ``fill_null`` calls ``fillna``.
        filled = self._query_compiler.fillna(value=value)
        return self.__constructor__(_query_compiler=filled)

    def fill_null(
        self,
        value: Any | None = None,
        strategy: str | None = None,
        limit: int | None = None,
        *,
        matches_supertype: bool = True,
    ) -> "BasePolarsDataset":
        """
        Fill null values with the given value or strategy.

        Args:
            value: Value to fill null values with.
            strategy: Strategy to fill null values with.
            limit: Maximum number of null values to fill.
            matches_supertype: Whether the value matches the supertype.

        Returns:
            DataFrame with null values filled.
        """
        if strategy == "forward":
            strategy = "ffill"
        elif strategy == "backward":
            strategy = "bfill"
        elif strategy in ["min", "max", "mean"]:
            value = getattr(self, strategy)()._query_compiler
            strategy = None
        elif strategy == "zero":
            strategy = None
            value = 0
        elif strategy == "one":
            strategy = None
            value = 1
        else:
            raise ValueError(f"Unknown strategy: {strategy}")
        return self.__constructor__(
            _query_compiler=self._query_compiler.fillna(
                value=value, method=strategy, limit=limit
            )
        )

    def filter(self, *predicates, **constraints: Any) -> "BasePolarsDataset":
        """
        Keep the rows selected by all of the given boolean predicates.

        Args:
            predicates: One or more boolean masks; they are combined with ``&``.
            constraints: Named constraints (not supported).

        Returns:
            DataFrame restricted to the rows selected by the combined mask.

        Raises:
            NotImplementedError: If any named constraint is given.
            IndexError: If no predicate is given.
        """
        if constraints:
            raise NotImplementedError("Named constraints are not supported")
        # Keep the argument tuple intact while folding; rebinding ``predicates``
        # to its first element made the loop slice that element instead.
        combined = predicates[0]
        for extra in predicates[1:]:
            combined = combined & extra
        return self.__constructor__(
            _query_compiler=self._query_compiler.getitem_array(
                combined._query_compiler
            )
        )

    def gather_every(self, n: int, offset: int = 0) -> "BasePolarsDataset":
        """
        Take every ``n``-th row, starting at row ``offset``.

        Args:
            n: Step between gathered rows.
            offset: Position of the first gathered row.

        Returns:
            DataFrame with every nth row gathered.
        """
        row_selector = slice(offset, None, n)
        gathered = self._query_compiler.getitem_row_array(row_selector)
        return self.__constructor__(_query_compiler=gathered)

    def head(self, n: int = 5) -> "BasePolarsDataset":
        """
        Return the leading rows of the dataset.

        Args:
            n: Number of rows to get; used as the stop of a ``slice(0, n)``.

        Returns:
            DataFrame with the first n rows.
        """
        leading_rows = slice(0, n)
        selected = self._query_compiler.getitem_row_array(leading_rows)
        return self.__constructor__(_query_compiler=selected)

    def limit(self, n: int = 10) -> "BasePolarsDataset":
        """
        Restrict the dataset to its first ``n`` rows (delegates to ``head``).

        Args:
            n: Number of rows to limit to.

        Returns:
            DataFrame with the first n rows.
        """
        truncated = self.head(n)
        return truncated

    def interpolate(self) -> "BasePolarsDataset":
        """
        Interpolate values in the DataFrame.

        Calls the query compiler's ``interpolate`` with its default arguments.

        Returns:
            DataFrame with the interpolated values.
        """
        interpolated = self._query_compiler.interpolate()
        return self.__constructor__(_query_compiler=interpolated)

    def sample(
        self,
        n: int | "Series" | None = None,
        *,
        fraction: float | "Series" | None = None,
        with_replacement: bool = False,
        shuffle: bool = False,
        seed: int | None = None,
    ) -> "BasePolarsDataset":
        """
        Draw a random sample of rows.

        The sampling itself is delegated to ``self.to_pandas().sample``.

        Args:
            n: Number of rows to sample.
            fraction: Fraction of rows to sample (forwarded as ``frac``).
            with_replacement: Whether to sample with replacement.
            shuffle: Accepted for API compatibility; currently not forwarded.
            seed: Seed for the random number generator (``random_state``).

        Returns:
            Sampled DataFrame.
        """
        pandas_frame = self.to_pandas()
        drawn = pandas_frame.sample(
            n=n, frac=fraction, replace=with_replacement, random_state=seed
        )
        return self.__constructor__(_query_compiler=drawn._query_compiler)

    def shift(self, n: int = 1, *, fill_value=None) -> "DataFrame":
        """
        Placeholder for shifting rows by ``n`` positions (not implemented).

        Args:
            n: Number of positions to shift by.
            fill_value: Value intended for the vacated positions.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("not yet")

    def shrink_to_fit(self) -> "DataFrame":
        """
        Return a copy of the DataFrame.

        No actual shrinking is performed here; the call simply delegates to
        ``self.copy()``.

        Returns:
            A copy of the DataFrame.
        """
        duplicate = self.copy()
        return duplicate

    def slice(self, offset: int, length: int | None = None) -> "DataFrame":
        """
        Slice the DataFrame.

        Args:
            offset: Offset to start the slice from. A negative offset counts
                from the end of the DataFrame.
            length: Length of the slice. ``None`` selects every row from
                ``offset`` to the end.

        Returns:
            Sliced DataFrame.
        """
        if length is None:
            stop = None
        else:
            stop = offset + length
            if offset < 0 <= stop:
                # A window that starts from the end and reaches (or passes) the
                # last row must be open-ended: ``slice(-2, 0)`` would be empty.
                stop = None
        return self.__constructor__(
            _query_compiler=self._query_compiler.getitem_row_array(
                slice(offset, stop)
            )
        )

    def sort(
        self,
        by,
        *more_by,
        descending: bool | Sequence[bool] = False,
        nulls_last: bool | Sequence[bool] | None = None,
        multithreaded: bool = True,
        maintain_order: bool = False,
    ) -> "DataFrame":
        """
        Sort the DataFrame.

        Args:
            by: Column to sort by.
            more_by: Additional columns to sort by.
            descending: Whether to sort in descending order.
            nulls_last: Whether to sort null values last.
            multithreaded: Whether to use multiple threads.
            maintain_order: Whether to maintain the order of the DataFrame.

        Returns:
            Sorted DataFrame.
        """
        # TODO: support expressions in by
        if len(more_by) > 0:
            by = [by, *more_by]
        return self.__constructor__(
            _query_compiler=self._query_compiler.sort_rows_by_column_values(
                by=by,
                reverse=descending,
                nulls_first=None if nulls_last is None else not nulls_last,
            )
        )

    def tail(self, n: int = 5) -> "DataFrame":
        """
        Get the last n rows of the DataFrame.

        Args:
            n: Number of rows to get. ``0`` yields an empty selection.

        Returns:
            DataFrame with the last n rows.
        """
        # ``slice(-0, None)`` equals ``slice(0, None)`` and would select every
        # row, so the zero case needs an explicitly empty slice.
        row_selector = slice(-n, None) if n != 0 else slice(0, 0)
        return self.__constructor__(
            _query_compiler=self._query_compiler.getitem_row_array(row_selector)
        )

    def to_dummies(
        self,
        columns: str | Sequence[str] | None = None,
        *,
        separator: str = "_",
        drop_first: bool = False,
    ) -> "DataFrame":
        """
        Convert the columns to dummy variables.

        Args:
            columns: Columns to convert to dummy variables.
            separator: Separator for the dummy variables.
            drop_first: Whether to drop the first dummy variable.

        Returns:
            DataFrame with the columns converted to dummy variables.
        """
        if columns is not None:
            if isinstance(columns, str):
                columns = [columns]
        else:
            columns = self.columns
        result = self.__constructor__(
            _query_compiler=self._query_compiler.get_dummies(columns)
        )
        if separator != "_":
            result.columns = [
                c.replace(separator, "_") if separator in c else c
                for c in result.columns
            ]
        if drop_first:
            columns_to_drop = [
                next(
                    result_col
                    for result_col in result.columns
                    if result_col.startswith(c)
                )
                for c in columns
            ]
            return result.drop(columns_to_drop)
        else:
            return result

    def top_k(
        self,
        k: int,
        *,
        by,
        descending: bool | Sequence[bool] = False,
        nulls_last: bool | Sequence[bool] | None = None,
        maintain_order: bool | None = None,
    ) -> "DataFrame":
        raise NotImplementedError("not yet")

    def unique(self, subset=None, *, keep="any", maintain_order: bool = False):
        """
        Deduplicate the dataset through the query compiler's ``unique``.

        Args:
            subset: Columns to consider, forwarded to the query compiler.
            keep: Which duplicate to keep. ``"none"`` and ``"last"`` are not
                supported yet.
            maintain_order: Accepted for API compatibility; currently unused.

        Returns:
            DataFrame produced by the query compiler's ``unique``.

        Raises:
            NotImplementedError: If ``keep`` is ``"none"`` or ``"last"``.
        """
        if keep in ("none", "last"):
            # TODO: support keep="none"
            raise NotImplementedError("not yet")
        deduplicated = self._query_compiler.unique(subset=subset)
        return self.__constructor__(_query_compiler=deduplicated)

    def equals(self, other: "BasePolarsDataset", *, null_equal: bool = True) -> bool:
        """
        Determine whether the DataFrame is equal to another DataFrame.

        Args:
            other: DataFrame to compare with.
            null_equal: When false, two otherwise equal datasets only compare
                equal if neither of them contains a null value.

        Returns:
            True if the DataFrames are equal, False otherwise.
        """
        if not isinstance(other, type(self)):
            return False
        same_data = self._query_compiler.equals(other._query_compiler)
        if not same_data:
            return same_data
        # With null_equal disabled, any null on either side breaks equality.
        return null_equal or (
            not self.to_pandas().isna().any(axis=None)
            and not other.to_pandas().isna().any(axis=None)
        )

    @property
    def plot(self):
        """
        Plotting accessor of the equivalent native polars object.

        Returns:
            The ``plot`` attribute of the polars object built from this data.
        """
        native = polars.from_pandas(self._query_compiler.to_pandas())
        return native.plot

    def count(self):
        """
        Count values per column via the query compiler's ``count(axis=0)``.

        Returns:
            DataFrame with the counts.
        """
        per_column = self._query_compiler.count(axis=0)
        return self.__constructor__(_query_compiler=per_column)


================================================
FILE: modin/polars/dataframe.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses the ``DataFrame`` class, which is a distributed version of ``polars.DataFrame``."""

from __future__ import annotations

from collections import OrderedDict
from typing import TYPE_CHECKING, Any, Iterable, Iterator, Literal, Sequence

import numpy as np
import pandas
import polars
from pandas.core.dtypes.common import is_list_like

from modin.core.storage_formats.base.query_compiler import BaseQueryCompiler
from modin.pandas import DataFrame as ModinPandasDataFrame
from modin.pandas import Series as ModinPandasSeries
from modin.pandas.io import from_pandas
from modin.polars.base import BasePolarsDataset

if TYPE_CHECKING:
    from modin.polars import Series
    from modin.polars.groupby import GroupBy
    from modin.polars.lazyframe import LazyFrame


class DataFrame(BasePolarsDataset):

    def __init__(
        self,
        data=None,
        schema=None,
        *,
        schema_overrides=None,
        strict=True,
        orient=None,
        infer_schema_length=100,
        nan_to_null=False,
        _query_compiler=None,
    ) -> None:
        """
        Build a DataFrame from raw data or from an existing query compiler.

        Args:
            data: Data to be converted to DataFrame. A modin.pandas DataFrame
                or Series is adopted by copying its query compiler; anything
                else is routed through ``polars.DataFrame``.
            schema: Schema of the data.
            schema_overrides: Schema overrides.
            strict: Whether to enforce strict schema.
            orient: Orientation of the data.
            infer_schema_length: Length of the data to infer schema.
            nan_to_null: Whether to convert NaNs to nulls.
            _query_compiler: Query compiler to use; when given, all other
                arguments are ignored.
        """
        if _query_compiler is not None:
            self._query_compiler: BaseQueryCompiler = _query_compiler
            return

        if isinstance(data, (ModinPandasDataFrame, ModinPandasSeries)):
            self._query_compiler: BaseQueryCompiler = data._query_compiler.copy()
            return

        # Let polars interpret the input, then move it into modin via pandas.
        native = polars.DataFrame(
            data=data,
            schema=schema,
            schema_overrides=schema_overrides,
            strict=strict,
            orient=orient,
            infer_schema_length=infer_schema_length,
            nan_to_null=nan_to_null,
        )
        self._query_compiler: BaseQueryCompiler = from_pandas(
            native.to_pandas()
        )._query_compiler

    def __getitem__(self, item):
        """
        Select one column (as a Series) or several columns (as a DataFrame).

        Args:
            item: A column name, or a list-like of column names.

        Returns:
            Series or DataFrame with the column.

        Raises:
            polars.exceptions.ColumnNotFoundError: For the first requested name
                that is not a column of this DataFrame.
        """
        if not is_list_like(item):
            if item not in self.columns:
                raise polars.exceptions.ColumnNotFoundError(item)
            from .series import Series

            single = self._query_compiler.getitem_array([item])
            return Series(_query_compiler=single)

        for requested in item:
            if requested not in self.columns:
                raise polars.exceptions.ColumnNotFoundError(requested)
        subset = self._query_compiler.getitem_array(item)
        return self.__constructor__(_query_compiler=subset)

    def _copy(self):
        return self.__constructor__(_query_compiler=self._query_compiler.copy())

    def _to_polars(self) -> polars.DataFrame:
        """
        Materialize the data as a native polars object.

        Returns:
            Polars representation of the DataFrame.
        """
        materialized = self._query_compiler.to_pandas()
        return polars.from_pandas(materialized)

    def _get_columns(self):
        """
        Get columns of the DataFrame.

        Returns:
            List of columns.
        """
        return list(self._query_compiler.columns)

    def _set_columns(self, new_columns):
        """
        Set columns of the DataFrame.

        Args:
            new_columns: New columns to set.
        """
        new_query_compiler = self._query_compiler.copy()
        new_query_compiler.columns = new_columns
        self._query_compiler = new_query_compiler

    columns = property(_get_columns, _set_columns)

    _sorted_columns_cache = None

    def _get_sorted_columns(self):
        if self._sorted_columns_cache is None:
            self._sorted_columns_cache = [False] * len(self.columns)
        return self._sorted_columns_cache

    def _set_sorted_columns(self, value):
        self._sorted_columns_cache = value

    _sorted_columns = property(_get_sorted_columns, _set_sorted_columns)

    @property
    def dtypes(self):
        """
        Polars dtypes of the columns.

        An empty pandas frame carrying this DataFrame's column names and
        dtypes is converted to polars, so polars performs the dtype mapping
        without the data being materialized.

        Returns:
            List of dtypes.
        """
        empty = pandas.DataFrame(columns=self.columns)
        typed = empty.astype(self._query_compiler.dtypes)
        return polars.from_pandas(typed).dtypes

    @property
    def flags(self):
        """
        Flags of the DataFrame (not supported yet).

        Returns:
            An empty list.
        """
        # TODO: Add flags support
        no_flags = []
        return no_flags

    @property
    def height(self):
        """
        Row count, taken from the length of the query compiler's index.

        Returns:
            Number of rows in the DataFrame.
        """
        row_index = self._query_compiler.index
        return len(row_index)

    @property
    def schema(self):
        """
        Get schema of the DataFrame.

        Returns:
            OrderedDict of column names and dtypes.
        """
        return OrderedDict(zip(self.columns, self.dtypes, strict=True))

    @property
    def shape(self):
        """
        Dimensions of the DataFrame.

        Returns:
            Tuple of (height, width).
        """
        dimensions = (self.height, self.width)
        return dimensions

    @property
    def width(self):
        """
        Column count, taken from the length of ``self.columns``.

        Returns:
            Number of columns in the DataFrame.
        """
        column_names = self.columns
        return len(column_names)

    def __repr__(self):
        """
        Render the DataFrame the way the equivalent polars object renders.

        Returns:
            String representation of the DataFrame.
        """
        native = polars.from_pandas(self._query_compiler.to_pandas())
        return repr(native)

    def max(self, axis=None):
        """
        Get the maximum value in each column, or in each row.

        Args:
            axis: ``None`` or ``0`` reduces over columns; any other value
                delegates to ``max_horizontal``.

        Returns:
            DataFrame with the maximum values.
        """
        if axis is not None and axis != 0:
            return self.max_horizontal()
        column_maxima = self._query_compiler.max(axis=0)
        return self.__constructor__(_query_compiler=column_maxima)

    def max_horizontal(self):
        """
        Get the maximum value in each row (``max`` along axis 1).

        Returns:
            DataFrame with the maximum values.
        """
        row_maxima = self._query_compiler.max(axis=1)
        return self.__constructor__(_query_compiler=row_maxima)

    def _convert_non_numeric_to_null(self):
        """
        Convert non-numeric columns to null.

        Returns:
            DataFrame with non-numeric columns converted to null.
        """
        non_numeric_cols = [
            c
            for c, t in zip(self.columns, self.dtypes, strict=True)
            if not t.is_numeric()
        ]
        if len(non_numeric_cols) > 0:
            return self.__constructor__(
                _query_compiler=self._query_compiler.write_items(
                    slice(None),
                    [self.columns.index(c) for c in non_numeric_cols],
                    pandas.NA,
                    need_columns_reindex=False,
                ).astype({c: self._query_compiler.dtypes[c] for c in non_numeric_cols})
            )
        return self._copy()

    def mean(self, *, axis=None, null_strategy="ignore"):
        """
        Get the mean of each column, or of each row.

        Args:
            axis: ``None`` or ``0`` averages each column; any other value
                delegates to ``mean_horizontal``.
            null_strategy: Nulls are skipped only when this is ``"ignore"``.

        Returns:
            DataFrame with the mean of each column or row.
        """
        # TODO: this converts non numeric columns to numeric
        numeric_only = self._convert_non_numeric_to_null()
        skip_nulls = null_strategy == "ignore"
        if axis is not None and axis != 0:
            return numeric_only.mean_horizontal(ignore_nulls=skip_nulls)
        column_means = numeric_only._query_compiler.mean(axis=0, skipna=skip_nulls)
        return self.__constructor__(_query_compiler=column_means)

    def median(self) -> "DataFrame":
        """
        Get the median of each column.

        Non-numeric columns are nulled out before the reduction.

        Returns:
            DataFrame with the median of each column.
        """
        numeric_only = self._convert_non_numeric_to_null()
        column_medians = numeric_only._query_compiler.median(0)
        return self.__constructor__(_query_compiler=column_medians)

    def mean_horizontal(self, *, ignore_nulls: bool = True):
        """
        Get the mean of each row.

        Non-numeric columns are nulled out before the reduction.

        Args:
            ignore_nulls: Whether to ignore null values (forwarded as
                ``skipna``).

        Returns:
            DataFrame with the mean of each row.
        """
        numeric_only = self._convert_non_numeric_to_null()
        row_means = numeric_only._query_compiler.mean(axis=1, skipna=ignore_nulls)
        return self.__constructor__(_query_compiler=row_means)

    def min(self, axis=None):
        """
        Get the minimum value in each column, or in each row.

        Args:
            axis: ``None`` or ``0`` reduces over columns; any other value
                delegates to ``min_horizontal``.

        Returns:
            DataFrame with the minimum values of each row or column.
        """
        if axis is None or axis == 0:
            return self.__constructor__(
                _query_compiler=self._query_compiler.min(axis=0)
            )
        # The row-wise branch used to return the row maxima by mistake.
        return self.min_horizontal()

    def min_horizontal(self):
        """
        Get the minimum value in each row (``min`` along axis 1).

        Returns:
            DataFrame with the minimum values of each row.
        """
        row_minima = self._query_compiler.min(axis=1)
        return self.__constructor__(_query_compiler=row_minima)

    def product(self):
        """
        Get the product of each column.

        Non-numeric columns are nulled out before the reduction.

        Returns:
            DataFrame with the product of each column.
        """
        numeric_only = self._convert_non_numeric_to_null()
        column_products = numeric_only._query_compiler.prod(axis=0)
        return self.__constructor__(_query_compiler=column_products)

    def quantile(self, quantile: float, interpolation="nearest"):
        """
        Get the quantile of each column.

        Non-numeric columns are nulled out before the computation.

        Args:
            quantile: Quantile to get.
            interpolation: Accepted for API compatibility; currently not
                forwarded.

        Returns:
            DataFrame with the quantile of each column.
        """
        numeric_only = self._convert_non_numeric_to_null()
        # TODO: interpolation support
        per_column = numeric_only._query_compiler.quantile_for_single_value(quantile)
        return self.__constructor__(_query_compiler=per_column)

    def std(self, ddof: int = 1):
        """
        Get the standard deviation of each column.

        Non-numeric columns are nulled out before the reduction.

        Args:
            ddof: Delta degrees of freedom.

        Returns:
            DataFrame with the standard deviation of each column.
        """
        numeric_only = self._convert_non_numeric_to_null()
        deviations = numeric_only._query_compiler.std(ddof=ddof)
        return self.__constructor__(_query_compiler=deviations)

    def sum(self, axis: int | None = None, null_strategy="ignore"):
        """
        Get the sum of each column.

        Args:
            axis: Axis to get the sum on.
            null_strategy: Strategy to handle null values.

        Returns:
            DataFrame with the sum of each column or row.
        """
        obj = self._convert_non_numeric_to_null()
        if axis is None or axis == 0:
            return self.__constructor__(
                _query_compiler=obj._query_compiler.sum(
                    axis=0,
                    skipna=True if null_strategy == "ignore" else False,
                )
            )
        else:
            return obj.sum_horizontal(
                ignore_nulls=True if null_strategy == "ignore" else False
            )

    def sum_horizontal(self, *, ignore_nulls: bool = True):
        """
        Get the sum of each row.

        Args:
            ignore_nulls: Whether to ignore null values (forwarded as
                ``skipna``).

        Returns:
            DataFrame with the sum of each row.
        """
        # TODO: if there are strings in the row, polars will append numeric values
        # this behavior may not be intended so doing this instead (for now)
        numeric_only = self._convert_non_numeric_to_null()
        row_sums = numeric_only._query_compiler.sum(axis=1, skipna=ignore_nulls)
        return self.__constructor__(_query_compiler=row_sums)

    def var(self, ddof: int = 1):
        """
        Get the variance of each column.

        Non-numeric columns are nulled out before the reduction.

        Args:
            ddof: Delta degrees of freedom.

        Returns:
            DataFrame with the variance of each column.
        """
        numeric_only = self._convert_non_numeric_to_null()
        variances = numeric_only._query_compiler.var(ddof=ddof)
        return self.__constructor__(_query_compiler=variances)

    def approx_n_unique(self):
        """
        Get the number of unique values in each column.

        This simply uses the query compiler's ``nunique``.

        Returns:
            DataFrame with the approximate number of unique values in each column.
        """
        distinct_counts = self._query_compiler.nunique()
        return self.__constructor__(_query_compiler=distinct_counts)

    def describe(self, percentiles: Sequence[float] | float = (0.25, 0.5, 0.75)):
        """
        Get the descriptive statistics of each column.

        Args:
            percentiles: Percentile or percentiles to include.

        Returns:
            DataFrame with the descriptive statistics of each column; the
            statistic names are stored in a ``statistic`` column.
        """
        string_columns = {
            name: str
            for name, dtype in zip(self.columns, self.dtypes, strict=True)
            if dtype == polars.String
        }
        summary = self.__constructor__(
            _query_compiler=self._query_compiler.describe(
                # atleast_1d: a single float must still be passed as an array
                # of percentiles, not as a 0-d array.
                percentiles=np.atleast_1d(percentiles)
            ).astype(string_columns)
        ).to_pandas()

        # Take whichever percentile rows were actually produced instead of
        # assuming the default 25%/50%/75% labels.
        percentile_rows = [
            label for label in summary.index if str(label).endswith("%")
        ]
        ordered_rows = [
            "count",
            # "null_count",  TODO: support null_count in describe
            "mean",
            "std",
            "min",
            *percentile_rows,
            "max",
        ]
        return self.__constructor__(
            summary.loc[ordered_rows]
            .reset_index()
            # ``columns=`` is required: a bare mapper renames row labels.
            .rename(columns={"index": "statistic"})
        )

    def estimated_size(self, unit="b"):
        """
        Get the estimated amount of memory used by the DataFrame.

        Args:
            unit: Unit of the memory size. Accepted for API compatibility;
                currently not forwarded.

        Returns:
            DataFrame with the estimated memory usage.
        """
        usage = self._query_compiler.memory_usage()
        return self.__constructor__(_query_compiler=usage)

    def glimpse(
        self,
        *,
        max_items_per_column: int = 10,
        max_colname_length: int = 50,
        return_as_string: bool = False,
    ) -> str | None:
        raise NotImplementedError("not yet")

    def n_unique(self, subset=None) -> int:
        """
        Count unique rows by summing the result of ``is_unique()``.

        Args:
            subset: Columns to restrict the count to (not supported yet).

        Returns:
            The squeezed sum of the ``is_unique()`` mask.

        Raises:
            NotImplementedError: If ``subset`` is given.
        """
        if subset is not None:
            raise NotImplementedError("not yet")
        uniqueness_mask = self.is_unique()._query_compiler
        total = uniqueness_mask.sum(axis=0)
        return total.to_pandas().squeeze(axis=None)

    def null_count(self) -> "DataFrame":
        """
        Get the number of null values in each column.

        Computed as the column-wise sum of the ``isna`` mask.

        Returns:
            DataFrame with the number of null values in each column.
        """
        null_mask = self._query_compiler.isna()
        per_column = null_mask.sum(axis=0)
        return self.__constructor__(_query_compiler=per_column)

    def to_pandas(self):
        """
        Expose the data through the modin.pandas API.

        The new object is built on a copy of this DataFrame's query compiler.

        Returns:
            modin.pandas representation of the DataFrame.
        """
        compiler_copy = self._query_compiler.copy()
        return ModinPandasDataFrame(query_compiler=compiler_copy)

    def group_by(
        self,
        *by,
        maintain_order: bool = False,
        **named_by,
    ) -> "GroupBy":
        """
        Start a group-by operation on this DataFrame.

        Args:
            by: Columns to group by.
            maintain_order: Whether to maintain the order of the groups.
            named_by: Named columns to group by.

        Returns:
            GroupBy object.
        """
        # Imported here rather than at module level, as in the original code.
        from modin.polars.groupby import GroupBy

        grouping = GroupBy(self, *by, maintain_order=maintain_order, **named_by)
        return grouping

    def drop(self, *columns, strict: bool = True) -> "DataFrame":
        """
        Drop the given columns.

        Args:
            columns: Columns to drop, given either as separate arguments or as
                a list/tuple of names.
            strict: Whether to raise an error if a column is not found. When
                false, unknown names are ignored.

        Returns:
            DataFrame with the columns dropped.

        Raises:
            KeyError: If ``strict`` is true and a name is not a column.
        """
        # Flatten first so that ``drop(["a", "b"])`` is validated name by name
        # rather than as a single (never matching) list object.
        names = []
        for entry in columns:
            if isinstance(entry, (list, tuple)):
                names.extend(entry)
            else:
                names.append(entry)

        existing = self.columns
        if strict:
            for name in names:
                if name not in existing:
                    raise KeyError(name)
        else:
            names = [name for name in names if name in existing]

        # Pass by keyword: the names must reach the ``columns`` parameter.
        return self.__constructor__(
            _query_compiler=self._query_compiler.drop(columns=names)
        )

    def drop_in_place(self, name: str) -> "Series":
        """
        Drop the given column in place and return the dropped column.

        Args:
            name: Column to drop.

        Returns:
            The column that was dropped from the DataFrame.
        """
        # Fetch the column first: the lookup also validates that it exists.
        removed = self[name]
        # Pass by keyword: the name must reach the ``columns`` parameter.
        self._query_compiler = self._query_compiler.drop(columns=[name])
        return removed

    def get_column(self, name: str) -> "Series":
        """
        Fetch a single column by name (same as ``self[name]``).

        Args:
            name: Name of the column to get.

        Returns:
            Series with the column.
        """
        selected = self[name]
        return selected

    def get_column_index(self, name: str) -> int:
        """
        Find the position of a column by name.

        Args:
            name: Name of the column to find.

        Returns:
            Index of the column.

        Raises:
            ValueError: If ``name`` is not a column (raised by ``list.index``).
        """
        column_names = self.columns
        return column_names.index(name)

    def get_columns(self) -> list["Series"]:
        """
        Get every column of the DataFrame, in column order.

        Returns:
            List of Series with the columns.
        """
        collected = []
        for name in self.columns:
            collected.append(self[name])
        return collected

    def group_by_dynamic(
        self,
        index_column,
        *,
        every,
        period,
        offset,
        truncate,
        include_boundaries,
        closed,
        label,
        group_by,
        start_by,
        check_sorted,
    ):
        """
        Placeholder for dynamic (window-based) grouping (not implemented).

        All arguments are accepted for signature compatibility and are unused.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("not yet")

    def hstack(self, columns, *, inplace: bool = False) -> "DataFrame":
        """
        Append columns to the right of this DataFrame.

        Args:
            columns: Columns to stack: either a DataFrame (its columns are
                used) or an iterable of objects exposing ``_query_compiler``.
            inplace: Whether to stack the columns in place.

        Returns:
            DataFrame with the columns stacked horizontally; this same object
            when ``inplace`` is true.
        """
        pieces = columns.get_columns() if isinstance(columns, DataFrame) else columns
        compilers = [piece._query_compiler for piece in pieces]
        stacked = self._query_compiler.concat(axis=1, other=compilers)
        if not inplace:
            return self.__constructor__(_query_compiler=stacked)
        self._query_compiler = stacked
        return self

    def insert_column(self, index: int, column: "Series") -> "DataFrame":
        """
        Insert the given column at the given position.

        The column is inserted under ``column.name``.

        Args:
            index: Position to insert the column at.
            column: Column to insert.

        Returns:
            New DataFrame with the column inserted.
        """
        inserted = self._query_compiler.insert(
            index, column.name, column._query_compiler
        )
        # The query compiler has to go through the ``_query_compiler`` keyword, like
        # every other construction in this class; the positional slot is used for raw
        # data (see ``clear`` and ``partition_by``).
        return self.__constructor__(_query_compiler=inserted)

    def item(self, row: int | None = None, column: str | int | None = None) -> Any:
        """
        Get the value at the given row and column.

        Args:
            row: Position of the row; defaults to the first row.
            column: Name or position of the column; defaults to the first column.

        Returns:
            Value at the given row and column.
        """
        if row is None:
            row = 0
        if column is None:
            column = 0
        if isinstance(column, str):
            column = self.columns.index(column)
        # Both coordinates are positions at this point, so the lookup must be
        # positional; a label-based lookup would treat the column position as a
        # column name.
        return (
            self._query_compiler.take_2d_positional([row], [column])
            .to_pandas()
            .squeeze(axis=None)
        )

    def iter_columns(self) -> Iterator["Series"]:
        """
        Build an iterator over the DataFrame's columns.

        Returns:
            Iterator over the list returned by ``get_columns``.
        """
        all_columns = self.get_columns()
        return iter(all_columns)

    def iter_rows(
        self,
        *,
        named: bool = False,
        buffer_size: int = 512,
    ) -> Iterator[tuple[Any]] | Iterator[dict[str, Any]]:
        """
        Iterate over the rows of the DataFrame.

        Not implemented yet; the arguments are accepted but never inspected.

        Args:
            named: Currently unused.
            buffer_size: Currently unused.

        Returns:
            Iterator over the rows.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("not yet")

    def iter_slices(
        self,
        n_rows: int = 10000,
    ) -> Iterator["DataFrame"]:
        """
        Iterate over the slices of the DataFrame.

        Not implemented yet.

        Args:
            n_rows: Number of rows in each slice (currently unused).

        Returns:
            Iterator over the slices.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("not yet")

    def join(
        self,
        other: "DataFrame",
        on: str | list[str] | None = None,
        how: str = "inner",
        *,
        left_on: str | list[str] | None = None,
        right_on: str | list[str] | None = None,
        suffix: str = "_right",
        validate="m:m",
        join_nulls: bool = False,
        coalesce: bool | None = None,
    ) -> "DataFrame":
        """
        Join the DataFrame with another DataFrame.

        Args:
            other: DataFrame to join with.
            on: Column(s) to join on in both DataFrames.
            how: Join strategy. ``"full"`` is translated to ``"outer"``; any other
                supported value is forwarded unchanged to the query compiler's ``merge``.
            left_on: Column(s) to join on in the left DataFrame.
            right_on: Column(s) to join on in the right DataFrame.
            suffix: Suffix for clashing column names coming from ``other``.
            validate: Currently unused.
            join_nulls: Currently unused.
            coalesce: Currently unused.

        Returns:
            Joined DataFrame.

        Raises:
            NotImplementedError: If ``how`` is ``"cross"``, ``"semi"`` or ``"anti"``.
        """
        # A semi join keeps only the left rows that have a match and none of the right
        # columns. Mapping it to a right join returned different rows and columns, so
        # it is rejected like the other unsupported strategies.
        if how in ("cross", "semi", "anti"):
            raise NotImplementedError("not yet")
        if how == "full":
            how = "outer"
        return self.__constructor__(
            _query_compiler=self._query_compiler.merge(
                other._query_compiler,
                on=on,
                how=how,
                suffixes=("", suffix),
                left_on=left_on,
                right_on=right_on,
            )
        )

    def join_asof(
        self,
        other: "DataFrame",
        *,
        left_on: str | None = None,
        right_on: str | None = None,
        on: str | None = None,
        by_left: str | Sequence[str] | None = None,
        by_right: str | Sequence[str] | None = None,
        by: str | Sequence[str] | None = None,
        strategy: str = "backward",
        suffix: str = "_right",
        tolerance: str | None = None,
    ) -> "DataFrame":
        """
        Join the DataFrame with another DataFrame using asof logic.

        Args:
            other: DataFrame to join with.
            left_on: Column to join on in the left DataFrame.
            right_on: Column to join on in the right DataFrame.
            on: Column to join on in both DataFrames; only used when neither
                ``left_on`` nor ``right_on`` is given.
            by_left: Columns to match exactly on in the left DataFrame.
            by_right: Columns to match exactly on in the right DataFrame.
            by: Columns to match exactly on in both DataFrames; only used when neither
                ``by_left`` nor ``by_right`` is given.
            strategy: Forwarded to the query compiler as ``direction``.
            suffix: Suffix for clashing column names coming from ``other``.
            tolerance: Tolerance for the join; ``None`` (the default) means no
                tolerance is passed on.

        Returns:
            Joined DataFrame.
        """
        if on is not None and left_on is None and right_on is None:
            left_on = right_on = on
        if by is not None and by_left is None and by_right is None:
            by_left = by_right = by
        return self.__constructor__(
            _query_compiler=self._query_compiler.merge_asof(
                other._query_compiler,
                left_on=left_on,
                right_on=right_on,
                left_by=by_left,
                right_by=by_right,
                direction=strategy,
                suffixes=("", suffix),
                tolerance=tolerance,
            )
        )

    def melt(
        self,
        id_vars=None,
        value_vars=None,
        variable_name: str | None = None,
        value_name: str | None = None,
    ) -> "DataFrame":
        """
        Reshape the DataFrame from wide to long format.

        Args:
            id_vars: Identifier columns, forwarded as ``id_vars``.
            value_vars: Columns to melt, forwarded as ``value_vars``.
            variable_name: Name of the variable column, forwarded as ``var_name``.
            value_name: Name of the value column, forwarded as ``value_name``.

        Returns:
            New DataFrame wrapping the melted query compiler.
        """
        melt_kwargs = {
            "id_vars": id_vars,
            "value_vars": value_vars,
            "var_name": variable_name,
            "value_name": value_name,
        }
        melted = self._query_compiler.melt(**melt_kwargs)
        return self.__constructor__(_query_compiler=melted)

    def merge_sorted(self, other: "DataFrame", on: str | list[str]) -> "DataFrame":
        """
        Placeholder for merging two sorted DataFrames; not implemented yet.

        Args:
            other: Currently unused.
            on: Currently unused.

        Raises:
            NotImplementedError: Always.
        """
        # TODO: support natural join + sort
        raise NotImplementedError("not yet")

    def partition_by(
        self,
        by,
        *more_by,
        maintain_order: bool = True,
        include_key: bool = True,
        as_dict: bool = False,
    ) -> list["DataFrame"] | dict[Any, "DataFrame"]:
        """
        Partition the DataFrame by the given columns.

        The data is materialized with ``to_pandas`` and grouped there.

        Args:
            by: Column name or iterable of column names to partition by.
            more_by: Additional column names to partition by.
            maintain_order: Whether groups are returned in order of first appearance
                (true) or sorted by key (false).
            include_key: Whether the key columns are kept in each partition.
            as_dict: Whether to return the partitions as a dictionary keyed by the
                group key (always a tuple, one element per key column).

        Returns:
            List of DataFrames or dictionary of DataFrames.
        """
        if isinstance(by, str):
            keys = [by, *more_by]
        else:
            keys = [*by, *more_by]

        def _build(frame):
            # Iterating a pandas group-by always yields frames that still contain the
            # key columns, so they are dropped explicitly when not wanted.
            if not include_key:
                frame = frame.drop(columns=keys)
            return self.__constructor__(frame)

        grouped = self.to_pandas().groupby(keys, sort=not maintain_order)
        if as_dict:
            return {
                (key if isinstance(key, tuple) else (key,)): _build(frame)
                for key, frame in grouped
            }
        return [_build(frame) for _, frame in grouped]

    def pipe(self, function, *args, **kwargs) -> Any:
        """
        Call ``function`` with this object as its first argument.

        Args:
            function: Callable to invoke.
            *args: Extra positional arguments passed after ``self``.
            **kwargs: Extra keyword arguments passed through.

        Returns:
            Whatever ``function`` returns.
        """
        outcome = function(self, *args, **kwargs)
        return outcome

    def pivot(
        self,
        *,
        values,
        index,
        columns,
        aggregate_function=None,
        maintain_order: bool = True,
        sort_columns: bool = False,
        separator: str = "_",
    ) -> "DataFrame":
        """
        Pivot the DataFrame.

        Args:
            values: Forwarded to the query compiler as ``values``.
            index: Forwarded to the query compiler as ``index``.
            columns: Forwarded to the query compiler as ``columns``.
            aggregate_function: Forwarded to the query compiler as ``agg``.
            maintain_order: Currently unused.
            sort_columns: Currently unused.
            separator: Currently unused.

        Returns:
            New DataFrame wrapping the pivoted query compiler.
        """
        # TODO: handle maintain_order, sort_columns, separator
        # NOTE(review): confirm the query compiler's ``pivot`` accepts an ``agg``
        # keyword; if it does not, aggregation needs a different entry point.
        pivot_kwargs = {
            "values": values,
            "index": index,
            "columns": columns,
            "agg": aggregate_function,
        }
        pivoted = self._query_compiler.pivot(**pivot_kwargs)
        return self.__constructor__(_query_compiler=pivoted)

    def rechunk(self) -> "DataFrame":
        """
        Return a copy of the DataFrame.

        No repartitioning happens here; the result is whatever ``_copy`` returns.

        Returns:
            Copy of this DataFrame.
        """
        duplicate = self._copy()
        return duplicate

    def rename(self, mapping: dict[str, str] | callable) -> "DataFrame":
        """
        Rename the columns of the DataFrame.

        Args:
            mapping: Mapping of old names to new names, or a callable that receives an
                old name and returns the new one. Columns missing from a mapping keep
                their name.

        Returns:
            Copy of the DataFrame with the columns renamed.
        """
        if callable(mapping):
            mapping = {c: mapping(c) for c in self.columns}
        # TODO: add a query compiler method for `rename`
        # The columns setter needs the new names in column order. A dict would be
        # consumed as its keys, i.e. the old names, and nothing would be renamed.
        new_columns = [mapping.get(c, c) for c in self.columns]
        new_obj = self._copy()
        new_obj.columns = new_columns
        return new_obj

    def replace_column(self, index: int, column: "Series") -> "DataFrame":
        """
        Replace the column at the given position with the new column, in place.

        Args:
            index: Position of the column to replace; negative values count from the end.
            column: New column; it is inserted under ``column.name``.

        Returns:
            This DataFrame, after the replacement.
        """
        current_columns = self.columns
        old_name = current_columns[index]
        # Normalize a negative position before dropping: once the frame is one column
        # shorter the same negative value would address a different slot.
        position = index if index >= 0 else index + len(current_columns)
        # ``columns=`` is explicit because the first positional argument of ``drop``
        # is the row-label slot.
        self._query_compiler = self._query_compiler.drop(columns=[old_name]).insert(
            position,
            column.name,
            column._query_compiler,
        )
        return self

    def reverse(self) -> "DataFrame":
        """
        Return the DataFrame with its rows in reverse order.

        Returns:
            New DataFrame built from the rows selected by a reversed full slice.
        """
        backwards = slice(None, None, -1)
        reversed_compiler = self._query_compiler.getitem_row_array(backwards)
        return self.__constructor__(_query_compiler=reversed_compiler)

    def rolling(self, index_column, *, period, offset, closed, group_by, check_sorted):
        """
        Placeholder for rolling group-by; not implemented yet.

        None of the arguments are inspected.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("not yet")

    def row(
        self, index: int | None = None, *, by_predicate=None, named: bool = False
    ) -> tuple[Any] | dict[str, Any]:
        """
        Fetch a single row by position.

        Args:
            index: Position of the row to fetch.
            by_predicate: Not supported yet; only consulted implicitly, in that
                omitting ``index`` raises.
            named: Return a dictionary keyed by column name instead of a tuple.

        Returns:
            The row as a tuple, or as a dictionary when ``named`` is true.

        Raises:
            NotImplementedError: If ``index`` is ``None``.
        """
        if index is None:
            # TODO: support expressions
            raise NotImplementedError("not yet")
        record = self.to_pandas().iloc[index]
        return dict(record) if named else tuple(record)

    def rows(self, *, named: bool = False) -> list[tuple[Any]] | list[dict[str, Any]]:
        """
        Placeholder for returning all rows; not implemented yet.

        Args:
            named: Currently unused.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("not yet")

    def rows_by_key(
        self,
        key: Any,
        *,
        named: bool = False,
        include_key: bool = False,
        unique: bool = False,
    ) -> dict[Any, Iterable[Any]]:
        """
        Placeholder for returning rows grouped by key; not implemented yet.

        None of the arguments are inspected.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("not yet")

    def select(self, *exprs, **named_exprs) -> "DataFrame":
        """
        Placeholder for expression-based column selection; not implemented yet.

        Raises:
            NotImplementedError: Always.
        """
        # TODO: support expressions
        raise NotImplementedError("not yet")

    def select_seq(self, *exprs, **named_exprs) -> "DataFrame":
        """
        Placeholder for sequential expression-based selection; not implemented yet.

        Raises:
            NotImplementedError: Always.
        """
        # TODO: support expressions
        raise NotImplementedError("not yet")

    def set_sorted(
        self, column: str | Iterable[str], *more_columns: str, descending: bool = False
    ) -> "DataFrame":
        """
        Flag the given columns as sorted.

        No data is reordered; only the ``_sorted_columns`` flags of the returned copy
        are set.

        Args:
            column: Column name, or iterable of column names, to flag as sorted.
            more_columns: Additional column names to flag as sorted.
            descending: Currently unused.

        Returns:
            Copy of the DataFrame whose ``_sorted_columns`` holds one boolean per
            column, true for the flagged ones.
        """
        # A string is itself iterable, so it must be recognized first; otherwise a
        # name such as "ab" would be split into "a" and "b".
        if isinstance(column, str):
            flagged = [column, *more_columns]
        elif isinstance(column, Iterable):
            flagged = [*column, *more_columns]
        else:
            flagged = [column, *more_columns]
        new_sorted_columns = [c in flagged for c in self.columns]
        obj = self._copy()
        obj._sorted_columns = new_sorted_columns
        return obj

    def sql(self, query: str, *, table_name: str = "self") -> "DataFrame":
        """
        Placeholder for running a SQL query against the DataFrame; not implemented yet.

        Args:
            query: Currently unused.
            table_name: Currently unused.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("not yet")

    def to_series(self, index: int = 0) -> "Series":
        """
        Select the column at the given position.

        Args:
            index: Position of the column within ``self.columns``.

        Returns:
            The result of indexing this object with that column's name.
        """
        column_name = self.columns[index]
        return self[column_name]

    def transpose(
        self,
        *,
        include_header: bool = False,
        header_name: str = "column",
        column_names: str | Sequence[str] | None = None,
    ) -> "DataFrame":
        """
        Transpose the DataFrame.

        Args:
            include_header: When true and ``column_names`` is not given, the result's
                columns are named ``<header_name>_<position>``.
            header_name: Prefix used for the generated column names.
            column_names: Explicit column names for the result; takes precedence over
                ``include_header``.

        Returns:
            Transposed DataFrame.
        """
        transposed = self.__constructor__(
            _query_compiler=self._query_compiler.transpose()
        )
        if column_names is not None:
            transposed.columns = column_names
            return transposed
        if include_header:
            generated = []
            for i in range(transposed.width):
                generated.append(f"{header_name}_{i}")
            transposed.columns = generated
        return transposed

    def unnest(self, columns, *more_columns) -> "DataFrame":
        """
        Unnest the given columns.

        Not implemented yet; the arguments are never inspected.

        Args:
            columns: Columns to unnest.
            more_columns: Additional columns to unnest.

        Returns:
            DataFrame with the columns unnested.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("not yet")

    def unstack(
        self,
        step: int,
        how: str = "vertical",
        columns=None,
        fill_values: list[Any] | None = None,
    ):
        """
        Unstack the DataFrame.

        Not implemented yet; the arguments are never inspected.

        Args:
            step: Step to unstack by.
            how: How to unstack the DataFrame.
            columns: Columns to unstack.
            fill_values: Values to fill the unstacked DataFrame with.

        Returns:
            Unstacked DataFrame.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("not yet")

    def update(
        self,
        other: "DataFrame",
        on: str | Sequence[str] | None = None,
        how: Literal["left", "inner", "full"] = "left",
        *,
        left_on: str | Sequence[str] | None = None,
        right_on: str | Sequence[str] | None = None,
        include_nulls: bool = False,
    ) -> "DataFrame":
        """
        Update the DataFrame with another DataFrame.

        Not implemented yet; the arguments are never inspected.

        Args:
            other: DataFrame to update with.
            on: Column to update on.
            how: How to update the DataFrame.
            left_on: Currently unused.
            right_on: Currently unused.
            include_nulls: Currently unused.

        Returns:
            Updated DataFrame.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("not yet")

    def upsample(
        self,
        time_column: str,
        *,
        every: str,
        offset: str | None = None,
        group_by: str | Sequence[str] | None = None,
        maintain_order: bool = False,
    ) -> "DataFrame":
        """
        Placeholder for upsampling along a time column; not implemented yet.

        None of the arguments are inspected.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("not yet")

    def vstack(self, other: "DataFrame", *, in_place: bool = False) -> "DataFrame":
        """
        Append the rows of another DataFrame below this one.

        Args:
            other: DataFrame whose rows are appended.
            in_place: When true, this object is updated and returned instead of a new one.

        Returns:
            ``self`` when ``in_place`` is true, otherwise a new DataFrame.
        """
        stacked = self._query_compiler.concat(axis=0, other=other._query_compiler)
        if not in_place:
            return self.__constructor__(_query_compiler=stacked)
        self._query_compiler = stacked
        return self

    def with_columns(self, *exprs, **named_exprs) -> "DataFrame":
        """
        Placeholder for adding columns from expressions; not implemented yet.

        Raises:
            NotImplementedError: Always.
        """
        # TODO: support expressions
        raise NotImplementedError("not yet")

    def with_columns_seq(self, *exprs, **named_exprs) -> "DataFrame":
        """
        Placeholder for sequentially adding columns from expressions; not implemented yet.

        Raises:
            NotImplementedError: Always.
        """
        # TODO: support expressions
        raise NotImplementedError("not yet")

    def with_row_index(self, name: str = "index", offset: int = 0) -> "DataFrame":
        """
        Add a row index as the first column of the DataFrame.

        Args:
            name: Name of the row index column.
            offset: Value added to every entry of the row index.

        Returns:
            New DataFrame with the row index column prepended.
        """
        source = self
        if offset != 0:
            # Shift the index on a copy so this object stays untouched, and build the
            # result from that copy; previously the shifted copy was discarded.
            source = self._copy()
            source.index = source.index + offset
        result = self.__constructor__(
            _query_compiler=source._query_compiler.reset_index(drop=False)
        )
        result.columns = [name, *self.columns]
        return result

    with_row_count = with_row_index

    def map_rows(
        self, function: callable, return_dtype=None, *, inference_size: int = 256
    ) -> "DataFrame":
        """
        Apply a function along ``axis=1`` of the DataFrame.

        Args:
            function: Function handed to the query compiler's ``apply``.
            return_dtype: Currently unused.
            inference_size: Currently unused.

        Returns:
            New DataFrame wrapping the result of the application.
        """
        applied = self._query_compiler.apply(function, axis=1)
        return self.__constructor__(_query_compiler=applied)

    def corr(self, **kwargs: Any) -> "DataFrame":
        """
        Compute the correlation of the DataFrame.

        Args:
            **kwargs: Passed straight through to the query compiler's ``corr``.

        Returns:
            New DataFrame wrapping the correlation result.
        """
        correlation = self._query_compiler.corr(**kwargs)
        return self.__constructor__(_query_compiler=correlation)

    def lazy(self) -> "LazyFrame":
        """
        Convert the DataFrame to a lazy DataFrame.

        Not implemented yet.

        Returns:
            Lazy DataFrame.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("not yet")

    @classmethod
    def deserialize(cls, source) -> "DataFrame":
        """
        Build a DataFrame from serialized data.

        The source is first read with ``polars.DataFrame.deserialize`` and the result
        is handed to the class constructor.

        Args:
            source: Source to deserialize.

        Returns:
            Deserialized DataFrame.
        """
        polars_frame = polars.DataFrame.deserialize(source)
        return cls(polars_frame)

    def serialize(self, file=None) -> str | None:
        """
        Serialize the DataFrame.

        The data is materialized to pandas, converted to a polars DataFrame, and
        serialized by polars.

        Args:
            file: Forwarded to the polars ``serialize`` call.

        Returns:
            Whatever the polars ``serialize`` call returns.
        """
        pandas_frame = self._query_compiler.to_pandas()
        polars_frame = polars.from_pandas(pandas_frame)
        return polars_frame.serialize(file)

    @property
    def style(self):
        """
        Styling object for this DataFrame.

        Returns:
            The ``style`` attribute of the object returned by ``_to_polars``.
        """
        polars_frame = self._to_polars()
        return polars_frame.style

    def to_dict(
        self, *, as_series: bool = True
    ) -> dict[str, "Series"] | dict[str, list[Any]]:
        """
        Build a dictionary mapping column names to column data.

        Args:
            as_series: When true, values are the columns obtained by indexing this
                object; otherwise the conversion is delegated to polars.

        Returns:
            Dictionary keyed by column name.
        """
        if not as_series:
            pandas_frame = self._query_compiler.to_pandas()
            return polars.from_pandas(pandas_frame).to_dict(as_series=as_series)
        columns_by_name = {}
        for label in self.columns:
            columns_by_name[label] = self[label]
        return columns_by_name

    def to_dicts(self) -> list[dict[str, Any]]:
        """
        Convert the DataFrame to a list of row dictionaries.

        Returns:
            Result of ``to_dicts`` on the object returned by ``_to_polars``.
        """
        polars_frame = self._to_polars()
        return polars_frame.to_dicts()

    def to_init_repr(self, n: int = 1000) -> str:
        """
        Get a string that can be used to re-create the DataFrame.

        Args:
            n: Forwarded to ``to_init_repr`` of the object returned by ``_to_polars``.

        Returns:
            String representation of the DataFrame for initialization.
        """
        polars_frame = self._to_polars()
        return polars_frame.to_init_repr(n)

    def to_struct(self, name: str = "") -> "Series":
        """
        Convert the DataFrame to a struct.

        Not implemented yet.

        Args:
            name: Name of the struct (currently unused).

        Returns:
            Series representation of the DataFrame as a struct.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("not yet")

    def unpivot(
        self,
        on=None,
        *,
        index=None,
        variable_name: str | None = None,
        value_name: str | None = None,
    ) -> "DataFrame":
        """
        Unpivot a DataFrame from wide to long format.

        Args:
            on: Columns to unpivot; forwarded to the query compiler as ``value_vars``.
            index: Columns to keep; forwarded to the query compiler as ``id_vars``.
            variable_name: Name of the variable column.
            value_name: Name of the value column.

        Returns:
            Unpivoted DataFrame.
        """
        # Use the same keyword names as ``melt`` in this class; the query compiler's
        # ``melt`` is not called with ``on``/``index`` anywhere else.
        return self.__constructor__(
            _query_compiler=self._query_compiler.melt(
                id_vars=index,
                value_vars=on,
                var_name=variable_name,
                value_name=value_name,
            )
        )

    # Writers are not implemented yet: every ``write_*`` name below is bound to the
    # same lambda, which raises ``NotImplementedError("not yet")`` when called. The
    # ``(_ for _ in ()).throw(...)`` form is used because ``raise`` is a statement and
    # cannot appear inside a lambda.
    write_avro = write_clipboard = write_csv = write_database = write_delta = (
        write_excel
    ) = write_ipc = write_ipc_stream = write_json = write_ndjson = write_parquet = (
        write_parquet_partitioned
    ) = lambda *args, **kwargs: (_ for _ in ()).throw(NotImplementedError("not yet"))

    def clear(self, n: int = 0) -> "DataFrame":
        """
        Create an empty (n=0) or null filled (n>0) DataFrame with the same schema.

        Args:
            n: Number of rows to create.

        Returns:
            Empty or null filled DataFrame.
        """
        schema_only = polars.DataFrame(schema=self.schema)
        cleared = schema_only.clear(n=n)
        return self.__constructor__(cleared)

    def collect_schema(self) -> dict[str, str]:
        """
        Return the schema of the DataFrame.

        Returns:
            The value of ``self.schema``.
        """
        current_schema = self.schema
        return current_schema

    def fold(self, operation: callable) -> "Series":
        """
        Fold the DataFrame.

        Not implemented yet.

        Args:
            operation: Operation to fold the DataFrame with (currently unused).

        Returns:
            Series with the folded DataFrame.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("not yet")

    def hash_rows(
        self,
        seed: int = 0,
        seed_1: int | None = None,
        seed_2: int | None = None,
        seed_3: int | None = None,
    ) -> "Series":
        """
        Placeholder for hashing the rows of the DataFrame; not implemented yet.

        None of the seed arguments are inspected.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("not yet")


================================================
FILE: modin/polars/groupby.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Implement GroupBy public API as pandas does."""

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from modin.polars import DataFrame


class GroupBy:
    """
    Group-by handle over a DataFrame.

    Each aggregation calls the matching ``groupby_*`` method of the DataFrame's query
    compiler and wraps the result in a new DataFrame.
    """

    def __init__(
        self,
        df: "DataFrame",
        *by,
        maintain_order: bool = False,
        **named_by,
    ) -> None:
        """
        Store the frame and resolve the grouping keys.

        Args:
            df: DataFrame to group.
            *by: Grouping keys. A single key is stored as given; several keys must be
                either all names of columns of ``df`` or all query compilers.
            maintain_order: When false, aggregations ask the query compiler to sort
                the groups.
            **named_by: Stored as ``named_by``; not used by any aggregation here.

        Raises:
            NotImplementedError: If several keys are given and they are neither all
                column names nor all query compilers.
        """
        self.df = df
        if len(by) == 1:
            self.by = by[0]
        elif all(isinstance(b, str) and b in self.df.columns for b in by):
            self.by = self.df[list(by)]._query_compiler
        elif all(isinstance(b, type(self.df._query_compiler)) for b in by):
            # The frame is stored as ``self.df``; there is no ``self._df`` attribute.
            self.by = by
        else:
            raise NotImplementedError("not yet")
        self.named_by = named_by
        self.maintain_order = maintain_order

    def _aggregate(self, method_name, *, as_index, agg_kwargs=None) -> "DataFrame":
        """
        Run one ``groupby_*`` query compiler method and wrap its result.

        Args:
            method_name: Name of the query compiler method to call.
            as_index: Passed in ``groupby_kwargs``; when true the result additionally
                goes through ``reset_index(drop=False)``.
            agg_kwargs: Keyword arguments for the aggregation; defaults to none.

        Returns:
            New DataFrame built from the resulting query compiler.
        """
        groupby_method = getattr(self.df._query_compiler, method_name)
        result = groupby_method(
            self.by,
            axis=0,
            groupby_kwargs=dict(
                sort=not self.maintain_order,
                as_index=as_index,
            ),
            agg_args=(),
            agg_kwargs={} if agg_kwargs is None else agg_kwargs,
            drop=False,
        )
        if as_index:
            result = result.reset_index(drop=False)
        return self.df.__constructor__(_query_compiler=result)

    def agg(self, *aggs, **named_aggs):
        """Not implemented yet; always raises ``NotImplementedError``."""
        raise NotImplementedError("not yet")

    def all(self):
        """Not implemented yet; always raises ``NotImplementedError``."""
        raise NotImplementedError("not yet")

    def map_groups(self, function) -> "DataFrame":
        """Not implemented yet; always raises ``NotImplementedError``."""
        raise NotImplementedError("not yet")

    apply = map_groups

    def count(self):
        """Return the group sizes in a column named ``"count"``."""
        return self.len(name="count")

    def first(self) -> "DataFrame":
        """Aggregate each group with the query compiler's ``groupby_first``."""
        return self._aggregate("groupby_first", as_index=True)

    def head(self, n: int = 5):
        """Take the first ``n`` rows of each group via ``groupby_head``."""
        return self._aggregate("groupby_head", as_index=False, agg_kwargs=dict(n=n))

    def last(self) -> "DataFrame":
        """Aggregate each group with the query compiler's ``groupby_last``."""
        return self._aggregate("groupby_last", as_index=True)

    def len(self, name: "str | None" = None) -> "DataFrame":
        """
        Compute the size of each group.

        Args:
            name: Name for the size column; defaults to ``"len"``.

        Returns:
            DataFrame in which the column called ``"size"`` is renamed to ``name``.
        """
        if name is None:
            name = "len"
        result = self._aggregate("groupby_size", as_index=False)
        result._query_compiler.columns = [
            c if c != "size" else name for c in result.columns
        ]
        return result

    def max(self) -> "DataFrame":
        """Aggregate each group with the query compiler's ``groupby_max``."""
        return self._aggregate("groupby_max", as_index=False)

    def mean(self) -> "DataFrame":
        """Aggregate each group with ``groupby_mean`` (``numeric_only=True``)."""
        # TODO: Non numeric columns are dropped, but in Polars they are converted to null
        return self._aggregate(
            "groupby_mean", as_index=True, agg_kwargs=dict(numeric_only=True)
        )

    def median(self) -> "DataFrame":
        """Aggregate each group with ``groupby_median`` (``numeric_only=True``)."""
        # TODO: Non numeric columns are dropped, but in Polars they are converted to null
        return self._aggregate(
            "groupby_median", as_index=True, agg_kwargs=dict(numeric_only=True)
        )

    def min(self) -> "DataFrame":
        """Aggregate each group with the query compiler's ``groupby_min``."""
        return self._aggregate("groupby_min", as_index=False)

    def n_unique(self) -> "DataFrame":
        """Aggregate each group with the query compiler's ``groupby_nunique``."""
        return self._aggregate("groupby_nunique", as_index=False)

    def quantile(self, quantile: float, interpolation="nearest") -> "DataFrame":
        """
        Aggregate each group with ``groupby_quantile`` (``numeric_only=True``).

        Args:
            quantile: Quantile to compute, passed on as ``q``.
            interpolation: Currently unused.
        """
        # TODO: Non numeric columns are dropped, but in Polars they are converted to null
        # TODO: interpolation types not yet supported
        return self._aggregate(
            "groupby_quantile",
            as_index=True,
            agg_kwargs=dict(numeric_only=True, q=quantile),
        )

    def sum(self) -> "DataFrame":
        """Aggregate each group with ``groupby_sum`` (``numeric_only=True``)."""
        # TODO: Non numeric columns are dropped, but in Polars they are converted to null
        return self._aggregate(
            "groupby_sum", as_index=True, agg_kwargs=dict(numeric_only=True)
        )

    def tail(self, n: int = 5):
        """Take the last ``n`` rows of each group via ``groupby_tail``."""
        return self._aggregate("groupby_tail", as_index=False, agg_kwargs=dict(n=n))


================================================
FILE: modin/polars/lazyframe.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

from modin.polars.base import BasePolarsDataset


class LazyFrame(BasePolarsDataset):
    """Placeholder for the lazy frame implementation; adds nothing to its base class yet."""


================================================
FILE: modin/polars/series.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Module houses `Series` class, that is distributed version of `polars.Series`."""

from __future__ import annotations

from typing import TYPE_CHECKING, Any, Sequence

import numpy as np
import pandas
import polars
from polars._utils.various import no_default

from modin.core.storage_formats.base.query_compiler import BaseQueryCompiler
from modin.error_message import ErrorMessage
from modin.pandas import Series as ModinPandasSeries
from modin.pandas.io import from_pandas
from modin.polars.base import BasePolarsDataset

if TYPE_CHECKING:
    from numpy.typing import ArrayLike
    from polars import PolarsDataType

    from modin.polars import DataFrame


class Series(BasePolarsDataset):
    def __init__(
        self,
        name: str | "ArrayLike" | None = None,
        values: "ArrayLike" | None = None,
        dtype: "PolarsDataType | None" = None,
        *,
        strict: "bool" = True,
        nan_to_null: "bool" = False,
        dtype_if_empty: "PolarsDataType" = polars.Null,
        _query_compiler: BaseQueryCompiler | None = None,
    ) -> None:
        """
        Build a Series from polars-style arguments or from a query compiler.

        Args:
            name: Name of the Series. A modin pandas Series passed here while
                `values` is None is treated as the values, mirroring how polars
                interprets a non-string first positional argument.
            values: Data for the Series. A modin pandas Series is adopted by
                copying its query compiler; anything else is handed to
                `polars.Series`.
            dtype: Forwarded to `polars.Series`.
            strict: Forwarded to `polars.Series`.
            nan_to_null: Forwarded to `polars.Series`.
            dtype_if_empty: Forwarded to `polars.Series`.
            _query_compiler: Existing query compiler to wrap as-is; when given,
                every other argument is ignored.
        """
        if _query_compiler is not None:
            self._query_compiler: BaseQueryCompiler = _query_compiler
            return

        # Methods of this class hand a modin Series over as the first positional
        # argument; without this swap it would end up in `name` and be sent to
        # `polars.Series`, which does not understand modin objects.
        if values is None and isinstance(name, ModinPandasSeries):
            values, name = name, None

        if isinstance(values, ModinPandasSeries):
            self._query_compiler = values._query_compiler.copy()
            return

        polars_series = polars.Series(
            name=name,
            values=values,
            dtype=dtype,
            strict=strict,
            nan_to_null=nan_to_null,
            dtype_if_empty=dtype_if_empty,
        )
        # The query compiler is two-dimensional, so store the data as a
        # single-column frame.
        self._query_compiler = from_pandas(
            polars_series.to_pandas().to_frame()
        )._query_compiler

    def __repr__(self):
        """Return the repr polars produces for the materialized data."""
        materialized = self._query_compiler.to_pandas()
        # The data is stored as a one-column frame; collapse it to a pandas Series.
        pandas_series = materialized.squeeze(axis=1)
        return repr(polars.from_pandas(pandas_series))

    # Class-level defaults shared by every instance until overridden.
    # NOTE(review): presumably these record whether (and in which direction) the
    # data is known to be sorted; nothing in this part of the file sets them, so
    # confirm against the code that does.
    _sorted = False
    _descending = None

    def to_pandas(self) -> ModinPandasSeries:
        """
        Expose the data through the modin pandas API.

        Returns:
            Modin pandas Series wrapping this object's query compiler.
        """
        compiler = self._query_compiler
        return ModinPandasSeries(query_compiler=compiler)

    def arg_max(self) -> int:
        """
        Locate the maximum value.

        Returns:
            Result of ``argmax`` on the modin pandas Series.
        """
        pandas_series = self.to_pandas()
        return pandas_series.argmax()

    def arg_min(self) -> int:
        """
        Locate the minimum value.

        Returns:
            Result of ``argmin`` on the modin pandas Series.
        """
        pandas_series = self.to_pandas()
        return pandas_series.argmin()

    def implode(self) -> "Series":
        """
        Collect all values into a single list element.

        Not implemented yet.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("not yet")

    def max(self) -> Any:
        """
        Compute the largest value.

        Returns:
            Result of ``max`` on the modin pandas Series.
        """
        pandas_series = self.to_pandas()
        return pandas_series.max()

    def min(self) -> Any:
        """
        Compute the smallest value.

        Returns:
            Result of ``min`` on the modin pandas Series.
        """
        pandas_series = self.to_pandas()
        return pandas_series.min()

    def mean(self) -> Any:
        """
        Compute the arithmetic mean.

        Returns:
            Result of ``mean`` on the modin pandas Series.
        """
        pandas_series = self.to_pandas()
        return pandas_series.mean()

    def median(self) -> Any:
        """
        Compute the median.

        Returns:
            Result of ``median`` on the modin pandas Series.
        """
        pandas_series = self.to_pandas()
        return pandas_series.median()

    def mode(self) -> Any:
        """
        Compute the most frequent value(s).

        Returns:
            Whatever ``mode`` on the modin pandas Series returns; the result is
            passed through without being wrapped in this class.
        """
        pandas_series = self.to_pandas()
        return pandas_series.mode()

    def nan_max(self) -> Any:
        """
        Compute the maximum with ``skipna=True``, i.e. NaN values are skipped.

        NOTE(review): polars documents ``nan_max`` as propagating NaN values,
        which is the opposite of what ``skipna=True`` does - confirm which
        behavior is intended.

        Returns:
            Maximum value.
        """
        pandas_series = self.to_pandas()
        return pandas_series.max(skipna=True)

    def nan_min(self) -> Any:
        """
        Compute the minimum with ``skipna=True``, i.e. NaN values are skipped.

        NOTE(review): polars documents ``nan_min`` as propagating NaN values,
        which is the opposite of what ``skipna=True`` does - confirm which
        behavior is intended.

        Returns:
            Minimum value.
        """
        pandas_series = self.to_pandas()
        return pandas_series.min(skipna=True)

    def product(self) -> Any:
        """
        Multiply all values together.

        Returns:
            Result of ``product`` on the modin pandas Series.
        """
        pandas_series = self.to_pandas()
        return pandas_series.product()

    def quantile(self, quantile: float, interpolation: str = "nearest") -> float | None:
        """
        Compute a quantile of the values.

        Args:
            quantile: Quantile to compute, passed positionally to pandas.
            interpolation: Interpolation method, forwarded to pandas unchanged.

        Returns:
            Quantile value.
        """
        pandas_series = self.to_pandas()
        return pandas_series.quantile(quantile, interpolation=interpolation)

    def std(self, ddof: int = 1) -> float:
        """
        Compute the standard deviation.

        Args:
            ddof: Delta degrees of freedom, forwarded to pandas.

        Returns:
            Standard deviation.
        """
        pandas_series = self.to_pandas()
        return pandas_series.std(ddof=ddof)

    def sum(self) -> Any:
        """
        Add all values together.

        Returns:
            Result of ``sum`` on the modin pandas Series.
        """
        pandas_series = self.to_pandas()
        return pandas_series.sum()

    def var(self, ddof: int = 1) -> float:
        """
        Compute the variance.

        Args:
            ddof: Delta degrees of freedom, forwarded to pandas.

        Returns:
            Variance.
        """
        pandas_series = self.to_pandas()
        return pandas_series.var(ddof=ddof)

    @property
    def arr(self) -> polars.series.array.ArrayNameSpace:
        """
        Access the polars ``arr`` namespace.

        The data is materialized to pandas and converted to a polars Series
        first, so the namespace operates on a local copy.

        Returns:
            ``arr`` namespace of the converted polars Series.
        """
        materialized = self._query_compiler.to_pandas()
        polars_series = polars.from_pandas(materialized.squeeze(axis=1))
        return polars_series.arr

    @property
    def dtype(self) -> polars.datatypes.DataType:
        """
        Report the polars data type matching the stored column.

        Returns:
            Data type polars assigns to an empty pandas Series cast to the dtype
            of the first (only) column.
        """
        # Converting an empty Series lets polars do the dtype translation
        # without materializing any data.
        empty = pandas.Series().astype(self._query_compiler.dtypes.iloc[0])
        converted = polars.from_pandas(empty)
        return converted.dtype

    @property
    def name(self) -> str:
        """
        Report the name of the Series.

        Returns:
            Label of the first column of the underlying query compiler.
        """
        column_labels = self._query_compiler.columns
        return column_labels[0]

    @property
    def shape(self) -> tuple[int]:
        """
        Report the shape of the Series.

        Returns:
            One-element tuple holding the length of the index.
        """
        row_count = len(self._query_compiler.index)
        return (row_count,)

    # Class-level attribute: one empty list object shared by all instances.
    # NOTE(review): presumably a placeholder for polars' ``flags`` - confirm.
    flags = []

    @property
    def bin(self):
        """
        Access the ``bin`` namespace.

        Not implemented yet.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("not yet")

    def all(self) -> bool:
        """
        Test whether every value is truthy.

        Returns:
            Result of ``all`` on the modin pandas Series.
        """
        pandas_series = self.to_pandas()
        return pandas_series.all()

    def any(self) -> bool:
        """
        Test whether at least one value is truthy.

        Returns:
            Result of ``any`` on the modin pandas Series.
        """
        pandas_series = self.to_pandas()
        return pandas_series.any()

    def not_(self) -> "Series":
        """
        Invert the values.

        Returns:
            Series wrapping the inverted query compiler.
        """
        inverted = self._query_compiler.invert()
        return self.__constructor__(_query_compiler=inverted)

    @property
    def cat(self):
        """
        Access the ``cat`` namespace.

        Not implemented yet.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("not yet")

    def abs(self) -> "Series":
        """
        Take the absolute value of every element.

        Returns:
            Series wrapping the query compiler returned by ``abs``.
        """
        magnitudes = self._query_compiler.abs()
        return self.__constructor__(_query_compiler=magnitudes)

    def arccos(self) -> "Series":
        """
        Compute the element-wise inverse cosine.

        Not implemented yet.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("not yet")

    def arccosh(self) -> "Series":
        """
        Compute the element-wise inverse hyperbolic cosine.

        Not implemented yet.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("not yet")

    def arcsin(self) -> "Series":
        """
        Compute the element-wise inverse sine.

        Not implemented yet.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("not yet")

    def arcsinh(self) -> "Series":
        """
        Compute the element-wise inverse hyperbolic sine.

        Not implemented yet.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("not yet")

    def arctan(self) -> "Series":
        """
        Compute the element-wise inverse tangent.

        Not implemented yet.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("not yet")

    def arctanh(self) -> "Series":
        """
        Compute the element-wise inverse hyperbolic tangent.

        Not implemented yet.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("not yet")

    def arg_true(self) -> "Series":
        """
        Select index entries using this Series as a row mask.

        The index is first turned into a column (``reset_index(drop=False)``),
        the rows are then selected with this Series' own query compiler passed to
        ``getitem_array``, and finally ``getitem_column_array(0, numeric=True)``
        is applied and the result is renamed to ``self.name``.

        NOTE(review): this presumably yields the indices at which the values are
        true (the polars ``arg_true`` contract) rather than a single "first"
        index - confirm against the query compiler semantics.

        Returns:
            Series built from the selected column.
        """
        return self.__constructor__(
            _query_compiler=self._query_compiler.reset_index(drop=False)
            .getitem_array(self._query_compiler)
            .getitem_column_array(0, numeric=True)
        ).rename(self.name)

    def arg_unique(self) -> "Series":
        """
        Find the indices of the first occurrence of each unique value.

        Not implemented yet.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("not yet")

    def cbrt(self) -> "Series":
        """
        Compute the element-wise cube root.

        Not implemented yet.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("not yet")

    def cos(self) -> "Series":
        """
        Compute the element-wise cosine.

        Not implemented yet.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("not yet")

    def cosh(self) -> "Series":
        """
        Compute the element-wise hyperbolic cosine.

        Not implemented yet.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("not yet")

    def cot(self) -> "Series":
        """
        Compute the element-wise cotangent.

        Not implemented yet.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("not yet")

    def cum_count(self) -> "Series":
        """
        Get the cumulative count of non-null values.

        Returns:
            Series whose i-th element is the number of non-null values seen up
            to and including position i.
        """
        # Count the values that are present: a running sum over the
        # "is not null" mask. (Summing the ``isna`` mask would count the
        # missing values instead.)
        present = self._query_compiler.notna()
        return self.__constructor__(_query_compiler=present.cumsum())

    def cum_max(self) -> "Series":
        """
        Compute the running maximum.

        Returns:
            Series wrapping the query compiler returned by ``cummax``.
        """
        running = self._query_compiler.cummax()
        return self.__constructor__(_query_compiler=running)

    def cum_min(self) -> "Series":
        """
        Compute the running minimum.

        Returns:
            Series wrapping the query compiler returned by ``cummin``.
        """
        running = self._query_compiler.cummin()
        return self.__constructor__(_query_compiler=running)

    def cum_prod(self) -> "Series":
        """
        Compute the running product.

        Returns:
            Series wrapping the query compiler returned by ``cumprod``.
        """
        running = self._query_compiler.cumprod()
        return self.__constructor__(_query_compiler=running)

    def cum_sum(self) -> "Series":
        """
        Compute the running sum.

        Returns:
            Series wrapping the query compiler returned by ``cumsum``.
        """
        running = self._query_compiler.cumsum()
        return self.__constructor__(_query_compiler=running)

    def cumulative_eval(
        self, expr, min_periods: int = 1, *, parallel: bool = False
    ) -> "Series":
        """
        Evaluate an expression over growing prefixes of the Series.

        Not implemented yet; none of the arguments are used.

        Args:
            expr: Expression to evaluate.
            min_periods: Minimum number of periods.
            parallel: Whether to evaluate in parallel.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("not yet")

    def diff(self, n: int = 1, null_behavior: str = "ignore") -> "Series":
        """
        Compute the discrete difference between shifted items.

        Not implemented yet; none of the arguments are used.

        Args:
            n: Number of periods to shift.
            null_behavior: How nulls are handled.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("not yet")

    def dot(self, other) -> int | float | None:
        """
        Compute the dot product with another object.

        Args:
            other: Right-hand operand. A Series of this class is converted to
                its modin pandas form first; anything else is passed as-is.

        Returns:
            Result of ``dot`` on the modin pandas Series.
        """
        right = other.to_pandas() if isinstance(other, Series) else other
        left = self.to_pandas()
        return left.dot(right)

    def entropy(
        self, base: float = 2.718281828459045, *, normalize: bool = False
    ) -> float:
        """
        Compute the entropy of the values.

        Not implemented yet; none of the arguments are used.

        Args:
            base: Logarithm base.
            normalize: Whether to normalize the values first.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("not yet")

    def ewm_mean(
        self,
        com: int | None = None,
        span: int | None = None,
        half_life: int | None = None,
        alpha: float | None = None,
        *,
        adjust: bool = True,
        min_periods: int = 1,
        ignore_nulls: bool | None = None,
    ) -> "Series":
        """
        Compute the exponentially weighted mean.

        Args:
            com: Center of mass, forwarded to pandas ``ewm``.
            span: Span, forwarded to pandas ``ewm``.
            half_life: Forwarded to pandas ``ewm`` as ``halflife``.
            alpha: Smoothing factor, forwarded to pandas ``ewm``.
            adjust: Forwarded to pandas ``ewm``.
            min_periods: Forwarded to pandas ``ewm``.
            ignore_nulls: Forwarded to pandas ``ewm`` as ``ignore_na``.

        Returns:
            Exponentially weighted mean Series.
        """
        # Translate the polars-style argument names to the pandas ones.
        ewm_options = dict(
            com=com,
            span=span,
            halflife=half_life,
            alpha=alpha,
            adjust=adjust,
            min_periods=min_periods,
            ignore_na=ignore_nulls,
        )
        weighted = self.to_pandas().ewm(**ewm_options)
        return self.__constructor__(weighted.mean())

    def ewm_mean_by(self, by, *, half_life: int | None = None) -> "Series":
        """
        Compute the exponentially weighted mean relative to another Series.

        Not implemented yet; none of the arguments are used.

        Args:
            by: Series to weight by.
            half_life: Half life.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("not yet")

    def ewm_std(
        self,
        com: int | None = None,
        span: int | None = None,
        half_life: int | None = None,
        alpha: float | None = None,
        *,
        adjust: bool = True,
        min_periods: int = 1,
        ignore_nulls: bool | None = None,
    ) -> "Series":
        """
        Compute the exponentially weighted standard deviation.

        Args:
            com: Center of mass, forwarded to pandas ``ewm``.
            span: Span, forwarded to pandas ``ewm``.
            half_life: Forwarded to pandas ``ewm`` as ``halflife``.
            alpha: Smoothing factor, forwarded to pandas ``ewm``.
            adjust: Forwarded to pandas ``ewm``.
            min_periods: Forwarded to pandas ``ewm``.
            ignore_nulls: Forwarded to pandas ``ewm`` as ``ignore_na``.

        Returns:
            Exponentially weighted standard deviation Series.
        """
        # Translate the polars-style argument names to the pandas ones.
        ewm_options = dict(
            com=com,
            span=span,
            halflife=half_life,
            alpha=alpha,
            adjust=adjust,
            min_periods=min_periods,
            ignore_na=ignore_nulls,
        )
        weighted = self.to_pandas().ewm(**ewm_options)
        return self.__constructor__(weighted.std())

    def ewm_var(
        self,
        com: int | None = None,
        span: int | None = None,
        half_life: int | None = None,
        alpha: float | None = None,
        *,
        adjust: bool = True,
        min_periods: int = 1,
        ignore_nulls: bool | None = None,
    ) -> "Series":
        """
        Compute the exponentially weighted variance.

        Args:
            com: Center of mass, forwarded to pandas ``ewm``.
            span: Span, forwarded to pandas ``ewm``.
            half_life: Forwarded to pandas ``ewm`` as ``halflife``.
            alpha: Smoothing factor, forwarded to pandas ``ewm``.
            adjust: Forwarded to pandas ``ewm``.
            min_periods: Forwarded to pandas ``ewm``.
            ignore_nulls: Forwarded to pandas ``ewm`` as ``ignore_na``.

        Returns:
            Exponentially weighted variance Series.
        """
        # Translate the polars-style argument names to the pandas ones.
        ewm_options = dict(
            com=com,
            span=span,
            halflife=half_life,
            alpha=alpha,
            adjust=adjust,
            min_periods=min_periods,
            ignore_na=ignore_nulls,
        )
        weighted = self.to_pandas().ewm(**ewm_options)
        return self.__constructor__(weighted.var())

    def exp(self) -> "Series":
        """
        Calculate the exponential (e ** x) of every value.

        Returns:
            Exponential values Series.
        """
        # pandas-style Series objects have no ``exp`` method, so apply the numpy
        # function through ``apply`` instead.
        exponentials = self.to_pandas().apply(np.exp)
        # Pass the result as ``values`` so the constructor adopts the modin
        # Series directly instead of treating it as the name.
        return self.__constructor__(values=exponentials)

    def hash(
        self,
        seed: int = 0,
        seed_1: int | None = None,
        seed_2: int | None = None,
        seed_3: int | None = None,
    ) -> "Series":
        """
        Hash every value.

        Not implemented yet; none of the arguments are used.

        Args:
            seed: Primary seed.
            seed_1: Additional seed.
            seed_2: Additional seed.
            seed_3: Additional seed.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("not yet")

    def hist(
        self,
        bins: list[float] | None = None,
        *,
        bin_count: int | None = None,
        include_category: bool = True,
        include_breakpoint: bool = True,
    ) -> "Series":
        """
        Bin the values into a histogram.

        Not implemented yet; none of the arguments are used.

        Args:
            bins: Bin edges.
            bin_count: Number of bins.
            include_category: Whether to include the category column.
            include_breakpoint: Whether to include the breakpoint column.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("not yet")

    def is_between(self, lower_bound, upper_bound, closed: str = "both") -> "Series":
        """
        Test whether each value lies between two bounds.

        Not implemented yet; none of the arguments are used.

        Args:
            lower_bound: Lower bound.
            upper_bound: Upper bound.
            closed: Which sides of the interval are closed.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("not yet")

    def kurtosis(self, *, fisher: bool = True, bias: bool = True) -> float | None:
        """
        Calculate the kurtosis.

        The value is computed from the central moments (fourth moment divided
        by the squared second moment), following the ``scipy.stats.kurtosis``
        definition. pandas' ``kurtosis`` accepts neither ``fisher`` nor
        ``bias``, so it cannot be used for this.

        Args:
            fisher: If True return Fisher's (excess) kurtosis, i.e. subtract 3 so
                that a normal distribution yields 0; otherwise return Pearson's.
            bias: If False, apply the sample-size bias correction (only possible
                with more than three values).

        Returns:
            Kurtosis, None for a Series without non-null values, NaN when the
            values have zero variance.
        """
        values = self.to_pandas().dropna()
        count = len(values)
        if count == 0:
            return None

        deviations = values - values.mean()
        second_moment = (deviations**2).mean()
        fourth_moment = (deviations**4).mean()
        if second_moment == 0:
            # Constant data: the ratio is 0 / 0.
            return float("nan")

        result = fourth_moment / second_moment**2
        if not bias and count > 3:
            result = (
                (count**2 - 1.0) * result - 3.0 * (count - 1) ** 2
            ) / ((count - 2) * (count - 3)) + 3.0
        return result - 3.0 if fisher else result

    def log(self, base: float = 2.718281828459045) -> "Series":
        """
        Compute the element-wise logarithm.

        Not implemented yet; the argument is not used.

        Args:
            base: Logarithm base (defaults to e).

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("not yet")

    def log10(self) -> "Series":
        """
        Compute the element-wise base-10 logarithm by delegating to ``log``.

        Returns:
            Whatever ``self.log(10)`` returns.
        """
        decimal_base = 10
        return self.log(decimal_base)

    def log1p(self) -> "Series":
        """
        Compute the element-wise natural logarithm of one plus the value.

        Not implemented yet.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("not yet")

    def replace(
        self,
        mapping: dict[Any, Any],
        *,
        default: Any = None,
        return_dtype=None,
    ) -> "Series":
        """
        Translate values through a lookup table.

        NOTE(review): values missing from `mapping` are replaced by `default`
        (None unless given) rather than kept - confirm this matches the intended
        polars semantics. `return_dtype` is not used.

        Args:
            mapping: Lookup table from old value to new value.
            default: Value used for elements that are not keys of `mapping`.
            return_dtype: Accepted but ignored.

        Returns:
            Mapped Series.
        """

        def translate(value):
            return mapping.get(value, default)

        translated = self.to_pandas().apply(translate)
        return self.__constructor__(translated)

    def pct_change(self, n: int = 1) -> "Series":
        """
        Compute the percentage change between shifted items.

        Args:
            n: Number of periods to shift, passed positionally to pandas.

        Returns:
            Percentage change Series.
        """
        changes = self.to_pandas().pct_change(n)
        return self.__constructor__(changes)

    def peak_max(self) -> "Series":
        """
        Mark the elements equal to the overall maximum.

        NOTE(review): polars documents ``peak_max`` as a mask of *local* maxima;
        this compares against the global maximum only - confirm.

        Returns:
            Result of comparing this Series for equality with its maximum.
        """
        overall_maximum = self.max()
        return self.__eq__(overall_maximum)

    def peak_min(self) -> "Series":
        """
        Mark the elements equal to the overall minimum.

        NOTE(review): polars documents ``peak_min`` as a mask of *local* minima;
        this compares against the global minimum only - confirm.

        Returns:
            Result of comparing this Series for equality with its minimum.
        """
        overall_minimum = self.min()
        return self.__eq__(overall_minimum)

    def rank(
        self,
        method: str = "average",
        *,
        descending: bool = False,
        seed: int | None = None,
    ) -> "Series":
        """
        Calculate the rank.

        Args:
            method: Tie-breaking method: "average", "min", "max", "dense" or
                "ordinal". The pandas spelling "first" is accepted as a synonym
                of "ordinal".
            descending: Rank in descending order.
            seed: Accepted but not used yet.

        Returns:
            Rank Series.

        Raises:
            ValueError: If `method` is not one of the supported methods.
        """
        # TODO: support seed
        # polars calls the "rank in order of appearance" method "ordinal";
        # pandas calls the same thing "first".
        pandas_method = "first" if method == "ordinal" else method
        if pandas_method not in ("average", "min", "max", "first", "dense"):
            raise ValueError(f"method {method} not supported")
        ranks = self.to_pandas().rank(method=pandas_method, ascending=not descending)
        # Pass the result as ``values`` so the constructor adopts the modin
        # Series directly instead of treating it as the name.
        return self.__constructor__(values=ranks)

    def rolling_map(
        self,
        function: callable,
        window_size: int,
        weights: list[float] | None = None,
        min_periods: int = 1,
        *,
        center: bool = False,
    ) -> "Series":
        """
        Apply a custom function over a rolling window.

        Args:
            function: Function applied to every window.
            window_size: Window size.
            weights: Window weights; not supported yet.
            min_periods: Minimum number of observations per window.
            center: Whether the window is centered.

        Returns:
            Applied Series.

        Raises:
            NotImplementedError: If `weights` is given.
        """
        if weights is not None:
            raise NotImplementedError("not yet")
        windows = self.to_pandas().rolling(
            window=window_size, min_periods=min_periods, center=center
        )
        return self.__constructor__(windows.apply(function))

    def rolling_max(
        self,
        window_size: int,
        weights: list[float] | None = None,
        min_periods: int = 1,
        *,
        center: bool = False,
    ) -> "Series":
        """
        Compute the maximum over a rolling window.

        Args:
            window_size: Window size.
            weights: Window weights; not supported yet.
            min_periods: Minimum number of observations per window.
            center: Whether the window is centered.

        Returns:
            Rolling maximum Series.

        Raises:
            NotImplementedError: If `weights` is given.
        """
        if weights is not None:
            raise NotImplementedError("not yet")
        windows = self.to_pandas().rolling(
            window=window_size, min_periods=min_periods, center=center
        )
        return self.__constructor__(windows.max())

    def rolling_mean(
        self,
        window_size: int,
        weights: list[float] | None = None,
        min_periods: int = 1,
        *,
        center: bool = False,
    ) -> "Series":
        """
        Compute the mean over a rolling window.

        Args:
            window_size: Window size.
            weights: Window weights; not supported yet.
            min_periods: Minimum number of observations per window.
            center: Whether the window is centered.

        Returns:
            Rolling mean Series.

        Raises:
            NotImplementedError: If `weights` is given.
        """
        if weights is not None:
            raise NotImplementedError("not yet")
        windows = self.to_pandas().rolling(
            window=window_size, min_periods=min_periods, center=center
        )
        return self.__constructor__(windows.mean())

    def rolling_median(
        self,
        window_size: int,
        weights: list[float] | None = None,
        min_periods: int = 1,
        *,
        center: bool = False,
    ) -> "Series":
        """
        Compute the median over a rolling window.

        Args:
            window_size: Window size.
            weights: Window weights; not supported yet.
            min_periods: Minimum number of observations per window.
            center: Whether the window is centered.

        Returns:
            Rolling median Series.

        Raises:
            NotImplementedError: If `weights` is given.
        """
        if weights is not None:
            raise NotImplementedError("not yet")
        windows = self.to_pandas().rolling(
            window=window_size, min_periods=min_periods, center=center
        )
        return self.__constructor__(windows.median())

    def rolling_min(
        self,
        window_size: int,
        weights: list[float] | None = None,
        min_periods: int = 1,
        *,
        center: bool = False,
    ) -> "Series":
        """
        Compute the minimum over a rolling window.

        Args:
            window_size: Window size.
            weights: Window weights; not supported yet.
            min_periods: Minimum number of observations per window.
            center: Whether the window is centered.

        Returns:
            Rolling minimum Series.

        Raises:
            NotImplementedError: If `weights` is given.
        """
        if weights is not None:
            raise NotImplementedError("not yet")
        windows = self.to_pandas().rolling(
            window=window_size, min_periods=min_periods, center=center
        )
        return self.__constructor__(windows.min())

    def rolling_quantile(
        self,
        window_size: int,
        quantile: float,
        interpolation: str = "nearest",
        weights: list[float] | None = None,
        min_periods: int = 1,
        *,
        center: bool = False,
    ) -> "Series":
        """
        Compute a quantile over a rolling window.

        Args:
            window_size: Window size.
            quantile: Quantile to compute.
            interpolation: Interpolation method, forwarded to pandas.
            weights: Window weights; not supported yet.
            min_periods: Minimum number of observations per window.
            center: Whether the window is centered.

        Returns:
            Rolling quantile Series.

        Raises:
            NotImplementedError: If `weights` is given.
        """
        if weights is not None:
            raise NotImplementedError("not yet")
        windows = self.to_pandas().rolling(
            window=window_size, min_periods=min_periods, center=center
        )
        return self.__constructor__(
            windows.quantile(quantile, interpolation=interpolation)
        )

    def rolling_skew(self, window_size: int, *, bias: bool = False) -> "Series":
        """
        Compute the skewness over a rolling window.

        NOTE(review): `bias` is accepted but not forwarded; the result is
        whatever pandas' rolling ``skew`` computes - confirm.

        Args:
            window_size: Window size.
            bias: Accepted but ignored.

        Returns:
            Rolling skewness Series.
        """
        windows = self.to_pandas().rolling(window=window_size)
        return self.__constructor__(windows.skew())

    def rolling_std(
        self,
        window_size: int,
        weights: list[float] | None = None,
        min_periods: int = 1,
        *,
        center: bool = False,
        ddof: int = 1,
    ) -> "Series":
        """
        Compute the standard deviation over a rolling window.

        Args:
            window_size: Window size.
            weights: Window weights; not supported yet.
            min_periods: Minimum number of observations per window.
            center: Whether the window is centered.
            ddof: Delta degrees of freedom.

        Returns:
            Rolling standard deviation Series.

        Raises:
            NotImplementedError: If `weights` is given.
        """
        if weights is not None:
            raise NotImplementedError("not yet")
        windows = self.to_pandas().rolling(
            window=window_size, min_periods=min_periods, center=center
        )
        return self.__constructor__(windows.std(ddof=ddof))

    def rolling_sum(
        self,
        window_size: int,
        weights: list[float] | None = None,
        min_periods: int = 1,
        *,
        center: bool = False,
    ) -> "Series":
        """
        Compute the sum over a rolling window.

        Args:
            window_size: Window size.
            weights: Window weights; not supported yet.
            min_periods: Minimum number of observations per window.
            center: Whether the window is centered.

        Returns:
            Rolling sum Series.

        Raises:
            NotImplementedError: If `weights` is given.
        """
        if weights is not None:
            raise NotImplementedError("not yet")
        windows = self.to_pandas().rolling(
            window=window_size, min_periods=min_periods, center=center
        )
        return self.__constructor__(windows.sum())

    def rolling_var(
        self,
        window_size: int,
        weights: list[float] | None = None,
        min_periods: int = 1,
        *,
        center: bool = False,
        ddof: int = 1,
    ) -> "Series":
        """
        Compute the variance over a rolling window.

        Args:
            window_size: Window size.
            weights: Window weights; not supported yet.
            min_periods: Minimum number of observations per window.
            center: Whether the window is centered.
            ddof: Delta degrees of freedom.

        Returns:
            Rolling variance Series.

        Raises:
            NotImplementedError: If `weights` is given.
        """
        if weights is not None:
            raise NotImplementedError("not yet")
        windows = self.to_pandas().rolling(
            window=window_size, min_periods=min_periods, center=center
        )
        return self.__constructor__(windows.var(ddof=ddof))

    def search_sorted(self, element, side: str = "any") -> int | "Series":
        """
        Find the insertion position(s) that keep the sorted Series ordered.

        Args:
            element: Value, or collection of values, to locate.
            side: "left", "right" or "any"; "any" is treated as "left" because
                pandas has no equivalent.

        Returns:
            A plain int for a single element, otherwise a Series of positions.
        """
        pandas_side = "left" if side == "any" else side
        positions = self.to_pandas().searchsorted(element, side=pandas_side)
        if np.ndim(positions) == 0:
            # A scalar lookup yields a single position; hand it back as an int
            # instead of trying to wrap it in a Series.
            return int(positions)
        return self.__constructor__(values=positions)

    def sign(self) -> "Series":
        """
        Compute the element-wise sign.

        The result is built as ``(self < 0) * -1 + (self > 0)``, which is what
        gives -1 for negative values, 1 for positive values and 0 otherwise.

        Returns:
            Sign values Series.
        """
        negative_mask = self.__lt__(0)
        positive_mask = self.__gt__(0)
        return negative_mask.__mul__(-1).__add__(positive_mask)

    def sin(self) -> "Series":
        """
        Compute the element-wise sine.

        Not implemented yet.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("not yet")

    def sinh(self) -> "Series":
        """
        Compute the element-wise hyperbolic sine.

        Not implemented yet.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("not yet")

    def skew(self, *, bias: bool = True) -> float:
        """
        Calculate the skewness.

        The value is computed from the central moments (third moment divided
        by the second moment to the power 1.5), following the
        ``scipy.stats.skew`` definition, so that `bias` is honoured. pandas'
        ``skew`` always returns the bias-corrected estimate.

        Args:
            bias: If False, apply the sample-size bias correction (only possible
                with more than two values).

        Returns:
            Skewness; NaN when there are no non-null values or the values have
            zero variance.
        """
        values = self.to_pandas().dropna()
        count = len(values)
        if count == 0:
            return float("nan")

        deviations = values - values.mean()
        second_moment = (deviations**2).mean()
        third_moment = (deviations**3).mean()
        if second_moment == 0:
            # Constant data: the ratio is 0 / 0.
            return float("nan")

        result = third_moment / second_moment**1.5
        if not bias and count > 2:
            result *= (count * (count - 1.0)) ** 0.5 / (count - 2)
        return result

    def sqrt(self) -> "Series":
        """
        Get the square root of every value.

        Returns:
            Square root values Series.
        """
        # pandas-style Series objects have no ``sqrt`` method, so apply the
        # numpy function through ``apply`` instead.
        roots = self.to_pandas().apply(np.sqrt)
        # Pass the result as ``values`` so the constructor adopts the modin
        # Series directly instead of treating it as the name.
        return self.__constructor__(values=roots)

    def tan(self) -> "Series":
        """
        Compute the element-wise tangent.

        Not implemented yet.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("not yet")

    def tanh(self) -> "Series":
        """
        Compute the element-wise hyperbolic tangent.

        Not implemented yet.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("not yet")

    def chunk_lengths(self) -> list[int]:
        """
        Report the length of every chunk.

        Not implemented yet.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("not yet")

    def describe(
        self,
        percentiles: Sequence[float] | float | None = (0.25, 0.5, 0.75),
        interpolation: str = "nearest",
    ):
        """
        Summarize the values with descriptive statistics.

        Args:
            percentiles: Percentiles to include, forwarded to pandas.
            interpolation: Accepted but not forwarded to pandas.

        Returns:
            Whatever ``describe`` on the modin pandas Series returns.
        """
        pandas_series = self.to_pandas()
        return pandas_series.describe(percentiles=percentiles)

    def estimated_size(self) -> int:
        """
        Estimate the memory used by the values.

        Returns:
            Result of ``memory_usage(index=False)`` on the modin pandas Series,
            i.e. the index is left out of the estimate.
        """
        pandas_series = self.to_pandas()
        return pandas_series.memory_usage(index=False)

    def has_nulls(self) -> bool:
        """
        Test whether at least one value is null.

        Returns:
            True if any value is null, False otherwise.
        """
        null_mask = self.to_pandas().isnull()
        return null_mask.any()

    # Second name bound to the same function.
    has_validity = has_nulls

    def is_finite(self) -> "Series":
        """
        Check which values are finite.

        A value is finite when it is neither missing/NaN nor positive or
        negative infinity.

        Returns:
            Boolean Series, True where the value is finite.
        """
        values = self.to_pandas()
        # Comparing against ``np.inf`` alone would report ``-inf`` and NaN as
        # finite, so rule out both infinities and missing values explicitly.
        infinite = values.isin([np.inf, -np.inf])
        return self.__constructor__(values=values.notna() & ~infinite)

    def is_first_distinct(self) -> "Series":
        """
        Mark the first occurrence of every distinct value.

        Not implemented yet.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("not yet")

    def is_in(self, other: "Series" | list[Any]) -> "Series":
        """
        Check which values are contained in `other`.

        Args:
            other: Candidate values. A Series of this class is converted to its
                modin pandas form first (as `dot` does), because pandas' ``isin``
                does not know this class; anything else is passed as-is.

        Returns:
            Boolean Series, True where the value is found in `other`.
        """
        candidates = other.to_pandas() if isinstance(other, Series) else other
        membership = self.to_pandas().isin(candidates)
        # Pass the result as ``values`` so the constructor adopts the modin
        # Series directly instead of treating it as the name.
        return self.__constructor__(values=membership)

    def is_infinite(self) -> "Series":
        """
        Check which values are infinite.

        Returns:
            Boolean Series, True where the value is positive or negative
            infinity.
        """
        # Comparing against ``np.inf`` alone would miss negative infinity.
        infinite = self.to_pandas().isin([np.inf, -np.inf])
        return self.__constructor__(values=infinite)

    def is_last_distinct(self) -> "Series":
        """
        Mark the last occurrence of every distinct value.

        Not implemented yet.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("not yet")

    def is_nan(self) -> "Series":
        """
        Flag the values the query compiler reports as missing (``isna``).

        Returns:
            Series wrapping the query compiler returned by ``isna``.
        """
        missing_mask = self._query_compiler.isna()
        return self.__constructor__(_query_compiler=missing_mask)

    def is_not_nan(self) -> "Series":
        """
        Flag the values the query compiler reports as present (``notna``).

        Returns:
            Series wrapping the query compiler returned by ``notna``.
        """
        present_mask = self._query_compiler.notna()
        return self.__constructor__(_query_compiler=present_mask)

    def is_not_null(self) -> "Series":
        """
        Flag the non-null values; delegates to ``is_not_nan``.

        Returns:
            Whatever ``is_not_nan`` returns.
        """
        not_nan = self.is_not_nan()
        return not_nan

    def is_null(self) -> "Series":
        """
        Flag the null entries of the Series.

        Delegates to `is_nan`.

        Returns:
            Whatever `is_nan` returns.
        """
        null_mask = self.is_nan()
        return null_mask

    def is_sorted(
        self,
        *,
        descending: bool = False,
        nulls_last: bool = False,
    ) -> bool:
        """
        Report whether the Series is monotonically ordered.

        Args:
            descending: Check for decreasing order instead of increasing order.
            nulls_last: Accepted as part of the signature; not consulted here.

        Returns:
            True if the values are sorted, False otherwise.
        """
        pandas_series = self.to_pandas()
        if descending:
            return pandas_series.is_monotonic_decreasing
        return pandas_series.is_monotonic_increasing

    def len(self) -> int:
        """
        Count the elements of the Series.

        Returns:
            Length of the pandas representation of the Series.
        """
        pandas_series = self.to_pandas()
        return len(pandas_series)

    def lower_bound(self) -> "Series":
        """
        Get the lower bound values.

        Returns:
            Lower bound values Series.

        Raises:
            NotImplementedError: Always; this method is not implemented yet.
        """
        raise NotImplementedError("not yet")

    def null_count(self) -> int:
        """
        Get the number of null values.

        Returns:
            Number of null values, as a built-in `int`.
        """
        # `sum()` yields a numpy integer; convert so the annotated return type
        # holds.
        return int(self.to_pandas().isnull().sum())

    def unique_counts(self) -> "Series":
        """
        Get the number of occurrences of each unique value.

        Counts are reported in order of first appearance, and null values are
        counted as a value of their own.

        Returns:
            Series of counts, one per unique value.
        """
        # The pandas defaults sort by frequency and drop nulls, so both are
        # switched off to keep the order of appearance and the null count.
        counts = self.to_pandas().value_counts(sort=False, dropna=False)
        return self.__constructor__(values=counts)

    def upper_bound(self) -> "Series":
        """
        Get the upper bound values.

        Returns:
            Upper bound values Series.

        Raises:
            NotImplementedError: Always; this method is not implemented yet.
        """
        raise NotImplementedError("not yet")

    def value_counts(
        self, *, sort: bool = False, parallel: bool = False, name: str = "count"
    ) -> "DataFrame":
        """
        Count the occurrences of each unique value.

        Args:
            sort: Forwarded to pandas `value_counts`; sort the result by count.
            parallel: Accepted as part of the signature; not used here.
            name: Name of the column holding the counts.

        Returns:
            DataFrame with one column of unique values and one column of counts.
        """
        from modin.polars import DataFrame

        counts = self.to_pandas().value_counts(sort=sort)
        # `Series.reset_index` has no `names` keyword (that one belongs to
        # `DataFrame.reset_index`); `name` labels the column of counts.
        return DataFrame(counts.reset_index(name=name))

    def to_frame(self, name: str | None = None) -> "DataFrame":
        """
        Convert the Series to a DataFrame.

        Args:
            name: New name for the single column. When omitted, the column
                keeps its current name.

        Returns:
            DataFrame representation of the Series.
        """
        from modin.polars import DataFrame

        frame = DataFrame(_query_compiler=self._query_compiler)
        if name is None:
            # Renaming to `None` would replace the column label with `None`.
            return frame
        return frame.rename({self.name: name})

    def to_init_repr(self, n: int = 1000) -> str:
        """
        Render the leading elements as an instantiatable string.

        Args:
            n: Upper bound passed to `slice(0, n)` before rendering.

        Returns:
            Output of polars' `to_init_repr()` for the sliced data.
        """
        head_pandas = self.slice(0, n)._query_compiler.to_pandas()
        polars_object = polars.from_pandas(head_pandas)
        return polars_object.to_init_repr()

    @property
    def list(self):
        """
        Accessor for the list namespace.

        Raises:
            NotImplementedError: Always; the accessor is not implemented yet.
        """
        # TODO: implement list object
        #  https://docs.pola.rs/api/python/stable/reference/series/list.html
        raise NotImplementedError("not yet")

    def alias(self, name: str) -> "Series":
        """
        Return the Series under a new name.

        The rename is performed by converting to a frame with `to_frame(name)`
        and converting that frame back with `to_series()`.

        Args:
            name: New name.

        Returns:
            Renamed Series.
        """
        renamed_frame = self.to_frame(name)
        return renamed_frame.to_series()

    def append(self, other: "Series") -> "Series":
        """
        Concatenate another Series after this one.

        Args:
            other: Series whose query compiler is concatenated along axis 0.

        Returns:
            Appended Series.
        """
        combined = self._query_compiler.concat(0, other._query_compiler)
        return self.__constructor__(_query_compiler=combined)

    def arg_sort(
        self, *, descending: bool = False, nulls_last: bool = False
    ) -> "Series":
        """
        Compute the positions that would sort the Series.

        Args:
            descending: Return the ascending result reversed.
            nulls_last: Accepted as part of the signature; not used yet.

        Returns:
            Sorted indices Series.
        """
        # TODO: implement nulls_last
        ascending_order = self.__constructor__(values=self.to_pandas().argsort())
        return ascending_order.reverse() if descending else ascending_order

    def ceil(self) -> "Series":
        """
        Get the ceiling values.

        Returns:
            Ceiling values Series.

        Raises:
            NotImplementedError: Always; this method is not implemented yet.
        """
        raise NotImplementedError("not yet")

    def clear(self, n: int = 0) -> "Series":
        """
        Create an empty copy of the current Series, with zero to `n` elements.

        Args:
            n: Number of elements.

        Returns:
            Series with `n` nulls.

        Raises:
            NotImplementedError: Always; this method is not implemented yet.
        """
        raise NotImplementedError("not yet")

    def clip(self, lower_bound=None, upper_bound=None) -> "Series":
        """
        Limit the values to the given bounds.

        Args:
            lower_bound: Lower bound, forwarded to pandas `clip`.
            upper_bound: Upper bound, forwarded to pandas `clip`.

        Returns:
            Clipped values Series.
        """
        clipped = self.to_pandas().clip(lower_bound, upper_bound)
        return self.__constructor__(values=clipped)

    def cut(
        self,
        breaks: Sequence[float],
        *,
        labels: list[str] | None = None,
        break_point_label: str = "breakpoint",
        left_closed: bool = False,
        include_breaks: bool = False,
        as_series: bool = True,
    ) -> "BasePolarsDataset":
        """
        Placeholder for binning values by explicit break points.

        All arguments are accepted but currently unused.

        Raises:
            NotImplementedError: Always; this method is not implemented yet.
        """
        raise NotImplementedError("not yet")

    def extend_constant(self, value) -> "Series":
        """
        Extend the Series with a constant value.

        Args:
            value: Constant value.

        Returns:
            Extended Series.

        Raises:
            NotImplementedError: Always; this method is not implemented yet.
        """
        raise NotImplementedError("not yet")

    def floor(self) -> "BasePolarsDataset":
        """
        Get the floor values.

        Returns:
            Result of floor-dividing the Series by 1.
        """
        floored = self.__floordiv__(1)
        return floored

    def gather(self, indices) -> "Series":
        """
        Gather values by indices.

        Args:
            indices: Integer positions, given either as an object carrying a
                `_query_compiler` (e.g. another Series) or as anything pandas
                `iloc` accepts directly.

        Returns:
            Gathered Series.
        """
        if hasattr(indices, "_query_compiler"):
            # `iloc` cannot index with a query compiler object, so materialize
            # its first column as a plain array of positions.
            positions = indices._query_compiler.to_pandas().iloc[:, 0].to_numpy()
        else:
            positions = indices
        return self.__constructor__(values=self.to_pandas().iloc[positions])

    def interpolate_by(self, by) -> "Series":
        """
        Interpolate values by group.

        Args:
            by: Grouping Series.

        Returns:
            Interpolated Series.

        Raises:
            NotImplementedError: Always; this method is not implemented yet.
        """
        raise NotImplementedError("not yet")

    def item(self, index: int | None = None) -> Any:
        """
        Get the item at the index.

        Args:
            index: Position of the element. When omitted, the Series must hold
                exactly one element, which is returned.

        Returns:
            Item at the index.

        Raises:
            ValueError: If `index` is None and the Series does not contain
                exactly one element.
        """
        values = self.to_pandas()
        if index is None:
            # `iloc[None]` is not a valid positional lookup, so the default
            # needs its own branch.
            size = values.shape[0]
            if size != 1:
                raise ValueError(
                    "can only call `.item()` without an index if the Series "
                    f"is of length 1 (Series is of length {size})"
                )
            return values.iloc[0]
        return values.iloc[index]

    def new_from_index(self, index: int, length: int) -> "Series":
        """
        Create a new Series from the index.

        Args:
            index: Index.
            length: Length.

        Returns:
            New Series.

        Raises:
            NotImplementedError: Always; this method is not implemented yet.
        """
        raise NotImplementedError("not yet")

    def qcut(
        self,
        quantiles: Sequence[float] | int,
        *,
        labels: Sequence[str] | None = None,
        left_closed: bool = False,
        allow_duplicates: bool = False,
        include_breaks: bool = False,
        break_point_label: str = "breakpoint",
        category_labels: str = "category",
        as_series: bool = True,
    ) -> "Series" | "DataFrame":
        """
        Bin continuous values into discrete categories based on quantiles.

        Args:
            quantiles: Number of quantiles or sequence of quantiles.
            labels: Labels for the resulting bins.
            left_closed: Whether the intervals are left-closed.
            allow_duplicates: Whether to allow duplicate intervals.
            include_breaks: Whether to include the breaks in the result.
            break_point_label: Label for the break points.
            category_labels: Label for the categories.
            as_series: Whether to return a Series.

        Returns:
            Binned Series.

        Raises:
            NotImplementedError: Always; this method is not implemented yet.
        """
        raise NotImplementedError("not yet")

    def rechunk(self, *, in_place: bool = False) -> "Series":
        """
        Rechunk the Series.

        Args:
            in_place: In-place operation.

        Returns:
            Rechunked Series.

        Raises:
            NotImplementedError: Always; this method is not implemented yet.
        """
        raise NotImplementedError("not yet")

    # `rename` is the same function object as `alias`.
    rename = alias

    def reshape(self, dimensions, nested_type) -> "Series":
        """
        Reshape the Series.

        Args:
            dimensions: Dimensions.
            nested_type: Nested type.

        Returns:
            Reshaped Series.

        Raises:
            NotImplementedError: Always; this method is not implemented yet.
        """
        raise NotImplementedError("not yet")

    def reverse(self) -> "Series":
        """
        Return the Series with its elements in the opposite order.

        Returns:
            Reversed Series.
        """
        backwards = self.to_pandas().iloc[::-1]
        return self.__constructor__(values=backwards)

    def rle(self) -> "Series":
        """
        Run-length encode the Series.

        Returns:
            Run-length encoded Series.

        Raises:
            NotImplementedError: Always; this method is not implemented yet.
        """
        raise NotImplementedError("not yet")

    def rle_id(self) -> "Series":
        """
        Run-length encode the Series with IDs.

        Returns:
            Run-length encoded Series with IDs.

        Raises:
            NotImplementedError: Always; this method is not implemented yet.
        """
        raise NotImplementedError("not yet")

    def round(self, decimals: int = 0) -> "Series":
        """
        Round the values to a number of decimal places.

        Args:
            decimals: Number of decimals, forwarded to pandas `round`.

        Returns:
            Rounded values Series.
        """
        rounded = self.to_pandas().round(decimals)
        return self.__constructor__(values=rounded)

    def round_sig_figs(self, digits: int) -> "Series":
        """
        Round the values to significant figures.

        Args:
            digits: Number of significant figures.

        Returns:
            Rounded values Series.

        Raises:
            NotImplementedError: Always; this method is not implemented yet.
        """
        raise NotImplementedError("not yet")

    def scatter(self, indices, values) -> "Series":
        """
        Scatter values by indices.

        Args:
            indices: Indices.
            values: Values.

        Returns:
            Scattered Series.

        Raises:
            NotImplementedError: Always; this method is not implemented yet.
        """
        raise NotImplementedError("not yet")

    def set(self, filter: "Series", value: int | float | str | bool | None) -> "Series":
        """
        Set values by filter.

        Args:
            filter: Filter.
            value: Value.

        Returns:
            Set Series.

        Raises:
            NotImplementedError: Always; this method is not implemented yet.
        """
        raise NotImplementedError("not yet")

    def shrink_dtype(self) -> "Series":
        """
        Shrink the data type.

        Returns:
            Shrunk Series.

        Raises:
            NotImplementedError: Always; this method is not implemented yet.
        """
        raise NotImplementedError("not yet")

    def shuffle(self, seed: int | None = None) -> "Series":
        """
        Shuffle the Series.

        Args:
            seed: Seed.

        Returns:
            Shuffled Series.

        Raises:
            NotImplementedError: Always; this method is not implemented yet.
        """
        raise NotImplementedError("not yet")

    def zip_with(self, mask: "Series", other: "Series") -> "Series":
        """
        Combine this Series with another one under a mask.

        The work is delegated to the query compiler's `where`, called with the
        query compilers of `mask` and `other`.

        Args:
            mask: Mask Series.
            other: Other Series.

        Returns:
            Zipped Series.
        """
        mask_compiler = mask._query_compiler
        other_compiler = other._query_compiler
        merged = self._query_compiler.where(mask_compiler, other_compiler)
        return self.__constructor__(_query_compiler=merged)

    def map_elements(
        self,
        function: callable,
        return_dtype=None,
        *,
        skip_nulls: bool = True,
    ) -> "Series":
        """
        Apply a function to every element.

        Args:
            function: Function to apply.
            return_dtype: Not supported yet; a warning is issued when it is set.
            skip_nulls: Only the default is supported; passing False issues a
                warning.

        Returns:
            Mapped Series.
        """
        uses_unsupported_option = return_dtype is not None or skip_nulls is False
        if uses_unsupported_option:
            ErrorMessage.warn(
                "`return_dtype` and `skip_nulls=False` are not supported yet"
            )
        mapped = self.to_pandas().apply(function)
        return self.__constructor__(values=mapped)

    def reinterpret(self, *, signed: bool = True) -> "Series":
        """
        Reinterpret the data type of the series as signed or unsigned.

        Args:
            signed: If True, reinterpret as signed, otherwise as unsigned.

        Returns:
            Reinterpreted Series.

        Raises:
            NotImplementedError: Always; this method is not implemented yet.
        """
        raise NotImplementedError("not yet")

    def set_sorted(self, *, descending: bool = False) -> "Series":
        """
        Mark the Series as sorted without reordering it.

        Only the `_sorted` and `_descending` attributes are written; the data
        itself is left untouched.

        Args:
            descending: Direction to record in `_descending`.

        Returns:
            This same Series object.
        """
        self._descending = descending
        self._sorted = True
        return self

    def to_physical(self) -> "Series":
        """
        Convert the Series to physical.

        Returns:
            Physical Series.

        Raises:
            NotImplementedError: Always; this method is not implemented yet.
        """
        raise NotImplementedError("not yet")

    def get_chunks(self) -> list["Series"]:
        """
        Get the chunks.

        Returns:
            Chunks.

        Raises:
            NotImplementedError: Always; this method is not implemented yet.
        """
        raise NotImplementedError("not yet")

    @property
    def str(self):
        """
        Accessor for the string namespace.

        Raises:
            NotImplementedError: Always; the accessor is not implemented yet.
        """
        # TODO: implement str object
        #  https://docs.pola.rs/api/python/stable/reference/series/string.html
        raise NotImplementedError("not yet")

    @property
    def struct(self):
        """
        Accessor for the struct namespace.

        Raises:
            NotImplementedError: Always; the accessor is not implemented yet.
        """
        # TODO: implement struct object
        #  https://docs.pola.rs/api/python/stable/reference/series/struct.html
        raise NotImplementedError("not yet")

    @property
    def dt(self):
        """
        Accessor for the temporal namespace.

        Raises:
            NotImplementedError: Always; the accessor is not implemented yet.
        """
        # TODO: implement dt object
        #  https://docs.pola.rs/api/python/stable/reference/series/temporal.html
        raise NotImplementedError("not yet")

    def __len__(self) -> int:
        """
        Support the built-in `len()` by delegating to the `len` method.
        """
        length = self.len()
        return length

    def __matmul__(self, other) -> "Series":
        """
        Matrix multiplication.

        Args:
            other: Other Series.

        Returns:
            Matrix multiplication Series.

        Raises:
            NotImplementedError: Always; this method is not implemented yet.
        """
        raise NotImplementedError("not yet")

    def __radd__(self, other) -> "Series":
        """
        Reflected addition (``other + self``).

        Args:
            other: Left-hand operand.

        Returns:
            Series wrapping the query compiler's `radd` result.
        """
        summed = self._query_compiler.radd(other, axis=0)
        return self.__constructor__(_query_compiler=summed)

    def __rand__(self, other) -> "Series":
        """
        Reflected and (``other & self``).

        Args:
            other: Left-hand operand.

        Returns:
            Series wrapping the query compiler's `__rand__` result.
        """
        conjunction = self._query_compiler.__rand__(other, axis=0)
        return self.__constructor__(_query_compiler=conjunction)

    def __rfloordiv__(self, other) -> "Series":
        """
        Reflected floor division (``other // self``).

        Args:
            other: Left-hand operand.

        Returns:
            Series wrapping the query compiler's `rfloordiv` result.
        """
        floored = self._query_compiler.rfloordiv(other, axis=0)
        return self.__constructor__(_query_compiler=floored)

    def __rmatmul__(self, other) -> "Series":
        """
        Right matrix multiplication.

        Args:
            other: Other Series.

        Returns:
            Matrix multiplication Series.

        Raises:
            NotImplementedError: Always; this method is not implemented yet.
        """
        raise NotImplementedError("not yet")

    def __rmod__(self, other) -> "Series":
        """
        Reflected modulo (``other % self``).

        Args:
            other: Left-hand operand.

        Returns:
            Series wrapping the query compiler's `rmod` result.
        """
        remainder = self._query_compiler.rmod(other, axis=0)
        return self.__constructor__(_query_compiler=remainder)

    def __rmul__(self, other) -> "Series":
        """
        Reflected multiplication (``other * self``).

        Args:
            other: Left-hand operand.

        Returns:
            Series wrapping the query compiler's `rmul` result.
        """
        product = self._query_compiler.rmul(other, axis=0)
        return self.__constructor__(_query_compiler=product)

    def __ror__(self, other) -> "Series":
        """
        Reflected or (``other | self``).

        Args:
            other: Left-hand operand.

        Returns:
            Series wrapping the query compiler's `__ror__` result.
        """
        disjunction = self._query_compiler.__ror__(other, axis=0)
        return self.__constructor__(_query_compiler=disjunction)

    def __rpow__(self, other) -> "Series":
        """
        Reflected power (``other ** self``).

        Args:
            other: Left-hand operand.

        Returns:
            Series wrapping the query compiler's `rpow` result.
        """
        powered = self._query_compiler.rpow(other, axis=0)
        return self.__constructor__(_query_compiler=powered)

    def __rsub__(self, other) -> "Series":
        """
        Reflected subtraction (``other - self``).

        Args:
            other: Left-hand operand.

        Returns:
            Series wrapping the query compiler's `rsub` result.
        """
        difference = self._query_compiler.rsub(other, axis=0)
        return self.__constructor__(_query_compiler=difference)

    def __rtruediv__(self, other) -> "Series":
        """
        Reflected true division (``other / self``).

        Args:
            other: Left-hand operand.

        Returns:
            Series wrapping the query compiler's `rtruediv` result.
        """
        quotient = self._query_compiler.rtruediv(other, axis=0)
        return self.__constructor__(_query_compiler=quotient)

    def __rxor__(self, other) -> "Series":
        """
        Reflected xor (``other ^ self``).

        Args:
            other: Left-hand operand.

        Returns:
            Series wrapping the query compiler's `__rxor__` result.
        """
        exclusive_or = self._query_compiler.__rxor__(other, axis=0)
        return self.__constructor__(_query_compiler=exclusive_or)

    def eq(self, other) -> "Series":
        """
        Check if the values are equal to the other operand.

        Args:
            other: Other Series. An operand without a `_query_compiler`
                attribute (e.g. a scalar) is forwarded unchanged.

        Returns:
            Boolean Series.
        """
        # Only Series-like operands carry a query compiler; unwrapping
        # unconditionally would raise AttributeError for anything else.
        operand = getattr(other, "_query_compiler", other)
        return self.__constructor__(_query_compiler=self._query_compiler.eq(operand))

    def eq_missing(self, other) -> "Series":
        """
        Check if the values are equal to the other Series, including missing values.

        Args:
            other: Other Series.

        Returns:
            Boolean Series.

        Raises:
            NotImplementedError: Always; this method is not implemented yet.
        """
        raise NotImplementedError("not yet")

    def ge(self, other) -> "Series":
        """
        Check if the values are greater than or equal to the other operand.

        Args:
            other: Other Series. An operand without a `_query_compiler`
                attribute (e.g. a scalar) is forwarded unchanged.

        Returns:
            Boolean Series.
        """
        # Only Series-like operands carry a query compiler; unwrapping
        # unconditionally would raise AttributeError for anything else.
        operand = getattr(other, "_query_compiler", other)
        return self.__constructor__(_query_compiler=self._query_compiler.ge(operand))

    def gt(self, other) -> "Series":
        """
        Check if the values are greater than the other operand.

        Args:
            other: Other Series. An operand without a `_query_compiler`
                attribute (e.g. a scalar) is forwarded unchanged.

        Returns:
            Boolean Series.
        """
        # Only Series-like operands carry a query compiler; unwrapping
        # unconditionally would raise AttributeError for anything else.
        operand = getattr(other, "_query_compiler", other)
        return self.__constructor__(_query_compiler=self._query_compiler.gt(operand))

    def le(self, other) -> "Series":
        """
        Check if the values are less than or equal to the other operand.

        Args:
            other: Other Series. An operand without a `_query_compiler`
                attribute (e.g. a scalar) is forwarded unchanged.

        Returns:
            Boolean Series.
        """
        # Only Series-like operands carry a query compiler; unwrapping
        # unconditionally would raise AttributeError for anything else.
        operand = getattr(other, "_query_compiler", other)
        return self.__constructor__(_query_compiler=self._query_compiler.le(operand))

    def lt(self, other) -> "Series":
        """
        Check if the values are less than the other operand.

        Args:
            other: Other Series. An operand without a `_query_compiler`
                attribute (e.g. a scalar) is forwarded unchanged.

        Returns:
            Boolean Series.
        """
        # Only Series-like operands carry a query compiler; unwrapping
        # unconditionally would raise AttributeError for anything else.
        operand = getattr(other, "_query_compiler", other)
        return self.__constructor__(_query_compiler=self._query_compiler.lt(operand))

    def n_unique(self) -> int:
        """
        Count the distinct values.

        Returns:
            The query compiler's `nunique()` result, converted to pandas and
            squeezed down to a scalar.
        """
        distinct = self._query_compiler.nunique()
        as_pandas = distinct.to_pandas()
        return as_pandas.squeeze(axis=None)

    def ne(self, other) -> "Series":
        """
        Check if the values are not equal to the other operand.

        Args:
            other: Other Series. An operand without a `_query_compiler`
                attribute (e.g. a scalar) is forwarded unchanged.

        Returns:
            Boolean Series.
        """
        # Only Series-like operands carry a query compiler; unwrapping
        # unconditionally would raise AttributeError for anything else.
        operand = getattr(other, "_query_compiler", other)
        return self.__constructor__(_query_compiler=self._query_compiler.ne(operand))

    def ne_missing(self, other) -> "Series":
        """
        Check if the values are not equal to the other Series, including missing values.

        Args:
            other: Other Series.

        Returns:
            Boolean Series.

        Raises:
            NotImplementedError: Always; this method is not implemented yet.
        """
        raise NotImplementedError("not yet")

    def pow(self, exponent) -> "Series":
        """
        Raise every value to the given power.

        Args:
            exponent: Exponent, forwarded to the query compiler's `pow`.

        Returns:
            Powered Series.
        """
        powered = self._query_compiler.pow(exponent, axis=0)
        return self.__constructor__(_query_compiler=powered)

    def replace_strict(
        self, old, new=no_default, *, default=no_default, return_dtype=None
    ) -> "Series":
        """
        Replace values strictly.

        Args:
            old: Old values.
            new: New values.
            default: Default value.
            return_dtype: Accepted as part of the signature; currently unused.

        Returns:
            Replaced Series.

        Raises:
            NotImplementedError: Always; this method is not implemented yet.
        """
        raise NotImplementedError("not yet")

    def to_list(self) -> list:
        """
        Convert the Series to a list.

        Returns:
            List representation of the Series.
        """
        # polars spells this method `to_list()`; `tolist()` is the pandas/numpy
        # spelling and does not exist on a polars Series.
        return self._to_polars().to_list()

    def drop_nans(self) -> "Series":
        """
        Remove the NaN entries.

        Returns:
            Series built from the query compiler's `dropna(how="any")` result.
        """
        without_nans = self._query_compiler.dropna(how="any")
        return self.__constructor__(_query_compiler=without_nans)


================================================
FILE: modin/tests/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.


================================================
FILE: modin/tests/config/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.


================================================
FILE: modin/tests/config/docs_module/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

from .classes import BasePandasDataset, DataFrame, Series
from .functions import read_csv

__all__ = ["BasePandasDataset", "DataFrame", "Series", "read_csv"]


================================================
FILE: modin/tests/config/docs_module/classes.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.


# Stand-in class whose only purpose is to carry a docstring.
# NOTE(review): the docstring is presumably compared verbatim by the DocModule
# tests - keep its text unchanged.
class DataFrame:
    def apply(self):
        """This is a test of the documentation module for DataFrame."""
        return


# Stand-in class whose only purpose is to carry a docstring.
# NOTE(review): the docstring is presumably compared verbatim by the DocModule
# tests - keep its text unchanged.
class Series:
    def isna(self):
        """This is a test of the documentation module for Series."""
        return


# Stand-in class whose only purpose is to carry docstrings. The class and
# `apply` docstrings are compared verbatim in TestDocModule.test_overrides, so
# their text must stay exactly as written.
class BasePandasDataset:
    """This is a test of the documentation module for BasePandasDataSet."""

    def apply():
        """This is a test of the documentation module for BasePandasDataSet.apply."""
        return

    def astype():
        """This is a test of the documentation module for BasePandasDataSet.astype."""

================================================
FILE: modin/tests/config/docs_module/functions.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.


# Stand-in function whose only purpose is to carry a docstring.
# NOTE(review): the docstring is presumably compared verbatim by the DocModule
# tests - keep its text unchanged.
def read_csv():
    """Test override for functions on the module."""
    return


================================================
FILE: modin/tests/config/docs_module_with_just_base/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

from .classes import BasePandasDataset

__all__ = ["BasePandasDataset"]


================================================
FILE: modin/tests/config/docs_module_with_just_base/classes.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.


# Stand-in class that carries a docstring for `astype` only.
# NOTE(review): the docstring is presumably compared verbatim by the DocModule
# tests - keep its text unchanged.
class BasePandasDataset:
    def astype():
        """This is a test of the documentation module for BasePandasDataSet.astype."""


================================================
FILE: modin/tests/config/test_envvars.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import itertools
import os
import re
import sys
import unittest.mock as mock
from unittest.mock import Mock, patch

import pandas
import pytest
from pytest import param

import modin.config as cfg
import modin.pandas as pd
from modin.config.envvars import _check_vars
from modin.config.pubsub import _UNSET, ExactStr, ValueSource
from modin.pandas.base import BasePandasDataset
from modin.tests.pandas.utils import switch_execution

################# WARNING #####################################################
# Test cases in this file affect global state, e.g. by setting environment
# variables. The test cases may produce unexpected results when repeated or run
# out of the order they are defined in. Be careful when running the tests
# locally or when adding new test cases. In particular, note:
#   - test_ray_cluster_resources() causes us to permanently attach the
#     `_initialize_engine` subscriber to Engine: https://github.com/modin-project/modin/blob/6252ebde19935bd1f6a6850209bf8a1f5e5ecfb7/modin/core/execution/dispatching/factories/dispatcher.py#L115
#     Changing to any engine after that test runs will cause Modin to try to
#     initialize the engine.
#   - In CI, we only run these tests with Ray execution, in the
#     `test-internal` job.
#   - test_wrong_values() permanently messes up some config variables. For more
#     details see https://github.com/modin-project/modin/issues/7454
################# WARNING ######################

# Explanation of why switching to unidist does not work in a plain pytest run.
UNIDIST_SKIP_REASON = (
    "Switching to unidist causes an error since we have to execute unidist "
    "tests differently, with `mpiexec` instead of just `pytest`"
)


@pytest.fixture
def clear_backend_execution_and_storage_format(monkeypatch):
    """
    Reset environment variables and config classes for backend, execution, and storage format.

    For each of ``Backend``, ``StorageFormat`` and ``Engine`` the cached value
    is set to ``_UNSET``, its source to ``ValueSource.DEFAULT``, and the
    matching environment variable is removed if present. All changes are made
    through ``monkeypatch``.

    Parameters
    ----------
    monkeypatch : pytest.MonkeyPatch
        Pytest fixture used to apply the changes.
    """
    config_classes = (cfg.Backend, cfg.StorageFormat, cfg.Engine)
    for config_class in config_classes:
        monkeypatch.setattr(config_class, "_value", _UNSET)
        monkeypatch.setattr(config_class, "_value_source", ValueSource.DEFAULT)
        monkeypatch.delitem(os.environ, config_class.varname, raising=False)


@pytest.fixture
def make_unknown_env():
    """Set ``MODIN_UNKNOWN`` for the duration of a test and yield its name."""
    unknown_name = "MODIN_UNKNOWN"
    os.environ[unknown_name] = "foo"
    yield unknown_name
    del os.environ[unknown_name]


@pytest.fixture(params=[str, ExactStr])
def make_custom_envvar(request):
    """
    Build a fresh ``EnvironmentVariable`` subclass bound to ``MODIN_CUSTOM``.

    Parameters
    ----------
    request : pytest.FixtureRequest
        Fixture request; ``request.param`` is passed as the ``type`` of the
        created config class.

    Returns
    -------
    type
        The newly defined ``CustomVar`` class.
    """
    # The class docstring below is runtime data: test_custom_help asserts that
    # the text "custom var" appears in the output of get_help().
    class CustomVar(cfg.EnvironmentVariable, type=request.param):
        """custom var"""

        default = 10
        varname = "MODIN_CUSTOM"
        choices = (1, 5, 10)

    return CustomVar


@pytest.fixture(scope="session")
def add_pandas_duplicate_on_ray_execution():
    """
    Register an execution with storage format Test_Pandasduplicate and engine Ray.

    The factory for the new execution is an alias of the PandasOnRay factory,
    and the execution is registered under the backend name ``Test_Backend_1``.
    """
    duplicate_storage_format = "Test_Pandasduplicate"
    cfg.StorageFormat.add_option(duplicate_storage_format)

    from modin.core.execution.dispatching.factories import factories

    # Alias the existing Ray factory under the name of the new execution.
    setattr(
        factories,
        "Test_PandasduplicateOnRayFactory",
        factories.PandasOnRayFactory,
    )
    duplicate_execution = cfg.Execution(
        storage_format=duplicate_storage_format,
        engine="Ray",
    )
    cfg.Backend.register_backend("Test_Backend_1", duplicate_execution)


@pytest.fixture
def set_custom_envvar(make_custom_envvar):
    """
    Temporarily set the custom variable's environment entry to ``"  custom  "``.

    On teardown the previous state of the environment entry is restored: a
    pre-existing value is put back, otherwise the entry is removed (without
    failing if it is already gone).

    Parameters
    ----------
    make_custom_envvar : type
        Config class produced by the ``make_custom_envvar`` fixture.

    Yields
    ------
    str
        The value the test expects ``get()`` to return: ``"Custom"`` when the
        class's ``type`` is ``str``, otherwise the raw ``"  custom  "``.
    """
    varname = make_custom_envvar.varname
    previous_value = os.environ.get(varname)
    os.environ[varname] = "  custom  "
    yield "Custom" if make_custom_envvar.type is str else "  custom  "
    if previous_value is None:
        # pop() instead of del: tolerate the variable having been removed already.
        os.environ.pop(varname, None)
    else:
        os.environ[varname] = previous_value


def test_unknown(make_unknown_env):
    """Check that ``_check_vars`` emits a UserWarning naming the unknown variable."""
    warning_pattern = f"Found unknown .*{make_unknown_env}.*"
    with pytest.warns(UserWarning, match=warning_pattern):
        _check_vars()


def test_custom_default(make_custom_envvar):
    """Check that the custom variable's ``get()`` returns its declared default of 10."""
    current_value = make_custom_envvar.get()
    assert current_value == 10


def test_custom_set(make_custom_envvar, set_custom_envvar):
    """Check that ``get()`` returns the value expected for the environment entry."""
    expected_value = set_custom_envvar
    assert make_custom_envvar.get() == expected_value


def test_custom_help(make_custom_envvar):
    """Check that the help text mentions both the variable name and its docstring."""
    for expected_fragment in ("MODIN_CUSTOM", "custom var"):
        assert expected_fragment in make_custom_envvar.get_help()


class TestDocModule:
    """
    Test using a module to replace default docstrings.
    """

    # NOTE(review): none of the tests in this class restore the previous
    # DocModule value after calling DocModule.put(); the new value stays in
    # place for whatever runs afterwards.

    def test_overrides(self):
        """
        Check docstrings after pointing DocModule at ``modin.tests.config.docs_module``.

        Objects are expected to carry the overriding docstring where the test
        asserts equality, and to contain the pandas docstring where the test
        asserts containment.
        """
        cfg.DocModule.put("modin.tests.config.docs_module")

        # Test for override
        assert BasePandasDataset.__doc__ == (
            "This is a test of the documentation module for BasePandasDataSet."
        )
        assert BasePandasDataset.apply.__doc__ == (
            "This is a test of the documentation module for BasePandasDataSet.apply."
        )
        # Test scenario 2 from https://github.com/modin-project/modin/issues/7113:
        # We can correctly override the docstring for BasePandasDataset.astype,
        # which is the same method (modulo some wrapping that we add to handle
        # extensions) as Series.astype.
        assert (
            pd.Series.astype.__wrapped__.__wrapped__
            is BasePandasDataset.astype.__wrapped__
        )
        assert BasePandasDataset.astype.__doc__ == (
            "This is a test of the documentation module for BasePandasDataSet.astype."
        )
        assert (
            pd.DataFrame.apply.__doc__
            == "This is a test of the documentation module for DataFrame."
        )
        # Test for pandas doc when method is not defined on the plugin module
        assert pandas.DataFrame.isna.__doc__ in pd.DataFrame.isna.__doc__
        assert pandas.DataFrame.isnull.__doc__ in pd.DataFrame.isnull.__doc__
        # DataFrame.astype is expected to contain the overridden base-class docstring.
        assert BasePandasDataset.astype.__doc__ in pd.DataFrame.astype.__doc__
        # Test for override
        assert (
            pd.Series.isna.__doc__
            == "This is a test of the documentation module for Series."
        )
        # Test for pandas doc when method is not defined on the plugin module
        assert pandas.Series.isnull.__doc__ in pd.Series.isnull.__doc__
        assert pandas.Series.apply.__doc__ in pd.Series.apply.__doc__
        # Test for override
        assert pd.read_csv.__doc__ == "Test override for functions on the module."
        # Test for pandas doc when function is not defined on module.
        assert pandas.read_table.__doc__ in pd.read_table.__doc__

    def test_not_redefining_classes_modin_issue_7138(self):
        """Check that setting DocModule leaves ``pd.DataFrame`` the same class object."""
        # Capture the class object before the docs module is applied.
        original_dataframe_class = pd.DataFrame

        cfg.DocModule.put("modin.tests.config.docs_module")

        # Test for override
        assert (
            pd.DataFrame.apply.__doc__
            == "This is a test of the documentation module for DataFrame."
        )

        # Identity, not equality: the class must not have been replaced.
        assert pd.DataFrame is original_dataframe_class

    def test_base_docstring_override_with_no_dataframe_or_series_class_issue_7113(
        self,
    ):
        """Check the ``BasePandasDataset.astype`` override from ``docs_module_with_just_base``."""
        # This test case tests scenario 1 from issue 7113.
        # The directory of this test file is appended to sys.path so that the
        # docs module can be referenced by its bare name below.
        # NOTE(review): the sys.path entry is never removed afterwards.
        sys.path.append(f"{os.path.dirname(__file__)}")
        cfg.DocModule.put("docs_module_with_just_base")
        assert BasePandasDataset.astype.__doc__ == (
            "This is a test of the documentation module for BasePandasDataSet.astype."
        )


@pytest.mark.skipif(cfg.Engine.get() != "Ray", reason="Ray specific test")
def test_ray_cluster_resources():
    """Check that a resource set via ``RayInitCustomResources`` shows up in Ray."""
    import ray

    custom_resources = {"special_hardware": 1.0}
    cfg.RayInitCustomResources.put(custom_resources)
    # create a dummy df to initialize Ray engine
    pd.DataFrame([1, 2, 3])
    cluster_resources = ray.cluster_resources()
    assert cluster_resources["special_hardware"] == 1.0


@pytest.mark.parametrize(
    "modify_config",
    [{cfg.RangePartitioning: False, cfg.LazyExecution: "Auto"}],
    indirect=True,
)
def test_context_manager_update_config(modify_config):
    """
    Check that ``cfg.context`` overrides config values and restores them on exit.

    The test starts from ``RangePartitioning=False`` and ``LazyExecution="Auto"``
    (requested through the ``modify_config`` parametrization) and walks through
    single and nested contexts that change one or both parameters.
    """

    def expect(range_partitioning, lazy_execution=None):
        # RangePartitioning is compared by identity against the boolean;
        # LazyExecution is only checked when an expected value is given.
        assert cfg.RangePartitioning.get() is range_partitioning
        if lazy_execution is not None:
            assert cfg.LazyExecution.get() == lazy_execution

    # simple case, 1 parameter
    expect(False)
    with cfg.context(RangePartitioning=True):
        expect(True)
    expect(False)

    # nested case, 1 parameter
    expect(False)
    with cfg.context(RangePartitioning=True):
        expect(True)
        with cfg.context(RangePartitioning=False):
            expect(False)
            with cfg.context(RangePartitioning=False):
                expect(False)
            expect(False)
        expect(True)
    expect(False)

    # simple case, 2 parameters
    expect(False, "Auto")
    with cfg.context(RangePartitioning=True, LazyExecution="Off"):
        expect(True, "Off")
    expect(False, "Auto")

    # nested case, 2 parameters
    expect(False, "Auto")
    with cfg.context(RangePartitioning=True, LazyExecution="Off"):
        expect(True, "Off")
        with cfg.context(RangePartitioning=False):
            expect(False, "Off")
            with cfg.context(LazyExecution="On"):
                expect(False, "On")
                with cfg.context(RangePartitioning=True, LazyExecution="Off"):
                    expect(True, "Off")
                expect(False, "On")
            expect(False, "Off")
        expect(True, "Off")
    expect(False, "Auto")


class TestBackend:

    @pytest.mark.parametrize(
        "engine, storage_format, expected_backend",
        [
            ("Python", "Pandas", "Python_Test"),
            ("Ray", "Pandas", "Ray"),
            param(
                "Unidist",
                "Pandas",
                "Unidist",
                marks=pytest.mark.skip(reason=UNIDIST_SKIP_REASON),
            ),
            ("Dask", "Pandas", "Dask"),
            ("Native", "Native", "Pandas"),
        ],
    )
    def test_setting_execution_changes_backend(
        self, engine, storage_format, expected_backend
    ):
        """Check that switching execution changes Backend, and that it reverts on exit."""
        backend_before = cfg.Backend.get()
        with switch_execution(engine, storage_format):
            backend_inside = cfg.Backend.get()
            assert backend_inside == expected_backend
        backend_after = cfg.Backend.get()
        assert backend_after == backend_before

    def test_subscribing_to_backend_triggers_callback(self):
        """Check that subscribing to Backend calls the subscriber once with Backend."""
        on_backend_change = Mock()
        cfg.Backend.subscribe(on_backend_change)
        on_backend_change.assert_called_once_with(cfg.Backend)

    def test_setting_backend_triggers_all_callbacks(self):
        """Check that changing Backend notifies Backend, StorageFormat and Engine subscribers."""

        def subscribe_fresh_mock(variable):
            # Subscribe a new mock, then forget any calls recorded so far so
            # that only later notifications are counted.
            subscriber = Mock()
            variable.subscribe(subscriber)
            subscriber.reset_mock()
            return subscriber

        # Start with a known backend (rather than the one that we start the
        # test with).
        with cfg.context(Backend="Pandas"):
            on_backend = subscribe_fresh_mock(cfg.Backend)
            on_storage_format = subscribe_fresh_mock(cfg.StorageFormat)
            on_engine = subscribe_fresh_mock(cfg.Engine)

            with cfg.context(Backend="Python_Test"):
                on_backend.assert_called_once_with(cfg.Backend)
                on_storage_format.assert_called_once_with(cfg.StorageFormat)
                on_engine.assert_called_once_with(cfg.Engine)

    @pytest.mark.parametrize(
        "backend, expected_engine, expected_storage_format",
        [
            ("Python_test", "Python", "Pandas"),
            ("PYTHON_test", "Python", "Pandas"),
            ("python_TEST", "Python", "Pandas"),
            ("Ray", "Ray", "Pandas"),
            param(
                "Unidist",
                "Unidist",
                "Pandas",
                marks=pytest.mark.skip(reason=UNIDIST_SKIP_REASON),
            ),
            ("Dask", "Dask", "Pandas"),
            ("Pandas", "Native", "Native"),
        ],
    )
    def test_setting_backend_changes_execution(
        self, backend, expected_engine, expected_storage_format
    ):
        """Check that setting Backend changes Engine and StorageFormat, and that both revert."""
        engine_before = cfg.Engine.get()
        storage_format_before = cfg.StorageFormat.get()
        with cfg.context(Backend=backend):
            engine_inside = cfg.Engine.get()
            assert engine_inside == expected_engine
            storage_format_inside = cfg.StorageFormat.get()
            assert storage_format_inside == expected_storage_format
        # Leaving the context must restore the execution seen before it.
        assert cfg.Engine.get() == engine_before
        assert cfg.StorageFormat.get() == storage_format_before

    def test_setting_engine_alone_changes_backend(self):
        """Check that changing only Engine moves Backend to the matching backend."""
        # Start with a known backend (rather than the one that we start the
        # test with).
        with switch_execution(storage_format="Pandas", engine="Ray"):
            ray_backend = cfg.Backend.get()
            assert ray_backend == "Ray"
            with cfg.context(Engine="Python"):
                assert cfg.Backend.get() == "Python_Test"
            # The backend is restored once the engine override ends.
            assert cfg.Backend.get() == ray_backend

    def test_setting_engine_triggers_callbacks(self):
        """Check that changing Engine notifies Engine and Backend, but not StorageFormat."""

        def subscribe_fresh_mock(variable):
            # Subscribe a new mock, then forget any calls recorded so far so
            # that only later notifications are counted.
            subscriber = Mock()
            variable.subscribe(subscriber)
            subscriber.reset_mock()
            return subscriber

        # Start with a known backend (rather than the one that we start the
        # test with).
        with switch_execution(storage_format="Pandas", engine="Ray"):
            on_engine = subscribe_fresh_mock(cfg.Engine)
            on_backend = subscribe_fresh_mock(cfg.Backend)
            on_storage_format = subscribe_fresh_mock(cfg.StorageFormat)

            with cfg.context(Engine="Dask"):
                on_engine.assert_called_once_with(cfg.Engine)
                on_backend.assert_called_once_with(cfg.Backend)
                # StorageFormat stayed the same, so we don't call its callback.
                on_storage_format.assert_not_called()

    def test_setting_storage_format_triggers_callbacks(self):
        """Check that changing StorageFormat notifies StorageFormat and Backend, but not Engine."""
        # There's only one built-in storage format, pandas, so we add a new one
        # here.
        cfg.StorageFormat.add_option("Pandasduplicate")
        from modin.core.execution.dispatching.factories import factories

        # The new execution reuses the PandasOnRay factory.
        factories.PandasduplicateOnRayFactory = factories.PandasOnRayFactory
        duplicate_execution = cfg.Execution(
            storage_format="Pandasduplicate",
            engine="Ray",
        )
        cfg.Backend.register_backend("NewBackend", duplicate_execution)

        def subscribe_fresh_mock(variable):
            # Subscribe a new mock, then forget any calls recorded so far so
            # that only later notifications are counted.
            subscriber = Mock()
            variable.subscribe(subscriber)
            subscriber.reset_mock()
            return subscriber

        with switch_execution(storage_format="Pandas", engine="Ray"):
            on_engine = subscribe_fresh_mock(cfg.Engine)
            on_backend = subscribe_fresh_mock(cfg.Backend)
            on_storage_format = subscribe_fresh_mock(cfg.StorageFormat)
            with cfg.context(StorageFormat="PANDASDUPLICATE"):
                on_storage_format.assert_called_once_with(cfg.StorageFormat)
                on_backend.assert_called_once_with(cfg.Backend)
                # Engine stayed the same, so we don't call its callback.
                on_engine.assert_not_called()

    @pytest.mark.parametrize("name", ["Python_Test", "python_Test"])
    def test_register_existing_backend(self, name):
        """Check that re-registering the ``Python_Test`` backend name raises ValueError."""
        expected_message = (
            "Backend 'Python_Test' is already registered with the execution "
            + "Execution(storage_format='Pandas', engine='Python')"
        )
        with pytest.raises(ValueError, match=re.escape(expected_message)):
            cfg.Backend.register_backend(
                name,
                cfg.Execution(storage_format="Pandas", engine="Python"),
            )

    def test_register_existing_execution(self):
        """Check that registering a new backend name for an existing execution raises ValueError."""
        expected_message = "Execution(storage_format='Pandas', engine='Python') is already registered with the backend Python_Test."
        with pytest.raises(ValueError, match=re.escape(expected_message)):
            cfg.Backend.register_backend(
                "NewBackend2",
                cfg.Execution(storage_format="Pandas", engine="Python"),
            )

    def test_set_invalid_backend(self):
        """Check that putting an unregistered backend name raises ValueError."""
        expected_pattern = re.escape("Unknown backend 'Unknown'")
        with pytest.raises(ValueError, match=expected_pattern):
            cfg.Backend.put("Unknown")

    def test_switch_to_unregistered_backend_with_switch_execution(self):
        """Check that ``switch_execution`` to an execution without a backend raises ValueError."""
        cfg.StorageFormat.add_option("Pandas2")
        from modin.core.execution.dispatching.factories import factories

        factories.Pandas2OnRayFactory = factories.PandasOnRayFactory
        expected_message = (
            "Execution(storage_format='Pandas2', engine='Ray') "
            + "has no known backend. Please register a backend for it with "
            + "Backend.register_backend()"
        )
        with pytest.raises(ValueError, match=re.escape(expected_message)):
            # Entering the context is what is expected to fail.
            with switch_execution(engine="Ray", storage_format="Pandas2"):
                pass

    def test_switch_to_unregistered_backend_with_switch_storage_format(self):
        """
        Check that switching StorageFormat alone to an unregistered execution fails.

        A new storage format ``Pandas3`` is added without registering a backend
        for ``Pandas3`` on the ``Python`` engine, so putting that storage
        format is expected to raise ``ValueError``.
        """
        cfg.StorageFormat.add_option("Pandas3")
        from modin.core.execution.dispatching.factories import factories

        # Factory attributes in this file are named
        # "<StorageFormat>On<Engine>Factory", so the factory for this execution
        # is Pandas3OnPythonFactory. Assigning to Pandas2OnRayFactory here
        # would overwrite the factory of an unrelated execution.
        factories.Pandas3OnPythonFactory = factories.PandasOnPythonFactory
        with cfg.context(StorageFormat="Pandas", Engine="Python"):
            with pytest.raises(
                ValueError,
                match=re.escape(
                    "Execution(storage_format='Pandas3', engine='Python') "
                    + "has no known backend. Please register a backend for it with "
                    + "Backend.register_backend()"
                ),
            ):
                cfg.StorageFormat.put("Pandas3")

    def test_switch_to_unregistered_backend_with_switch_engine(self):
        """Check that switching Engine alone to an execution without a backend raises ValueError."""
        cfg.Engine.add_option("Python2")
        from modin.core.execution.dispatching.factories import factories

        factories.PandasOnPython2Factory = factories.PandasOnPythonFactory
        expected_message = (
            "Execution(storage_format='Pandas', engine='Python2') "
            + "has no known backend. Please register a backend for it with "
            + "Backend.register_backend()"
        )
        with cfg.context(StorageFormat="Pandas", Engine="Python"):
            with pytest.raises(ValueError, match=re.escape(expected_message)):
                cfg.Engine.put("Python2")

    # The default engine and storage format, and hence the default backend,
    # will depend on which engines are available in the current environment.
    # For simplicity, patch the defaults.
    @patch(
        target="modin.config.StorageFormat._get_default",
    )
    @patch(
        target="modin.config.Engine._get_default",
    )
    def test_backend_default(
        self,
        mocked_engine_default,
        mocked_storage_format_default,
    ):
        """Check that the default Backend is ``Pandas`` when both patched defaults are ``Native``."""
        # Both patched ``_get_default`` methods report "Native".
        for mocked_default in (mocked_engine_default, mocked_storage_format_default):
            mocked_default.return_value = "Native"
        assert cfg.Backend._get_default() == "Pandas"

    def test_add_backend_option(self):
        """Check that ``Backend.add_option`` is rejected with a ValueError."""
        expected_message = "Cannot add an option to Backend directly. Use Backend.register_backend instead."
        with pytest.raises(ValueError, match=re.escape(expected_message)):
            cfg.Backend.add_option("NewBackend")

    @pytest.mark.parametrize(
        "order_to_get_in",
        itertools.permutations(
            [
                cfg.Backend,
                cfg.Engine,
                cfg.StorageFormat,
            ]
        ),
        ids=lambda permutation: "_".join(x.__name__ for x in permutation),
    )
    @pytest.mark.parametrize(
        "storage_environment_variable, engine_environment_variable, variable_to_expected_value",
        [
            (
                "Native",
                "Native",
                {
                    cfg.Backend: "Pandas",
                    cfg.Engine: "Native",
                    cfg.StorageFormat: "Native",
                },
            ),
            (
                "NATIVE",
                "NATIVE",
                {
                    cfg.Backend: "Pandas",
                    cfg.Engine: "Native",
                    cfg.StorageFormat: "Native",
                },
            ),
            (
                "Pandas",
                "Dask",
                {
                    cfg.Backend: "Dask",
                    cfg.Engine: "Dask",
                    cfg.StorageFormat: "Pandas",
                },
            ),
        ],
    )
    def test_storage_format_and_engine_come_from_environment(
        self,
        monkeypatch,
        clear_backend_execution_and_storage_format,
        order_to_get_in,
        storage_environment_variable,
        engine_environment_variable,
        variable_to_expected_value,
    ):
        """Check all three variables when StorageFormat and Engine are set in the environment."""
        environment_overrides = {
            cfg.StorageFormat.varname: storage_environment_variable,
            cfg.Engine.varname: engine_environment_variable,
        }
        with mock.patch.dict(os.environ, environment_overrides):
            # The variables are read in the parametrized order.
            for config_variable in order_to_get_in:
                wanted = variable_to_expected_value[config_variable]
                assert (
                    config_variable.get() == wanted
                ), f"{config_variable.__name__} was {config_variable.get()} instead of {wanted}"

    @pytest.mark.parametrize(
        "order_to_get_in",
        itertools.permutations(
            [
                cfg.Backend,
                cfg.Engine,
                cfg.StorageFormat,
            ]
        ),
        ids=lambda permutation: "_".join(x.__name__ for x in permutation),
    )
    @pytest.mark.parametrize(
        "engine_environment_variable, variable_to_expected_value",
        [
            (
                "Dask",
                {cfg.Backend: "Dask", cfg.StorageFormat: "Pandas", cfg.Engine: "Dask"},
            ),
            (
                "DASK",
                {cfg.Backend: "Dask", cfg.StorageFormat: "Pandas", cfg.Engine: "Dask"},
            ),
            (
                "python",
                {
                    cfg.Backend: "Python_Test",
                    cfg.StorageFormat: "Pandas",
                    cfg.Engine: "Python",
                },
            ),
            (
                "ray",
                {cfg.Backend: "Ray", cfg.StorageFormat: "Pandas", cfg.Engine: "Ray"},
            ),
            # note that we can't test Native here because it's not valid to use
            # "Native" engine with the default storage format of "Pandas."
        ],
    )
    def test_only_engine_comes_from_environment(
        self,
        clear_backend_execution_and_storage_format,
        order_to_get_in,
        engine_environment_variable,
        variable_to_expected_value,
    ):
        """Check all three variables when only Engine is set in the environment."""
        environment_overrides = {cfg.Engine.varname: engine_environment_variable}
        with mock.patch.dict(os.environ, environment_overrides):
            # The variables are read in the parametrized order.
            for config_variable in order_to_get_in:
                wanted = variable_to_expected_value[config_variable]
                assert (
                    config_variable.get() == wanted
                ), f"{config_variable.__name__} was {config_variable.get()} instead of {wanted}"

    @pytest.mark.parametrize(
        "order_to_get_in",
        itertools.permutations(
            [
                cfg.Backend,
                cfg.Engine,
                cfg.StorageFormat,
            ]
        ),
        ids=lambda permutation: "_".join(x.__name__ for x in permutation),
    )
    def test_only_storage_format_comes_from_environment(
        self,
        clear_backend_execution_and_storage_format,
        order_to_get_in,
        add_pandas_duplicate_on_ray_execution,
    ):
        """Check all three variables when only StorageFormat is set in the environment."""
        variable_to_expected_value = {
            cfg.Backend: "Test_Backend_1",
            cfg.Engine: "Ray",
            cfg.StorageFormat: "Test_Pandasduplicate",
        }
        # To test switching StorageFormat alone, we rely on the extra storage
        # format and backend added by add_pandas_duplicate_on_ray_execution.
        environment_overrides = {cfg.StorageFormat.varname: "Test_Pandasduplicate"}
        with mock.patch.dict(os.environ, environment_overrides):
            cfg.Engine.put("Ray")
            for config_variable in order_to_get_in:
                wanted = variable_to_expected_value[config_variable]
                assert (
                    config_variable.get() == wanted
                ), f"{config_variable.__name__} was {config_variable.get()} instead of {wanted}"

    @pytest.mark.parametrize(
        "order_to_get_in",
        itertools.permutations(
            [
                cfg.Backend,
                cfg.Engine,
                cfg.StorageFormat,
            ]
        ),
        ids=lambda permutation: "_".join(x.__name__ for x in permutation),
    )
    @pytest.mark.parametrize(
        "backend_environment_variable, variable_to_expected_value",
        [
            (
                "Pandas",
                {
                    cfg.Backend: "Pandas",
                    cfg.Engine: "Native",
                    cfg.StorageFormat: "Native",
                },
            ),
            (
                "Ray",
                {cfg.Backend: "Ray", cfg.Engine: "Ray", cfg.StorageFormat: "Pandas"},
            ),
            (
                "Dask",
                {cfg.Backend: "Dask", cfg.Engine: "Dask", cfg.StorageFormat: "Pandas"},
            ),
            (
                "python_test",
                {
                    cfg.Backend: "Python_Test",
                    cfg.Engine: "Python",
                    cfg.StorageFormat: "Pandas",
                },
            ),
        ],
    )
    def test_backend_comes_from_environment(
        self,
        monkeypatch,
        clear_backend_execution_and_storage_format,
        order_to_get_in,
        backend_environment_variable,
        variable_to_expected_value,
    ):
        """Check all three variables when only Backend is set in the environment."""
        environment_overrides = {cfg.Backend.varname: backend_environment_variable}
        with mock.patch.dict(os.environ, environment_overrides):
            # The variables are read in the parametrized order.
            for config_variable in order_to_get_in:
                wanted = variable_to_expected_value[config_variable]
                assert (
                    config_variable.get() == wanted
                ), f"{config_variable.__name__} was {config_variable.get()} instead of {wanted}"

    @pytest.mark.parametrize(
        "order_to_get_in",
        itertools.permutations(
            [cfg.Backend, cfg.Engine, cfg.StorageFormat],
        ),
        ids=lambda permutation: "_".join(x.__name__ for x in permutation),
    )
    def test_environment_not_set_and_pick_up_default_engine(
        self, clear_backend_execution_and_storage_format, order_to_get_in
    ):
        """Check that each variable's ``get()`` equals its ``_get_default()`` after the reset."""
        for config_variable in order_to_get_in:
            # get() is evaluated before _get_default(), as in a plain comparison.
            current_value = config_variable.get()
            default_value = config_variable._get_default()
            assert current_value == default_value

    @pytest.mark.parametrize(
        "execution_variable, value",
        [(cfg.Engine, "Python"), (cfg.StorageFormat, "Pandas")],
    )
    @pytest.mark.parametrize(
        "variable_to_get",
        [cfg.Backend, cfg.Engine, cfg.StorageFormat],
    )
    def test_conflicting_execution_and_backend_in_environment(
        self,
        monkeypatch,
        clear_backend_execution_and_storage_format,
        execution_variable,
        value,
        variable_to_get,
    ):
        """Check that setting Backend plus an execution variable in the environment raises."""
        # Backend is set first, then the execution variable under test.
        conflicting_environment = (
            (cfg.Backend.varname, "Ray"),
            (execution_variable.varname, value),
        )
        for env_name, env_value in conflicting_environment:
            monkeypatch.setitem(os.environ, env_name, env_value)
        expected_message = "Can't specify both execution and backend in environment"
        with pytest.raises(ValueError, match=re.escape(expected_message)):
            variable_to_get.get()

    def test_get_execution_for_unknown_backend(self):
        """Check that looking up an unknown backend raises ValueError listing the choices."""
        quoted_choices = [f"'{choice}'" for choice in cfg.Backend.choices]
        expected_message = (
            "Unknown backend 'Unknown'. Available backends are: "
            + ", ".join(quoted_choices)
        )
        with pytest.raises(ValueError, match=re.escape(expected_message)):
            cfg.Backend.get_execution_for_backend("Unknown")


@pytest.mark.parametrize(
    "config_name",
    [
        "NPartitions",
        "CpuCount",
        "LogMemoryInterval",
        "LogFileSize",
        "MinRowPartitionSize",
        "MinColumnPartitionSize",
    ],
)
def test_wrong_values(config_name):
    """Check that using -1 for the named config raises ValueError."""
    config: cfg.EnvironmentVariable = getattr(cfg, config_name)
    invalid_overrides = {config_name: -1}
    with pytest.raises(ValueError):
        with cfg.context(**invalid_overrides):
            config.get()


================================================
FILE: modin/tests/config/test_parameter.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

from collections import defaultdict

import pytest

from modin.config import Parameter
from modin.config.pubsub import _TYPE_PARAMS


def make_prefilled(vartype, varinit):
    """
    Create a ``Parameter`` subclass whose configured raw value is `varinit`.

    Parameters
    ----------
    vartype : type
        Passed as the ``type`` of the created parameter class.
    varinit : object
        Raw value that ``_get_value_from_config`` verifies and decodes.

    Returns
    -------
    type
        The ``Prefilled`` class. Its ``_get_value_from_config`` raises
        ``ValueError`` when `varinit` fails verification for the class type.
    """

    class Prefilled(Parameter, type=vartype):
        @classmethod
        def _get_value_from_config(cls):
            type_params = _TYPE_PARAMS[cls.type]
            if type_params.verify(varinit):
                return type_params.decode(varinit)
            raise ValueError(f"Unsupported raw value: {varinit}")

    return Prefilled


@pytest.fixture
def prefilled_parameter():
    """Provide a string-typed prefilled parameter whose raw value is ``"init"``."""
    parameter_class = make_prefilled(str, "init")
    return parameter_class


def test_equals(prefilled_parameter):
    """Check the value read from config and the value read back after ``put``."""
    initial_value = prefilled_parameter.get()
    assert initial_value == "Init"

    prefilled_parameter.put("value2")
    updated_value = prefilled_parameter.get()
    assert updated_value == "Value2"


def test_triggers(prefilled_parameter):
    """Check how often ``once`` and ``subscribe`` callbacks fire across several ``put`` calls."""
    call_counts = defaultdict(int)
    # keep reference to callbacks so they won't be removed by GC
    live_callbacks = []

    def make_callback(name, res=call_counts):
        def callback(p: Parameter):
            res[name] += 1

        live_callbacks.append(callback)
        return callback

    # The parameter is expected to match "init" already, so this fires at once.
    prefilled_parameter.once("init", make_callback("init"))
    assert call_counts["init"] == 1

    for awaited_value in ("never", "once"):
        prefilled_parameter.once(awaited_value, make_callback(awaited_value))
    prefilled_parameter.subscribe(make_callback("subscribe"))

    for new_value in ("multi", "once", "multi", "once"):
        prefilled_parameter.put(new_value)

    expected_counts = {"init": 1, "never": 0, "once": 1, "subscribe": 5}
    for name, expected_count in expected_counts.items():
        assert call_counts[name] == expected_count, f"{name} has wrong count"


@pytest.mark.parametrize(
    "parameter,good,bad",
    [
        (make_prefilled(bool, "false"), {"1": True, False: False}, ["nope", 2]),
        (make_prefilled(int, "10"), {" 15\t": 15, 25: 25}, ["-10", 1.0, "foo"]),
        (
            make_prefilled(dict, "key = value"),
            {
                "KEY1 = VALUE1, KEY2=VALUE2=VALUE3,KEY3=0": {
                    "KEY1": "VALUE1",
                    "KEY2": "VALUE2=VALUE3",
                    "KEY3": 0,
                },
                "KEY=1": {"KEY": 1},
            },
            ["key1=some,string", "key1=value1,key2=", "random string"],
        ),
    ],
)
def test_validation(parameter, good, bad):
    """Check that good raw values decode as expected and bad ones raise ValueError."""
    for raw_value, decoded_value in good.items():
        parameter.put(raw_value)
        assert parameter.get() == decoded_value

    for rejected_value in bad:
        with pytest.raises(ValueError):
            parameter.put(rejected_value)


@pytest.mark.parametrize("vartype", [bool, int, dict])
def test_init_validation(vartype):
    """Check that reading a parameter prefilled with an invalid raw value raises ValueError."""
    invalid_parameter = make_prefilled(vartype, "bad value")
    with pytest.raises(ValueError):
        invalid_parameter.get()


================================================
FILE: modin/tests/core/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.


================================================
FILE: modin/tests/core/storage_formats/base/test_internals.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import pandas
import pytest

import modin.pandas as pd
from modin.config import NPartitions
from modin.tests.pandas.utils import create_test_dfs, df_equals, test_data_values

# Set the ``NPartitions`` config value to 4; this runs once, when the module is imported.
NPartitions.put(4)


@pytest.mark.parametrize("axis", [0, 1])
@pytest.mark.parametrize("item_length", [0, 1, 2])
@pytest.mark.parametrize("loc", ["first", "first + 1", "middle", "penult", "last"])
@pytest.mark.parametrize("replace", [True, False])
def test_insert_item(axis, item_length, loc, replace):
    """
    Compare the query compiler's ``insert_item`` with a reference built via ``pandas.concat``.

    The source data is split into two pieces along `axis`; each piece is then inserted
    into the other one at the position described by `loc`.
    """

    def post_fn(df):
        # Cut off the trailing `item_length` labels along the tested axis.
        # Note that `-0 == 0`, so for `item_length == 0` the first piece is empty
        # and the second piece is the whole frame.
        if axis:
            return df.iloc[:, :-item_length], df.iloc[:, -item_length:]
        return df.iloc[:-item_length, :], df.iloc[-item_length:, :]

    def get_loc(frame, loc):
        # Translate the symbolic position name into an integer position.
        axis_size = len(frame.axes[axis])
        positions = {
            "first": 0,
            "first + 1": 1,
            "middle": axis_size // 2,
            "penult": axis_size - 1,
            "last": axis_size,
        }
        return positions[loc]

    def get_reference(df, value, loc):
        # When replacing, the label at `loc` is dropped from the tail part.
        tail_start = loc + 1 if replace else loc
        if axis == 0:
            head, tail = df.iloc[:loc], df.iloc[tail_start:]
        else:
            head, tail = df.iloc[:, :loc], df.iloc[:, tail_start:]
        return pandas.concat([head, value, tail], axis=axis)

    (md_item1, md_item2), (pd_item1, pd_item2) = create_test_dfs(
        test_data_values[0], post_fn=post_fn
    )

    # First insert the second piece into the first one, then the other way around.
    scenarios = (
        (md_item1, pd_item1, md_item2, pd_item2),
        (md_item2, pd_item2, md_item1, pd_item1),
    )
    for md_target, pd_target, md_value, pd_value in scenarios:
        position = get_loc(pd_target, loc)

        expected = get_reference(pd_target, loc=position, value=pd_value)
        actual = md_target._query_compiler.insert_item(
            axis=axis,
            loc=position,
            value=md_value._query_compiler,
            replace=replace,
        ).to_pandas()

        df_equals(
            actual,
            expected,
            # This test causes an empty slice to be generated thus triggering:
            # https://github.com/modin-project/modin/issues/5974
            check_dtypes=axis != 0,
        )


@pytest.mark.parametrize("num_rows", [1, 2, 3, 4], ids=lambda value: f"num_rows={value}")
@pytest.mark.parametrize("num_cols", [1, 2, 3, 4], ids=lambda value: f"num_cols={value}")
def test_repr_size_issue_6104(num_rows, num_cols):
    """
    Check that the frame built for ``repr`` has no duplicated labels.

    Edge case: we used to select exactly num_cols / 2 + 1 columns from both the front
    and the back of the dataframe, while the dataframe was of such a length that the
    front and back selections overlapped at one column. That column was converted to
    pandas twice, although the duplicate never showed up in the output because pandas
    also only represents num_cols / 2 columns from the front and the back.
    """
    square_frame = pd.DataFrame([[0, 1, 2, 3] for _ in range(4)])
    repr_frame = square_frame._build_repr_df(num_rows, num_cols)
    for labels in (repr_frame.columns, repr_frame.index):
        assert labels.is_unique


================================================
FILE: modin/tests/core/storage_formats/cudf/test_gpu_managers.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.


================================================
FILE: modin/tests/core/storage_formats/cudf/test_internals.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.


================================================
FILE: modin/tests/core/storage_formats/pandas/test_internals.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import functools
import sys
import unittest.mock as mock

import numpy as np
import pandas
import pytest

import modin.pandas as pd
from modin.config import (
    CpuCount,
    Engine,
    MinColumnPartitionSize,
    MinRowPartitionSize,
    NPartitions,
    RangePartitioning,
    context,
)
from modin.core.dataframe.algebra import Fold
from modin.core.dataframe.algebra.default2pandas import DataFrameDefault
from modin.core.dataframe.pandas.dataframe.dataframe import PandasDataframe
from modin.core.dataframe.pandas.dataframe.utils import ColumnInfo, ShuffleSortFunctions
from modin.core.dataframe.pandas.metadata import (
    DtypesDescriptor,
    LazyProxyCategoricalDtype,
    ModinDtypes,
)
from modin.core.execution.utils import remote_function
from modin.core.storage_formats import PandasQueryCompiler
from modin.core.storage_formats.pandas.utils import split_result_of_axis_func_pandas
from modin.distributed.dataframe.pandas import from_partitions
from modin.tests.pandas.utils import (
    create_test_dfs,
    df_equals,
    eval_general,
    test_data_values,
)
from modin.utils import try_cast_to_pandas

# Set the ``NPartitions`` config value to 4; this runs once, when the module is imported.
NPartitions.put(4)

# Engine-specific setup. Depending on ``Engine.get()`` every branch binds the
# following module-level names used by the tests below:
#   * block_partition_class / virtual_column_partition_class / virtual_row_partition_class
#   * put          - store an object with the engine
#   * deploy       - run a function with the engine (not bound for Unidist, see below)
#   * materialize  - fetch a result from the engine (not bound for Unidist, see below)
if Engine.get() == "Ray":
    import ray

    from modin.core.execution.ray.common import RayWrapper
    from modin.core.execution.ray.common.deferred_execution import MetaList
    from modin.core.execution.ray.implementations.pandas_on_ray.partitioning import (
        PandasOnRayDataframeColumnPartition,
        PandasOnRayDataframePartition,
        PandasOnRayDataframeRowPartition,
    )

    block_partition_class = PandasOnRayDataframePartition
    virtual_column_partition_class = PandasOnRayDataframeColumnPartition
    virtual_row_partition_class = PandasOnRayDataframeRowPartition
    put = RayWrapper.put
    deploy = RayWrapper.deploy
    materialize = RayWrapper.materialize
elif Engine.get() == "Dask":
    from modin.core.execution.dask.common import DaskWrapper
    from modin.core.execution.dask.implementations.pandas_on_dask.partitioning import (
        PandasOnDaskDataframeColumnPartition,
        PandasOnDaskDataframePartition,
        PandasOnDaskDataframeRowPartition,
    )

    # initialize modin dataframe to initialize dask
    pd.DataFrame()

    def put(x):
        # Always pass ``hash=False`` to ``DaskWrapper.put``.
        return DaskWrapper.put(x, hash=False)

    block_partition_class = PandasOnDaskDataframePartition
    virtual_column_partition_class = PandasOnDaskDataframeColumnPartition
    virtual_row_partition_class = PandasOnDaskDataframeRowPartition
    deploy = DaskWrapper.deploy
    materialize = DaskWrapper.materialize
elif Engine.get() == "Unidist":
    from modin.core.execution.unidist.common import UnidistWrapper
    from modin.core.execution.unidist.implementations.pandas_on_unidist.partitioning import (
        PandasOnUnidistDataframeColumnPartition,
        PandasOnUnidistDataframePartition,
        PandasOnUnidistDataframeRowPartition,
    )

    block_partition_class = PandasOnUnidistDataframePartition
    virtual_column_partition_class = PandasOnUnidistDataframeColumnPartition
    virtual_row_partition_class = PandasOnUnidistDataframeRowPartition
    put = UnidistWrapper.put
    # NOTE(review): unlike the Ray, Dask and Python branches, this branch does not bind
    # `deploy` or `materialize`. Any test of this module calling them would raise
    # ``NameError`` on Unidist unless it is skipped for that engine - confirm.
elif Engine.get() == "Python":
    from modin.core.execution.python.common import PythonWrapper
    from modin.core.execution.python.implementations.pandas_on_python.partitioning import (
        PandasOnPythonDataframeColumnPartition,
        PandasOnPythonDataframePartition,
        PandasOnPythonDataframeRowPartition,
    )

    def put(x):
        # Always pass ``hash=False`` to ``PythonWrapper.put``.
        return PythonWrapper.put(x, hash=False)

    def deploy(func, args=tuple()):
        # Call the function right away in the current process.
        return func(*args)

    def materialize(arg):
        # Nothing to fetch: the argument is returned unchanged.
        return arg

    block_partition_class = PandasOnPythonDataframePartition
    virtual_column_partition_class = PandasOnPythonDataframeColumnPartition
    virtual_row_partition_class = PandasOnPythonDataframeRowPartition
else:
    # Fail at import time for any engine not handled above.
    raise NotImplementedError(
        f"These test suites are not implemented for the '{Engine.get()}' engine"
    )


def construct_modin_df_by_scheme(pandas_df, partitioning_scheme):
    """
    Create a ``modin.pandas.DataFrame`` partitioned according to the `partitioning_scheme`.

    Parameters
    ----------
    pandas_df : pandas.DataFrame
        Data to split into partitions.
    partitioning_scheme : dict[{"row_lengths", "column_widths"}] -> list of ints
        Lengths of the row partitions and widths of the column partitions to produce.

    Returns
    -------
    modin.pandas.DataFrame
    """
    row_lengths = partitioning_scheme["row_lengths"]
    column_widths = partitioning_scheme["column_widths"]

    def fit_labels(labels, size):
        # Keep the labels object as is when it already has the requested size,
        # otherwise take only the first `size` labels.
        return labels if len(labels) == size else labels[:size]

    fitted_index = fit_labels(pandas_df.index, sum(row_lengths))
    fitted_columns = fit_labels(pandas_df.columns, sum(column_widths))

    # Split along rows first ...
    row_blocks = split_result_of_axis_func_pandas(
        axis=0,
        num_splits=len(row_lengths),
        result=pandas_df,
        min_block_size=MinRowPartitionSize.get(),
        length_list=row_lengths,
    )
    # ... then split every row block along columns.
    block_grid = []
    for row_block in row_blocks:
        block_grid.append(
            split_result_of_axis_func_pandas(
                axis=1,
                num_splits=len(column_widths),
                result=row_block,
                min_block_size=MinColumnPartitionSize.get(),
                length_list=column_widths,
            )
        )

    # Hand every block over to the engine, keeping the 2D layout.
    stored_grid = []
    for grid_row in block_grid:
        stored_grid.append([put(block) for block in grid_row])

    return from_partitions(
        stored_grid,
        axis=None,
        index=fitted_index,
        columns=fitted_columns,
        row_lengths=row_lengths,
        column_widths=column_widths,
    )


def validate_partitions_cache(df, axis=None):
    """
    Check that the lengths/widths cached by a ``PandasDataframe`` match its partitions.

    Parameters
    ----------
    df : PandasDataframe
        Frame whose ``_row_lengths_cache`` / ``_column_widths_cache`` are verified.
    axis : int, optional
        Axis to verify the cache for. Both axes are verified when not specified.
    """
    axes_to_check = (0, 1) if axis is None else (axis,)
    cached_sizes = (df._row_lengths_cache, df._column_widths_cache)

    # The cache must be present and describe as many partitions as the frame has.
    for ax in axes_to_check:
        cache = cached_sizes[ax]
        assert cache is not None
        assert df._partitions.shape[ax] == len(cache)

    check_lengths = 0 in axes_to_check
    check_widths = 1 in axes_to_check

    # Every single partition must agree with the cached value of its row/column.
    n_row_parts = df._partitions.shape[0]
    n_col_parts = df._partitions.shape[1]
    for row_idx in range(n_row_parts):
        for col_idx in range(n_col_parts):
            partition = df._partitions[row_idx, col_idx]
            if check_lengths:
                assert partition.length() == cached_sizes[0][row_idx]
            if check_widths:
                assert partition.width() == cached_sizes[1][col_idx]


def assert_has_no_cache(df, axis=0):
    """
    Check that the dataframe has neither labels nor lengths cached along the given axis.

    Parameters
    ----------
    df : modin.pandas.DataFrame
    axis : int, default: 0
        0 - check index and row lengths, otherwise check columns and column widths.
    """
    frame = df._query_compiler._modin_frame
    if axis == 0:
        assert not frame.has_materialized_index
        assert frame._row_lengths_cache is None
        return
    assert not frame.has_materialized_columns
    assert frame._column_widths_cache is None


def remove_axis_cache(df, axis=0, remove_lengths=True):
    """
    Drop the cached labels (and optionally the cached lengths) of the dataframe along an axis.

    Parameters
    ----------
    df : modin.pandas.DataFrame
    axis : int, default: 0
        0 - remove index cache, otherwise remove columns cache.
    remove_lengths : bool, default: True
        Whether to also reset the row lengths/column widths cache of that axis.
    """
    frame = df._query_compiler._modin_frame
    if axis == 0:
        reset_labels, lengths_attr = frame.set_index_cache, "_row_lengths_cache"
    else:
        reset_labels, lengths_attr = frame.set_columns_cache, "_column_widths_cache"

    reset_labels(None)
    if remove_lengths:
        setattr(frame, lengths_attr, None)


def test_aligning_blocks():
    # Covers the case when modin frames have the same number of rows, but
    # different blocks (partition.list_of_blocks). See #2322 for details
    frame = pd.DataFrame(["-22\n"] * 162).iloc[2:, :]
    frame.reset_index(drop=True, inplace=True)
    frame["T"] = pd.Series(["24.67\n"] * 145)

    # force materialization, see #2322 for details
    try_cast_to_pandas(frame)


def test_aligning_blocks_with_duplicated_index():
    # The same scenario as in `test_aligning_blocks`, but the index of both
    # operands contains duplicated values (each operand is a concat of two frames
    # with default indices).
    def stack(*chunks):
        return pd.concat(tuple(pd.DataFrame(chunk) for chunk in chunks))

    left = stack([0, 1], [2, 3])
    right = stack([0], [1, 2, 3])

    # force materialization
    try_cast_to_pandas(left - right)


def test_aligning_partitions():
    # Assign a column of the original frame to a frame made by concatenating
    # a row-slice of the original with itself, then materialize the result.
    values = list(range(6))
    source_df, _ = create_test_dfs({"a": values, "b": values})
    head_df = source_df.loc[:2]

    doubled_df = pd.concat((head_df, head_df))
    doubled_df["c"] = source_df["b"]

    # force materialization
    try_cast_to_pandas(doubled_df)


@pytest.mark.parametrize("row_labels", [None, [("a", "")], ["a"]])
@pytest.mark.parametrize("col_labels", [None, ["a1"], [("c1", "z")]])
def test_take_2d_labels_or_positional(row_labels, col_labels):
    """
    Compare ``PandasDataframe.take_2d_labels_or_positional`` with ``pandas.DataFrame.loc``.

    Both axes of the tested frames carry two-level labels; ``None`` labels mean
    "take everything along this axis".
    """
    md_df, pd_df = create_test_dfs(
        np.random.rand(4, 4),
        index=[["a", "b", "c", "d"], ["", "", "x", "y"]],
        columns=[["a1", "b1", "c1", "d1"], ["", "", "z", "x"]],
    )

    def as_loc_key(labels):
        # `.loc` needs an explicit full slice where the tested method accepts None
        return slice(None) if labels is None else labels

    expected = pd_df.loc[as_loc_key(row_labels), as_loc_key(col_labels)]

    taken_frame = md_df._query_compiler._modin_frame.take_2d_labels_or_positional(
        row_labels=row_labels, col_labels=col_labels
    )
    # Put the resulting frame back to be able to compare at the API level.
    md_df._query_compiler._modin_frame = taken_frame

    df_equals(md_df, expected)


@pytest.mark.parametrize("has_partitions_shape_cache", [True, False])
@pytest.mark.parametrize("has_frame_shape_cache", [True, False])
def test_apply_func_to_both_axis(has_partitions_shape_cache, has_frame_shape_cache):
    """
    Test broadcasting of non-distributed items by ``modin.core.dataframe.pandas.dataframe.dataframe.PandasDataframe.apply_select_indices``.

    The test is run with and without shape caches at the frame and partition levels.
    """
    md_df, pd_df = create_test_dfs(test_data_values[0])

    # Reference: overwrite the whole pandas frame with the incremented values.
    incremented = pd_df.values + 1
    pd_df.iloc[:, :] = incremented

    modin_frame = md_df._query_compiler._modin_frame

    if not has_frame_shape_cache:
        # Explicitly reset frame's cache
        modin_frame._row_lengths_cache = None
        modin_frame._column_widths_cache = None
    else:
        # Explicitly compute rows & columns shapes to store this info in frame's cache
        modin_frame.row_lengths
        modin_frame.column_widths

    for part in modin_frame._partitions.flatten():
        if not has_partitions_shape_cache:
            # Explicitly reset partition's shape cache
            part._length_cache = None
            part._width_cache = None
        else:
            # Explicitly compute partition shape to store this info in its cache
            part.length()
            part.width()

    def func_to_apply(partition, row_internal_indices, col_internal_indices, item):
        # Write the broadcasted item into the selected positions of the partition.
        partition.iloc[row_internal_indices, col_internal_indices] = item
        return partition

    md_df._query_compiler._modin_frame = modin_frame.apply_select_indices(
        axis=None,
        func=func_to_apply,
        # Passing none-slices does not trigger shapes recomputation and so the cache is untouched.
        row_labels=slice(None),
        col_labels=slice(None),
        keep_remaining=True,
        new_index=pd_df.index,
        new_columns=pd_df.columns,
        item_to_distribute=incremented,
    )

    df_equals(md_df, pd_df)


@pytest.mark.parametrize(
    "test_type",
    [
        "many_small_dfs",
        "concatted_df_with_small_dfs",
        "large_df_plus_small_dfs",
    ],
)
@pytest.mark.parametrize(
    "set_num_partitions",
    [1, 4],
    indirect=True,
)
def test_rebalance_partitions(test_type, set_num_partitions):
    """
    Check the partition layout of a concatenated frame and of the frames produced from it.

    The concatenated frame is expected to have ``NPartitions x NPartitions`` partitions
    of the column-partition class; results of column-wise, row-wise and element-wise
    applies are expected to consist of block partitions of the same layout.
    """
    num_partitions = NPartitions.get()
    expected_shape = (num_partitions, num_partitions)

    def single_row_dfs(stop):
        # One-row frames with 1000 columns each, one frame per 1000-step in [1, stop)
        return [
            pd.DataFrame(
                [[start + offset for offset in range(0, 1000)]],
                columns=[f"col{offset}" for offset in range(0, 1000)],
                index=pd.Index([start]),
            )
            for start in range(1, stop, 1000)
        ]

    if test_type == "many_small_dfs":
        small_dfs = single_row_dfs(100001)
        large_df = pd.concat(small_dfs)
        col_length = 100
    elif test_type == "concatted_df_with_small_dfs":
        small_dfs = single_row_dfs(100001)
        large_df = pd.concat([pd.concat(small_dfs)] + small_dfs[:3])
        col_length = 103
    else:
        large_df = pd.DataFrame(
            [[i + j for j in range(1, 1000)] for i in range(0, 100000, 1000)],
            columns=[f"col{j}" for j in range(1, 1000)],
            index=pd.Index(list(range(0, 100000, 1000))),
        )
        small_dfs = single_row_dfs(4001)
        large_df = pd.concat([large_df] + small_dfs[:3])
        col_length = 103

    large_modin_frame = large_df._query_compiler._modin_frame
    assert (
        large_modin_frame._partitions.shape == expected_shape
    ), "Partitions were not rebalanced after concat."
    column_partition_cls = large_modin_frame._partition_mgr_cls._column_partitions_class
    assert all(
        isinstance(ptn, column_partition_cls)
        for ptn in large_modin_frame._partitions.flatten()
    )

    def check_block_partitioned(result_df, not_block_message):
        # The result must keep the layout and consist of block partitions only.
        frame = result_df._query_compiler._modin_frame
        assert (
            frame._partitions.shape == expected_shape
        ), "Partitions list shape is incorrect."
        block_cls = frame._partition_mgr_cls._partition_class
        assert all(
            isinstance(ptn, block_cls) for ptn in frame._partitions.flatten()
        ), not_block_message

    # The following check tests that we can correctly form full-axis virtual partitions
    # over the orthogonal axis from non-full-axis virtual partitions.

    def col_apply_func(col):
        assert len(col) == col_length, "Partial axis partition detected."
        return col + 1

    check_block_partitioned(
        large_df.apply(col_apply_func),
        "Partitions are not block partitioned after column-wise apply.",
    )

    # Start over from the frame obtained right after the concat.
    large_df = pd.DataFrame(
        query_compiler=large_df._query_compiler.__constructor__(large_modin_frame)
    )
    # The following check tests that we can correctly form full-axis virtual partitions
    # over the same axis from non-full-axis virtual partitions.

    def row_apply_func(row):
        assert len(row) == 1000, "Partial axis partition detected."
        return row + 1

    check_block_partitioned(
        large_df.apply(row_apply_func, axis=1),
        "Partitions are not block partitioned after row-wise apply.",
    )

    check_block_partitioned(
        large_df.applymap(lambda x: x),
        "Partitions are not block partitioned after element-wise apply.",
    )


@pytest.mark.parametrize(
    "axis,virtual_partition_class",
    ((0, virtual_column_partition_class), (1, virtual_row_partition_class)),
    ids=["partitions_spanning_all_columns", "partitions_spanning_all_rows"],
)
class TestDrainVirtualPartitionCallQueue:
    """Test draining virtual partition call queues.

    Test creating a virtual partition made of block partitions and/or one or
    more layers of virtual partitions, draining the top-level partition's
    call queue, and getting the result.

    In all these test cases, the full_axis argument doesn't matter for
    correctness because it only affects `apply`, which is not used here.
    Still, virtual partition users are not supposed to create full-axis
    virtual partitions out of other full-axis virtual partitions, so
    set full_axis to False everywhere.

    Expected results are always built with plain ``pandas`` so that the
    reference does not depend on Modin itself.
    """

    def test_from_virtual_partitions_with_call_queues(
        self,
        axis,
        virtual_partition_class,
    ):
        """Drain a virtual partition made of two virtual partitions with queued calls."""

        # reverse the dataframe along the virtual partition axis.
        def reverse(df):
            return df.iloc[::-1, :] if axis == 0 else df.iloc[:, ::-1]

        level_zero_blocks_first = [
            block_partition_class(put(pandas.DataFrame([0]))),
            block_partition_class(put(pandas.DataFrame([1]))),
        ]
        level_one_virtual_first = virtual_partition_class(
            level_zero_blocks_first, full_axis=False
        )
        level_one_virtual_first = level_one_virtual_first.add_to_apply_calls(reverse)
        level_zero_blocks_second = [
            block_partition_class(put(pandas.DataFrame([2]))),
            block_partition_class(put(pandas.DataFrame([3]))),
        ]
        level_one_virtual_second = virtual_partition_class(
            level_zero_blocks_second, full_axis=False
        )
        level_one_virtual_second = level_one_virtual_second.add_to_apply_calls(reverse)
        level_two_virtual = virtual_partition_class(
            [level_one_virtual_first, level_one_virtual_second], full_axis=False
        )
        level_two_virtual.drain_call_queue()
        # Each level-one partition is reversed on its own, hence [1, 0] and [3, 2].
        if axis == 0:
            expected_df = pandas.DataFrame([1, 0, 3, 2], index=[0, 0, 0, 0])
        else:
            expected_df = pandas.DataFrame([[1, 0, 3, 2]], columns=[0, 0, 0, 0])
        df_equals(
            level_two_virtual.to_pandas(),
            expected_df,
        )

    def test_from_block_and_virtual_partition_with_call_queues(
        self, axis, virtual_partition_class
    ):
        """Drain a virtual partition made of a block and a virtual partition, both with queued calls."""

        # make a function that reverses the dataframe along the virtual
        # partition axis.
        # for testing axis == 0, start with two 2-rows-by-1-column blocks. for
        # axis == 1, start with two 1-rows-by-2-column blocks.
        def reverse(df):
            return df.iloc[::-1, :] if axis == 0 else df.iloc[:, ::-1]

        block_data = [[0, 1], [2, 3]] if axis == 0 else [[[0, 1]], [[2, 3]]]
        level_zero_blocks = [
            block_partition_class(put(pandas.DataFrame(block_data[0]))),
            block_partition_class(put(pandas.DataFrame(block_data[1]))),
        ]
        level_zero_blocks[0] = level_zero_blocks[0].add_to_apply_calls(reverse)
        level_one_virtual = virtual_partition_class(
            level_zero_blocks[1], full_axis=False
        )
        level_one_virtual = level_one_virtual.add_to_apply_calls(reverse)
        level_two_virtual = virtual_partition_class(
            [level_zero_blocks[0], level_one_virtual], full_axis=False
        )
        level_two_virtual.drain_call_queue()
        if axis == 0:
            expected_df = pandas.DataFrame([1, 0, 3, 2], index=[1, 0, 1, 0])
        else:
            expected_df = pandas.DataFrame([[1, 0, 3, 2]], columns=[1, 0, 1, 0])
        df_equals(level_two_virtual.to_pandas(), expected_df)

    def test_virtual_partition_call_queues_at_three_levels(
        self, axis, virtual_partition_class
    ):
        """Drain call queues of three nested virtual partitions, each appending one row."""
        block = block_partition_class(put(pandas.DataFrame([1])))
        level_one_virtual = virtual_partition_class([block], full_axis=False)
        level_one_virtual = level_one_virtual.add_to_apply_calls(
            lambda df: pandas.concat([df, pandas.DataFrame([2])])
        )
        level_two_virtual = virtual_partition_class(
            [level_one_virtual], full_axis=False
        )
        level_two_virtual = level_two_virtual.add_to_apply_calls(
            lambda df: pandas.concat([df, pandas.DataFrame([3])])
        )
        level_three_virtual = virtual_partition_class(
            [level_two_virtual], full_axis=False
        )
        level_three_virtual = level_three_virtual.add_to_apply_calls(
            lambda df: pandas.concat([df, pandas.DataFrame([4])])
        )
        level_three_virtual.drain_call_queue()
        # The queued calls must run from the innermost level outwards: 2, 3, then 4.
        # Build the reference with pandas (not Modin), as the other tests of this
        # class do, to keep the expected value independent of the code under test.
        df_equals(
            level_three_virtual.to_pandas(),
            pandas.DataFrame([1, 2, 3, 4], index=[0, 0, 0, 0]),
        )


@pytest.mark.parametrize(
    "virtual_partition_class",
    (virtual_column_partition_class, virtual_row_partition_class),
    ids=["partitions_spanning_all_columns", "partitions_spanning_all_rows"],
)
def test_virtual_partition_apply_not_returning_pandas_dataframe(
    virtual_partition_class,
):
    """
    Check that ``apply`` on a virtual partition can return a non-dataframe object.

    See https://github.com/modin-project/modin/issues/4811
    """
    empty_block = block_partition_class(put(pandas.DataFrame()))
    virtual_partition = virtual_partition_class(empty_block, full_axis=False)

    scalar_result = virtual_partition.apply(lambda df: 1).get()
    assert scalar_result == 1


@pytest.mark.skipif(
    Engine.get() != "Ray",
    reason="Only ray.wait() does not take duplicate object refs.",
)
def test_virtual_partition_dup_object_ref():
    """
    Check that waiting on a virtual partition holding duplicated object refs works.

    See https://github.com/modin-project/modin/issues/5045
    """
    zeros = pd.DataFrame(np.zeros((100, 20), dtype=np.float32, order="C"))
    # Concatenating the very same frame many times is what produces duplicated refs.
    stacked = pd.concat([zeros] * 20)
    first_partition = stacked._query_compiler._modin_frame._partitions.flatten()[0]
    refs = first_partition.list_of_blocks
    assert len(set(refs)) != len(refs), "Test setup did not contain duplicate objects"
    # The below call to wait() should not crash
    first_partition.wait()


# Builders of the positions to pass to ``_reorder_labels`` for a given axis labels object.
def _positions_no_reordering(index):
    # `None` is passed instead of positions for this axis
    return None


def _positions_reversed(index):
    # every position exactly once, in the reversed order
    return np.arange(len(index) - 1, -1, -1)


def _positions_edges_only(index):
    # only the first three and the last three positions
    size = len(index)
    return [0, 1, 2, size - 3, size - 2, size - 1]


def _positions_tripled(index):
    # every position repeated three times in a row
    return np.repeat(np.arange(len(index)), repeats=3)


__test_reorder_labels_cache_axis_positions = [
    pytest.param(_positions_no_reordering, id="no_reordering"),
    pytest.param(_positions_reversed, id="reordering_only"),
    pytest.param(_positions_edges_only, id="projection_only"),
    pytest.param(_positions_tripled, id="size_grow"),
]


# Partitioning scheme builders for ``test_reorder_labels_cache``; each one maps
# a pandas frame to the dict accepted by ``construct_modin_df_by_scheme``.
def _scheme_single_partition(df):
    n_rows, n_cols = df.shape
    return {"row_lengths": [n_rows], "column_widths": [n_cols]}


def _scheme_two_unbalanced_partitions(df):
    n_rows, n_cols = df.shape
    return {
        "row_lengths": [32, max(0, n_rows - 32)],
        "column_widths": [32, max(0, n_cols - 32)],
    }


def _scheme_perfect_partitioning(df):
    n_rows, n_cols = df.shape
    n_parts = NPartitions.get()
    return {
        "row_lengths": [n_rows // n_parts] * n_parts,
        "column_widths": [n_cols // n_parts] * n_parts,
    }


def _scheme_powers_of_two(df):
    n_parts = NPartitions.get()
    return {
        "row_lengths": [2**power for power in range(n_parts)],
        "column_widths": [2**power for power in range(n_parts)],
    }


def _scheme_pairs(df):
    n_rows, n_cols = df.shape
    return {
        "row_lengths": [2] * (n_rows // 2),
        "column_widths": [2] * (n_cols // 2),
    }


@pytest.mark.parametrize("row_positions", __test_reorder_labels_cache_axis_positions)
@pytest.mark.parametrize("col_positions", __test_reorder_labels_cache_axis_positions)
@pytest.mark.parametrize(
    "partitioning_scheme",
    [
        pytest.param(_scheme_single_partition, id="single_partition"),
        pytest.param(
            _scheme_two_unbalanced_partitions, id="two_unbalanced_partitions"
        ),
        pytest.param(_scheme_perfect_partitioning, id="perfect_partitioning"),
        pytest.param(
            _scheme_powers_of_two, id="unbalanced_partitioning_equals_npartition"
        ),
        pytest.param(_scheme_pairs, id="unbalanced_partitioning"),
    ],
)
def test_reorder_labels_cache(
    row_positions,
    col_positions,
    partitioning_scheme,
):
    """
    Check that ``_reorder_labels`` leaves valid lengths/widths caches in its result.

    The frame is built with an explicit partitioning scheme and then reordered
    along both axes with the parametrized positions.
    """
    source = pandas.DataFrame(test_data_values[0])
    scheme = partitioning_scheme(source)

    modin_frame = construct_modin_df_by_scheme(source, scheme)._query_compiler._modin_frame

    rows = row_positions(modin_frame.index)
    cols = col_positions(modin_frame.columns)
    validate_partitions_cache(modin_frame._reorder_labels(rows, cols))


def test_reorder_labels_dtypes():
    """
    Check that dtypes reported after ``_reorder_labels`` match the materialized ones.

    The frame has columns of four different dtypes split across two column
    partitions; the columns are then put in reversed order.
    """
    pandas_df = pandas.DataFrame(
        {
            "a": [1, 2, 3, 4],
            "b": [1.0, 2.4, 3.4, 4.5],
            "c": ["a", "b", "c", "d"],
            "d": pd.to_datetime([1, 2, 3, 4], unit="D"),
        }
    )

    # Column widths have to be derived from the number of columns. Using the number
    # of rows would only work by coincidence while the frame happens to be square.
    n_cols = len(pandas_df.columns)

    md_df = construct_modin_df_by_scheme(
        pandas_df,
        partitioning_scheme={
            "row_lengths": [len(pandas_df)],
            "column_widths": [
                n_cols // 2,
                n_cols // 2 + n_cols % 2,
            ],
        },
    )
    md_df = md_df._query_compiler._modin_frame

    result = md_df._reorder_labels(
        row_positions=None, col_positions=np.arange(len(md_df.columns) - 1, -1, -1)
    )
    df_equals(result.dtypes, result.to_pandas().dtypes)


@pytest.mark.parametrize(
    "left_partitioning, right_partitioning, ref_with_cache_available, ref_with_no_cache",
    # The reference values assume `MinRowPartitionSize == 32`, `MinColumnPartitionSize == 32`
    # and `NPartitions == 4` (this is what the `modify_config` parametrization below sets).
    # "cached" refers to the case when lengths/widths caches are available,
    # "uncached" to the case when they were reset to `None`.
    [
        # cached: 2 + 2 = 4 / chunk_size = 1 split; uncached: sum of splits (1 + 1 == 2)
        ([2], [2], 1, 2),
        # cached: 24 + 54 = 78 / chunk_size = 3 splits; uncached: sum of splits (1 + 1 == 2)
        ([24], [54], 3, 2),
        # cached: 2 + 299 = 301 / chunk_size = 10 splits -> bound by NPartitions (4);
        # uncached: sum of splits (1 + 1 == 2)
        ([2], [299], 4, 2),
        # cached: 32 + 32 + 128 = 192 / chunk_size = 6 splits -> bound by NPartitions (4);
        # uncached: sum of splits (2 + 1 == 3)
        ([32, 32], [128], 4, 3),
        # cached: 128 * 7 + 128 * 6 = 1664 / chunk_size = 52 splits -> bound by NPartitions (4);
        # uncached: sum of splits (7 + 6 = 13) -> bound by NPartitions (4)
        ([128] * 7, [128] * 6, 4, 4),
    ],
)
@pytest.mark.parametrize(
    "modify_config",
    [{NPartitions: 4, MinRowPartitionSize: 32, MinColumnPartitionSize: 32}],
    indirect=True,
)
def test_merge_partitioning(
    left_partitioning,
    right_partitioning,
    ref_with_cache_available,
    ref_with_no_cache,
    modify_config,
):
    """
    Check the number of splits ``merge_partitioning()`` returns for two frames.

    The function is called for both axes, first with the lengths/widths caches
    of the frames available and then with the caches reset to ``None``. In the
    latter case the test also checks that the caches are still ``None``
    afterwards, i.e. that no computation of them was triggered.
    """
    from modin.core.storage_formats.pandas.utils import merge_partitioning

    def build_square_frame(partitioning):
        # Square frame partitioned identically along rows and columns
        size = sum(partitioning)
        pandas_df = pandas.DataFrame([np.arange(size) for _ in range(size)])
        modin_df = construct_modin_df_by_scheme(
            pandas_df, {"row_lengths": partitioning, "column_widths": partitioning}
        )
        return modin_df._query_compiler._modin_frame

    left = build_square_frame(left_partitioning)
    right = build_square_frame(right_partitioning)

    assert left.row_lengths == left.column_widths == left_partitioning
    assert right.row_lengths == right.column_widths == right_partitioning

    for axis in (0, 1):
        assert merge_partitioning(left, right, axis=axis) == ref_with_cache_available

    # Dropping the partition sizes caches of both frames
    for frame in (left, right):
        frame._row_lengths_cache = None
        frame._column_widths_cache = None

    for axis in (0, 1):
        assert merge_partitioning(left, right, axis=axis) == ref_with_no_cache
        # Verifying that no computations are being triggered
        for frame in (left, right):
            assert frame._row_lengths_cache is None
            assert frame._column_widths_cache is None


def test_merge_with_bad_partitioning():
    """
    Check that merging calls ``repartition(axis=0)`` exactly once on the left query compiler.

    The left frame consists of a single 256x256 partition and the right one is
    split into four partitions of 32 along each axis.
    See https://github.com/modin-project/modin/pull/7229
    """

    def build_square_df(partitioning):
        # Square frame partitioned identically along rows and columns
        size = sum(partitioning)
        pandas_df = pandas.DataFrame([np.arange(size) for _ in range(size)])
        return construct_modin_df_by_scheme(
            pandas_df, {"row_lengths": partitioning, "column_widths": partitioning}
        )

    left_partitioning = [256]
    right_partitioning = [32, 32, 32, 32]

    left = build_square_df(left_partitioning)
    right = build_square_df(right_partitioning)

    # Making sure the frames have the requested partitioning
    for modin_df, partitioning in (
        (left, left_partitioning),
        (right, right_partitioning),
    ):
        frame = modin_df._query_compiler._modin_frame
        assert frame.row_lengths == frame.column_widths == partitioning

    # The mocked `repartition` has to return some query compiler, its content doesn't matter
    dummy_qc = pd.DataFrame([1, 2, 3, 4])._query_compiler
    repartition_patch = mock.patch.object(
        left._query_compiler, "repartition", return_value=dummy_qc
    )
    with repartition_patch as repartition_mock:
        left.merge(right)
        repartition_mock.assert_called_once_with(axis=0)


def test_groupby_with_empty_partition():
    """
    Check that ``groupby().count()`` works on a frame filtered by ``query()``.

    The source frame has two row partitions and the ``a > 1`` filter only keeps
    the rows that were placed in the second one. See #5461 for details.
    """

    def make_filtered_df():
        source = construct_modin_df_by_scheme(
            pandas_df=pandas.DataFrame({"a": [1, 1, 2, 2], "b": [3, 4, 5, 6]}),
            partitioning_scheme={"row_lengths": [2, 2], "column_widths": [2]},
        )
        return source.query("a > 1", engine="python")

    # grouping by a column label:
    # check index error due to partitioning mismatching
    filtered = make_filtered_df()
    filtered.groupby("a").count()

    # grouping by a column object taken from a freshly built filtered frame
    filtered = make_filtered_df()
    filtered.groupby(filtered["a"]).count()


@pytest.mark.parametrize("set_num_partitions", [2], indirect=True)
def test_repartitioning(set_num_partitions):
    """
    Check that ``apply_full_axis(..., keep_partitioning=False)`` does not keep the old partitioning.

    A frame with two column partitions goes through an identity function applied
    along ``axis=1``; the result is expected to consist of a single partition.

    For more details see: https://github.com/modin-project/modin/issues/5621
    """
    assert NPartitions.get() == 2

    data = {"a": [1, 1, 2, 2], "b": [3, 4, 5, 6], "c": [1, 2, 3, 4], "d": [4, 5, 6, 7]}
    # The reference frame and the frame handed to Modin are built independently
    pandas_df = pandas.DataFrame(data)
    modin_df = construct_modin_df_by_scheme(
        pandas_df=pandas.DataFrame(data),
        partitioning_scheme={"row_lengths": [4], "column_widths": [2, 2]},
    )

    source_frame = modin_df._query_compiler._modin_frame
    assert source_frame._partitions.shape == (1, 2)
    assert source_frame.column_widths == [2, 2]

    result_frame = source_frame.apply_full_axis(
        axis=1,
        func=lambda df: df,
        keep_partitioning=False,
        new_index=[0, 1, 2, 3],
        new_columns=["a", "b", "c", "d"],
    )

    # Both column partitions must have been merged into one
    assert result_frame._partitions.shape == (1, 1)
    assert result_frame.column_widths == [4]
    df_equals(result_frame._partitions[0, 0].to_pandas(), pandas_df)
    df_equals(result_frame.to_pandas(), pandas_df)


@pytest.mark.parametrize("col_name", ["numeric_col", "non_numeric_col"])
@pytest.mark.parametrize("ascending", [True, False])
@pytest.mark.parametrize("num_pivots", [3, 2, 1])
@pytest.mark.parametrize("all_pivots_are_unique", [True, False])
def test_split_partitions_kernel(
    col_name, ascending, num_pivots, all_pivots_are_unique
):
    """
    Check the `split_partitions_using_pivots_for_sort` function used in partitions reshuffling.

    The tested function splits the passed dataframe into parts according
    to the 'pivots' that indicate boundary values for the parts.

    Parameters
    ----------
    col_name : {"numeric_col", "non_numeric_col"}
        Name of the key column the pivot values belong to. The tested function
        may behave differently depending on the type of that column.
    ascending : {True, False}
        Whether the split parts are returned in ascending or descending order.
        Both of the cases are tested.
    num_pivots : {3, 2, 1}
        Number of boundary values to pass, the function's behavior may depend on it.
    all_pivots_are_unique : {True, False}
        Duplicate pivot values cause empty partitions to be produced. This parameter
        helps to verify that the function still behaves correctly in such cases.
    """
    rng = np.random.RandomState(42)

    df = pandas.DataFrame(
        {
            "numeric_col": range(9),
            "non_numeric_col": list("abcdefghi"),
        }
    )
    # The frame is still sorted here, so the first/last values are the min/max ones
    min_val = df[col_name].iloc[0]
    max_val = df[col_name].iloc[-1]

    # Picking random boundary values from the key column
    pivots = rng.choice(df[col_name], num_pivots, replace=False)
    if not all_pivots_are_unique:
        # Making the 'pivots' consist of a single repeated value
        pivots = np.repeat(pivots[0], num_pivots)
    # The tested function expects pivots to be passed in the ascending order
    pivots = np.sort(pivots)

    # Randomly reordering rows in the dataframe
    df = df.reindex(rng.permutation(df.index))
    key_column_info = ColumnInfo(
        name=col_name,
        is_numeric=pandas.api.types.is_numeric_dtype(df.dtypes[col_name]),
        pivots=pivots,
    )
    bins = ShuffleSortFunctions.split_partitions_using_pivots_for_sort(
        df,
        [key_column_info],
        ascending=ascending,
    )

    # Reference bounds that make the result verification simpler. For the descending
    # order the bounds are reversed as well:
    # Ex: bounds = [0, 2, 5, 10] for ascending and [10, 5, 2, 0] for descending.
    bounds = np.concatenate([[min_val], pivots, [max_val]])
    if not ascending:
        bounds = bounds[::-1]

    for idx, part in enumerate(bins):
        # Each part has to lie between two neighboring bounds (inclusively).
        # Example: if the `pivots` were [2, 5] and the min/max values for the column are
        # min=0, max=10, then for the ascending order each part satisfies:
        #   0 <= part[0] <= 2; 2 <= part[1] <= 5; 5 <= part[2] <= 10
        # and for the descending order:
        #   5 <= part[0] <= 10; 2 <= part[1] <= 5; 0 <= part[2] <= 2
        if ascending:
            lower, upper = bounds[idx], bounds[idx + 1]
        else:
            lower, upper = bounds[idx + 1], bounds[idx]
        assert ((lower <= part[col_name]) & (part[col_name] <= upper)).all()


@pytest.mark.parametrize("col_name", ["numeric_col", "non_numeric_col"])
@pytest.mark.parametrize("ascending", [True, False])
def test_split_partitions_with_empty_pivots(col_name, ascending):
    """
    Check that the splitting function handles an empty list of pivots.

    The expected result is a tuple holding a single split that is equal to the input dataframe.
    """
    df = pandas.DataFrame(
        {
            "numeric_col": range(9),
            "non_numeric_col": list("abcdefghi"),
        }
    )
    key_column_info = ColumnInfo(
        name=col_name,
        is_numeric=pandas.api.types.is_numeric_dtype(df.dtypes[col_name]),
        pivots=[],
    )

    splits = ShuffleSortFunctions.split_partitions_using_pivots_for_sort(
        df,
        [key_column_info],
        ascending=ascending,
    )

    # We're expecting to receive a single split here
    assert isinstance(splits, tuple)
    assert len(splits) == 1
    assert splits[0].equals(df)


@pytest.mark.parametrize("ascending", [True, False])
def test_shuffle_partitions_with_empty_pivots(ascending):
    """
    Check that the `PartitionMgr.shuffle_partitions` method can handle empty pivots list.

    A single-partition frame is shuffled with ``ideal_num_new_partitions=1`` and the only
    resulting partition is compared with the frame sorted by pandas.
    """
    raw_data = np.array([["hello", "goodbye"], ["hello", "Hello"]])
    modin_frame = pd.DataFrame(raw_data)._query_compiler._modin_frame

    assert modin_frame._partitions.shape == (1, 1)

    key_column = modin_frame.columns[1]

    # These are the parameters we pass in the `.sort_by()` implementation
    shuffle_functions = ShuffleSortFunctions(
        modin_frame,
        columns=key_column,
        ascending=ascending,
        ideal_num_new_partitions=1,
    )

    def sort_by_key(df):
        return df.sort_values(key_column)

    shuffled_partitions = modin_frame._partition_mgr_cls.shuffle_partitions(
        modin_frame._partitions,
        index=0,
        shuffle_functions=shuffle_functions,
        final_shuffle_func=sort_by_key,
    )
    expected = modin_frame.to_pandas().sort_values(key_column)
    actual = shuffled_partitions[0, 0].get()

    assert shuffled_partitions.shape == (1, 1)
    assert expected.equals(actual)


@pytest.mark.parametrize("ascending", [True, False])
def test_split_partition_preserve_names(ascending):
    """
    Check that ``split_partitions_using_pivots_for_sort`` keeps index/column names.

    Every dataframe produced by the split has to carry the same index name and
    columns name as the source dataframe.
    """
    custom_name = "custom_name"
    df = pandas.DataFrame(
        {
            "numeric_col": range(9),
            "non_numeric_col": list("abcdefghi"),
        }
    )
    df.index.name = custom_name
    df.columns.name = custom_name

    # The duplicated pivot value makes some of the bins empty
    column_info = ColumnInfo(name="numeric_col", is_numeric=True, pivots=[2, 2, 5, 7])
    splits = ShuffleSortFunctions.split_partitions_using_pivots_for_sort(
        df,
        [column_info],
        ascending=ascending,
    )

    for split in splits:
        assert split.index.name == custom_name
        assert split.columns.name == custom_name


@pytest.mark.parametrize("has_cols_metadata", [True, False])
@pytest.mark.parametrize("has_dtypes_metadata", [True, False])
def test_merge_preserves_metadata(has_cols_metadata, has_dtypes_metadata):
    """
    Check which metadata is materialized in the result of ``merge()``.

    Depending on the parameters, the columns and/or dtypes caches of the left
    frame are dropped before merging. Afterwards the test checks that the result
    has materialized metadata only when the left frame had it, and that the
    dropped caches of the left frame were not materialized by the merge.
    """
    df1 = pd.DataFrame({"a": [1, 1, 2, 2], "b": list("abcd")})
    df2 = pd.DataFrame({"a": [4, 2, 1, 3], "b": list("bcaf"), "c": [3, 2, 1, 0]})

    left_frame = df1._query_compiler._modin_frame

    if not has_cols_metadata:
        left_frame._columns_cache = None
    else:
        # Verify that there were initially materialized metadata
        assert left_frame.has_materialized_columns

    if not has_dtypes_metadata:
        left_frame.set_dtypes_cache(None)
    else:
        # Verify that there were initially materialized metadata
        assert left_frame.has_materialized_dtypes

    merged_frame = df1.merge(df2, on="b")._query_compiler._modin_frame

    if not has_cols_metadata:
        # Verify that no materialization was triggered
        assert not merged_frame.has_materialized_columns
        assert not merged_frame.has_materialized_dtypes
        assert not left_frame.has_materialized_columns
        if not has_dtypes_metadata:
            assert not left_frame.has_materialized_dtypes
        return

    assert merged_frame.has_materialized_columns
    if has_dtypes_metadata:
        assert merged_frame.has_materialized_dtypes
        return

    # Verify that no materialization was triggered
    assert not merged_frame.has_materialized_dtypes
    assert not left_frame.has_materialized_dtypes


def test_binary_op_preserve_dtypes():
    """
    Check whether the result of a binary operation has materialized dtypes.

    The result is expected to have materialized dtypes when all distributed
    operands had them, and not to have them when the right operand's dtypes
    cache was dropped.
    """
    df = pd.DataFrame({"a": [1, 2, 3], "b": [4.0, 5.0, 6.0]})

    def prepare(obj, has_cache=True):
        # Either materialize or drop the dtypes cache of the object, then return it
        if not has_cache:
            obj._query_compiler.set_frame_dtypes_cache(None)
            assert not obj._query_compiler.frame_has_materialized_dtypes
            return obj
        _ = obj.dtypes
        assert obj._query_compiler.frame_has_materialized_dtypes
        return obj

    def expect_cache(obj, has_cache=True):
        assert obj._query_compiler.frame_has_materialized_dtypes == has_cache

    def check_distributed_operand(other):
        expect_cache(prepare(df) + prepare(other, has_cache=True))
        expect_cache(
            prepare(df) + prepare(other, has_cache=False), has_cache=False
        )

    # Check when `other` is a non-distributed object
    expect_cache(prepare(df) + 2.0)
    expect_cache(prepare(df) + {"a": 2.0, "b": 4})
    expect_cache(prepare(df) + [2.0, 4])
    expect_cache(prepare(df) + np.array([2.0, 4]))

    # Check when `other` is a dataframe
    check_distributed_operand(pd.DataFrame({"b": [3, 4, 5], "c": [4.0, 5.0, 6.0]}))

    # Check when `other` is a series
    check_distributed_operand(pd.Series({"b": 3.0, "c": 4.0}))


@pytest.mark.parametrize("axis", [0, 1])
def test_concat_dont_materialize_opposite_axis(axis):
    """
    Check that ``pd.concat()`` does not materialize a dropped axis cache.

    The cache of the given axis (index for 0, columns for 1) is dropped for both
    inputs; after concatenating along the same axis neither the inputs nor the
    result may have that cache materialized.
    """
    data = {"a": [1, 2, 3], "b": [4.0, 5.0, 6.0]}
    frames = (pd.DataFrame(data), pd.DataFrame(data))

    def has_axis_cache(df):
        qc = df._query_compiler
        if axis:
            return qc.frame_has_materialized_columns
        return qc.frame_has_materialized_index

    def drop_axis_cache(df):
        qc = df._query_compiler
        if axis:
            qc.set_frame_columns_cache(None)
        else:
            qc.set_frame_index_cache(None)
        assert not has_axis_cache(df)

    for frame in frames:
        drop_axis_cache(frame)

    df_concated = pd.concat(frames, axis=axis)
    for frame in (*frames, df_concated):
        assert not has_axis_cache(frame)


def test_setitem_bool_preserve_dtypes():
    """Check that ``.loc`` assignments with a boolean row indexer keep dtypes materialized."""
    df = pd.DataFrame({"a": [1, 1, 2, 2], "b": [3, 4, 5, 6]})
    row_mask = pd.Series([True, False, True, False])

    def dtypes_are_materialized():
        # The query compiler is looked up on every call on purpose,
        # so the check always sees the current state of `df`
        return df._query_compiler.frame_has_materialized_dtypes

    assert dtypes_are_materialized()

    # slice(None) as a col_loc
    df.loc[row_mask] = 2.0
    assert dtypes_are_materialized()

    # list as a col_loc
    df.loc[row_mask, ["a", "b"]] = 2.0
    assert dtypes_are_materialized()

    # scalar as a col_loc
    df.loc[row_mask, "a"] = 2.0
    assert dtypes_are_materialized()


def test_setitem_unhashable_preserve_dtypes():
    """Check that assigning a frame to a list of columns keeps dtypes materialized."""
    target = pd.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]])
    assert target._query_compiler.frame_has_materialized_dtypes

    replacement = pd.DataFrame([[9, 9], [5, 5]])
    assert replacement._query_compiler.frame_has_materialized_dtypes

    # The key is a list of column labels, i.e. an unhashable object
    target[[1, 2]] = replacement
    assert target._query_compiler.frame_has_materialized_dtypes


@pytest.mark.parametrize("modify_config", [{RangePartitioning: True}], indirect=True)
def test_groupby_size_shuffling(modify_config):
    """
    Check that 'groupby.size()' works with reshuffling implementation.

    The frame's ``_apply_func_to_range_partitioning`` method is wrapped with a mock
    and is expected to be called while computing the result.
    See https://github.com/modin-project/modin/issues/6367
    """
    df = pd.DataFrame({"a": [1, 1, 2, 2], "b": [3, 4, 5, 6]})
    frame = df._query_compiler._modin_frame

    # `wraps` keeps the original behavior while recording the calls
    shuffling_patch = mock.patch.object(
        frame,
        "_apply_func_to_range_partitioning",
        wraps=frame._apply_func_to_range_partitioning,
    )
    with shuffling_patch as shuffling_method:
        try_cast_to_pandas(df.groupby("a").size())

    shuffling_method.assert_called()


@pytest.mark.parametrize(
    "kwargs",
    [
        {"axis": 0, "labels": []},
        {"axis": 1, "labels": ["a"]},
        {"axis": 1, "labels": []},
    ],
)
def test_reindex_preserve_dtypes(kwargs):
    """Check that the result of ``reindex()`` has materialized dtypes."""
    source = pd.DataFrame({"a": [1, 1, 2, 2], "b": [3, 4, 5, 6]})

    result_qc = source.reindex(**kwargs)._query_compiler
    assert result_qc.frame_has_materialized_dtypes


class TestModinIndexIds:
    """
    Tests for operations on frames whose axis caches were dropped.

    Most of the tests remove the axis cache of a frame, perform an operation and
    then verify that neither the cache was computed nor the patched
    ``.index``/``.columns`` getter was called.
    """

    @staticmethod
    def _patch_get_index(df, axis=0):
        """
        Patch the ``.index``/``.columns`` attribute of the passed dataframe.

        Parameters
        ----------
        df : object
            Dataframe whose type gets patched.
        axis : int, default: 0
            ``0`` patches ``.index``, any other value patches ``.columns``.

        Returns
        -------
        The object returned by ``mock.patch.object``; the tests use it as a context
        manager yielding a ``PropertyMock`` that wraps the original getter bound to `df`.
        """
        # The property lives on the class, so it's the type that is being patched
        if axis == 0:
            return mock.patch.object(
                type(df),
                "index",
                new_callable=mock.PropertyMock,
                wraps=functools.partial(type(df).index.__get__, df),
            )
        else:
            return mock.patch.object(
                type(df),
                "columns",
                new_callable=mock.PropertyMock,
                wraps=functools.partial(type(df).columns.__get__, df),
            )

    def test_setitem_without_copartition(self):
        """Test that setitem for identical indices works without materializing the axis."""
        # simple insertion
        df = pd.DataFrame({f"col{i}": np.arange(256) for i in range(64)})
        remove_axis_cache(df)

        col = df["col0"]
        assert_has_no_cache(col)
        assert_has_no_cache(df)

        # insert the column back and check that no index computation were triggered
        with self._patch_get_index(df) as get_index_patch:
            df["col0"] = col
            # check that no cache computation was triggered
            assert_has_no_cache(df)
            assert_has_no_cache(col)
        get_index_patch.assert_not_called()

        # insertion with few map operations
        df = pd.DataFrame({f"col{i}": np.arange(256) for i in range(64)})
        remove_axis_cache(df)

        col = df["col0"]
        # perform some operations that doesn't modify index labels and partitioning
        col = col * 2 + 10
        assert_has_no_cache(col)
        assert_has_no_cache(df)

        # insert the modified column back and check that no index computation were triggered
        with self._patch_get_index(df) as get_index_patch:
            df["col0"] = col
            # check that no cache computation was triggered
            assert_has_no_cache(df)
            assert_has_no_cache(col)
        get_index_patch.assert_not_called()

    @pytest.mark.parametrize("axis", [0, 1])
    def test_concat_without_copartition(self, axis):
        """Test that concatenation for frames with identical indices works without materializing the axis."""
        df1 = pd.DataFrame({f"col{i}": np.arange(256) for i in range(64)})
        remove_axis_cache(df1, axis)

        # perform some operations that doesn't modify index labels and partitioning
        df2 = df1.abs().applymap(lambda df: df * 2)

        with self._patch_get_index(df1, axis) as get_index_patch:
            # `axis ^ 1` concatenates along the axis opposite to the one whose cache was removed
            res = pd.concat([df1, df2], axis=axis ^ 1)
            # check that no cache computation was triggered
            assert_has_no_cache(df1, axis)
            assert_has_no_cache(df2, axis)
            assert_has_no_cache(res, axis)
        get_index_patch.assert_not_called()

    def test_index_updates_ref(self):
        """Test that copying the default ModinIndex to a new frame updates frame reference with the new one."""
        df1 = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
        remove_axis_cache(df1)

        modin_frame1 = df1._query_compiler._modin_frame
        # verify that index cache is 'default' and so holds a reference to the `modin_frame`
        assert modin_frame1._index_cache._is_default_callable

        # NOTE: the check below compares reference counts, so no extra references
        # to `modin_frame1` must be created between the two `sys.getrefcount()` calls
        ref_count_before = sys.getrefcount(modin_frame1)

        df2 = df1 + 1
        modin_frame2 = df2._query_compiler._modin_frame
        # verify that new index cache is also the 'default' one
        assert modin_frame2._index_cache._is_default_callable
        # verify that there's no new references being created to the old frame
        assert sys.getrefcount(modin_frame1) == ref_count_before

    def test_index_updates_axis(self):
        """Verify that the ModinIndex `axis` attribute is updated when copied to a new frame but for an opposite axis."""
        df1 = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
        remove_axis_cache(df1)

        # now index becomes columns and vice-versa, this means that the 'default callable'
        # of the ModinIndex now has to update its axis
        df2 = df1.T

        idx1 = df1._query_compiler._modin_frame._index_cache
        idx2 = df2._query_compiler._modin_frame._index_cache

        cols1 = df1._query_compiler._modin_frame._columns_cache
        cols2 = df2._query_compiler._modin_frame._columns_cache

        # check that we can compare df.index == df.T.columns & df.columns == df.T.index
        # without triggering any axis materialization
        assert (
            idx1._index_id == cols2._index_id and idx1._lengths_id == cols2._lengths_id
        )
        assert (
            cols1._index_id == idx2._index_id and cols1._lengths_id == idx2._lengths_id
        )

        # check that when the materialization is triggered for the transposed frame it produces proper labels
        assert df2.index.equals(pandas.Index(["a", "b"]))
        assert df2.columns.equals(pandas.Index([0, 1, 2]))

    def test_filter_empties_resets_lengths(self):
        """Verify that filtering out empty partitions affects ``ModinIndex._lengths_id`` field."""
        # case1: partitioning is modified by '._filter_empties()', meaning that '._lengths_id' should be changed
        md_df = construct_modin_df_by_scheme(
            pandas.DataFrame({"a": [1, 1, 2, 2]}),
            {"row_lengths": [2, 2], "column_widths": [1]},
        )
        # the 'a < 2' filter only keeps the rows with a == 1
        mf = md_df.query("a < 2")._query_compiler._modin_frame
        mf.index  # trigger index materialization

        old_cache = mf._index_cache
        assert mf._partitions.shape == (2, 1)

        mf._filter_empties()
        new_cache = mf._index_cache

        # the labels stay the same, only the partitioning-related part has to change
        assert new_cache._index_id == old_cache._index_id
        assert new_cache._lengths_id != old_cache._lengths_id
        assert new_cache._lengths_cache != old_cache._lengths_cache

        # case2: partitioning is NOT modified by '._filter_empties()', meaning that '._lengths_id' should stay the same
        md_df = construct_modin_df_by_scheme(
            pandas.DataFrame({"a": [1, 1, 2, 2]}),
            {"row_lengths": [2, 2], "column_widths": [1]},
        )
        mf = md_df._query_compiler._modin_frame

        old_cache = mf._index_cache
        assert mf._partitions.shape == (2, 1)

        mf._filter_empties()
        new_cache = mf._index_cache

        assert new_cache._index_id == old_cache._index_id
        assert new_cache._lengths_id == old_cache._lengths_id
        assert new_cache._lengths_cache == old_cache._lengths_cache

    def test_binops_without_repartitioning(self):
        """Test that binary operations for identical indices works without materializing the axis."""
        df = pd.DataFrame({f"col{i}": np.arange(256) for i in range(64)})
        remove_axis_cache(df)

        col1 = df["col1"]
        assert_has_no_cache(col1)
        assert_has_no_cache(df)

        col2 = df["col2"]
        assert_has_no_cache(col2)
        assert_has_no_cache(df)

        # perform a binary op and insert the result back then check that no index computation were triggered
        with self._patch_get_index(df) as get_index_df:
            df["result"] = col1 + col2
            # check that no cache computation was triggered
            assert_has_no_cache(df)
            assert_has_no_cache(col1)
            assert_has_no_cache(col2)
        get_index_df.assert_not_called()


def test_skip_set_columns():
    """
    Check the mechanism of skipping the actual ``._set_columns()`` call.

    The call is expected to be skipped in case the new columns are identical
    to the previous ones.

    In this test, we rely on the ``modin_frame._deferred_column`` attribute.
    The new indices propagation is done lazily, and the ``deferred_column`` attribute
    indicates whether there's a new indices propagation pending.
    """

    def make_flat_df():
        return pd.DataFrame({"col1": [1, 2, 3], "col2": [3, 4, 5]})

    def make_multiindex_df():
        return pd.DataFrame(
            {
                ("a", "col1"): [1, 2, 3],
                ("a", "col2"): [3, 4, 5],
                ("b", "col1"): [6, 7, 8],
            }
        )

    def has_deferred_columns(modin_df):
        return modin_df._query_compiler._modin_frame._deferred_column

    # Actually new labels: the new columns have to be set
    df = make_flat_df()
    df.columns = ["col1", "col10"]
    assert has_deferred_columns(df)
    assert np.all(df.columns.values == ["col1", "col10"])

    # Labels equal to the previous ones: setting the columns has to be skipped
    df = make_flat_df()
    df.columns = ["col1", "col2"]
    assert not has_deferred_columns(df)

    # Same labels but new metadata (the name): the new columns have to be set
    df = make_flat_df()
    df.columns = pandas.Index(["col1", "col2"], name="new name")
    assert df.columns.name == "new name"

    # MultiIndex columns equal to the previous ones: setting has to be skipped
    df = make_multiindex_df()
    df.columns = df.columns.copy()
    assert not has_deferred_columns(df)

    # MultiIndex columns in a different order: the new columns have to be set
    df = make_multiindex_df()
    reversed_cols = df.columns[::-1]
    df.columns = reversed_cols
    assert has_deferred_columns(df)
    assert df.columns.equals(reversed_cols)

    # The old columns are not materialized: their computation must not be triggered
    # for the sake of equality comparison, so the new columns should be set
    # unconditionally, meaning that the '_deferred_column' has to be True
    df = make_flat_df()
    remove_axis_cache(df, axis=1)
    df.columns = ["col1", "col2"]
    assert has_deferred_columns(df)


def test_query_dispatching():
    """
    Check how ``PandasQueryCompiler.rowwise_query()`` decides whether a query is row-wise.

    The tested method raises a ``NotImplementedError`` if the query cannot be performed row-wise
    and raises nothing if it can.
    """
    qc = pd.DataFrame(
        {"a": [1], "b": [2], "c": [3], "d": [4], "e": [5]}
    )._query_compiler

    # Referenced from the query strings via '@local_var'
    local_var = 10  # noqa: F841 (unused variable)

    # these queries should be performed row-wise (so no exception)
    rowwise_queries = (
        "a < 1",
        "a < b",
        "a < (b + @local_var) * c > 10",
    )
    # these queries cannot be performed row-wise (so they must raise an exception)
    non_rowwise_queries = (
        "a < b[0]",
        "a < b.min()",
        "a < (b + @local_var + (b - e.min())) * c > 10",
        "a < b.size",
    )

    # The method is called directly from this function's body (no helpers or
    # comprehensions), so `local_var` stays in the frame that makes the call
    for query in rowwise_queries:
        qc.rowwise_query(query)

    for query in non_rowwise_queries:
        with pytest.raises(NotImplementedError):
            qc.rowwise_query(query)


def test_sort_values_cache():
    """
    Check that the column widths cache is valid after ``.sort_values()``.

    See https://github.com/modin-project/modin/issues/6607
    """
    initial_widths = [32, 32]
    scenarios = (
        # 1 row partition and 2 column partitions, in this case '.sort_values()' will use
        # row-wise implementation and so the column widths WILL NOT be changed
        ([100], [32, 32]),
        # 2 row partitions and 2 column partitions, in this case '.sort_values()' will use
        # range-partitioning implementation and so the column widths WILL be changed
        ([50, 50], [64]),
    )

    for row_lengths, expected_widths in scenarios:
        modin_df = construct_modin_df_by_scheme(
            pandas.DataFrame({f"col{i}": range(100) for i in range(64)}),
            partitioning_scheme={
                "row_lengths": row_lengths,
                "column_widths": [32, 32],
            },
        )
        frame_before = modin_df._query_compiler._modin_frame
        frame_after = modin_df.sort_values("col0")._query_compiler._modin_frame

        # check that the expected implementation was indeed used
        # (judging by whether the col widths were changed or not)
        assert frame_after._column_widths_cache == expected_widths
        # check that the cache and actual col widths match
        validate_partitions_cache(frame_after, axis=1)
        # check that the initial frame's cache wasn't changed
        assert frame_before._column_widths_cache == initial_widths
        validate_partitions_cache(frame_before, axis=1)


def test_apply_full_axis_preserve_widths():
    """
    Check that ``apply_full_axis(axis=1, keep_partitioning=True)`` produces a column
    widths cache that matches the actual partitions and leaves row lengths uncached.
    """
    source = pandas.DataFrame(
        {"a": [1, 2, 3, 4], "b": [3, 4, 5, 6], "c": [6, 7, 8, 9], "d": [0, 1, 2, 3]}
    )
    scheme = {"row_lengths": [2, 2], "column_widths": [2, 2]}
    md_frame = construct_modin_df_by_scheme(source, scheme)._query_compiler._modin_frame

    # sanity check of the initial partitioning
    assert md_frame._row_lengths_cache == [2, 2]
    assert md_frame._column_widths_cache == [2, 2]

    def func(df):
        # the two possible outputs differ in their number of rows (3 vs 1)
        if df.iloc[0, 0] != 1:
            return pandas.DataFrame({"a": [4], "b": [6], "c": [9], "d": [3]})
        return pandas.DataFrame(
            {"a": [1, 2, 3], "b": [3, 4, 5], "c": [6, 7, 8], "d": [0, 1, 2]}
        )

    result = md_frame.apply_full_axis(
        func=func,
        axis=1,
        new_index=[0, 1, 2, 3],
        new_columns=["a", "b", "c", "d"],
        keep_partitioning=True,
    )
    cached_widths = result._column_widths_cache
    real_widths = [partition.width() for partition in result._partitions[0]]

    assert cached_widths == real_widths
    assert result._row_lengths_cache is None


def test_apply_full_axis_preserve_lengths():
    """
    Check that ``apply_full_axis(axis=0, keep_partitioning=True)`` produces a row
    lengths cache that matches the actual partitions and leaves column widths uncached.
    """
    source = pandas.DataFrame(
        {"a": [1, 2, 3, 4], "b": [3, 4, 5, 6], "c": [6, 7, 8, 9], "d": [0, 1, 2, 3]}
    )
    scheme = {"row_lengths": [2, 2], "column_widths": [2, 2]}
    md_frame = construct_modin_df_by_scheme(source, scheme)._query_compiler._modin_frame

    # sanity check of the initial partitioning
    assert md_frame._row_lengths_cache == [2, 2]
    assert md_frame._column_widths_cache == [2, 2]

    def func(df):
        # the two possible outputs differ in their number of columns (2 vs 1)
        if df.iloc[0, 0] != 1:
            return pandas.DataFrame({"c": [9, 5, 6, 7]})
        return pandas.DataFrame({"a": [3, 2, 3, 4], "b": [3, 4, 5, 6]})

    result = md_frame.apply_full_axis(
        func=func,
        axis=0,
        new_index=[0, 1, 2, 3],
        new_columns=["a", "b", "c"],
        keep_partitioning=True,
    )

    cached_lengths = result._row_lengths_cache
    real_lengths = [partition.length() for partition in result._partitions[:, 0]]

    assert cached_lengths == real_lengths
    assert result._column_widths_cache is None


class DummyFuture:
    """
    A stand-in for a future object, used in ``test_call_queue_serialization``.

    An instance carries a random integer acting as its payload and a `_was_materialized`
    flag. The flag starts as ``False`` and is switched to ``True`` by ``.materialize()``.
    Two instances compare equal when their payloads are equal.
    """

    def __init__(self):
        self._was_materialized = False
        self._value = np.random.randint(0, 1_000_000)

    def materialize(self):
        """Flag the object as materialized and return the object itself."""
        self._was_materialized = True
        return self

    def __eq__(self, other):
        # objects of a different type are never considered equal
        if not isinstance(other, type(self)):
            return False
        return bool(self._value == other._value)


class TestModinDtypes:
    """Test ``ModinDtypes`` and ``DtypesDescriptor`` classes."""

    schema = pandas.Series(
        {
            "a": np.dtype("int64"),
            "b": np.dtype(float),
            "c": np.dtype(bool),
            "d": np.dtype(bool),
            "e": np.dtype("object"),
        }
    )

    def get_columns_order(self, cols):
        """
        Return a value to be passed as ``DtypesDescriptor(columns_order=...)`` parameter.

        Parameters
        ----------
        cols : iterable of labels
            Column labels in the desired order.

        Returns
        -------
        dict
            Mapping from a zero-based position to the label located at this position.
        """
        return dict(enumerate(cols))

    class DummyDf:
        """
        A minimal parent-frame stand-in that logs every access made to it.

        The log is kept in the `history` list, so tests can verify that nothing more
        than needed was requested from the frame.
        """

        def __init__(self, schema):
            # every recorded access is appended here as a tuple
            self.history = []
            self._schema = pandas.Series(schema)

        def _compute_dtypes(self, subset=None):
            """Log the call and return dtypes for the whole schema or for the `subset` only."""
            self.history.append(("_compute_dtypes", subset))
            if subset is None:
                return self._schema
            return self._schema[subset]

        @property
        def columns(self):
            """Log the access and return labels of the schema."""
            self.history.append(("columns",))
            return self._schema.index

        @property
        def has_materialized_columns(self):
            """Always ``False``, so a descriptor avoids materialization at all cost."""
            return False

    def test_get_dtypes_set_modin_dtypes(self):
        """Check that ``ModinDtypes.get_dtypes_set()`` forwards the request to the value it wraps."""
        expected = set(self.schema.values)
        wrapped_values = [
            # a callable producing the schema
            lambda: self.schema,
            # an already materialized schema
            self.schema,
            # a descriptor where 'c' and 'd' are only described by 'remaining_dtype'
            DtypesDescriptor(
                self.schema[["a", "b", "e"]], remaining_dtype=np.dtype(bool)
            ),
        ]
        for value in wrapped_values:
            dtypes_set = ModinDtypes(value).get_dtypes_set()
            assert dtypes_set == expected

    def test_get_dtypes_set_desc(self):
        """
        Check that ``DtypesDescriptor.get_dtypes_set()`` produces correct values while
        requesting from the parent frame no more than necessary.
        """
        all_dtypes = set(self.schema.values)

        # 'know_all_names=False': the columns have to be retrieved first in order to
        # find out which dtypes are missing, only then '._compute_dtypes()' is called,
        # and only for that subset
        parent = self.DummyDf(self.schema)
        descriptor = DtypesDescriptor(
            self.schema[["a", "b"]], know_all_names=False, parent_df=parent
        )
        assert descriptor.get_dtypes_set() == all_dtypes
        assert parent.history == [
            ("columns",),
            ("_compute_dtypes", ["c", "d", "e"]),
        ]

        # names of the columns with unknown dtypes are already known, so there's a
        # single '._compute_dtypes()' call for that subset
        parent = self.DummyDf(self.schema)
        descriptor = DtypesDescriptor(
            self.schema[["a", "b"]],
            cols_with_unknown_dtypes=["c", "d", "e"],
            parent_df=parent,
        )
        assert descriptor.get_dtypes_set() == all_dtypes
        assert parent.history == [("_compute_dtypes", ["c", "d", "e"])]

        # the dtypes set can be inferred from 'known_dtypes' and 'remaining_dtype',
        # so 'parent_df' doesn't have to be accessed at all
        partial_schema = self.schema[["a", "b", "c", "d"]]
        parent = self.DummyDf(partial_schema)
        descriptor = DtypesDescriptor(
            self.schema[["a", "b"]], remaining_dtype=np.dtype(bool), parent_df=parent
        )
        assert descriptor.get_dtypes_set() == set(partial_schema.values)
        assert parent.history == []

        # nothing is known: dtypes are computed for all the columns at once
        parent = self.DummyDf(self.schema)
        descriptor = DtypesDescriptor(know_all_names=False, parent_df=parent)
        assert descriptor.get_dtypes_set() == all_dtypes
        assert parent.history == [("_compute_dtypes", None)]

        # every column is listed as having an unknown dtype: dtypes are computed
        # for all of them
        parent = self.DummyDf(self.schema)
        descriptor = DtypesDescriptor(
            cols_with_unknown_dtypes=self.schema.index.tolist(), parent_df=parent
        )
        assert descriptor.get_dtypes_set() == all_dtypes
        assert parent.history == [("_compute_dtypes", self.schema.index.tolist())]

        # names of the columns with unknown dtypes are already known, so there's a
        # single '._compute_dtypes()' call for that subset
        parent = self.DummyDf(self.schema)
        descriptor = DtypesDescriptor(
            cols_with_unknown_dtypes=["a", "b", "e"],
            remaining_dtype=np.dtype(bool),
            parent_df=parent,
        )
        assert descriptor.get_dtypes_set() == all_dtypes
        assert parent.history == [("_compute_dtypes", ["a", "b", "e"])]

    def test_lazy_get_modin_dtypes(self):
        """Check that ``ModinDtypes.lazy_get()`` forwards the request to the value it wraps."""
        expected_series = self.schema[["b", "c", "a"]]

        # a materialized schema stays materialized
        from_series = ModinDtypes(self.schema).lazy_get(["b", "c", "a"])
        assert from_series._value.equals(expected_series)

        # a callable stays a callable
        from_callable = ModinDtypes(lambda: self.schema).lazy_get(["b", "c", "a"])
        assert callable(from_callable._value)
        assert from_callable._value().equals(expected_series)

        # a descriptor stays a descriptor
        source_descriptor = DtypesDescriptor(
            self.schema[["a", "b"]], cols_with_unknown_dtypes=["c", "d", "e"]
        )
        from_descriptor = ModinDtypes(source_descriptor).lazy_get(["b", "c", "a"])
        expected_descriptor = DtypesDescriptor(
            self.schema[["a", "b"]],
            cols_with_unknown_dtypes=["c"],
            columns_order={0: "b", 1: "c", 2: "a"},
        )
        assert from_descriptor._value.equals(expected_descriptor)

    def test_lazy_get_desc(self):
        """
        Check that ``DtypesDescriptor.lazy_get()`` works properly.

        None of the descriptors here is given a `parent_df`, which verifies that
        ``.lazy_get()`` never triggers any computations.
        """
        bool_dtype = np.dtype(bool)

        # only the dtype of 'a' is known, the rest of the requested columns are unknown
        requested = ["a", "c", "e"]
        known_part, unknown_part = requested[:1], requested[1:]
        lazy = DtypesDescriptor(self.schema[["a", "b"]]).lazy_get(requested)
        expected = DtypesDescriptor(
            self.schema[known_part],
            cols_with_unknown_dtypes=unknown_part,
            columns_order=self.get_columns_order(requested),
        )
        assert lazy.equals(expected)

        # dtypes for 'c' and 'd' are inferred from the 'remaining_dtype' parameter
        requested = ["a", "c", "d"]
        lazy = DtypesDescriptor(
            self.schema[["a", "b"]], remaining_dtype=bool_dtype
        ).lazy_get(requested)
        expected = DtypesDescriptor(
            self.schema[requested],
            columns_order=self.get_columns_order(requested),
            _schema_is_known=True,
        )
        assert lazy.equals(expected)

        # an empty descriptor knows nothing about the requested columns
        requested = ["a", "c", "d"]
        lazy = DtypesDescriptor().lazy_get(requested)
        expected = DtypesDescriptor(
            cols_with_unknown_dtypes=requested,
            columns_order=self.get_columns_order(requested),
        )
        assert lazy.equals(expected)

        # dtypes for 'c' and 'd' are inferred from the 'remaining_dtype' parameter
        requested = ["c", "d"]
        lazy = DtypesDescriptor(remaining_dtype=bool_dtype).lazy_get(requested)
        expected = DtypesDescriptor(
            self.schema[requested],
            columns_order=self.get_columns_order(requested),
            _schema_is_known=True,
        )
        assert lazy.equals(expected)

    def test_concat_axis_0(self):
        """Check how ``DtypesDescriptor.concat()`` with the default axis merges descriptors."""
        bool_dtype = np.dtype(bool)

        # both schemas are known: they're simply concatenated
        parts = [
            DtypesDescriptor(self.schema[["a", "b"]]),
            DtypesDescriptor(self.schema[["c", "d"]]),
        ]
        expected = DtypesDescriptor(self.schema[["a", "b", "c", "d"]])
        assert DtypesDescriptor.concat(parts).equals(expected)

        # none of the descriptors has missing column names, so 'remaining_dtype'
        # can be preserved
        parts = [
            DtypesDescriptor(self.schema[["a", "b"]]),
            DtypesDescriptor(remaining_dtype=bool_dtype),
        ]
        expected = DtypesDescriptor(self.schema[["a", "b"]], remaining_dtype=bool_dtype)
        assert DtypesDescriptor.concat(parts).equals(expected)

        # 'remaining_dtype' can't be preserved, since the first descriptor has
        # unknown column names
        parts = [
            DtypesDescriptor(self.schema[["a", "b"]], know_all_names=False),
            DtypesDescriptor(remaining_dtype=bool_dtype),
        ]
        expected = DtypesDescriptor(self.schema[["a", "b"]], know_all_names=False)
        assert DtypesDescriptor.concat(parts).equals(expected)

        # 'remaining_dtype' can't be preserved, since the second descriptor has
        # unknown column names
        parts = [
            DtypesDescriptor(self.schema[["a", "b"]]),
            DtypesDescriptor(cols_with_unknown_dtypes=["d", "e"], know_all_names=False),
            DtypesDescriptor(remaining_dtype=bool_dtype),
        ]
        expected = DtypesDescriptor(
            self.schema[["a", "b"]],
            cols_with_unknown_dtypes=["d", "e"],
            know_all_names=False,
        )
        assert DtypesDescriptor.concat(parts).equals(expected)

        # none of the descriptors has missing column names, so 'remaining_dtype'
        # can be preserved
        parts = [
            DtypesDescriptor(self.schema[["a", "b"]]),
            DtypesDescriptor(cols_with_unknown_dtypes=["d", "e"]),
            DtypesDescriptor(remaining_dtype=bool_dtype),
        ]
        expected = DtypesDescriptor(
            self.schema[["a", "b"]],
            cols_with_unknown_dtypes=["d", "e"],
            remaining_dtype=bool_dtype,
        )
        assert DtypesDescriptor.concat(parts).equals(expected)

        # remaining dtypes don't match, so they're dropped and 'know_all_names=False'
        # is set instead
        parts = [
            DtypesDescriptor(self.schema[["a", "b"]], remaining_dtype=bool_dtype),
            DtypesDescriptor(
                cols_with_unknown_dtypes=["d", "e"], remaining_dtype=np.dtype(float)
            ),
            DtypesDescriptor(remaining_dtype=bool_dtype),
        ]
        expected = DtypesDescriptor(
            self.schema[["a", "b"]],
            cols_with_unknown_dtypes=["d", "e"],
            know_all_names=False,
        )
        assert DtypesDescriptor.concat(parts).equals(expected)

    @pytest.mark.parametrize(
        "initial_dtypes, result_cols_with_known_dtypes, result_cols_with_unknown_dtypes",
        [
            [
                # initial dtypes (cols_with_known_dtypes, cols_with_unknown_dtypes, remaining_dtype):
                # dtypes for all columns are known
                [
                    (["a", "b", "c", "d"], [], None),
                    (["a", "b", "e", "d"], [], None),
                    (["a", "b"], [], None),
                ],
                # result_cols_with_known_dtypes:
                # all dtypes were known in the beginning, expecting the same
                # for the result
                ["a", "b", "c", "d", "e"],
                # result_cols_with_unknown_dtypes
                [],
            ],
            [
                # initial dtypes (cols_with_known_dtypes, cols_with_unknown_dtypes, remaining_dtype)
                [
                    (["a", "b"], ["c", "d"], None),
                    (["a", "b", "d"], ["e"], None),
                    (["a", "b"], [], None),
                ],
                # result_cols_with_known_dtypes:
                # across all dataframes, dtypes were only known for 'a' and 'b' columns
                ["a", "b"],
                # result_cols_with_unknown_dtypes
                ["c", "d", "e"],
            ],
            [
                # initial dtypes (cols_with_known_dtypes, cols_with_unknown_dtypes, remaining_dtype):
                # the 'e' column in the second frame is missing here, emulating 'know_all_names=False' case
                [
                    (["a", "b"], ["c", "d"], None),
                    (["a", "b", "d"], [], None),
                    (["a", "b"], [], None),
                ],
                # result_cols_with_known_dtypes
                ["a", "b"],
                # result_cols_with_unknown_dtypes:
                # the missing 'e' column will be deduced from the resulting frame after '.concat()'
                ["c", "d", "e"],
            ],
            [
                # initial dtypes (cols_with_known_dtypes, cols_with_unknown_dtypes, remaining_dtype)
                # the 'c' column in the first frame is described using 'remaining_dtype'
                [
                    (["a", "b", "d"], [], np.dtype(bool)),
                    (["a", "b", "e", "d"], [], None),
                    (["a", "b"], [], None),
                ],
                # result_cols_with_known_dtypes:
                # remaining dtypes are not supported by 'concat(axis=0)', so dtype for the 'c'
                # column is missing here
                ["a", "b", "e", "d"],
                # result_cols_with_unknown_dtypes:
                ["c"],
            ],
        ],
    )
    def test_concat_axis_1(
        self,
        initial_dtypes,
        result_cols_with_known_dtypes,
        result_cols_with_unknown_dtypes,
    ):
        """
        Test that ``DtypesDescriptor.concat(axis=1)`` works as expected.

        Parameters
        ----------
        initial_dtypes : list of tuples: (cols_with_known_dtypes, cols_with_unknown_dtypes, remaining_dtype)
            Describe how to build ``DtypesDescriptor`` for each of the three dataframes.
        result_cols_with_known_dtypes : list of labels
            Column names for which dtypes have to be determined after ``.concat()``.
        result_cols_with_unknown_dtypes : list of labels
            Column names for which dtypes have to be unknown after ``.concat()``.
        """
        md_df1, pd_df1 = create_test_dfs(
            {
                "a": [1, 2, 3],
                "b": [3.5, 4.5, 5.5],
                "c": [True, False, True],
                "d": ["a", "b", "c"],
            }
        )
        md_df2, pd_df2 = create_test_dfs(
            {
                "a": [1.5, 2.5, 3.5],
                "b": [3.5, 4.5, 5.5],
                "e": [True, False, True],
                "d": ["a", "b", "c"],
            }
        )
        md_df3, pd_df3 = create_test_dfs({"a": [1, 2, 3], "b": [3.5, 4.5, 5.5]})

        # replace the dtypes cache of every modin frame with a descriptor built
        # from the corresponding 'initial_dtypes' entry
        for md_df, (known_cols, unknown_cols, remaining_dtype) in zip(
            [md_df1, md_df2, md_df3], initial_dtypes
        ):
            known_dtypes = {col: md_df.dtypes[col] for col in known_cols}
            # all names are considered known when either every column of the frame is
            # listed in the entry, or the rest is covered by 'remaining_dtype'
            know_all_names = (
                len(known_cols) + len(unknown_cols) == len(md_df.columns)
                or remaining_dtype is not None
            )
            # setting columns cache to 'None', in order to prevent completing 'dtypes' with the materialized columns
            md_df._query_compiler.set_frame_columns_cache(None)
            md_df._query_compiler.set_frame_dtypes_cache(
                ModinDtypes(
                    DtypesDescriptor(
                        known_dtypes,
                        unknown_cols,
                        remaining_dtype,
                        know_all_names=know_all_names,
                    )
                )
            )
        md_dtypes = pd.concat(
            [md_df1, md_df2, md_df3]
        )._query_compiler._modin_frame._dtypes
        pd_dtypes = pandas.concat([pd_df1, pd_df2, pd_df3]).dtypes
        if len(result_cols_with_known_dtypes) == len(pd_dtypes):
            # every dtype is expected to be known: the cache has to hold a complete
            # 'pandas.Series' equal to the pandas result
            md_dtypes = (
                md_dtypes if isinstance(md_dtypes, pandas.Series) else md_dtypes._value
            )
            assert isinstance(md_dtypes, pandas.Series)
            assert md_dtypes.equals(pd_dtypes)
        else:
            # only a part of dtypes is expected to be known: inspect the internals
            # of the value stored in the cache
            assert set(md_dtypes._value._known_dtypes.keys()) == set(
                result_cols_with_known_dtypes
            )
            # reindexing to ensure proper order
            md_known_dtypes = pandas.Series(md_dtypes._value._known_dtypes).reindex(
                result_cols_with_known_dtypes
            )
            assert md_known_dtypes.equals(pd_dtypes[result_cols_with_known_dtypes])
            assert set(md_dtypes._value._cols_with_unknown_dtypes) == set(
                result_cols_with_unknown_dtypes
            )

    def test_ModinDtypes_duplicated_concat(self):
        """Check how ``ModinDtypes.concat()`` deals with duplicated labels."""
        int_a = pandas.Series([np.dtype("int64")], index=["a"])

        # when every value is a Series, the concatenation on duplicated labels
        # is expected to succeed
        merged = ModinDtypes.concat([int_a] * 2)
        expected = pandas.Series(
            [np.dtype("int64"), np.dtype("int64")], index=["a", "a"]
        )
        assert isinstance(merged._value, pandas.Series)
        assert merged._value.equals(expected)

        # when not all of the dtypes are materialized, the concatenation on
        # duplicated labels is expected to raise
        mixed_values = [
            pandas.Series([np.dtype("int64")], index=["a"]),
            DtypesDescriptor(cols_with_unknown_dtypes=["a"]),
        ]
        with pytest.raises(NotImplementedError):
            ModinDtypes.concat(mixed_values)

    def test_update_parent(self):
        """
        Check that updating the parent of a ``DtypesDescriptor`` is also propagated
        to the lazy categoricals it stores.
        """
        # the dtypes cache of 'cat_df' is a materialized 'pandas.Series'
        cat_df = pd.DataFrame({"a": [1, 1, 2], "b": [3, 4, 5]}).astype(
            {"a": "category"}
        )
        assert isinstance(cat_df.dtypes["a"], LazyProxyCategoricalDtype)

        # the dtypes cache of 'unknown_df' is a 'DtypesDescriptor' with an unknown
        # dtype for the column 'c'
        unknown_df = pd.DataFrame({"c": [2, 3, 4]})
        unknown_df._query_compiler.set_frame_dtypes_cache(None)
        descriptor = unknown_df._query_compiler._modin_frame._dtypes._value
        assert isinstance(descriptor, DtypesDescriptor)
        assert descriptor._cols_with_unknown_dtypes == ["c"]

        # the concatenation gives a 'DtypesDescriptor' that stores lazy categories
        # in its 'known_dtypes' field
        concatenated = pd.concat([cat_df, unknown_df], axis=1)
        old_parent = cat_df._query_compiler._modin_frame
        new_parent = concatenated._query_compiler._modin_frame
        result_descriptor = new_parent._dtypes._value

        # lazy categories of the result have to reference the new parent, while
        # the ones of the source frame still have to reference the old one
        assert result_descriptor._parent_df is new_parent
        assert result_descriptor._known_dtypes["a"]._parent is new_parent
        assert old_parent._dtypes["a"]._parent is old_parent

    @pytest.mark.parametrize(
        "initial_dtypes, result_dtypes",
        [
            # a descriptor without 'columns_order': all dtypes become unknown
            [
                DtypesDescriptor(
                    {"a": np.dtype("int64"), "b": np.dtype(float), "c": np.dtype(float)}
                ),
                DtypesDescriptor(
                    cols_with_unknown_dtypes=["col1", "col2", "col3"],
                    columns_order={0: "col1", 1: "col2", 2: "col3"},
                ),
            ],
            # a fully known descriptor with 'columns_order': dtypes are relabeled
            [
                DtypesDescriptor(
                    {
                        "a": np.dtype("int64"),
                        "b": np.dtype(float),
                        "c": np.dtype(float),
                    },
                    columns_order={0: "a", 1: "b", 2: "c"},
                ),
                DtypesDescriptor(
                    {
                        "col1": np.dtype("int64"),
                        "col2": np.dtype(float),
                        "col3": np.dtype(float),
                    },
                    columns_order={0: "col1", 1: "col2", 2: "col3"},
                ),
            ],
            # a partially known descriptor with 'columns_order': known and unknown
            # parts are both relabeled
            [
                DtypesDescriptor(
                    {"a": np.dtype("int64"), "b": np.dtype(float)},
                    cols_with_unknown_dtypes=["c"],
                    columns_order={0: "a", 1: "b", 2: "c"},
                ),
                DtypesDescriptor(
                    {"col1": np.dtype("int64"), "col2": np.dtype(float)},
                    cols_with_unknown_dtypes=["col3"],
                    columns_order={0: "col1", 1: "col2", 2: "col3"},
                ),
            ],
            # a descriptor with 'know_all_names=False': all dtypes become unknown
            [
                DtypesDescriptor(
                    {"a": np.dtype("int64")},
                    cols_with_unknown_dtypes=["c"],
                    know_all_names=False,
                ),
                DtypesDescriptor(
                    cols_with_unknown_dtypes=["col1", "col2", "col3"],
                    columns_order={0: "col1", 1: "col2", 2: "col3"},
                ),
            ],
            # a descriptor relying on 'remaining_dtype': all dtypes become unknown
            [
                DtypesDescriptor(
                    {"a": np.dtype("int64")}, remaining_dtype=np.dtype(float)
                ),
                DtypesDescriptor(
                    cols_with_unknown_dtypes=["col1", "col2", "col3"],
                    columns_order={0: "col1", 1: "col2", 2: "col3"},
                ),
            ],
            # a callable stays a callable returning relabeled dtypes
            [
                lambda: pandas.Series(
                    [np.dtype("int64"), np.dtype(float), np.dtype(float)],
                    index=["a", "b", "c"],
                ),
                lambda: pandas.Series(
                    [np.dtype("int64"), np.dtype(float), np.dtype(float)],
                    index=["col1", "col2", "col3"],
                ),
            ],
            # a materialized Series is relabeled
            [
                pandas.Series(
                    [np.dtype("int64"), np.dtype(float), np.dtype(float)],
                    index=["a", "b", "c"],
                ),
                pandas.Series(
                    [np.dtype("int64"), np.dtype(float), np.dtype(float)],
                    index=["col1", "col2", "col3"],
                ),
            ],
        ],
    )
    def test_set_index_dataframe(self, initial_dtypes, result_dtypes):
        """
        Check that relabeling columns of a dataframe relabels its dtypes as well.

        Parameters
        ----------
        initial_dtypes : DtypesDescriptor, callable or pandas.Series
            Dtypes cache to set for the frame before changing its columns.
        result_dtypes : DtypesDescriptor, callable or pandas.Series
            Value expected to be stored in the dtypes cache after changing the columns.
        """
        frame = pd.DataFrame(
            {"a": [1, 2, 3], "b": [3.0, 4.0, 5.0], "c": [3.2, 4.5, 5.4]}
        )._query_compiler._modin_frame
        frame.set_columns_cache(None)

        # descriptors are set being wrapped into 'ModinDtypes', other kinds are set as is
        dtypes_to_set = (
            ModinDtypes(initial_dtypes)
            if isinstance(initial_dtypes, DtypesDescriptor)
            else initial_dtypes
        )
        frame.set_dtypes_cache(dtypes_to_set)
        frame.columns = ["col1", "col2", "col3"]

        if result_dtypes is not None:
            if not callable(result_dtypes):
                assert frame._dtypes._value.equals(result_dtypes)
            else:
                # a lazy cache has to stay lazy
                assert callable(frame._dtypes._value)
                assert frame._dtypes._value().equals(result_dtypes())
        # materialized dtypes have to carry the new labels
        assert frame.dtypes.index.equals(pandas.Index(["col1", "col2", "col3"]))

    def test_set_index_with_dupl_labels(self):
        """Verify that setting duplicated columns doesn't propagate any errors to a user."""
        df = pd.DataFrame({"a": [1, 2, 3, 4], "b": [3.5, 4.4, 5.5, 6.6]})
        # making sure that dtypes are represented by an unmaterialized dtypes-descriptor
        df._query_compiler.set_frame_dtypes_cache(None)

        df.columns = ["a", "a"]
        # The expected integer dtype is spelled out as 'int64': that's what pandas infers
        # for python integers on every platform, whereas 'np.dtype(int)' is platform-dependent
        # (e.g. it's 'int32' on Windows with NumPy < 2) and would make the check fail there
        expected_dtypes = pandas.Series(
            [np.dtype("int64"), np.dtype("float64")], index=["a", "a"]
        )
        assert df.dtypes.equals(expected_dtypes)

    def test_reset_index_mi_columns(self):
        """
        Check dtypes after ``.reset_index()`` of a groupby-aggregation result.

        Reproducer from: https://github.com/modin-project/modin/issues/6904
        """

        def dtypes_after_reset(df):
            aggregated = df.groupby("a").agg({"b": ["min", "std"]})
            return aggregated.reset_index().dtypes

        modin_df, pandas_df = create_test_dfs({"a": [1, 1, 2, 2], "b": [3, 3, 4, 4]})
        eval_general(modin_df, pandas_df, dtypes_after_reset)

    def test_concat_mi(self):
        """
        Check that concatenating frames with non-MultiIndex and MultiIndex columns
        gives valid indices for lazy dtypes.
        """
        flat_data = {"a": [1, 1, 2, 2], "b": [3, 3, 4, 4]}
        mi_data = {("l1", "v1"): [1, 1, 2, 2], ("l1", "v2"): [3, 3, 4, 4]}
        md_flat, pd_flat = create_test_dfs(flat_data)
        md_mi, pd_mi = create_test_dfs(mi_data)

        # Dropping the actual dtypes, so partially-known dtypes are used instead
        for modin_df in (md_flat, md_mi):
            modin_df._query_compiler.set_frame_dtypes_cache(None)

        modin_result = pd.concat([md_flat, md_mi], axis=1)
        pandas_result = pandas.concat([pd_flat, pd_mi], axis=1)
        df_equals(modin_result.dtypes, pandas_result.dtypes)


class TestZeroComputationDtypes:
    """
    Test cases that shouldn't trigger dtypes computation during their execution.

    Every test patches ``PandasDataframe._compute_dtypes`` with a mock and checks
    at the end that the mock has never been called.
    """

    @pytest.mark.parametrize("self_dtype", ["materialized", "partial", "unknown"])
    @pytest.mark.parametrize(
        "value, value_dtype",
        [
            [3.5, np.dtype(float)],
            [[3.5, 2.4], np.dtype(float)],
            [np.array([3.5, 2.4]), np.dtype(float)],
            [pd.Series([3.5, 2.4]), np.dtype(float)],
        ],
    )
    def test_preserve_dtypes_setitem(self, self_dtype, value, value_dtype):
        """
        Test that ``df[single_existing_column] = value`` preserves dtypes cache.

        Parameters
        ----------
        self_dtype : {"materialized", "partial", "unknown"}
            State of the frame's dtypes cache before the assignment.
        value : scalar or list-like
            Value assigned to the already existing column "b".
        value_dtype : np.dtype
            Dtype the column "b" is expected to have after the assignment.
        """
        with mock.patch.object(PandasDataframe, "_compute_dtypes") as patch:
            df = pd.DataFrame({"a": [1, 2], "b": [3, 4], "c": [3, 4]})
            if self_dtype == "materialized":
                assert df._query_compiler.frame_has_materialized_dtypes
            elif self_dtype == "partial":
                df._query_compiler.set_frame_dtypes_cache(
                    ModinDtypes(
                        DtypesDescriptor(
                            {"a": np.dtype("int64")},
                            cols_with_unknown_dtypes=["b", "c"],
                        )
                    )
                )
            elif self_dtype == "unknown":
                df._query_compiler.set_frame_dtypes_cache(None)
            else:
                raise NotImplementedError(self_dtype)

            df["b"] = value

            if self_dtype == "materialized":
                result_dtype = pandas.Series(
                    [np.dtype("int64"), value_dtype, np.dtype("int64")],
                    index=["a", "b", "c"],
                )
                assert df._query_compiler.frame_has_materialized_dtypes
                assert df.dtypes.equals(result_dtype)
            elif self_dtype == "partial":
                # dtype of "b" became known, "c" is still unknown
                result_dtype = DtypesDescriptor(
                    {"a": np.dtype("int64"), "b": value_dtype},
                    cols_with_unknown_dtypes=["c"],
                    columns_order={0: "a", 1: "b", 2: "c"},
                )
                actual_dtype = df._query_compiler._modin_frame._dtypes._value
                assert actual_dtype.equals(result_dtype)
            elif self_dtype == "unknown":
                # only the dtype of the assigned column "b" is known,
                # the untouched columns "a" and "c" stay unknown
                result_dtype = DtypesDescriptor(
                    {"b": value_dtype},
                    cols_with_unknown_dtypes=["a", "c"],
                    columns_order={0: "a", 1: "b", 2: "c"},
                )
                actual_dtype = df._query_compiler._modin_frame._dtypes._value
                assert actual_dtype.equals(result_dtype)
            else:
                raise NotImplementedError(self_dtype)

        patch.assert_not_called()

    @pytest.mark.parametrize("self_dtype", ["materialized", "partial", "unknown"])
    @pytest.mark.parametrize(
        "value, value_dtype",
        [
            [3.5, np.dtype(float)],
            [[3.5, 2.4], np.dtype(float)],
            [np.array([3.5, 2.4]), np.dtype(float)],
            [pd.Series([3.5, 2.4]), np.dtype(float)],
        ],
    )
    def test_preserve_dtypes_insert(self, self_dtype, value, value_dtype):
        """
        Test that ``df.insert(loc=0, column=new_column, value=value)`` preserves dtypes cache.

        Parameters
        ----------
        self_dtype : {"materialized", "partial", "unknown"}
            State of the frame's dtypes cache before the insertion.
        value : scalar or list-like
            Value inserted as the new column "c" at position 0.
        value_dtype : np.dtype
            Dtype the column "c" is expected to have after the insertion.
        """
        with mock.patch.object(PandasDataframe, "_compute_dtypes") as patch:
            df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
            if self_dtype == "materialized":
                assert df._query_compiler.frame_has_materialized_dtypes
            elif self_dtype == "partial":
                df._query_compiler.set_frame_dtypes_cache(
                    ModinDtypes(
                        DtypesDescriptor(
                            {"a": np.dtype("int64")}, cols_with_unknown_dtypes=["b"]
                        )
                    )
                )
            elif self_dtype == "unknown":
                df._query_compiler.set_frame_dtypes_cache(None)
            else:
                raise NotImplementedError(self_dtype)

            df.insert(loc=0, column="c", value=value)

            if self_dtype == "materialized":
                result_dtype = pandas.Series(
                    [value_dtype, np.dtype("int64"), np.dtype("int64")],
                    index=["c", "a", "b"],
                )
                assert df._query_compiler.frame_has_materialized_dtypes
                assert df.dtypes.equals(result_dtype)
            elif self_dtype == "partial":
                # the inserted column "c" is known, "b" is still unknown
                result_dtype = DtypesDescriptor(
                    {"a": np.dtype("int64"), "c": value_dtype},
                    cols_with_unknown_dtypes=["b"],
                    columns_order={0: "c", 1: "a", 2: "b"},
                )
                actual_dtype = df._query_compiler._modin_frame._dtypes._value
                assert actual_dtype.equals(result_dtype)
            elif self_dtype == "unknown":
                # only the dtype of the inserted column "c" is known
                result_dtype = DtypesDescriptor(
                    {"c": value_dtype},
                    cols_with_unknown_dtypes=["a", "b"],
                    columns_order={0: "c", 1: "a", 2: "b"},
                )
                actual_dtype = df._query_compiler._modin_frame._dtypes._value
                assert actual_dtype.equals(result_dtype)
            else:
                raise NotImplementedError(self_dtype)

        patch.assert_not_called()

    def test_get_dummies_case(self):
        """
        Test that ``get_dummies()`` followed by a multi-column assignment keeps dtypes materialized.
        """
        with mock.patch.object(PandasDataframe, "_compute_dtypes") as patch:
            df = pd.DataFrame(
                {"items": [1, 2, 3, 4], "b": [3, 3, 4, 4], "c": [1, 0, 0, 1]}
            )
            res = pd.get_dummies(df, columns=["b", "c"])
            cols = [col for col in res.columns if col != "items"]
            res[cols] = res[cols] / res[cols].mean()

            assert res._query_compiler.frame_has_materialized_dtypes

        patch.assert_not_called()

    @pytest.mark.parametrize("has_materialized_index", [True, False])
    @pytest.mark.parametrize("drop", [True, False])
    def test_preserve_dtypes_reset_index(self, drop, has_materialized_index):
        """
        Test that ``df.reset_index(drop=drop)`` preserves as much of the dtypes cache as possible.

        Parameters
        ----------
        drop : bool
            Value passed to the ``drop`` parameter of ``reset_index()``.
        has_materialized_index : bool
            Whether the index cache of the frame is kept (True) or reset
            before calling ``reset_index()`` (False).
        """
        with mock.patch.object(PandasDataframe, "_compute_dtypes") as patch:
            # case 1: 'df' has complete dtype by default
            df = pd.DataFrame({"a": [1, 2, 3]})
            if has_materialized_index:
                assert df._query_compiler.frame_has_materialized_index
            else:
                df._query_compiler.set_frame_index_cache(None)
                assert not df._query_compiler.frame_has_materialized_index
            assert df._query_compiler.frame_has_materialized_dtypes

            res = df.reset_index(drop=drop)
            if drop:
                # we dropped the index, so columns and dtypes shouldn't change
                assert res._query_compiler.frame_has_materialized_dtypes
                assert res.dtypes.equals(df.dtypes)
            else:
                if has_materialized_index:
                    # we should have inserted index dtype into the descriptor,
                    # and since both of them are materialized, the result should be
                    # materialized too
                    assert res._query_compiler.frame_has_materialized_dtypes
                    assert res.dtypes.equals(
                        pandas.Series(
                            [np.dtype("int64"), np.dtype("int64")], index=["index", "a"]
                        )
                    )
                else:
                    # we now know that there are cols with unknown name and dtype in our dataframe,
                    # so the resulting dtypes should contain information only about original column
                    expected_dtypes = DtypesDescriptor(
                        {"a": np.dtype("int64")},
                        know_all_names=False,
                    )
                    assert res._query_compiler._modin_frame._dtypes._value.equals(
                        expected_dtypes
                    )

            # case 2: 'df' has partial dtype by default
            df = pd.DataFrame({"a": [1, 2, 3], "b": [3, 4, 5]})
            df._query_compiler.set_frame_dtypes_cache(
                ModinDtypes(
                    DtypesDescriptor(
                        {"a": np.dtype("int64")}, cols_with_unknown_dtypes=["b"]
                    )
                )
            )
            if has_materialized_index:
                assert df._query_compiler.frame_has_materialized_index
            else:
                df._query_compiler.set_frame_index_cache(None)
                assert not df._query_compiler.frame_has_materialized_index

            res = df.reset_index(drop=drop)
            if drop:
                # we dropped the index, so columns and dtypes shouldn't change
                assert res._query_compiler._modin_frame._dtypes._value.equals(
                    df._query_compiler._modin_frame._dtypes._value
                )
            else:
                if has_materialized_index:
                    # we should have inserted index dtype into the descriptor,
                    # the resulted dtype should have information about 'index' and 'a' columns,
                    # and miss dtype info for 'b' column
                    expected_dtypes = DtypesDescriptor(
                        {"index": np.dtype("int64"), "a": np.dtype("int64")},
                        cols_with_unknown_dtypes=["b"],
                        columns_order={0: "index", 1: "a", 2: "b"},
                    )
                    assert res._query_compiler._modin_frame._dtypes._value.equals(
                        expected_dtypes
                    )
                else:
                    # we miss info about the 'index' column since it wasn't materialized at
                    # the time of 'reset_index()' and we're still missing dtype info for 'b' column
                    expected_dtypes = DtypesDescriptor(
                        {"a": np.dtype("int64")},
                        cols_with_unknown_dtypes=["b"],
                        know_all_names=False,
                    )
                    assert res._query_compiler._modin_frame._dtypes._value.equals(
                        expected_dtypes
                    )

        patch.assert_not_called()

    def test_groupby_index_dtype(self):
        """
        Test that the dtype of the groupby key is known after ``groupby(...).<agg>().reset_index()``.
        """

        def assert_key_dtype_is_known(res):
            # the "by" column has to be present among the known dtypes as int64
            res_dtypes = res._query_compiler._modin_frame._dtypes._value
            assert "a" in res_dtypes._known_dtypes
            assert res_dtypes._known_dtypes["a"] == np.dtype("int64")

        with mock.patch.object(PandasDataframe, "_compute_dtypes") as patch:
            # case 1: MapReduce impl, Series as an output of groupby
            df = pd.DataFrame({"a": [1, 2, 2], "b": [3, 4, 5]})
            assert_key_dtype_is_known(
                df.groupby("a").size().reset_index(name="new_name")
            )

            # case 2: ExperimentalImpl impl, Series as an output of groupby
            RangePartitioning.put(True)
            try:
                df = pd.DataFrame({"a": [1, 2, 2], "b": [3, 4, 5]})
                assert_key_dtype_is_known(
                    df.groupby("a").size().reset_index(name="new_name")
                )
            finally:
                RangePartitioning.put(False)

            # case 3: MapReduce impl, DataFrame as an output of groupby
            df = pd.DataFrame({"a": [1, 2, 2], "b": [3, 4, 5]})
            assert_key_dtype_is_known(df.groupby("a").sum().reset_index())

            # case 4: ExperimentalImpl impl, DataFrame as an output of groupby
            RangePartitioning.put(True)
            try:
                df = pd.DataFrame({"a": [1, 2, 2], "b": [3, 4, 5]})
                assert_key_dtype_is_known(df.groupby("a").sum().reset_index())
            finally:
                RangePartitioning.put(False)

            # case 5: FullAxis impl, DataFrame as an output of groupby
            df = pd.DataFrame({"a": [1, 2, 2], "b": [3, 4, 5]})
            assert_key_dtype_is_known(df.groupby("a").quantile().reset_index())

        patch.assert_not_called()


@pytest.mark.skipif(Engine.get() != "Ray", reason="Ray specific")
@pytest.mark.parametrize("mode", [None, "Auto", "On", "Off"])
def test_ray_lazy_exec_mode(mode):
    """
    Check the type of a partition's ``_data_ref`` for every ``LazyExecution`` mode.

    ``mode=None`` means that the currently configured mode is checked as is.
    The original ``LazyExecution`` value is restored when the test finishes.
    """
    import ray

    from modin.config import LazyExecution
    from modin.core.execution.ray.common.deferred_execution import DeferredExecution
    from modin.core.execution.ray.common.utils import ObjectIDType
    from modin.core.execution.ray.implementations.pandas_on_ray.partitioning import (
        PandasOnRayDataframePartition,
    )

    # mode -> expected '_data_ref' types after (apply(), add_to_apply_calls())
    expected_ref_types = {
        "Auto": (ObjectIDType, DeferredExecution),
        "On": (DeferredExecution, DeferredExecution),
        "Off": (ObjectIDType, ObjectIDType),
    }

    initial_mode = LazyExecution.get()
    try:
        if mode is not None:
            LazyExecution.put(mode)
            assert mode == LazyExecution.get()
        else:
            mode = LazyExecution.get()

        pandas_df = pandas.DataFrame({"A": [1, 2, 3]})
        part = PandasOnRayDataframePartition(ray.put(pandas_df))

        def func(df):
            return len(df)

        ray_func = ray.put(func)

        if mode not in expected_ref_types:
            pytest.fail(f"Invalid value: {mode}")
        apply_ref_type, add_to_calls_ref_type = expected_ref_types[mode]

        assert isinstance(part.apply(ray_func)._data_ref, apply_ref_type)
        assert isinstance(
            part.add_to_apply_calls(ray_func)._data_ref, add_to_calls_ref_type
        )
    finally:
        LazyExecution.put(initial_mode)


@pytest.mark.skipif(Engine.get() != "Ray", reason="Ray specific")
def test_materialization_hook_serialization():
    """Check that an item taken from a ``MetaList`` can be passed to a remote Ray task."""

    @ray.remote(num_returns=1)
    def produce_list():
        return [1, 2, 3]

    @ray.remote(num_returns=1)
    def identity(item):
        return item

    list_ref = produce_list.remote()
    hook = MetaList(list_ref)[2]
    echoed_ref = identity.remote(hook)
    assert ray.get(echoed_ref) == 3


def test_remote_function():
    """Check caching and execution of functions decorated with ``remote_function``."""

    def get_func():
        # every call defines a new decorated function that captures no variables
        @remote_function
        def remote_func(arg):
            return arg

        return remote_func

    def get_capturing_func(arg):
        # every call defines a new decorated function that closes over 'arg'
        @remote_function
        def remote_func():
            return arg

        return remote_func

    if Engine.get() in ("Ray", "Unidist"):
        from modin.core.execution.utils import _remote_function_cache

        # remember the cache size to verify how many entries this test adds
        cache_len = len(_remote_function_cache)
        # the non-capturing function must resolve to the very same object on every call
        assert get_func() is get_func()
        assert get_func() in _remote_function_cache.values()
        # the capturing function must not end up in the cache
        assert get_capturing_func(1) not in _remote_function_cache.values()
        # exactly one new entry is expected after all of the calls above
        assert len(_remote_function_cache) == cache_len + 1

    # the decorated functions must be deployable and return the expected values
    assert materialize(deploy(get_func(), [123])) == 123
    # functions capturing different values must be different objects
    assert get_capturing_func(1) is not get_capturing_func(2)
    assert (
        materialize(deploy(get_capturing_func(1)))
        + materialize(deploy(get_capturing_func(2)))
        == 3
    )


@pytest.mark.parametrize(
    "partitioning_scheme,expected_map_approach",
    [
        pytest.param(
            lambda df: {
                "row_lengths": [df.shape[0] // CpuCount.get()] * CpuCount.get(),
                "column_widths": [df.shape[1]],
            },
            "map_partitions",
            id="one_column_partition",
        ),
        pytest.param(
            lambda df: {
                "row_lengths": [df.shape[0] // (CpuCount.get() * 2)]
                * (CpuCount.get() * 2),
                "column_widths": [df.shape[1]],
            },
            "map_partitions_joined_by_column",
            id="very_long_column_partition",
        ),
        pytest.param(
            lambda df: {
                "row_lengths": [df.shape[0] // CpuCount.get()] * CpuCount.get(),
                "column_widths": [df.shape[1] // CpuCount.get()] * CpuCount.get(),
            },
            "map_axis_partitions",
            id="perfect_partitioning",
        ),
    ],
)
def test_dynamic_partitioning(partitioning_scheme, expected_map_approach):
    """
    Check which partition manager method is called by ``map_partitions`` with ``DynamicPartitioning`` on.

    Parameters
    ----------
    partitioning_scheme : callable(pandas.DataFrame) -> dict
        Builds the "row_lengths"/"column_widths" scheme used to construct the Modin frame.
    expected_map_approach : str
        Name of the partition manager method that is expected to be called.
    """
    side = MinRowPartitionSize.get() * CpuCount.get()
    # square frame of ones: 'side' rows and 'side' columns
    pandas_df = pandas.DataFrame({f"col{i}": np.ones(side) for i in range(side)})

    modin_df = construct_modin_df_by_scheme(pandas_df, partitioning_scheme(pandas_df))
    modin_frame = modin_df._query_compiler._modin_frame
    partitions = modin_frame._partitions
    partition_mgr_cls = modin_frame._partition_mgr_cls

    # wrap the expected method so that it still runs but its calls are recorded
    spy = mock.patch.object(
        partition_mgr_cls,
        expected_map_approach,
        wraps=getattr(partition_mgr_cls, expected_map_approach),
    )
    with spy as expected_method, context(DynamicPartitioning=True):
        partition_mgr_cls.map_partitions(partitions, lambda x: x * 2)
        expected_method.assert_called()


@pytest.mark.parametrize("npartitions", [7, CpuCount.get() * 2])
def test_map_partitions_joined_by_column(npartitions):
    """
    Check ``map_partitions_joined_by_column`` for both an invalid and a valid number of column splits.

    Parameters
    ----------
    npartitions : int
        Value of the ``NPartitions`` config the test is executed with.
    """
    with context(NPartitions=npartitions):
        ncols = MinColumnPartitionSize.get()
        nrows = MinRowPartitionSize.get() * CpuCount.get() * 2
        df = pd.DataFrame({f"col{i}": np.ones(nrows) for i in range(ncols)})
        modin_frame = df._query_compiler._modin_frame
        partitions = modin_frame._partitions
        partition_mgr_cls = modin_frame._partition_mgr_cls

        def map_func(df, first_arg, extra_arg=0):
            return df.map(lambda x: (x * first_arg) + extra_arg)

        positional_args = (2,)
        keyword_args = {"extra_arg": 1}

        # this approach doesn't work if column_splits == 0
        with pytest.raises(ValueError):
            partition_mgr_cls.map_partitions_joined_by_column(
                partitions, 0, map_func, positional_args, keyword_args
            )

        result_partitions = partition_mgr_cls.map_partitions_joined_by_column(
            partitions, 2, map_func, positional_args, keyword_args
        )
        assert (
            result_partitions.shape == partitions.shape
        ), "The result has a different split than the original."
        # every value is 1, so the mapped value has to be 1 * 2 + 1 == 3
        for partition_row in result_partitions:
            first_block = partition_row[0].to_numpy()
            assert np.all(first_block == 3), "Invalid map function result."


def test_fold_operator():
    """
    Check a function registered via ``Fold.register`` with and without explicit new labels.

    The registered function keeps rows with an even index label and assigns
    new row and column labels to the result.
    """
    new_index = list(range(500, 1000))
    new_columns = ["b"]

    pandas_source = pandas.DataFrame({"a": range(0, 1000)})
    modin_df = pd.DataFrame(pandas_source)
    even_values = list(range(0, 1000, 2))
    expected_df = pandas.DataFrame(even_values, index=new_index, columns=new_columns)

    def filter_func(df):
        even_rows = df[df.index % 2 == 0]
        even_rows.index = new_index
        even_rows.columns = new_columns
        return even_rows

    PandasQueryCompiler.filter_func = Fold.register(filter_func)

    # variant 1: the resulting labels are passed to the operator explicitly
    def filter_modin_dataframe1(df):
        folded_qc = df._query_compiler.filter_func(
            fold_axis=0,
            new_index=new_index,
            new_columns=new_columns,
        )
        return df.__constructor__(query_compiler=folded_qc)

    pd.DataFrame.filter_dataframe1 = filter_modin_dataframe1
    df_equals(modin_df.filter_dataframe1(), expected_df)

    # variant 2: no labels are passed to the operator
    def filter_modin_dataframe2(df):
        folded_qc = df._query_compiler.filter_func(fold_axis=0)
        return df.__constructor__(query_compiler=folded_qc)

    pd.DataFrame.filter_dataframe2 = filter_modin_dataframe2
    df_equals(modin_df.filter_dataframe2(), expected_df)


def test_default_property_warning_name():
    # Test that when a property defaults to pandas, the raised warning mentions the full name of
    # the pandas property rather than a hex address

    # NOTE: the name of this function is part of the warning text matched below
    @property
    def _test_default_property(df):
        return "suspicious sentinel value"

    # query compiler level property that routes the access through 'DataFrameDefault'
    @property
    def qc_test_default_property(qc):
        return DataFrameDefault.register(_test_default_property)(qc)

    PandasQueryCompiler.qc_test_default_property = qc_test_default_property

    # API level property that forwards the access to the query compiler property
    @property
    def dataframe_test_default_property(df):
        return df._query_compiler.qc_test_default_property

    pd.DataFrame.dataframe_test_default_property = dataframe_test_default_property

    # accessing the property must emit a warning that contains the property's name
    with pytest.warns(
        UserWarning,
        match="<function DataFrame.<property fget:_test_default_property>> is not currently supported",
    ):
        pd.DataFrame([[1]]).dataframe_test_default_property


@pytest.mark.parametrize(
    "modify_config",
    [
        {Engine: "Ray"},
        {Engine: "Dask"},
    ],
    indirect=True,
)
def test_daemonic_worker_protection(modify_config):
    """
    Run a row-wise ``apply()`` that creates a Modin object inside the applied function.

    Test for issue #7346, wherein some operations on Dask cause a second submission
    of a task to the Dask client from the worker scope, which should not cause
    a new client to be created.
    """

    def submission_triggering_row_operation(row):
        return pd.Series(row.to_dict())

    data = {
        "A": ["a", "b", "c", "d"],
        "B": [1, 2, 3, 4],
        "C": [1, 2, 3, 4],
        "D": [1, 2, 3, 4],
    }
    frame = pd.DataFrame(data)

    frame.apply(submission_triggering_row_operation, axis=1)


================================================
FILE: modin/tests/core/test_dispatcher.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

from contextlib import contextmanager

import pytest

import modin.pandas as pd
from modin.config import Backend, Engine, Execution, Parameter, StorageFormat
from modin.core.execution.dispatching.factories import factories
from modin.core.execution.dispatching.factories.dispatcher import (
    FactoryDispatcher,
    FactoryNotFoundError,
)
from modin.core.execution.python.implementations.pandas_on_python.io import (
    PandasOnPythonIO,
)
from modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler
from modin.tests.pandas.utils import switch_execution


@contextmanager
def _switch_value(config: Parameter, value: str):
    """
    Temporarily set `config` to `value`, restoring the previous value on exit.

    Parameters
    ----------
    config : Parameter
        Config object to modify via its ``get()``/``put()`` methods.
    value : str
        Value to put into `config` for the duration of the context.

    Yields
    ------
    object
        Whatever ``config.put(value)`` returns.
    """
    previous_value = config.get()
    try:
        put_result = config.put(value)
        yield put_result
    finally:
        # restore the initial value even if the body (or 'put') has raised
        config.put(previous_value)


class PandasOnTestFactory(factories.BaseFactory):
    """
    Stub factory to ensure we can switch execution engine to 'Test'.

    ``test_factory_switch`` expects the dispatcher to return this factory
    (with ``io_cls == "Foo"``) once ``Engine`` is switched to "Test".
    """

    @classmethod
    def prepare(cls):
        """
        Fill in the ``io_cls`` class attribute lazily with the "Foo" marker value.
        """
        cls.io_cls = "Foo"


class TestOnPythonFactory(factories.BaseFactory):
    """
    Stub factory to ensure we can switch storage format to 'Test'.

    ``test_factory_switch`` expects the dispatcher to return this factory
    (with ``io_cls == "Bar"``) once ``StorageFormat`` is switched to "Test".
    """

    @classmethod
    def prepare(cls):
        """
        Fill in the ``io_cls`` class attribute lazily with the "Bar" marker value.
        """
        cls.io_cls = "Bar"


class FooOnBarFactory(factories.BaseFactory):
    """
    Stub factory to ensure we can switch engine and storage format to 'Bar' and 'Foo'.

    ``test_set_execution`` expects the dispatcher to return this factory
    under ``switch_execution("Bar", "Foo")``.
    """

    @classmethod
    def prepare(cls):
        """
        Fill in the ``io_cls`` class attribute lazily with the "Zug-zug" marker value.
        """
        cls.io_cls = "Zug-zug"


# inject the stubs into the 'factories' module, so that the dispatcher is able
# to find them (presumably by the '<StorageFormat>On<Engine>Factory' name)
factories.PandasOnTestFactory = PandasOnTestFactory
factories.TestOnPythonFactory = TestOnPythonFactory
factories.FooOnBarFactory = FooOnBarFactory

# register a backend for every execution the tests below switch to

# engine "Test" with storage format "Pandas" (see 'PandasOnTestFactory')
Backend.register_backend(
    "Test1",
    Execution(
        engine="Test",
        storage_format="Pandas",
    ),
)

# engine "Python" with storage format "Test" (see 'TestOnPythonFactory')
Backend.register_backend(
    "Test2",
    Execution(
        engine="Python",
        storage_format="Test",
    ),
)
# engine "Bar" with storage format "Foo" (see 'FooOnBarFactory')
Backend.register_backend(
    "Test3",
    Execution(
        engine="Bar",
        storage_format="Foo",
    ),
)
# engine "Dask" with storage format "Pyarrow": 'test_engine_wrong_factory'
# expects 'FactoryNotFoundError' when switching to this combination
Backend.register_backend(
    "Test4",
    Execution(
        engine="Dask",
        storage_format="Pyarrow",
    ),
)

# register them as known "no init" engines for modin.pandas
Engine.NOINIT_ENGINES |= {"Test", "Bar"}


def test_default_factory():
    """Check that the default factory is a ``BaseFactory`` subclass with ``io_cls`` set."""
    default_factory = FactoryDispatcher.get_factory()
    assert issubclass(default_factory, factories.BaseFactory)
    # request the factory once more, as 'io_cls' has to be available on every request
    assert FactoryDispatcher.get_factory().io_cls


def test_factory_switch():
    """Check that switching ``Engine`` or ``StorageFormat`` to 'Test' selects the matching stub factory."""
    # (config to switch to "Test", expected factory, expected 'io_cls' of the factory)
    switch_cases = (
        (Engine, PandasOnTestFactory, "Foo"),
        (StorageFormat, TestOnPythonFactory, "Bar"),
    )
    with switch_execution("Python", "Pandas"):
        for config, expected_factory, expected_io_cls in switch_cases:
            with _switch_value(config, "Test"):
                assert FactoryDispatcher.get_factory() == expected_factory
                assert FactoryDispatcher.get_factory().io_cls == expected_io_cls


def test_engine_wrong_factory():
    """Check that switching to the Dask engine with the Pyarrow storage format raises ``FactoryNotFoundError``."""
    dask_engine = _switch_value(Engine, "Dask")
    pyarrow_storage = _switch_value(StorageFormat, "Pyarrow")
    # the context managers are entered left to right, exactly as nested 'with' blocks
    with pytest.raises(FactoryNotFoundError), dask_engine, pyarrow_storage:
        pass


def test_set_execution():
    """Check that switching the whole execution to ('Bar', 'Foo') selects ``FooOnBarFactory``."""
    with switch_execution("Bar", "Foo"):
        active_factory = FactoryDispatcher.get_factory()
        assert active_factory == FooOnBarFactory


def test_add_option():
    """Check that options added to ``Engine``/``StorageFormat`` at runtime can be switched to."""

    class DifferentlyNamedFactory(factories.BaseFactory):
        @classmethod
        def prepare(cls):
            cls.io_cls = PandasOnPythonIO

    # the factory is exposed under a name that differs from the name of the class
    factories.StorageOnExecFactory = DifferentlyNamedFactory
    StorageFormat.add_option("sToragE")
    Engine.add_option("Exec")
    custom_execution = Execution(
        engine="Exec",
        storage_format="Storage",
    )
    Backend.register_backend(name="Test5", execution=custom_execution)

    with switch_execution("Exec", "Storage"):
        frame = pd.DataFrame([[1, 2, 3], [3, 4, 5], [5, 6, 7]])
        assert isinstance(frame._query_compiler, PandasQueryCompiler)


================================================
FILE: modin/tests/experimental/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.


================================================
FILE: modin/tests/experimental/spreadsheet/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.


================================================
FILE: modin/tests/experimental/spreadsheet/test_general.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import numpy as np
import pandas
import pytest
from modin_spreadsheet import SpreadsheetWidget

import modin.experimental.spreadsheet as mss
import modin.pandas as pd


def get_test_data():
    """
    Build column data of mixed kinds to construct test frames from.

    Returns
    -------
    dict
        Mapping of the column labels "A" to "F" to their values.
    """
    columns = {}
    columns["A"] = 1.0
    columns["B"] = pd.Timestamp("20130102")
    columns["C"] = pd.Series(1, index=list(range(4)), dtype="float32")
    columns["D"] = np.array([5, 2, 3, 1], dtype="int32")
    columns["E"] = pd.Categorical(["test", "train", "foo", "bar"])
    columns["F"] = ["foo", "bar", "buzz", "fox"]
    return columns


def test_from_dataframe():
    """Check that ``from_dataframe`` builds a widget from a Modin frame and rejects a pandas one."""
    data = get_test_data()
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    widget = mss.from_dataframe(modin_df)
    assert isinstance(widget, SpreadsheetWidget)

    with pytest.raises(TypeError):
        mss.from_dataframe(pandas_df)

    # Check parameters don't error
    def can_edit_row(row):
        return row["D"] > 2

    widget_options = {
        "show_toolbar": True,
        "show_history": True,
        "precision": 1,
        "grid_options": {"forceFitColumns": False, "filterable": False},
        "column_options": {"D": {"editable": True}},
        "column_definitions": {"editable": False},
        "row_edit_callback": can_edit_row,
    }
    widget = mss.from_dataframe(modin_df, **widget_options)
    assert isinstance(widget, SpreadsheetWidget)


def test_to_dataframe():
    """Check that ``to_dataframe`` round-trips a Modin frame and rejects non-widget inputs."""
    data = get_test_data()
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    widget = mss.from_dataframe(modin_df)
    restored_df = mss.to_dataframe(widget)

    assert restored_df.equals(modin_df)

    # neither a string nor a pandas frame is a valid input
    for invalid_input in ("Not a SpreadsheetWidget", pandas_df):
        with pytest.raises(TypeError):
            mss.to_dataframe(invalid_input)


================================================
FILE: modin/tests/experimental/test_fuzzydata.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import glob
import os
import shutil
import uuid

from fuzzydata.clients.modin import ModinWorkflow
from fuzzydata.core.generator import generate_workflow

from modin.config import Engine


def test_fuzzydata_sample_workflow():
    """Generate a small fuzzydata workflow on Modin and verify the files it writes."""
    engine = Engine.get().lower()

    # Output location; the base path must match the corresponding github-action.
    base_out_directory = f"/tmp/fuzzydata-test-wf-{engine}/"
    if os.path.exists(base_out_directory):
        # Drop whatever an earlier run left behind.
        shutil.rmtree(base_out_directory)
    wf_name = str(uuid.uuid4())[:8]  # unique name for the generated workflow
    output_directory = f"{base_out_directory}/{wf_name}/"
    os.makedirs(output_directory, exist_ok=True)

    num_versions = 10  # number of unique CSV files to generate
    generation_options = {
        "name": wf_name,
        "num_versions": num_versions,
        "base_shape": (33, 1000),  # (columns, rows) of the base artifact
        "out_directory": output_directory,
        "bfactor": 1.0,  # branching factor: 0.1 is linear, 10.0 is star-like
        "exclude_ops": ["groupby"],  # in-memory groupby operations cause issue #4287
        "matfreq": 2,  # how many operations to chain before materialization
        "wf_options": {"modin_engine": engine},
    }
    workflow = generate_workflow(workflow_class=ModinWorkflow, **generation_options)

    # The workflow must contain every requested version ...
    assert len(workflow) == num_versions
    # ... one CSV per artifact it knows about ...
    csv_artifacts = glob.glob(f"{output_directory}/artifacts/*.csv")
    assert len(csv_artifacts) == len(workflow.artifact_dict)
    # ... a non-empty operations log and the graph file.
    operations_file = f"{output_directory}/{workflow.name}_operations.json"
    assert os.path.exists(operations_file)
    assert os.path.getsize(operations_file) > 0
    assert os.path.exists(f"{output_directory}/{workflow.name}_gt_graph.csv")


================================================
FILE: modin/tests/experimental/test_io_exp.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.
import json
import platform
from pathlib import Path

import numpy as np
import pandas
import pytest
from pandas._testing import ensure_clean

import modin.experimental.pandas as pd
from modin.config import AsyncReadMode, Engine
from modin.tests.pandas.utils import (
    df_equals,
    eval_general,
    parse_dates_values_by_id,
    test_data,
    time_parsing_csv_path,
)
from modin.tests.test_utils import (
    current_execution_is_native,
    warns_that_defaulting_to_pandas_if,
)
from modin.utils import try_cast_to_pandas


@pytest.mark.skipif(
    Engine.get() not in ("Ray", "Unidist", "Dask"),
    reason=f"{Engine.get()} does not have experimental API",
)
def test_from_sql_distributed(tmp_path, make_sql_connection):
    """Compare the experimental partitioned `read_sql` against `pandas.read_sql`.

    The data is read twice with Modin, once from a query string and once from
    the bare table name, using the same partitioning arguments both times.
    """
    table = "test_from_sql_distributed"
    conn = make_sql_connection(str(tmp_path / "test_from_sql_distributed.db"), table)
    query = f"select * from {table}"
    partitioning = {
        "partition_column": "col1",
        "lower_bound": 0,
        "upper_bound": 6,
        "max_sessions": 2,
    }

    expected = pandas.read_sql(query, conn)
    modin_frames = [pd.read_sql(sql, conn, **partitioning) for sql in (query, table)]

    for modin_frame in modin_frames:
        df_equals(modin_frame, expected)


@pytest.mark.skipif(
    Engine.get() not in ("Ray", "Unidist", "Dask"),
    reason=f"{Engine.get()} does not have experimental API",
)
def test_from_sql_defaults(tmp_path, make_sql_connection):
    """Check `read_sql` without partitioning arguments.

    Each read is expected to emit a `UserWarning` and must still return the
    same data as `pandas.read_sql`.
    """
    table = "test_from_sql_distributed"
    conn = make_sql_connection(str(tmp_path / "test_from_sql_distributed.db"), table)
    query = f"select * from {table}"

    expected = pandas.read_sql(query, conn)

    modin_frames = []
    for sql in (query, table):
        with pytest.warns(UserWarning):
            modin_frames.append(pd.read_sql(sql, conn))

    for modin_frame in modin_frames:
        df_equals(modin_frame, expected)


@pytest.mark.usefixtures("TestReadGlobCSVFixture")
@pytest.mark.skipif(
    Engine.get() not in ("Ray", "Unidist", "Dask"),
    reason=f"{Engine.get()} does not have experimental glob API",
)
class TestCsvGlob:
    """Tests for `read_csv_glob`, checked against plain pandas reads.

    The input files are taken from `pytest.files` and `pytest.glob_path`
    (presumably populated by the `TestReadGlobCSVFixture` fixture -- confirm).
    """

    @staticmethod
    def _pandas_concat_of_files():
        """Read every file in `pytest.files` with pandas and concatenate the results."""
        return pandas.concat([pandas.read_csv(csv_file) for csv_file in pytest.files])

    def test_read_multiple_small_csv(self):
        """Reading the glob must equal the concatenation of the per-file pandas reads."""
        expected = self._pandas_concat_of_files()
        actual = pd.read_csv_glob(pytest.glob_path)

        # Concatenation leaves a non-contiguous index, so both sides are reset.
        df_equals(actual.reset_index(drop=True), expected.reset_index(drop=True))

    @pytest.mark.parametrize("nrows", [35, 100])
    def test_read_multiple_csv_nrows(self, request, nrows):
        """`nrows` must limit the glob read to the first rows of the concatenation."""
        expected = self._pandas_concat_of_files().iloc[:nrows, :]
        actual = pd.read_csv_glob(pytest.glob_path, nrows=nrows)

        # Concatenation leaves a non-contiguous index, so both sides are reset.
        df_equals(actual.reset_index(drop=True), expected.reset_index(drop=True))

    def test_read_csv_empty_frame(self):
        """Use the only selected column as the index, which leaves no data columns."""
        read_options = dict(usecols=[0], index_col=0)
        first_file = pytest.files[0]

        actual = pd.read_csv_glob(first_file, **read_options)
        expected = pandas.read_csv(first_file, **read_options)

        df_equals(actual, expected)

    def test_read_csv_without_glob(self):
        """A path that matches no file must raise `FileNotFoundError`."""
        with pytest.raises(FileNotFoundError), warns_that_defaulting_to_pandas_if(
            not current_execution_is_native()
        ):
            pd.read_csv_glob(
                "s3://dask-data/nyc-taxi/2015/yellow_tripdata_2015-",
                storage_options={"anon": True},
            )

    def test_read_csv_glob_4373(self, tmp_path):
        """Read back a 1x1 CSV with `usecols` given as keyword arguments."""
        columns = ["col0"]
        filename = str(tmp_path / "1x1.csv")

        written = pd.DataFrame([[1]], columns=columns)
        with warns_that_defaulting_to_pandas_if(
            written._query_compiler.engine == "Dask"
        ):
            written.to_csv(filename)

        read_options = dict(filepath_or_buffer=filename, usecols=columns)
        df_equals(pd.read_csv_glob(**read_options), pandas.read_csv(**read_options))

    @pytest.mark.parametrize(
        "parse_dates",
        [
            pytest.param(value, id=case_id)
            for case_id, value in parse_dates_values_by_id.items()
        ],
    )
    def test_read_single_csv_with_parse_dates(self, parse_dates):
        """`parse_dates` must give the same frame, or the same error type, as pandas."""
        pandas_error = None
        try:
            pandas_df = pandas.read_csv(time_parsing_csv_path, parse_dates=parse_dates)
        except Exception as caught:
            pandas_error = caught

        if pandas_error is None:
            # pandas succeeded, so the frames are compared directly.
            modin_df = pd.read_csv_glob(time_parsing_csv_path, parse_dates=parse_dates)
            df_equals(modin_df, pandas_df)
            return

        # pandas failed, so Modin has to fail with the same exception type.
        with pytest.raises(Exception) as modin_exception:
            modin_df = pd.read_csv_glob(time_parsing_csv_path, parse_dates=parse_dates)
            try_cast_to_pandas(modin_df)  # force materialization
        assert isinstance(
            modin_exception.value, type(pandas_error)
        ), "Got Modin Exception type {}, but pandas Exception type {} was expected".format(
            type(modin_exception.value), type(pandas_error)
        )


@pytest.mark.skipif(
    Engine.get() not in ("Ray", "Unidist", "Dask"),
    reason=f"{Engine.get()} does not have experimental glob API",
)
@pytest.mark.parametrize(
    "path",
    [
        "s3://modin-test/modin-bugs/multiple_csv/test_data*.csv",
    ],
)
def test_read_multiple_csv_cloud_store(path, s3_resource, s3_storage_options):
    """Read a glob of CSV files from S3 and compare with per-file pandas reads."""

    def _pandas_read_csv_glob(path, storage_options):
        # pandas has no glob reader: read the files "<prefix>0.csv" and
        # "<prefix>1.csv" one by one, where the prefix is the part before "*".
        prefix = path.lower().split("*")[0]
        frames = [
            pandas.read_csv(f"{prefix}{i}.csv", storage_options=storage_options)
            for i in range(2)
        ]
        return pandas.concat(frames).reset_index(drop=True)

    def _read(module, **kwargs):
        # Use the glob reader when the module offers one, else fall back to pandas.
        if hasattr(module, "read_csv_glob"):
            return pd.read_csv_glob(path, **kwargs).reset_index(drop=True)
        return _pandas_read_csv_glob(path, **kwargs)

    eval_general(pd, pandas, _read, storage_options=s3_storage_options)


@pytest.mark.skipif(
    Engine.get() not in ("Ray", "Unidist", "Dask"),
    reason=f"{Engine.get()} does not have experimental API",
)
@pytest.mark.parametrize(
    "storage_options_extra",
    [{"anon": False}, {"anon": True}, {"key": "123", "secret": "123"}],
)
def test_read_multiple_csv_s3_storage_opts(
    s3_resource, s3_storage_options, storage_options_extra
):
    """Read a directory of CSV files from S3 with extra storage options.

    When the extra options contain an "anon" key, both libraries are expected
    to fail with `PermissionError("Forbidden")`.
    """
    s3_path = "s3://modin-test/modin-bugs/multiple_csv/"

    def _pandas_read_csv_glob(path, storage_options):
        # `path` is accepted for signature parity only; the two files are
        # always read from `s3_path`.
        frames = []
        for i in range(2):
            frames.append(
                pandas.read_csv(
                    f"{s3_path}test_data{i}.csv",
                    storage_options=storage_options,
                )
            )
        return pandas.concat(frames).reset_index(drop=True)

    def _read(module, **kwargs):
        # Use the glob reader when the module offers one, else fall back to pandas.
        if hasattr(module, "read_csv_glob"):
            return pd.read_csv_glob(s3_path, **kwargs)
        return _pandas_read_csv_glob(s3_path, **kwargs)

    expected_exception = (
        PermissionError("Forbidden") if "anon" in storage_options_extra else None
    )
    eval_general(
        pd,
        pandas,
        _read,
        storage_options=s3_storage_options | storage_options_extra,
        expected_exception=expected_exception,
    )


@pytest.mark.skipif(
    Engine.get() not in ("Ray", "Unidist", "Dask"),
    reason=f"{Engine.get()} does not have experimental API",
)
@pytest.mark.parametrize("pathlike", [False, True])
@pytest.mark.parametrize("compression", [None, "gzip"])
@pytest.mark.parametrize(
    "filename", ["test_default_to_pickle.pkl", "test_to_pickle*.pkl"]
)
@pytest.mark.parametrize("read_func", ["read_pickle_glob"])
@pytest.mark.parametrize("to_func", ["to_pickle_glob"])
def test_distributed_pickling(
    tmp_path, filename, compression, pathlike, read_func, to_func
):
    """Write a frame with `to_func`, read it back with `read_func` and compare.

    The write/read pair is wrapped in `warns_that_defaulting_to_pandas_if`,
    enabled only for the filename without a wildcard.
    """
    df = pd.DataFrame(test_data["int_data"])

    # The warning condition depends on the filename before ".gz" is appended.
    without_wildcard = filename == "test_default_to_pickle.pkl"

    target_name = f"{filename}.gz" if compression else filename
    if pathlike:
        target_name = Path(target_name)
    # NOTE(review): the path is always turned into `str`, so `pathlike` does not
    # change what the APIs receive -- confirm whether that is intended.
    target = str(tmp_path / target_name)

    with warns_that_defaulting_to_pandas_if(without_wildcard):
        write = getattr(df.modin, to_func)
        write(target, compression=compression)
        read = getattr(pd, read_func)
        pickled_df = read(target, compression=compression)
    df_equals(pickled_df, df)


@pytest.mark.skipif(
    Engine.get() not in ("Ray", "Unidist", "Dask"),
    reason=f"{Engine.get()} does not have experimental API",
)
@pytest.mark.parametrize(
    "filename",
    ["test_parquet_glob.parquet", "test_parquet_glob*.parquet"],
)
def test_parquet_glob(tmp_path, filename):
    """Write a frame with `to_parquet_glob`, read it back and compare.

    The write/read pair is wrapped in `warns_that_defaulting_to_pandas_if`,
    enabled only for the filename without a wildcard.
    """
    df = pd.DataFrame(test_data["int_data"])
    target = str(tmp_path / filename)
    without_wildcard = filename == "test_parquet_glob.parquet"

    with warns_that_defaulting_to_pandas_if(without_wildcard):
        df.modin.to_parquet_glob(target)
        read_df = pd.read_parquet_glob(target)
    df_equals(read_df, df)


@pytest.mark.skipif(
    Engine.get() not in ("Ray", "Unidist", "Dask"),
    reason=f"{Engine.get()} does not have experimental API",
)
@pytest.mark.parametrize(
    "filename",
    ["test_json_glob.json", "test_json_glob*.json"],
)
def test_json_glob(tmp_path, filename):
    """Write a frame with `to_json_glob`, read it back and compare.

    The write/read pair is wrapped in `warns_that_defaulting_to_pandas_if`,
    enabled only for the filename without a wildcard.
    """
    df = pd.DataFrame(test_data["int_data"])
    target = str(tmp_path / filename)
    without_wildcard = filename == "test_json_glob.json"

    with warns_that_defaulting_to_pandas_if(without_wildcard):
        df.modin.to_json_glob(target)
        read_df = pd.read_json_glob(target)
    df_equals(read_df, df)


@pytest.mark.skipif(
    Engine.get() not in ("Ray", "Unidist", "Dask"),
    reason=f"{Engine.get()} does not have experimental API",
)
@pytest.mark.parametrize(
    "filename",
    ["test_xml_glob.xml", "test_xml_glob*.xml"],
)
@pytest.mark.skipif(
    platform.system() == "Windows",
    reason="https://github.com/modin-project/modin/issues/7497",
)
def test_xml_glob(tmp_path, filename):
    """Write a frame with `to_xml_glob`, read it back and compare.

    The write/read pair is wrapped in `warns_that_defaulting_to_pandas_if`,
    enabled only for the filename without a wildcard.
    """
    df = pd.DataFrame(test_data["int_data"])
    target = str(tmp_path / filename)
    without_wildcard = filename == "test_xml_glob.xml"

    with warns_that_defaulting_to_pandas_if(without_wildcard):
        df.modin.to_xml_glob(target, index=False)
        read_df = pd.read_xml_glob(target)

    # Concatenating the pieces disturbs the index, so it is reset before comparing.
    df_equals(read_df.reset_index(drop=True), df)


@pytest.mark.skipif(
    Engine.get() not in ("Ray", "Unidist", "Dask"),
    reason=f"{Engine.get()} does not have experimental read_custom_text API",
)
@pytest.mark.parametrize("set_async_read_mode", [False, True], indirect=True)
def test_read_custom_json_text(set_async_read_mode):
    """Read a JSON-lines file with `read_custom_text` and compare with `read_json`."""

    def _generate_json(file_name, nrows, ncols):
        # Write an (nrows x ncols) frame of random floats as one JSON record per line.
        column_names = [f"col{x}" for x in range(ncols)]
        frame = pandas.DataFrame(np.random.rand(nrows, ncols), columns=column_names)
        frame.to_json(file_name, lines=True, orient="records")

    # A custom parser can do things the ready-made readers cannot. Here it
    # keeps only a subset of the columns, which reduces the amount of RAM
    # needed for reading.
    def _custom_parser(io_input, **kwargs):
        wanted = ("col0", "col1", "col3")
        # A faster JSON library (e.g. simdjson) could be used here instead.
        records = [json.loads(line) for line in io_input]
        selected = {key: [record[key] for record in records] for key in wanted}
        return pandas.DataFrame(selected).rename(columns={"col0": "testID"})

    with ensure_clean() as filename:
        _generate_json(filename, 64, 8)

        df1 = pd.read_custom_text(
            filename,
            columns=["testID", "col1", "col3"],
            custom_parser=_custom_parser,
            is_quoting=False,
        )
        reference = pd.read_json(filename, lines=True)
        df2 = reference[["col0", "col1", "col3"]].rename(columns={"col0": "testID"})

        async_read = AsyncReadMode.get()
        if async_read:
            # With asynchronous reads the comparison has to happen inside the
            # `ensure_clean` context, because the file may be deleted before
            # the actual reading starts.
            df_equals(df1, df2)
    if not async_read:
        df_equals(df1, df2)


@pytest.mark.skipif(
    Engine.get() not in ("Ray", "Unidist", "Dask"),
    reason=f"{Engine.get()} does not have experimental API",
)
@pytest.mark.parametrize("set_async_read_mode", [False, True], indirect=True)
def test_read_evaluated_dict(set_async_read_mode):
    """Read a file of stringified dicts with `read_custom_text`.

    Every line of the generated file is the `str()` of a dict. The custom
    parser evaluates each line and keeps the "col1" and "col2" values. The
    column list is given once explicitly and once through a callback, and
    both reads must produce the same frame.
    """

    def _generate_evaluated_dict(file_name, nrows, ncols):
        # Write `nrows` lines, each the string form of a dict that maps
        # "col0" .. "col<ncols-1>" to random floats.
        keys = [f"col{x}" for x in range(ncols)]

        with open(file_name, mode="w") as _file:
            for _ in range(nrows):
                row = dict(zip(keys, np.random.rand(ncols)))
                _file.write(str(row))
                _file.write("\n")

    # This parser allows us to read a format not supported by other reading functions
    def _custom_parser(io_input, **kwargs):
        col1_values = []
        col2_values = []
        for line in io_input:
            # `eval` is acceptable here only because every line was written by
            # this test itself; never reuse this parser on untrusted files.
            obj = eval(line)
            col1_values.append(obj["col1"])
            col2_values.append(obj["col2"])
        # Each output column carries the values of the same-named source key.
        return pandas.DataFrame({"col1": col1_values, "col2": col2_values})

    def columns_callback(io_input, **kwargs):
        # Take the second and third keys of the first line as the column
        # names; return None when the input has no lines.
        for line in io_input:
            return list(eval(line).keys())[1:3]
        return None

    with ensure_clean() as filename:
        _generate_evaluated_dict(filename, 64, 8)

        df1 = pd.read_custom_text(
            filename,
            columns=["col1", "col2"],
            custom_parser=_custom_parser,
        )
        assert df1.shape == (64, 2)

        df2 = pd.read_custom_text(
            filename, columns=columns_callback, custom_parser=_custom_parser
        )
        if AsyncReadMode.get():
            # If read operations are asynchronous, then the dataframes
            # check should be inside `ensure_clean` context
            # because the file may be deleted before actual reading starts
            df_equals(df1, df2)
    if not AsyncReadMode.get():
        df_equals(df1, df2)


================================================
FILE: modin/tests/experimental/test_pipeline.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import numpy as np
import pandas
import pytest

import modin.pandas as pd
from modin.config import Engine, NPartitions
from modin.core.execution.ray.common import RayWrapper
from modin.distributed.dataframe.pandas.partitions import from_partitions
from modin.experimental.batch.pipeline import PandasQueryPipeline
from modin.tests.pandas.utils import df_equals


@pytest.mark.skipif(
    Engine.get() != "Ray",
    reason="Only Ray supports the Batch Pipeline API",
)
class TestPipelineRayEngine:
    def test_warnings(self):
        """Check the warnings emitted when building and running an empty pipeline."""
        frame = pd.DataFrame(np.random.randint(0, 1000, (1000, 1000)))

        # Building a pipeline must warn that the feature is experimental.
        experimental_notice = (
            "The Batch Pipeline API is an experimental feature and still under development in Modin."
        )
        with pytest.warns(UserWarning, match=experimental_notice):
            pipeline = PandasQueryPipeline(frame)

        # With no output query registered, computing warns and returns nothing.
        no_outputs_notice = (
            "No outputs to compute. Returning an empty list. Please specify outputs by calling `add_query` with `is_output=True`."
        )
        with pytest.warns(UserWarning, match=no_outputs_notice):
            output = pipeline.compute_batch()
        assert output == [], "Empty pipeline did not return an empty list."

    def test_pipeline_simple(self):
        """Run a four-query pipeline end to end and compare with a manual computation.

        The last query appends a copy of the final row to whatever frame it
        receives. The reference result therefore applies the same function to
        each row partition of the reference frame. The test also checks that
        passing `num_partitions` to a pipeline leaves `NPartitions` untouched.
        """
        arr = np.random.randint(0, 1000, (1000, 1000))
        df = pd.DataFrame(arr)

        def add_col(df):
            df["new_col"] = df.sum(axis=1)
            return df

        def add_row_to_partition(df):
            return pandas.concat([df, df.iloc[[-1]]])

        # Build pipeline
        pipeline = PandasQueryPipeline(df)
        pipeline.add_query(add_col)
        pipeline.add_query(lambda df: df * -30)
        pipeline.add_query(
            lambda df: df.rename(columns={i: f"col {i}" for i in range(1000)})
        )
        pipeline.add_query(add_row_to_partition, is_output=True)
        new_df = pipeline.compute_batch()[0]

        # Build df without pipelining to ensure correctness
        correct_df = add_col(pd.DataFrame(arr))
        correct_df *= -30
        # The renamed frame is converted to pandas and wrapped in a new Modin frame.
        correct_df = pd.DataFrame(
            correct_df.rename(columns={i: f"col {i}" for i in range(1000)})._to_pandas()
        )
        correct_modin_frame = correct_df._query_compiler._modin_frame
        partitions = correct_modin_frame._partition_mgr_cls.row_partitions(
            correct_modin_frame._partitions
        )
        partitions = [
            partition.add_to_apply_calls(add_row_to_partition)
            for partition in partitions
        ]
        # Run the queued call on every partition before collecting its blocks.
        for partition in partitions:
            partition.drain_call_queue()
        partitions = [partition.list_of_blocks for partition in partitions]
        correct_df = from_partitions(partitions, axis=None)

        # Compare pipelined and non-pipelined df
        df_equals(correct_df, new_df)

        # Ensure that setting `num_partitions` when creating a pipeline does not change `NPartitions`
        num_partitions = NPartitions.get()
        PandasQueryPipeline(df, num_partitions=(num_partitions - 1))
        assert (
            NPartitions.get() == num_partitions
        ), "Pipeline changed NPartitions.get()"

    def test_update_df(self):
        """Check that `update_df` replaces the frame the pipeline is computed on."""
        original = pd.DataFrame([[1, 2, 3], [4, 5, 6]])
        pipeline = PandasQueryPipeline(original)
        pipeline.add_query(lambda frame: frame + 3, is_output=True)

        # Swap in the negated frame; the output must be derived from it.
        pipeline.update_df(original * -1)
        outputs = pipeline.compute_batch()

        df_equals((original * -1) + 3, outputs[0])

    def test_multiple_outputs(self):
        """Run a pipeline with three output queries and verify every result."""
        arr = np.random.randint(0, 1000, (1000, 1000))
        column_names = {i: f"col {i}" for i in range(1000)}

        pipeline = PandasQueryPipeline(pd.DataFrame(arr))
        pipeline.add_query(lambda frame: frame * -30, is_output=True)
        pipeline.add_query(
            lambda frame: frame.rename(columns=column_names),
            is_output=True,
        )
        pipeline.add_query(lambda frame: frame + 30, is_output=True)

        results = pipeline.compute_batch()
        assert len(results) == 3, "Pipeline did not return all outputs"

        # Replay the same steps without a pipeline, checking after each one.
        expected = pd.DataFrame(arr) * -30
        df_equals(expected, results[0])  # output of the first query
        expected = expected.rename(columns=column_names)
        df_equals(expected, results[1])  # output of the second query
        expected += 30
        df_equals(expected, results[2])  # output of the third query

    def test_output_id(self):
        """Check that inconsistent uses of `output_id` are rejected.

        After every rejected `add_query` call the pipeline must still hold only
        the single output that was registered before the failing call.
        """
        df = pd.DataFrame(np.random.randint(0, 1000, (1000, 1000)))
        all_nodes_msg = "Output ID must be specified for all nodes."

        def scale(frame):
            return frame * -30

        def rename_columns(frame):
            return frame.rename(columns={i: f"col {i}" for i in range(1000)})

        def identity(frame):
            return frame

        def assert_node_not_added(pipe):
            assert (
                len(pipe.query_list) == 0 and len(pipe.outputs) == 1
            ), "Invalid `add_query` incorrectly added a node to the pipeline."

        # First output has an id, the second one omits it.
        pipeline = PandasQueryPipeline(df, 0)
        pipeline.add_query(scale, is_output=True, output_id=20)
        with pytest.raises(ValueError, match=all_nodes_msg):
            pipeline.add_query(rename_columns, is_output=True)
        assert_node_not_added(pipeline)

        # First output has no id, the second one supplies it.
        pipeline = PandasQueryPipeline(df)
        pipeline.add_query(scale, is_output=True)
        with pytest.raises(ValueError, match=all_nodes_msg):
            pipeline.add_query(rename_columns, is_output=True, output_id=20)
        assert_node_not_added(pipeline)

        # `pass_output_id=True` without any ids, then an id on a non-output node.
        pipeline = PandasQueryPipeline(df)
        pipeline.add_query(identity, is_output=True)
        missing_ids_msg = (
            "`pass_output_id` is set to True, but output ids have not been specified. "
            "To pass output ids, please specify them using the `output_id` kwarg with pipeline.add_query"
        )
        with pytest.raises(ValueError, match=missing_ids_msg):
            pipeline.compute_batch(postprocessor=identity, pass_output_id=True)
        with pytest.raises(
            ValueError,
            match="Output ID cannot be specified for non-output node.",
        ):
            pipeline.add_query(identity, output_id=22)
        assert_node_not_added(pipeline)

    def test_output_id_multiple_outputs(self):
        """Check that outputs registered with ids come back in a dict keyed by id."""
        arr = np.random.randint(0, 1000, (1000, 1000))
        column_names = {i: f"col {i}" for i in range(1000)}

        pipeline = PandasQueryPipeline(pd.DataFrame(arr))
        pipeline.add_query(lambda frame: frame * -30, is_output=True, output_id=20)
        pipeline.add_query(
            lambda frame: frame.rename(columns=column_names),
            is_output=True,
            output_id=21,
        )
        pipeline.add_query(lambda frame: frame + 30, is_output=True, output_id=22)

        results = pipeline.compute_batch()
        assert isinstance(
            results, dict
        ), "Pipeline did not return a dictionary mapping output_ids to dfs"
        for position, output_id in enumerate((20, 21, 22), start=1):
            assert output_id in results, f"Output ID {position} not cached correctly"
        assert len(results) == 3, "Pipeline did not return all outputs"

        # Replay the same steps without a pipeline, checking after each one.
        expected = pd.DataFrame(arr) * -30
        df_equals(expected, results[20])  # output of the first query
        expected = expected.rename(columns=column_names)
        df_equals(expected, results[21])  # output of the second query
        expected += 30
        df_equals(expected, results[22])  # output of the third query

    def test_postprocessing(self):
        """Check that the `postprocessor` passed to `compute_batch` is applied to every output."""
        arr = np.random.randint(0, 1000, (1000, 1000))
        column_names = {i: f"col {i}" for i in range(1000)}

        def new_col_adder(df):
            df["new_col"] = df.iloc[:, -1]
            return df

        def expected_output(frame):
            # Reference result: the frame plus a copy of its last column.
            # Works on a copy so the intermediate frame stays unchanged.
            with_extra = frame.copy()
            with_extra["new_col"] = with_extra.iloc[:, -1]
            return with_extra

        pipeline = PandasQueryPipeline(pd.DataFrame(arr))
        pipeline.add_query(lambda frame: frame * -30, is_output=True)
        pipeline.add_query(
            lambda frame: frame.rename(columns=column_names),
            is_output=True,
        )
        pipeline.add_query(lambda frame: frame + 30, is_output=True)

        results = pipeline.compute_batch(postprocessor=new_col_adder)
        assert len(results) == 3, "Pipeline did not return all outputs"

        scaled = pd.DataFrame(arr) * -30
        df_equals(expected_output(scaled), results[0])
        renamed = scaled.rename(columns=column_names)
        df_equals(expected_output(renamed), results[1])
        shifted = renamed + 30
        df_equals(expected_output(shifted), results[2])

    def test_postprocessing_with_output_id(self):
        """Check that a `postprocessor` can be combined with outputs that carry an `output_id`."""
        column_names = {i: f"col {i}" for i in range(1000)}

        def new_col_adder(df):
            df["new_col"] = df.iloc[:, -1]
            return df

        frame = pd.DataFrame(np.random.randint(0, 1000, (1000, 1000)))
        pipeline = PandasQueryPipeline(frame)
        pipeline.add_query(lambda df: df * -30, is_output=True, output_id=20)
        pipeline.add_query(
            lambda df: df.rename(columns=column_names),
            is_output=True,
            output_id=21,
        )
        pipeline.add_query(lambda df: df + 30, is_output=True, output_id=22)

        # Only the number of returned outputs is verified here.
        results = pipeline.compute_batch(postprocessor=new_col_adder)
        assert len(results) == 3, "Pipeline did not return all outputs"

    def test_postprocessing_with_output_id_passed(self):
        """Check that the `postprocessor` receives the `output_id` when `pass_output_id` is `True`."""
        arr = np.random.randint(0, 1000, (1000, 1000))
        column_names = {i: f"col {i}" for i in range(1000)}

        def new_col_adder(df, o_id):
            df["new_col"] = o_id
            return df

        def expected_output(frame, output_id):
            # Reference result: the frame plus a column holding the output id.
            # Works on a copy so the intermediate frame stays unchanged.
            tagged = frame.copy()
            tagged["new_col"] = output_id
            return tagged

        pipeline = PandasQueryPipeline(pd.DataFrame(arr))
        pipeline.add_query(lambda frame: frame * -30, is_output=True, output_id=20)
        pipeline.add_query(
            lambda frame: frame.rename(columns=column_names),
            is_output=True,
            output_id=21,
        )
        pipeline.add_query(lambda frame: frame + 30, is_output=True, output_id=22)

        results = pipeline.compute_batch(
            postprocessor=new_col_adder, pass_output_id=True
        )

        scaled = pd.DataFrame(arr) * -30
        df_equals(expected_output(scaled, 20), results[20])
        renamed = scaled.rename(columns=column_names)
        df_equals(expected_output(renamed, 21), results[21])
        shifted = renamed + 30
        df_equals(expected_output(shifted, 22), results[22])

    def test_postprocessing_with_partition_id(self):
        """Check that the `postprocessor` receives a partition id when `pass_partition_id` is `True`."""
        arr = np.random.randint(0, 1000, (1000, 1000))

        def new_col_adder(df, partition_id):
            df["new_col"] = partition_id
            return df

        def postprocess_row_partitions(frame):
            # Reference computation: apply `new_col_adder` to every row
            # partition of `frame`, passing the partition's position as the id,
            # then rebuild a frame from the resulting blocks.
            modin_frame = frame._query_compiler._modin_frame
            row_parts = modin_frame._partition_mgr_cls.row_partitions(
                modin_frame._partitions
            )
            row_parts = [
                part.add_to_apply_calls(new_col_adder, position)
                for position, part in enumerate(row_parts)
            ]
            for part in row_parts:
                part.drain_call_queue()
            return from_partitions(
                [part.list_of_blocks for part in row_parts], axis=None
            )

        pipeline = PandasQueryPipeline(pd.DataFrame(arr))
        pipeline.add_query(lambda frame: frame * -30, is_output=True, output_id=20)
        pipeline.add_query(
            lambda frame: frame.rename(columns={i: f"col {i}" for i in range(1000)}),
            is_output=True,
            output_id=21,
        )
        results = pipeline.compute_batch(
            postprocessor=new_col_adder, pass_partition_id=True
        )

        # First output: the scaled frame, postprocessed per row partition.
        expected = postprocess_row_partitions(pd.DataFrame(arr) * -30)
        df_equals(expected, results[20])

        # Second output: drop the added column, rename, and wrap the pandas
        # result in a new Modin frame before postprocessing again.
        without_new_col = expected.drop(columns=["new_col"])
        renamed = pd.DataFrame(
            without_new_col.rename(
                columns={i: f"col {i}" for i in range(1000)}
            )._to_pandas()
        )
        df_equals(postprocess_row_partitions(renamed), results[21])

    def test_postprocessing_with_all_metadata(self):
        """Verify that the postprocessor receives both the output ID and the partition ID.

        The expected frames are built by hand: the same postprocessor is queued
        on every row partition of a reference frame with the output ID and the
        partition's position, and the result is reassembled with `from_partitions`.
        """
        arr = np.random.randint(0, 1000, (1000, 1000))

        def add_metadata_column(df, o_id, partition_id):
            df["new_col"] = f"{o_id} {partition_id}"
            return df

        def expected_after_postprocessing(modin_df, output_id):
            # Queue the postprocessor on each row partition with (output ID, position).
            frame = modin_df._query_compiler._modin_frame
            row_parts = frame._partition_mgr_cls.row_partitions(frame._partitions)
            row_parts = [
                part.add_to_apply_calls(add_metadata_column, output_id, position)
                for position, part in enumerate(row_parts)
            ]
            # Execute all queued calls before collecting the underlying blocks.
            for part in row_parts:
                part.drain_call_queue()
            blocks = [part.list_of_blocks for part in row_parts]
            return from_partitions(blocks, axis=None)

        df = pd.DataFrame(arr)
        pipeline = PandasQueryPipeline(df)
        pipeline.add_query(lambda df: df * -30, is_output=True, output_id=20)
        pipeline.add_query(
            lambda df: df.rename(columns={i: f"col {i}" for i in range(1000)}),
            is_output=True,
            output_id=21,
        )
        new_dfs = pipeline.compute_batch(
            postprocessor=add_metadata_column,
            pass_partition_id=True,
            pass_output_id=True,
        )

        # First output: the scaled frame postprocessed with output ID 20.
        expected = expected_after_postprocessing(pd.DataFrame(arr) * -30, 20)
        df_equals(expected, new_dfs[20])

        # Second output: drop the postprocessor column, rename, and postprocess with ID 21.
        renamed = expected.drop(columns=["new_col"]).rename(
            columns={i: f"col {i}" for i in range(1000)}
        )
        expected = expected_after_postprocessing(
            pd.DataFrame(renamed._to_pandas()), 21
        )
        df_equals(expected, new_dfs[21])

    def test_repartition_after(self):
        """Verify the handling of `repartition_after`, including the multi-partition error."""

        def add_partition_id_column(df, partition_id):
            df["new_col"] = partition_id
            return df

        pipeline = PandasQueryPipeline(pd.DataFrame([list(range(1000))]))
        pipeline.add_query(
            lambda df: pandas.concat([df] * 1000), repartition_after=True
        )
        pipeline.add_query(
            add_partition_id_column, is_output=True, pass_partition_id=True
        )
        results = pipeline.compute_batch()
        # The previous query repartitioned its result, so the query that writes
        # the partition ID into `new_col` is expected to see NPartitions.get()
        # partitions and therefore produce that many distinct values.
        distinct_partition_ids = results[0]["new_col"].unique()
        assert len(distinct_partition_ids) == NPartitions.get()

        # `repartition_after=True` must raise when the result has more than
        # one partition.
        remote_partitions = [
            RayWrapper.put(pandas.DataFrame(rows))
            for rows in ([[0, 1, 2]], [[3, 4, 5]])
        ]
        multi_partition_df = from_partitions(remote_partitions, 0)
        pipeline = PandasQueryPipeline(multi_partition_df, 0)
        pipeline.add_query(lambda df: df, repartition_after=True, is_output=True)

        expected_message = "Dynamic repartitioning is currently only supported for DataFrames with 1 partition."
        with pytest.raises(NotImplementedError, match=expected_message):
            pipeline.compute_batch()

    def test_fan_out(self):
        """Verify the handling of `fan_out`, including the multi-partition error."""

        def add_partition_id_column(df, partition_id):
            df["new_col"] = partition_id
            return df

        def join_partition_ids(dfs):
            # Concatenate the first `new_col` value of every frame into one string
            # and store it on the first frame, which becomes the reduced result.
            first = dfs[0]
            first["new_col1"] = "".join(
                [str(frame["new_col"].values[0]) for frame in dfs]
            )
            return first

        def build_fan_out_pipeline(frame):
            # Both scenarios below use exactly the same single fan-out query.
            built = PandasQueryPipeline(frame)
            built.add_query(
                add_partition_id_column,
                fan_out=True,
                reduce_fn=join_partition_ids,
                pass_partition_id=True,
                is_output=True,
            )
            return built

        pipeline = build_fan_out_pipeline(pd.DataFrame([[0, 1, 2]]))
        new_df = pipeline.compute_batch()[0]
        expected = pd.DataFrame([[0, 1, 2]])
        expected["new_col"] = 0
        expected["new_col1"] = "".join([str(i) for i in range(NPartitions.get())])
        df_equals(expected, new_df)

        # `fan_out=True` must raise when the input has more than one partition.
        remote_partitions = [
            RayWrapper.put(pandas.DataFrame(rows))
            for rows in ([[0, 1, 2]], [[3, 4, 5]])
        ]
        pipeline = build_fan_out_pipeline(from_partitions(remote_partitions, 0))
        with pytest.raises(
            NotImplementedError,
            match="Fan out is only supported with DataFrames with 1 partition.",
        ):
            pipeline.compute_batch()[0]

    def test_pipeline_complex(self):
        """Run a pipeline combining `fan_out`, `repartition_after` and postprocessing end to end.

        The pipeline has two outputs (IDs 20 and 21). The second output's query
        also writes each partition to ``<partition_id>.csv`` in the current
        working directory; those files are compared with the expected slices
        and removed afterwards.
        """
        from os import remove
        from os.path import exists
        from time import sleep

        df = pd.DataFrame([[0, 1, 2]])

        def new_col_adder(df, partition_id):
            # NOTE(review): the one-minute sleep is presumably there to make the
            # fanned-out tasks long-running - confirm before shortening it.
            sleep(60)
            df["new_col"] = partition_id
            return df

        def reducer(dfs):
            # Join the first `new_col` value of every fanned-out frame and store
            # the string on the first frame, which becomes the reduced result.
            new_cols = "".join([str(df["new_col"].values[0]) for df in dfs])
            dfs[0]["new_col1"] = new_cols
            return dfs[0]

        desired_num_partitions = 24
        pipeline = PandasQueryPipeline(df, num_partitions=desired_num_partitions)
        pipeline.add_query(
            new_col_adder,
            fan_out=True,
            reduce_fn=reducer,
            pass_partition_id=True,
            is_output=True,
            output_id=20,
        )
        pipeline.add_query(
            lambda df: pandas.concat([df] * 1000),
            repartition_after=True,
        )

        def to_csv(df, partition_id):
            df = df.drop(columns=["new_col"])
            df.to_csv(f"{partition_id}.csv")
            return df

        pipeline.add_query(to_csv, is_output=True, output_id=21, pass_partition_id=True)

        def post_proc(df, o_id, partition_id):
            df["new_col_proc"] = f"{o_id} {partition_id}"
            return df

        new_dfs = pipeline.compute_batch(
            postprocessor=post_proc,
            pass_partition_id=True,
            pass_output_id=True,
        )

        # Expected first output: a single row tagged by the fan-out and the postprocessor.
        correct_df = pd.DataFrame([[0, 1, 2]])
        correct_df["new_col"] = 0
        correct_df["new_col1"] = "".join(
            [str(i) for i in range(desired_num_partitions)]
        )
        correct_df["new_col_proc"] = "20 0"
        df_equals(correct_df, new_dfs[20])

        # Expected second output: the row repeated 1000 times, with the
        # postprocessor column reflecting which partition each row landed in.
        correct_df = pd.concat([correct_df] * 1000)
        correct_df = correct_df.drop(columns=["new_col"])
        correct_df["new_col_proc"] = "21 0"
        new_length = len(correct_df.index) // desired_num_partitions
        for i in range(desired_num_partitions):
            if i == desired_num_partitions - 1:
                # The last partition also takes the remainder rows.
                correct_df.iloc[i * new_length :, -1] = f"21 {i}"
            else:
                correct_df.iloc[i * new_length : (i + 1) * new_length, -1] = f"21 {i}"
        df_equals(correct_df, new_dfs[21])

        # The CSV files were written before postprocessing, so they must not
        # contain the postprocessor column.
        correct_df = correct_df.drop(columns=["new_col_proc"])
        for i in range(desired_num_partitions):
            if i == desired_num_partitions - 1:
                correct_partition = correct_df.iloc[i * new_length :]
            else:
                correct_partition = correct_df.iloc[
                    i * new_length : (i + 1) * new_length
                ]
            # The message must be an f-string, otherwise the partition number
            # is never shown in the failure output.
            assert exists(
                f"{i}.csv"
            ), f"CSV File for Partition {i} does not exist, even though dataframe should have been repartitioned."
            df_equals(
                correct_partition,
                pd.read_csv(f"{i}.csv", index_col="Unnamed: 0").rename(
                    columns={"0": 0, "1": 1, "2": 2}
                ),
            )
            remove(f"{i}.csv")


@pytest.mark.skipif(
    Engine.get() == "Ray",
    reason="Ray supports the Batch Pipeline API",
)
def test_pipeline_unsupported_engine():
    """Ensure that trying to use the Pipeline API with an unsupported Engine raises errors.

    The test temporarily switches the global `Engine` config to "Ray"; the
    original value is restored in a `finally` block so that a failing assertion
    cannot leak the changed engine into other tests.
    """
    unsupported_message = (
        "Batch Pipeline API is only implemented for `PandasOnRay` execution."
    )
    # Check that pipeline does not allow `Engine` to not be Ray.
    df = pd.DataFrame([[1]])
    with pytest.raises(NotImplementedError, match=unsupported_message):
        PandasQueryPipeline(df)

    eng = Engine.get()
    Engine.put("Ray")
    try:
        # Check that even if Engine is Ray, if the df is not backed by Ray, the Pipeline does not allow initialization.
        with pytest.raises(NotImplementedError, match=unsupported_message):
            PandasQueryPipeline(df, 0)
        df_on_ray_engine = pd.DataFrame([[1]])
        pipeline = PandasQueryPipeline(df_on_ray_engine)
        # Check that even if Engine is Ray, if the new df is not backed by Ray, the Pipeline does not allow an update.
        with pytest.raises(NotImplementedError, match=unsupported_message):
            pipeline.update_df(df)
    finally:
        # Always restore the engine that was active when the test started.
        Engine.put(eng)
    # Check that pipeline does not allow an update when `Engine` is not Ray.
    with pytest.raises(NotImplementedError, match=unsupported_message):
        pipeline.update_df(df)


================================================
FILE: modin/tests/experimental/torch/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.


================================================
FILE: modin/tests/experimental/torch/test_dataloader.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.
from __future__ import annotations

from types import ModuleType
from typing import Type

import numpy as np
import pandas
import pytest
import ray
import torch
from torch.utils.data import RandomSampler, Sampler, SequentialSampler

import modin.pandas as pd
from modin.experimental.torch.datasets import ModinDataLoader


@pytest.fixture(scope="module", autouse=True)
def ray_fix():
    """Initialize Ray with a single CPU before the module's tests and shut it down afterwards."""
    ray.init(num_cpus=1)
    # Nothing is handed to the tests; the fixture exists for setup/teardown only.
    yield
    ray.shutdown()


def _load_test_dataframe(lib: ModuleType):
    df = lib.read_csv(
        "https://raw.githubusercontent.com/ponder-org/ponder-datasets/main/USA_Housing.csv"
    )
    return df


def _collect_batches(lib: ModuleType, sampler_cls: Type[Sampler], batch_size: int):
    """Iterate a `ModinDataLoader` over the test dataset and return every batch.

    Both the NumPy and the torch seeds are reset to 42 before the loader is
    built, so two calls with the same arguments start from the same random state.

    Parameters
    ----------
    lib : ModuleType
        Module used to read the dataset (``pandas`` or ``modin.pandas``).
    sampler_cls : Type[Sampler]
        Sampler class handed to the loader.
    batch_size : int
        Requested batch size; every batch is asserted to have at most this many rows.

    Returns
    -------
    list
        The batches in the order the loader produced them.
    """
    df = _load_test_dataframe(lib)
    np.random.seed(42)
    torch.manual_seed(42)
    loader = ModinDataLoader(
        df,
        batch_size=batch_size,
        features=[
            "AVG_AREA_INCOME",
            "AVG_AREA_HOUSE_AGE",
            "AVG_AREA_NUM_ROOMS",
            "AVG_AREA_NUM_BEDROOMS",
            "POPULATION",
            "PRICE",
        ],
        sampler=sampler_cls,
    )

    outputs = []
    for batch in loader:
        assert batch.shape[0] <= batch_size, batch.shape
        assert batch.shape[1] == 6, batch.shape

        outputs.append(batch)

    return outputs


@pytest.mark.parametrize("lib", [pandas, pd])
@pytest.mark.parametrize("sampler_cls", [RandomSampler, SequentialSampler])
@pytest.mark.parametrize("batch_size", [16, 37])
def test_torch_dataloader(lib: ModuleType, sampler_cls: Type[Sampler], batch_size: int):
    """Check the shape of every batch produced by `ModinDataLoader`."""
    # The shape assertions live in the helper. The test itself returns nothing,
    # because pytest flags test functions that return a non-None value.
    _collect_batches(lib, sampler_cls, batch_size)


@pytest.mark.parametrize("sampler_cls", [RandomSampler, SequentialSampler])
@pytest.mark.parametrize("batch_size", [16, 37])
def test_compare_dataloaders(sampler_cls: Type[Sampler], batch_size: int):
    """Check that loading through Modin and through pandas yields matching batches."""
    by_modin = _collect_batches(pd, sampler_cls, batch_size=batch_size)
    by_pandas = _collect_batches(pandas, sampler_cls, batch_size=batch_size)

    assert len(by_modin) == len(by_pandas)
    for tensor_by_modin, tensor_by_pandas in zip(by_modin, by_pandas):
        assert np.allclose(tensor_by_modin, tensor_by_pandas), (
            tensor_by_modin - tensor_by_pandas
        )


================================================
FILE: modin/tests/experimental/xgboost/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.


================================================
FILE: modin/tests/experimental/xgboost/test_default.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.


import pytest

import modin.experimental.xgboost as xgb
import modin.pandas as pd
from modin.config import Engine


@pytest.mark.skipif(
    Engine.get() == "Ray",
    reason="This test doesn't make sense on Ray engine.",
)
@pytest.mark.skipif(
    Engine.get() == "Python",
    reason="This test doesn't make sense on non-distributed engine (see issue #2938).",
)
def test_engine():
    """Call Modin's xgboost `train` on a non-Ray engine, tolerating a `ValueError`."""
    # NOTE(review): the test passes both when `ValueError` is raised and when
    # nothing is raised - confirm whether the error should be required.
    try:
        features = pd.DataFrame([0])
        labels = pd.DataFrame([0])
        dmatrix = xgb.DMatrix(features, labels)
        xgb.train({}, dmatrix)
    except ValueError:
        pass


================================================
FILE: modin/tests/experimental/xgboost/test_dmatrix.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import numpy as np
import pandas
import pytest
import xgboost as xgb
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import accuracy_score

import modin.experimental.xgboost as mxgb
import modin.pandas as pd
from modin.config import Engine
from modin.utils import try_cast_to_pandas

# Skip every test in this module at collection time unless the configured
# Modin engine is Ray.
if Engine.get() != "Ray":
    pytest.skip(
        "Modin' xgboost extension works only with Ray engine.",
        allow_module_level=True,
    )


# Module-level generator with a fixed seed; `test_feature_weights` draws from it.
rng = np.random.RandomState(1994)


def check_dmatrix(data, label=None, **kwargs):
    """Build a `DMatrix` with xgboost and with Modin's xgboost and compare the outcome.

    If xgboost raises while constructing its `DMatrix`, Modin is required to
    raise as well and the exception types are compared. Otherwise the row
    count, column count, feature names and feature types of both matrices
    must be equal.

    Parameters
    ----------
    data : object
        Input passed as-is to xgboost and wrapped in `pd.DataFrame` for Modin.
    label : object, optional
        Labels passed as-is to xgboost and wrapped in `pd.Series` for Modin
        (left as None when not given).
    **kwargs : dict
        Extra keyword arguments forwarded to both `DMatrix` constructors.
    """
    modin_data = pd.DataFrame(data)
    modin_label = None if label is None else pd.Series(label)
    try:
        reference = xgb.DMatrix(data, label=label, **kwargs)
    except Exception as xgb_exception:
        with pytest.raises(Exception) as mxgb_exception:
            mxgb.DMatrix(modin_data, label=modin_label, **kwargs)
        modin_exception_type = type(mxgb_exception.value)
        failure_message = "Got Modin Exception type {}, but xgboost Exception type {} was expected".format(
            modin_exception_type, type(xgb_exception)
        )
        # Thrown exceptions are `XGBoostError`, which is a descendant of `ValueError`, and `ValueError`
        # for XGBoost and Modin, respectively, so the xgboost exception is intentionally
        # the first argument of `isinstance` to make the assertion pass.
        assert isinstance(xgb_exception, modin_exception_type), failure_message
        return

    candidate = mxgb.DMatrix(modin_data, label=modin_label, **kwargs)
    assert candidate.num_row() == reference.num_row()
    assert candidate.num_col() == reference.num_col()
    assert candidate.feature_names == reference.feature_names
    assert candidate.feature_types == reference.feature_types


@pytest.mark.parametrize(
    "data",
    [
        np.random.randn(5, 5),
        np.array([[1, 2], [3, 4]]),
        np.array([["a", "b"], ["c", "d"]]),
        [[1, 2], [3, 4]],
        [["a", "b"], ["c", "d"]],
    ],
)
@pytest.mark.parametrize(
    "feature_names",
    [
        list("abcdef"),
        ["a", "b", "c", "d", "d"],
        ["a", "b", "c", "d", "e<1"],
        list("abcde"),
    ],
)
@pytest.mark.parametrize(
    "feature_types",
    [None, "q", list("qiqiq")],
)
def test_dmatrix_feature_names_and_feature_types(data, feature_names, feature_types):
    """Run `check_dmatrix` for every combination of data, feature names and feature types."""
    dmatrix_kwargs = {
        "feature_names": feature_names,
        "feature_types": feature_types,
    }
    check_dmatrix(data, **dmatrix_kwargs)


@pytest.mark.skipif(
    Engine.get() != "Ray",
    reason="implemented only for Ray engine.",
)
def test_feature_names():
    """Train with custom feature names on both backends and compare the accuracy.

    Also checks that predicting on a `DMatrix` built without those feature
    names raises `ValueError` for both xgboost and Modin's xgboost.
    """
    dataset = load_breast_cancer()
    features, target = dataset.data, dataset.target
    feature_names = [f"feat{column}" for column in range(features.shape[1])]

    check_dmatrix(features, target, feature_names=feature_names)

    native_dmatrix = xgb.DMatrix(features, label=target, feature_names=feature_names)
    modin_dmatrix = mxgb.DMatrix(
        pd.DataFrame(features), label=pd.Series(target), feature_names=feature_names
    )

    params = dict(objective="binary:logistic", eval_metric="mlogloss")

    native_booster = xgb.train(params, native_dmatrix, num_boost_round=10)
    modin_booster = mxgb.train(params, modin_dmatrix, num_boost_round=10)

    native_raw = native_booster.predict(native_dmatrix)
    modin_raw = modin_booster.predict(modin_dmatrix)

    # Round the predicted probabilities to class labels before scoring.
    native_labels = pandas.DataFrame(native_raw).apply(np.round, axis=0)
    modin_labels = modin_raw.apply(np.round, axis=0)

    native_accuracy = accuracy_score(target, native_labels)
    modin_accuracy = accuracy_score(target, modin_labels)

    np.testing.assert_allclose(
        native_accuracy, modin_accuracy, atol=0.005, rtol=0.002
    )

    # Different feature_names (default) must raise error in this case
    unnamed_native = xgb.DMatrix(features)
    unnamed_modin = mxgb.DMatrix(pd.DataFrame(features))
    with pytest.raises(ValueError):
        native_booster.predict(unnamed_native)
    with pytest.raises(ValueError):
        # force materialization
        try_cast_to_pandas(modin_booster.predict(unnamed_modin))


def test_feature_weights():
    """Check that feature weights set via `set_info` read back identically on both backends."""
    n_rows, n_cols = 10, 50
    # Draw the weights before the data so the shared generator is consumed in a fixed order.
    weights = rng.uniform(size=n_cols)
    features = rng.randn(n_rows, n_cols)
    native = xgb.DMatrix(features)
    modin = mxgb.DMatrix(pd.DataFrame(features))

    for dmatrix in (native, modin):
        dmatrix.set_info(feature_weights=weights)
    np.testing.assert_allclose(
        native.get_float_info("feature_weights"),
        modin.get_float_info("feature_weights"),
    )

    # Handle empty
    for dmatrix in (native, modin):
        dmatrix.set_info(feature_weights=np.empty((0,)))

    native_count = native.get_float_info("feature_weights").shape[0]
    modin_count = modin.get_float_info("feature_weights").shape[0]
    assert native_count == modin_count == 0


================================================
FILE: modin/tests/experimental/xgboost/test_xgboost.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.


import multiprocessing as mp

import numpy as np
import pytest
import ray
import xgboost
from sklearn.datasets import (
    load_breast_cancer,
    load_diabetes,
    load_digits,
    load_iris,
    load_wine,
)
from sklearn.metrics import accuracy_score, mean_squared_error

import modin
import modin.experimental.xgboost as xgb
import modin.pandas as pd
from modin.config import Engine
from modin.experimental.sklearn.model_selection.train_test_split import train_test_split

# Skip every test in this module at collection time unless the configured
# Modin engine is Ray.
if Engine.get() != "Ray":
    pytest.skip("Implemented only for Ray engine.", allow_module_level=True)

# Ray is initialized as a side effect of importing this module.
ray.init(log_to_driver=False)

# Host CPU count; used below as one of the `num_actors` parametrization values.
num_cpus = mp.cpu_count()


@pytest.mark.parametrize(
    "modin_type_y",
    [pd.DataFrame, pd.Series],
)
@pytest.mark.parametrize(
    "num_actors",
    [1, num_cpus, None, modin.config.NPartitions.get() + 1],
)
@pytest.mark.parametrize(
    "data",
    [
        (
            load_breast_cancer(),
            {"objective": "binary:logistic", "eval_metric": ["logloss", "error"]},
        ),
    ],
    ids=["load_breast_cancer"],
)
def test_xgb_with_binary_classification_datasets(data, num_actors, modin_type_y):
    """Compare xgboost and Modin's xgboost on a binary classification dataset.

    Both backends are trained for the same number of rounds; the per-round
    evaluation metrics and the final accuracy must agree within tolerances.

    Parameters
    ----------
    data : tuple
        ``(dataset, params)`` pair: the dataset exposes ``data`` and ``target``,
        and ``params`` is the training parameter dict (with an ``eval_metric`` list).
    num_actors : int or None
        Forwarded to Modin's `train` as ``num_actors``.
    modin_type_y : type
        Modin container (`pd.DataFrame` or `pd.Series`) used to wrap the labels.
    """
    dataset, param = data
    num_round = 10

    X = dataset.data
    y = dataset.target
    xgb_dmatrix = xgboost.DMatrix(X, label=y)

    modin_X = pd.DataFrame(X)
    modin_y = modin_type_y(y)
    mxgb_dmatrix = xgb.DMatrix(modin_X, label=modin_y)

    evals_result_xgb = {}
    evals_result_mxgb = {}
    verbose_eval = False
    bst = xgboost.train(
        param,
        xgb_dmatrix,
        num_round,
        evals_result=evals_result_xgb,
        evals=[(xgb_dmatrix, "train")],
        verbose_eval=verbose_eval,
    )
    modin_bst = xgb.train(
        param,
        mxgb_dmatrix,
        num_round,
        evals_result=evals_result_mxgb,
        evals=[(mxgb_dmatrix, "train")],
        num_actors=num_actors,
        verbose_eval=verbose_eval,
    )

    for par in param["eval_metric"]:
        xgb_history = evals_result_xgb["train"][par]
        mxgb_history = evals_result_mxgb["train"][par]
        # Compare the xgboost history against the Modin one; comparing a
        # history with itself would make this check vacuous.
        assert len(xgb_history) == len(mxgb_history)
        for xgb_value, mxgb_value in zip(xgb_history, mxgb_history):
            np.testing.assert_allclose(xgb_value, mxgb_value, atol=0.011)

    predictions = bst.predict(xgb_dmatrix)
    modin_predictions = modin_bst.predict(mxgb_dmatrix)

    # Round the predicted probabilities to class labels before scoring.
    preds = pd.DataFrame(predictions).apply(round)
    modin_preds = modin_predictions.apply(round)

    val = accuracy_score(y, preds)
    modin_val = accuracy_score(modin_y, modin_preds)

    np.testing.assert_allclose(val, modin_val, atol=0.002, rtol=0.002)


@pytest.mark.parametrize(
    "modin_type_y",
    [pd.DataFrame, pd.Series],
)
@pytest.mark.parametrize(
    "num_actors",
    [1, num_cpus, None, modin.config.NPartitions.get() + 1],
)
@pytest.mark.parametrize(
    "data",
    [
        (
            load_iris(),
            {"num_class": 3},
        ),
        (
            load_digits(),
            {"num_class": 10},
        ),
        (
            load_wine(),
            {"num_class": 3},
        ),
    ],
    ids=["load_iris", "load_digits", "load_wine"],
)
def test_xgb_with_multiclass_classification_datasets(data, num_actors, modin_type_y):
    """Compare xgboost and Modin's xgboost on multiclass classification datasets.

    Both backends train with the ``multi:softprob`` objective; the per-round
    ``mlogloss`` values and the accuracy of the arg-max predictions must agree.
    """
    dataset, dataset_params = data
    num_round = 10
    # Dataset-specific parameters come first so the shared ones take precedence.
    param = {
        **dataset_params,
        "objective": "multi:softprob",
        "eval_metric": "mlogloss",
    }

    features = dataset.data
    target = dataset.target
    xgb_dmatrix = xgboost.DMatrix(features, label=target)

    modin_target = modin_type_y(target)
    mxgb_dmatrix = xgb.DMatrix(pd.DataFrame(features), label=modin_target)

    xgb_evals = {}
    mxgb_evals = {}
    bst = xgboost.train(
        param,
        xgb_dmatrix,
        num_round,
        evals_result=xgb_evals,
        evals=[(xgb_dmatrix, "train")],
        verbose_eval=False,
    )
    modin_bst = xgb.train(
        param,
        mxgb_dmatrix,
        num_round,
        evals_result=mxgb_evals,
        evals=[(mxgb_dmatrix, "train")],
        num_actors=num_actors,
        verbose_eval=False,
    )

    xgb_history = xgb_evals["train"]["mlogloss"]
    mxgb_history = mxgb_evals["train"]["mlogloss"]
    assert len(xgb_history) == len(mxgb_history)
    for xgb_value, mxgb_value in zip(xgb_history, mxgb_history):
        np.testing.assert_allclose(xgb_value, mxgb_value, atol=0.009)

    xgb_probabilities = bst.predict(xgb_dmatrix)
    mxgb_probabilities = modin_bst.predict(mxgb_dmatrix)

    # Pick the most probable class for every row.
    xgb_classes = np.asarray([np.argmax(row) for row in xgb_probabilities])
    mxgb_classes = np.asarray(
        [np.argmax(row) for row in mxgb_probabilities.to_numpy()]
    )

    xgb_accuracy = accuracy_score(target, xgb_classes)
    mxgb_accuracy = accuracy_score(modin_target, mxgb_classes)

    np.testing.assert_allclose(xgb_accuracy, mxgb_accuracy)


@pytest.mark.parametrize(
    "modin_type_y",
    [pd.DataFrame, pd.Series],
)
@pytest.mark.parametrize(
    "num_actors",
    [1, num_cpus, None, modin.config.NPartitions.get() + 1],
)
@pytest.mark.parametrize(
    "data",
    [(load_diabetes(), {"eta": 0.01})],
    ids=["load_diabetes"],
)
def test_xgb_with_regression_datasets(data, num_actors, modin_type_y):
    """Compare xgboost and Modin's xgboost on a regression dataset.

    Both backends are trained on the same train/test split; the per-round
    ``rmse`` values for both evaluation sets and the final mean squared error
    on the training set must agree within tolerances.
    """
    dataset, param = data
    num_round = 10

    features_df = pd.DataFrame(dataset.data)
    target_df = modin_type_y(dataset.target)
    X_train, X_test = train_test_split(features_df)
    y_train, y_test = train_test_split(target_df)

    train_xgb_dmatrix = xgboost.DMatrix(X_train, label=y_train)
    test_xgb_dmatrix = xgboost.DMatrix(X_test, label=y_test)

    train_mxgb_dmatrix = xgb.DMatrix(X_train, label=y_train)
    test_mxgb_dmatrix = xgb.DMatrix(X_test, label=y_test)

    xgb_evals = {}
    mxgb_evals = {}
    bst = xgboost.train(
        param,
        train_xgb_dmatrix,
        num_round,
        evals_result=xgb_evals,
        evals=[(train_xgb_dmatrix, "train"), (test_xgb_dmatrix, "test")],
        verbose_eval=False,
    )
    modin_bst = xgb.train(
        param,
        train_mxgb_dmatrix,
        num_round,
        evals_result=mxgb_evals,
        evals=[(train_mxgb_dmatrix, "train"), (test_mxgb_dmatrix, "test")],
        num_actors=num_actors,
        verbose_eval=False,
    )

    for split_name in ("train", "test"):
        xgb_history = xgb_evals[split_name]["rmse"]
        mxgb_history = mxgb_evals[split_name]["rmse"]
        assert len(xgb_history) == len(mxgb_history)
        for xgb_value, mxgb_value in zip(xgb_history, mxgb_history):
            np.testing.assert_allclose(xgb_value, mxgb_value, rtol=0.0007)

    xgb_predictions = bst.predict(train_xgb_dmatrix)
    mxgb_predictions = modin_bst.predict(train_mxgb_dmatrix)

    xgb_mse = mean_squared_error(y_train, xgb_predictions)
    mxgb_mse = mean_squared_error(y_train, mxgb_predictions)

    np.testing.assert_allclose(xgb_mse, mxgb_mse, rtol=1.25e-05)


def test_invalid_input():
    """Check that the Modin xgboost API asserts on inputs of unsupported types."""
    raw_rows = [[1, 2.0, True], [2, 3.0, False]]

    # DMatrix must be built from a DataFrame, not from a plain list.
    with pytest.raises(AssertionError):
        xgb.DMatrix(raw_rows, label=pd.Series([1, 2]))

    params = {}
    boost_rounds = 2

    # train() must be given a DMatrix, not a plain list.
    with pytest.raises(AssertionError):
        xgb.train(params, raw_rows, boost_rounds)

    # Train a valid booster so that predict() can be exercised with bad input.
    frame = pd.DataFrame([[1, 2.0, True], [2, 3.0, False]], columns=["a", "b", "c"])
    dtrain = xgb.DMatrix(frame, label=pd.Series([1, 2]))
    booster = xgb.train(params, dtrain, boost_rounds)

    plain_rows = [[1, 2.0, 3.3], [2, 3.0, 4.4]]

    # predict() must be given a DMatrix, not a plain list.
    with pytest.raises(AssertionError):
        booster.predict(plain_rows)


================================================
FILE: modin/tests/interchange/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.


================================================
FILE: modin/tests/interchange/dataframe_protocol/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.


================================================
FILE: modin/tests/interchange/dataframe_protocol/base/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.


================================================
FILE: modin/tests/interchange/dataframe_protocol/base/test_sanity.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Basic sanity checks for the DataFrame exchange protocol."""

import pytest

import modin.pandas as pd
from modin.tests.pandas.utils import default_to_pandas_ignore_string


def test_sanity():
    """Test that the DataFrame protocol module is valid and could be imported correctly."""
    # The import itself is the test: the name is intentionally unused, hence the ``noqa``.
    from modin.core.dataframe.base.interchange.dataframe_protocol.dataframe import (  # noqa
        ProtocolDataframe,
    )


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
def test_basic_io(get_unique_base_execution):
    """Verify that the protocol IO entry points dispatch to the query compiler implementation."""

    class TestPassed(BaseException):
        """Sentinel raised by the stub to signal that the implementation was reached."""

    def dummy_io_method(*args, **kwargs):
        """Stand in for the exchange protocol implementation and report that it was called."""
        raise TestPassed

    # Replace both protocol hooks of the query compiler under test with the stub.
    query_compiler_cls = get_unique_base_execution
    for hook_name in ("from_interchange_dataframe", "to_interchange_dataframe"):
        setattr(query_compiler_cls, hook_name, dummy_io_method)

    from modin.pandas.io import from_dataframe

    # Import path: ``from_dataframe`` must end up in the stubbed hook.
    with pytest.raises(TestPassed):
        from_dataframe(None)

    # Export path: ``__dataframe__`` must end up in the stubbed hook.
    with pytest.raises(TestPassed):
        pd.DataFrame([[1]]).__dataframe__()


================================================
FILE: modin/tests/interchange/dataframe_protocol/base/test_utils.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Tests for common utility functions of the DataFrame exchange protocol."""

import numpy as np
import pandas
import pytest

from modin.core.dataframe.base.interchange.dataframe_protocol.utils import (
    pandas_dtype_to_arrow_c,
)


# TODO: use ArrowSchema to get the reference C-string.
# At the time of writing, there is no way to access the ArrowSchema holding a type format string from Python.
# The only way to 'touch' it is to export the structure to a C-pointer:
# https://github.com/apache/arrow/blob/5680d209fd870f99134e2d7299b47acd90fabb8e/python/pyarrow/types.pxi#L230-L239
@pytest.mark.parametrize(
    "pandas_dtype, c_string",
    [
        # Each pair is (dtype, expected format string). The strings follow the
        # Arrow C data interface format-string convention.
        (np.dtype("bool"), "b"),
        (np.dtype("int8"), "c"),
        (np.dtype("uint8"), "C"),
        (np.dtype("int16"), "s"),
        (np.dtype("uint16"), "S"),
        (np.dtype("int32"), "i"),
        (np.dtype("uint32"), "I"),
        (np.dtype("int64"), "l"),
        (np.dtype("uint64"), "L"),
        (np.dtype("float16"), "e"),
        (np.dtype("float32"), "f"),
        (np.dtype("float64"), "g"),
        # Whatever dtype pandas infers for a Series of Python strings.
        (pandas.Series(["a"]).dtype, "u"),
        # Nanosecond-resolution datetime dtype.
        (
            pandas.Series([0]).astype("datetime64[ns]").dtype,
            "tsn:",
        ),
    ],
)
def test_dtype_to_arrow_c(pandas_dtype, c_string):  # noqa PR01
    """Test that ``pandas_dtype_to_arrow_c`` returns the expected format string for each dtype."""
    assert pandas_dtype_to_arrow_c(pandas_dtype) == c_string


================================================
FILE: modin/tests/interchange/dataframe_protocol/pandas/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.


================================================
FILE: modin/tests/interchange/dataframe_protocol/pandas/test_protocol.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Dataframe exchange protocol tests that are specific for pandas storage format implementation."""

import pandas

import modin.pandas as pd
from modin.pandas.io import from_dataframe
from modin.tests.pandas.utils import df_equals, test_data
from modin.tests.test_utils import (
    df_or_series_using_native_execution,
    warns_that_defaulting_to_pandas_if,
)


def eval_df_protocol(modin_df_producer):
    """Round-trip ``modin_df_producer`` through ``from_dataframe`` and validate the results.

    The frame is imported twice: once from the Modin DataFrame itself and once
    from the object returned by its ``__dataframe__()``. Both imports must
    produce new objects that compare equal to the producer.
    """
    protocol_df = modin_df_producer.__dataframe__()
    # Our configuration in pytest.ini requires that we explicitly catch all
    # instances of defaulting to pandas, this one raises a warning on `.from_dataframe`
    expect_default_to_pandas = not df_or_series_using_native_execution(
        modin_df_producer
    )
    with warns_that_defaulting_to_pandas_if(expect_default_to_pandas):
        consumer_from_modin = from_dataframe(modin_df_producer)
        consumer_from_protocol = from_dataframe(protocol_df)

    # TODO: the following assertions verify that `from_dataframe` doesn't return
    # the same object untouched due to optimization branching, it actually should
    # do so but the logic is not implemented yet, so the assertions are passing
    # for now. It's required to replace the producer's type with a different one
    # to consumer when we have some other implementation of the protocol as the
    # assertions may start failing shortly.
    assert modin_df_producer is not consumer_from_modin
    assert protocol_df is not consumer_from_protocol
    producer_frame = modin_df_producer._query_compiler._modin_frame
    consumer_frame = consumer_from_modin._query_compiler._modin_frame
    assert producer_frame is not consumer_frame

    for consumer in (consumer_from_modin, consumer_from_protocol):
        df_equals(modin_df_producer, consumer)


def test_simple_import():
    """Round-trip a frame built from the shared integer test data."""
    eval_df_protocol(pd.DataFrame(test_data["int_data"]))


def test_categorical_from_dataframe():
    """Round-trip a frame holding a single categorical column."""
    categorical = pd.Series(
        ["0", "1", "2", "3", "0", "3", "2", "3"], dtype="category"
    )
    eval_df_protocol(pd.DataFrame({"foo": categorical}))


def test_from_dataframe_with_empty_dataframe():
    """Round-trip a frame that has one typed column and no rows."""
    empty_df = pd.DataFrame({"foo_col": pd.Series([], dtype="int64")})
    expect_default_to_pandas = not df_or_series_using_native_execution(empty_df)
    # The whole evaluation runs under an additional default-to-pandas warning check.
    with warns_that_defaulting_to_pandas_if(expect_default_to_pandas):
        eval_df_protocol(empty_df)


def test_interchange_with_pandas_string():
    """Check that pandas can import a string column exported by Modin."""
    modin_df = pd.DataFrame({"fips": ["01001"]})
    protocol_df = modin_df.__dataframe__()
    pandas_df = pandas.api.interchange.from_dataframe(protocol_df)
    df_equals(modin_df, pandas_df)


def test_interchange_with_datetime():
    """Round-trip datetime columns with second and nanosecond resolution."""
    first_day = pd.Timestamp("2024-01-01", unit="ns")
    last_day = pd.Timestamp("2024-03-01", unit="ns")
    days = pd.date_range(start=first_day, end=last_day, freq="D")

    columns = {
        "datetime_s": days.astype("datetime64[s]"),
        "datetime_ns": days.astype("datetime64[ns]"),
    }
    eval_df_protocol(pd.DataFrame(columns))


================================================
FILE: modin/tests/interchange/dataframe_protocol/test_general.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Dataframe exchange protocol tests that are common for every implementation."""

import ctypes
import math

import pytest

import modin.pandas as pd


@pytest.fixture
def df_from_dict():
    """Provide a factory that builds a Modin DataFrame from a dict of columns."""

    def maker(dct, is_categorical=False):
        # ``dtype=None`` lets the constructor infer dtypes; otherwise force categorical.
        dtype = "category" if is_categorical else None
        return pd.DataFrame(dct, dtype=dtype)

    return maker


@pytest.mark.parametrize(
    "test_data",
    [
        {"a": ["foo", "bar"], "b": ["baz", "qux"]},
        {"a": [1.5, 2.5, 3.5], "b": [9.2, 10.5, 11.8]},
        {"A": [1, 2, 3, 4], "B": [1, 2, 3, 4]},
    ],
    ids=["str_data", "float_data", "int_data"],
)
def test_only_one_dtype(test_data, df_from_dict):
    """Check null count, size and offset of every column in single-dtype frames."""
    dfX = df_from_dict(test_data).__dataframe__()

    column_names = list(test_data)
    expected_size = len(test_data[column_names[0]])
    for name in column_names:
        colX = dfX.get_column_by_name(name)
        assert colX.null_count == 0
        assert colX.size() == expected_size
        assert colX.offset == 0


def test_float_int(df_from_dict):
    """Check column metadata and dtype kind codes for a frame with mixed dtypes."""
    source = {
        "a": [1, 2, 3],
        "b": [3, 4, 5],
        "c": [1.5, 2.5, 3.5],
        "d": [9, 10, 11],
        "e": [True, False, True],
        "f": ["a", "", "c"],
    }
    dfX = df_from_dict(source).__dataframe__()

    # Expected first element of ``dtype`` (the kind code) for each column.
    expected_kinds = {"a": 0, "b": 0, "c": 2, "d": 0, "e": 20, "f": 21}
    for name, expected_kind in expected_kinds.items():
        colX = dfX.get_column_by_name(name)
        assert colX.null_count == 0
        assert colX.size() == 3
        assert colX.offset == 0
        assert colX.dtype[0] == expected_kind

    # The second element of ``dtype`` for the float column must be 64.
    float_dtype = dfX.get_column_by_name("c").dtype
    assert float_dtype[1] == 64


def test_na_float(df_from_dict):
    """A single NaN in a float column must be reported as one null."""
    dfX = df_from_dict({"a": [1.0, math.nan, 2.0]}).__dataframe__()
    assert dfX.get_column_by_name("a").null_count == 1


def test_null_count(df_from_dict):
    """``null_count`` must be zero and exactly of type ``int`` for a column without nulls."""
    dfX = df_from_dict({"foo": [42]}).__dataframe__()
    null_count = dfX.get_column_by_name("foo").null_count
    assert null_count == 0
    # Exact type check on purpose: subclasses of ``int`` are not accepted.
    assert type(null_count) is int


def test_noncategorical(df_from_dict):
    """Accessing ``describe_categorical`` on an integer column must raise ``TypeError``."""
    dfX = df_from_dict({"a": [1, 2, 3]}).__dataframe__()
    colX = dfX.get_column_by_name("a")
    with pytest.raises(TypeError):
        # Attribute access alone is expected to raise.
        colX.describe_categorical


def test_categorical(df_from_dict):
    """Check that the first two ``describe_categorical`` values of a categorical column are booleans."""
    weekdays = ["Mon", "Tue", "Mon", "Wed", "Mon", "Thu", "Fri", "Sat", "Sun"]
    df = df_from_dict({"weekday": weekdays}, is_categorical=True)

    colX = df.__dataframe__().get_column_by_name("weekday")
    description = colX.describe_categorical
    # Exactly three values are expected; the third one is not inspected here.
    is_ordered, is_dictionary, _ = description.values()
    assert isinstance(is_ordered, bool)
    assert isinstance(is_dictionary, bool)


def test_dataframe(df_from_dict):
    """Check frame-level metadata and column selection by position and by name."""
    source = {"x": [True, True, False], "y": [1, 2, 0], "z": [9.2, 10.5, 11.8]}
    dfX = df_from_dict(source).__dataframe__()

    assert dfX.num_columns() == 3
    assert dfX.num_rows() == 3
    assert dfX.num_chunks() == 1
    assert list(dfX.column_names()) == ["x", "y", "z"]

    # Selecting by position and by name must yield the same column names.
    names_by_position = list(dfX.select_columns((0, 2)).column_names())
    names_by_label = list(dfX.select_columns_by_name(("x", "z")).column_names())
    assert names_by_position == names_by_label


@pytest.mark.parametrize(["size", "n_chunks"], [(10, 3), (12, 3), (12, 5)])
def test_df_get_chunks(size, n_chunks, df_from_dict):
    """Frame chunking must yield ``n_chunks`` chunks whose row counts sum to ``size``."""
    dfX = df_from_dict({"x": list(range(size))}).__dataframe__()
    chunks = list(dfX.get_chunks(n_chunks))
    assert len(chunks) == n_chunks
    total_rows = sum(chunk.num_rows() for chunk in chunks)
    assert total_rows == size


@pytest.mark.parametrize(["size", "n_chunks"], [(10, 3), (12, 3), (12, 5)])
def test_column_get_chunks(size, n_chunks, df_from_dict):
    """Column chunking must yield ``n_chunks`` chunks whose sizes sum to ``size``."""
    dfX = df_from_dict({"x": list(range(size))}).__dataframe__()
    chunks = list(dfX.get_column(0).get_chunks(n_chunks))
    assert len(chunks) == n_chunks
    total_size = sum(chunk.size() for chunk in chunks)
    assert total_size == size


def test_get_columns(df_from_dict):
    """Check per-column size, chunk count and dtype kind codes via ``get_columns``/``get_column``."""
    dfX = df_from_dict({"a": [0, 1], "b": [2.5, 3.5]}).__dataframe__()
    for colX in dfX.get_columns():
        assert colX.size() == 2
        assert colX.num_chunks() == 1

    # Kind code 0 is expected for the integer column and 2 for the float column.
    for position, expected_kind in ((0, 0), (1, 2)):
        assert dfX.get_column(position).dtype[0] == expected_kind


def test_buffer(df_from_dict):
    """Check the data buffer of an integer column, reading raw memory when it lives on CPU."""
    expected_values = [0, 1, -1]
    # Keep every intermediate object referenced while the raw pointer is in use.
    df = df_from_dict({"a": expected_values})
    dfX = df.__dataframe__()
    colX = dfX.get_column(0)
    bufX = colX.get_buffers()

    dataBuf, dataDtype = bufX["data"]
    assert dataBuf.bufsize > 0
    assert dataBuf.ptr != 0
    device, _ = dataBuf.__dlpack_device__()

    assert dataDtype[0] == 0

    if device != 1:
        # CPU-only as the remainder of the test directly reads memory.
        return

    bitwidth = dataDtype[1]
    ctype_by_bitwidth = {
        8: ctypes.c_int8,
        16: ctypes.c_int16,
        32: ctypes.c_int32,
        64: ctypes.c_int64,
    }
    ctype = ctype_by_bitwidth[bitwidth]
    itemsize = bitwidth // 8

    for idx, truth in enumerate(expected_values):
        val = ctype.from_address(dataBuf.ptr + idx * itemsize).value
        assert val == truth, f"Buffer at index {idx} mismatch"


================================================
FILE: modin/tests/numpy/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.


================================================
FILE: modin/tests/numpy/test_array.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import warnings

import numpy
import pytest

import modin.numpy as np

from .utils import assert_scalar_or_array_equal


@pytest.fixture
def change_numpy_print_threshold():
    """Set NumPy's print threshold to 50 for the test and restore the previous value afterwards."""
    saved_threshold = numpy.get_printoptions()["threshold"]
    numpy.set_printoptions(threshold=50)
    # The test receives the previous threshold as the fixture value.
    yield saved_threshold
    numpy.set_printoptions(threshold=saved_threshold)


@pytest.mark.parametrize(
    "size",
    [
        100,
        (2, 100),
        (100, 2),
        (1, 100),
        (100, 1),
        (100, 100),
        (6, 100),
        (100, 6),
        (100, 7),
        (7, 100),
    ],
)
def test_repr(size, change_numpy_print_threshold):
    """``repr`` of a Modin array must match ``repr`` of the NumPy array it wraps."""
    reference = numpy.random.randint(-100, 100, size=size)
    expected_repr = repr(reference)
    assert repr(np.array(reference)) == expected_repr


@pytest.mark.parametrize("size", [100, (2, 100), (100, 2), (1, 100), (100, 1)])
def test_shape(size):
    """A Modin array must report the same shape as the NumPy array it was built from."""
    reference = numpy.random.randint(-100, 100, size=size)
    assert np.array(reference).shape == reference.shape


def test_dtype():
    """dtype must match NumPy both for mixed input and for a comparison result."""
    numpy_arr = numpy.array([[1, "2"], [3, "4"]])
    modin_arr = np.array([[1, "2"], [3, "4"]])
    assert modin_arr.dtype == numpy_arr.dtype

    # Comparing each array with its transpose; the result dtypes must agree too.
    modin_cmp = modin_arr == modin_arr.T
    numpy_cmp = numpy_arr == numpy_arr.T
    assert modin_cmp.dtype == numpy_cmp.dtype


def test_conversion():
    """Check ``try_convert_from_interoperable_type`` on Modin and on pandas objects."""
    import modin.pandas as pd
    from modin.numpy.utils import try_convert_from_interoperable_type

    df = pd.DataFrame(numpy.random.randint(0, 100, size=(100, 100)))
    series = df.iloc[0]

    # Modin DataFrame/Series become Modin arrays holding the same values.
    df_converted = try_convert_from_interoperable_type(df)
    assert isinstance(df_converted, np.array)
    series_converted = try_convert_from_interoperable_type(series)
    assert isinstance(series_converted, np.array)
    assert_scalar_or_array_equal(df_converted, df)
    assert_scalar_or_array_equal(series_converted, series)

    # pandas objects come back with the same type and equal content.
    for pandas_obj in (df._to_pandas(), series._to_pandas()):
        pandas_converted = try_convert_from_interoperable_type(pandas_obj)
        assert isinstance(pandas_converted, type(pandas_obj))
        assert pandas_converted.equals(pandas_obj)


def test_to_df():
    """Check that DataFrames built from Modin arrays equal pandas ones built from NumPy arrays."""
    import pandas

    import modin.pandas as pd
    from modin.tests.pandas.utils import df_equals

    # Plain construction from a 1-D and from a 2-D array.
    for values in ([1, 2, 3], [[1, 2, 3], [4, 5, 6]]):
        modin_df = pd.DataFrame(np.array(values))
        pandas_df = pandas.DataFrame(numpy.array(values))
        df_equals(pandas_df, modin_df)

    # Construction with explicit labels, without and with a dtype override.
    # Every combination is compared inside the loop, so no extra comparison
    # is needed once the loop has finished.
    for kw in [{}, {"dtype": str}]:
        modin_df, pandas_df = [
            frame_lib.DataFrame(
                array_lib.array([[1, 2, 3], [4, 5, 6]]),
                columns=["col 0", "col 1", "col 2"],
                index=pd.Index([4, 6]),
                **kw,
            )
            for frame_lib, array_lib in ((pd, np), (pandas, numpy))
        ]
        df_equals(pandas_df, modin_df)


def test_to_series():
    """Check Series construction from Modin arrays against pandas with NumPy arrays."""
    import pandas

    import modin.pandas as pd
    from modin.tests.pandas.utils import df_equals

    # A 2-D array cannot back a Series.
    with pytest.raises(ValueError, match="Data must be 1-dimensional"):
        pd.Series(np.array([[1, 2, 3], [4, 5, 6]]))

    # Same data and index, first with the inferred dtype and then cast to ``str``.
    for extra_kwargs in ({}, {"dtype": str}):
        modin_series = pd.Series(
            np.array([1, 2, 3]), index=pd.Index([-1, -2, -3]), **extra_kwargs
        )
        pandas_series = pandas.Series(
            numpy.array([1, 2, 3]), index=pandas.Index([-1, -2, -3]), **extra_kwargs
        )
        df_equals(modin_series, pandas_series)


def test_update_inplace():
    """An array created from ``out`` with ``copy=False`` must still equal ``out`` after ``np.add(..., out=out)``."""
    arr1 = np.array([1, 2, 3])
    # First a plain in-place add, then the same call with ``where=False``.
    for add_kwargs in ({}, {"where": False}):
        out = np.array([1, 2, 3])
        alias = np.array(out, copy=False)
        np.add(arr1, arr1, out=out, **add_kwargs)
        assert_scalar_or_array_equal(out, alias)


@pytest.mark.parametrize(
    "data_out",
    [
        numpy.zeros((1, 3)),
        numpy.zeros((2, 3)),
    ],
)
def test_out_broadcast(data_out):
    """Compare ``add`` with an ``out`` array between Modin and NumPy."""
    if data_out.shape == (2, 3):
        pytest.xfail("broadcasting would require duplicating row: see GH#5819")

    lhs = [[1, 2, 3]]
    rhs = [7, 8, 9]
    modin_out = np.array(data_out)
    numpy_out = numpy.array(data_out)

    numpy.add(numpy.array(lhs), numpy.array(rhs), out=numpy_out)
    np.add(np.array(lhs), np.array(rhs), out=modin_out)
    assert_scalar_or_array_equal(modin_out, numpy_out)


def test_out_broadcast_error():
    """``np.add`` must raise ``ValueError`` for incompatible input or output shapes."""
    with pytest.raises(ValueError):
        # Incompatible dimensions between inputs
        np.add(np.array([1, 2, 3]), np.array([[1, 2], [3, 4]]))

    # In every case below the inputs broadcast fine, but the output array
    # dimensions are wrong. Each entry is (left input, right input, out data).
    bad_out_cases = [
        # Single-element output for a 2x2 result
        ([[1, 2], [3, 4]], [1, 2], [0]),
        # Cannot broadcast a 2x2 result into a 1x2 array
        ([[1, 2], [3, 4]], [1, 2], [0, 0]),
        # Cannot broadcast 1x2 into 1D 2-element array
        ([[1, 2]], [1, 2], [0, 0]),
        # Cannot broadcast a 2x2 result into a 3x2 array.
        # Technically, our error message here does not match numpy's exactly, as the
        # numpy message will specify both input shapes, whereas we only specify the
        # shape of the default broadcast between the two inputs
        ([[1, 2], [3, 4]], [1, 2], [[0, 0], [0, 0], [0, 0]]),
    ]
    for lhs, rhs, out_data in bad_out_cases:
        with pytest.raises(ValueError):
            out = np.array(out_data)
            np.add(np.array(lhs), np.array(rhs), out=out)


@pytest.mark.parametrize("size", [100, (2, 100), (100, 2), (1, 100), (100, 1)])
def test_array_ufunc(size):
    """Check that NumPy ufuncs applied to Modin arrays agree with plain NumPy."""
    # Test ufunc.__call__
    numpy_arr = numpy.random.randint(-100, 100, size=size)
    modin_arr = np.array(numpy_arr)
    modin_result = numpy.sign(modin_arr)
    numpy_result = numpy.sign(numpy_arr)
    assert_scalar_or_array_equal(modin_result, numpy_result)
    # Test ufunc that we have support for.
    modin_result = numpy.add(modin_arr, modin_arr)
    numpy_result = numpy.add(numpy_arr, numpy_arr)
    assert_scalar_or_array_equal(modin_result, numpy_result)
    # Test ufunc that we have support for, but method that we do not implement.
    modin_result = numpy.add.reduce(modin_arr)
    numpy_result = numpy.add.reduce(numpy_arr)
    # ``add.reduce`` reduces along axis 0 by default, so for 2-D inputs the
    # result is an array rather than a scalar. A bare ``==`` inside ``assert``
    # is not an element-wise check in that case, hence the array-aware helper.
    assert_scalar_or_array_equal(modin_result, numpy_result)
    # ufunc.accumulate is not tested here.
    # NOTE(review): this comment used to state that ufunc.reduce is not tested
    # either, because both need a binary reduce operation that Modin does not
    # currently support; ufunc.reduce is nevertheless exercised just above.


@pytest.mark.parametrize("size", [100, (2, 100), (100, 2), (1, 100), (100, 1)])
def test_array_function(size):
    """Check that NumPy functions called on Modin arrays agree with plain NumPy."""
    numpy_arr = numpy.random.randint(-100, 100, size=size)
    modin_arr = np.array(numpy_arr)

    # One function from array shaping (ravel) and one from array creation (zeros_like).
    for numpy_func in (numpy.ravel, numpy.zeros_like):
        modin_result = numpy_func(modin_arr)
        numpy_result = numpy_func(numpy_arr)
        assert_scalar_or_array_equal(modin_result, numpy_result)

    # One function from math.
    modin_total = numpy.sum(modin_arr)
    numpy_total = numpy.sum(numpy_arr)
    assert numpy_total == modin_total


def test_array_where():
    """Check ``where`` on boolean Modin arrays: warnings, errors and results against ``numpy.where``."""
    numpy_flat_arr = numpy.random.randint(-100, 100, size=100)
    modin_flat_arr = np.array(numpy_flat_arr)
    # Calling ``where`` with only the condition must emit a ``UserWarning``.
    with pytest.warns(
        UserWarning, match="np.where method with only condition specified"
    ):
        # Ignore warnings whose message starts with "Distributing"
        # (presumably unrelated to the warning under test -- TODO confirm).
        warnings.filterwarnings("ignore", message="Distributing")
        (modin_flat_arr <= 0).where()
    # Passing only ``x`` must raise.
    with pytest.raises(ValueError, match="np.where requires x and y"):
        (modin_flat_arr <= 0).where(x=["Should Fail."])
    # Scalar ``x`` and scalar ``y``: a ``UserWarning`` is expected, and the result must match NumPy.
    with pytest.warns(UserWarning, match="np.where not supported when both x and y"):
        warnings.filterwarnings("ignore", message="Distributing")
        modin_result = (modin_flat_arr <= 0).where(x=4, y=5)
    numpy_result = numpy.where(numpy_flat_arr <= 0, 4, 5)
    assert_scalar_or_array_equal(modin_result, numpy_result)
    modin_flat_bool_arr = modin_flat_arr <= 0
    numpy_flat_bool_arr = numpy_flat_arr <= 0
    # Scalar ``x`` with array ``y``.
    modin_result = modin_flat_bool_arr.where(x=5, y=modin_flat_arr)
    numpy_result = numpy.where(numpy_flat_bool_arr, 5, numpy_flat_arr)
    assert_scalar_or_array_equal(modin_result, numpy_result)
    # Array ``x`` with scalar ``y``.
    modin_result = modin_flat_bool_arr.where(x=modin_flat_arr, y=5)
    numpy_result = numpy.where(numpy_flat_bool_arr, numpy_flat_arr, 5)
    assert_scalar_or_array_equal(modin_result, numpy_result)
    # Array ``x`` with array ``y``.
    modin_result = modin_flat_bool_arr.where(x=modin_flat_arr, y=(-1 * modin_flat_arr))
    numpy_result = numpy.where(
        numpy_flat_bool_arr, numpy_flat_arr, (-1 * numpy_flat_arr)
    )
    assert_scalar_or_array_equal(modin_result, numpy_result)
    # Same check on a 10x10 array, passing ``x`` and ``y`` positionally.
    numpy_arr = numpy_flat_arr.reshape((10, 10))
    modin_arr = np.array(numpy_arr)
    modin_bool_arr = modin_arr > 0
    numpy_bool_arr = numpy_arr > 0
    modin_result = modin_bool_arr.where(modin_arr, 10 * modin_arr)
    numpy_result = numpy.where(numpy_bool_arr, numpy_arr, 10 * numpy_arr)
    assert_scalar_or_array_equal(modin_result, numpy_result)


@pytest.mark.parametrize("method", ["argmax", "argmin"])
def test_argmax_argmin(method):
    """``argmax``/``argmin`` along axis 1 must match NumPy on data containing a NaN."""
    numpy_arr = numpy.array([[1, 2, 3], [4, 5, np.nan]])
    modin_arr = np.array(numpy_arr)

    modin_func = getattr(np, method)
    numpy_func = getattr(numpy, method)
    modin_result = modin_func(modin_arr, axis=1)
    numpy_result = numpy_func(numpy_arr, axis=1)
    assert_scalar_or_array_equal(modin_result, numpy_result)


def test_flatten():
    """``flatten`` must match NumPy for both a 1-D and a 10x10 array."""
    numpy_flat_arr = numpy.random.randint(-100, 100, size=100)
    for numpy_arr in (numpy_flat_arr, numpy_flat_arr.reshape((10, 10))):
        modin_arr = np.array(numpy_arr)
        assert_scalar_or_array_equal(modin_arr.flatten(), numpy_arr.flatten())


def test_transpose():
    """``transpose`` (and ``T`` for the 2-D case) must match NumPy."""
    numpy_flat_arr = numpy.random.randint(-100, 100, size=100)

    # 1-D input.
    modin_flat_arr = np.array(numpy_flat_arr)
    assert_scalar_or_array_equal(modin_flat_arr.transpose(), numpy_flat_arr.transpose())

    # 10x10 input, through both the method and the ``T`` attribute.
    numpy_square = numpy_flat_arr.reshape((10, 10))
    modin_square = np.array(numpy_square)
    assert_scalar_or_array_equal(modin_square.transpose(), numpy_square.transpose())
    assert_scalar_or_array_equal(modin_square.T, numpy_square.T)


def test_astype():
    """Check ``astype`` for float and string targets, with and without ``copy=False``.

    For every conversion after the first, the source arrays are compared
    again to confirm that they still hold the same values.
    """
    reference = numpy.array([[1, 2], [3, 4]])
    candidate = np.array([[1, 2], [3, 4]])

    # Default-copy float conversion: the dtype is checked in addition to the values.
    converted = candidate.astype(numpy.float64)
    expected = reference.astype(numpy.float64)
    assert converted.dtype == expected.dtype
    assert_scalar_or_array_equal(converted, expected)

    remaining_cases = (
        (str, {}),
        (str, {"copy": False}),
        (numpy.float64, {"copy": False}),
    )
    for target_dtype, extra_kwargs in remaining_cases:
        converted = candidate.astype(target_dtype, **extra_kwargs)
        expected = reference.astype(target_dtype, **extra_kwargs)
        assert_scalar_or_array_equal(converted, expected)
        # The sources must still agree after the conversion.
        assert_scalar_or_array_equal(candidate, reference)


def test_set_shape():
    """Check that assigning to ``shape`` reshapes the array in place.

    Both a tuple and a bare integer are accepted as the new shape; a shape
    with a different number of elements must raise ``ValueError``.
    """
    reference = numpy.array([[1, 2, 3], [4, 5, 6]])
    reference.shape = (6,)

    candidate = np.array([[1, 2, 3], [4, 5, 6]])
    candidate.shape = (6,)
    assert_scalar_or_array_equal(candidate, reference)

    # Same as using (6,)
    candidate.shape = 6
    assert_scalar_or_array_equal(candidate, reference)

    # Six elements cannot be arranged into a length-4 array.
    with pytest.raises(ValueError, match="cannot reshape"):
        candidate.shape = (4,)


def test__array__():
    """Check that ``numpy.array`` turns a modin array into a NumPy array with equal values."""
    reference = numpy.array([[1, 2, 3], [4, 5, 6]])
    candidate = np.array(reference)
    # this implicitly calls `__array__`
    round_tripped = numpy.array(candidate)
    assert type(round_tripped) is type(reference)
    assert_scalar_or_array_equal(round_tripped, reference)


================================================
FILE: modin/tests/numpy/test_array_arithmetic.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import numpy
import pytest

import modin.numpy as np

from .utils import assert_scalar_or_array_equal


@pytest.mark.parametrize(
    "operand1_shape",
    [
        100,
        (1, 100),
        (3, 100),
    ],
)
@pytest.mark.parametrize(
    "operand2_shape",
    [
        100,
        (1, 100),
        (3, 100),
        1,
    ],
)
@pytest.mark.parametrize(
    "operator",
    [
        "__add__",
        "__sub__",
        "__truediv__",
        "__mul__",
        "__rtruediv__",
        "__rmul__",
        "__radd__",
        "__rsub__",
        "__ge__",
        "__gt__",
        "__lt__",
        "__le__",
        "__eq__",
        "__ne__",
    ],
)
def test_basic_arithmetic_with_broadcast(operand1_shape, operand2_shape, operator):
    """Test of operators that support broadcasting.

    Parameters
    ----------
    operand1_shape : int or tuple of int
        Shape of the operand the dunder method is called on.
    operand2_shape : int or tuple of int
        Shape of the other operand; ``1`` exercises the array-with-scalar path.
    operator : str
        Name of the dunder method looked up on the first operand.
    """
    if operand1_shape == (1, 100) or operand2_shape == (1, 100):
        # For some reason, marking the param with xfail leads to [XPASS(strict)] and a reported failure
        pytest.xfail(reason="broadcasting is broken: see GH#5894")
    operand1 = numpy.random.randint(-100, 100, size=operand1_shape)
    operand2 = numpy.random.randint(-100, 100, size=operand2_shape)
    numpy_result = getattr(operand1, operator)(operand2)
    if operand2_shape == 1:
        # Tests binary ops with a scalar
        modin_result = getattr(np.array(operand1), operator)(operand2[0])
    else:
        modin_result = getattr(np.array(operand1), operator)(np.array(operand2))
    if operator not in ["__truediv__", "__rtruediv__"]:
        assert_scalar_or_array_equal(
            modin_result,
            numpy_result,
            err_msg=f"Binary Op {operator} failed.",
        )
    else:
        # Division can have precision issues, where thanks to floating point error, the numbers
        # aren't exactly the same across both, but are functionally equivalent, since the difference
        # is less than 1e-12.
        numpy.testing.assert_array_almost_equal(
            modin_result._to_numpy(),
            numpy_result,
            decimal=12,
            # Report the operator actually under test; this branch also handles `__rtruediv__`.
            err_msg=f"Binary Op {operator} failed.",
        )


@pytest.mark.parametrize("matched_axis", [0, 1])
@pytest.mark.parametrize(
    "operator",
    [
        "__add__",
        "__sub__",
        "__truediv__",
        "__mul__",
        "__rtruediv__",
        "__rmul__",
        "__radd__",
        "__rsub__",
        "__ge__",
        "__gt__",
        "__lt__",
        "__le__",
        "__eq__",
        "__ne__",
    ],
)
def test_binary_bad_broadcast(matched_axis, operator):
    """Tests broadcasts between 2d arrays that should fail.

    The operands agree on ``matched_axis`` and differ (100 vs 200) on the
    other axis; both NumPy and modin are expected to raise ``ValueError``.
    """
    if matched_axis == 0:
        left_shape, right_shape = (3, 100), (3, 200)
    else:
        left_shape, right_shape = (100, 3), (200, 3)
    left = numpy.random.randint(-100, 100, size=left_shape)
    right = numpy.random.randint(-100, 100, size=right_shape)

    # NumPy's own behaviour is asserted first.
    with pytest.raises(ValueError):
        getattr(left, operator)(right)
    # modin must reject the same operand pair.
    with pytest.raises(ValueError):
        getattr(np.array(left), operator)(np.array(right))


@pytest.mark.parametrize("operator", ["__pow__", "__floordiv__", "__mod__"])
def test_arithmetic(operator):
    """Test of operators that do not yet support broadcasting.

    Both operands always share the same shape; the check runs once for
    1D and once for 2D inputs.
    """
    # The right-hand operand is kept non-negative for `__pow__` only.
    rhs_lower_bound = 0 if operator == "__pow__" else -100
    shapes_by_label = {"1D": 100, "2D": (10, 10)}
    for label, shape in shapes_by_label.items():
        lhs = numpy.random.randint(-100, 100, size=shape)
        rhs = numpy.random.randint(rhs_lower_bound, 100, size=shape)
        actual = getattr(np.array(lhs), operator)(np.array(rhs))
        expected = getattr(lhs, operator)(rhs)
        numpy.testing.assert_array_almost_equal(
            actual._to_numpy(),
            expected,
            decimal=12,
            err_msg=f"Binary Op {operator} failed on {label} arrays.",
        )


def test_arithmetic_nans_and_zeros():
    """Check floor division when operands contain NaNs and zero divisors."""
    dividend = numpy.array([[1, 0, 3], [numpy.nan, 0, numpy.nan]])
    divisor = numpy.array([1, 0, 0])
    assert_scalar_or_array_equal(
        np.array(dividend) // np.array(divisor),
        dividend // divisor,
    )

    # 0 // 0 with the default dtype first, then with an explicit float64 dtype.
    for array_kwargs in ({}, {"dtype": numpy.float64}):
        assert_scalar_or_array_equal(
            np.array([0], **array_kwargs) // 0,
            numpy.array([0], **array_kwargs) // 0,
        )


@pytest.mark.parametrize("size", [100, (2, 100), (100, 2), (1, 100), (100, 1)])
def test_scalar_arithmetic(size):
    """Check ``*``, ``/``, ``+`` and ``-`` between an array and a scalar.

    Each operator is exercised with the scalar on the left and on the right.
    Every expected value is computed with NumPy using the same operand order
    as the modin expression, and each failure message names the dunder
    method the expression is meant to exercise.

    Parameters
    ----------
    size : int or tuple of int
        Shape of the random integer array used as the array operand.
    """
    numpy_arr = numpy.random.randint(-100, 100, size=size)
    modin_arr = np.array(numpy_arr)
    # Drawn from [1, 100), so dividing the array by the scalar never divides by zero.
    scalar = numpy.random.randint(1, 100)
    # With the scalar on the left, the array's reflected method is the one under test.
    assert_scalar_or_array_equal(
        (scalar * modin_arr), scalar * numpy_arr, err_msg="__rmul__ failed."
    )
    assert_scalar_or_array_equal(
        (modin_arr * scalar),
        numpy_arr * scalar,
        err_msg="__mul__ failed.",
    )
    assert_scalar_or_array_equal(
        (scalar / modin_arr),
        scalar / numpy_arr,
        err_msg="__rtruediv__ failed.",
    )
    assert_scalar_or_array_equal(
        (modin_arr / scalar),
        numpy_arr / scalar,
        err_msg="__truediv__ failed.",
    )
    assert_scalar_or_array_equal(
        (scalar + modin_arr),
        scalar + numpy_arr,
        err_msg="__radd__ failed.",
    )
    assert_scalar_or_array_equal(
        (modin_arr + scalar), numpy_arr + scalar, err_msg="__add__ failed."
    )
    assert_scalar_or_array_equal(
        (scalar - modin_arr),
        scalar - numpy_arr,
        err_msg="__rsub__ failed.",
    )
    assert_scalar_or_array_equal(
        (modin_arr - scalar), numpy_arr - scalar, err_msg="__sub__ failed."
    )


@pytest.mark.parametrize("op_name", ["abs", "exp", "sqrt", "tanh"])
def test_unary_arithmetic(op_name):
    """Check a module-level unary function on 1D and 2D integer arrays against NumPy."""
    reference_flat = numpy.random.randint(-100, 100, size=100)
    candidate_flat = np.array(reference_flat)
    modin_func = getattr(np, op_name)
    numpy_func = getattr(numpy, op_name)
    assert_scalar_or_array_equal(
        modin_func(candidate_flat),
        numpy_func(reference_flat),
    )

    # Repeat on the same values arranged as a 10x10 matrix.
    reference_square = reference_flat.reshape((10, 10))
    candidate_square = np.array(reference_square)
    assert_scalar_or_array_equal(
        modin_func(candidate_square),
        numpy_func(reference_square),
    )


def test_invert():
    """Check ``~`` on integer and boolean arrays, in both 1D and 2D."""

    def check_flat_and_square(flat_values):
        # Compare `~` for the flat values and for their 10x10 reshaping.
        for reference in (flat_values, flat_values.reshape((10, 10))):
            candidate = np.array(reference)
            assert_scalar_or_array_equal(~candidate, ~reference)

    # Integer input.
    check_flat_and_square(numpy.random.randint(-100, 100, size=100))
    # Boolean input, derived from a fresh random draw.
    check_flat_and_square(numpy.random.randint(-100, 100, size=100) < 0)


================================================
FILE: modin/tests/numpy/test_array_axis_functions.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import numpy
import pytest

import modin.numpy as np

from .utils import assert_scalar_or_array_equal


def test_max():
    """Compare the array ``max`` method with ``numpy.ndarray.max``.

    Runs on 1D and 2D integer inputs and exercises the ``axis``, ``initial``,
    ``where``, ``keepdims`` and ``out`` arguments.
    """
    # Test 1D
    numpy_arr = numpy.random.randint(-100, 100, size=100)
    modin_arr = np.array(numpy_arr)
    assert modin_arr.max() == numpy_arr.max()
    modin_result = modin_arr.max(axis=0)
    # The reference must come from NumPy; comparing modin with itself can never fail.
    numpy_result = numpy_arr.max(axis=0)
    assert modin_result == numpy_result
    modin_result = modin_arr.max(initial=200)
    numpy_result = numpy_arr.max(initial=200)
    assert modin_result == numpy_result
    modin_result = modin_arr.max(initial=0, where=False)
    numpy_result = numpy_arr.max(initial=0, where=False)
    assert modin_result == numpy_result
    modin_result = modin_arr.max(keepdims=True)
    numpy_result = numpy_arr.max(keepdims=True)
    assert modin_result.shape == numpy_result.shape
    assert_scalar_or_array_equal(modin_result, numpy_result)
    # The mask hides the 10000 entry from the reduction.
    numpy_arr = numpy.array([1, 10000, 2, 3, 4, 5])
    modin_arr = np.array(numpy_arr)
    numpy_mask = numpy.array([True, False, True, True, True, True])
    modin_mask = np.array(numpy_mask)
    assert numpy_arr.max(where=numpy_mask, initial=5) == modin_arr.max(
        where=modin_mask, initial=5
    )
    # Test 2D
    numpy_arr = numpy.random.randint(-100, 100, size=(20, 20))
    modin_arr = np.array(numpy_arr)
    assert modin_arr.max() == numpy_arr.max()
    modin_result = modin_arr.max(axis=0)
    numpy_result = numpy_arr.max(axis=0)
    assert modin_result.shape == numpy_result.shape
    assert_scalar_or_array_equal(modin_result, numpy_result)
    modin_result = modin_arr.max(axis=0, keepdims=True)
    numpy_result = numpy_arr.max(axis=0, keepdims=True)
    assert modin_result.shape == numpy_result.shape
    assert_scalar_or_array_equal(modin_result, numpy_result)
    modin_result = modin_arr.max(axis=1)
    numpy_result = numpy_arr.max(axis=1)
    assert modin_result.shape == numpy_result.shape
    assert_scalar_or_array_equal(modin_result, numpy_result)
    modin_result = modin_arr.max(axis=1, keepdims=True)
    numpy_result = numpy_arr.max(axis=1, keepdims=True)
    assert modin_result.shape == numpy_result.shape
    assert_scalar_or_array_equal(modin_result, numpy_result)
    modin_result = modin_arr.max(initial=200)
    numpy_result = numpy_arr.max(initial=200)
    assert modin_result == numpy_result
    modin_result = modin_arr.max(initial=0, where=False)
    numpy_result = numpy_arr.max(initial=0, where=False)
    assert modin_result == numpy_result
    # Passing the 20x20 input itself as `out` for a full reduction must be rejected.
    with pytest.raises(ValueError):
        modin_arr.max(out=modin_arr, keepdims=True)
    # A 1x1 `out` is accepted for the full reduction with `keepdims=True`.
    modin_out = np.array([[1]])
    numpy_out = modin_out._to_numpy()
    modin_result = modin_arr.max(out=modin_out, keepdims=True)
    numpy_result = numpy_arr.max(out=numpy_out, keepdims=True)
    assert_scalar_or_array_equal(modin_result, numpy_result)
    assert_scalar_or_array_equal(modin_out, numpy_out)
    numpy_arr = numpy.random.randint(-100, 100, size=(20, 20))
    modin_arr = np.array(numpy_arr)
    modin_result = modin_arr.max(axis=0, where=False, initial=4)
    numpy_result = numpy_arr.max(axis=0, where=False, initial=4)
    assert_scalar_or_array_equal(modin_result, numpy_result)
    # Each `out` check uses a fresh pair of buffers and compares both the
    # returned value and the buffer contents.
    numpy_out = numpy.ones(20)
    modin_out = np.array(numpy_out)
    modin_result = modin_arr.max(axis=0, where=False, initial=4, out=modin_out)
    numpy_result = numpy_arr.max(axis=0, where=False, initial=4, out=numpy_out)
    assert_scalar_or_array_equal(modin_result, numpy_result)
    assert_scalar_or_array_equal(modin_out, numpy_out)
    numpy_out = numpy.ones(20)
    modin_out = np.array(numpy_out)
    modin_result = modin_arr.max(axis=0, initial=4, out=modin_out)
    numpy_result = numpy_arr.max(axis=0, initial=4, out=numpy_out)
    assert_scalar_or_array_equal(modin_result, numpy_result)
    assert_scalar_or_array_equal(modin_out, numpy_out)
    numpy_out = numpy.ones(20)
    modin_out = np.array(numpy_out)
    modin_result = modin_arr.max(axis=1, initial=4, out=modin_out)
    numpy_result = numpy_arr.max(axis=1, initial=4, out=numpy_out)
    assert_scalar_or_array_equal(modin_result, numpy_result)
    assert_scalar_or_array_equal(modin_out, numpy_out)
    numpy_out = numpy.ones(20)
    modin_out = np.array(numpy_out)
    # Random 1D mask with exactly half of its 20 entries set.
    numpy_where = numpy.full(20, False)
    numpy_where[:10] = True
    numpy.random.shuffle(numpy_where)
    modin_where = np.array(numpy_where)
    modin_result = modin_arr.max(axis=0, initial=4, out=modin_out, where=modin_where)
    numpy_result = numpy_arr.max(axis=0, initial=4, out=numpy_out, where=numpy_where)
    assert_scalar_or_array_equal(modin_result, numpy_result)
    assert_scalar_or_array_equal(modin_out, numpy_out)
    # 2D mask combined with a full reduction.
    numpy_arr = numpy.array([[1, 10000, 2], [3, 4, 5]])
    modin_arr = np.array(numpy_arr)
    numpy_mask = numpy.array([[True, False, True], [True, True, True]])
    modin_mask = np.array(numpy_mask)
    assert_scalar_or_array_equal(
        modin_arr.max(where=modin_mask, initial=5),
        numpy_arr.max(where=numpy_mask, initial=5),
    )


def test_min():
    """Compare the array ``min`` method with ``numpy.ndarray.min``.

    Runs on 1D and 2D integer inputs and exercises the ``axis``, ``initial``,
    ``where``, ``keepdims`` and ``out`` arguments.
    """
    # Test 1D
    numpy_arr = numpy.random.randint(-100, 100, size=100)
    modin_arr = np.array(numpy_arr)
    assert modin_arr.min() == numpy_arr.min()
    modin_result = modin_arr.min(axis=0)
    # The reference must come from NumPy; comparing modin with itself can never fail.
    numpy_result = numpy_arr.min(axis=0)
    assert modin_result == numpy_result
    modin_result = modin_arr.min(initial=-200)
    numpy_result = numpy_arr.min(initial=-200)
    assert modin_result == numpy_result
    modin_result = modin_arr.min(initial=0, where=False)
    numpy_result = numpy_arr.min(initial=0, where=False)
    assert modin_result == numpy_result
    modin_result = modin_arr.min(keepdims=True)
    numpy_result = numpy_arr.min(keepdims=True)
    assert modin_result.shape == numpy_result.shape
    assert_scalar_or_array_equal(modin_result, numpy_result)
    # The mask hides the -10000 entry from the reduction.
    numpy_arr = numpy.array([1, -10000, 2, 3, 4, 5])
    modin_arr = np.array(numpy_arr)
    numpy_mask = numpy.array([True, False, True, True, True, True])
    modin_mask = np.array(numpy_mask)
    assert numpy_arr.min(where=numpy_mask, initial=5) == modin_arr.min(
        where=modin_mask, initial=5
    )
    # Test 2D
    numpy_arr = numpy.random.randint(-100, 100, size=(20, 20))
    modin_arr = np.array(numpy_arr)
    assert modin_arr.min() == numpy_arr.min()
    modin_result = modin_arr.min(axis=0)
    numpy_result = numpy_arr.min(axis=0)
    assert modin_result.shape == numpy_result.shape
    assert_scalar_or_array_equal(modin_result, numpy_result)
    modin_result = modin_arr.min(axis=0, keepdims=True)
    numpy_result = numpy_arr.min(axis=0, keepdims=True)
    assert modin_result.shape == numpy_result.shape
    assert_scalar_or_array_equal(modin_result, numpy_result)
    modin_result = modin_arr.min(axis=1)
    numpy_result = numpy_arr.min(axis=1)
    assert modin_result.shape == numpy_result.shape
    assert_scalar_or_array_equal(modin_result, numpy_result)
    modin_result = modin_arr.min(axis=1, keepdims=True)
    numpy_result = numpy_arr.min(axis=1, keepdims=True)
    assert modin_result.shape == numpy_result.shape
    assert_scalar_or_array_equal(modin_result, numpy_result)
    modin_result = modin_arr.min(initial=-200)
    numpy_result = numpy_arr.min(initial=-200)
    assert modin_result == numpy_result
    modin_result = modin_arr.min(initial=0, where=False)
    numpy_result = numpy_arr.min(initial=0, where=False)
    assert modin_result == numpy_result
    # Passing the 20x20 input itself as `out` for a full reduction must be rejected.
    with pytest.raises(ValueError):
        modin_arr.min(out=modin_arr, keepdims=True)
    # A 1x1 `out` is accepted for the full reduction with `keepdims=True`.
    modin_out = np.array([[1]])
    numpy_out = modin_out._to_numpy()
    modin_result = modin_arr.min(out=modin_out, keepdims=True)
    numpy_result = numpy_arr.min(out=numpy_out, keepdims=True)
    assert_scalar_or_array_equal(modin_result, numpy_result)
    assert_scalar_or_array_equal(modin_out, numpy_out)
    numpy_arr = numpy.random.randint(-100, 100, size=(20, 20))
    modin_arr = np.array(numpy_arr)
    modin_result = modin_arr.min(axis=0, where=False, initial=4)
    numpy_result = numpy_arr.min(axis=0, where=False, initial=4)
    assert_scalar_or_array_equal(modin_result, numpy_result)
    # Each `out` check uses a fresh pair of buffers and compares both the
    # returned value and the buffer contents.
    numpy_out = numpy.ones(20)
    modin_out = np.array(numpy_out)
    modin_result = modin_arr.min(axis=0, where=False, initial=4, out=modin_out)
    numpy_result = numpy_arr.min(axis=0, where=False, initial=4, out=numpy_out)
    assert_scalar_or_array_equal(modin_result, numpy_result)
    assert_scalar_or_array_equal(modin_out, numpy_out)
    numpy_out = numpy.ones(20)
    modin_out = np.array(numpy_out)
    modin_result = modin_arr.min(axis=0, initial=4, out=modin_out)
    numpy_result = numpy_arr.min(axis=0, initial=4, out=numpy_out)
    assert_scalar_or_array_equal(modin_result, numpy_result)
    assert_scalar_or_array_equal(modin_out, numpy_out)
    numpy_out = numpy.ones(20)
    modin_out = np.array(numpy_out)
    modin_result = modin_arr.min(axis=1, initial=4, out=modin_out)
    numpy_result = numpy_arr.min(axis=1, initial=4, out=numpy_out)
    assert_scalar_or_array_equal(modin_result, numpy_result)
    assert_scalar_or_array_equal(modin_out, numpy_out)
    numpy_out = numpy.ones(20)
    modin_out = np.array(numpy_out)
    # Random 1D mask with exactly half of its 20 entries set.
    numpy_where = numpy.full(20, False)
    numpy_where[:10] = True
    numpy.random.shuffle(numpy_where)
    modin_where = np.array(numpy_where)
    modin_result = modin_arr.min(axis=0, initial=4, out=modin_out, where=modin_where)
    numpy_result = numpy_arr.min(axis=0, initial=4, out=numpy_out, where=numpy_where)
    assert_scalar_or_array_equal(modin_result, numpy_result)
    assert_scalar_or_array_equal(modin_out, numpy_out)
    # 2D mask combined with a full reduction.
    numpy_arr = numpy.array([[1, -10000, 2], [3, 4, 5]])
    modin_arr = np.array(numpy_arr)
    numpy_mask = numpy.array([[True, False, True], [True, True, True]])
    modin_mask = np.array(numpy_mask)
    assert_scalar_or_array_equal(
        modin_arr.min(where=modin_mask, initial=5),
        numpy_arr.min(where=numpy_mask, initial=5),
    )


def test_sum():
    """Compare the array ``sum`` method with ``numpy.ndarray.sum``.

    Runs on 1D and 2D integer inputs, exercises the ``axis``, ``initial``,
    ``where``, ``keepdims`` and ``out`` arguments, and finishes with a check
    that NaN values propagate through the reduction.
    """
    # Test 1D
    numpy_arr = numpy.random.randint(-100, 100, size=100)
    modin_arr = np.array(numpy_arr)
    assert modin_arr.sum() == numpy_arr.sum()
    modin_result = modin_arr.sum(axis=0)
    # The reference must come from NumPy; comparing modin with itself can never fail.
    numpy_result = numpy_arr.sum(axis=0)
    assert modin_result == numpy_result
    modin_result = modin_arr.sum(initial=-200)
    numpy_result = numpy_arr.sum(initial=-200)
    assert modin_result == numpy_result
    modin_result = modin_arr.sum(initial=0, where=False)
    numpy_result = numpy_arr.sum(initial=0, where=False)
    assert modin_result == numpy_result
    modin_result = modin_arr.sum(keepdims=True)
    numpy_result = numpy_arr.sum(keepdims=True)
    assert modin_result.shape == numpy_result.shape
    assert_scalar_or_array_equal(modin_result, numpy_result)
    # The mask hides the 10000 entry from the reduction.
    numpy_arr = numpy.array([1, 10000, 2, 3, 4, 5])
    modin_arr = np.array(numpy_arr)
    numpy_mask = numpy.array([True, False, True, True, True, True])
    modin_mask = np.array(numpy_mask)
    assert numpy_arr.sum(where=numpy_mask) == modin_arr.sum(where=modin_mask)
    # Test 2D
    numpy_arr = numpy.random.randint(-100, 100, size=(20, 20))
    modin_arr = np.array(numpy_arr)
    assert modin_arr.sum() == numpy_arr.sum()
    modin_result = modin_arr.sum(axis=0)
    numpy_result = numpy_arr.sum(axis=0)
    assert modin_result.shape == numpy_result.shape
    assert_scalar_or_array_equal(modin_result, numpy_result)
    modin_result = modin_arr.sum(axis=0, keepdims=True)
    numpy_result = numpy_arr.sum(axis=0, keepdims=True)
    assert modin_result.shape == numpy_result.shape
    assert_scalar_or_array_equal(modin_result, numpy_result)
    modin_result = modin_arr.sum(axis=1)
    numpy_result = numpy_arr.sum(axis=1)
    assert modin_result.shape == numpy_result.shape
    assert_scalar_or_array_equal(modin_result, numpy_result)
    modin_result = modin_arr.sum(axis=1, keepdims=True)
    numpy_result = numpy_arr.sum(axis=1, keepdims=True)
    assert modin_result.shape == numpy_result.shape
    assert_scalar_or_array_equal(modin_result, numpy_result)
    modin_result = modin_arr.sum(initial=-200)
    numpy_result = numpy_arr.sum(initial=-200)
    assert modin_result == numpy_result
    modin_result = modin_arr.sum(initial=0, where=False)
    numpy_result = numpy_arr.sum(initial=0, where=False)
    assert modin_result == numpy_result
    # Passing the 20x20 input itself as `out` for a full reduction must be rejected.
    with pytest.raises(ValueError):
        modin_arr.sum(out=modin_arr, keepdims=True)
    # A 1x1 `out` is accepted for the full reduction with `keepdims=True`.
    modin_out = np.array([[1]])
    numpy_out = modin_out._to_numpy()
    modin_result = modin_arr.sum(out=modin_out, keepdims=True)
    numpy_result = numpy_arr.sum(out=numpy_out, keepdims=True)
    assert_scalar_or_array_equal(modin_result, numpy_result)
    assert_scalar_or_array_equal(modin_out, numpy_out)
    numpy_arr = numpy.random.randint(-100, 100, size=(20, 20))
    modin_arr = np.array(numpy_arr)
    modin_result = modin_arr.sum(axis=0, where=False, initial=4)
    numpy_result = numpy_arr.sum(axis=0, where=False, initial=4)
    assert_scalar_or_array_equal(modin_result, numpy_result)
    # Each `out` check uses a fresh pair of buffers and compares both the
    # returned value and the buffer contents.
    numpy_out = numpy.ones(20)
    modin_out = np.array(numpy_out)
    modin_result = modin_arr.sum(axis=0, where=False, initial=4, out=modin_out)
    numpy_result = numpy_arr.sum(axis=0, where=False, initial=4, out=numpy_out)
    assert_scalar_or_array_equal(modin_result, numpy_result)
    assert_scalar_or_array_equal(modin_out, numpy_out)
    numpy_out = numpy.ones(20)
    modin_out = np.array(numpy_out)
    modin_result = modin_arr.sum(axis=0, initial=4, out=modin_out)
    numpy_result = numpy_arr.sum(axis=0, initial=4, out=numpy_out)
    assert_scalar_or_array_equal(modin_result, numpy_result)
    assert_scalar_or_array_equal(modin_out, numpy_out)
    numpy_out = numpy.ones(20)
    modin_out = np.array(numpy_out)
    modin_result = modin_arr.sum(axis=1, initial=4, out=modin_out)
    numpy_result = numpy_arr.sum(axis=1, initial=4, out=numpy_out)
    assert_scalar_or_array_equal(modin_result, numpy_result)
    assert_scalar_or_array_equal(modin_out, numpy_out)
    numpy_out = numpy.ones(20)
    modin_out = np.array(numpy_out)
    # Random 1D mask with exactly half of its 20 entries set.
    numpy_where = numpy.full(20, False)
    numpy_where[:10] = True
    numpy.random.shuffle(numpy_where)
    modin_where = np.array(numpy_where)
    modin_result = modin_arr.sum(axis=0, initial=4, out=modin_out, where=modin_where)
    numpy_result = numpy_arr.sum(axis=0, initial=4, out=numpy_out, where=numpy_where)
    assert_scalar_or_array_equal(modin_result, numpy_result)
    assert_scalar_or_array_equal(modin_out, numpy_out)
    # Random 20x20 mask with exactly half of its 400 entries set.
    numpy_where = numpy.full(400, False)
    numpy_where[:200] = True
    numpy.random.shuffle(numpy_where)
    numpy_where = numpy_where.reshape((20, 20))
    modin_where = np.array(numpy_where)
    modin_result = modin_arr.sum(where=modin_where)
    numpy_result = numpy_arr.sum(where=numpy_where)
    assert modin_result == numpy_result
    # Test NA propagation
    numpy_arr = numpy.array([[1, 2], [3, 4], [5, numpy.nan]])
    modin_arr = np.array([[1, 2], [3, 4], [5, np.nan]])
    assert numpy.isnan(modin_arr.sum())
    assert_scalar_or_array_equal(
        modin_arr.sum(axis=1),
        numpy_arr.sum(axis=1),
    )
    assert_scalar_or_array_equal(
        modin_arr.sum(axis=0),
        numpy_arr.sum(axis=0),
    )


def test_mean():
    """Compare the array ``mean`` method with ``numpy.ndarray.mean``.

    Runs on 1D and 2D integer inputs, exercises the ``axis``, ``where``,
    ``keepdims`` and ``out`` arguments, and finishes with NaN-handling checks.
    """
    # Test 1D
    numpy_arr = numpy.random.randint(-100, 100, size=100)
    modin_arr = np.array(numpy_arr)
    assert modin_arr.mean() == numpy_arr.mean()
    modin_result = modin_arr.mean(axis=0)
    # The reference must come from NumPy; comparing modin with itself can never fail.
    numpy_result = numpy_arr.mean(axis=0)
    assert modin_result == numpy_result
    modin_result = modin_arr.mean()
    numpy_result = numpy_arr.mean()
    assert modin_result == numpy_result
    modin_result = modin_arr.mean(keepdims=True)
    numpy_result = numpy_arr.mean(keepdims=True)
    assert modin_result.shape == numpy_result.shape
    assert_scalar_or_array_equal(modin_result, numpy_result)
    # The mask hides the 10000 entry from the reduction.
    numpy_arr = numpy.array([1, 10000, 2, 3, 4, 5])
    modin_arr = np.array(numpy_arr)
    numpy_mask = numpy.array([True, False, True, True, True, True])
    modin_mask = np.array(numpy_mask)
    assert numpy_arr.mean(where=numpy_mask) == modin_arr.mean(where=modin_mask)
    # Test 2D
    numpy_arr = numpy.random.randint(-100, 100, size=(20, 20))
    modin_arr = np.array(numpy_arr)
    assert modin_arr.mean() == numpy_arr.mean()
    modin_result = modin_arr.mean(axis=0)
    numpy_result = numpy_arr.mean(axis=0)
    assert modin_result.shape == numpy_result.shape
    assert_scalar_or_array_equal(modin_result, numpy_result)
    modin_result = modin_arr.mean(axis=0, keepdims=True)
    numpy_result = numpy_arr.mean(axis=0, keepdims=True)
    assert modin_result.shape == numpy_result.shape
    assert_scalar_or_array_equal(modin_result, numpy_result)
    modin_result = modin_arr.mean(axis=1)
    numpy_result = numpy_arr.mean(axis=1)
    assert modin_result.shape == numpy_result.shape
    assert_scalar_or_array_equal(modin_result, numpy_result)
    modin_result = modin_arr.mean(axis=1, keepdims=True)
    numpy_result = numpy_arr.mean(axis=1, keepdims=True)
    assert modin_result.shape == numpy_result.shape
    assert_scalar_or_array_equal(modin_result, numpy_result)
    modin_result = modin_arr.mean()
    numpy_result = numpy_arr.mean()
    assert modin_result == numpy_result
    # Passing the 20x20 input itself as `out` for a full reduction must be rejected.
    with pytest.raises(ValueError):
        modin_arr.mean(out=modin_arr, keepdims=True)
    # A 1x1 `out` is accepted for the full reduction with `keepdims=True`.
    modin_out = np.array([[1]])
    numpy_out = modin_out._to_numpy()
    modin_result = modin_arr.mean(out=modin_out, keepdims=True)
    numpy_result = numpy_arr.mean(out=numpy_out, keepdims=True)
    assert_scalar_or_array_equal(modin_result, numpy_result)
    assert_scalar_or_array_equal(modin_out, numpy_out)
    numpy_arr = numpy.random.randint(-100, 100, size=(20, 20))
    modin_arr = np.array(numpy_arr)
    # Each `out` check uses a fresh pair of buffers and compares both the
    # returned value and the buffer contents.
    numpy_out = numpy.ones(20)
    modin_out = np.array(numpy_out)
    modin_result = modin_arr.mean(axis=0, where=False, out=modin_out)
    numpy_result = numpy_arr.mean(axis=0, where=False, out=numpy_out)
    assert_scalar_or_array_equal(modin_result, numpy_result)
    assert_scalar_or_array_equal(modin_out, numpy_out)
    numpy_out = numpy.ones(20)
    modin_out = np.array(numpy_out)
    modin_result = modin_arr.mean(axis=0, out=modin_out)
    numpy_result = numpy_arr.mean(axis=0, out=numpy_out)
    assert_scalar_or_array_equal(modin_result, numpy_result)
    assert_scalar_or_array_equal(modin_out, numpy_out)
    numpy_out = numpy.ones(20)
    modin_out = np.array(numpy_out)
    modin_result = modin_arr.mean(axis=1, out=modin_out)
    numpy_result = numpy_arr.mean(axis=1, out=numpy_out)
    assert_scalar_or_array_equal(modin_result, numpy_result)
    assert_scalar_or_array_equal(modin_out, numpy_out)
    numpy_out = numpy.ones(20)
    modin_out = np.array(numpy_out)
    # Random 1D mask with exactly half of its 20 entries set.
    numpy_where = numpy.full(20, False)
    numpy_where[:10] = True
    numpy.random.shuffle(numpy_where)
    modin_where = np.array(numpy_where)
    modin_result = modin_arr.mean(axis=0, out=modin_out, where=modin_where)
    numpy_result = numpy_arr.mean(axis=0, out=numpy_out, where=numpy_where)
    assert_scalar_or_array_equal(modin_result, numpy_result)
    assert_scalar_or_array_equal(modin_out, numpy_out)
    # Random 20x20 mask with exactly half of its 400 entries set.
    numpy_where = numpy.full(400, False)
    numpy_where[:200] = True
    numpy.random.shuffle(numpy_where)
    numpy_where = numpy_where.reshape((20, 20))
    modin_where = np.array(numpy_where)
    modin_result = modin_arr.mean(where=modin_where)
    numpy_result = numpy_arr.mean(where=numpy_where)
    assert modin_result == numpy_result
    # Test NA propagation
    numpy_arr = numpy.array([[1, 2], [3, 4], [5, numpy.nan]])
    modin_arr = np.array([[1, 2], [3, 4], [5, np.nan]])
    assert numpy.isnan(modin_arr.mean())
    assert_scalar_or_array_equal(
        modin_arr.mean(axis=1),
        numpy_arr.mean(axis=1),
    )
    assert_scalar_or_array_equal(
        modin_arr.mean(axis=0),
        numpy_arr.mean(axis=0),
    )
    # Masking out the single NaN entry; the two results are compared with `==`.
    numpy_where = numpy.array([[True, True], [True, True], [True, False]])
    modin_where = np.array(numpy_where)
    assert modin_arr.mean(where=modin_where) == numpy_arr.mean(where=numpy_where)


def test_prod():
    """
    Check that ``prod`` on a modin array matches ``prod`` on the numpy array.

    Exercises 1D and 2D inputs with the ``axis``, ``initial``, ``where``,
    ``keepdims`` and ``out`` arguments, then NaN propagation.
    """
    # Test 1D
    numpy_arr = numpy.random.randint(-100, 100, size=100)
    modin_arr = np.array(numpy_arr)
    assert modin_arr.prod() == numpy_arr.prod()
    modin_result = modin_arr.prod(axis=0)
    # The reference value must come from the numpy array; computing it from
    # ``modin_arr`` would compare modin with itself and could never fail.
    numpy_result = numpy_arr.prod(axis=0)
    assert modin_result == numpy_result
    modin_result = modin_arr.prod(initial=-200)
    numpy_result = numpy_arr.prod(initial=-200)
    assert modin_result == numpy_result
    modin_result = modin_arr.prod(initial=0, where=False)
    numpy_result = numpy_arr.prod(initial=0, where=False)
    assert modin_result == numpy_result
    modin_result = modin_arr.prod(keepdims=True)
    numpy_result = numpy_arr.prod(keepdims=True)
    assert modin_result.shape == numpy_result.shape
    assert_scalar_or_array_equal(modin_result, numpy_result)
    # The mask drops the single large element (10000) from the product.
    numpy_arr = numpy.array([1, 10000, 2, 3, 4, 5])
    modin_arr = np.array(numpy_arr)
    numpy_mask = numpy.array([True, False, True, True, True, True])
    modin_mask = np.array(numpy_mask)
    assert numpy_arr.prod(where=numpy_mask) == modin_arr.prod(where=modin_mask)
    # Test 2D
    numpy_arr = numpy.random.randint(-100, 100, size=(20, 20))
    modin_arr = np.array(numpy_arr)
    assert modin_arr.prod() == numpy_arr.prod()
    modin_result = modin_arr.prod(axis=0)
    numpy_result = numpy_arr.prod(axis=0)
    assert modin_result.shape == numpy_result.shape
    assert_scalar_or_array_equal(modin_result, numpy_result)
    modin_result = modin_arr.prod(axis=0, keepdims=True)
    numpy_result = numpy_arr.prod(axis=0, keepdims=True)
    assert modin_result.shape == numpy_result.shape
    assert_scalar_or_array_equal(modin_result, numpy_result)
    modin_result = modin_arr.prod(axis=1)
    numpy_result = numpy_arr.prod(axis=1)
    assert modin_result.shape == numpy_result.shape
    assert_scalar_or_array_equal(modin_result, numpy_result)
    modin_result = modin_arr.prod(axis=1, keepdims=True)
    numpy_result = numpy_arr.prod(axis=1, keepdims=True)
    assert modin_result.shape == numpy_result.shape
    assert_scalar_or_array_equal(modin_result, numpy_result)
    modin_result = modin_arr.prod(initial=-200)
    numpy_result = numpy_arr.prod(initial=-200)
    assert modin_result == numpy_result
    modin_result = modin_arr.prod(initial=0, where=False)
    numpy_result = numpy_arr.prod(initial=0, where=False)
    assert modin_result == numpy_result
    # A (20, 20) ``out`` does not fit the keepdims result of a full reduction.
    with pytest.raises(ValueError):
        modin_arr.prod(out=modin_arr, keepdims=True)
    modin_out = np.array([[1]])
    numpy_out = modin_out._to_numpy()
    modin_result = modin_arr.prod(out=modin_out, keepdims=True)
    numpy_result = numpy_arr.prod(out=numpy_out, keepdims=True)
    assert_scalar_or_array_equal(modin_result, numpy_result)
    assert_scalar_or_array_equal(modin_out, numpy_out)
    numpy_arr = numpy.random.randint(-100, 100, size=(20, 20))
    modin_arr = np.array(numpy_arr)
    modin_result = modin_arr.prod(axis=0, where=False, initial=4)
    numpy_result = numpy_arr.prod(axis=0, where=False, initial=4)
    assert_scalar_or_array_equal(modin_result, numpy_result)
    numpy_out = numpy.ones(20)
    modin_out = np.array(numpy_out)
    modin_result = modin_arr.prod(axis=0, where=False, initial=4, out=modin_out)
    numpy_result = numpy_arr.prod(axis=0, where=False, initial=4, out=numpy_out)
    assert_scalar_or_array_equal(modin_result, numpy_result)
    assert_scalar_or_array_equal(modin_out, numpy_out)
    numpy_arr = numpy.random.randint(-100, 100, size=(20, 20))
    modin_arr = np.array(numpy_arr)
    numpy_out = numpy.ones(20)
    modin_out = np.array(numpy_out)
    modin_result = modin_arr.prod(axis=0, initial=4, out=modin_out)
    numpy_result = numpy_arr.prod(axis=0, initial=4, out=numpy_out)
    assert_scalar_or_array_equal(modin_result, numpy_result)
    assert_scalar_or_array_equal(modin_out, numpy_out)
    numpy_out = numpy.ones(20)
    modin_out = np.array(numpy_out)
    modin_result = modin_arr.prod(axis=1, initial=4, out=modin_out)
    numpy_result = numpy_arr.prod(axis=1, initial=4, out=numpy_out)
    assert_scalar_or_array_equal(modin_result, numpy_result)
    assert_scalar_or_array_equal(modin_out, numpy_out)
    numpy_out = numpy.ones(20)
    modin_out = np.array(numpy_out)
    # 1D mask with exactly half of its entries set, in random positions.
    numpy_where = numpy.full(20, False)
    numpy_where[:10] = True
    numpy.random.shuffle(numpy_where)
    modin_where = np.array(numpy_where)
    modin_result = modin_arr.prod(axis=0, initial=4, out=modin_out, where=modin_where)
    numpy_result = numpy_arr.prod(axis=0, initial=4, out=numpy_out, where=numpy_where)
    assert_scalar_or_array_equal(modin_result, numpy_result)
    assert_scalar_or_array_equal(modin_out, numpy_out)
    # 2D mask with half of its entries set, shuffled before reshaping.
    numpy_where = numpy.full(400, False)
    numpy_where[:200] = True
    numpy.random.shuffle(numpy_where)
    numpy_where = numpy_where.reshape((20, 20))
    modin_where = np.array(numpy_where)
    modin_result = modin_arr.prod(where=modin_where)
    numpy_result = numpy_arr.prod(where=numpy_where)
    assert modin_result == numpy_result
    # Test NA propagation
    numpy_arr = numpy.array([[1, 2], [3, 4], [5, numpy.nan]])
    modin_arr = np.array([[1, 2], [3, 4], [5, np.nan]])
    assert numpy.isnan(modin_arr.prod())
    assert_scalar_or_array_equal(
        modin_arr.prod(axis=1),
        numpy_arr.prod(axis=1),
    )
    assert_scalar_or_array_equal(
        modin_arr.prod(axis=0),
        numpy_arr.prod(axis=0),
    )


================================================
FILE: modin/tests/numpy/test_array_creation.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import numpy

import modin.numpy as np

from .utils import assert_scalar_or_array_equal


def test_zeros_like():
    """Compare ``np.zeros_like`` with ``numpy.zeros_like`` for float and int inputs."""
    float_modin = np.array([[1.0, 2.0], [3.0, 4.0]])
    float_numpy = float_modin._to_numpy()
    # Plain call first, then a dtype override, then a shape override.
    for extra in ({}, {"dtype": numpy.int8}, {"shape": (10, 10)}):
        assert_scalar_or_array_equal(
            np.zeros_like(float_modin, **extra),
            numpy.zeros_like(float_numpy, **extra),
        )
    int_modin = np.array([[1, 2], [3, 4]])
    int_numpy = int_modin._to_numpy()
    assert_scalar_or_array_equal(np.zeros_like(int_modin), numpy.zeros_like(int_numpy))


def test_ones_like():
    """Compare ``np.ones_like`` with ``numpy.ones_like`` for float and int inputs."""
    float_modin = np.array([[1.0, 2.0], [3.0, 4.0]])
    float_numpy = float_modin._to_numpy()
    # Plain call first, then a dtype override, then a shape override.
    for extra in ({}, {"dtype": numpy.int8}, {"shape": (10, 10)}):
        assert_scalar_or_array_equal(
            np.ones_like(float_modin, **extra),
            numpy.ones_like(float_numpy, **extra),
        )
    int_modin = np.array([[1, 2], [3, 4]])
    int_numpy = int_modin._to_numpy()
    assert_scalar_or_array_equal(np.ones_like(int_modin), numpy.ones_like(int_numpy))


================================================
FILE: modin/tests/numpy/test_array_indexing.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import numpy
import pytest
from pandas.core.dtypes.common import is_list_like

import modin.numpy as np

from .utils import assert_scalar_or_array_equal


@pytest.mark.parametrize(
    "index",
    (
        # Scalar indices
        0,
        1,
        -1,
        # Slices
        slice(0, 1, 1),
        slice(1, -1, 1),
        # Lists
        [0, 2],
        [1, -1],
    ),
    ids=lambda i: f"index={i}",
)
def test_getitem_1d(index):
    """Indexing a 1D modin array gives the same value and shape as numpy."""
    values = [1, 2, 3, 4, 5]
    expected = numpy.array(values)[index]
    actual = np.array(values)[index]
    if not is_list_like(expected):
        # Scalar result: a plain equality check is enough.
        assert actual == expected
        return
    assert_scalar_or_array_equal(actual, expected)
    assert actual.shape == expected.shape


@pytest.mark.parametrize(
    "index",
    (
        # Scalar indices
        0,
        1,
        -1,
        # Slices
        slice(0, 1, 1),
        slice(1, -1, 1),
        slice(None, None, None),
        slice(None, 1, None),
        slice(0, 1, None),
        slice(0, None, None),
        # Lists
        [0, 2],
        [2, 0],
        [1, -1],
    ),
    ids=lambda i: f"index={i}",
)
def test_getitem_2d(index):
    """Indexing a 2D modin array gives the same value and shape as numpy."""
    rows = [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]
    expected = numpy.array(rows)[index]
    actual = np.array(rows)[index]
    if not is_list_like(expected):
        # Scalar result: a plain equality check is enough.
        assert actual == expected
        return
    assert_scalar_or_array_equal(actual, expected)
    assert actual.shape == expected.shape


def test_getitem_nested():
    """Chained indexing on modin arrays matches chained indexing on numpy arrays."""

    def check(actual, expected):
        # Array-like results also need matching shapes; scalars only need equality.
        if is_list_like(expected):
            assert_scalar_or_array_equal(actual, expected)
            assert actual.shape == expected.shape
        else:
            assert actual == expected

    # Index into the result of slicing a 1D array
    flat = [1, 2, 3, 4, 5]
    expected = numpy.array(flat)[1:3][1]
    actual = np.array(flat)[1:3][1]
    check(actual, expected)

    # Index into the result of indexing a 2D array
    nested = [[1, 2, 3], [4, 5, 6]]
    expected = numpy.array(nested)[1][1]
    actual = np.array(nested)[1][1]
    check(actual, expected)


@pytest.mark.parametrize(
    ("index", "value"),
    [
        # Scalar indices
        (0, 1),
        (1, 1),
        (-1, 1),
        # Slices
        (slice(0, 1, 1), [7]),
        (slice(1, -1, 1), [7, 8, 9]),
        # Slice with broadcast
        (slice(0, 4, 1), 7),
        # Lists
        ([0, 2], [7, 8]),
        ([1, -1], [7, 8]),
    ],
    ids=lambda i: f"{i}",
)
def test_setitem_1d(index, value):
    """Assigning into a 1D modin array leaves it equal to the numpy array."""
    values = [1, 2, 3, 4, 5]
    modin_arr = np.array(values)
    numpy_arr = numpy.array(values)
    # Apply the same assignment to both arrays, then compare the final contents.
    numpy_arr[index] = value
    modin_arr[index] = value
    assert_scalar_or_array_equal(modin_arr, numpy_arr)


def test_setitem_1d_error():
    """Assigning two values to a five-element slice raises a broadcast error."""
    target = np.array([1, 2, 3, 4, 5])
    too_short = [1, 2]
    with pytest.raises(ValueError, match="could not broadcast"):
        target[0:5] = too_short


@pytest.mark.parametrize(
    ("index", "value"),
    [
        # Scalar indices
        (0, 1),
        (1, 1),
        (-1, 1),
        # Slices
        (slice(0, 1, 1), [13]),  # arr[0:1:1] = [13]
        (slice(1, -1, 1), [13]),  # arr[1:-1:1] = [13]
        (slice(None, None, None), [7]),  # arr[:] = [7]
        (slice(None, 1, None), [7]),  # arr[:1] = [7]
        (slice(0, 1, None), [7]),  # arr[0:1] = [7]
        (slice(0, None, None), [7]),  # arr[0:] = [7]
        # Lists
        ([0, 2], [[13, 14, 15], [16, 17, 18]]),
        ([2, 0], [[13, 14, 15], [16, 17, 18]]),
        ([1, -1], [[13, 14, 15], [16, 17, 18]]),
    ],
    ids=lambda i: f"{i}",
)
def test_setitem_2d(index, value):
    """Assigning into a 2D modin array leaves it equal to the numpy array."""
    if index == [2, 0]:
        pytest.xfail("indexing with unsorted list would fail: see GH#5886")
    rows = [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]
    modin_arr = np.array(rows)
    numpy_arr = numpy.array(rows)
    # Apply the same assignment to both arrays, then compare the final contents.
    numpy_arr[index] = value
    modin_arr[index] = value
    assert_scalar_or_array_equal(modin_arr, numpy_arr)


================================================
FILE: modin/tests/numpy/test_array_linalg.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.


import numpy
import numpy.linalg as NLA
import pytest

import modin.numpy as np
import modin.numpy.linalg as LA
import modin.pandas as pd

from .utils import assert_scalar_or_array_equal


def test_dot_from_pandas_reindex():
    """``np.dot`` of a DataFrame and a Series gives the same result after reindexing the Series."""
    # Reindexing the dataframe does not change the output of dot
    # https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.dot.html
    frame = pd.DataFrame([[0, 1, -2, -1], [1, 1, 1, 1]])
    series = pd.Series([1, 1, 2, 1])
    baseline = np.dot(frame, series)
    reordered = series.reindex([1, 0, 2, 3])
    assert_scalar_or_array_equal(baseline, np.dot(frame, reordered))


def test_dot_1d():
    """``np.dot`` of two 1D modin arrays matches ``numpy.dot``."""
    lhs = numpy.random.randint(-100, 100, size=100)
    rhs = numpy.random.randint(-100, 100, size=100)
    expected = numpy.dot(lhs, rhs)
    actual = np.dot(np.array(lhs), np.array(rhs))
    assert_scalar_or_array_equal(actual, expected)


def test_dot_2d():
    """``np.dot`` of (100, 3) and (3, 50) modin arrays matches ``numpy.dot``."""
    lhs = numpy.random.randint(-100, 100, size=(100, 3))
    rhs = numpy.random.randint(-100, 100, size=(3, 50))
    expected = numpy.dot(lhs, rhs)
    actual = np.dot(np.array(lhs), np.array(rhs))
    assert_scalar_or_array_equal(actual, expected)


def test_dot_scalar():
    """``np.dot`` of a 2D modin array and an integer scalar matches ``numpy.dot``."""
    matrix = numpy.random.randint(-100, 100, size=(100, 3))
    scalar = numpy.random.randint(-100, 100)
    expected = numpy.dot(matrix, scalar)
    actual = np.dot(np.array(matrix), scalar)
    assert_scalar_or_array_equal(actual, expected)


def test_matmul_scalar():
    """The ``@`` operator between a modin array and a scalar raises ``ValueError``."""
    raw = numpy.random.randint(-100, 100, size=(100, 3))
    scalar = numpy.random.randint(-100, 100)
    matrix = np.array(raw)
    # Modin error message differs from numpy for readability; the original numpy error is:
    # ValueError: matmul: Input operand 1 does not have enough dimensions (has 0, gufunc
    # core with signature (n?,k),(k,m?)->(n?,m?) requires 1)
    with pytest.raises(ValueError):
        matrix @ scalar


def test_dot_broadcast():
    """``np.dot`` between 1D and 2D modin arrays matches ``numpy.dot``."""
    shape_pairs = (
        ((100, 3), (3,)),  # 2D @ 1D
        ((100,), (100, 3)),  # 1D @ 2D
    )
    for lhs_shape, rhs_shape in shape_pairs:
        lhs = numpy.random.randint(-100, 100, size=lhs_shape)
        rhs = numpy.random.randint(-100, 100, size=rhs_shape)
        expected = numpy.dot(lhs, rhs)
        actual = np.dot(np.array(lhs), np.array(rhs))
        assert_scalar_or_array_equal(actual, expected)


@pytest.mark.parametrize("axis", [None, 0, 1], ids=["axis=None", "axis=0", "axis=1"])
def test_norm_fro_2d(axis):
    """``LA.norm`` on a 2D modin array matches ``numpy.linalg.norm`` for each axis."""
    sample = numpy.random.randint(-10, 10, size=(100, 3))
    expected = NLA.norm(sample, axis=axis)
    actual = LA.norm(np.array(sample), axis=axis)
    # Result may be a scalar, in which case there is nothing to convert
    actual = actual._to_numpy() if isinstance(actual, np.array) else actual
    numpy.testing.assert_allclose(actual, expected, rtol=1e-12)


def test_norm_fro_1d():
    """``LA.norm`` on a 1D modin array matches ``numpy.linalg.norm``."""
    sample = numpy.random.randint(-10, 10, size=100)
    expected = NLA.norm(sample)
    actual = LA.norm(np.array(sample))
    numpy.testing.assert_allclose(actual, expected, rtol=1e-12)


================================================
FILE: modin/tests/numpy/test_array_logic.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import numpy
import pytest

import modin.numpy as np

from .utils import assert_scalar_or_array_equal

# Real-valued fixtures containing NaN, zero, both infinities and ordinary floats.
small_arr_r_1d = numpy.array([numpy.nan, 0, -numpy.inf, numpy.inf, 5, -0.1])
small_arr_r_2d = numpy.array(
    [
        [1, 0, -numpy.inf, numpy.inf, 0.5],
        [numpy.nan, 0, numpy.nan, 2, 0.3],
    ]
)

# Complex-valued fixtures: the same kinds of values plus entries with an imaginary part.
small_arr_c_1d = numpy.array([numpy.nan, 0, -numpy.inf, numpy.inf, 5, -0.1, 1 + 1.1j])
small_arr_c_2d = numpy.array(
    [
        [1j, 1, 0, -numpy.inf, numpy.inf, 0.5],
        [1 + 1.1j, numpy.nan, 0, numpy.nan, 2, 0.3],
    ]
)


@pytest.mark.parametrize("operand_shape", [100, (3, 100)])
@pytest.mark.parametrize("operator", ["any", "all"])
@pytest.mark.parametrize("axis", [None, 0, 1], ids=["axis=None", "axis=0", "axis=1"])
def test_unary_with_axis(operand_shape, operator, axis):
    """``np.any`` / ``np.all`` along an axis match the numpy functions of the same name."""
    is_one_dimensional = isinstance(operand_shape, int)
    if is_one_dimensional and axis == 1:
        pytest.skip("cannot use axis=1 on 1D arrays")
    operand = numpy.random.randint(-100, 100, size=operand_shape)
    expected = getattr(numpy, operator)(operand, axis=axis)
    actual = getattr(np, operator)(np.array(operand), axis=axis)
    failure_message = f"Unary operator {operator} failed."
    assert_scalar_or_array_equal(actual, expected, err_msg=failure_message)


def test_all_any_where():
    """Check ``all`` and ``any`` on a modin array with the ``where`` argument."""
    arr = np.array([[0, 1], [1, 0]])

    # --- all ---
    nonzero_only = np.array([[False, True], [True, False]])
    # Result should be np.bool_ True, since where mask isolates the non-zero elements
    assert arr.all(where=nonzero_only)

    first_only = np.array([[True, False], [False, False]])
    assert_scalar_or_array_equal(
        arr.all(where=first_only, axis=1), numpy.array([False, True])
    )

    # Results should contain vacuous Trues in the relevant shape
    for axis in (1, 0):
        assert_scalar_or_array_equal(
            arr.all(where=False, axis=axis), numpy.array([True, True])
        )
    assert bool(arr.all(where=False, axis=None))

    # --- any ---
    zero_only = np.array([[True, False], [False, True]])
    # Result should be np.bool_ False, since mask isolates only zero elements
    assert not arr.any(where=zero_only)

    top_right_only = np.array([[False, True], [False, False]])
    assert_scalar_or_array_equal(
        arr.any(where=top_right_only, axis=1), numpy.array([True, False])
    )

    # Results should contain vacuous Falses in the relevant shape
    for axis in (1, 0):
        assert_scalar_or_array_equal(
            arr.any(where=False, axis=axis), numpy.array([False, False])
        )
    assert not bool(arr.any(where=False, axis=None))


@pytest.mark.parametrize("data", [small_arr_c_2d, small_arr_c_1d], ids=["2D", "1D"])
@pytest.mark.parametrize(
    "operator", ["isfinite", "isinf", "isnan", "iscomplex", "isreal"]
)
def test_unary_with_complex(data, operator):
    """Element-wise predicates on complex input match the numpy functions of the same name."""
    expected = getattr(numpy, operator)(data)
    actual = getattr(np, operator)(np.array(data))
    assert_scalar_or_array_equal(actual, expected)


def test_isnat():
    """``np.isnat`` on a datetime64 array containing NaT matches ``numpy.isnat``."""
    stamps = numpy.array([numpy.datetime64("2016-01-01"), numpy.datetime64("NaT")])
    expected = numpy.isnat(stamps)
    actual = np.isnat(np.array(stamps))
    assert_scalar_or_array_equal(actual, expected)


@pytest.mark.parametrize("data", [small_arr_r_2d, small_arr_r_1d], ids=["2D", "1D"])
@pytest.mark.parametrize("operator", ["isneginf", "isposinf"])
def test_unary_without_complex(data, operator):
    """``isneginf`` / ``isposinf`` on real input match the numpy functions of the same name."""
    expected = getattr(numpy, operator)(data)
    actual = getattr(np, operator)(np.array(data))
    assert_scalar_or_array_equal(actual, expected)


@pytest.mark.parametrize("data", [small_arr_r_2d, small_arr_r_1d], ids=["2D", "1D"])
def test_logical_not(data):
    """``np.logical_not`` on real input matches ``numpy.logical_not``."""
    expected = numpy.logical_not(data)
    actual = np.logical_not(np.array(data))
    assert_scalar_or_array_equal(actual, expected)


@pytest.mark.parametrize("operand1_shape", [100, (3, 100)])
@pytest.mark.parametrize("operand2_shape", [100, (3, 100)])
@pytest.mark.parametrize("operator", ["logical_and", "logical_or", "logical_xor"])
def test_logical_binops(operand1_shape, operand2_shape, operator):
    """Binary logical operators on same-shaped operands match the numpy functions."""
    same_shape = operand1_shape == operand2_shape
    if not same_shape:
        pytest.xfail("TODO fix broadcasting behavior for binary logic operators")
    lhs = numpy.random.randint(-100, 100, size=operand1_shape)
    rhs = numpy.random.randint(-100, 100, size=operand2_shape)
    expected = getattr(numpy, operator)(lhs, rhs)
    actual = getattr(np, operator)(np.array(lhs), np.array(rhs))
    failure_message = f"Logic binary operator {operator} failed."
    assert_scalar_or_array_equal(actual, expected, err_msg=failure_message)


================================================
FILE: modin/tests/numpy/test_array_math.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import numpy
import pytest

import modin.numpy as np

from .utils import assert_scalar_or_array_equal


@pytest.mark.parametrize(
    "data",
    [
        [3, 2, 1, 1],
        [-87.434, -90.908, -87.152, -84.903],
        [-87.434, -90.908, np.nan, -87.152, -84.903],
    ],
    ids=["ints", "floats", "floats with nan"],
)
@pytest.mark.parametrize("op", ["argmin", "argmax"])
def test_argmax_argmin(data, op):
    """``np.argmin`` / ``np.argmax`` match the numpy functions of the same name."""
    reference_fn = getattr(numpy, op)
    expected = reference_fn(numpy.array(data))
    modin_fn = getattr(np, op)
    actual = modin_fn(np.array(data))
    assert_scalar_or_array_equal(actual, expected)


def test_rem_mod():
    """Tests remainder and mod, which, unlike the C/matlab equivalents, are identical in numpy."""
    dividend = numpy.array([[2, -1], [10, -3]])
    divisor = numpy.array(([-3, 3], [3, -7]))
    # Both operand signs vary, so the sign convention of the result is exercised.
    for name in ("remainder", "mod"):
        expected = getattr(numpy, name)(dividend, divisor)
        actual = getattr(np, name)(np.array(dividend), np.array(divisor))
        assert_scalar_or_array_equal(actual, expected)


================================================
FILE: modin/tests/numpy/test_array_shaping.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import numpy
import pytest

import modin.numpy as np

from .utils import assert_scalar_or_array_equal


@pytest.mark.parametrize("operand_shape", [100, (100, 3), (3, 100)])
def test_ravel(operand_shape):
    """``np.ravel`` on a modin array matches ``numpy.ravel``."""
    source = numpy.random.randint(-100, 100, size=operand_shape)
    expected = numpy.ravel(source)
    actual = np.ravel(np.array(source))
    assert_scalar_or_array_equal(actual, expected)


@pytest.mark.parametrize("operand_shape", [100, (100, 3), (3, 100)])
def test_shape(operand_shape):
    """``np.shape`` on a modin array returns the same value as ``numpy.shape``."""
    source = numpy.random.randint(-100, 100, size=operand_shape)
    expected = numpy.shape(source)
    actual = np.shape(np.array(source))
    assert actual == expected


@pytest.mark.parametrize("operand_shape", [100, (100, 3), (3, 100)])
def test_transpose(operand_shape):
    """``np.transpose`` on a modin array matches ``numpy.transpose``."""
    source = numpy.random.randint(-100, 100, size=operand_shape)
    expected = numpy.transpose(source)
    actual = np.transpose(np.array(source))
    assert_scalar_or_array_equal(actual, expected)


@pytest.mark.parametrize("axis", [0, 1])
def test_split_2d(axis):
    """
    Check that ``np.split`` on a 2D modin array matches ``numpy.split``.

    Both the integer form (equal sections) and the list form (split points)
    are exercised along the parametrized ``axis``.
    """
    x = numpy.random.randint(-100, 100, size=(6, 4))
    # Integer argument: split into N equal arrays along axis
    numpy_result = numpy.split(x, 2, axis=axis)
    modin_result = np.split(np.array(x), 2, axis=axis)
    # ``zip`` stops at the shorter input, so check the piece count explicitly;
    # otherwise a missing or extra sub-array would go unnoticed.
    assert len(modin_result) == len(numpy_result)
    for modin_entry, numpy_entry in zip(modin_result, numpy_result):
        assert_scalar_or_array_equal(modin_entry, numpy_entry)
    # List argument: split at specified indices
    idxs = [2, 3]
    numpy_result = numpy.split(x, idxs, axis=axis)
    modin_result = np.split(np.array(x), idxs, axis=axis)
    assert len(modin_result) == len(numpy_result)
    for modin_entry, numpy_entry in zip(modin_result, numpy_result):
        assert_scalar_or_array_equal(modin_entry, numpy_entry)


def test_split_2d_oob():
    """Check ``np.split`` against ``numpy.split`` when the last split point equals the axis length."""
    # Supplying an index out of bounds results in an empty sub-array, for which modin
    # would return a numpy array by default
    x = numpy.random.randint(-100, 100, size=(6, 4))
    idxs = [2, 3, 6]
    numpy_result = numpy.split(x, idxs)
    modin_result = np.split(np.array(x), idxs)
    # ``zip`` stops at the shorter input, so check the piece count explicitly;
    # the trailing empty sub-array is the entry truncation would drop.
    assert len(modin_result) == len(numpy_result)
    for modin_entry, numpy_entry in zip(modin_result, numpy_result):
        assert_scalar_or_array_equal(modin_entry, numpy_entry)


def test_split_2d_uneven():
    """Splitting three rows into two equal sections raises ``ValueError``."""
    three_rows = np.array(numpy.random.randint(-100, 100, size=(3, 2)))
    expected_message = "array split does not result in an equal division"
    with pytest.raises(ValueError, match=expected_message):
        np.split(three_rows, 2)


def test_hstack():
    """``np.hstack`` on modin arrays matches ``numpy.hstack`` for 2D and 1D inputs."""
    shape_pairs = (
        ((5, 3), (5, 2)),  # 2D arrays
        ((5,), (3,)),  # 1D arrays
    )
    for left_shape, right_shape in shape_pairs:
        left = numpy.random.randint(-100, 100, size=left_shape)
        right = numpy.random.randint(-100, 100, size=right_shape)
        expected = numpy.hstack((left, right))
        actual = np.hstack((np.array(left), np.array(right)))
        assert_scalar_or_array_equal(actual, expected)


def test_append():
    """``np.append`` matches ``numpy.append`` with and without an explicit axis."""
    # Examples taken from numpy docs
    head, tail = [1, 2, 3], [[4, 5, 6], [7, 8, 9]]
    expected = numpy.append(head, tail)
    actual = np.append(np.array(head), np.array(tail))
    assert_scalar_or_array_equal(actual, expected)

    # Append one row along axis 0; the appended values stay a plain list.
    base_rows = [[1, 2, 3], [4, 5, 6]]
    extra_row = [[7, 8, 9]]
    expected = numpy.append(base_rows, extra_row, axis=0)
    actual = np.append(np.array(base_rows), extra_row, axis=0)
    assert_scalar_or_array_equal(actual, expected)


@pytest.mark.xfail(reason="append error checking is incorrect: see GH#5896")
def test_append_error():
    """Appending a 1D array to a 2D array along axis 0 is expected to raise ``ValueError``."""
    matrix = np.array([[1, 2, 3], [4, 5, 6]])
    vector = np.array([7, 8, 9])
    with pytest.raises(ValueError):
        np.append(matrix, vector, axis=0)


================================================
FILE: modin/tests/numpy/utils.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import numpy

import modin.numpy as np


def assert_scalar_or_array_equal(x1, x2, err_msg=""):
    """
    Assert that the results of a modin computation and a numpy computation are equal.

    Any argument that is a modin array is first converted with `_to_numpy()`;
    other values are passed through untouched. The pair is then compared with
    `numpy.testing.assert_array_equal`, forwarding `err_msg`.
    """
    left, right = (
        value._to_numpy() if isinstance(value, np.array) else value
        for value in (x1, x2)
    )
    numpy.testing.assert_array_equal(left, right, err_msg=err_msg)


================================================
FILE: modin/tests/pandas/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.


================================================
FILE: modin/tests/pandas/conftest.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import pytest

from modin.config import Engine, StorageFormat


def pytest_collection_modifyitems(items):
    """
    Attach an ``xfail`` marker to specific ``test_dataframe_dt_index`` cases.

    The marker is only added when the configured engine is one of
    "Ray", "Unidist", "Dask" or "Python" and the storage format is not "Base".
    See https://github.com/modin-project/modin/issues/6399 for the tracked failure.

    Parameters
    ----------
    items : iterable
        Collected test items; each one is matched by its ``name`` attribute
        and marked through its ``add_marker`` method.
    """
    known_failures = (
        "test_dataframe_dt_index[3s-both-DateCol-_NoDefault.no_default]",
        "test_dataframe_dt_index[3s-right-DateCol-_NoDefault.no_default]",
    )
    try:
        # The storage format is only queried when the engine matches.
        engine_matches = Engine.get() in ("Ray", "Unidist", "Dask", "Python")
        if not engine_matches or StorageFormat.get() == "Base":
            return
        for test_item in items:
            if test_item.name not in known_failures:
                continue
            xfail_marker = pytest.mark.xfail(
                reason="https://github.com/modin-project/modin/issues/6399"
            )
            test_item.add_marker(xfail_marker)
    except ImportError:
        # No engine
        return


================================================
FILE: modin/tests/pandas/data/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.


================================================
FILE: modin/tests/pandas/data/blah.csv
================================================
,Presidents,Presidents,Presidents,Presidents,Presidents,Presidents,Presidents,Presidents,Presidents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,The
mes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,T
hemes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes
,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Them
es,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes
,Pure mentions,Pure mentions,Pure mentions,Pure tags,Pure tags,Pure tags,Mentions + Tags,Mentions + Tags,Mentions + Tags,Subcontinent Tags,Subcontinent Tags,Subcontinent Tags,Subcontinent Tags,Subcontinent Tags,Subcontinent Tags,Subcontinent Tags,Subcontinent Tags,Subcontinent Tags,Subcontinent Tags,Subcontinent Tags,Subcontinent Tags,Subcontinent Tags,Subcontinent Tags,Subcontinent Tags,Subcontinent Tags,Subcontinent Tags,Subcontinent Tags,Subcontinent Tags,Subcontinent Tags,Subcontinent Tags,Subcontinent Tags,Subcontinent Tags,Subcontinent Tags,"Subcontintents, No POTUS","Subcontintents, No POTUS","Subcontintents, No POTUS","Subcontintents, No POTUS","Subcontintents, No POTUS","Subcontintents, No POTUS","Subcontintents, No POTUS","Subcontintents, No POTUS","Subcontintents, No POTUS","Subcontintents, No POTUS","Subcontintents, No POTUS","Subcontintents, No POTUS","Subcontintents, No POTUS","Subcontintents, No POTUS","Subcontintents, No POTUS","Subcontintents, No POTUS","Subcontintents, No POTUS","Subcontintents, No POTUS","Subcontintents, No POTUS","Subcontintents, No POTUS","Subcontintents, No POTUS","Subcontintents, No POTUS","Subcontintents, No POTUS","Subcontintents, No POTUS",Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme 
Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme 
Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme 
Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme 
Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,"Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No 
POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No 
POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No 
POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No 
POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No 
POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No 
POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS","Themes, No POTUS"
,IND,DEP,DEP,IND,DEP,DEP,IND,DEP,DEP,IND,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,IND,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,IND,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP
,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,IND,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP
,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP
,all,obama_mention,trump_mention,pr_tags,obama_tag,trump_tag,all,obama_m+t,trump_m+t,pr_tags,Caribbean,Southern Asia,Middle Africa,Northern Europe,Southern Europe,Western Asia,South America,Polynesia,Antarctica,Eastern Africa,Australia and New Zealand,Western Europe,Western Africa,Eastern Europe,Central America,Northern America,South-eastern Asia,Southern Africa,Eastern Asia,Northern Africa,Melanesia,Micronesia,Central Asia,pr_tags_np,Caribbean_np,Southern Asia_np,Middle Africa_np,Northern Europe_np,Southern Europe_np,Western Asia_np,South America_np,Polynesia_np,Antarctica_np,Eastern Africa_np,Australia and New Zealand_np,Western Europe_np,Western Africa_np,Eastern Europe_np,Central America_np,Northern America_np,South-eastern Asia_np,Southern Africa_np,Eastern Asia_np,Northern Africa_np,Melanesia_np,Micronesia_np,Central Asia_np,pr_themes,Top/News,Top/News/Sports,Top/Features/Travel/Guides/Activities and Interests/Golf,Top/News/Sports/Golf,Top/News/Business,Top/Opinion/Opinion,Top/Opinion,Top/News/Education,Top/Classifieds/Job Market/Job Categories/Education,Top/Features/Travel/Guides/Destinations/North America/United States/New York/New York City,Top/News/U.S./Mid-Atlantic,Top/Opinion/Opinion/Op-Ed,Top/Features/Travel/Guides/Destinations/North America/United States,Top/Features/Travel/Guides/Destinations/North America,Top/News/Technology,Top/News/U.S.,Top/News/New York and Region,"Top/News/U.S./U.S. States, Territories and Possessions/New York",Top/Features/Travel/Guides/Destinations/North America/United States/California,Top/Features/Books,"Top/News/U.S./U.S. 
States, Territories and Possessions/California",Top/Opinion/Opinion/Editorials,Top/Features/Travel/Guides/Activities and Interests/Family,Top/Opinion/Opinion/Op-Ed/Contributors,Top/Features/Travel/Guides/Destinations/Europe,Top/Features/Movies/News and Features,Top/Features/Arts/Music,Top/Features/Travel/Guides/Activities and Interests/Music,Top/Features/Arts,Top/Classifieds/Paid Death Notices,Top/Features/Movies,Top/Features/Travel/Guides/Destinations/Asia/China,"Top/Classifieds/Job Market/Job Categories/Marketing, Advertising and PR",Top/Features/Travel/Guides/Destinations/Asia,"Top/News/U.S./U.S. States, Territories and Possessions/Arizona",Top/Features/Travel/Guides/Destinations/North America/United States/Arizona,Top/News/U.S./Rockies,Top/Features/Travel/Guides/Destinations/North America/United States/New Jersey,Top/Features/Books/Book Reviews,Top/Features/Travel/Guides/Destinations/Asia/Pakistan,Top/News/World/Asia Pacific,Top/News/World/Countries and Territories/Pakistan,Top/News/World,Top/News/World/Countries and Territories/Afghanistan,Top/Features/Travel/Guides/Destinations/North America/United States/South Carolina,Top/Features/Travel/Guides/Destinations/Middle East/Israel,Top/Features/Travel/Guides/Destinations/Middle East,Top/News/World/Middle East,Top/Features/Travel/Guides/Destinations/Middle East/Iran,Top/News/World/Countries and Territories/Israel,Top/Features/Travel/Guides/Destinations/North America/United States/Colorado,Top/News/New York and Region/New Jersey,Top/Features/Travel/Guides/Destinations/Central and South America,Top/Features/Travel/Guides/Destinations/Central and South America/Colombia,Top/Features/Travel/Guides/Destinations/Africa/Kenya,Top/Features/Travel/Guides/Activities and Interests/Food and Wine,"Top/News/U.S./U.S. 
States, Territories and Possessions/Massachusetts",Top/News/Sports/Pro Football/National Football League/Washington Redskins,education and schools,teachers and school employees,privacy,politics and government,law and legislation,tests and testing,computers and the internet,finances,abortion,no index terms from nytimes,privatization,books and literature,motion pictures,united states politics and government,christians and christianity,religion and churches,advertising and marketing,budgets and budgeting,elections,medicine and health,presidents and presidency (us),presidential elections (us),minorities (us),recordings (audio),handicapped,homosexuality,labor,suits and litigation,colleges and universities,recordings (video),blacks,public opinion,primaries,lobbying and lobbyists,hispanic-americans,"armament, defense and military forces",appointments and executive changes,copyrights,philanthropy,mathematics,recession and depression,reading and writing skills,writing and writers,ratings and rating systems,jews,language and languages,television,computer software,police,taxation,governors (us),oil (petroleum) and gasoline,news and news media,global warming,environment,islam,presidential election of 1988,drug abuse and traffic,marijuana,women,church-state relations,editorials,gun control,election issues,immigration and refugees,sex,"awards, decorations and honors",terrorism,nazi policies toward jews and minorities,weather,electronic mail,quotation of the day,decisions and verdicts,equal educational opportunities,libraries and librarians,advertising,baseball,illegal aliens,media,crime and criminals,roads and traffic,automobiles,ethics,art,property taxes,speech,freedom of speech and expression,political advertising,reviews,sex crimes,prostitution,insurance,hurricanes and tropical storms,hurricane katrina,floods,election results,strikes,united states armament and defense,basketball,horse racing,united states international relations,international relations,firearms,health 
insurance and managed care,health insurance,discrimination,music,airlines and airplanes,drugs (pharmaceuticals),diseases and conditions,banks and banking,college athletics,football,impeachment,frauds and swindling,new year,correction stories,trees and shrubs,home repairs,olympic games,apparel,home furnishings,earthquakes,home repairs and improvements,world trade center (nyc),fish and other marine life,office buildings and commercial properties,noise,legislatures and parliaments,tuition,presidential election of 2004,mayors,soccer,restaurants,unemployment,biographical information,radio,"conventions, national (us)",computer and video games,presidential election of 2008,super bowl,demonstrations and riots,marriages,deaths (obituaries),accidents and safety,standards and standardization,referendums,exercise,children and youth,murders and attempted murders,international trade and world market,wages and salaries,coaches and managers,archaeology and anthropology,palestinians,birth control and family planning,economic conditions and trends,united states economy,telephones and telecommunications,restoration and rehabilitation,dairy products,animals,sales,"prices (fares, fees and rates)",energy and power,atomic weapons,holidays and special occasions,medicaid,medicare,christmas,"war crimes, genocide and crimes against humanity",presidential election of 2000,fires and firefighters,fires and firemen,air pollution,robberies and thefts,conventions and conferences,food,diet and nutrition,stocks and bonds,electric light and power,light,blackouts and brownouts (electrical),theater,hijacking,pentagon building,"suspensions, dismissals and resignations",scholarships and fellowships,newspapers,travel and vacations,building (construction),games,torture,cellular telephones,sentences (criminal),bridges and tunnels,affirmative action,credit,birds,space,postal service,pornography and obscenity,steroids,embargoes and economic sanctions,smoking and tobacco,social security (us),child 
care,inventions and patents,vaccination and immunization,prisons and prisoners,retirement,currency,transit systems,subways,snow and snowstorms,housing,priests,company reports,corporations,layoffs and job reductions,magazines,aged,viruses,biological and chemical warfare,opera,parades,states (us),constitutional amendments,cancer,pensions and retirement plans,child abuse and neglect,government employees,culture,blacks (in us),radiation,documentary films and programs,retail stores and trade,spanish language,"mergers, acquisitions and divestitures",small business,poetry and poets,rock music,identification devices,space shuttle,atomic energy,interest rates,police brutality and misconduct,science and technology,running,marathon running,research,weight,homeless persons,cocaine and crack cocaine,suicides and suicide attempts,bicycles and bicycling,buses,pregnancy and obstetrics,contests and prizes,vetoes (us),jewels and jewelry,academy awards (oscars),parties (social),festivals,"divorce, separations and annulments",gas (fuel),photography,comedy and humor,world series,hotels and motels,serial murders,textiles,gambling,cooking and cookbooks,recipes,beverages,tennis,shoes and boots,dogs,"hockey, ice",extradition,boxing,"indians, american",violence,chemicals,sports of the times (times column),arson,vietnam war,boycotts,toys,cruises,ships and shipping,trade shows and fairs,mental health and disorders,wines,alcoholic beverages,dancing,golf,auctions,mutual funds,swimming,historic buildings and sites,weddings and engagements,freedom and human rights,athletics and sports,draft and recruitment (sports),hospitals,genetics and heredity,foreign aid,anthrax,acquired immune deficiency syndrome,insects,consumer protection,mines and mining,blood,doctors,nursing and nurses,airports,water,death and dying,dna (deoxyribonucleic acid),third world and developing countries,food contamination and poisoning,agriculture,livestock,acquired immune deficiency syndrome (aids),no index terms,regulation 
and deregulation of industry,taxicabs and taxicab drivers,meat,babies,shortages,nasdaq composite index,government bonds,security and warning systems,grain,transplants,freedom of the press,metals and minerals,computer security,bombs and explosives,population,mortgages,customs (tariff),farmers,automobile racing,biology and biochemistry,anatomy and physiology,production,factories and industrial plants,track and field,summer games (olympics),foreign investments,stadiums and arenas,foreign service,ncaa basketball tournament,waste materials and disposal,hunting and trapping,deportation,casinos,world cup (soccer),reproduction (biological),surgery and surgeons,kidnapping,heart,alcohol abuse,domestic violence,capital punishment,hostages,world war ii (1939-45),war and revolution,civil war and guerrilla warfare,jury system,entertainment and amusements,child abuse,sexual harassment,war crimes and criminals,censorship,railroads,asylum (political),legal profession,courts,political prisoners,prisoners of war,men,dow jones stock average,probation and parole,drunken and reckless driving,organized crime,futures and options trading,securities and commodities violations,assaults,physics,bribery,debating,recalls and bans of products,credit and money cards,drought,fines (penalties),perjury,bars,trades (sports),skiing,attacks on police,race,bankruptcies,bridge (card game),renting and leasing,condominiums,commuting,stations and terminals (passenger),shutdowns (institutional),beaches,families and family life,brain,book trade,futures trading,federal taxes (us),tax credits,assassinations and attempted assassinations,recycling of waste materials,automobile insurance and liability,delays (transportation),transportation,flowers and plants,steel and iron,chemistry,anti-semitism,soft drinks,consumer behavior,parks and other recreation areas,leisure,gardens and gardening,figure skating,ice skating,"fishing, sport",executives and management,coups d'etat and attempted coups 
d'etat,supermarkets,rescues,accounting and accountants,rain,judges,water pollution,satellites,trucks and trucking,playoff games,"fishing, commercial",antitrust actions and laws,royal family,personal finances,parking,utility vehicles and other light trucks,racketeering and racketeers,interscholastic athletics,chess,jazz,breast,explosions,foster care,classical music,intelligence,navies,architecture,organized labor,bakeries and baked products,espionage,local government,interior design,guards,computer chips,boats and boating,forests and forestry,zoning,hiring and promotion,area planning and renewal,marketing and merchandising,embezzlement,boards of directors,smuggling,land use policies,monuments and memorials,endangered and extinct species,"age, chronological",welfare (us),child custody and support,wiretapping and other eavesdropping devices and methods,ferries,history,collectors and collections,missiles and missile defense systems,arms control and limitation and disarmament,constitutions,shopping centers,tax evasion,design,free agents (sports),genetic engineering,pilots,military aircraft,liability for products,real estate,veterans,antiques,missiles,furniture,drug traffic,discount selling,savings,fruit,deaths,united states open (tennis),english language,records and achievements,united states foreign service,recording equipment,nightclubs and cabarets,beer,brokers and brokerage firms,buildings (structures),"arbitration, conciliation and mediation",hazardous and toxic substances,defense contracts,arms sales abroad,military personnel,missing persons,concerts and recitals,drug addiction and abuse,geographic profiles,geography,fast food industry,labeling and labels,military bases and installations,vice presidents and vice presidency (us),volunteers,layoffs (labor),income,gifts,treaties,shootings,city councils,social conditions and trends,urban areas,shows (exhibits),data processing (computers),stock prices and trading volume,office buildings,immigration and 
emigration,kurds,income tax,software products,personal computers,summit conferences,prices,bombs and bomb plots,racial relations,search and seizure,museums,"health, personal",contracts,industry profiles,refugees and expatriates,trials,disclosure of information,unemployment and job market,suburbs,special sections,reform and reorganization,cooperatives,federal aid (us),relocation of business,cable television,attorneys general,electronics,book reviews,"names, organizational","minorities (ethnic, racial, religious)","new models, design and products",terms not available,account changes,surveys and series,military action,whitewater case,company and organization profiles,savings and loan associations,art shows,independence movements,life styles,suits and claims against government,presidential election of 1996,forecasts,threats and threatening messages,persian gulf war,pr_themes_np,Top/News_np,Top/News/Sports_np,Top/Features/Travel/Guides/Activities and Interests/Golf_np,Top/News/Sports/Golf_np,Top/News/Business_np,Top/Opinion/Opinion_np,Top/Opinion_np,Top/News/Education_np,Top/Classifieds/Job Market/Job Categories/Education_np,Top/Features/Travel/Guides/Destinations/North America/United States/New York/New York City_np,Top/News/U.S./Mid-Atlantic_np,Top/Opinion/Opinion/Op-Ed_np,Top/Features/Travel/Guides/Destinations/North America/United States_np,Top/Features/Travel/Guides/Destinations/North America_np,Top/News/Technology_np,Top/News/U.S._np,Top/News/New York and Region_np,"Top/News/U.S./U.S. States, Territories and Possessions/New York_np",Top/Features/Travel/Guides/Destinations/North America/United States/California_np,Top/Features/Books_np,"Top/News/U.S./U.S. 
States, Territories and Possessions/California_np",Top/Opinion/Opinion/Editorials_np,Top/Features/Travel/Guides/Activities and Interests/Family_np,Top/Opinion/Opinion/Op-Ed/Contributors_np,Top/Features/Travel/Guides/Destinations/Europe_np,Top/Features/Movies/News and Features_np,Top/Features/Arts/Music_np,Top/Features/Travel/Guides/Activities and Interests/Music_np,Top/Features/Arts_np,Top/Classifieds/Paid Death Notices_np,Top/Features/Movies_np,Top/Features/Travel/Guides/Destinations/Asia/China_np,"Top/Classifieds/Job Market/Job Categories/Marketing, Advertising and PR_np",Top/Features/Travel/Guides/Destinations/Asia_np,"Top/News/U.S./U.S. States, Territories and Possessions/Arizona_np",Top/Features/Travel/Guides/Destinations/North America/United States/Arizona_np,Top/News/U.S./Rockies_np,Top/Features/Travel/Guides/Destinations/North America/United States/New Jersey_np,Top/Features/Books/Book Reviews_np,Top/Features/Travel/Guides/Destinations/Asia/Pakistan_np,Top/News/World/Asia Pacific_np,Top/News/World/Countries and Territories/Pakistan_np,Top/News/World_np,Top/News/World/Countries and Territories/Afghanistan_np,Top/Features/Travel/Guides/Destinations/North America/United States/South Carolina_np,Top/Features/Travel/Guides/Destinations/Middle East/Israel_np,Top/Features/Travel/Guides/Destinations/Middle East_np,Top/News/World/Middle East_np,Top/Features/Travel/Guides/Destinations/Middle East/Iran_np,Top/News/World/Countries and Territories/Israel_np,Top/Features/Travel/Guides/Destinations/North America/United States/Colorado_np,Top/News/New York and Region/New Jersey_np,Top/Features/Travel/Guides/Destinations/Central and South America_np,Top/Features/Travel/Guides/Destinations/Central and South America/Colombia_np,Top/Features/Travel/Guides/Destinations/Africa/Kenya_np,Top/Features/Travel/Guides/Activities and Interests/Food and Wine_np,"Top/News/U.S./U.S. 
States, Territories and Possessions/Massachusetts_np",Top/News/Sports/Pro Football/National Football League/Washington Redskins_np,education and schools_np,teachers and school employees_np,privacy_np,politics and government_np,law and legislation_np,tests and testing_np,computers and the internet_np,finances_np,abortion_np,no index terms from nytimes_np,privatization_np,books and literature_np,motion pictures_np,united states politics and government_np,christians and christianity_np,religion and churches_np,advertising and marketing_np,budgets and budgeting_np,elections_np,medicine and health_np,presidents and presidency (us)_np,presidential elections (us)_np,minorities (us)_np,recordings (audio)_np,handicapped_np,homosexuality_np,labor_np,suits and litigation_np,colleges and universities_np,recordings (video)_np,blacks_np,public opinion_np,primaries_np,lobbying and lobbyists_np,hispanic-americans_np,"armament, defense and military forces_np",appointments and executive changes_np,copyrights_np,philanthropy_np,mathematics_np,recession and depression_np,reading and writing skills_np,writing and writers_np,ratings and rating systems_np,jews_np,language and languages_np,television_np,computer software_np,police_np,taxation_np,governors (us)_np,oil (petroleum) and gasoline_np,news and news media_np,global warming_np,environment_np,islam_np,presidential election of 1988_np,drug abuse and traffic_np,marijuana_np,women_np,church-state relations_np,editorials_np,gun control_np,election issues_np,immigration and refugees_np,sex_np,"awards, decorations and honors_np",terrorism_np,nazi policies toward jews and minorities_np,weather_np,electronic mail_np,quotation of the day_np,decisions and verdicts_np,equal educational opportunities_np,libraries and librarians_np,advertising_np,baseball_np,illegal aliens_np,media_np,crime and criminals_np,roads and traffic_np,automobiles_np,ethics_np,art_np,property taxes_np,speech_np,freedom of speech and expression_np,political 
advertising_np,reviews_np,sex crimes_np,prostitution_np,insurance_np,hurricanes and tropical storms_np,hurricane katrina_np,floods_np,election results_np,strikes_np,united states armament and defense_np,basketball_np,horse racing_np,united states international relations_np,international relations_np,firearms_np,health insurance and managed care_np,health insurance_np,discrimination_np,music_np,airlines and airplanes_np,drugs (pharmaceuticals)_np,diseases and conditions_np,banks and banking_np,college athletics_np,football_np,impeachment_np,frauds and swindling_np,new year_np,correction stories_np,trees and shrubs_np,home repairs_np,olympic games_np,apparel_np,home furnishings_np,earthquakes_np,home repairs and improvements_np,world trade center (nyc)_np,fish and other marine life_np,office buildings and commercial properties_np,noise_np,legislatures and parliaments_np,tuition_np,presidential election of 2004_np,mayors_np,soccer_np,restaurants_np,unemployment_np,biographical information_np,radio_np,"conventions, national (us)_np",computer and video games_np,presidential election of 2008_np,super bowl_np,demonstrations and riots_np,marriages_np,deaths (obituaries)_np,accidents and safety_np,standards and standardization_np,referendums_np,exercise_np,children and youth_np,murders and attempted murders_np,international trade and world market_np,wages and salaries_np,coaches and managers_np,archaeology and anthropology_np,palestinians_np,birth control and family planning_np,economic conditions and trends_np,united states economy_np,telephones and telecommunications_np,restoration and rehabilitation_np,dairy products_np,animals_np,sales_np,"prices (fares, fees and rates)_np",energy and power_np,atomic weapons_np,holidays and special occasions_np,medicaid_np,medicare_np,christmas_np,"war crimes, genocide and crimes against humanity_np",presidential election of 2000_np,fires and firefighters_np,fires and firemen_np,air pollution_np,robberies and thefts_np,conventions and 
conferences_np,food_np,diet and nutrition_np,stocks and bonds_np,electric light and power_np,light_np,blackouts and brownouts (electrical)_np,theater_np,hijacking_np,pentagon building_np,"suspensions, dismissals and resignations_np",scholarships and fellowships_np,newspapers_np,travel and vacations_np,building (construction)_np,games_np,torture_np,cellular telephones_np,sentences (criminal)_np,bridges and tunnels_np,affirmative action_np,credit_np,birds_np,space_np,postal service_np,pornography and obscenity_np,steroids_np,embargoes and economic sanctions_np,smoking and tobacco_np,social security (us)_np,child care_np,inventions and patents_np,vaccination and immunization_np,prisons and prisoners_np,retirement_np,currency_np,transit systems_np,subways_np,snow and snowstorms_np,housing_np,priests_np,company reports_np,corporations_np,layoffs and job reductions_np,magazines_np,aged_np,viruses_np,biological and chemical warfare_np,opera_np,parades_np,states (us)_np,constitutional amendments_np,cancer_np,pensions and retirement plans_np,child abuse and neglect_np,government employees_np,culture_np,blacks (in us)_np,radiation_np,documentary films and programs_np,retail stores and trade_np,spanish language_np,"mergers, acquisitions and divestitures_np",small business_np,poetry and poets_np,rock music_np,identification devices_np,space shuttle_np,atomic energy_np,interest rates_np,police brutality and misconduct_np,science and technology_np,running_np,marathon running_np,research_np,weight_np,homeless persons_np,cocaine and crack cocaine_np,suicides and suicide attempts_np,bicycles and bicycling_np,buses_np,pregnancy and obstetrics_np,contests and prizes_np,vetoes (us)_np,jewels and jewelry_np,academy awards (oscars)_np,parties (social)_np,festivals_np,"divorce, separations and annulments_np",gas (fuel)_np,photography_np,comedy and humor_np,world series_np,hotels and motels_np,serial murders_np,textiles_np,gambling_np,cooking and 
cookbooks_np,recipes_np,beverages_np,tennis_np,shoes and boots_np,dogs_np,"hockey, ice_np",extradition_np,boxing_np,"indians, american_np",violence_np,chemicals_np,sports of the times (times column)_np,arson_np,vietnam war_np,boycotts_np,toys_np,cruises_np,ships and shipping_np,trade shows and fairs_np,mental health and disorders_np,wines_np,alcoholic beverages_np,dancing_np,golf_np,auctions_np,mutual funds_np,swimming_np,historic buildings and sites_np,weddings and engagements_np,freedom and human rights_np,athletics and sports_np,draft and recruitment (sports)_np,hospitals_np,genetics and heredity_np,foreign aid_np,anthrax_np,acquired immune deficiency syndrome_np,insects_np,consumer protection_np,mines and mining_np,blood_np,doctors_np,nursing and nurses_np,airports_np,water_np,death and dying_np,dna (deoxyribonucleic acid)_np,third world and developing countries_np,food contamination and poisoning_np,agriculture_np,livestock_np,acquired immune deficiency syndrome (aids)_np,no index terms_np,regulation and deregulation of industry_np,taxicabs and taxicab drivers_np,meat_np,babies_np,shortages_np,nasdaq composite index_np,government bonds_np,security and warning systems_np,grain_np,transplants_np,freedom of the press_np,metals and minerals_np,computer security_np,bombs and explosives_np,population_np,mortgages_np,customs (tariff)_np,farmers_np,automobile racing_np,biology and biochemistry_np,anatomy and physiology_np,production_np,factories and industrial plants_np,track and field_np,summer games (olympics)_np,foreign investments_np,stadiums and arenas_np,foreign service_np,ncaa basketball tournament_np,waste materials and disposal_np,hunting and trapping_np,deportation_np,casinos_np,world cup (soccer)_np,reproduction (biological)_np,surgery and surgeons_np,kidnapping_np,heart_np,alcohol abuse_np,domestic violence_np,capital punishment_np,hostages_np,world war ii (1939-45)_np,war and revolution_np,civil war and guerrilla warfare_np,jury system_np,entertainment 
and amusements_np,child abuse_np,sexual harassment_np,war crimes and criminals_np,censorship_np,railroads_np,asylum (political)_np,legal profession_np,courts_np,political prisoners_np,prisoners of war_np,men_np,dow jones stock average_np,probation and parole_np,drunken and reckless driving_np,organized crime_np,futures and options trading_np,securities and commodities violations_np,assaults_np,physics_np,bribery_np,debating_np,recalls and bans of products_np,credit and money cards_np,drought_np,fines (penalties)_np,perjury_np,bars_np,trades (sports)_np,skiing_np,attacks on police_np,race_np,bankruptcies_np,bridge (card game)_np,renting and leasing_np,condominiums_np,commuting_np,stations and terminals (passenger)_np,shutdowns (institutional)_np,beaches_np,families and family life_np,brain_np,book trade_np,futures trading_np,federal taxes (us)_np,tax credits_np,assassinations and attempted assassinations_np,recycling of waste materials_np,automobile insurance and liability_np,delays (transportation)_np,transportation_np,flowers and plants_np,steel and iron_np,chemistry_np,anti-semitism_np,soft drinks_np,consumer behavior_np,parks and other recreation areas_np,leisure_np,gardens and gardening_np,figure skating_np,ice skating_np,"fishing, sport_np",executives and management_np,coups d'etat and attempted coups d'etat_np,supermarkets_np,rescues_np,accounting and accountants_np,rain_np,judges_np,water pollution_np,satellites_np,trucks and trucking_np,playoff games_np,"fishing, commercial_np",antitrust actions and laws_np,royal family_np,personal finances_np,parking_np,utility vehicles and other light trucks_np,racketeering and racketeers_np,interscholastic athletics_np,chess_np,jazz_np,breast_np,explosions_np,foster care_np,classical music_np,intelligence_np,navies_np,architecture_np,organized labor_np,bakeries and baked products_np,espionage_np,local government_np,interior design_np,guards_np,computer chips_np,boats and boating_np,forests and 
forestry_np,zoning_np,hiring and promotion_np,area planning and renewal_np,marketing and merchandising_np,embezzlement_np,boards of directors_np,smuggling_np,land use policies_np,monuments and memorials_np,endangered and extinct species_np,"age, chronological_np",welfare (us)_np,child custody and support_np,wiretapping and other eavesdropping devices and methods_np,ferries_np,history_np,collectors and collections_np,missiles and missile defense systems_np,arms control and limitation and disarmament_np,constitutions_np,shopping centers_np,tax evasion_np,design_np,free agents (sports)_np,genetic engineering_np,pilots_np,military aircraft_np,liability for products_np,real estate_np,veterans_np,antiques_np,missiles_np,furniture_np,drug traffic_np,discount selling_np,savings_np,fruit_np,deaths_np,united states open (tennis)_np,english language_np,records and achievements_np,united states foreign service_np,recording equipment_np,nightclubs and cabarets_np,beer_np,brokers and brokerage firms_np,buildings (structures)_np,"arbitration, conciliation and mediation_np",hazardous and toxic substances_np,defense contracts_np,arms sales abroad_np,military personnel_np,missing persons_np,concerts and recitals_np,drug addiction and abuse_np,geographic profiles_np,geography_np,fast food industry_np,labeling and labels_np,military bases and installations_np,vice presidents and vice presidency (us)_np,volunteers_np,layoffs (labor)_np,income_np,gifts_np,treaties_np,shootings_np,city councils_np,social conditions and trends_np,urban areas_np,shows (exhibits)_np,data processing (computers)_np,stock prices and trading volume_np,office buildings_np,immigration and emigration_np,kurds_np,income tax_np,software products_np,personal computers_np,summit conferences_np,prices_np,bombs and bomb plots_np,racial relations_np,search and seizure_np,museums_np,"health, personal_np",contracts_np,industry profiles_np,refugees and expatriates_np,trials_np,disclosure of information_np,unemployment and 
job market_np,suburbs_np,special sections_np,reform and reorganization_np,cooperatives_np,federal aid (us)_np,relocation of business_np,cable television_np,attorneys general_np,electronics_np,book reviews_np,"names, organizational_np","minorities (ethnic, racial, religious)_np","new models, design and products_np",terms not available_np,account changes_np,surveys and series_np,military action_np,whitewater case_np,company and organization profiles_np,savings and loan associations_np,art shows_np,independence movements_np,life styles_np,suits and claims against government_np,presidential election of 1996_np,forecasts_np,threats and threatening messages_np,persian gulf war_np
1,blah,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
6,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


================================================
FILE: modin/tests/pandas/data/issue_1930.csv
================================================
,col1,col2,col3,col4,col5
0,0,4,8,12,0
1,1,5,9,13,0
2,2,6,10,14,0
3,3,7,11,15,0


================================================
FILE: modin/tests/pandas/data/issue_2074.csv
================================================
one,two, three, five, six, seven, eight
three,three, five, six, seven, eight, nine
one,four, three, five, six, seven, eight
one,two, three, five, six, seven, eight
one,two, three, five, six, seven, eight
one,two, three, five, six, seven, eight
three,four, five, six, seven, eight, nine
one,two, three, five, six, seven, eight
three,four, five, six, seven, eight, nine
three,four, five, six, seven, eight, nine
three,four, five, six, seven, eight, nine
three,four, five, six, seven, eight, nine


================================================
FILE: modin/tests/pandas/data/issue_2239.csv
================================================
1585542839.000000, 1585542839.000000, 1585542839.000000
32.000000, 32.000000, 32.000000
-38,-14,51
-38,-13,51
-38,-14,51
-38,-14,50
-38,-13,51
-38,-14,50
-38,-14,51
-38,-13,51
-38,-14,51
-38,-13,51
-38,-14,51
-38,-14,50
-38,-13,51
-38,-14,50
-38,-14,51
-38,-13,51
-38,-14,51
-38,-13,51
-38,-14,51
-38,-14,50
-38,-13,51
-38,-14,50
-38,-14,51
-38,-13,51
-38,-14,51
-38,-13,51
-38,-14,51
-38,-14,50
-38,-13,51
-38,-14,50
-38,-14,51
-38,-13,51
-38,-14,51
-38,-13,51
-38,-14,51
-38,-14,50
-38,-13,51
-38,-14,50
-38,-14,51
-38,-13,51
-38,-14,51
-38,-13,51
-38,-14,51
-38,-14,50
-38,-13,51
-38,-14,50
-38,-14,51
-38,-13,51
-38,-14,51
-38,-13,51
-38,-14,51
-38,-14,50
-38,-13,51
-38,-14,50
-38,-14,51
-38,-13,51
-38,-14,51
-38,-13,51
-38,-14,51
-38,-14,50
-38,-13,51
-38,-14,50
-38,-14,51
-38,-13,51
-38,-14,51
-38,-13,51
-38,-14,51
-38,-14,50
-38,-13,51
-38,-14,50
-38,-14,51
-38,-13,51
-38,-14,51
-38,-13,51
-38,-14,51
-38,-14,50
-38,-13,51
-38,-14,50
-38,-14,51
-38,-13,51
-38,-14,51
-38,-13,51
-38,-14,51
-38,-14,50
-38,-13,51
-38,-14,50
-38,-14,51
-38,-13,51
-38,-14,51
-38,-13,51
-38,-14,51
-38,-14,50
-38,-13,51
-38,-14,50
-38,-14,51
-38,-13,51
-38,-14,51
-38,-13,51
-38,-14,51
-38,-14,50
-38,-13,51
-38,-14,50
-38,-14,51
-38,-13,51
-38,-14,51
-38,-13,51
-38,-14,51
-38,-14,50
-38,-13,51
-38,-14,50
-38,-14,51
-38,-13,51
-38,-14,51
-38,-13,51
-38,-14,51
-38,-14,50
-38,-13,51
-38,-14,50
-38,-14,51
-38,-13,51
-38,-14,51
-38,-13,51
-38,-14,51
-38,-14,50
-38,-13,51
-38,-14,50
-38,-14,51
-38,-13,51
-38,-14,51
-38,-13,51
-38,-14,51
-38,-14,50
-38,-13,51
-38,-14,50
-38,-14,51
-38,-13,51
-38,-14,51
-38,-13,51
-38,-14,51
-38,-14,50
-38,-13,51
-38,-14,50
-38,-14,51
-38,-13,51


================================================
FILE: modin/tests/pandas/data/issue_3119.csv
================================================
,a,b,c
i1,0,1,2
i2,3,4,5
i3,6,7,8
i4,9,10,11


================================================
FILE: modin/tests/pandas/data/issue_4543.csv
================================================
str_data,float_data,country
fanta,3.14,usa
cocacola,9.8,france
sprite,89.2,china


================================================
FILE: modin/tests/pandas/data/issue_976.csv
================================================
1;11800000560005;11800000560005;  ;;-;. ;. i; ; ;105.6000
1;10200007400477;10200007400477;  ;;-;. ;;³ ; ;696.6400
1;11100008540930;11100008540930;  ;2;9;. ;.; ; ;124.4800
1;12300000051493;12300000051493;  ;;50;. ;.;'- ; ;-0.4700
1;12300000117460;12300000117460; ³ ;;60;. ;;'- ; ;221.0400


================================================
FILE: modin/tests/pandas/data/multiple_csv/test_data0.csv
================================================
a,b,c
0,True,x
1,False,y
2,True,z
3,False,w


================================================
FILE: modin/tests/pandas/data/multiple_csv/test_data1.csv
================================================
a,b,c
4,True,m
5,False,n
6,True,t
7,True,l


================================================
FILE: modin/tests/pandas/data/newlines.csv
================================================
col1,col2,col3,col4
"This is a very long
string with several
newline characters
that will probably cause some
problem for Modin
and I suspect that
we
will hopefully
reproduce the issue",2,3,4
"H",2,3,4
"I",2,3,4
"J",2,3,4
"And there is another
string with several
newline characters
that will probably cause some
problem for Modin
and I suspect that
we
will hopefully
reproduce the issue",2,3,4
"I",2,3,4
"J",2,3,4
"H",2,3,4
"I",2,3,4
"J",2,3,4
"H",2,3,4
"I",2,3,4
"And there is another
string with several
newline characters
that will probably cause some
problem for Modin
and I suspect that
we
will hopefully
reproduce the issue",2,"And
there is another
string with several
newline characters
that will probably cause some
problem for Modin
and I suspect that
we
will hopefully
reproduce the issue",4
"I",2,3,4
"J",2,3,4
"H",2,3,4
"I",2,3,4
"J",2,3,4
"H",2,3,4
"I",2,3,4
"And there is another
string with several
newline characters
that will probably cause some
problem for Modin
and I suspect that
we
will hopefully
reproduce the issue",2,3,4


================================================
FILE: modin/tests/pandas/data/test_categories.csv
================================================
111,AAA
222,BBB
333,CCC


================================================
FILE: modin/tests/pandas/data/test_categories.json
================================================
{"one":{"0":111,"1":222,"2":333},"two":{"0":"AAA","1":"BBB","2":"CCC"}}

================================================
FILE: modin/tests/pandas/data/test_data.fwf
================================================
ACW000116041961TAVG -142  k  183  k  419  k  720  k 1075  k 1546  k 1517  k 1428  k 1360  k 1121  k  457  k  -92  k
ACW000116041962TAVG   60  k   32  k -207  k  582  k  855  k 1328  k 1457  k 1340  k 1110  k  941  k  270  k -179  k
ACW000116041963TAVG -766  k -606  k -152  k  488  k 1171  k 1574  k 1567  k 1543  k 1279  k  887  k  513  k -161  k
ACW000116041964TAVG    9  k -138  k    2  k  685  k 1166  k 1389  k 1453  k 1504  k 1168  k  735  k  493  k   59  k
ACW000116041965TAVG   -9  k -158  k  -15  k  537  k  934  k 1447  k 1434  k 1424  k 1324  k  921  k  -22  k -231  k
ACW000116041966TAVG -490  k -614  k  108  k  246  k 1082  k 1642  k 1620  k 1471  k 1195  k  803  k  329  k    2  k
ACW000116041967TAVG -270  k   36  k  397  k  481  k 1052  k 1373  k 1655  k 1598  k 1318  k  997  k  559  k  -96  k
ACW000116041968TAVG -306  k -183  k  220  k  714  k  935  k 1635  k 1572  k 1718  k 1331  k  781  k  180  k  -56  k
ACW000116041969TAVG -134  k -494  k -185  k  497  k  962  k 1634  k 1687  k 1773  k 1379  k  932  k  321  k -275  k
ACW000116041970TAVG -483  k -704  k  -75  k  261  k 1093  k 1724  k 1470  k 1609  k 1163  k  836  k  300  k   73  k
ACW000116041971TAVG   -6  k   83  k  -40  k  472  k 1180  k 1411  k 1700  k 1600  k 1165  k  908  k  361  k  383  k
ACW000116041972TAVG -377  k   -4  k  250  k  556  k 1117  k 1444  k 1778  k 1545  k 1073  k  797  k  481  k  404  k
ACW000116041973TAVG   61  k  169  k  453  k  472  k 1075  k 1545  k 1866  k 1579  k 1199  k  563  k  154  k   11  k
ACW000116041974TAVG  191  k  209  k  339  k  748  k 1094  k 1463  k 1498  k 1541  k 1319  k  585  k  428  k  335  k
ACW000116041975TAVG  346  k   88  k  198  k  488  k 1165  k 1483  k 1756  k 1906  k 1374  k  845  k  406  k  387  k
ACW000116041976TAVG -163  k  -62  k -135  k  502  k 1128  k 1461  k 1822  k 1759  k 1136  k  715  k  458  k -205  k
ACW000116041977TAVG -192  k -279  k  234  k  332  k 1128  k 1566  k 1565  k 1556  k 1126  k  949  k  421  k  162  k
ACW000116041978TAVG   55  k -354  k   66  k  493  k 1155  k 1552  k 1564  k 1555  k 1061  k  932  k  688  k -464  k
ACW000116041979TAVG -618  k -632  k   35  k  474  k  993  k 1566  k 1484  k 1483  k 1229  k  647  k  412  k  -40  k
ACW000116041980TAVG -340  k -500  k  -35  k  524  k 1071  k 1534  k 1655  k 1502  k 1269  k  660  k  138  k  125  k

================================================
FILE: modin/tests/pandas/data/test_data.json
================================================
{"Duration":60,"Pulse":110,"Maxpulse":130,"Calories":409}
{"Duration":60,"Pulse":117,"Maxpulse":145,"Calories":479}
{"Duration":60,"Pulse":103,"Maxpulse":135,"Calories":340}
{"Duration":45,"Pulse":109,"Maxpulse":175,"Calories":282}
{"Duration":45,"Pulse":117,"Maxpulse":148,"Calories":406}
{"Duration":60,"Pulse":102,"Maxpulse":127,"Calories":300}


================================================
FILE: modin/tests/pandas/data/test_delim.csv
================================================
a|b|c|d|e
1|2|3|4|5
2|3|4|5|6
3|4|5|6|7
4|5|6|7|8
5|6|7|8|9
6|7|8|9|0


================================================
FILE: modin/tests/pandas/data/test_different_columns_in_rows.json
================================================
{"a1": 1}
{"a2": 1}
{"a3": 2}
{"a4": 1}
{"a5": 2}
{"a6": 1}
{"a7": 2}
{"a8": 1}
{"a9": 2}
{"a10": 1}
{"a11": 2}
{"a12": 1}
{"a13": 2}
{"a14": 1}
{"a15": 2}
{"a16": 2}


================================================
FILE: modin/tests/pandas/data/test_null_col.csv
================================================
a,b,c
1,1,
2,2,
3,3,


================================================
FILE: modin/tests/pandas/data/test_time_parsing.csv
================================================
timestamp,year,month,date,symbol,high,low,open,close,spread,volume
2010-04-01 00:00:00,2010,04,01,USD/JPY,93.52600,93.36100,93.51800,93.38200,0.00500,3049
2010-04-01 00:30:00,2010,04,01,USD/JPY,93.47500,93.35200,93.38500,93.39100,0.00600,2251
2010-04-01 01:00:00,2010,04,01,USD/JPY,93.42100,93.32600,93.39100,93.38400,0.00600,1577

================================================
FILE: modin/tests/pandas/data/test_usecols.csv
================================================
a,b,c,d,e
1,2,3,4,5
2,3,4,5,6
3,4,5,6,7
4,5,6,7,8
5,6,7,8,9
6,7,8,9,0


================================================
FILE: modin/tests/pandas/dataframe/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.


================================================
FILE: modin/tests/pandas/dataframe/test_binary.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import matplotlib
import numpy as np
import pandas
import pytest

import modin.pandas as pd
from modin.config import NPartitions, StorageFormat
from modin.core.dataframe.pandas.partitioning.axis_partition import (
    PandasDataframeAxisPartition,
)
from modin.core.storage_formats.pandas.query_compiler_caster import (
    _assert_casting_functions_wrap_same_implementation,
)
from modin.tests.pandas.utils import (
    CustomIntegerForAddition,
    NonCommutativeMultiplyInteger,
    create_test_dfs,
    default_to_pandas_ignore_string,
    df_equals,
    eval_general,
    test_data,
    test_data_keys,
    test_data_values,
)
from modin.tests.test_utils import (
    df_or_series_using_native_execution,
    warns_that_defaulting_to_pandas_if,
)
from modin.utils import get_current_execution

# Set the Modin ``NPartitions`` config value to 4 when this module is imported
# (presumably so the test frames get split across several partitions -- TODO confirm).
NPartitions.put(4)

# Force matplotlib to not use any Xwindows backend.
matplotlib.use("Agg")

# Our configuration in pytest.ini requires that we explicitly catch all
# instances of defaulting to pandas, but some test modules, like this one,
# have too many such instances, so the corresponding warning filter is applied
# to every test in this module through ``pytestmark``.
pytestmark = pytest.mark.filterwarnings(default_to_pandas_ignore_string)


@pytest.mark.parametrize(
    "other",
    [
        lambda df, axis: 4,
        lambda df, axis: list(df[df.columns[0]]) if axis != "columns" else df.iloc[0],
        lambda df, axis: {
            name: pos + 1
            for pos, name in enumerate(df.axes[0 if axis == "rows" else 1])
        },
        lambda df, axis: {
            (name if pos % 2 else f"random_key{pos}"): pos + 1
            for pos, name in enumerate(df.axes[0 if axis == "rows" else 1][::-1])
        },
    ],
    ids=[
        "scalar",
        "series_or_list",
        "dictionary_keys_equal_columns",
        "dictionary_keys_unequal_columns",
    ],
)
@pytest.mark.parametrize("axis", ["rows", "columns"])
@pytest.mark.parametrize(
    "op",
    [
        *("add", "radd", "sub", "rsub", "mod", "rmod", "pow", "rpow"),
        *("truediv", "rtruediv", "mul", "rmul", "floordiv", "rfloordiv"),
    ],
)
@pytest.mark.parametrize("backend", [None, "pyarrow"])
def test_math_functions(other, axis, op, backend):
    """Run an arithmetic DataFrame method through ``eval_general``.

    ``other`` is a factory that builds the right-hand operand from the frame
    and the axis name; the method named by ``op`` is then called with that
    operand along ``axis`` on frames built from ``float_nan_data``.
    """
    frame_data = test_data["float_nan_data"]

    # Row-wise floor division and reflected modulo are expected to fail; the
    # original notes single out the "series_or_list" operand as the culprit.
    if axis == "rows" and op in ("floordiv", "rfloordiv", "rmod"):
        pytest.xfail(reason="different behavior")

    if backend == "pyarrow" and op in ("mod", "rmod"):
        pytest.skip(reason="These functions are not implemented in pandas itself")

    def apply_op(df):
        return getattr(df, op)(other(df, axis), axis=axis)

    eval_general(*create_test_dfs(frame_data, backend=backend), apply_op)


@pytest.mark.parametrize("other", [lambda df: 2, lambda df: df])
def test___divmod__(other):
    """Check ``divmod(df, other)`` for a scalar and for the frame itself."""

    def apply_divmod(frame):
        return divmod(frame, other(frame))

    modin_frame, pandas_frame = create_test_dfs(test_data["float_nan_data"])
    eval_general(modin_frame, pandas_frame, apply_divmod)


def test___rdivmod__():
    """Check the reflected form ``divmod(2, df)``."""

    def apply_rdivmod(frame):
        return divmod(2, frame)

    modin_frame, pandas_frame = create_test_dfs(test_data["float_nan_data"])
    eval_general(modin_frame, pandas_frame, apply_rdivmod)


@pytest.mark.parametrize(
    "other",
    [lambda df: df[: -(2**4)], lambda df: df[df.columns[0]].reset_index(drop=True)],
    ids=["check_missing_value", "check_different_index"],
)
@pytest.mark.parametrize("fill_value", [None, 3.0])
@pytest.mark.parametrize(
    "op",
    [
        *("add", "radd", "sub", "rsub", "mod", "rmod", "pow", "rpow"),
        *("truediv", "rtruediv", "mul", "rmul", "floordiv", "rfloordiv"),
    ],
)
def test_math_functions_fill_value(other, fill_value, op, request):
    """Run arithmetic methods with ``fill_value`` along axis 0.

    For the "check_different_index" operand combined with ``fill_value=3.0``
    a ``NotImplementedError`` is the expected outcome; otherwise no exception
    is expected.
    """
    source_data = test_data["int_data"]
    modin_df = pd.DataFrame(source_data)
    pandas_df = pandas.DataFrame(source_data)

    wants_error = (
        "check_different_index" in request.node.callspec.id and fill_value == 3.0
    )
    expected_exception = (
        NotImplementedError("fill_value 3.0 not supported.") if wants_error else None
    )

    def apply_op(df):
        method = getattr(df, op)
        return method(other(df), axis=0, fill_value=fill_value)

    # This test causes an empty slice to be generated thus triggering:
    # https://github.com/modin-project/modin/issues/5974
    # so dtypes are not checked on the "BaseOnPython" execution.
    comparator_options = {"check_dtypes": get_current_execution() != "BaseOnPython"}

    eval_general(
        modin_df,
        pandas_df,
        apply_op,
        expected_exception=expected_exception,
        comparator_kwargs=comparator_options,
    )


@pytest.mark.parametrize(
    "op",
    [
        *("add", "radd", "sub", "rsub", "mod", "rmod", "pow", "rpow"),
        *("truediv", "rtruediv", "mul", "rmul", "floordiv", "rfloordiv"),
    ],
)
def test_math_functions_level(op):
    """Call an arithmetic method with ``level=1`` on a MultiIndex-ed frame.

    Only the warning behaviour is checked here; the result is not compared
    against pandas.
    """
    modin_df = pd.DataFrame(test_data["int_data"])
    index_tuples = [(label // 4, label // 2, label) for label in modin_df.index]
    modin_df.index = pandas.MultiIndex.from_tuples(index_tuples)

    # A default-to-pandas warning is expected unless the frame uses native execution.
    expect_default_warning = not df_or_series_using_native_execution(modin_df)
    with warns_that_defaulting_to_pandas_if(expect_default_warning):
        # The frame is used as its own right-hand operand as a sanity check.
        getattr(modin_df, op)(modin_df, axis=0, level=1)


@pytest.mark.parametrize(
    "math_op, alias",
    [
        ("truediv", "divide"),
        ("truediv", "div"),
        ("rtruediv", "rdiv"),
        ("mul", "multiply"),
        ("sub", "subtract"),
        ("add", "__add__"),
        ("radd", "__radd__"),
        ("truediv", "__truediv__"),
        ("rtruediv", "__rtruediv__"),
        ("floordiv", "__floordiv__"),
        ("rfloordiv", "__rfloordiv__"),
        ("mod", "__mod__"),
        ("rmod", "__rmod__"),
        ("mul", "__mul__"),
        ("rmul", "__rmul__"),
        ("pow", "__pow__"),
        ("rpow", "__rpow__"),
        ("sub", "__sub__"),
        ("rsub", "__rsub__"),
    ],
)
def test_math_alias(math_op, alias):
    """Assert that ``alias`` and ``math_op`` wrap the same implementation.

    Both names are looked up on ``pd.DataFrame`` and handed to
    ``_assert_casting_functions_wrap_same_implementation``.
    """
    canonical_method = getattr(pd.DataFrame, math_op)
    alias_method = getattr(pd.DataFrame, alias)
    _assert_casting_functions_wrap_same_implementation(canonical_method, alias_method)


@pytest.mark.parametrize("other", ["as_left", 4, 4.0, "a"])
@pytest.mark.parametrize("op", ["eq", "ge", "gt", "le", "lt", "ne"])
@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_comparison(data, op, other, request):
    """Run a comparison method against the frame itself, a number or a string.

    The sentinel ``"as_left"`` means the frame is compared with itself.
    Comparing with the string ``"a"`` is an expected failure for the
    "int_data" case and is expected to raise ``TypeError`` for the
    "float_nan_data" case.
    """
    case_id = request.node.callspec.id
    expected_exception = None
    if other == "a":
        if "int_data" in case_id:
            pytest.xfail(reason="https://github.com/modin-project/modin/issues/7019")
        if "float_nan_data" in case_id:
            expected_exception = TypeError(
                "Invalid comparison between dtype=float64 and str"
            )

    def operation(df):
        right_operand = df if other == "as_left" else other
        return getattr(df, op)(right_operand)

    eval_general(
        *create_test_dfs(data),
        operation=operation,
        expected_exception=expected_exception,
    )


@pytest.mark.skipif(
    StorageFormat.get() != "Pandas",
    reason="Modin on this engine doesn't create virtual partitions.",
)
@pytest.mark.parametrize(
    "left_virtual,right_virtual", [(True, False), (False, True), (True, True)]
)
def test_virtual_partitions(left_virtual: bool, right_virtual: bool):
    """Add two Modin frames where either operand may hold virtual partitions.

    This test covers https://github.com/modin-project/modin/issues/4691
    """
    row_count: int = 1000
    pandas_frame = pandas.DataFrame(list(range(row_count)))

    def build_operand(use_virtual):
        if use_virtual:
            stacked = pd.concat(
                [pd.DataFrame([value]) for value in range(row_count)],
                ignore_index=True,
            )
            # Modin should rebalance the partitions after the concat,
            # producing virtual partitions.
            head = stacked._query_compiler._modin_frame._partitions[0][0]
            assert isinstance(head, PandasDataframeAxisPartition)
            return stacked
        return pd.DataFrame(pandas_frame)

    left_operand = build_operand(left_virtual)
    right_operand = build_operand(right_virtual)
    df_equals(left_operand + right_operand, pandas_frame + pandas_frame)


@pytest.mark.parametrize("op", ["eq", "ge", "gt", "le", "lt", "ne"])
@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_multi_level_comparison(data, op):
    """Call a comparison method with ``level=1`` on a MultiIndex-ed frame.

    Only the warning behaviour is checked here; the result is not compared
    against pandas.
    """
    modin_df_multi_level = pd.DataFrame(data)
    modin_df_multi_level.index = pandas.MultiIndex.from_tuples(
        [(label // 4, label // 2, label) for label in modin_df_multi_level.index]
    )

    # A default-to-pandas warning is expected unless the frame uses native execution.
    expect_default_warning = not df_or_series_using_native_execution(
        modin_df_multi_level
    )
    with warns_that_defaulting_to_pandas_if(expect_default_warning):
        # The frame is used as its own right-hand operand as a sanity check.
        getattr(modin_df_multi_level, op)(modin_df_multi_level, axis=0, level=1)


@pytest.mark.parametrize(
    "frame1_data,frame2_data,expected_pandas_equals",
    [
        pytest.param({}, {}, True, id="two_empty_dataframes"),
        pytest.param([[1]], [[0]], False, id="single_unequal_values"),
        pytest.param([[None]], [[None]], True, id="single_none_values"),
        pytest.param([[np.nan]], [[np.nan]], True, id="single_nan_values"),
        pytest.param({1: [10]}, {1.0: [10]}, True, id="different_column_types"),
        pytest.param({1: [10]}, {2: [10]}, False, id="different_columns"),
        pytest.param(
            pandas.DataFrame({1: [10]}, index=[1]),
            pandas.DataFrame({1: [10]}, index=[1.0]),
            True,
            id="different_index_types",
        ),
        pytest.param(
            pandas.DataFrame({1: [10]}, index=[1]),
            pandas.DataFrame({1: [10]}, index=[2]),
            False,
            id="different_indexes",
        ),
        pytest.param({1: [10]}, {1: [10.0]}, False, id="different_value_types"),
        pytest.param(
            [[1, 2], [3, 4]],
            [[1, 2], [3, 4]],
            True,
            id="equal_two_by_two_dataframes",
        ),
        pytest.param(
            [[1, 2], [3, 4]],
            [[5, 2], [3, 4]],
            False,
            id="unequal_two_by_two_dataframes",
        ),
        pytest.param([[1, 1]], [[1]], False, id="different_row_lengths"),
        pytest.param([[1], [1]], [[1]], False, id="different_column_lengths"),
    ],
)
def test_equals(frame1_data, frame2_data, expected_pandas_equals):
    """Check that Modin's ``equals`` gives the same answer as pandas'.

    The pandas result is first validated against ``expected_pandas_equals``;
    the Modin frame is then compared with both a Modin and a pandas frame.
    """
    modin_df1 = pd.DataFrame(frame1_data)
    pandas_df1 = pandas.DataFrame(frame1_data)
    modin_df2 = pd.DataFrame(frame2_data)
    pandas_df2 = pandas.DataFrame(frame2_data)

    pandas_equals = pandas_df1.equals(pandas_df2)

    # Pick the optional " not" for the expectation and for the actual outcome.
    if expected_pandas_equals:
        expected_negation, actual_negation = "", " not"
    else:
        expected_negation, actual_negation = " not", ""
    failure_message = (
        f"Test expected pandas to say the dataframes were{expected_negation} equal, "
        f"but they were{actual_negation} equal."
    )
    assert pandas_equals == expected_pandas_equals, failure_message

    for right_frame in (modin_df2, pandas_df2):
        assert modin_df1.equals(right_frame) == pandas_equals


def test_equals_several_partitions():
    """``equals`` must be False when concatenated frames differ in the second piece.

    Both frames are built with ``pd.concat`` from two two-row pieces; only the
    position of ``None`` inside the second piece differs.
    """
    left_pieces = [pd.DataFrame([0, 1]), pd.DataFrame([None, 1])]
    right_pieces = [pd.DataFrame([0, 1]), pd.DataFrame([1, None])]
    modin_df1 = pd.concat(left_pieces)
    modin_df2 = pd.concat(right_pieces)
    are_equal = modin_df1.equals(modin_df2)
    assert not are_equal


def test_equals_with_nans():
    """A pyarrow-backed frame with some values must not equal an all-missing one."""
    arrow_dtype = "uint8[pyarrow]"
    partially_missing = pd.DataFrame([0, 1, None], dtype=arrow_dtype)
    fully_missing = pd.DataFrame([None, None, None], dtype=arrow_dtype)
    are_equal = partially_missing.equals(fully_missing)
    assert not are_equal


@pytest.mark.parametrize("is_more_other_partitions", [True, False])
@pytest.mark.parametrize(
    "op_type", ["df_ser", "df_df", "ser_ser_same_name", "ser_ser_different_name"]
)
@pytest.mark.parametrize(
    "is_idx_aligned", [True, False], ids=["idx_aligned", "idx_not_aligned"]
)
def test_mismatched_row_partitions(is_idx_aligned, op_type, is_more_other_partitions):
    """Divide a frame/series by another one that was assembled differently.

    One operand is a plain six-row frame, the other is the first three rows
    concatenated with themselves. ``is_more_other_partitions`` swaps which of
    the two ends up on the right-hand side, and ``is_idx_aligned`` optionally
    gives both operands the same index.
    """
    values = [0, 1, 2, 3, 4, 5]
    modin_df1, pandas_df1 = create_test_dfs({"a": values, "b": values})
    modin_head = modin_df1.loc[:2]
    pandas_head = pandas_df1.loc[:2]

    modin_df2 = pd.concat((modin_head, modin_head))
    pandas_df2 = pandas.concat((pandas_head, pandas_head))
    if is_more_other_partitions:
        modin_df1, modin_df2 = modin_df2, modin_df1
        pandas_df1, pandas_df2 = pandas_df2, pandas_df1

    if is_idx_aligned and is_more_other_partitions:
        shared_index = pandas_df2.index
        modin_df1.index = shared_index
        pandas_df1.index = shared_index
    elif is_idx_aligned:
        shared_index = pandas_df1.index
        modin_df2.index = shared_index
        pandas_df2.index = shared_index

    # pandas does not support this case: the divisor's index carries duplicate
    # labels, so both libraries are expected to raise the same ValueError.
    if op_type == "df_ser" and is_more_other_partitions and not is_idx_aligned:

        def divide_by_column_a(df):
            divisor = modin_df1.a if isinstance(df, pd.DataFrame) else pandas_df1.a
            return df / divisor

        eval_general(
            modin_df2,
            pandas_df2,
            divide_by_column_a,
            expected_exception=ValueError(
                "cannot reindex on an axis with duplicate labels"
            ),
        )
        return

    # Each entry receives (dividend frame, divisor frame) of the same library.
    divide = {
        "df_ser": lambda dividend, divisor: dividend / divisor.a,
        "df_df": lambda dividend, divisor: dividend / divisor,
        "ser_ser_same_name": lambda dividend, divisor: dividend.a / divisor.a,
        "ser_ser_different_name": lambda dividend, divisor: dividend.a / divisor.b,
    }.get(op_type)
    if divide is None:
        raise Exception(f"op_type: {op_type} not supported in test")

    modin_res = divide(modin_df2, modin_df1)
    pandas_res = divide(pandas_df2, pandas_df1)
    df_equals(modin_res, pandas_res)


def test_duplicate_indexes():
    """Division must match pandas when the left operand has repeated index labels."""
    values = [0, 1, 2, 3, 4, 5]
    columns = {"a": values, "b": values}
    modin_dup, pandas_dup = create_test_dfs(columns, index=[0, 1, 2, 0, 1, 2])
    modin_plain, pandas_plain = create_test_dfs(columns)

    # Duplicated index divided by a default (unique) index.
    modin_res = modin_dup / modin_plain
    pandas_res = pandas_dup / pandas_plain
    df_equals(modin_res, pandas_res)

    # Duplicated index divided by itself.
    modin_res = modin_dup / modin_dup
    pandas_res = pandas_dup / pandas_dup
    df_equals(modin_res, pandas_res)


@pytest.mark.parametrize("subset_operand", ["left", "right"])
def test_mismatched_col_partitions(subset_operand):
    """Add a two-column frame to a three-column frame that contains it.

    ``subset_operand`` selects on which side of ``+`` the narrower frame sits.
    """
    values = [0, 1, 2, 3]
    modin_narrow, pandas_narrow = create_test_dfs({"a": values, "b": values})
    modin_extra, pandas_extra = create_test_dfs({"c": values})

    modin_wide = pd.concat([modin_narrow, modin_extra], axis=1)
    pandas_wide = pandas.concat([pandas_narrow, pandas_extra], axis=1)

    modin_operands = (modin_wide, modin_narrow)
    pandas_operands = (pandas_wide, pandas_narrow)
    if subset_operand != "right":
        modin_operands = modin_operands[::-1]
        pandas_operands = pandas_operands[::-1]

    modin_res = modin_operands[0] + modin_operands[1]
    pandas_res = pandas_operands[0] + pandas_operands[1]

    df_equals(modin_res, pandas_res)


@pytest.mark.parametrize("empty_operand", ["right", "left", "both"])
def test_empty_df(empty_operand):
    """Addition must match pandas when one or both operands are empty frames."""
    modin_full, pandas_full = create_test_dfs([0, 1, 2, 0, 1, 2])
    modin_empty, pandas_empty = create_test_dfs()

    if empty_operand == "right":
        modin_pair = (modin_full, modin_empty)
        pandas_pair = (pandas_full, pandas_empty)
    elif empty_operand == "left":
        modin_pair = (modin_empty, modin_full)
        pandas_pair = (pandas_empty, pandas_full)
    else:
        modin_pair = (modin_empty, modin_empty)
        pandas_pair = (pandas_empty, pandas_empty)

    modin_res = modin_pair[0] + modin_pair[1]
    pandas_res = pandas_pair[0] + pandas_pair[1]

    df_equals(modin_res, pandas_res)


def test_add_string_to_df():
    """A string scalar can be added on either side of a string dataframe."""

    def prepend(df):
        return "string" + df

    def append(df):
        return df + "string"

    modin_df, pandas_df = create_test_dfs(["a", "b"])
    for concatenate in (prepend, append):
        eval_general(modin_df, pandas_df, concatenate)


def test_add_custom_class():
    """Adding a custom addable object to a dataframe must match pandas.

    See https://github.com/modin-project/modin/issues/5236: any object that
    can be added to pandas object data via "+" should also work with Modin.
    """

    def add_custom_integer(df):
        return df + CustomIntegerForAddition(4)

    modin_df, pandas_df = create_test_dfs(test_data["int_data"])
    eval_general(modin_df, pandas_df, add_custom_integer)


def test_non_commutative_multiply_pandas():
    """Sanity-check the ``NonCommutativeMultiplyInteger`` helper against pandas.

    The helper's implementation is tricky, so confirm that multiplying it with
    a pandas dataframe really gives different results depending on the side.
    """
    integer = NonCommutativeMultiplyInteger(2)
    pandas_df = pandas.DataFrame([[1]], dtype=int)
    left_product = integer * pandas_df
    right_product = pandas_df * integer
    assert not left_product.equals(right_product)


def test_non_commutative_multiply():
    """``mul`` and ``rmul`` must each match pandas for a non-commutative operand.

    For context see https://github.com/modin-project/modin/issues/5238
    """

    integer = NonCommutativeMultiplyInteger(2)

    def multiply_from_left(s):
        return integer * s

    def multiply_from_right(s):
        return s * integer

    modin_df, pandas_df = create_test_dfs([1], dtype=int)
    eval_general(modin_df, pandas_df, multiply_from_left)
    eval_general(modin_df, pandas_df, multiply_from_right)


@pytest.mark.parametrize(
    "op",
    [
        *("add", "radd", "sub", "rsub", "mod", "rmod", "pow", "rpow"),
        *("truediv", "rtruediv", "mul", "rmul", "floordiv", "rfloordiv"),
    ],
)
@pytest.mark.parametrize(
    "val1",
    [
        pytest.param([10, 20], id="int"),
        pytest.param([10, True], id="obj"),
        pytest.param([True, True], id="bool"),
        pytest.param([3.5, 4.5], id="float"),
    ],
)
@pytest.mark.parametrize(
    "val2",
    [
        pytest.param([10, 20], id="int"),
        pytest.param([10, True], id="obj"),
        pytest.param([True, True], id="bool"),
        pytest.param([3.5, 4.5], id="float"),
        pytest.param(2, id="int scalar"),
        pytest.param(True, id="bool scalar"),
        pytest.param(3.5, id="float scalar"),
    ],
)
def test_arithmetic_with_tricky_dtypes(val1, val2, op, request):
    """Run binary arithmetic method ``op`` over int/object/bool/float operand mixes.

    ``val2`` is either a list (turned into dataframes) or a scalar passed to
    both libraries as is. For bool-with-bool combinations, identified through
    the parametrization id, specific exceptions are expected.
    """
    modin_df1, pandas_df1 = create_test_dfs(val1)
    if isinstance(val2, list):
        modin_df2, pandas_df2 = create_test_dfs(val2)
    else:
        modin_df2 = pandas_df2 = val2

    test_id = request.node.callspec.id
    both_operands_bool = "bool-bool" in test_id or "bool scalar-bool" in test_id
    unimplemented_for_bool = (
        "pow",
        "rpow",
        "truediv",
        "rtruediv",
        "floordiv",
        "rfloordiv",
    )

    expected_exception = None
    if both_operands_bool and op in unimplemented_for_bool:
        # The error message names the non-reflected operator.
        op_name = op
        if op.startswith("r"):
            op_name = op[1:]
        expected_exception = NotImplementedError(
            f"operator '{op_name}' not implemented for bool dtypes"
        )
    elif both_operands_bool and op in ("sub", "rsub"):
        expected_exception = TypeError(
            "numpy boolean subtract, the `-` operator, is not supported, "
            + "use the bitwise_xor, the `^` operator, or the logical_xor function instead."
        )

    def apply_op(dfs):
        left, right = dfs[0], dfs[1]
        return getattr(left, op)(right)

    eval_general(
        (modin_df1, modin_df2),
        (pandas_df1, pandas_df2),
        apply_op,
        expected_exception=expected_exception,
    )


@pytest.mark.parametrize(
    "data, other_data",
    [
        ({"A": [1, 2, 3], "B": [400, 500, 600]}, {"B": [4, 5, 6], "C": [7, 8, 9]}),
        ({"C": [1, 2, 3], "B": [400, 500, 600]}, {"B": [4, 5, 6], "A": [7, 8, 9]}),
    ],
)
@pytest.mark.parametrize("axis", [0, 1])
@pytest.mark.parametrize("match_index", [True, False])
def test_bin_op_mismatched_columns(data, other_data, axis, match_index):
    """``add`` of two frames that share only column "B" must match pandas.

    For ``axis=0`` with ``match_index=False`` the two operands additionally
    get string indexes with the same labels in a different order.
    """
    modin_df, pandas_df = create_test_dfs(data)
    other_modin_df, other_pandas_df = create_test_dfs(other_data)

    if axis == 0 and not match_index:
        own_labels = ["1", "2", "3"]
        modin_df.index = own_labels
        pandas_df.index = own_labels
        other_labels = ["2", "1", "3"]
        other_modin_df.index = other_labels
        other_pandas_df.index = other_labels

    def add_other(df):
        # Pair each library's frame with the operand of the same library.
        other = other_modin_df if isinstance(df, pd.DataFrame) else other_pandas_df
        return df.add(other, axis=axis)

    eval_general(modin_df, pandas_df, add_other)


================================================
FILE: modin/tests/pandas/dataframe/test_default.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import io
import warnings

import matplotlib
import numpy as np
import pandas
import pandas._libs.lib as lib
import pyarrow as pa
import pytest
from numpy.testing import assert_array_equal
from packaging.version import Version

import modin.pandas as pd
from modin.config import Backend, Engine, NPartitions, StorageFormat
from modin.pandas.io import to_pandas
from modin.tests.pandas.utils import (
    axis_keys,
    axis_values,
    create_test_dfs,
    create_test_series,
    default_to_pandas_ignore_string,
    df_equals,
    eval_general,
    generate_multiindex,
    modin_df_almost_equals_pandas,
    name_contains,
    numeric_dfs,
    test_data,
    test_data_diff_dtype,
    test_data_keys,
    test_data_large_categorical_dataframe,
    test_data_resample,
    test_data_values,
)
from modin.tests.test_utils import (
    current_execution_is_native,
    df_or_series_using_native_execution,
    warns_that_defaulting_to_pandas_if,
)
from modin.utils import get_current_execution

# Set Modin's ``NPartitions`` config value to 4 when this module is imported.
NPartitions.put(4)

# Force matplotlib to not use any Xwindows backend.
matplotlib.use("Agg")

# Our configuration in pytest.ini requires that we explicitly catch all
# instances of defaulting to pandas, but some test modules, like this one,
# have too many such instances.
# ``pytestmark`` applies the listed warning filters to every test in this module.
pytestmark = [
    pytest.mark.filterwarnings(default_to_pandas_ignore_string),
    # IGNORE FUTUREWARNINGS MARKS TO CLEANUP OUTPUT
    # Deprecation warning matched by the `bool` test below.
    pytest.mark.filterwarnings(
        "ignore:.*bool is now deprecated and will be removed:FutureWarning"
    ),
    # Deprecation warning matched by the `first` test below.
    pytest.mark.filterwarnings(
        "ignore:first is deprecated and will be removed:FutureWarning"
    ),
    # Deprecation warning matched by the `last` test below.
    pytest.mark.filterwarnings(
        "ignore:last is deprecated and will be removed:FutureWarning"
    ),
]


@pytest.mark.parametrize(
    "op, make_args",
    [
        ("align", lambda df: {"other": df}),
        ("corrwith", lambda df: {"other": df}),
        ("ewm", lambda df: {"com": 0.5}),
        ("from_dict", lambda df: {"data": None}),
        ("from_records", lambda df: {"data": to_pandas(df)}),
        ("hist", lambda df: {"column": "int_col"}),
        ("interpolate", None),
        ("mask", lambda df: {"cond": df != 0}),
        ("pct_change", None),
        ("to_xarray", None),
        ("flags", None),
        ("set_flags", lambda df: {"allows_duplicate_labels": False}),
    ],
)
def test_ops_defaulting_to_pandas(op, make_args):
    """Check that accessing/calling ``op`` emits the default-to-pandas warning.

    Parameters
    ----------
    op : str
        Name of the dataframe attribute to exercise.
    make_args : callable or None
        Builds the keyword arguments for ``op`` from the Modin dataframe.
        ``None`` means ``op`` is called without arguments, or only accessed
        when the attribute is not callable (e.g. ``flags``).
    """
    modin_df = pd.DataFrame(test_data_diff_dtype).drop(["str_col", "bool_col"], axis=1)
    with warns_that_defaulting_to_pandas_if(
        not df_or_series_using_native_execution(modin_df)
    ):
        operation = getattr(modin_df, op)
        if make_args is not None:
            operation(**make_args(modin_df))
        elif callable(operation):
            # Non-callable attributes are only accessed. An explicit check is
            # used instead of swallowing ``TypeError`` so that a genuine
            # ``TypeError`` raised by a callable operation is not hidden.
            operation()


def test_style():
    """Accessing ``DataFrame.style`` defaults to pandas unless execution is native."""
    expect_warning = not current_execution_is_native()
    with warns_that_defaulting_to_pandas_if(expect_warning):
        modin_df = pd.DataFrame(test_data_values[0])
        _ = modin_df.style


def test_to_timestamp():
    """``to_period().to_timestamp()`` defaults to pandas unless execution is native."""
    idx = pd.date_range("1/1/2012", periods=5, freq="M")
    shape = (len(idx), 4)
    values = np.random.randint(0, 100, size=shape)
    df = pd.DataFrame(values, index=idx)

    expect_warning = not df_or_series_using_native_execution(df)
    with warns_that_defaulting_to_pandas_if(expect_warning):
        df.to_period().to_timestamp()


@pytest.mark.parametrize(
    "data",
    test_data_values + [test_data_large_categorical_dataframe],
    ids=test_data_keys + ["categorical_ints"],
)
def test_to_numpy(data):
    """``DataFrame.values`` must return the same array in Modin and pandas."""
    modin_values = pd.DataFrame(data).values
    pandas_values = pandas.DataFrame(data).values
    assert_array_equal(modin_values, pandas_values)


@pytest.mark.skipif(
    StorageFormat.get() != "Pandas",
    reason="NativeQueryCompiler does not contain partitions.",
)
@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_partition_to_numpy(data):
    """Each partition's ``to_numpy()`` must equal the values of its ``to_pandas()``."""
    modin_frame = pd.DataFrame(data)._query_compiler._modin_frame
    partitions = modin_frame._partitions.flatten().tolist()
    for part in partitions:
        expected = part.to_pandas().values
        assert_array_equal(expected, part.to_numpy())


def test_asfreq():
    """``DataFrame.asfreq`` defaults to pandas unless execution is native.

    Only the warning is checked here; the result itself is not compared.
    """
    minutes = pd.date_range("1/1/2000", periods=4, freq="min")
    column = pd.Series([0.0, None, 2.0, 3.0], index=minutes)
    df = pd.DataFrame({"s": column})

    expect_warning = not df_or_series_using_native_execution(df)
    with warns_that_defaulting_to_pandas_if(expect_warning):
        df.asfreq(freq="30S")


def test_assign():
    """``assign`` with one and with two new Series columns must match pandas."""
    source = test_data_values[0]
    modin_df = pd.DataFrame(source)
    pandas_df = pandas.DataFrame(source)

    def modin_column(position):
        return pd.Series(modin_df.iloc[:, position])

    def pandas_column(position):
        return pandas.Series(pandas_df.iloc[:, position])

    # One new column.
    modin_result = modin_df.assign(new_column=modin_column(0))
    pandas_result = pandas_df.assign(new_column=pandas_column(0))
    df_equals(modin_result, pandas_result)

    # Two new columns in a single call.
    modin_result = modin_df.assign(
        new_column=modin_column(0), new_column2=modin_column(1)
    )
    pandas_result = pandas_df.assign(
        new_column=pandas_column(0), new_column2=pandas_column(1)
    )
    df_equals(modin_result, pandas_result)


def test_at_time():
    """``at_time`` must match pandas along both the index and the columns."""
    n_rows = 1000
    index = pd.date_range("2008-01-01", periods=n_rows, freq="12H")
    modin_df = pd.DataFrame(
        {"A": list(range(n_rows)), "B": list(range(n_rows))}, index=index
    )
    pandas_df = pandas.DataFrame(
        {"A": list(range(n_rows)), "B": list(range(n_rows))}, index=index
    )

    for time_of_day in ("12:00", "3:00"):
        df_equals(modin_df.at_time(time_of_day), pandas_df.at_time(time_of_day))

    # Same selection along the columns of the transposed frame.
    modin_transposed = modin_df.T.at_time("12:00", axis=1)
    pandas_transposed = pandas_df.T.at_time("12:00", axis=1)
    df_equals(modin_transposed, pandas_transposed)


def test_between_time():
    """``between_time`` must match pandas along both the index and the columns."""
    n_rows = 1000
    index = pd.date_range("2008-01-01", periods=n_rows, freq="12H")
    modin_df = pd.DataFrame(
        {"A": list(range(n_rows)), "B": list(range(n_rows))}, index=index
    )
    pandas_df = pandas.DataFrame(
        {"A": list(range(n_rows)), "B": list(range(n_rows))}, index=index
    )

    for start, end in (("12:00", "17:00"), ("3:00", "4:00")):
        modin_window = modin_df.between_time(start, end)
        pandas_window = pandas_df.between_time(start, end)
        df_equals(modin_window, pandas_window)

    # Same selection along the columns of the transposed frame.
    modin_transposed = modin_df.T.between_time("12:00", "17:00", axis=1)
    pandas_transposed = pandas_df.T.between_time("12:00", "17:00", axis=1)
    df_equals(modin_transposed, pandas_transposed)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_bfill(data):
    """``bfill`` with default arguments must match pandas."""
    modin_filled = pd.DataFrame(data).bfill()
    pandas_filled = pandas.DataFrame(data).bfill()
    df_equals(modin_filled, pandas_filled)


@pytest.mark.parametrize("limit_area", [None, "inside", "outside"])
@pytest.mark.parametrize("method", ["ffill", "bfill"])
def test_ffill_bfill_limit_area(method, limit_area):
    """``ffill``/``bfill`` must match pandas for every ``limit_area`` value."""

    def fill(df):
        fill_method = getattr(df, method)
        return fill_method(limit_area=limit_area)

    modin_df, pandas_df = create_test_dfs([1, None, 2, None])
    eval_general(modin_df, pandas_df, fill)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_bool(data):
    """Check ``DataFrame.bool()`` and ``DataFrame.__bool__()`` behavior.

    ``bool()`` on the parametrized frame must emit the deprecation warning and
    raise ``ValueError``; on a single-boolean frame it must match pandas.
    ``__bool__()`` must raise ``ValueError`` for both frames.
    """
    modin_df = pd.DataFrame(data)

    with pytest.warns(
        FutureWarning, match="bool is now deprecated and will be removed"
    ):
        with pytest.raises(ValueError):
            modin_df.bool()

    # Checked in its own ``raises`` block: a statement placed after a raising
    # call inside the same block would never be executed.
    with pytest.raises(ValueError):
        modin_df.__bool__()

    single_bool_pandas_df = pandas.DataFrame([True])
    single_bool_modin_df = pd.DataFrame([True])

    assert single_bool_pandas_df.bool() == single_bool_modin_df.bool()

    with pytest.raises(ValueError):
        # __bool__ always raises this error for DataFrames
        single_bool_modin_df.__bool__()


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_boxplot(data):
    """``boxplot`` on a Modin frame must equal ``boxplot`` on its pandas conversion."""
    modin_df = pd.DataFrame(data)

    modin_plot = modin_df.boxplot()
    pandas_plot = to_pandas(modin_df).boxplot()
    assert modin_plot == pandas_plot


def test_combine_first():
    """``combine_first`` must fill missing values from the other frame like pandas.

    Dtypes are not compared, see https://github.com/modin-project/modin/issues/5959
    """
    with_gaps = {"A": [None, 0], "B": [None, 4]}
    fillers = {"A": [1, 1], "B": [3, 3]}

    modin_df1, pandas_df1 = pd.DataFrame(with_gaps), pandas.DataFrame(with_gaps)
    modin_df2, pandas_df2 = pd.DataFrame(fillers), pandas.DataFrame(fillers)

    modin_result = modin_df1.combine_first(modin_df2)
    pandas_result = pandas_df1.combine_first(pandas_df2)
    df_equals(modin_result, pandas_result, check_dtypes=False)


class TestCorr:
    """Tests comparing ``DataFrame.corr`` between Modin and pandas."""

    @pytest.mark.parametrize("method", ["pearson", "kendall", "spearman"])
    @pytest.mark.parametrize("backend", [None, "pyarrow"])
    def test_corr(self, method, backend):
        """``corr`` with each ``method`` on integer and float-with-NaN data."""

        def corr(df):
            return df.corr(method=method)

        int_dfs = create_test_dfs(test_data["int_data"], backend=backend)
        eval_general(*int_dfs, corr)

        # Modin result may slightly differ from pandas result
        # due to floating point arithmetic.
        float_nan_dfs = create_test_dfs(test_data["float_nan_data"], backend=backend)
        eval_general(*float_nan_dfs, corr, comparator=modin_df_almost_equals_pandas)

    @pytest.mark.parametrize("min_periods", [1, 3, 5, 6])
    def test_corr_min_periods(self, min_periods):
        """``corr(min_periods=...)`` on frames with 3, 5 and 4 fully valid rows."""

        def corr(df):
            return df.corr(min_periods=min_periods)

        # A valid value is considered a row with no NaNs.
        four_valid_rows = {"a": [1, np.nan, 3, 4, 5, 6], "b": [1, 2, 1, 4, 5, np.nan]}
        datasets = (
            # only 3 valid values
            {"a": [1, 2, 3], "b": [3, 1, 5]},
            # only 5 valid values
            {"a": [1, 2, 3, 4, 5, np.nan], "b": [1, 2, 1, 4, 5, np.nan]},
            # only 4 valid values
            four_valid_rows,
        )
        for dataset in datasets:
            eval_general(*create_test_dfs(dataset), corr)

        if StorageFormat.get() != "Pandas":
            return

        # only 4 valid values located in different partitions
        modin_df, pandas_df = create_test_dfs(four_valid_rows)
        modin_df = pd.concat([modin_df.iloc[:3], modin_df.iloc[3:]])
        assert modin_df._query_compiler._modin_frame._partitions.shape == (2, 1)
        eval_general(modin_df, pandas_df, corr)

    @pytest.mark.parametrize("numeric_only", [True, False])
    def test_corr_non_numeric(self, numeric_only):
        """``corr(numeric_only=...)`` on a frame that has a string column."""
        if not numeric_only:
            pytest.xfail(reason="https://github.com/modin-project/modin/issues/7023")

        modin_df, pandas_df = create_test_dfs(
            {"a": [1, 2, 3], "b": [3, 2, 5], "c": ["a", "b", "c"]}
        )
        eval_general(
            modin_df, pandas_df, lambda df: df.corr(numeric_only=numeric_only)
        )

    @pytest.mark.skipif(
        StorageFormat.get() != "Pandas",
        reason="doesn't make sense for non-partitioned executions",
    )
    def test_corr_nans_in_different_partitions(self):
        """``corr`` on frames rebuilt from three row slices with NaNs in various slices."""
        datasets = (
            # NaN in the first partition
            {"a": [np.nan, 2, 3, 4, 5, 6], "b": [3, 4, 2, 0, 7, 8]},
            # NaN in the last partition
            {"a": [1, 2, 3, 4, 5, np.nan], "b": [3, 4, 2, 0, 7, 8]},
            # NaN in two partitions
            {"a": [np.nan, 2, 3, 4, 5, 6], "b": [3, 4, 2, 0, 7, np.nan]},
            # NaN in all partitions
            {"a": [np.nan, 2, 3, np.nan, 5, 6], "b": [3, 4, 2, 0, 7, np.nan]},
        )
        for dataset in datasets:
            modin_df, pandas_df = create_test_dfs(dataset)
            row_slices = [modin_df.iloc[:2], modin_df.iloc[2:4], modin_df.iloc[4:]]
            modin_df = pd.concat(row_slices)

            assert modin_df._query_compiler._modin_frame._partitions.shape == (3, 1)
            eval_general(modin_df, pandas_df, lambda df: df.corr())


@pytest.mark.parametrize("min_periods", [1, 3, 5], ids=lambda x: f"min_periods={x}")
@pytest.mark.parametrize("ddof", [1, 2, 4], ids=lambda x: f"ddof={x}")
@pytest.mark.parametrize("backend", [None, "pyarrow"])
def test_cov(min_periods, ddof, backend):
    """``cov`` must match pandas on integer data and on float data with NaNs."""
    int_dfs = create_test_dfs(test_data["int_data"], backend=backend)
    eval_general(
        *int_dfs,
        lambda df: df.cov(min_periods=min_periods, ddof=ddof),
        comparator=df_equals,
    )

    # Modin result may slightly differ from pandas result
    # due to floating point arithmetic. That's why we use `modin_df_almost_equals_pandas`.
    # NOTE(review): ``ddof`` is not forwarded in this second check — confirm
    # whether that is intentional.
    float_nan_dfs = create_test_dfs(test_data["float_nan_data"], backend=backend)
    eval_general(
        *float_nan_dfs,
        lambda df: df.cov(min_periods=min_periods),
        comparator=modin_df_almost_equals_pandas,
    )


@pytest.mark.parametrize("numeric_only", [True, False])
def test_cov_numeric_only(numeric_only):
    """``cov(numeric_only=...)`` on a frame that has a string column."""
    if not numeric_only:
        pytest.xfail(reason="https://github.com/modin-project/modin/issues/7023")

    modin_df, pandas_df = create_test_dfs(
        {"a": [1, 2, 3], "b": [3, 2, 5], "c": ["a", "b", "c"]}
    )
    eval_general(modin_df, pandas_df, lambda df: df.cov(numeric_only=numeric_only))


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_dot(data):
    """``DataFrame.dot`` must match pandas for array, Series and DataFrame operands."""
    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)
    n_cols = len(modin_df.columns)

    # 1-D NumPy array operand
    weights = np.arange(n_cols)
    df_equals(modin_df.dot(weights), pandas_df.dot(weights))

    # An array of the wrong length is rejected
    with pytest.raises(ValueError):
        modin_df.dot(np.arange(n_cols + 10))

    # Series operand indexed by the frame's columns
    modin_series = pd.Series(np.arange(n_cols), index=modin_df.columns)
    pandas_series = pandas.Series(np.arange(n_cols), index=pandas_df.columns)
    df_equals(modin_df.dot(modin_series), pandas_df.dot(pandas_series))

    # DataFrame operand
    df_equals(modin_df.dot(modin_df.T), pandas_df.dot(pandas_df.T))

    # A Series whose index doesn't line up with the columns is rejected
    with pytest.raises(ValueError):
        modin_df.dot(pd.Series(np.arange(n_cols)))

    # Left dataframe of size (n x 1) times right dataframe of size (1 x n)
    modin_df = pd.DataFrame(modin_series)
    pandas_df = pandas.DataFrame(pandas_series)
    df_equals(modin_df.dot(modin_df.T), pandas_df.dot(pandas_df.T))

    # Left dataframe of size (1 x 1) times right dataframe of size (1 x n)
    modin_one_by_one = pd.DataFrame([1])
    pandas_one_by_one = pandas.DataFrame([1])
    df_equals(modin_one_by_one.dot(modin_df.T), pandas_one_by_one.dot(pandas_df.T))


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_matmul(data):
    """The ``@`` operator must match pandas for array, Series and DataFrame operands."""
    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)
    n_cols = len(modin_df.columns)

    # 1-D NumPy array operand
    weights = np.arange(n_cols)
    df_equals(modin_df @ weights, pandas_df @ weights)

    # An array of the wrong length is rejected
    with pytest.raises(ValueError):
        modin_df @ np.arange(n_cols + 10)

    # Series operand indexed by the frame's columns
    modin_series = pd.Series(np.arange(n_cols), index=modin_df.columns)
    pandas_series = pandas.Series(np.arange(n_cols), index=pandas_df.columns)
    df_equals(modin_df @ modin_series, pandas_df @ pandas_series)

    # DataFrame operand
    df_equals(modin_df @ modin_df.T, pandas_df @ pandas_df.T)

    # A Series whose index doesn't line up with the columns is rejected
    with pytest.raises(ValueError):
        modin_df @ pd.Series(np.arange(n_cols))


def test_first():
    """``first`` must emit its deprecation warning and match pandas."""
    n_rows = 400
    index = pd.date_range("2010-04-09", periods=n_rows, freq="2D")
    modin_df = pd.DataFrame(
        {"A": list(range(n_rows)), "B": list(range(n_rows))}, index=index
    )
    pandas_df = pandas.DataFrame(
        {"A": list(range(n_rows)), "B": list(range(n_rows))}, index=index
    )

    # The deprecation warning is asserted only for this first call.
    with pytest.warns(FutureWarning, match="first is deprecated and will be removed"):
        modin_three_days = modin_df.first("3D")
    df_equals(modin_three_days, pandas_df.first("3D"))

    modin_twenty_days = modin_df.first("20D")
    pandas_twenty_days = pandas_df.first("20D")
    df_equals(modin_twenty_days, pandas_twenty_days)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_info_default_param(data):
    """``info`` with ``None`` for its optional arguments must print like pandas.

    Only the first output line (the class name) may differ between the two.
    """

    def call_info(df, **kwargs):
        return df.info(**kwargs)

    with io.StringIO() as modin_buf, io.StringIO() as pandas_buf:

        def pick_buffer(df):
            # Each library writes its report to its own buffer.
            return pandas_buf if isinstance(df, pandas.DataFrame) else modin_buf

        eval_general(
            pd.DataFrame(data),
            pandas.DataFrame(data),
            verbose=None,
            max_cols=None,
            memory_usage=None,
            operation=call_info,
            buf=pick_buffer,
        )
        modin_lines = modin_buf.getvalue().splitlines()
        pandas_lines = pandas_buf.getvalue().splitlines()

        assert modin_lines[0] == str(pd.DataFrame)
        assert pandas_lines[0] == str(pandas.DataFrame)
        assert modin_lines[1:] == pandas_lines[1:]


# randint data covers https://github.com/modin-project/modin/issues/5137
@pytest.mark.parametrize(
    "data", [test_data_values[0], np.random.randint(0, 100, (10, 10))]
)
@pytest.mark.parametrize("verbose", [True, False])
@pytest.mark.parametrize("max_cols", [10, 99999999])
@pytest.mark.parametrize("memory_usage", [True, False, "deep"])
@pytest.mark.parametrize("show_counts", [True, False])
def test_info(data, verbose, max_cols, memory_usage, show_counts):
    """``info`` must print like pandas for every combination of its options.

    Only the first output line (the class name) may differ between the two.
    """

    def call_info(df, **kwargs):
        return df.info(**kwargs)

    with io.StringIO() as modin_buf, io.StringIO() as pandas_buf:

        def pick_buffer(df):
            # Each library writes its report to its own buffer.
            return pandas_buf if isinstance(df, pandas.DataFrame) else modin_buf

        eval_general(
            pd.DataFrame(data),
            pandas.DataFrame(data),
            operation=call_info,
            verbose=verbose,
            max_cols=max_cols,
            memory_usage=memory_usage,
            show_counts=show_counts,
            buf=pick_buffer,
        )
        modin_lines = modin_buf.getvalue().splitlines()
        pandas_lines = pandas_buf.getvalue().splitlines()

        assert modin_lines[0] == str(pd.DataFrame)
        assert pandas_lines[0] == str(pandas.DataFrame)
        assert modin_lines[1:] == pandas_lines[1:]


@pytest.mark.parametrize("axis", axis_values, ids=axis_keys)
@pytest.mark.parametrize("skipna", [False, True])
@pytest.mark.parametrize("numeric_only", [False, True])
@pytest.mark.parametrize("method", ["kurtosis", "kurt"])
def test_kurt_kurtosis(axis, skipna, numeric_only, method):
    """``kurt`` and ``kurtosis`` must match pandas on float data with NaNs."""

    def compute(df):
        reducer = getattr(df, method)
        return reducer(axis=axis, skipna=skipna, numeric_only=numeric_only)

    modin_df, pandas_df = create_test_dfs(test_data["float_nan_data"])
    eval_general(modin_df, pandas_df, compute)


def test_last():
    """``last`` must emit its deprecation warning and match pandas."""
    n_rows = 400
    range_kwargs = {"periods": n_rows, "freq": "2D"}
    modin_index = pd.date_range("2010-04-09", **range_kwargs)
    pandas_index = pandas.date_range("2010-04-09", **range_kwargs)
    modin_df = pd.DataFrame(
        {"A": list(range(n_rows)), "B": list(range(n_rows))}, index=modin_index
    )
    pandas_df = pandas.DataFrame(
        {"A": list(range(n_rows)), "B": list(range(n_rows))}, index=pandas_index
    )

    # The deprecation warning is asserted only for this first call.
    with pytest.warns(FutureWarning, match="last is deprecated and will be removed"):
        modin_three_days = modin_df.last("3D")
    df_equals(modin_three_days, pandas_df.last("3D"))

    modin_twenty_days = modin_df.last("20D")
    pandas_twenty_days = pandas_df.last("20D")
    df_equals(modin_twenty_days, pandas_twenty_days)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize(
    "id_vars", [lambda df: df.columns[0], lambda df: df.columns[:4], None]
)
@pytest.mark.parametrize(
    "value_vars", [lambda df: df.columns[-1], lambda df: df.columns[-4:], None]
)
def test_melt(data, id_vars, value_vars):
    """``melt`` must match pandas once rows are sorted and the index is reset."""

    def sorted_melt(df, *args, **kwargs):
        melted = df.melt(*args, **kwargs)
        ordered = melted.sort_values(["variable", "value"])
        return ordered.reset_index(drop=True)

    modin_df, pandas_df = create_test_dfs(data)
    eval_general(
        modin_df,
        pandas_df,
        sorted_melt,
        id_vars=id_vars,
        value_vars=value_vars,
    )


# Functional test for BUG:7206
def test_melt_duplicate_col_names():
    """``melt`` must match pandas when both columns carry the same name."""

    def sorted_melt(df, *args, **kwargs):
        melted = df.melt(*args, **kwargs)
        ordered = melted.sort_values(["variable", "value"])
        return ordered.reset_index(drop=True)

    modin_df, pandas_df = create_test_dfs(
        data=[[1, 2], [3, 4]], columns=["dupe", "dupe"]
    )
    eval_general(modin_df, pandas_df, sorted_melt)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize(
    "index",
    [lambda df: df.columns[0], lambda df: df.columns[:2], lib.no_default],
    ids=["one_column_index", "several_columns_index", "default"],
)
@pytest.mark.parametrize(
    "columns", [lambda df: df.columns[len(df.columns) // 2]], ids=["one_column"]
)
@pytest.mark.parametrize(
    "values",
    [lambda df: df.columns[-1], lambda df: df.columns[-2:], lib.no_default],
    ids=["one_column_values", "several_columns_values", "default"],
)
def test_pivot(data, index, columns, values, request):
    """``pivot`` must behave like pandas, including the error it raises.

    Parameter combinations affected by
    https://github.com/modin-project/modin/issues/7010 are marked as xfail.
    """
    xfail_reason = "https://github.com/modin-project/modin/issues/7010"
    current_execution = get_current_execution()
    test_id = request.node.callspec.id

    known_failing_ids = (
        "one_column_values-one_column-default-float_nan_data",
        "default-one_column-several_columns_index",
        "default-one_column-one_column_index",
    )
    if any(fragment in test_id for fragment in known_failing_ids):
        pytest.xfail(reason=xfail_reason)
    if (
        current_execution == "BaseOnPython" or current_execution_is_native()
    ) and index is lib.no_default:
        pytest.xfail(reason=xfail_reason)

    # With an explicit `index` the test expects pandas to refuse the reshape.
    expected_exception = (
        None
        if index is lib.no_default
        else ValueError("Index contains duplicate entries, cannot reshape")
    )

    def pivot(df, *args, **kwargs):
        return df.pivot(*args, **kwargs)

    modin_df, pandas_df = create_test_dfs(data)
    eval_general(
        modin_df,
        pandas_df,
        pivot,
        index=index,
        columns=columns,
        values=values,
        expected_exception=expected_exception,
    )


@pytest.mark.parametrize("data", [test_data["int_data"]], ids=["int_data"])
@pytest.mark.parametrize(
    "index",
    [
        pytest.param(lambda df: df.columns[0], id="single_index_col"),
        pytest.param(
            lambda df: [*df.columns[0:2], *df.columns[-7:-4]], id="multiple_index_cols"
        ),
        pytest.param(None, id="default_index"),
    ],
)
@pytest.mark.parametrize(
    "columns",
    [
        pytest.param(lambda df: df.columns[len(df.columns) // 2], id="single_col"),
        pytest.param(
            lambda df: [
                *df.columns[(len(df.columns) // 2) : (len(df.columns) // 2 + 4)],
                df.columns[-7],
            ],
            id="multiple_cols",
        ),
        pytest.param(None, id="default_columns"),
    ],
)
@pytest.mark.parametrize(
    "values",
    [
        pytest.param(lambda df: df.columns[-1], id="single_value_col"),
        pytest.param(lambda df: df.columns[-4:-1], id="multiple_value_cols"),
        pytest.param(None, id="default_values"),
    ],
)
@pytest.mark.parametrize(
    "aggfunc",
    [
        pytest.param(np.mean, id="callable_tree_reduce_func"),
        pytest.param("mean", id="tree_reduce_func"),
        pytest.param("nunique", id="full_axis_func"),
    ],
)
def test_pivot_table_data(data, index, columns, values, aggfunc, request):
    """Compare ``pivot_table`` between Modin and pandas for various key/value columns."""
    case_id = request.node.callspec.id

    # Parametrization ids that are expected to fail (see the linked issue).
    failing_id_fragments = (
        "callable_tree_reduce_func-single_value_col-multiple_cols-multiple_index_cols",
        "callable_tree_reduce_func-multiple_value_cols-multiple_cols-multiple_index_cols",
        "tree_reduce_func-single_value_col-multiple_cols-multiple_index_cols",
        "tree_reduce_func-multiple_value_cols-multiple_cols-multiple_index_cols",
        "full_axis_func-single_value_col-multiple_cols-multiple_index_cols",
        "full_axis_func-multiple_value_cols-multiple_cols-multiple_index_cols",
    )
    if any(fragment in case_id for fragment in failing_id_fragments):
        pytest.xfail(reason="https://github.com/modin-project/modin/issues/7011")

    md_df, pd_df = create_test_dfs(data)

    # when values is None the output will be huge-dimensional,
    # so reducing dimension of testing data at that case
    if values is None:
        md_df = md_df.iloc[:42, :42]
        pd_df = pd_df.iloc[:42, :42]

    if "default_columns-default_index" in case_id:
        expected_exception = ValueError("No group keys passed!")
    elif "callable_tree_reduce_func" in case_id and "int_data" in case_id:
        expected_exception = TypeError("'numpy.float64' object is not callable")
    else:
        expected_exception = None

    # Sort along axis 1 when an index was requested, along axis 0 otherwise.
    sort_axis = int(index is not None)

    def pivot_and_sort(frame, *args, **kwargs):
        return frame.pivot_table(*args, **kwargs).sort_index(axis=sort_axis)

    eval_general(
        md_df,
        pd_df,
        operation=pivot_and_sort,
        index=index,
        columns=columns,
        values=values,
        aggfunc=aggfunc,
        expected_exception=expected_exception,
    )


@pytest.mark.parametrize("data", [test_data["int_data"]], ids=["int_data"])
@pytest.mark.parametrize(
    "index",
    [
        pytest.param([], id="no_index_cols"),
        pytest.param(lambda df: df.columns[0], id="single_index_column"),
        pytest.param(
            lambda df: [df.columns[0], df.columns[len(df.columns) // 2 - 1]],
            id="multiple_index_cols",
        ),
    ],
)
@pytest.mark.parametrize(
    "columns",
    [
        pytest.param(lambda df: df.columns[len(df.columns) // 2], id="single_column"),
        pytest.param(
            lambda df: [
                *df.columns[(len(df.columns) // 2) : (len(df.columns) // 2 + 4)],
                df.columns[-7],
            ],
            id="multiple_cols",
        ),
    ],
)
@pytest.mark.parametrize(
    "values",
    [
        pytest.param(lambda df: df.columns[-1], id="single_value"),
        pytest.param(lambda df: df.columns[-4:-1], id="multiple_values"),
    ],
)
@pytest.mark.parametrize(
    "aggfunc",
    [
        pytest.param(["mean", "sum"], id="list_func"),
        pytest.param(
            lambda df: {df.columns[5]: "mean", df.columns[-5]: "sum"}, id="dict_func"
        ),
    ],
)
@pytest.mark.parametrize(
    "margins_name",
    [pytest.param("Custom name", id="str_name")],
)
@pytest.mark.parametrize("fill_value", [None, 0])
@pytest.mark.parametrize("backend", [None, "pyarrow"])
def test_pivot_table_margins(
    data,
    index,
    columns,
    values,
    aggfunc,
    margins_name,
    fill_value,
    backend,
    request,
):
    """Compare ``pivot_table(margins=True)`` between Modin and pandas."""
    # The dict-style ``aggfunc`` case is paired with this expected ``KeyError``.
    if "dict_func" in request.node.callspec.id:
        expected_exception = KeyError("Column(s) ['col28', 'col38'] do not exist")
    else:
        expected_exception = None

    modin_df, pandas_df = create_test_dfs(data, backend=backend)
    eval_general(
        modin_df,
        pandas_df,
        operation=lambda df, *args, **kwargs: df.pivot_table(*args, **kwargs),
        index=index,
        columns=columns,
        values=values,
        aggfunc=aggfunc,
        margins=True,
        margins_name=margins_name,
        fill_value=fill_value,
        expected_exception=expected_exception,
    )


@pytest.mark.parametrize(
    "aggfunc",
    [
        pytest.param("sum", id="MapReduce_func"),
        pytest.param("nunique", id="FullAxis_func"),
    ],
)
@pytest.mark.parametrize("margins", [True, False])
def test_pivot_table_fill_value(aggfunc, margins):
    """Compare ``pivot_table`` with ``fill_value=10`` between Modin and pandas."""
    md_df, pd_df = create_test_dfs(test_data["int_data"])

    # The first three columns serve as index, columns and values respectively.
    pivot_kwargs = {
        "index": md_df.columns[0],
        "columns": md_df.columns[1],
        "values": md_df.columns[2],
        "aggfunc": aggfunc,
        "margins": margins,
        "fill_value": 10,
    }
    eval_general(
        md_df,
        pd_df,
        operation=lambda df, *args, **kwargs: df.pivot_table(*args, **kwargs),
        **pivot_kwargs,
    )


@pytest.mark.parametrize("data", [test_data["int_data"]], ids=["int_data"])
def test_pivot_table_dropna(data):
    """Compare ``pivot_table(dropna=False)`` between Modin and pandas."""

    def pivot(frame, *args, **kwargs):
        return frame.pivot_table(*args, **kwargs)

    modin_df, pandas_df = create_test_dfs(data)
    eval_general(
        modin_df,
        pandas_df,
        operation=pivot,
        index=lambda df: df.columns[0],
        columns=lambda df: df.columns[1],
        values=lambda df: df.columns[-1],
        dropna=False,
    )


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_plot(request, data):
    """Check that Modin and pandas plots draw lines with the same x and y data.

    Only runs the comparison for test cases whose name matches ``numeric_dfs``.
    """
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    def assert_coords_equal(modin_coords, pandas_coords):
        # When both sides are masked arrays, compare the underlying data of
        # the element-wise comparison; otherwise compare the arrays directly.
        if isinstance(modin_coords, np.ma.core.MaskedArray) and isinstance(
            pandas_coords, np.ma.core.MaskedArray
        ):
            assert all((modin_coords == pandas_coords).data)
        else:
            assert np.array_equal(modin_coords, pandas_coords)

    if name_contains(request.node.name, numeric_dfs):
        # We have to test this way because equality in plots means same object.
        zipped_plot_lines = zip(modin_df.plot().lines, pandas_df.plot().lines)
        for left, right in zipped_plot_lines:
            assert_coords_equal(left.get_xdata(), right.get_xdata())
            # The y-data fallback used to re-compare the x-data, leaving the
            # y values unchecked; compare the y-data here.
            assert_coords_equal(left.get_ydata(), right.get_ydata())


def test_replace():
    """Compare ``DataFrame.replace`` between Modin and pandas.

    Covers dict-based, nested-dict, regex and in-place replacements.
    """
    mixed_data = {
        "A": [0, 1, 2, 3, 4],
        "B": [5, 6, 7, 8, 9],
        "C": ["a", "b", "c", "d", "e"],
    }
    modin_df = pd.DataFrame(mixed_data)
    pandas_df = pandas.DataFrame(mixed_data)

    # Positional-argument cases: per-column scalars with a common value,
    # then a nested per-column mapping.
    positional_cases = (
        ({"A": 0, "B": 5}, 100),
        ({"A": {0: 100, 4: 400}},),
    )
    for replace_args in positional_cases:
        modin_result = modin_df.replace(*replace_args)
        pandas_result = pandas_df.replace(*replace_args)
        df_equals(modin_result, pandas_result)

    string_data = {"A": ["bat", "foo", "bait"], "B": ["abc", "bar", "xyz"]}
    modin_df = pd.DataFrame(string_data)
    pandas_df = pandas.DataFrame(string_data)

    # Regex cases: a pattern-to-value mapping, then a list of patterns.
    regex_cases = (
        {"regex": {r"^ba.$": "new", "foo": "xyz"}},
        {"regex": [r"^ba.$", "foo"], "value": "new"},
    )
    for replace_kwargs in regex_cases:
        modin_result = modin_df.replace(**replace_kwargs)
        pandas_result = pandas_df.replace(**replace_kwargs)
        df_equals(modin_result, pandas_result)

    # In-place replacement: the frames themselves are compared afterwards.
    inplace_kwargs = {"regex": [r"^ba.$", "foo"], "value": "new", "inplace": True}
    modin_df.replace(**inplace_kwargs)
    pandas_df.replace(**inplace_kwargs)
    df_equals(modin_df, pandas_df)


@pytest.mark.parametrize("rule", ["5min", pandas.offsets.Hour()])
@pytest.mark.parametrize("axis", [0])
def test_resampler(rule, axis):
    """Check ``indices``, ``groups`` and ``get_group`` of a Modin resampler against pandas."""
    data = test_data_resample["data"]
    index = test_data_resample["index"]

    modin_resampler = pd.DataFrame(data, index=index).resample(rule, axis=axis)
    pandas_resampler = pandas.DataFrame(data, index=index).resample(rule, axis=axis)

    assert pandas_resampler.indices == modin_resampler.indices
    assert pandas_resampler.groups == modin_resampler.groups

    # Compare the first group of each resampler.
    first_modin_group = list(modin_resampler.groups)[0]
    first_pandas_group = list(pandas_resampler.groups)[0]
    df_equals(
        modin_resampler.get_group(name=first_modin_group),
        pandas_resampler.get_group(name=first_pandas_group),
    )


@pytest.mark.parametrize("rule", ["5min"])
@pytest.mark.parametrize("axis", ["index", "columns"])
@pytest.mark.parametrize(
    "method",
    [
        *("count", "sum", "std", "sem", "size", "prod", "ohlc", "quantile"),
        *("min", "median", "mean", "max", "last", "first", "nunique", "var"),
        *("interpolate", "asfreq", "nearest", "bfill", "ffill"),
    ],
)
def test_resampler_functions(rule, axis, method):
    """Compare argument-less resampler methods between Modin and pandas."""
    source, row_labels = test_data_resample["data"], test_data_resample["index"]
    modin_df = pd.DataFrame(source, index=row_labels)
    pandas_df = pandas.DataFrame(source, index=row_labels)

    if axis == "columns":
        # Resampling along columns needs datetime-like column labels.
        datetime_columns = pandas.date_range(
            "31/12/2000", periods=len(pandas_df.columns), freq="min"
        )
        modin_df.columns = datetime_columns
        pandas_df.columns = datetime_columns

    upsampling_methods = ("interpolate", "asfreq", "nearest", "bfill", "ffill")
    if method in upsampling_methods:
        # It looks like pandas is preparing to completely
        # remove `axis` parameter for `resample` function.
        expected_exception = AssertionError("axis must be 0")
    else:
        expected_exception = None

    def resample_and_call(df):
        resampler = df.resample(rule, axis=axis)
        return getattr(resampler, method)()

    eval_general(
        modin_df,
        pandas_df,
        resample_and_call,
        expected_exception=expected_exception,
    )


@pytest.mark.parametrize("rule", ["5min"])
@pytest.mark.parametrize("axis", ["index", "columns"])
@pytest.mark.parametrize(
    "method_arg",
    [
        ("pipe", lambda x: x.max() - x.min()),
        ("transform", lambda x: (x - x.mean()) / x.std()),
        ("apply", ["sum", "mean", "max"]),
        ("aggregate", ["sum", "mean", "max"]),
    ],
)
def test_resampler_functions_with_arg(rule, axis, method_arg):
    """Compare resampler methods that take one argument between Modin and pandas."""
    source, row_labels = test_data_resample["data"], test_data_resample["index"]
    modin_df = pd.DataFrame(source, index=row_labels)
    pandas_df = pandas.DataFrame(source, index=row_labels)

    if axis == "columns":
        # Resampling along columns needs datetime-like column labels.
        datetime_columns = pandas.date_range(
            "31/12/2000", periods=len(pandas_df.columns), freq="min"
        )
        modin_df.columns = datetime_columns
        pandas_df.columns = datetime_columns

    # Each parametrized case is a (method name, argument) pair.
    method, arg = method_arg

    if method in ("apply", "aggregate"):
        expected_exception = NotImplementedError("axis other than 0 is not supported")
    else:
        expected_exception = None

    def resample_and_call(df):
        resampler = df.resample(rule, axis=axis)
        return getattr(resampler, method)(arg)

    eval_general(
        modin_df,
        pandas_df,
        resample_and_call,
        expected_exception=expected_exception,
    )


@pytest.mark.parametrize("rule", ["5min"])
@pytest.mark.parametrize("closed", ["left", "right"])
@pytest.mark.parametrize("label", ["right", "left"])
@pytest.mark.parametrize(
    "on",
    [
        None,
        pytest.param(
            "DateColumn",
            marks=pytest.mark.xfail(
                condition=Engine.get() in ("Ray", "Unidist", "Dask", "Python")
                and StorageFormat.get() != "Base",
                reason="https://github.com/modin-project/modin/issues/6399",
            ),
        ),
    ],
)
@pytest.mark.parametrize("level", [None, 1])
def test_resample_specific(rule, closed, label, on, level):
    """Compare ``resample`` with ``closed``/``label``/``on``/``level`` between Modin and pandas."""
    source, row_labels = test_data_resample["data"], test_data_resample["index"]
    modin_df = pd.DataFrame(source, index=row_labels)
    pandas_df = pandas.DataFrame(source, index=row_labels)

    if on is None and level is not None:
        # `level` needs a MultiIndex with a datetime-like level to resample on.
        multi_index = pandas.MultiIndex.from_product(
            [
                ["a", "b", "c", "d"],
                pandas.date_range("31/12/2000", periods=len(pandas_df) // 4, freq="h"),
            ]
        )
        pandas_df.index = multi_index
        modin_df.index = multi_index
    else:
        # `level` is not used when `on` is given.
        level = None

    if on is not None:
        pandas_df[on] = pandas.date_range(
            "22/06/1941", periods=len(pandas_df), freq="min"
        )
        modin_df[on] = pandas.date_range(
            "22/06/1941", periods=len(modin_df), freq="min"
        )

    resample_kwargs = {"closed": closed, "label": label, "on": on, "level": level}
    pandas_resampler = pandas_df.resample(rule, **resample_kwargs)
    modin_resampler = modin_df.resample(rule, **resample_kwargs)

    df_equals(modin_resampler.var(0), pandas_resampler.var(0))
    if on is None and level is None:
        df_equals(
            modin_resampler.fillna(method="nearest"),
            pandas_resampler.fillna(method="nearest"),
        )


@pytest.mark.parametrize(
    "columns",
    [
        "volume",
        "date",
        ["volume"],
        ("volume",),
        pandas.Series(["volume"]),
        pandas.Index(["volume"]),
        ["volume", "volume", "volume"],
        ["volume", "price", "date"],
    ],
    ids=[
        "column",
        "only_missed_column",
        "list",
        "tuple",
        "series",
        "index",
        "duplicate_column",
        "missed_column",
    ],
)
def test_resample_getitem(columns, request):
    """Compare column selection on a resampler between Modin and pandas."""
    case_id = request.node.callspec.id
    time_index = pandas.date_range("1/1/2013", periods=9, freq="min")
    data = {
        "price": range(9),
        "volume": range(10, 19),
    }

    # "only_missed_column" must be checked first: "missed_column" is a
    # substring of it.
    if "only_missed_column" in case_id:
        expected_exception = KeyError("Column not found: date")
    elif "missed_column" in case_id:
        expected_exception = KeyError("Columns not found: 'date'")
    else:
        expected_exception = None

    def resampled_mean(df):
        return df.resample("3min")[columns].mean()

    modin_df, pandas_df = create_test_dfs(data, index=time_index)
    eval_general(
        modin_df,
        pandas_df,
        resampled_mean,
        expected_exception=expected_exception,
    )


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("index", ["default", "ndarray", "has_duplicates"])
@pytest.mark.parametrize("axis", [0, 1])
@pytest.mark.parametrize("periods", [0, 1, -1, 10, -10, 1000000000, -1000000000])
def test_shift(data, index, axis, periods):
    """Compare ``shift`` between Modin and pandas, with and without ``fill_value``."""
    modin_df, pandas_df = create_test_dfs(data)

    if index == "ndarray":
        # Replace the index with an integer array starting at 2.
        first_column = next(iter(data.values()))
        new_index = np.arange(2, len(first_column) + 2)
        modin_df.index = pandas_df.index = new_index
    elif index == "has_duplicates":
        # Replace the last three labels with 0, 1, 2.
        new_index = list(modin_df.index[:-3]) + [0, 1, 2]
        modin_df.index = pandas_df.index = new_index

    for extra_kwargs in ({}, {"fill_value": 777}):
        df_equals(
            modin_df.shift(periods=periods, axis=axis, **extra_kwargs),
            pandas_df.shift(periods=periods, axis=axis, **extra_kwargs),
        )


@pytest.mark.parametrize("is_multi_idx", [True, False], ids=["idx_multi", "idx_index"])
@pytest.mark.parametrize("is_multi_col", [True, False], ids=["col_multi", "col_index"])
@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_stack(data, is_multi_idx, is_multi_col):
    """Compare ``stack`` between Modin and pandas for flat and MultiIndex axes."""
    pandas_df = pandas.DataFrame(data)
    modin_df = pd.DataFrame(data)

    nrows = len(pandas_df.index)
    ncols = len(pandas_df.columns)

    # Build the row index: a product-based MultiIndex for the known sizes,
    # a tuple-based one otherwise, or the original index when not requested.
    if not is_multi_idx:
        index = pandas_df.index
    elif nrows == 256:
        index = pd.MultiIndex.from_product(
            [
                ["a", "b", "c", "d"],
                ["x", "y", "z", "last"],
                ["i", "j", "k", "index"],
                [1, 2, 3, 4],
            ]
        )
    elif nrows == 100:
        index = pd.MultiIndex.from_product(
            [
                ["x", "y", "z", "last"],
                ["a", "b", "c", "d", "f"],
                ["i", "j", "k", "l", "index"],
            ]
        )
    else:
        index = pd.MultiIndex.from_tuples([(i, i * 2, i * 3) for i in range(nrows)])

    # Build the column index following the same scheme.
    if not is_multi_col:
        columns = pandas_df.columns
    elif ncols == 64:
        columns = pd.MultiIndex.from_product(
            [["A", "B", "C", "D"], ["xx", "yy", "zz", "LAST"], [10, 20, 30, 40]]
        )
    elif ncols == 100:
        columns = pd.MultiIndex.from_product(
            [
                ["xx", "yy", "zz", "LAST"],
                ["A", "B", "C", "D", "F"],
                ["I", "J", "K", "L", "INDEX"],
            ]
        )
    else:
        columns = pd.MultiIndex.from_tuples([(i, i * 2, i * 3) for i in range(ncols)])

    pandas_df.columns = columns
    pandas_df.index = index

    modin_df.columns = columns
    modin_df.index = index

    df_equals(modin_df.stack(), pandas_df.stack())

    if is_multi_col:
        # With three column levels, also stack by explicit level selections.
        for level in (0, [0, 1], [0, 1, 2]):
            df_equals(modin_df.stack(level=level), pandas_df.stack(level=level))


@pytest.mark.parametrize("sort", [True, False])
def test_stack_sort(sort):
    """Compare ``stack(sort=...)`` between Modin and pandas."""
    # Example frame slightly modified from pandas docs to be unsorted
    unsorted_columns = pd.MultiIndex.from_tuples(
        [("weight", "pounds"), ("weight", "kg")]
    )
    modin_df, pandas_df = create_test_dfs(
        [[1, 2], [2, 4]], index=["cat", "dog"], columns=unsorted_columns
    )
    modin_result = modin_df.stack(sort=sort)
    pandas_result = pandas_df.stack(sort=sort)
    df_equals(modin_result, pandas_result)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("axis1", [0, 1])
@pytest.mark.parametrize("axis2", [0, 1])
def test_swapaxes(data, axis1, axis2):
    """Compare ``swapaxes`` between Modin and pandas for every axis pair."""
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    expected = pandas_df.swapaxes(axis1, axis2)
    actual = modin_df.swapaxes(axis1, axis2)
    df_equals(actual, expected)


def test_swapaxes_axes_names():
    """Check that ``swapaxes`` gives the same result for axis numbers and axis names."""
    modin_df = pd.DataFrame(test_data_values[0])
    swapped_by_number = modin_df.swapaxes(0, 1)
    swapped_by_name = modin_df.swapaxes("columns", "index")
    df_equals(swapped_by_number, swapped_by_name)


def test_swaplevel():
    """Compare ``swaplevel`` between Modin and pandas on a three-level row index."""
    data = np.random.randint(1, 100, 12)
    level_names = ["Number", "Letter", "Color"]
    index_tuples = [
        (num, letter, color)
        for num in range(1, 3)
        for letter in ["a", "b", "c"]
        for color in ["Red", "Green"]
    ]
    modin_df = pd.DataFrame(
        data,
        index=pd.MultiIndex.from_tuples(index_tuples, names=level_names),
    )
    pandas_df = pandas.DataFrame(
        data,
        index=pandas.MultiIndex.from_tuples(index_tuples, names=level_names),
    )

    # Swap by level names, with the default levels, and by level positions.
    for swap_args in (("Number", "Color"), (), (0, 1)):
        df_equals(
            modin_df.swaplevel(*swap_args),
            pandas_df.swaplevel(*swap_args),
        )


def test_take():
    """Compare ``take`` along rows and columns between Modin and pandas."""
    rows = [
        ("falcon", "bird", 389.0),
        ("parrot", "bird", 24.0),
        ("lion", "mammal", 80.5),
        ("monkey", "mammal", np.nan),
    ]
    column_names = ["name", "class", "max_speed"]
    row_labels = [0, 2, 3, 1]

    modin_df = pd.DataFrame(rows, columns=column_names, index=row_labels)
    pandas_df = pandas.DataFrame(rows, columns=column_names, index=row_labels)

    # Positional row selection, then positional column selection.
    df_equals(modin_df.take([0, 3]), pandas_df.take([0, 3]))
    df_equals(modin_df.take([2], axis=1), pandas_df.take([2], axis=1))


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_to_records(data):
    """Compare ``to_records`` between Modin and pandas after dropping NaN rows."""

    def records_without_index_column(df):
        # `to_records` doesn't work when `index` is among column names
        cleaned = df.dropna()
        if "index" in df.columns:
            cleaned = cleaned.drop("index", axis=1)
        return cleaned.to_records()

    modin_df, pandas_df = create_test_dfs(data)
    eval_general(modin_df, pandas_df, records_without_index_column)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_to_string(data):
    """Compare ``to_string`` output between Modin and pandas."""

    def render(df):
        return df.to_string()

    modin_df, pandas_df = create_test_dfs(data)
    eval_general(modin_df, pandas_df, render)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_truncate(data):
    """Compare ``truncate`` between Modin and pandas along rows and columns.

    Row truncation results are compared directly. For column truncation, if
    pandas raises, Modin is required to raise the same exception type;
    otherwise the results are compared.
    """
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    def check_column_truncate(before, after):
        # Mirror pandas' outcome: same exception type or an equal result.
        try:
            pandas_result = pandas_df.truncate(before, after, axis=1)
        except Exception as err:
            with pytest.raises(type(err)):
                modin_df.truncate(before, after, axis=1)
        else:
            modin_result = modin_df.truncate(before, after, axis=1)
            df_equals(modin_result, pandas_result)

    before = 1
    # Stop three rows before the end. The previous `len(modin_df - 3)`
    # subtracted 3 from every cell and then took the unchanged row count.
    after = len(modin_df) - 3
    df_equals(modin_df.truncate(before, after), pandas_df.truncate(before, after))

    before = 1
    after = 3
    df_equals(modin_df.truncate(before, after), pandas_df.truncate(before, after))

    check_column_truncate(modin_df.columns[1], modin_df.columns[-3])
    check_column_truncate(modin_df.columns[1], modin_df.columns[3])

    # With no bounds at all, truncation is checked along both axes.
    before = None
    after = None
    df_equals(modin_df.truncate(before, after), pandas_df.truncate(before, after))
    check_column_truncate(before, after)


def test_truncate_before_greater_than_after():
    """Check that ``truncate`` raises ``ValueError`` when ``before`` exceeds ``after``."""
    modin_df = pd.DataFrame([[1, 2, 3]])
    expected_message = "Truncate: 1 must be after 2"
    with pytest.raises(ValueError, match=expected_message):
        modin_df.truncate(before=2, after=1)


def test_tz_convert():
    """Compare ``tz_convert`` between Modin and pandas for flat and MultiIndex rows."""
    range_kwargs = {"periods": 500, "freq": "2D", "tz": "America/Los_Angeles"}
    modin_idx = pd.date_range("1/1/2012", **range_kwargs)
    pandas_idx = pandas.date_range("1/1/2012", **range_kwargs)
    data = np.random.randint(0, 100, size=(len(modin_idx), 4))

    # Plain datetime index.
    modin_df = pd.DataFrame(data, index=modin_idx)
    pandas_df = pandas.DataFrame(data, index=pandas_idx)
    df_equals(
        modin_df.tz_convert("UTC", axis=0),
        pandas_df.tz_convert("UTC", axis=0),
    )

    # MultiIndex whose level 0 is the datetime index.
    positions = range(len(modin_idx))
    modin_multi = pd.MultiIndex.from_arrays([modin_idx, positions])
    pandas_multi = pandas.MultiIndex.from_arrays([pandas_idx, positions])
    modin_multi_df = pd.DataFrame(data, index=modin_multi)
    pandas_multi_df = pandas.DataFrame(data, index=pandas_multi)
    df_equals(
        modin_multi_df.tz_convert("UTC", axis=0, level=0),
        pandas_multi_df.tz_convert("UTC", axis=0, level=0),
    )


def test_tz_localize():
    """Compare ``tz_localize`` between Modin and pandas for two time zones."""
    naive_index = pd.date_range("1/1/2012", periods=400, freq="2D")
    data = np.random.randint(0, 100, size=(len(naive_index), 4))
    modin_df = pd.DataFrame(data, index=naive_index)
    pandas_df = pandas.DataFrame(data, index=naive_index)

    for timezone in ("UTC", "America/Los_Angeles"):
        df_equals(
            modin_df.tz_localize(timezone, axis=0),
            pandas_df.tz_localize(timezone, axis=0),
        )


@pytest.mark.parametrize("is_multi_idx", [True, False], ids=["idx_multi", "idx_index"])
@pytest.mark.parametrize("is_multi_col", [True, False], ids=["col_multi", "col_index"])
@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_unstack(data, is_multi_idx, is_multi_col):
    """Compare ``unstack`` between Modin and pandas for flat and MultiIndex axes."""
    modin_df, pandas_df = create_test_dfs(data)

    index = (
        generate_multiindex(len(pandas_df), nlevels=4, is_tree_like=True)
        if is_multi_idx
        else pandas_df.index
    )
    columns = (
        generate_multiindex(len(pandas_df.columns), nlevels=3, is_tree_like=True)
        if is_multi_col
        else pandas_df.columns
    )

    pandas_df.columns = modin_df.columns = columns
    pandas_df.index = modin_df.index = index

    df_equals(modin_df.unstack(), pandas_df.unstack())
    df_equals(modin_df.unstack(level=1), pandas_df.unstack(level=1))
    if is_multi_idx:
        # With four index levels, also unstack by growing lists of levels.
        for levels in ([0, 1], [0, 1, 2], [0, 1, 2, 3]):
            df_equals(modin_df.unstack(level=levels), pandas_df.unstack(level=levels))


@pytest.mark.parametrize(
    "multi_col", ["col_multi_tree", "col_multi_not_tree", "col_index"]
)
@pytest.mark.parametrize(
    "multi_idx", ["idx_multi_tree", "idx_multi_not_tree", "idx_index"]
)
def test_unstack_multiindex_types(multi_col, multi_idx):
    """Compare ``unstack`` between Modin and pandas for several MultiIndex kinds."""
    MAX_NROWS = MAX_NCOLS = 36

    pandas_df = pandas.DataFrame(test_data["int_data"]).iloc[:MAX_NROWS, :MAX_NCOLS]
    modin_df = pd.DataFrame(test_data["int_data"]).iloc[:MAX_NROWS, :MAX_NCOLS]

    def get_new_index(index, cond):
        # Map the parametrized condition onto the requested kind of axis labels.
        if cond in ("col_multi_tree", "idx_multi_tree"):
            return generate_multiindex(len(index), nlevels=3, is_tree_like=True)
        if cond in ("col_multi_not_tree", "idx_multi_not_tree"):
            return generate_multiindex(len(index), nlevels=3)
        return index

    pandas_df.columns = modin_df.columns = get_new_index(pandas_df.columns, multi_col)
    pandas_df.index = modin_df.index = get_new_index(pandas_df.index, multi_idx)

    df_equals(modin_df.unstack(), pandas_df.unstack())
    df_equals(modin_df.unstack(level=1), pandas_df.unstack(level=1))
    if multi_idx != "idx_index":
        for levels in ([0, 1], [0, 1, 2]):
            df_equals(modin_df.unstack(level=levels), pandas_df.unstack(level=levels))


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("copy_kwargs", ({"copy": True}, {"copy": None}, {}))
@pytest.mark.parametrize(
    "get_array, get_array_name",
    (
        (lambda df, copy_kwargs: df.__array__(**copy_kwargs), "__array__"),
        (lambda df, copy_kwargs: np.array(df, **copy_kwargs), "np.array"),
    ),
)
def test___array__(data, copy_kwargs, get_array, get_array_name):
    """Check that converting Modin and pandas frames to arrays gives equal arrays."""
    via_np_array = get_array_name == "np.array"
    if via_np_array and Version(np.__version__) < Version("2"):
        if "copy" in copy_kwargs and copy_kwargs["copy"] is None:
            pytest.skip(reason="np.array does not support copy=None before numpy 2.0")

    modin_df, pandas_df = create_test_dfs(data)
    assert_array_equal(
        get_array(modin_df, copy_kwargs), get_array(pandas_df, copy_kwargs)
    )


@pytest.mark.xfail(
    condition=Backend.get() != "Pandas",
    raises=AssertionError,
    reason="https://github.com/modin-project/modin/issues/4650",
)
def test___array__copy_false_creates_view():
    """Check that writes through ``np.array(df, copy=False)`` affect Modin like pandas."""

    def increment_first_cell(df):
        # Mutate the array obtained without copying; the frames themselves
        # are compared afterwards because of `__inplace__=True`.
        no_copy_array = np.array(df, copy=False)
        no_copy_array[0, 0] += 1

    modin_df, pandas_df = create_test_dfs([[11]])
    eval_general(modin_df, pandas_df, increment_first_cell, __inplace__=True)


@pytest.mark.parametrize("data", [[False], [True], [1, 2]])
def test___bool__(data):
    """Check that ``__bool__`` raises the same ambiguity error for Modin and pandas."""
    ambiguous_truth_error = ValueError(
        "The truth value of a DataFrame is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all()."
    )

    def call_bool(df):
        return df.__bool__()

    modin_df, pandas_df = create_test_dfs(data)
    eval_general(
        modin_df,
        pandas_df,
        call_bool,
        expected_exception=ambiguous_truth_error,
    )


@pytest.mark.parametrize(
    "is_sparse_data", [True, False], ids=["is_sparse", "is_not_sparse"]
)
def test_hasattr_sparse(is_sparse_data):
    """Check that ``hasattr(df, "sparse")`` agrees between Modin and pandas."""
    if is_sparse_data:
        source = pandas.arrays.SparseArray(test_data["float_nan_data"].values())
    else:
        source = test_data["float_nan_data"]
    modin_df, pandas_df = create_test_dfs(source)

    def has_sparse_accessor(df):
        return hasattr(df, "sparse")

    eval_general(modin_df, pandas_df, has_sparse_accessor)


def test_setattr_axes():
    """Check that assigning ``.index`` or ``.columns`` sets the labels without warning."""
    # Test that setting .index or .columns does not warn
    modin_df = pd.DataFrame([[1, 2], [3, 4]])
    with warnings.catch_warnings():
        # In BaseOnPython, setting columns raises a warning because get_axis
        #  defaults to pandas, so warnings are only escalated elsewhere.
        escalate_warnings = get_current_execution() != "BaseOnPython"
        if escalate_warnings:
            warnings.simplefilter("error")

        modin_df.index = ["foo", "bar"]
        # Check that ensure_index was called
        expected_index = pandas.Index(["foo", "bar"])
        pd.testing.assert_index_equal(modin_df.index, expected_index)

        modin_df.columns = [9, 10]
        expected_columns = pandas.Index([9, 10])
        pd.testing.assert_index_equal(modin_df.columns, expected_columns)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_attrs(data):
    """Compare the ``attrs`` attribute between Modin and pandas frames."""

    def read_attrs(df):
        return df.attrs

    eval_general(*create_test_dfs(data), read_attrs)


def test_df_from_series_with_tuple_name():
    """Compare frames built from a tuple-named series against the pandas result."""
    # Tests that creating a DataFrame from a series with a tuple name results in
    # a DataFrame with MultiIndex columns.
    series_name = ("a", 1)
    pandas_result = pandas.DataFrame(pandas.Series(name=series_name))

    # 1. Creating a Modin DF from native pandas Series
    # 2. Creating a Modin DF from Modin Series
    for series_type in (pandas.Series, pd.Series):
        modin_result = pd.DataFrame(series_type(name=series_name))
        df_equals(modin_result, pandas_result)


def test_large_df_warns_distributing_takes_time():
    """Check that building a 100_000 x 10 frame emits the "Distributing" warning."""
    # https://github.com/modin-project/modin/issues/6574

    warning_pattern = r"Distributing (.*) object\. This may take some time\."
    large_array = np.random.randint(1_000_000, size=(100_000, 10))
    with pytest.warns(UserWarning, match=warning_pattern):
        pd.DataFrame(large_array)


def test_large_series_warns_distributing_takes_time():
    """Creating a 2_500_000-element series must emit the "Distributing" UserWarning.

    See https://github.com/modin-project/modin/issues/6574
    """
    values = np.random.randint(1_000_000, size=2_500_000)
    expected_message = r"Distributing (.*) object\. This may take some time\."
    with pytest.warns(UserWarning, match=expected_message):
        pd.Series(values)


def test_df_does_not_warn_distributing_takes_time():
    """Creating a 100_000 x 9 frame must not emit the "Distributing" UserWarning.

    See https://github.com/modin-project/modin/issues/6574
    """
    values = np.random.randint(1_000_000, size=(100_000, 9))
    forbidden_message = r"Distributing (.*) object\. This may take some time\."
    with warnings.catch_warnings():
        # Escalate only the matching UserWarning so that it fails the test.
        warnings.filterwarnings("error", forbidden_message, UserWarning)
        pd.DataFrame(values)


def test_series_does_not_warn_distributing_takes_time():
    """Creating a 2_400_000-element series must not emit the "Distributing" UserWarning.

    See https://github.com/modin-project/modin/issues/6574
    """
    values = np.random.randint(1_000_000, size=2_400_000)
    forbidden_message = r"Distributing (.*) object\. This may take some time\."
    with warnings.catch_warnings():
        # Escalate only the matching UserWarning so that it fails the test.
        warnings.filterwarnings("error", forbidden_message, UserWarning)
        pd.Series(values)


@pytest.mark.parametrize("dtype", [np.int64, pd.ArrowDtype(pa.int64())])
def test_empty_df_dtypes(dtype):
    """An empty column must keep the dtype passed to the constructor."""
    empty_frame = pd.DataFrame({"A": []}, dtype=dtype)
    column_dtype = empty_frame.dtypes["A"]
    assert column_dtype == dtype


def test_array_ufunc():
    """Applying ``np.sqrt`` to a frame and to a series must match pandas."""
    frame_pair = create_test_dfs([[1, 2], [3, 4]])
    eval_general(*frame_pair, np.sqrt)

    series_pair = create_test_series([1, 2, 3, 4, 9])
    eval_general(*series_pair, np.sqrt)


================================================
FILE: modin/tests/pandas/dataframe/test_indexing.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import os
import sys

import matplotlib
import numpy as np
import pandas
import pytest
from pandas._testing import ensure_clean

import modin.pandas as pd
from modin.config import MinRowPartitionSize, NPartitions
from modin.pandas.indexing import is_range_like
from modin.pandas.testing import assert_index_equal
from modin.tests.pandas.utils import (
    NROWS,
    RAND_HIGH,
    RAND_LOW,
    arg_keys,
    assert_dtypes_equal,
    axis_keys,
    axis_values,
    create_test_dfs,
    default_to_pandas_ignore_string,
    df_equals,
    eval_general,
    generate_multiindex,
    int_arg_keys,
    int_arg_values,
    name_contains,
    test_data,
    test_data_keys,
    test_data_values,
)
from modin.utils import get_current_execution

# Set the Modin ``NPartitions`` config value to 4 when this module is imported.
NPartitions.put(4)

# Force matplotlib to not use any Xwindows backend.
matplotlib.use("Agg")

# Our configuration in pytest.ini requires that we explicitly catch all
# instances of defaulting to pandas, but some test modules, like this one,
# have too many such instances.
# TODO(https://github.com/modin-project/modin/issues/3655): catch all instances
# of defaulting to pandas.
pytestmark = pytest.mark.filterwarnings(default_to_pandas_ignore_string)


def eval_setitem(md_df, pd_df, value, col=None, loc=None, expected_exception=None):
    """
    Run ``df[col] = value`` on both frames via ``eval_general`` in in-place mode.

    Parameters
    ----------
    md_df : modin.pandas.DataFrame
    pd_df : pandas.DataFrame
    value : object or callable
        Value to assign. A callable is invoked with the frame being modified
        and its result is assigned instead.
    col : label, optional
        Column label to assign to.
    loc : int, optional
        Position of the column to assign to. When given, it takes precedence
        over `col`: the label is looked up as ``pd_df.columns[loc]``.
    expected_exception : Exception, optional
        Forwarded to ``eval_general``.
    """
    target = col if loc is None else pd_df.columns[loc]

    if callable(value):
        produce_value = value
    else:

        def produce_value(*args, **kwargs):
            return value

    def assign(df):
        return df.__setitem__(target, produce_value(df))

    eval_general(
        md_df,
        pd_df,
        assign,
        __inplace__=True,
        expected_exception=expected_exception,
    )


def eval_loc(md_df, pd_df, value, key):
    """
    Run ``df.loc[key] = value`` on both frames via ``eval_general`` in in-place mode.

    Parameters
    ----------
    md_df : modin.pandas.DataFrame
    pd_df : pandas.DataFrame
    value : object or tuple
        Value to assign. A 2-tuple is interpreted as ``(modin_value, pandas_value)``
        for the case when the two libraries need different value objects.
    key : object
        Indexer passed to ``.loc``.
    """
    if not isinstance(value, tuple):
        md_value = pd_value = value
    else:
        assert len(value) == 2
        # The value for pandas differs from the one for Modin.
        md_value, pd_value = value

    def assign(df):
        chosen = pd_value if isinstance(df, pandas.DataFrame) else md_value
        return df.loc.__setitem__(key, chosen)

    eval_general(md_df, pd_df, assign, __inplace__=True)


@pytest.mark.parametrize(
    "dates",
    [
        ["2018-02-27 09:03:30", "2018-02-27 09:04:30"],
        ["2018-02-27 09:03:00", "2018-02-27 09:05:00"],
    ],
)
@pytest.mark.parametrize("subset", ["a", "b", ["a", "b"], None])
def test_asof_with_nan(dates, subset):
    """Check ``asof`` on a datetime-indexed frame whose column "b" is mostly None."""
    frame_data = {"a": [10, 20, 30, 40, 50], "b": [None, None, None, None, 500]}
    timestamps = [
        "2018-02-27 09:01:00",
        "2018-02-27 09:02:00",
        "2018-02-27 09:03:00",
        "2018-02-27 09:04:00",
        "2018-02-27 09:05:00",
    ]
    compare_asof(
        frame_data,
        pd.DatetimeIndex(timestamps),
        pd.DatetimeIndex(dates),
        pandas.DatetimeIndex(dates),
        subset,
    )


@pytest.mark.parametrize(
    "dates",
    [
        ["2018-02-27 09:03:30", "2018-02-27 09:04:30"],
        ["2018-02-27 09:03:00", "2018-02-27 09:05:00"],
    ],
)
@pytest.mark.parametrize("subset", ["a", "b", ["a", "b"], None])
def test_asof_without_nan(dates, subset):
    """Check ``asof`` on a datetime-indexed frame that has no missing values."""
    frame_data = {"a": [10, 20, 30, 40, 50], "b": [70, 600, 30, -200, 500]}
    timestamps = [
        "2018-02-27 09:01:00",
        "2018-02-27 09:02:00",
        "2018-02-27 09:03:00",
        "2018-02-27 09:04:00",
        "2018-02-27 09:05:00",
    ]
    compare_asof(
        frame_data,
        pd.DatetimeIndex(timestamps),
        pd.DatetimeIndex(dates),
        pandas.DatetimeIndex(dates),
        subset,
    )


@pytest.mark.parametrize(
    "lookup",
    [[60, 70, 90], [60.5, 70.5, 100]],
)
@pytest.mark.parametrize("subset", ["col2", "col1", ["col1", "col2"], None])
def test_asof_large(lookup, subset):
    """Check ``asof`` on the ``float_nan_data`` set with an integer index."""
    compare_asof(
        test_data["float_nan_data"],
        list(range(NROWS)),
        pd.Index(lookup),
        pandas.Index(lookup),
        subset,
    )


def compare_asof(
    data, index, modin_where: pd.Index, pandas_where: pandas.Index, subset
):
    """
    Compare ``DataFrame.asof`` between Modin and pandas for several `where` forms.

    The lookup is passed as the index object itself, as its ``.values`` array,
    as a list of those values and as the first value alone.

    Parameters
    ----------
    data : object
        Data used to build both frames.
    index : object
        Index used to build both frames.
    modin_where : modin.pandas.Index
        Lookup labels for the Modin frame.
    pandas_where : pandas.Index
        Lookup labels for the pandas frame.
    subset : object
        Forwarded to ``asof`` as the ``subset`` argument.
    """
    modin_df = pd.DataFrame(data, index=index)
    pandas_df = pandas.DataFrame(data, index=index)

    where_forms = (
        lambda where: where,
        lambda where: where.values,
        lambda where: list(where.values),
        lambda where: where.values[0],
    )
    for to_form in where_forms:
        df_equals(
            modin_df.asof(to_form(modin_where), subset=subset),
            pandas_df.asof(to_form(pandas_where), subset=subset),
        )


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_first_valid_index(data):
    """``first_valid_index`` must return the same label in Modin and pandas."""
    modin_first = pd.DataFrame(data).first_valid_index()
    pandas_first = pandas.DataFrame(data).first_valid_index()

    assert modin_first == pandas_first


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("n", int_arg_values, ids=arg_keys("n", int_arg_keys))
def test_head(data, n):
    """Compare ``head`` on a plain frame and on a column-selected frame."""
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    # Plain head, then a request for one row more than the frame holds.
    df_equals(modin_df.head(n), pandas_df.head(n))
    modin_oversized = modin_df.head(len(modin_df) + 1)
    pandas_oversized = pandas_df.head(len(pandas_df) + 1)
    df_equals(modin_oversized, pandas_oversized)

    # head called on the result of a ``loc`` column selection
    # (a QueryCompilerView); "col3" is deliberately requested twice.
    selected_columns = ["col1", "col3", "col3"]
    df_equals(
        modin_df.loc[:, selected_columns].head(n),
        pandas_df.loc[:, selected_columns].head(n),
    )


@pytest.mark.skip(reason="Defaulting to Pandas")
@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_iat(data):
    """(Currently skipped.) Calling ``iat`` is expected to raise NotImplementedError."""
    frame = pd.DataFrame(data)

    with pytest.raises(NotImplementedError):
        frame.iat()


@pytest.mark.gpu
@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_iloc(request, data):
    """
    Compare positional (``iloc``) reads and writes between Modin and pandas.

    For the ``empty_data`` parametrization only the ``IndexError`` raised by a
    scalar access is checked.
    """
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    if name_contains(request.node.name, ["empty_data"]):
        with pytest.raises(IndexError):
            modin_df.iloc[0, 1]
        return

    def first_five_positions(df):
        return df.index.get_indexer_for(df.index[:5])

    # Scalar
    np.testing.assert_equal(modin_df.iloc[0, 1], pandas_df.iloc[0, 1])

    # Selections that yield a Series, followed by ones that yield a DataFrame.
    # ``iloc[[1, 2], [1, 0]]`` is not covered here, see issue #80.
    series_keys = (0, (slice(1, None), 0), (slice(1, 2), 0))
    frame_keys = ([1, 2], (slice(1, 2), slice(0, 2)))
    for key in series_keys + frame_keys:
        df_equals(modin_df.iloc[key], pandas_df.iloc[key])

    # Issue #43: this selection only has to succeed.
    modin_df.iloc[0:3, :]

    # Write a scalar into two rows.
    modin_df.iloc[[1, 2]] = 42
    pandas_df.iloc[[1, 2]] = 42
    df_equals(modin_df, pandas_df)

    # Overwrite one row with another, starting from fresh frames.
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)
    modin_df.iloc[0] = modin_df.iloc[1]
    pandas_df.iloc[0] = pandas_df.iloc[1]
    df_equals(modin_df, pandas_df)

    # Overwrite one column with another, starting from fresh frames.
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)
    modin_df.iloc[:, 0] = modin_df.iloc[:, 1]
    pandas_df.iloc[:, 0] = pandas_df.iloc[:, 1]
    df_equals(modin_df, pandas_df)

    # From issue #1775: rows selected with a callable.
    df_equals(
        modin_df.iloc[first_five_positions],
        pandas_df.iloc[first_five_positions],
    )

    # Read values, selecting rows with callable and a column with a scalar.
    df_equals(
        pandas_df.iloc[first_five_positions, 0],
        modin_df.iloc[first_five_positions, 0],
    )


@pytest.mark.gpu
@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_index(data):
    """Compare ``.index`` before and after replacing it with string labels."""
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)
    df_equals(modin_df.index, pandas_df.index)

    modin_copy, pandas_copy = modin_df.copy(), pandas_df.copy()

    # Replace every label by its string form on the copies.
    modin_copy.index = list(map(str, modin_copy.index))
    pandas_copy.index = list(map(str, pandas_copy.index))
    df_equals(modin_copy.index, pandas_copy.index)


@pytest.mark.gpu
@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_indexing_duplicate_axis(data):
    """Compare ``loc``/``iloc`` row access when the index has duplicate labels."""
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    # Every label is repeated three times: 0, 0, 0, 1, 1, 1, ...
    repeated_labels = [position // 3 for position in range(len(modin_df))]
    modin_df.index = pandas_df.index = repeated_labels
    assert modin_df.index.duplicated().any()
    assert pandas_df.index.duplicated().any()

    df_equals(modin_df.iloc[0], pandas_df.iloc[0])
    df_equals(modin_df.loc[0], pandas_df.loc[0])
    df_equals(modin_df.iloc[0, 0:4], pandas_df.iloc[0, 0:4])

    modin_first_four = modin_df.columns[0:4]
    pandas_first_four = pandas_df.columns[0:4]
    df_equals(modin_df.loc[0, modin_first_four], pandas_df.loc[0, pandas_first_four])


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize(
    "key_func",
    [
        # test for the case from https://github.com/modin-project/modin/issues/4308
        lambda df: "non_existing_column",
        lambda df: df.columns[0],
        lambda df: df.index,
        lambda df: [df.index, df.columns[0]],
        lambda df: (
            pandas.Series(list(range(len(df.index))))
            if isinstance(df, pandas.DataFrame)
            else pd.Series(list(range(len(df))))
        ),
    ],
    ids=[
        "non_existing_column",
        "first_column_name",
        "original_index",
        "list_of_index_and_first_column_name",
        "series_of_integers",
    ],
)
@pytest.mark.parametrize(
    "drop_kwargs",
    [{"drop": True}, {"drop": False}, {}],
    ids=["drop_True", "drop_False", "no_drop_param"],
)
def test_set_index(data, key_func, drop_kwargs, request):
    """Compare ``set_index`` for several key kinds and ``drop`` settings."""
    test_name = request.node.name
    is_known_failure = (
        "list_of_index_and_first_column_name" in test_name
        and "drop_False" in test_name
    )
    if is_known_failure:
        pytest.xfail(
            reason="KeyError: https://github.com/modin-project/modin/issues/5636"
        )

    if "non_existing_column" in request.node.callspec.id:
        expected_exception = KeyError(
            "None of ['non_existing_column'] are in the columns"
        )
    else:
        expected_exception = None

    modin_df, pandas_df = create_test_dfs(data)

    def set_index(df):
        return df.set_index(key_func(df), **drop_kwargs)

    eval_general(
        modin_df,
        pandas_df,
        set_index,
        expected_exception=expected_exception,
    )


@pytest.mark.parametrize("index", ["a", ["a", ("b", "")]])
def test_set_index_with_multiindex(index):
    """Compare ``set_index`` on a frame with two-level columns; see #5186."""
    column_levels = [["a", "b", "c", "d"], ["", "", "x", "y"]]
    modin_df, pandas_df = create_test_dfs(
        np.random.rand(2, 4), columns=column_levels
    )
    eval_general(modin_df, pandas_df, lambda df: df.set_index(index))


@pytest.mark.gpu
@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_keys(data):
    """``DataFrame.keys()`` must agree between Modin and pandas."""
    modin_keys = pd.DataFrame(data).keys()
    pandas_keys = pandas.DataFrame(data).keys()

    df_equals(modin_keys, pandas_keys)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_loc(data):
    """
    Compare label-based (``loc``) reads and writes between Modin and pandas.

    Covers scalar, Series and DataFrame selections, boolean masks given as
    lists and as Series, label slices, reordered label lists, callable row
    selectors, in-place writes on copies, and the ``KeyError`` raised for a
    missing row label.
    """
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    key1 = modin_df.columns[0]
    key2 = modin_df.columns[1]
    # Scalar
    df_equals(modin_df.loc[0, key1], pandas_df.loc[0, key1])

    # Series
    df_equals(modin_df.loc[0], pandas_df.loc[0])
    df_equals(modin_df.loc[1:, key1], pandas_df.loc[1:, key1])
    df_equals(modin_df.loc[1:2, key1], pandas_df.loc[1:2, key1])
    df_equals(modin_df.loc[:, key1], pandas_df.loc[:, key1])

    # DataFrame
    df_equals(modin_df.loc[[1, 2]], pandas_df.loc[[1, 2]])

    # Boolean masks: every third row and every fifth column.
    indices = [i % 3 == 0 for i in range(len(modin_df.index))]
    columns = [i % 5 == 0 for i in range(len(modin_df.columns))]

    # Key is a list of booleans
    modin_result = modin_df.loc[indices, columns]
    pandas_result = pandas_df.loc[indices, columns]
    df_equals(modin_result, pandas_result)

    # Key is a Modin or pandas series of booleans. The pandas reference is
    # built only from pandas objects so that it does not depend on the frame
    # under test.
    df_equals(
        modin_df.loc[pd.Series(indices), pd.Series(columns, index=modin_df.columns)],
        pandas_df.loc[
            pandas.Series(indices), pandas.Series(columns, index=pandas_df.columns)
        ],
    )

    modin_result = modin_df.loc[:, columns]
    pandas_result = pandas_df.loc[:, columns]
    df_equals(modin_result, pandas_result)

    modin_result = modin_df.loc[indices]
    pandas_result = pandas_df.loc[indices]
    df_equals(modin_result, pandas_result)

    # See issue #80
    # df_equals(modin_df.loc[[1, 2], ['col1']], pandas_df.loc[[1, 2], ['col1']])
    df_equals(modin_df.loc[1:2, key1:key2], pandas_df.loc[1:2, key1:key2])

    # From issue #421
    df_equals(modin_df.loc[:, [key2, key1]], pandas_df.loc[:, [key2, key1]])
    df_equals(modin_df.loc[[2, 1], :], pandas_df.loc[[2, 1], :])

    # From issue #1023
    key1 = modin_df.columns[0]
    key2 = modin_df.columns[-2]
    df_equals(modin_df.loc[:, key1:key2], pandas_df.loc[:, key1:key2])

    # Write Item
    modin_df_copy = modin_df.copy()
    pandas_df_copy = pandas_df.copy()
    modin_df_copy.loc[[1, 2]] = 42
    pandas_df_copy.loc[[1, 2]] = 42
    df_equals(modin_df_copy, pandas_df_copy)

    # Write an item, selecting rows with a callable.
    modin_df_copy2 = modin_df.copy()
    pandas_df_copy2 = pandas_df.copy()
    modin_df_copy2.loc[lambda df: df[key1].isin(list(range(1000)))] = 42
    pandas_df_copy2.loc[lambda df: df[key1].isin(list(range(1000)))] = 42
    df_equals(modin_df_copy2, pandas_df_copy2)

    # Write an item, selecting rows with a callable and a column with a scalar.
    modin_df_copy3 = modin_df.copy()
    pandas_df_copy3 = pandas_df.copy()
    modin_df_copy3.loc[lambda df: df[key1].isin(list(range(1000))), key1] = 42
    pandas_df_copy3.loc[lambda df: df[key1].isin(list(range(1000))), key1] = 42
    df_equals(modin_df_copy3, pandas_df_copy3)

    # Disabled for `BaseOnPython` because of the issue with `getitem_array`:
    # https://github.com/modin-project/modin/issues/3701
    if get_current_execution() != "BaseOnPython":
        # From issue #1775
        df_equals(
            modin_df.loc[lambda df: df.iloc[:, 0].isin(list(range(1000)))],
            pandas_df.loc[lambda df: df.iloc[:, 0].isin(list(range(1000)))],
        )

        # Read values, selecting rows with a callable and a column with a scalar.
        df_equals(
            pandas_df.loc[lambda df: df[key1].isin(list(range(1000))), key1],
            modin_df.loc[lambda df: df[key1].isin(list(range(1000))), key1],
        )

    # From issue #1374
    with pytest.raises(KeyError):
        modin_df.loc["NO_EXIST"]


@pytest.mark.parametrize(
    "key_getter, value_getter",
    [
        pytest.param(
            lambda df, axis: (
                (slice(None), df.axes[axis][:2])
                if axis
                else (df.axes[axis][:2], slice(None))
            ),
            lambda df, axis: df.iloc[:, :1] if axis else df.iloc[:1, :],
            id="len(key)_>_len(value)",
        ),
        pytest.param(
            lambda df, axis: (
                (slice(None), df.axes[axis][:2])
                if axis
                else (df.axes[axis][:2], slice(None))
            ),
            lambda df, axis: df.iloc[:, :3] if axis else df.iloc[:3, :],
            id="len(key)_<_len(value)",
        ),
        pytest.param(
            lambda df, axis: (
                (slice(None), df.axes[axis][:2])
                if axis
                else (df.axes[axis][:2], slice(None))
            ),
            lambda df, axis: df.iloc[:, :2] if axis else df.iloc[:2, :],
            id="len(key)_==_len(value)",
        ),
    ],
)
@pytest.mark.parametrize("key_axis", [0, 1])
@pytest.mark.parametrize("reverse_value_index", [True, False])
@pytest.mark.parametrize("reverse_value_columns", [True, False])
def test_loc_4456(
    key_getter, value_getter, key_axis, reverse_value_index, reverse_value_columns
):
    """
    Compare ``loc`` assignment of a DataFrame value whose size along the keyed
    axis is smaller than, larger than or equal to the key, optionally with the
    value's index and/or columns reversed.
    """
    data = test_data["float_nan_data"]
    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)

    row_key, col_key = key_getter(pandas_df, key_axis)

    # `df.loc` doesn't work right for range-like indexers. Converting them to a list.
    # https://github.com/modin-project/modin/issues/4497
    if is_range_like(row_key):
        row_key = list(row_key)
    if is_range_like(col_key):
        col_key = list(col_key)
    key = (row_key, col_key)

    # Random integer frame with the same labels as the target; the assigned
    # value is carved out of it.
    value = pandas.DataFrame(
        np.random.randint(0, 100, size=pandas_df.shape),
        index=pandas_df.index,
        columns=pandas_df.columns,
    )
    pdf_value = value_getter(value, key_axis)
    mdf_value = value_getter(pd.DataFrame(value), key_axis)

    if reverse_value_index:
        pdf_value, mdf_value = (
            val.reindex(index=val.index[::-1]) for val in (pdf_value, mdf_value)
        )
    if reverse_value_columns:
        pdf_value, mdf_value = (
            val.reindex(columns=val.columns[::-1]) for val in (pdf_value, mdf_value)
        )

    # First with the pandas value on both sides, then with a library-specific
    # value for each frame.
    eval_loc(modin_df, pandas_df, pdf_value, key)
    eval_loc(modin_df, pandas_df, (mdf_value, pdf_value), key)


def test_loc_6774():
    """
    Compare ``loc`` assignments that add new columns to a frame.

    Three steps are checked in sequence: a full new column, a new column
    assigned only for a row slice, and a multi-column assignment that both
    overwrites existing columns and adds a new one. Presumably the regression
    test for issue #6774 (inferred from the test name).
    """
    modin_df, pandas_df = create_test_dfs(
        {"a": [1, 2, 3, 4, 5], "b": [10, 20, 30, 40, 50]}
    )
    pandas_df.loc[:, "c"] = [10, 20, 30, 40, 51]
    modin_df.loc[:, "c"] = [10, 20, 30, 40, 51]
    df_equals(modin_df, pandas_df)

    pandas_df.loc[2:, "y"] = [30, 40, 51]
    modin_df.loc[2:, "y"] = [30, 40, 51]
    df_equals(modin_df, pandas_df)

    def make_block():
        # Built with native pandas so the value assigned to the reference
        # frame does not depend on Modin. The rows have unequal lengths, so
        # pandas fills the gaps with NaN before the transpose.
        return (
            pandas.DataFrame([[10, 20, 30, 40, 50], [10, 20, 30, 40], [10, 20, 30]])
            .transpose()
            .values
        )

    pandas_df.loc[:, ["b", "c", "d"]] = make_block()
    modin_df.loc[:, ["b", "c", "d"]] = make_block()
    df_equals(modin_df, pandas_df)


def test_loc_5829():
    """Compare a ``loc`` assignment of a 2x3 array to three new columns of an object frame."""
    frame_data = {"a": [1, 2, 3, 4, 5], "b": [11, 12, 13, 14, 15]}
    modin_df = pd.DataFrame(frame_data, dtype=object)
    pandas_df = pandas.DataFrame(frame_data, dtype=object)

    new_values = np.array([[24, 34, 44], [25, 35, 45]])
    target = ([3, 4], ["c", "d", "e"])
    eval_loc(modin_df, pandas_df, value=new_values, key=target)


def test_loc_7135():
    """Compare a ``loc`` assignment whose row label equals the frame length."""
    n_rows, n_cols = 2**16, 2**8
    modin_df, pandas_df = create_test_dfs(
        np.random.randint(0, 100, size=(n_rows, n_cols))
    )
    # With the default integer index this label is one past the last row.
    new_row_label = len(pandas_df)
    eval_loc(
        modin_df,
        pandas_df,
        value=list(range(n_cols)),
        key=new_row_label,
    )


def test_loc_setting_single_categorical_column():
    """
    Compare a ``loc`` write into the only column of a categorical frame.

    This tests the bug from https://github.com/modin-project/modin/issues/3736
    """
    frame_data = {"status": ["a", "b", "c"]}
    modin_df = pd.DataFrame(frame_data, dtype="category")
    pandas_df = pandas.DataFrame(frame_data, dtype="category")

    for frame in (modin_df, pandas_df):
        frame.loc[1:3, "status"] = "a"

    df_equals(modin_df, pandas_df)


def test_loc_multi_index():
    """
    Compare ``loc`` reads on frames with MultiIndex columns and MultiIndex rows.

    The first half uses a CSV file read with four header rows; the second half
    uses a random frame with a two-level row index.
    """
    csv_path = "modin/tests/pandas/data/blah.csv"
    modin_df = pd.read_csv(csv_path, header=[0, 1, 2, 3], index_col=0)
    pandas_df = pandas.read_csv(csv_path, header=[0, 1, 2, 3], index_col=0)

    df_equals(modin_df.loc[1], pandas_df.loc[1])
    df_equals(modin_df.loc[1, "Presidents"], pandas_df.loc[1, "Presidents"])

    partial_column = ("Presidents", "Pure mentions")
    df_equals(modin_df.loc[1, partial_column], pandas_df.loc[1, partial_column])

    # A complete column tuple addresses a single cell.
    full_column = ("Presidents", "Pure mentions", "IND", "all")
    assert modin_df.loc[1, full_column] == pandas_df.loc[1, full_column]

    df_equals(modin_df.loc[(1, 2), "Presidents"], pandas_df.loc[(1, 2), "Presidents"])

    # Two-level row index: each of the four first-level labels is paired with
    # each of the four second-level labels.
    tuples = [
        (first, second)
        for first in ("bar", "baz", "foo", "qux")
        for second in ("one", "two", "three", "four")
    ]
    level_names = ["first", "second"]
    modin_index = pd.MultiIndex.from_tuples(tuples, names=level_names)
    pandas_index = pandas.MultiIndex.from_tuples(tuples, names=level_names)

    frame_data = np.random.randint(0, 100, size=(16, 100))
    column_labels = [f"col{i}" for i in range(100)]
    modin_df = pd.DataFrame(frame_data, index=modin_index, columns=column_labels)
    pandas_df = pandas.DataFrame(
        frame_data, index=pandas_index, columns=column_labels
    )

    df_equals(modin_df.loc["bar", "col1"], pandas_df.loc["bar", "col1"])

    full_row = ("bar", "one")
    assert modin_df.loc[full_row, "col1"] == pandas_df.loc[full_row, "col1"]

    two_columns = ("col1", "col2")
    df_equals(modin_df.loc["bar", two_columns], pandas_df.loc["bar", two_columns])

    # From issue #1456
    transposed_modin = modin_df.T
    transposed_pandas = pandas_df.T
    df_equals(
        transposed_modin.loc[transposed_modin.index[:-2], :],
        transposed_pandas.loc[transposed_pandas.index[:-2], :],
    )

    # From issue #1610
    df_equals(modin_df.loc[modin_df.index], pandas_df.loc[pandas_df.index])
    df_equals(modin_df.loc[modin_df.index[:7]], pandas_df.loc[pandas_df.index[:7]])


def test_loc_multi_index_with_tuples():
    """Select one column of a two-level column MultiIndex by its full tuple."""
    level_a = ["bar", "bar", "baz", "baz"]
    level_b = ["one", "two", "one", "two"]
    columns = pd.MultiIndex.from_tuples(zip(level_a, level_b), names=["a", "b"])

    nrows, ncols = 5, len(columns)
    data = np.arange(nrows * ncols).reshape(nrows, ncols)

    modin_df, pandas_df = create_test_dfs(data, columns=columns)
    eval_general(modin_df, pandas_df, lambda df: df.loc[:, ("bar", "two")])


def test_loc_multi_index_rows_with_tuples_5721():
    """Select rows of a two-level row MultiIndex by a partial and by a full tuple."""
    level_a = ["bar", "bar", "baz", "baz"]
    level_b = ["one", "two", "one", "two"]
    index = pd.MultiIndex.from_tuples(zip(level_a, level_b), names=["a", "b"])

    nrows, ncols = len(index), 5
    data = np.arange(ncols * nrows).reshape(nrows, ncols)

    modin_df, pandas_df = create_test_dfs(data, index=index)
    for row_key in (("bar",), ("bar", "two")):
        eval_general(modin_df, pandas_df, lambda df: df.loc[row_key])


def test_loc_multi_index_level_two_has_same_name_as_column():
    """``loc[("foo", "bar")]`` where "bar" is both a second-level row label and a column."""
    source = pandas.DataFrame(
        [[0]], index=[pd.Index(["foo"]), pd.Index(["bar"])], columns=["bar"]
    )
    frames = create_test_dfs(source)
    eval_general(*frames, lambda df: df.loc[("foo", "bar")])


def test_loc_multi_index_duplicate_keys():
    """``loc`` with a row tuple that occurs twice in a two-level row index."""
    duplicated_index = [["a", "a"], ["b", "b"]]
    modin_df, pandas_df = create_test_dfs([1, 2], index=duplicated_index)
    for column_key in (0, slice(None)):
        eval_general(
            modin_df, pandas_df, lambda df: df.loc[("a", "b"), column_key]
        )


def test_loc_multi_index_both_axes():
    """``loc`` by full tuple on a frame with a MultiIndex on rows and on columns."""
    row_index = pd.MultiIndex.from_tuples(
        [("r0", "rA"), ("r1", "rB")], names=["Courses", "Fee"]
    )
    column_index = pd.MultiIndex.from_tuples(
        [
            ("Gasoline", "Toyota"),
            ("Gasoline", "Ford"),
            ("Electric", "Tesla"),
            ("Electric", "Nio"),
        ]
    )
    values = [[100, 300, 900, 400], [200, 500, 300, 600]]
    modin_df, pandas_df = create_test_dfs(
        values, columns=column_index, index=row_index
    )

    # One full row, then one full column.
    eval_general(modin_df, pandas_df, lambda df: df.loc[("r0", "rA"), :])
    eval_general(modin_df, pandas_df, lambda df: df.loc[:, ("Gasoline", "Toyota")])


def test_loc_empty():
    """``loc`` read and write on a frame that has an index but no columns."""
    pandas_df = pandas.DataFrame(index=range(5))
    modin_df = pd.DataFrame(index=range(5))

    df_equals(pandas_df.loc[1], modin_df.loc[1])

    for frame in (pandas_df, modin_df):
        frame.loc[1] = 3
    df_equals(pandas_df, modin_df)


@pytest.mark.parametrize("locator_name", ["iloc", "loc"])
def test_loc_iloc_2064(locator_name):
    """Assigning to row ``[1]`` of a frame with columns but no rows must raise as in pandas."""
    modin_df, pandas_df = create_test_dfs(columns=["col1", "col2"])

    if locator_name != "iloc":
        # The integer dtype shown in the message depends on the platform.
        int_dtype = "int32" if os.name == "nt" else "int64"
        expected_exception = KeyError(
            f"None of [Index([1], dtype='{int_dtype}')] are in the [index]"
        )
    else:
        expected_exception = IndexError(
            "index 1 is out of bounds for axis 0 with size 0"
        )

    def assign_row(df):
        locator = getattr(df, locator_name)
        return locator.__setitem__([1], [11, 22])

    eval_general(
        modin_df,
        pandas_df,
        assign_row,
        __inplace__=True,
        expected_exception=expected_exception,
    )


@pytest.mark.parametrize("index", [["row1", "row2", "row3"]])
@pytest.mark.parametrize("columns", [["col1", "col2"]])
def test_loc_assignment(index, columns):
    """Compare chained ``df.loc[row][col] = value`` assignments for every cell."""
    md_df, pd_df = create_test_dfs(index=index, columns=columns)
    for row_pos, row_label in enumerate(index):
        for col_pos, col_label in enumerate(columns):
            # The value encodes the cell position, e.g. row 2 / column 1 -> 21.
            cell_value = int(f"{row_pos}{col_pos}")
            md_df.loc[row_label][col_label] = cell_value
            pd_df.loc[row_label][col_label] = cell_value
    df_equals(md_df, pd_df)


@pytest.mark.parametrize("left, right", [(2, 1), (6, 1), (lambda df: 70, 1), (90, 70)])
def test_loc_insert_row(left, right):
    """
    Compare ``df.loc[left] = df.loc[right]`` between Modin and pandas.

    This test case comes from
    https://github.com/modin-project/modin/issues/3764
    """
    rows = [[1, 2, 3], [4, 5, 6]]
    pandas_df = pandas.DataFrame(rows)
    modin_df = pd.DataFrame(rows)

    def copy_row(df):
        df.loc[left] = df.loc[right]
        return df

    if right == 70:
        pytest.xfail(reason="https://github.com/modin-project/modin/issues/7024")

    eval_general(modin_df, pandas_df, copy_row, expected_exception=None)


@pytest.mark.parametrize(
    "columns", [10, (100, 102), (2, 6), [10, 11, 12], "a", ["b", "c", "d"]]
)
def test_loc_insert_col(columns):
    """
    Compare ``df.loc[:, columns] = 1`` between Modin and pandas.

    A 2-tuple parameter is interpreted as the bounds of a label slice; any
    other parameter is used as the column key directly.

    This test case comes from
    https://github.com/modin-project/modin/issues/3764
    """
    pandas_df = pandas.DataFrame([[1, 2, 3], [4, 5, 6]])
    modin_df = pd.DataFrame([[1, 2, 3], [4, 5, 6]])

    if isinstance(columns, tuple) and len(columns) == 2:
        column_key = slice(columns[0], columns[1])
    else:
        column_key = columns

    def _test_loc_cols(df):
        df.loc[:, column_key] = 1
        # Return the mutated frame so that the resulting frames are compared;
        # without a return value only ``None`` would be available for the
        # comparison (the sibling row test returns ``df`` for the same reason).
        return df

    eval_general(modin_df, pandas_df, _test_loc_cols)


@pytest.fixture
def loc_iter_dfs():
    """Return the ``create_test_dfs`` result for a 3x3 frame with string labels.

    Column number ``k`` (0-based) is filled with the constant ``k``.
    """
    col_labels = ["col1", "col2", "col3"]
    row_labels = ["row1", "row2", "row3"]

    data = {}
    for position, label in enumerate(col_labels):
        data[label] = [position] * len(row_labels)

    return create_test_dfs(data, columns=col_labels, index=row_labels)


@pytest.mark.parametrize("reverse_order", [False, True])
@pytest.mark.parametrize("axis", [0, 1])
def test_loc_iter_assignment(loc_iter_dfs, reverse_order, axis):
    """Compare ``loc`` assignment through a sorted list of labels along `axis`."""
    if reverse_order and axis:
        pytest.xfail(
            "Due to internal sorting of lookup values assignment order is lost, see GH-#2552"
        )

    md_df, pd_df = loc_iter_dfs

    # All labels of the chosen axis except the last one, in the requested order;
    # the other axis is selected in full.
    labels = sorted(pd_df.axes[axis][:-1], reverse=reverse_order)
    select = (slice(None), labels) if axis else (labels, slice(None))

    pd_df.loc[select] = pd_df.loc[select] + pd_df.loc[select]
    md_df.loc[select] = md_df.loc[select] + md_df.loc[select]
    df_equals(md_df, pd_df)


@pytest.mark.parametrize("reverse_order", [False, True])
@pytest.mark.parametrize("axis", [0, 1])
def test_loc_order(loc_iter_dfs, reverse_order, axis):
    """Compare ``loc`` reads through a sorted list of labels along `axis`."""
    md_df, pd_df = loc_iter_dfs

    # All labels of the chosen axis except the last one, in the requested order;
    # the other axis is selected in full.
    labels = sorted(pd_df.axes[axis][:-1], reverse=reverse_order)
    select = (slice(None), labels) if axis else (labels, slice(None))

    df_equals(pd_df.loc[select], md_df.loc[select])


@pytest.mark.gpu
@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_loc_nested_assignment(data):
    """Compare chained ``df[col].loc[0] = value`` assignments on the first two columns."""
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)
    key1, key2 = modin_df.columns[0], modin_df.columns[1]

    for column, new_value in ((key1, 500), (key2, None)):
        modin_df[column].loc[0] = new_value
        pandas_df[column].loc[0] = new_value
        df_equals(modin_df, pandas_df)


def test_iloc_assignment():
    """Compare chained ``df.iloc[row][col] = value`` assignments, including callable keys."""
    row_labels = ["row1", "row2", "row3"]
    col_labels = ["col1", "col2"]
    modin_df = pd.DataFrame(index=row_labels, columns=col_labels)
    pandas_df = pandas.DataFrame(index=row_labels, columns=col_labels)

    # The same sequence of statements is run on the Modin frame first and on
    # the pandas frame afterwards.
    for frame in (modin_df, pandas_df):
        frame.iloc[0]["col1"] = 11
        frame.iloc[1]["col1"] = 21
        frame.iloc[2]["col1"] = 31
        frame.iloc[lambda df: 0]["col2"] = 12
        frame.iloc[1][lambda df: ["col2"]] = 22
        frame.iloc[lambda df: 2][lambda df: ["col2"]] = 32

    df_equals(modin_df, pandas_df)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_iloc_nested_assignment(data):
    """Compare chained ``df[col].iloc[0] = value`` assignments on the first two columns."""
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)
    key1, key2 = modin_df.columns[0], modin_df.columns[1]

    for column, new_value in ((key1, 500), (key2, None)):
        modin_df[column].iloc[0] = new_value
        pandas_df[column].iloc[0] = new_value
        df_equals(modin_df, pandas_df)


def test_iloc_empty():
    """``iloc`` read and write on a frame that has an index but no columns."""
    pandas_df = pandas.DataFrame(index=range(5))
    modin_df = pd.DataFrame(index=range(5))

    df_equals(pandas_df.iloc[1], modin_df.iloc[1])

    for frame in (pandas_df, modin_df):
        frame.iloc[1] = 3
    df_equals(pandas_df, modin_df)


def test_iloc_loc_key_length_except():
    """Check that a two-element key on a Series ``iloc``/``loc`` raises IndexingError."""
    modin_ser, pandas_ser = pd.Series(0), pandas.Series(0)

    for locator_name in ("iloc", "loc"):
        # The lambda is consumed inside this iteration, so reading the loop
        # variable from the closure is safe.
        eval_general(
            modin_ser,
            pandas_ser,
            lambda ser: getattr(ser, locator_name)[0, 0],
            expected_exception=pandas.errors.IndexingError("Too many indexers"),
        )


def test_loc_series():
    """Assign a Series through ``loc`` using a boolean row mask and a column label."""
    md_df, pd_df = create_test_dfs({"a": [1, 2], "b": [3, 4]})

    for frame in (pd_df, md_df):
        row_mask = frame["a"] > 1
        frame.loc[row_mask, "b"] = np.log(frame["b"])

    df_equals(pd_df, md_df)


@pytest.mark.parametrize("locator_name", ["loc", "iloc"])
@pytest.mark.parametrize(
    "slice_indexer",
    [
        slice(None, None, -2),
        slice(1, 10, None),
        slice(None, 10, None),
        slice(10, None, None),
        slice(10, None, -2),
        slice(-10, None, -2),
        slice(None, 1_000_000_000, None),
    ],
)
def test_loc_iloc_slice_indexer(locator_name, slice_indexer):
    """Compare slicing through ``loc``/``iloc`` when labels differ from positions."""
    md_df, pd_df = create_test_dfs(test_data_values[0])

    # Labels start at 1, so a label never coincides with its position.
    shifted_index = pandas.RangeIndex(1, len(md_df) + 1)
    for frame in (md_df, pd_df):
        frame.index = shifted_index

    def take_slice(df):
        locator = getattr(df, locator_name)
        return locator[slice_indexer]

    eval_general(md_df, pd_df, take_slice)


@pytest.mark.parametrize(
    "indexer_size",
    [
        1,
        2,
        NROWS,
        pytest.param(
            NROWS + 1,
            marks=pytest.mark.xfail(
                reason="https://github.com/modin-project/modin/issues/5739", strict=True
            ),
        ),
    ],
)
class TestLocRangeLikeIndexer:
    """Test cases related to https://github.com/modin-project/modin/issues/5702"""

    def test_range_index_getitem_single_value(self, indexer_size):
        """``df.loc[RangeIndex]`` with a row indexer only."""

        def select(df):
            return df.loc[pd.RangeIndex(indexer_size)]

        md_df, pd_df = create_test_dfs(test_data["int_data"])
        eval_general(md_df, pd_df, select)

    def test_range_index_getitem_two_values(self, indexer_size):
        """``df.loc[RangeIndex, :]`` with row and column indexers."""

        def select(df):
            return df.loc[pd.RangeIndex(indexer_size), :]

        md_df, pd_df = create_test_dfs(test_data["int_data"])
        eval_general(md_df, pd_df, select)

    def test_range_getitem_single_value(self, indexer_size):
        """``df.loc[range]`` with a row indexer only."""

        def select(df):
            return df.loc[range(indexer_size)]

        md_df, pd_df = create_test_dfs(test_data["int_data"])
        eval_general(md_df, pd_df, select)

    def test_range_getitem_two_values_5702(self, indexer_size):
        """``df.loc[range, :]`` with row and column indexers."""

        def select(df):
            return df.loc[range(indexer_size), :]

        md_df, pd_df = create_test_dfs(test_data["int_data"])
        eval_general(md_df, pd_df, select)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_pop(request, data):
    """Pop the first column from copies and compare both the column and the rest."""
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    # Nothing is popped for the parametrization whose id contains "empty_data".
    if "empty_data" in request.node.name:
        return

    first_column = modin_df.columns[0]
    modin_rest = modin_df.copy()
    pandas_rest = pandas_df.copy()

    modin_column = modin_rest.pop(first_column)
    pandas_column = pandas_rest.pop(first_column)

    df_equals(modin_column, pandas_column)
    df_equals(modin_rest, pandas_rest)


def test_reindex():
    """Compare ``reindex`` over rows, columns, both at once, and a transposed frame."""
    frame_data = {
        "col1": [0, 1, 2, 3],
        "col2": [4, 5, 6, 7],
        "col3": [8, 9, 10, 11],
        "col4": [12, 13, 14, 15],
        "col5": [0, 0, 0, 0],
    }
    pandas_df = pandas.DataFrame(frame_data)
    modin_df = pd.DataFrame(frame_data)

    # (positional args, keyword args) passed to ``reindex`` on both frames.
    reindex_calls = [
        (([0, 3, 2, 1],), {}),
        (([0, 6, 2],), {}),
        ((["col1", "col3", "col4", "col2"],), {"axis": 1}),
        ((["col1", "col7", "col4", "col8"],), {"axis": 1}),
        ((), {"index": [0, 1, 5], "columns": ["col1", "col7", "col4", "col8"]}),
    ]
    for args, kwargs in reindex_calls:
        df_equals(
            modin_df.reindex(*args, **kwargs),
            pandas_df.reindex(*args, **kwargs),
        )

    # After transposing, the former column labels sit on axis 0.
    transposed_labels = ["col1", "col7", "col4", "col8"]
    df_equals(
        modin_df.T.reindex(transposed_labels, axis=0),
        pandas_df.T.reindex(transposed_labels, axis=0),
    )


def test_reindex_4438():
    """Compare ``reindex`` with reversed datetime and multi-level labels."""
    index = pd.date_range(end="1/1/2018", periods=3, freq="h", name="some meta")
    new_index = list(reversed(index))

    multi_index = pandas.MultiIndex.from_arrays(
        [("a", "b", "c"), ("a", "b", "c")], names=["first", "second"]
    )
    new_multi_index = list(reversed(multi_index))

    def check(make_frame_kwargs, *reindex_args, **reindex_kwargs):
        # The factory is called once per library so no input object is shared.
        modin_df = pd.DataFrame(**make_frame_kwargs())
        pandas_df = pandas.DataFrame(**make_frame_kwargs())
        new_modin_df = modin_df.reindex(*reindex_args, **reindex_kwargs)
        new_pandas_df = pandas_df.reindex(*reindex_args, **reindex_kwargs)
        df_equals(new_modin_df, new_pandas_df)

    def single_row():
        return np.array([[1], [2], [3]]).T

    # index case
    check(lambda: {"data": [1, 2, 3], "index": index}, new_index)

    # column case
    check(lambda: {"data": single_row(), "columns": index}, columns=new_index)

    # multiindex case
    check(lambda: {"data": [1, 2, 3], "index": multi_index}, new_multi_index)

    # multicolumn case
    check(
        lambda: {"data": single_row(), "columns": multi_index},
        columns=new_multi_index,
    )

    # index + multiindex case
    check(lambda: {"data": [1, 2, 3], "index": index}, new_multi_index)


def test_reindex_like():
    """Compare ``reindex_like`` when the target has extra rows and an extra column."""
    o_data = [
        [24.3, 75.7, "high"],
        [31, 87.8, "high"],
        [22, 71.6, "medium"],
        [35, 95, "medium"],
    ]
    o_labels = {
        "columns": ["temp_celsius", "temp_fahrenheit", "windspeed"],
        "index": pd.date_range(start="2014-02-12", end="2014-02-15", freq="D"),
    }
    new_data = [[28, "low"], [30, "low"], [35.1, "medium"]]
    new_labels = {
        "columns": ["temp_celsius", "windspeed"],
        "index": pd.DatetimeIndex(["2014-02-12", "2014-02-13", "2014-02-15"]),
    }

    results = []
    for frame_cls in (pd.DataFrame, pandas.DataFrame):
        template = frame_cls(o_data, **o_labels)
        subset = frame_cls(new_data, **new_labels)
        results.append(subset.reindex_like(template))

    modin_result, pandas_result = results
    df_equals(modin_result, pandas_result)


def test_rename_sanity():
    """Compare ``DataFrame.rename`` between Modin and pandas for common inputs.

    Covers dict and callable renamers for columns and index, the positional
    ``mapper`` + ``axis`` form, the no-argument error, a partial column
    mapping, renaming on a transposed frame, and a named index.
    """
    source_df = pandas.DataFrame(test_data["int_data"])[
        ["col1", "index", "col3", "col4"]
    ]
    mapping = {"col1": "a", "index": "b", "col3": "c", "col4": "d"}

    modin_df = pd.DataFrame(source_df)
    df_equals(modin_df.rename(columns=mapping), source_df.rename(columns=mapping))

    renamed2 = source_df.rename(columns=str.lower)
    df_equals(modin_df.rename(columns=str.lower), renamed2)

    modin_df = pd.DataFrame(renamed2)
    df_equals(modin_df.rename(columns=str.upper), renamed2.rename(columns=str.upper))

    # index
    data = {"A": {"foo": 0, "bar": 1}}

    df = pandas.DataFrame(data)
    modin_df = pd.DataFrame(data)
    assert_index_equal(
        modin_df.rename(index={"foo": "bar", "bar": "foo"}).index,
        df.rename(index={"foo": "bar", "bar": "foo"}).index,
    )

    assert_index_equal(
        modin_df.rename(index=str.upper).index, df.rename(index=str.upper).index
    )

    # Using the `mapper` functionality with `axis`
    assert_index_equal(
        modin_df.rename(str.upper, axis=0).index, df.rename(str.upper, axis=0).index
    )
    assert_index_equal(
        modin_df.rename(str.upper, axis=1).columns,
        df.rename(str.upper, axis=1).columns,
    )
    assert_index_equal(modin_df.rename(str.upper).index, df.rename(str.upper).index)

    # have to pass something
    with pytest.raises(TypeError):
        modin_df.rename()

    # partial columns: only some labels are mapped. The renamed axis is the
    # columns, so compare the columns; the row index is compared as well.
    partial_mapping = {"col3": "foo", "col4": "bar"}
    modin_df = pd.DataFrame(source_df)
    modin_partial = modin_df.rename(columns=partial_mapping)
    pandas_partial = source_df.rename(columns=partial_mapping)
    assert_index_equal(modin_partial.columns, pandas_partial.columns)
    assert_index_equal(modin_partial.index, pandas_partial.index)

    # other axis: after transposing, the column labels are the row index
    assert_index_equal(
        source_df.T.rename(index=partial_mapping).index,
        modin_df.T.rename(index=partial_mapping).index,
    )

    # index with name
    index = pandas.Index(["foo", "bar"], name="name")
    renamer = pandas.DataFrame(data, index=index)
    modin_df = pd.DataFrame(data, index=index)

    renamed = renamer.rename(index={"foo": "bar", "bar": "foo"})
    modin_renamed = modin_df.rename(index={"foo": "bar", "bar": "foo"})
    assert_index_equal(renamed.index, modin_renamed.index)

    assert renamed.index.name == modin_renamed.index.name


def test_rename_multiindex():
    """Compare ``rename`` on frames whose index and columns are both MultiIndex.

    Renames are checked without ``level`` (applied across all levels) and with
    ``level`` given by position and by name, for dict and callable renamers.
    """
    tuples_index = [("foo1", "bar1"), ("foo2", "bar2")]
    tuples_columns = [("fizz1", "buzz1"), ("fizz2", "buzz2")]
    index = pandas.MultiIndex.from_tuples(tuples_index, names=["foo", "bar"])
    columns = pandas.MultiIndex.from_tuples(tuples_columns, names=["fizz", "buzz"])

    frame_data = [(0, 0), (1, 1)]
    df = pandas.DataFrame(frame_data, index=index, columns=columns)
    modin_df = pd.DataFrame(frame_data, index=index, columns=columns)

    index_mapping = {"foo1": "foo3", "bar2": "bar3"}
    columns_mapping = {"fizz1": "fizz3", "buzz2": "buzz3"}

    #
    # without specifying level -> across all levels
    renamed = df.rename(index=index_mapping, columns=columns_mapping)
    modin_renamed = modin_df.rename(index=index_mapping, columns=columns_mapping)
    assert_index_equal(renamed.index, modin_renamed.index)
    assert_index_equal(renamed.columns, modin_renamed.columns)
    assert renamed.index.names == modin_renamed.index.names
    assert renamed.columns.names == modin_renamed.columns.names

    #
    # with specifying a level, first with a dict and then with a function;
    # each column level is addressed by position and by name
    for columns_renamer in (columns_mapping, str.upper):
        for level in (0, "fizz", 1, "buzz"):
            renamed = df.rename(columns=columns_renamer, level=level)
            modin_renamed = modin_df.rename(columns=columns_renamer, level=level)
            assert_index_equal(renamed.columns, modin_renamed.columns)

    # index
    renamed = df.rename(index=index_mapping, level=0)
    modin_renamed = modin_df.rename(index=index_mapping, level=0)
    assert_index_equal(modin_renamed.index, renamed.index)


@pytest.mark.xfail(reason="Pandas does not pass this test")
def test_rename_nocopy():
    """Write through a ``copy=False`` rename and look for the value in the source."""
    selected = ["col1", "index", "col3", "col4"]
    source_df = pandas.DataFrame(test_data["int_data"])[selected]
    modin_df = pd.DataFrame(source_df)

    modin_renamed = modin_df.rename(columns={"col3": "foo"}, copy=False)
    modin_renamed["foo"] = 1

    source_column_is_one = modin_df["col3"] == 1
    assert source_column_is_one.all()


def test_rename_inplace():
    """Compare a column rename done out of place and then in place on copies."""
    selected = ["col1", "index", "col3", "col4"]
    source_df = pandas.DataFrame(test_data["int_data"])[selected]
    modin_df = pd.DataFrame(source_df)
    column_mapping = {"col3": "foo"}

    # Out of place: compare the returned frames.
    modin_out_of_place = modin_df.rename(columns=column_mapping)
    pandas_out_of_place = source_df.rename(columns=column_mapping)
    df_equals(modin_out_of_place, pandas_out_of_place)

    # In place: mutate copies and compare them afterwards.
    frame = source_df.copy()
    modin_frame = modin_df.copy()
    frame.rename(columns=column_mapping, inplace=True)
    modin_frame.rename(columns=column_mapping, inplace=True)

    df_equals(modin_frame, frame)


def test_rename_bug():
    """Rename two columns, index by them, then relabel the remaining column."""
    # rename set ref_locs, and set_index was not resetting
    frame_data = {0: ["foo", "bar"], 1: ["bah", "bas"], 2: [1, 2]}

    def rename_then_set_index(frame):
        frame = frame.rename(columns={0: "a"})
        frame = frame.rename(columns={1: "b"})
        frame = frame.set_index(["a", "b"])
        frame.columns = ["2001-01-01"]
        return frame

    df = rename_then_set_index(pandas.DataFrame(frame_data))
    modin_df = rename_then_set_index(pd.DataFrame(frame_data))

    df_equals(modin_df, df)


def test_index_to_datetime_using_set_index():
    """Replace a string index by its datetime conversion and compare the frames.

    The converted index is applied both by assigning ``.index`` and through
    ``set_index``. ``set_index`` is called without ``inplace=True``, so the
    frames it returns are compared rather than discarded.
    """
    data = {"YEAR": ["1992", "1993", "1994"], "ALIENS": [1, 99, 1]}
    modin_df_years = pd.DataFrame(data=data)
    df_years = pandas.DataFrame(data=data)
    modin_df_years = modin_df_years.set_index("YEAR")
    df_years = df_years.set_index("YEAR")
    modin_datetime_index = pd.to_datetime(modin_df_years.index, format="%Y")
    pandas_datetime_index = pandas.to_datetime(df_years.index, format="%Y")

    # Path 1: assign the converted index directly.
    modin_df_years.index = modin_datetime_index
    df_years.index = pandas_datetime_index
    df_equals(modin_df_years, df_years)

    # Path 2: pass the converted index to ``set_index`` and compare its output.
    df_equals(
        modin_df_years.set_index(modin_datetime_index),
        df_years.set_index(pandas_datetime_index),
    )


def test_rename_axis():
    """Compare ``rename_axis`` for scalar, dict, callable and list arguments.

    Also checks that a callable passed positionally as ``mapper`` raises
    ``ValueError`` in Modin and in pandas.
    """
    data = {"num_legs": [4, 4, 2], "num_arms": [0, 0, 2]}
    index = ["dog", "cat", "monkey"]
    modin_df = pd.DataFrame(data, index)
    pandas_df = pandas.DataFrame(data, index)
    df_equals(modin_df.rename_axis("animal"), pandas_df.rename_axis("animal"))
    df_equals(
        modin_df.rename_axis("limbs", axis="columns"),
        pandas_df.rename_axis("limbs", axis="columns"),
    )

    modin_df.rename_axis("limbs", axis="columns", inplace=True)
    pandas_df.rename_axis("limbs", axis="columns", inplace=True)
    df_equals(modin_df, pandas_df)

    new_index = pd.MultiIndex.from_product(
        [["mammal"], ["dog", "cat", "monkey"]], names=["type", "name"]
    )
    modin_df.index = new_index
    pandas_df.index = new_index

    df_equals(
        modin_df.rename_axis(index={"type": "class"}),
        pandas_df.rename_axis(index={"type": "class"}),
    )
    df_equals(
        modin_df.rename_axis(columns=str.upper),
        pandas_df.rename_axis(columns=str.upper),
    )
    df_equals(
        modin_df.rename_axis(columns=[str.upper(o) for o in modin_df.columns.names]),
        pandas_df.rename_axis(columns=[str.upper(o) for o in pandas_df.columns.names]),
    )

    # Each library gets its own ``raises`` block: inside a shared block the
    # first exception would stop the second call from ever running.
    for frame in (modin_df, pandas_df):
        with pytest.raises(ValueError):
            frame.rename_axis(str.upper, axis=1)


def test_rename_axis_inplace():
    """Compare in-place ``rename_axis`` return values and the mutated frames."""
    test_frame = pandas.DataFrame(test_data["int_data"])
    modin_df = pd.DataFrame(test_frame)

    # (new axis name, extra keyword arguments): default axis first, then axis=1.
    scenarios = (("foo", {}), ("bar", {"axis": 1}))
    for axis_name, extra_kwargs in scenarios:
        result = test_frame.copy()
        modin_result = modin_df.copy()

        no_return = result.rename_axis(axis_name, inplace=True, **extra_kwargs)
        modin_no_return = modin_result.rename_axis(
            axis_name, inplace=True, **extra_kwargs
        )

        assert no_return is modin_no_return
        df_equals(modin_result, result)


def test_rename_issue5600():
    """Check that an out-of-place rename leaves the source frame's labels alone."""
    # Check the issue for more details
    # https://github.com/modin-project/modin/issues/5600
    df = pd.DataFrame({"a": [1, 2]})
    df_renamed = df.rename(columns={"a": "new_a"}, copy=True, inplace=False)

    # The source frame is checked first, then the renamed one; both the dtypes
    # keys and the columns must carry the expected label.
    for frame, expected_labels in ((df, ["a"]), (df_renamed, ["new_a"])):
        assert frame.dtypes.keys().tolist() == expected_labels
        assert frame.columns.tolist() == expected_labels


def test_reorder_levels():
    """Compare ``reorder_levels`` on a three-level row MultiIndex."""
    data = np.random.randint(1, 100, 12)

    level_names = ["Number", "Letter", "Color"]
    level_tuples = [
        (num, letter, color)
        for num in range(1, 3)
        for letter in ["a", "b", "c"]
        for color in ["Red", "Green"]
    ]

    modin_index = pd.MultiIndex.from_tuples(level_tuples, names=level_names)
    modin_df = pd.DataFrame(data, index=modin_index)

    pandas_index = pandas.MultiIndex.from_tuples(level_tuples, names=level_names)
    pandas_df = pandas.DataFrame(data, index=pandas_index)

    new_order = ["Letter", "Color", "Number"]
    df_equals(
        modin_df.reorder_levels(new_order),
        pandas_df.reorder_levels(new_order),
    )


def test_reindex_multiindex():
    """Compare reindexing a flat-indexed frame with a MultiIndex target."""
    data1, data2 = np.random.randint(1, 20, (5, 5)), np.random.randint(10, 25, 6)
    index = np.array(["AUD", "BRL", "CAD", "EUR", "INR"])

    product_levels = [["Bank_1", "Bank_2"], ["AUD", "CAD", "EUR"]]
    product_names = ["Bank", "Curency"]
    modin_midx = pd.MultiIndex.from_product(product_levels, names=product_names)
    pandas_midx = pandas.MultiIndex.from_product(product_levels, names=product_names)

    modin_df1 = pd.DataFrame(data=data1, index=index, columns=index)
    modin_df2 = pd.DataFrame(data2, modin_midx)
    pandas_df1 = pandas.DataFrame(data=data1, index=index, columns=index)
    pandas_df2 = pandas.DataFrame(data2, pandas_midx)
    modin_df2.columns, pandas_df2.columns = ["Notional"], ["Notional"]

    md_midx = pd.MultiIndex.from_product([modin_df2.index.levels[0], modin_df1.index])
    pd_midx = pandas.MultiIndex.from_product(
        [pandas_df2.index.levels[0], pandas_df1.index]
    )

    # In order: no axis/level, only ``axis``, then ``axis`` together with ``level``.
    for extra_kwargs in ({}, {"axis": 0}, {"axis": 0, "level": 0}):
        modin_result = modin_df1.reindex(md_midx, fill_value=0, **extra_kwargs)
        pandas_result = pandas_df1.reindex(pd_midx, fill_value=0, **extra_kwargs)
        df_equals(modin_result, pandas_result)


@pytest.mark.parametrize("test_async_reset_index", [False, True])
@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_reset_index(data, test_async_reset_index):
    """Compare ``reset_index`` out of place and in place.

    When ``test_async_reset_index`` is true, the index cache of the frame that
    is about to be reset is cleared first via ``set_frame_index_cache(None)``.
    """
    modin_df, pandas_df = create_test_dfs(data)
    if test_async_reset_index:
        modin_df._query_compiler.set_frame_index_cache(None)
    modin_result = modin_df.reset_index(inplace=False)
    pandas_result = pandas_df.reset_index(inplace=False)
    df_equals(modin_result, pandas_result)

    modin_df_cp = modin_df.copy()
    pd_df_cp = pandas_df.copy()
    if test_async_reset_index:
        # Clear the cache on the copy: it is the frame that gets reset below.
        modin_df_cp._query_compiler.set_frame_index_cache(None)
    modin_df_cp.reset_index(inplace=True)
    pd_df_cp.reset_index(inplace=True)
    df_equals(modin_df_cp, pd_df_cp)


@pytest.mark.parametrize(
    "data",
    [
        test_data["int_data"],
        test_data["float_nan_data"],
    ],
)
def test_reset_index_multiindex_groupby(data):
    """Group by the first two columns after resetting a three-level row index."""
    # GH#4394
    modin_df, pandas_df = create_test_dfs(data)

    def index_tuples(n_rows):
        return [(i // 10, i // 5, i) for i in range(n_rows)]

    modin_df.index = pd.MultiIndex.from_tuples(index_tuples(len(modin_df)))
    pandas_df.index = pandas.MultiIndex.from_tuples(index_tuples(len(pandas_df)))

    def reset_group_count(df):
        group_keys = list(df.columns[:2])
        return df.reset_index().groupby(group_keys).count()

    eval_general(modin_df, pandas_df, reset_group_count)


@pytest.mark.parametrize("test_async_reset_index", [False, True])
@pytest.mark.parametrize(
    "data",
    [
        pytest.param(
            test_data["int_data"],
            marks=pytest.mark.exclude_by_default,
        ),
        test_data["float_nan_data"],
    ],
    ids=["int_data", "float_nan_data"],
)
@pytest.mark.parametrize("nlevels", [3])
@pytest.mark.parametrize("columns_multiindex", [True, False])
@pytest.mark.parametrize(
    "level",
    [
        "no_level",
        None,
        0,
        1,
        2,
        [2, 0],
        [2, 1],
        [1, 0],
        pytest.param(
            [2, 1, 2],
            marks=pytest.mark.exclude_by_default,
        ),
        pytest.param(
            [0, 0, 0, 0],
            marks=pytest.mark.exclude_by_default,
        ),
        pytest.param(
            ["level_name_1"],
            marks=pytest.mark.exclude_by_default,
        ),
        pytest.param(
            ["level_name_2", "level_name_1"],
            marks=pytest.mark.exclude_by_default,
        ),
        pytest.param(
            [2, "level_name_0"],
            marks=pytest.mark.exclude_by_default,
        ),
    ],
)
@pytest.mark.parametrize("col_level", ["no_col_level", 0, 1, 2])
@pytest.mark.parametrize("col_fill", ["no_col_fill", None, 0, "new"])
@pytest.mark.parametrize("drop", [False])
@pytest.mark.parametrize(
    "multiindex_levels_names_max_levels",
    [
        0,
        1,
        2,
        pytest.param(3, marks=pytest.mark.exclude_by_default),
        pytest.param(4, marks=pytest.mark.exclude_by_default),
    ],
)
@pytest.mark.parametrize(
    "none_in_index_names",
    [
        pytest.param(
            False,
            marks=pytest.mark.exclude_by_default,
        ),
        True,
        "mixed_1st_None",
        pytest.param(
            "mixed_2nd_None",
            marks=pytest.mark.exclude_by_default,
        ),
    ],
)
def test_reset_index_with_multi_index_no_drop(
    data,
    nlevels,
    columns_multiindex,
    level,
    col_level,
    col_fill,
    drop,
    multiindex_levels_names_max_levels,
    none_in_index_names,
    test_async_reset_index,
):
    """Compare ``reset_index`` on a frame with a generated multi-level row index.

    The ``"no_level"``, ``"no_col_level"`` and ``"no_col_fill"`` sentinels mean
    that the corresponding keyword is not passed to ``reset_index`` at all.
    ``"level_name_<i>"`` entries in a ``level`` list are replaced by the name of
    the ``i``-th index level once the index names have been set.
    """
    data_rows = len(next(iter(data.values())))
    index = generate_multiindex(data_rows, nlevels=nlevels)
    data_columns = len(data)
    if columns_multiindex:
        columns = generate_multiindex(data_columns, nlevels=nlevels)
    else:
        columns = pandas.RangeIndex(0, data_columns)
    # Replace original data columns with generated
    data = {columns[pos]: values for pos, values in enumerate(data.values())}

    # Level names are plain strings, or tuples of strings whose length grows
    # with the level position and with ``multiindex_levels_names_max_levels``.
    if multiindex_levels_names_max_levels == 0:
        level_names = [f"level_{i}" for i in range(index.nlevels)]
    else:
        base_depth = max(multiindex_levels_names_max_levels + 1 - index.nlevels, 0)
        level_names = []
        for i in range(index.nlevels):
            depth = base_depth + i
            if depth > 0:
                level_names.append(
                    tuple([f"level_{i}_name_{j}" for j in range(0, depth)])
                )
            else:
                level_names.append(f"level_{i}")
    index.names = level_names

    if none_in_index_names is True:
        index.names = [None] * len(index.names)
    elif none_in_index_names:
        # Blank every second name, starting at position 0 or 1.
        first_none = 0 if none_in_index_names == "mixed_1st_None" else 1
        index.names = [
            None if pos % 2 == first_none else name
            for pos, name in enumerate(index.names)
        ]

    modin_df = pd.DataFrame(data, index=index, columns=columns)
    pandas_df = pandas.DataFrame(data, index=index, columns=columns)

    if isinstance(level, list):
        placeholder = "level_name_"

        def resolve(item):
            if isinstance(item, str) and item.startswith(placeholder):
                return index.names[int(item[len(placeholder) :])]
            return item

        level = [resolve(item) for item in level]

    kwargs = {"drop": drop}
    optional_kwargs = (
        ("level", level, "no_level"),
        ("col_level", col_level, "no_col_level"),
        ("col_fill", col_fill, "no_col_fill"),
    )
    for kwarg_name, value, sentinel in optional_kwargs:
        if value != sentinel:
            kwargs[kwarg_name] = value

    if test_async_reset_index:
        modin_df._query_compiler.set_frame_index_cache(None)
    eval_general(
        modin_df,
        pandas_df,
        lambda df: df.reset_index(**kwargs),
        # https://github.com/modin-project/modin/issues/5960
        comparator_kwargs={"check_dtypes": False},
    )


@pytest.mark.parametrize("test_async_reset_index", [False, True])
@pytest.mark.parametrize(
    "data",
    [
        pytest.param(
            test_data["int_data"],
            marks=pytest.mark.exclude_by_default,
        ),
        test_data["float_nan_data"],
    ],
    ids=["int_data", "float_nan_data"],
)
@pytest.mark.parametrize("nlevels", [3])
@pytest.mark.parametrize(
    "level",
    [
        "no_level",
        None,
        0,
        1,
        2,
        [2, 0],
        [2, 1],
        [1, 0],
        pytest.param(
            [2, 1, 2],
            marks=pytest.mark.exclude_by_default,
        ),
        pytest.param(
            [0, 0, 0, 0],
            marks=pytest.mark.exclude_by_default,
        ),
        pytest.param(
            ["level_name_1"],
            marks=pytest.mark.exclude_by_default,
        ),
        pytest.param(
            ["level_name_2", "level_name_1"],
            marks=pytest.mark.exclude_by_default,
        ),
        pytest.param(
            [2, "level_name_0"],
            marks=pytest.mark.exclude_by_default,
        ),
    ],
)
@pytest.mark.parametrize(
    "multiindex_levels_names_max_levels",
    [
        0,
        1,
        2,
        pytest.param(3, marks=pytest.mark.exclude_by_default),
        pytest.param(4, marks=pytest.mark.exclude_by_default),
    ],
)
@pytest.mark.parametrize(
    "none_in_index_names",
    [
        pytest.param(
            False,
            marks=pytest.mark.exclude_by_default,
        ),
        True,
        "mixed_1st_None",
        pytest.param(
            "mixed_2nd_None",
            marks=pytest.mark.exclude_by_default,
        ),
    ],
)
def test_reset_index_with_multi_index_drop(
    data,
    nlevels,
    level,
    multiindex_levels_names_max_levels,
    none_in_index_names,
    test_async_reset_index,
):
    """Run the no-drop scenario with ``drop=True`` and multi-level columns.

    ``col_level`` and ``col_fill`` are passed as their "not provided" sentinels.
    """
    test_reset_index_with_multi_index_no_drop(
        data=data,
        nlevels=nlevels,
        columns_multiindex=True,
        level=level,
        col_level="no_col_level",
        col_fill="no_col_fill",
        drop=True,
        multiindex_levels_names_max_levels=multiindex_levels_names_max_levels,
        none_in_index_names=none_in_index_names,
        test_async_reset_index=test_async_reset_index,
    )


@pytest.mark.parametrize("test_async_reset_index", [False, True])
@pytest.mark.parametrize("index_levels_names_max_levels", [0, 1, 2])
def test_reset_index_with_named_index(
    index_levels_names_max_levels, test_async_reset_index
):
    """Compare ``reset_index`` when the index name is a string or a tuple."""

    def clear_index_cache(frame, materialize):
        # The change in index is not automatically handled by Modin. See #3941.
        frame.index = frame.index
        materialize(frame)

        frame._query_compiler.set_frame_index_cache(None)

    def via_accessor(frame):
        return frame.modin.to_pandas()

    def via_private(frame):
        return frame._to_pandas()

    modin_df = pd.DataFrame(test_data_values[0])
    pandas_df = pandas.DataFrame(test_data_values[0])

    if index_levels_names_max_levels > 0:
        index_name = tuple(
            [f"name_{j}" for j in range(0, index_levels_names_max_levels)]
        )
    else:
        index_name = "NAME_OF_INDEX"
    modin_df.index.name = index_name
    pandas_df.index.name = index_name
    df_equals(modin_df, pandas_df)

    # Keep the named index as a column.
    if test_async_reset_index:
        clear_index_cache(modin_df, via_accessor)
    df_equals(modin_df.reset_index(drop=False), pandas_df.reset_index(drop=False))

    # Drop the named index, in place.
    if test_async_reset_index:
        clear_index_cache(modin_df, via_accessor)
    modin_df.reset_index(drop=True, inplace=True)
    pandas_df.reset_index(drop=True, inplace=True)
    df_equals(modin_df, pandas_df)

    # Same as the first check, on freshly built frames.
    modin_df = pd.DataFrame(test_data_values[0])
    pandas_df = pandas.DataFrame(test_data_values[0])
    modin_df.index.name = index_name
    pandas_df.index.name = index_name
    if test_async_reset_index:
        clear_index_cache(modin_df, via_private)
    df_equals(modin_df.reset_index(drop=False), pandas_df.reset_index(drop=False))


@pytest.mark.parametrize("test_async_reset_index", [False, True])
@pytest.mark.parametrize(
    "index",
    [
        pandas.Index([11, 22, 33, 44], name="col0"),
        pandas.MultiIndex.from_product(
            [[100, 200], [300, 400]], names=["level1", "col0"]
        ),
    ],
    ids=["index", "multiindex"],
)
def test_reset_index_metadata_update(index, test_async_reset_index):
    """Reset an index after the column sharing its level name has been renamed."""
    modin_df, pandas_df = create_test_dfs({"col0": [0, 1, 2, 3]}, index=index)

    # After the rename, "col0" only exists as an index (level) name.
    new_columns = ["col1"]
    modin_df.columns = new_columns
    pandas_df.columns = new_columns

    if test_async_reset_index:
        # The change in index is not automatically handled by Modin. See #3941.
        modin_df.index = modin_df.index
        modin_df._to_pandas()

        modin_df._query_compiler.set_frame_index_cache(None)

    def reset(df):
        return df.reset_index()

    eval_general(modin_df, pandas_df, reset)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("axis", axis_values, ids=axis_keys)
def test_sample(data, axis):
    """Check ``DataFrame.sample`` argument validation and results against pandas."""
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    def assert_same_sample(**sample_kwargs):
        # Sample the Modin frame first, then the pandas frame, and compare.
        modin_sampled = modin_df.sample(axis=axis, **sample_kwargs)
        pandas_sampled = pandas_df.sample(axis=axis, **sample_kwargs)
        df_equals(modin_sampled, pandas_sampled)

    # Both `n` and `frac` are given.
    with pytest.raises(ValueError):
        modin_df.sample(n=3, frac=0.4, axis=axis)

    # `weights` names a column that does not exist.
    with pytest.raises(KeyError):
        modin_df.sample(frac=0.5, weights="CoLuMn_No_ExIsT", axis=0)

    # `weights` is a column label while sampling along axis=1.
    with pytest.raises(ValueError):
        modin_df.sample(frac=0.5, weights=modin_df.columns[0], axis=1)

    # `weights` is one element shorter than the row axis.
    short_row_weights = [0.5 for _ in range(len(modin_df.index[:-1]))]
    with pytest.raises(ValueError):
        modin_df.sample(frac=0.5, weights=short_row_weights, axis=0)

    # `weights` is one element shorter than the column axis.
    short_column_weights = [0.5 for _ in range(len(modin_df.columns[:-1]))]
    with pytest.raises(ValueError):
        modin_df.sample(frac=0.5, weights=short_column_weights, axis=1)

    # Negative `n`.
    with pytest.raises(ValueError):
        modin_df.sample(n=-3, axis=axis)

    # Empty Series as `weights`.
    with pytest.raises(ValueError):
        modin_df.sample(frac=0.2, weights=pandas.Series(), axis=axis)

    axis_number = (
        pandas.DataFrame()._get_axis_number(axis) if isinstance(axis, str) else axis
    )
    axis_length = len(modin_df.axes[axis_number])

    # weights that sum to 1
    parity = [position % 2 for position in range(axis_length)]
    parity_total = sum(parity)
    normalized_weights = [flag / parity_total for flag in parity]
    assert_same_sample(frac=0.5, random_state=42, weights=normalized_weights)

    # weights that don't sum to 1
    raw_weights = [position % 2 for position in range(axis_length)]
    assert_same_sample(frac=0.5, random_state=42, weights=raw_weights)

    assert_same_sample(n=0)
    assert_same_sample(frac=0.5, random_state=42)
    assert_same_sample(n=2, random_state=42)

    # issue #1692, numpy RandomState object
    # We must create a new random state for each call because the values that
    # are selected will be impacted if the object has already been used.
    modin_result = modin_df.sample(
        frac=0.5, random_state=np.random.RandomState(42), axis=axis
    )
    pandas_result = pandas_df.sample(
        frac=0.5, random_state=np.random.RandomState(42), axis=axis
    )
    df_equals(modin_result, pandas_result)


def test_empty_sample():
    """Sample a one-row frame with ``frac=0.12`` in Modin and pandas (issue #4983)."""
    modin_df, pandas_df = create_test_dfs([1])

    def sample_small_fraction(df):
        # If we have a fraction of the dataset that results in n=0, we should
        # make sure that we don't pass in both n and frac to sample internally.
        return df.sample(frac=0.12)

    eval_general(modin_df, pandas_df, sample_small_fraction)


def test_select_dtypes():
    """Compare Modin ``select_dtypes`` output with explicit pandas column selections.

    Three include/exclude combinations are checked against the expected
    column subsets of an equivalent pandas frame, and calling
    ``select_dtypes()`` with no arguments on an empty Modin frame is
    expected to raise ``ValueError``.
    """
    frame_data = {
        "test1": list("abc"),
        "test2": np.arange(3, 6).astype("u1"),
        "test3": np.arange(8.0, 11.0, dtype="float64"),
        "test4": [True, False, True],
        "test5": pandas.date_range("now", periods=3).values,
        "test6": list(range(5, 8)),
    }
    df = pandas.DataFrame(frame_data)
    rd = pd.DataFrame(frame_data)

    # Include and exclude given together.
    include = np.float64, "integer"
    exclude = (np.bool_,)
    r = rd.select_dtypes(include=include, exclude=exclude)

    e = df[["test2", "test3", "test6"]]
    df_equals(r, e)

    # Include only.
    r = rd.select_dtypes(include=np.bool_)
    e = df[["test4"]]
    df_equals(r, e)

    # Exclude only.
    r = rd.select_dtypes(exclude=np.bool_)
    e = df[["test1", "test2", "test3", "test5", "test6"]]
    df_equals(r, e)

    # Use pytest.raises rather than `assert False` inside a try block: plain
    # asserts are stripped under `python -O`, which would let a missing
    # exception go unnoticed.
    with pytest.raises(ValueError):
        pd.DataFrame().select_dtypes()


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("n", int_arg_values, ids=arg_keys("n", int_arg_keys))
def test_tail(data, n):
    """Compare ``tail`` between Modin and pandas for ``n`` and for the full length."""
    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)

    # Parametrized row count.
    modin_tail = modin_df.tail(n)
    pandas_tail = pandas_df.tail(n)
    df_equals(modin_tail, pandas_tail)

    # Row count equal to each frame's own length.
    modin_whole = modin_df.tail(len(modin_df))
    pandas_whole = pandas_df.tail(len(pandas_df))
    df_equals(modin_whole, pandas_whole)


def test_xs():
    """Compare ``DataFrame.xs`` between Modin and pandas on a three-level index."""
    # example is based on the doctest in the upstream pandas docstring
    data = {
        "num_legs": [4, 4, 2, 2],
        "num_wings": [0, 0, 2, 2],
        "class": ["mammal", "mammal", "mammal", "bird"],
        "animal": ["cat", "dog", "bat", "penguin"],
        "locomotion": ["walks", "walks", "flies", "walks"],
    }
    modin_df, pandas_df = create_test_dfs(data)

    def prepare_dataframes(df):
        lib = pd if isinstance(df, pd.DataFrame) else pandas
        # to make several partitions (only for Modin dataframe)
        doubled = lib.concat([df, df], axis=0)
        # looks like pandas is sorting the index whereas modin is not, performing a join operation.
        renumbered = doubled.reset_index(drop=True)
        joined = renumbered.join(renumbered, rsuffix="_y")
        return joined.set_index(["class", "animal", "locomotion"])

    modin_df = prepare_dataframes(modin_df)
    pandas_df = prepare_dataframes(pandas_df)

    # (positional key, keyword arguments) for each `xs` call to compare.
    xs_calls = [
        ("mammal", {}),
        ("cat", {"level": 1}),
        ("num_legs", {"axis": 1}),
        ("cat", {"level": 1, "drop_level": False}),
        (("mammal", "cat"), {}),
        (("mammal", "cat"), {"drop_level": False}),
    ]
    for xs_key, xs_kwargs in xs_calls:
        eval_general(
            modin_df,
            pandas_df,
            lambda df, xs_key=xs_key, xs_kwargs=xs_kwargs: df.xs(xs_key, **xs_kwargs),
        )


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test___getitem__(data):
    """Compare column lookup and row slicing via ``[]`` between Modin and pandas."""
    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)

    # A single column label yields a Modin Series equal to the pandas one.
    first_label = modin_df.columns[0]
    modin_col = modin_df.__getitem__(first_label)
    assert isinstance(modin_col, pd.Series)
    df_equals(pandas_df[first_label], modin_col)

    # slice test
    row_slices = (
        slice(None, -1),
        slice(-1, None),
        slice(1, 2),
        slice(1, None),
        slice(None, 1),
        slice(1, -1),
        slice(-3, -1),
        slice(1, -1, 2),
        slice(-1, 1, -1),
        slice(None, None, 2),
    )
    for row_slice in row_slices:
        df_equals(modin_df[row_slice], pandas_df[row_slice])

    # Test empty
    empty_modin = pd.DataFrame([])
    empty_pandas = pandas.DataFrame([])
    df_equals(empty_modin[:10], empty_pandas[:10])


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test___getitem_bool_indexers(data):
    """Select rows and columns through ``.loc`` with boolean lists and Series."""
    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)

    # Every third row and every fifth column are selected.
    row_mask = [position % 3 == 0 for position in range(len(modin_df.index))]
    column_mask = [position % 5 == 0 for position in range(len(modin_df.columns))]

    # Key is a list of booleans
    df_equals(
        modin_df.loc[row_mask, column_mask], pandas_df.loc[row_mask, column_mask]
    )

    # Key is a Modin or pandas series of booleans
    modin_selected = modin_df.loc[
        pd.Series(row_mask), pd.Series(column_mask, index=modin_df.columns)
    ]
    pandas_selected = pandas_df.loc[
        pandas.Series(row_mask), pandas.Series(column_mask, index=modin_df.columns)
    ]
    df_equals(modin_selected, pandas_selected)


def test_getitem_empty_mask():
    """Index concatenated frames with an all-False boolean list (modin-project/modin#517)."""
    modin_frames, pandas_frames = [], []

    # Three random 100x4 chunks, each wrapped in both libraries.
    for _ in range(3):
        chunk = np.random.randint(0, 100, size=(100, 4))
        modin_frames.append(pd.DataFrame(chunk, columns=list("ABCD")))
        pandas_frames.append(pandas.DataFrame(chunk, columns=list("ABCD")))

    modin_data = pd.concat(modin_frames)
    pandas_data = pandas.concat(pandas_frames)

    modin_selected = modin_data[[False for _ in modin_data.index]]
    pandas_selected = pandas_data[[False for _ in modin_data.index]]
    df_equals(modin_selected, pandas_selected)


def test_getitem_datetime_slice():
    """Slice a frame with a daily datetime index using string date bounds."""
    values = {"data": range(1000)}
    dates = pd.date_range("2017/1/4", periods=1000)
    modin_df = pd.DataFrame(data=values, index=dates)
    pandas_df = pandas.DataFrame(data=values, index=dates)

    date_window = slice("2017-01-06", "2017-01-09")
    modin_window = modin_df[date_window]
    pandas_window = pandas_df[date_window]
    df_equals(modin_window, pandas_window)


def test_getitem_same_name():
    """Look up columns in a frame where the label "c1" appears twice."""
    rows = [
        [1, 2, 3, 4],
        [5, 6, 7, 8],
        [9, 10, 11, 12],
        [13, 14, 15, 16],
        [17, 18, 19, 20],
    ]
    labels = ["c1", "c2", "c1", "c3"]
    modin_df = pd.DataFrame(rows, columns=labels)
    pandas_df = pandas.DataFrame(rows, columns=labels)

    # Duplicated label, unique label, list of labels, and another unique label.
    for lookup in ("c1", "c2", ["c1", "c2"], "c3"):
        df_equals(modin_df[lookup], pandas_df[lookup])


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test___getattr__(request, data):
    """Check attribute-style column access on a Modin DataFrame."""
    modin_df = pd.DataFrame(data)

    # Nothing is checked for the parametrization whose id contains "empty_data".
    if "empty_data" in request.node.name:
        return

    first_label = modin_df.columns[0]
    modin_df.__getattr__(first_label)

    # Both the dunder call and the builtin must return a Modin Series.
    via_dunder = modin_df.__getattr__("col1")
    assert isinstance(via_dunder, pd.Series)

    via_builtin = getattr(modin_df, "col1")
    assert isinstance(via_builtin, pd.Series)

    # Check that lookup in column doesn't override other attributes
    renamed = modin_df.rename(index=str, columns={first_label: "columns"})
    assert isinstance(renamed.columns, pandas.Index)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test___setitem__(data):
    """Exercise ``DataFrame.__setitem__`` in Modin against pandas.

    Covers scalar, callable-produced, 1-D and 2-D array values set through
    ``eval_setitem``, column-by-column filling of a frame created with only
    column labels, frame and Series assignment, 2-D ndarray assignment to a
    single new column, row-slice assignment, and the same operations on a
    transposed frame.
    """
    eval_setitem(*create_test_dfs(data), loc=-1, value=1)
    eval_setitem(
        *create_test_dfs(data), loc=-1, value=lambda df: type(df)(df[df.columns[0]])
    )

    nrows = len(data[list(data.keys())[0]])
    arr = np.arange(nrows * 2).reshape(-1, 2)

    eval_setitem(*create_test_dfs(data), loc=-1, value=arr)
    eval_setitem(*create_test_dfs(data), col="___NON EXISTENT COLUMN", value=arr.T[0])
    eval_setitem(*create_test_dfs(data), loc=0, value=np.arange(nrows))

    # Start from frames that only have column labels and fill every column.
    modin_df = pd.DataFrame(columns=data.keys())
    pandas_df = pandas.DataFrame(columns=data.keys())

    for col in modin_df.columns:
        modin_df[col] = np.arange(1000)

    for col in pandas_df.columns:
        pandas_df[col] = np.arange(1000)

    df_equals(modin_df, pandas_df)

    # Test df assignment to a columns selection
    modin_df[modin_df.columns[[0, -1]]] = modin_df[modin_df.columns[[0, -1]]]
    pandas_df[pandas_df.columns[[0, -1]]] = pandas_df[pandas_df.columns[[0, -1]]]
    df_equals(modin_df, pandas_df)

    # Test series assignment to column
    modin_df = pd.DataFrame(columns=modin_df.columns)
    pandas_df = pandas.DataFrame(columns=pandas_df.columns)
    modin_df[modin_df.columns[-1]] = modin_df[modin_df.columns[0]]
    pandas_df[pandas_df.columns[-1]] = pandas_df[pandas_df.columns[0]]
    df_equals(modin_df, pandas_df)

    # The previous guard, `not sys.version_info.major == 3 and
    # sys.version_info.minor > 6`, parsed as `(not major == 3) and (minor > 6)`,
    # which is always False on Python 3, so this section never ran. Compare
    # the version tuple instead so it runs on everything newer than 3.6.
    if sys.version_info >= (3, 7):
        # This test doesn't work correctly on Python 3.6
        # Test 2d ndarray assignment to column
        modin_df = pd.DataFrame(data)
        pandas_df = pandas.DataFrame(data)
        modin_df["new_col"] = modin_df[[modin_df.columns[0]]].values
        pandas_df["new_col"] = pandas_df[[pandas_df.columns[0]]].values
        df_equals(modin_df, pandas_df)
        assert isinstance(modin_df["new_col"][0], type(pandas_df["new_col"][0]))

    # Row-slice assignment of a scalar.
    modin_df[1:5] = 10
    pandas_df[1:5] = 10
    df_equals(modin_df, pandas_df)

    # Transpose test
    modin_df = pd.DataFrame(data).T
    pandas_df = pandas.DataFrame(data).T

    modin_df[modin_df.columns[0]] = 0
    pandas_df[pandas_df.columns[0]] = 0
    df_equals(modin_df, pandas_df)

    # Repeat with the column labels converted to strings.
    modin_df.columns = [str(i) for i in modin_df.columns]
    pandas_df.columns = [str(i) for i in pandas_df.columns]

    modin_df[modin_df.columns[0]] = 0
    pandas_df[pandas_df.columns[0]] = 0

    df_equals(modin_df, pandas_df)

    # Chained assignment to a single cell through the selected column.
    modin_df[modin_df.columns[0]][modin_df.index[0]] = 12345
    pandas_df[pandas_df.columns[0]][pandas_df.index[0]] = 12345
    df_equals(modin_df, pandas_df)

    modin_df[1:5] = 10
    pandas_df[1:5] = 10
    df_equals(modin_df, pandas_df)


def test___setitem__partitions_aligning():
    """Assign Series whose index differs from the frame's index (issues #2390, #2442)."""
    # from issue #2390
    base = {"a": [1, 2, 3]}
    longer_values = [4, 5, 6, 7, 8]
    modin_df = pd.DataFrame(base)
    pandas_df = pandas.DataFrame(base)
    modin_df["b"] = pd.Series(longer_values)
    pandas_df["b"] = pandas.Series(longer_values)
    df_equals(modin_df, pandas_df)

    # from issue #2442
    # Index with duplicated timestamp
    duplicated_dates = pandas.to_datetime(
        ["2020-02-06", "2020-02-06", "2020-02-22", "2020-03-26"]
    )
    md_df, pd_df = create_test_dfs({"a": [1, 2, 3, 4]}, index=duplicated_dates)

    # "b" is first created and then overwritten; "a" already exists.
    for column in ("b", "b", "a"):
        pd_df[column] = pandas.Series(np.arange(4))
        md_df[column] = pd.Series(np.arange(4))
        df_equals(md_df, pd_df)


def test___setitem__with_mismatched_partitions():
    """Add a Series column to a frame read from a 200_000 x 99 CSV file."""
    with ensure_clean(".csv") as csv_path:
        random_values = np.random.randint(0, 100, size=(200_000, 99))
        np.savetxt(csv_path, random_values, delimiter=",")

        modin_df = pd.read_csv(csv_path)
        pandas_df = pandas.read_csv(csv_path)

        # New column holding 0..len-1, built separately for each library.
        modin_counter = pd.Series(list(range(len(modin_df))))
        modin_df["new"] = modin_counter
        pandas_counter = pandas.Series(list(range(len(pandas_df))))
        pandas_df["new"] = pandas_counter

        df_equals(modin_df, pandas_df)


def test___setitem__mask():
    """Assign a scalar through boolean DataFrame and ndarray masks."""
    data = test_data["int_data"]
    threshold = int((RAND_HIGH + RAND_LOW) / 2)

    # DataFrame mask:
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)
    pandas_df[pandas_df > threshold] = -50
    modin_df[modin_df > threshold] = -50
    df_equals(modin_df, pandas_df)

    # Array mask:
    pandas_df = pandas.DataFrame(data)
    modin_df = pd.DataFrame(data)
    bool_array = (pandas_df > threshold).to_numpy()
    modin_df[bool_array] = -50
    pandas_df[bool_array] = -50
    df_equals(modin_df, pandas_df)

    # Array mask of wrong size:
    with pytest.raises(ValueError):
        wrong_shape = np.array([[1, 2], [3, 4]])
        modin_df[wrong_shape] = 20


@pytest.mark.parametrize(
    "data",
    [
        {},
        {"id": [], "max_speed": [], "health": []},
        {"id": [1], "max_speed": [2], "health": [3]},
        {"id": [4, 40, 400], "max_speed": [111, 222, 333], "health": [33, 22, 11]},
    ],
    ids=["empty_frame", "empty_cols", "1_length_cols", "2_length_cols"],
)
@pytest.mark.parametrize(
    "value",
    [[11, 22], [11, 22, 33]],
    ids=["2_length_val", "3_length_val"],
)
@pytest.mark.parametrize("convert_to_series", [False, True])
@pytest.mark.parametrize("new_col_id", [123, "new_col"], ids=["integer", "string"])
def test_setitem_on_empty_df(data, value, convert_to_series, new_col_id):
    """Add a new column to small or empty frames from a list or a Series.

    When the value is a plain list, the test expects a ``ValueError`` about
    the value length not matching the index length.
    """
    pandas_df = pandas.DataFrame(data)
    modin_df = pd.DataFrame(data)

    def applyier(df):
        new_value = value
        if convert_to_series:
            # Build the Series with the library that owns `df`.
            series_type = pandas.Series if isinstance(df, pandas.DataFrame) else pd.Series
            new_value = series_type(value)
        df[new_col_id] = new_value
        return df

    if convert_to_series:
        expected_exception = None
    else:
        values_length = len(value)
        index_length = len(pandas_df.index)
        expected_exception = ValueError(
            f"Length of values ({values_length}) does not match length of index ({index_length})"
        )

    eval_general(
        modin_df,
        pandas_df,
        applyier,
        expected_exception=expected_exception,
        __inplace__=True,
    )
    # Because of https://github.com/modin-project/modin/issues/7600,
    # df_equals does not check dtypes equality for empty frames.
    assert_dtypes_equal(modin_df, pandas_df)


def test_setitem_on_empty_df_4407():
    """Add a timestamp-labelled column to a frame whose columns are an empty date range."""
    empty_data = {}
    empty_dates = pd.date_range(end="1/1/2018", periods=0, freq="D")
    new_label = pd.date_range(end="1/1/2018", periods=1, freq="h")[0]

    modin_df = pd.DataFrame(empty_data, columns=empty_dates)
    pandas_df = pandas.DataFrame(empty_data, columns=empty_dates)

    modin_df[new_label] = pd.Series([1])
    pandas_df[new_label] = pandas.Series([1])

    df_equals(modin_df, pandas_df)
    # The frequency of the resulting column index must agree as well.
    assert modin_df.columns.freq == pandas_df.columns.freq


def test_setitem_on_empty_df_does_not_change_other_dtypes_5961():
    """Re-assign "col0" cast to float on a row-less frame and compare dtypes."""
    modin_df, pandas_df = create_test_dfs(pandas.DataFrame(columns=["col0", "col1"]))

    # Apply the same assignment to the Modin frame first, then the pandas one.
    for frame in (modin_df, pandas_df):
        frame["col0"] = frame["col0"].astype(float)

    # Because of  https://github.com/modin-project/modin/issues/7600, we cannot
    # use df_equals to check dtypes equality.
    assert_dtypes_equal(modin_df, pandas_df)


def test___setitem__unhashable_list():
    """Assign a frame selection back to itself using a list key (#3258, #3291)."""
    labels = ["a", "b"]
    single_row = [[0, 0]]

    modin_df = pd.DataFrame(single_row, columns=labels)
    modin_df[labels] = modin_df[labels]

    pandas_df = pandas.DataFrame(single_row, columns=labels)
    pandas_df[labels] = pandas_df[labels]

    df_equals(modin_df, pandas_df)


def test_setitem_unhashable_key():
    """Assign several kinds of values through list keys and compare Modin with pandas.

    Each kind of value is tried once with a key of existing column names and
    once with a key of new column names.
    """
    source_modin_df, source_pandas_df = create_test_dfs(test_data["float_nan_data"])
    row_count = source_modin_df.shape[0]

    def fresh_copies():
        # Deep copies so every case starts from the unmodified source frames.
        return source_modin_df.copy(deep=True), source_pandas_df.copy(deep=True)

    for key in (["col1", "col2"], ["new_col1", "new_col2"]):
        # 1d list case
        flat_list = [1, 2]
        modin_df, pandas_df = fresh_copies()
        eval_setitem(modin_df, pandas_df, flat_list, key)

        # 2d list case
        nested_list = [[1, 2]] * row_count
        modin_df, pandas_df = fresh_copies()
        eval_setitem(modin_df, pandas_df, nested_list, key)

        # pandas DataFrame case
        df_value = pandas.DataFrame(nested_list, columns=["value_col1", "value_col2"])
        modin_df, pandas_df = fresh_copies()
        eval_setitem(modin_df, pandas_df, df_value, key)

        # numpy array case
        array_value = df_value.to_numpy()
        modin_df, pandas_df = fresh_copies()
        eval_setitem(modin_df, pandas_df, array_value, key)

        # pandas Series case
        series_value = df_value["value_col1"]
        modin_df, pandas_df = fresh_copies()
        eval_setitem(
            modin_df,
            pandas_df,
            series_value,
            key[:1],
            expected_exception=ValueError("Columns must be same length as key"),
        )

        # pandas Index case
        index_value = df_value.index
        modin_df, pandas_df = fresh_copies()
        eval_setitem(
            modin_df,
            pandas_df,
            index_value,
            key[:1],
            expected_exception=ValueError("Columns must be same length as key"),
        )

        # scalar case
        scalar_value = 3
        modin_df, pandas_df = fresh_copies()
        eval_setitem(modin_df, pandas_df, scalar_value, key)

        # test failed case: ValueError('Columns must be same length as key')
        # (runs on the frames already modified by the scalar case)
        eval_setitem(
            modin_df,
            pandas_df,
            df_value[["value_col1"]],
            key,
            expected_exception=ValueError("Columns must be same length as key"),
        )


def test_setitem_2d_insertion():
    """Insert a two-column frame under a list key for several key/column-name relations."""

    def build_value_picker(modin_value, pandas_value):
        """Build a function that returns either Modin or pandas DataFrame depending on the passed frame."""

        def pick(source_df, *args, **kwargs):
            if isinstance(source_df, (pd.DataFrame, pd.Series)):
                return modin_value
            return pandas_value

        return pick

    modin_df, pandas_df = create_test_dfs(test_data["int_data"])

    # Easy case - key and value.columns are equal
    row_total = len(modin_df)
    modin_value, pandas_value = create_test_dfs(
        {"new_value1": np.arange(row_total), "new_value2": np.arange(row_total)}
    )
    eval_setitem(
        modin_df,
        pandas_df,
        build_value_picker(modin_value, pandas_value),
        col=["new_value1", "new_value2"],
    )

    # Key and value.columns have equal values but in different order
    new_columns = ["new_value3", "new_value4"]
    modin_value.columns = new_columns
    pandas_value.columns = new_columns
    eval_setitem(
        modin_df,
        pandas_df,
        build_value_picker(modin_value, pandas_value),
        col=["new_value4", "new_value3"],
    )

    # Key and value.columns have different values
    new_columns = ["new_value5", "new_value6"]
    modin_value.columns = new_columns
    pandas_value.columns = new_columns
    eval_setitem(
        modin_df,
        pandas_df,
        build_value_picker(modin_value, pandas_value),
        col=["__new_value5", "__new_value6"],
    )

    # Key and value.columns have different lengths, testing that both raise the same exception
    modin_single = modin_value.iloc[:, [0]]
    pandas_single = pandas_value.iloc[:, [0]]
    eval_setitem(
        modin_df,
        pandas_df,
        build_value_picker(modin_single, pandas_single),
        col=["new_value7", "new_value8"],
        expected_exception=ValueError("Columns must be same length as key"),
    )


@pytest.mark.parametrize("does_value_have_different_columns", [True, False])
def test_setitem_2d_update(does_value_have_different_columns):
    """Overwrite groups of columns with the matching columns of a second frame."""

    def update_columns(dfs, iloc):
        """Update columns on the given numeric indices."""
        target, source = dfs
        target_labels = target.columns[iloc].tolist()
        source_labels = source.columns[iloc].tolist()
        target[target_labels] = source[source_labels]
        return target

    modin_df, pandas_df = create_test_dfs(test_data["int_data"])
    modin_df2, pandas_df2 = create_test_dfs(test_data["int_data"])
    modin_df2 *= 10
    pandas_df2 *= 10

    if does_value_have_different_columns:
        suffixed = [f"{col}_new" for col in modin_df.columns]
        modin_df2.columns = suffixed
        pandas_df2.columns = suffixed

    modin_dfs = (modin_df, modin_df2)
    pandas_dfs = (pandas_df, pandas_df2)

    # Positional column selectors, applied one after another to the same frames.
    column_selectors = [
        [0, 1, 2],
        [0, -1],
        slice(1, None),  # (start=1, stop=None)
        slice(None, -2),  # (start=None, stop=-2)
        [0, 1, 5, 6, 9, 10, -2, -1],
        [5, 4, 0, 10, 1, -1],
        slice(None, None, 2),  # (start=None, stop=None, step=2)
    ]
    for selector in column_selectors:
        eval_general(modin_dfs, pandas_dfs, update_columns, iloc=selector)


def test___setitem__single_item_in_series():
    """Replace the only element of a Series through a slice assignment.

    Test case for https://github.com/modin-project/modin/issues/3860
    """
    modin_series, pandas_series = pd.Series(99), pandas.Series(99)

    modin_replacement = pd.Series(100)
    pandas_replacement = pandas.Series(100)
    modin_series[:1] = modin_replacement
    pandas_series[:1] = pandas_replacement

    df_equals(modin_series, pandas_series)


def test___setitem__assigning_single_categorical_sets_correct_dtypes():
    """Overwrite a one-row string column with a ``Categorical``.

    This test case comes from
    https://github.com/modin-project/modin/issues/3895
    """
    initial = {"categories": ["A"]}

    modin_df = pd.DataFrame(initial)
    modin_df["categories"] = pd.Categorical(["A"])

    pandas_df = pandas.DataFrame(initial)
    pandas_df["categories"] = pandas.Categorical(["A"])

    df_equals(modin_df, pandas_df)


def test_iloc_assigning_scalar_none_to_string_frame():
    """Set the only cell of a "string"-dtype frame to ``None`` through ``iloc``.

    This test case comes from
    https://github.com/modin-project/modin/issues/3981
    """
    single_cell = [["A"]]

    modin_df = pd.DataFrame(single_cell, dtype="string")
    modin_df.iloc[0, 0] = None

    pandas_df = pandas.DataFrame(single_cell, dtype="string")
    pandas_df.iloc[0, 0] = None

    df_equals(modin_df, pandas_df)


@pytest.mark.parametrize(
    "value",
    [
        1,
        np.int32(1),
        1.0,
        "str val",
        pandas.Timestamp("1/4/2018"),
        np.datetime64(0, "ms"),
        True,
    ],
)
def test_loc_boolean_assignment_scalar_dtypes(value):
    """Assign a scalar to boolean-selected rows of a frame with mixed column types."""
    columns = {
        "a": [1, 2, 3],
        "b": [3.0, 5.0, 6.0],
        "c": ["a", "b", "c"],
        "d": [1.0, "c", 2.0],
        "e": pandas.to_datetime(["1/1/2018", "1/2/2018", "1/3/2018"]),
        "f": [True, False, True],
    }
    modin_df, pandas_df = create_test_dfs(columns)

    # Boolean row selector: every row except the first.
    modin_idx = pd.Series([False, True, True])
    pandas_idx = pandas.Series([False, True, True])

    modin_df.loc[modin_idx] = value
    pandas_df.loc[pandas_idx] = value
    df_equals(modin_df, pandas_df)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test___len__(data):
    """``len()`` of a Modin frame must equal ``len()`` of the pandas frame."""
    modin_length = len(pd.DataFrame(data))
    pandas_length = len(pandas.DataFrame(data))

    assert modin_length == pandas_length


def test_index_order():
    """Compare the result index of ``all``/``any``/``count`` under a random MultiIndex."""
    # see #1708 and #1869 for details
    df_modin = pd.DataFrame(test_data["float_nan_data"])
    df_pandas = pandas.DataFrame(test_data["float_nan_data"])

    rows_number = len(df_modin.index)
    # Two index levels, each drawn at random from 0..9.
    first_level = np.random.choice(list(range(10)), rows_number)
    second_level = np.random.choice(list(range(10)), rows_number)
    random_index = pandas.MultiIndex.from_arrays([first_level, second_level])

    df_modin.index = random_index
    df_pandas.index = random_index

    for reduction in ("all", "any", "count"):
        modin_reduced = getattr(df_modin, reduction)()
        pandas_reduced = getattr(df_pandas, reduction)()
        df_equals(modin_reduced.index, pandas_reduced.index)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("sortorder", [0, 3, 5])
def test_multiindex_from_frame(data, sortorder):
    """Compare ``MultiIndex.from_frame`` between Modin and pandas."""
    modin_df, pandas_df = create_test_dfs(data)

    def call_from_frame(df):
        # Dispatch on the module of the frame's type to pick the library.
        is_pandas_frame = type(df).__module__.startswith("pandas")
        library = pandas if is_pandas_frame else pd
        return library.MultiIndex.from_frame(df, sortorder)

    eval_general(modin_df, pandas_df, call_from_frame, comparator=assert_index_equal)


def test__getitem_bool_single_row_dataframe():
    """Index a one-row frame with a one-element boolean Series.

    This test case comes from
    https://github.com/modin-project/modin/issues/4845
    """

    def select_with_bool_series(lib):
        # `lib` is either the Modin or the pandas module.
        return lib.DataFrame([1])[lib.Series([True])]

    eval_general(pd, pandas, select_with_bool_series)


def test__getitem_bool_with_empty_partition():
    """Apply a boolean Series mask to the result of ``dropna``.

    This test case comes from
    https://github.com/modin-project/modin/issues/5188
    """
    size = MinRowPartitionSize.get()

    # Mask of length `size` that is True at odd positions.
    pandas_series = pandas.Series([bool(i % 2) for i in range(size)])
    modin_series = pd.Series(pandas_series)

    # `size + 1` rows whose last row is NaN, so `dropna` removes exactly one row.
    pandas_df = pandas.DataFrame(list(range(size + 1)))
    pandas_df.iloc[size] = np.nan
    modin_df = pd.DataFrame(pandas_df)

    pandas_tmp_result = pandas_df.dropna()
    modin_tmp_result = modin_df.dropna()

    def apply_mask(df):
        if isinstance(df, pd.DataFrame):
            return df[modin_series]
        return df[pandas_series]

    eval_general(modin_tmp_result, pandas_tmp_result, apply_mask)


# This is a very subtle bug that comes from:
# https://github.com/modin-project/modin/issues/4945
def test_lazy_eval_index():
    """Filter a frame, copy it, then overwrite a column computed from itself."""
    modin_df, pandas_df = create_test_dfs({"col0": [0, 1]})

    def filter_copy_and_update(df):
        filtered = df[df["col0"] < 6].copy()
        # The problem here is that the index is not copied over so it needs
        # to get recomputed at some point. Our implementation of __setitem__
        # requires us to build a mask and insert the value from the right
        # handside into the new DataFrame. However, it's possible that we
        # won't have any new partitions, so we will end up computing an empty
        # index.
        incremented = filtered["col0"].apply(lambda x: x + 1)
        filtered["col0"] = incremented
        return filtered

    eval_general(modin_df, pandas_df, filter_copy_and_update)


def test_index_of_empty_frame():
    """Compare the index of empty frames between Modin and pandas."""
    # Test on an empty frame created by user
    named_empty_index = pandas.Index([], name="index name")
    md_df, pd_df = create_test_dfs({}, index=named_empty_index, columns=["a", "b"])
    assert md_df.empty and pd_df.empty
    df_equals(md_df.index, pd_df.index)

    # Test on an empty frame produced by Modin's logic
    data = test_data_values[0]
    first_column_length = len(next(iter(data.values())))
    named_range_index = pandas.RangeIndex(first_column_length, name="index name")
    md_df, pd_df = create_test_dfs(data, index=named_range_index)

    # The test expects this filter on the first column to match no rows.
    md_res = md_df.query(f"{md_df.columns[0]} > {RAND_HIGH}")
    pd_res = pd_df.query(f"{pd_df.columns[0]} > {RAND_HIGH}")

    assert md_res.empty and pd_res.empty
    df_equals(md_res.index, pd_res.index)


# https://github.com/modin-project/modin/issues/7405
@pytest.mark.parametrize("indexer", ["loc", "iloc"])
def test_loc_and_iloc_set_order(indexer):
    """Read and write through ``loc``/``iloc`` with unsorted row indices."""
    rng = np.random.default_rng(seed=0)
    use_loc = indexer == "loc"
    data = {"col": rng.integers(0, 100, size=100)}
    set_count = 20
    # Pick a bunch of unsorted row indices; may contain repeat values.
    row_indexer = rng.integers(0, 100, size=set_count)
    col_indexer = "col" if use_loc else 0
    set_data = range(100, 100 + set_count)
    md_df, pd_df = create_test_dfs(data)

    def read_selection(df):
        accessor = df.loc if use_loc else df.iloc
        return accessor[row_indexer, col_indexer]

    def write_selection(df):
        accessor = df.loc if use_loc else df.iloc
        accessor[row_indexer, col_indexer] = set_data

    # First, ensure loc/iloc read succeeds.
    eval_general(md_df, pd_df, read_selection)

    # Second, check results of loc/iloc write.
    eval_general(md_df, pd_df, write_selection, __inplace__=True)

    # Finally, check the result of a loc/iloc read again.
    eval_general(md_df, pd_df, read_selection)


def test_iloc_set_negative_index():
    """Read and write through ``iloc`` with unsorted, possibly negative indices."""
    rng = np.random.default_rng(seed=0)
    row_count, col_count = 50, 80
    data = {f"col_{i}": rng.integers(0, 100, size=row_count) for i in range(col_count)}
    row_set_count, col_set_count = 20, 30
    # Pick a bunch of unsorted row indices; may contain repeat values and negative numbers.
    row_indexer = rng.integers(-row_count, row_count, size=row_set_count)
    col_indexer = rng.integers(-col_count, col_count, size=col_set_count)
    # Consecutive integers starting at 100, one per selected cell.
    cell_total = row_set_count * col_set_count
    set_data = np.reshape(range(100, 100 + cell_total), (row_set_count, col_set_count))
    md_df, pd_df = create_test_dfs(data)

    def read_selection(df):
        return df.iloc[row_indexer, col_indexer]

    def write_selection(df):
        df.iloc[row_indexer, col_indexer] = set_data

    # First, ensure iloc read succeeds.
    eval_general(md_df, pd_df, read_selection)

    # Second, check results of iloc write.
    eval_general(md_df, pd_df, write_selection, __inplace__=True)

    # Finally, check the result of an iloc read again.
    eval_general(md_df, pd_df, read_selection)


================================================
FILE: modin/tests/pandas/dataframe/test_iter.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import io
import warnings

import matplotlib
import numpy as np
import pandas
import pytest

import modin.pandas as pd
from modin.config import NPartitions
from modin.pandas.utils import SET_DATAFRAME_ATTRIBUTE_WARNING
from modin.tests.pandas.utils import (
    RAND_HIGH,
    RAND_LOW,
    create_test_dfs,
    df_equals,
    eval_general,
    random_state,
    test_data,
    test_data_keys,
    test_data_values,
)
from modin.tests.test_utils import (
    current_execution_is_native,
    warns_that_defaulting_to_pandas_if,
)

NPartitions.put(4)

# Force matplotlib to not use any Xwindows backend.
matplotlib.use("Agg")


@pytest.mark.parametrize("method", ["items", "iterrows"])
def test_items_iterrows(method):
    """Compare the (label, Series) pairs produced by ``items``/``iterrows`` with pandas."""
    source = test_data["float_nan_data"]
    modin_df = pd.DataFrame(source)
    pandas_df = pandas.DataFrame(source)

    modin_pairs = getattr(modin_df, method)()
    pandas_pairs = getattr(pandas_df, method)()
    for (modin_label, modin_values), (pandas_label, pandas_values) in zip(
        modin_pairs, pandas_pairs
    ):
        # Both the yielded Series and its label have to agree.
        df_equals(pandas_values, modin_values)
        assert pandas_label == modin_label


@pytest.mark.parametrize("name", [None, "NotPandas"])
def test_itertuples_name(name):
    """Compare ``itertuples(name=...)`` rows between Modin and pandas."""
    source = test_data["float_nan_data"]
    modin_df = pd.DataFrame(source)
    pandas_df = pandas.DataFrame(source)

    row_pairs = zip(
        modin_df.itertuples(name=name),
        pandas_df.itertuples(name=name),
    )
    for modin_row, pandas_row in row_pairs:
        np.testing.assert_equal(modin_row, pandas_row)


def test_itertuples_multiindex():
    """Compare ``itertuples()`` rows when the columns are a three-level MultiIndex."""
    source = test_data["int_data"]
    modin_df = pd.DataFrame(source)
    pandas_df = pandas.DataFrame(source)

    column_count = len(modin_df.columns)
    mi_columns = pd.MultiIndex.from_tuples(
        [(pos // 4, pos // 2, pos) for pos in range(column_count)]
    )
    # The same MultiIndex object is installed on both frames.
    modin_df.columns = mi_columns
    pandas_df.columns = mi_columns

    for modin_row, pandas_row in zip(modin_df.itertuples(), pandas_df.itertuples()):
        np.testing.assert_equal(modin_row, pandas_row)


def test___iter__():
    """Check that ``DataFrame.__iter__`` returns an iterator yielding the pandas labels."""
    first_dataset = test_data_values[0]
    modin_df = pd.DataFrame(first_dataset)
    pandas_df = pandas.DataFrame(first_dataset)

    modin_iterator = modin_df.__iter__()

    # Check that modin_iterator implements the iterator interface
    assert hasattr(modin_iterator, "__iter__")
    assert any(hasattr(modin_iterator, attr) for attr in ("next", "__next__"))

    pd_iterator = pandas_df.__iter__()
    modin_labels = list(modin_iterator)
    pandas_labels = list(pd_iterator)
    assert modin_labels == pandas_labels


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test___contains__(request, data):
    """Check ``__contains__`` / ``in`` for a missing key and, if any, the first column."""
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    # Each probe is a (key, expected membership) pair.
    probes = [("Not Exist", False)]
    if "empty_data" not in request.node.name:
        # Non-empty datasets have at least one column to look up.
        probes.append((pandas_df.columns[0], True))

    for key, expected in probes:
        assert expected == modin_df.__contains__(key)
        assert expected == (key in modin_df)


@pytest.mark.parametrize("expand_frame_repr", [False, True])
@pytest.mark.parametrize(
    "max_rows_columns",
    [(5, 5), (10, 10), (50, 50), (51, 51), (52, 52), (75, 75), (None, None)],
)
@pytest.mark.parametrize("frame_size", [101, 102])
def test_display_options_for___repr__(max_rows_columns, expand_frame_repr, frame_size):
    """Compare ``repr`` output with pandas under several display option settings."""
    values = random_state.randint(RAND_LOW, RAND_HIGH, size=(frame_size, frame_size))
    pandas_df = pandas.DataFrame(values)
    modin_df = pd.DataFrame(values)

    max_rows, max_columns = max_rows_columns
    # Flat (name, value, name, value, ...) sequence expected by ``option_context``.
    display_options = (
        "display.max_rows",
        max_rows,
        "display.max_columns",
        max_columns,
        "display.expand_frame_repr",
        expand_frame_repr,
    )
    with pd.option_context(*display_options):
        modin_text = repr(modin_df)
    with pandas.option_context(*display_options):
        pandas_text = repr(pandas_df)
    assert modin_text == pandas_text


def test___finalize__():
    """Call ``__finalize__(None)`` and expect a default-to-pandas warning off native execution."""
    first_dataset = test_data_values[0]
    # NOTE: __finalize__() defaults to pandas at the API layer.
    expect_warning = not current_execution_is_native()
    with warns_that_defaulting_to_pandas_if(expect_warning):
        modin_df = pd.DataFrame(first_dataset)
        modin_df.__finalize__(None)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test___copy__(data):
    """Compare the result of ``__copy__`` between Modin and pandas."""
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    modin_copy = modin_df.__copy__()
    pandas_copy = pandas_df.__copy__()
    df_equals(modin_copy, pandas_copy)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test___deepcopy__(data):
    """Compare the result of ``__deepcopy__`` between Modin and pandas."""
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    modin_copy = modin_df.__deepcopy__()
    pandas_copy = pandas_df.__deepcopy__()
    df_equals(modin_copy, pandas_copy)


def test___repr__():
    """Compare ``repr`` output with pandas for several shapes, an empty frame and a CSV."""
    # ___repr___ method has a different code path depending on
    # whether the number of rows is >60; and a different code path
    # depending on the number of columns is >20.
    frame_shapes = [
        # rows > 60, cols > 20 (column counts around the 100 mark)
        (1000, 100),
        (1000, 99),
        (1000, 101),
        (1000, 102),
        # rows <= 60, cols > 20
        (10, 100),
        # rows <= 60, cols <= 20
        (10, 10),
        # rows > 60, cols <= 20
        (100, 10),
    ]
    for shape in frame_shapes:
        values = random_state.randint(RAND_LOW, RAND_HIGH, size=shape)
        pandas_df = pandas.DataFrame(values)
        modin_df = pd.DataFrame(values)
        assert repr(pandas_df) == repr(modin_df)

    # Empty
    pandas_df = pandas.DataFrame(columns=["col{}".format(i) for i in range(100)])
    modin_df = pd.DataFrame(columns=["col{}".format(i) for i in range(100)])
    assert repr(pandas_df) == repr(modin_df)

    # From Issue #1705
    csv_text = """"time","device_id","lat","lng","accuracy","activity_1","activity_1_conf","activity_2","activity_2_conf","activity_3","activity_3_conf"
"2016-08-26 09:00:00.206",2,60.186805,24.821049,33.6080017089844,"STILL",75,"IN_VEHICLE",5,"ON_BICYCLE",5
"2016-08-26 09:00:05.428",5,60.192928,24.767222,5,"WALKING",62,"ON_BICYCLE",29,"RUNNING",6
"2016-08-26 09:00:05.818",1,60.166382,24.700443,3,"WALKING",75,"IN_VEHICLE",5,"ON_BICYCLE",5
"2016-08-26 09:00:15.816",1,60.166254,24.700671,3,"WALKING",75,"IN_VEHICLE",5,"ON_BICYCLE",5
"2016-08-26 09:00:16.413",5,60.193055,24.767427,5,"WALKING",85,"ON_BICYCLE",15,"UNKNOWN",0
"2016-08-26 09:00:20.578",3,60.152996,24.745216,3.90000009536743,"STILL",69,"IN_VEHICLE",31,"UNKNOWN",0"""
    pandas_df = pandas.read_csv(io.StringIO(csv_text))
    expect_warning = not current_execution_is_native()
    with warns_that_defaulting_to_pandas_if(expect_warning):
        modin_df = pd.read_csv(io.StringIO(csv_text))
    assert repr(pandas_df) == repr(modin_df)


def test___repr__does_not_raise_attribute_column_warning():
    """Ensure ``repr`` of a frame does not emit the set-attribute warning."""
    # See https://github.com/modin-project/modin/issues/5380
    modin_df = pd.DataFrame([1])
    with warnings.catch_warnings():
        # Escalate the warning to an error so that emitting it fails the test.
        warnings.filterwarnings("error", message=SET_DATAFRAME_ATTRIBUTE_WARNING)
        repr(modin_df)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_inplace_series_ops(data):
    """Compare frames after in-place ``dropna``/``fillna`` on column selections."""
    pandas_df = pandas.DataFrame(data)
    modin_df = pd.DataFrame(data)

    # NOTE(review): both frames are built from the same ``data``, so this
    # comparison of column counts looks like it can never be true and the body
    # below appears to be unreachable -- confirm whether a different guard
    # (e.g. "at least two columns") was intended before enabling it.
    if len(modin_df.columns) > len(pandas_df.columns):
        col0 = modin_df.columns[0]
        col1 = modin_df.columns[1]
        # In-place dropna on a selected column, applied to both frames.
        pandas_df[col1].dropna(inplace=True)
        modin_df[col1].dropna(inplace=True)
        df_equals(modin_df, pandas_df)

        # In-place fillna on a selected column, applied to both frames.
        pandas_df[col0].fillna(0, inplace=True)
        modin_df[col0].fillna(0, inplace=True)
        df_equals(modin_df, pandas_df)


# Note: Tests setting an attribute that is not an existing column label
def test___setattr__not_column():
    """Set an attribute that is not a column label and read it back."""
    initial_rows = [1, 2, 3]
    pandas_df = pandas.DataFrame(initial_rows)
    modin_df = pd.DataFrame(initial_rows)

    for frame in (pandas_df, modin_df):
        frame.new_col = [4, 5, 6]

    # The frames themselves must still compare equal after the assignment.
    df_equals(modin_df, pandas_df)

    # While `new_col` is not a column of the dataframe,
    # it should be accessible with __getattr__.
    assert modin_df.new_col == pandas_df.new_col


def test___setattr__mutating_column():
    """Check attribute-style assignment for existing columns and for new attributes.

    Assigning to an existing column label through attribute access is compared
    with pandas for a list, a Series and a ``loc`` update. Assigning to a name
    that is not a column is expected to warn once with
    ``SET_DATAFRAME_ATTRIBUTE_WARNING`` and to create a plain attribute rather
    than a column.
    """
    # Use case from issue #4577
    pandas_df = pandas.DataFrame([[1]], columns=["col0"])
    modin_df = pd.DataFrame([[1]], columns=["col0"])

    # Replacing a column with a list should mutate the column in place.
    pandas_df.col0 = [3]
    modin_df.col0 = [3]

    df_equals(modin_df, pandas_df)
    # Check that the col0 attribute reflects the value update.
    df_equals(modin_df.col0, pandas_df.col0)

    pandas_df.col0 = pandas.Series([5])
    modin_df.col0 = pd.Series([5])

    df_equals(modin_df, pandas_df)
    # Check that the col0 attribute reflects this update
    df_equals(modin_df.col0, pandas_df.col0)

    pandas_df.loc[0, "col0"] = 4
    modin_df.loc[0, "col0"] = 4

    # Check that the col0 attribute reflects update via loc
    df_equals(modin_df, pandas_df)
    assert modin_df.col0.equals(modin_df["col0"])

    # Check that attempting to add a new col via attributes raises warning
    # and adds the provided list as a new attribute and not a column.
    with pytest.warns(
        UserWarning,
        match=SET_DATAFRAME_ATTRIBUTE_WARNING,
    ):
        modin_df.col1 = [4]

    # With the warning escalated to an error, none of these assignments may
    # emit it: `col1` is set again after the assignment above, `new_attr` is
    # given a scalar and `col0` is an existing column.
    with warnings.catch_warnings():
        warnings.filterwarnings(
            action="error",
            message=SET_DATAFRAME_ATTRIBUTE_WARNING,
        )
        modin_df.col1 = [5]
        modin_df.new_attr = 6
        modin_df.col0 = 7

    assert "new_attr" in dir(
        modin_df
    ), "Modin attribute was not correctly added to the df."
    # A failure here means the attribute leaked into the column labels.
    assert (
        "new_attr" not in modin_df
    ), "New attribute was unexpectedly added to the columns."
    assert modin_df.new_attr == 6, "Modin attribute value was set incorrectly."
    assert isinstance(
        modin_df.col0, pd.Series
    ), "Scalar was not broadcasted properly to an existing column."


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_isin(data):
    """Compare ``isin`` with a list of values between Modin and pandas."""
    pandas_df = pandas.DataFrame(data)
    modin_df = pd.DataFrame(data)

    candidates = [1, 2, 3, 4]
    expected = pandas_df.isin(candidates)
    actual = modin_df.isin(candidates)

    df_equals(actual, expected)


def test_isin_with_modin_objects():
    """Compare ``DataFrame.isin`` when the values are a Series or another DataFrame."""

    def isin_second(pair):
        # ``pair`` is (frame, values); both come from the same library.
        frame, values = pair
        return frame.isin(values)

    modin_df1, pandas_df1 = create_test_dfs({"a": [1, 2], "b": [3, 4]})
    modin_series = pd.Series([1, 4, 5, 6])
    pandas_series = pandas.Series([1, 4, 5, 6])

    eval_general(
        (modin_df1, modin_series),
        (pandas_df1, pandas_series),
        isin_second,
    )

    modin_df2 = modin_series.to_frame("a")
    pandas_df2 = pandas_series.to_frame("a")

    eval_general(
        (modin_df1, modin_df2),
        (pandas_df1, pandas_df2),
        isin_second,
    )

    # Check case when indices are not matching
    modin_df1, pandas_df1 = create_test_dfs({"a": [1, 2], "b": [3, 4]}, index=[10, 11])

    value_pairs = (
        (modin_series, pandas_series),
        (modin_df2, pandas_df2),
    )
    for modin_values, pandas_values in value_pairs:
        eval_general(
            (modin_df1, modin_values),
            (pandas_df1, pandas_values),
            isin_second,
        )


================================================
FILE: modin/tests/pandas/dataframe/test_join_sort.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import warnings

import matplotlib
import numpy as np
import pandas
import pytest

import modin.pandas as pd
from modin.config import Engine, NPartitions, StorageFormat
from modin.pandas.io import to_pandas
from modin.tests.pandas.utils import (
    arg_keys,
    axis_keys,
    axis_values,
    bool_arg_keys,
    bool_arg_values,
    create_test_dfs,
    default_to_pandas_ignore_string,
    df_equals,
    eval_general,
    generate_multiindex,
    random_state,
    rotate_decimal_digits_or_symbols,
    test_data,
    test_data_keys,
    test_data_values,
)
from modin.tests.test_utils import (
    df_or_series_using_native_execution,
    warns_that_defaulting_to_pandas_if,
)

NPartitions.put(4)

# Force matplotlib to not use any Xwindows backend.
matplotlib.use("Agg")

# Our configuration in pytest.ini requires that we explicitly catch all
# instances of defaulting to pandas, but some test modules, like this one,
# have too many such instances.
pytestmark = pytest.mark.filterwarnings(default_to_pandas_ignore_string)

# Initialize env for storage format detection in @pytest.mark.*
pd.DataFrame()


def df_equals_and_sort(df1, df2):
    """Sort the rows of both frames by all of their columns, then run ``df_equals()``."""
    sorted_frames = [
        frame.sort_values(by=frame.columns.tolist(), ignore_index=True)
        for frame in (df1, df2)
    ]
    df_equals(*sorted_frames)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_combine(data):
    """Compare ``DataFrame.combine`` between Modin and pandas.

    Each frame is combined with itself shifted by one, using a function that
    keeps the column with fewer non-NA values (the second one on ties).
    """
    pandas_df = pandas.DataFrame(data)
    modin_df = pd.DataFrame(data)

    def pick_sparser(s1, s2):
        # Keep whichever column has strictly fewer non-NA entries.
        return s1 if s1.count() < s2.count() else s2

    modin_result = modin_df.combine(modin_df + 1, pick_sparser)
    pandas_result = pandas_df.combine(pandas_df + 1, pick_sparser)
    # Previously both results were discarded, so the test could only fail if
    # one of the calls raised; compare them explicitly.
    df_equals(modin_result, pandas_result)


@pytest.mark.parametrize(
    "test_data, test_data2",
    [
        (
            np.random.randint(0, 100, size=(64, 64)),
            np.random.randint(0, 100, size=(128, 64)),
        ),
        (
            np.random.randint(0, 100, size=(128, 64)),
            np.random.randint(0, 100, size=(64, 64)),
        ),
        (
            np.random.randint(0, 100, size=(64, 64)),
            np.random.randint(0, 100, size=(64, 128)),
        ),
        (
            np.random.randint(0, 100, size=(64, 128)),
            np.random.randint(0, 100, size=(64, 64)),
        ),
    ],
)
def test_join(test_data, test_data2):
    """Compare ``DataFrame.join`` with pandas for column joins and index joins."""

    def build_frames(values):
        # Build a (modin, pandas) pair with "col<i>" columns and a 1-based
        # index named "key".
        row_count, column_count = values.shape
        modin_frame = pd.DataFrame(
            values,
            columns=["col{}".format(i) for i in range(column_count)],
            index=pd.Index(list(range(1, row_count + 1)), name="key"),
        )
        pandas_frame = pandas.DataFrame(
            values,
            columns=["col{}".format(i) for i in range(column_count)],
            index=pandas.Index(list(range(1, row_count + 1)), name="key"),
        )
        return modin_frame, pandas_frame

    modin_df, pandas_df = build_frames(test_data)
    modin_df2, pandas_df2 = build_frames(test_data2)

    # Each ``on`` column is paired with one ``sort`` setting.
    on_and_sort = [("col33", False), ("col34", True)]
    for how in ["inner", "left", "right", "outer"]:
        for on, sort in on_and_sort:
            join_kwargs = {
                "how": how,
                "on": on,
                "sort": sort,
                "lsuffix": "_caller",
                "rsuffix": "_other",
            }
            modin_result = modin_df.join(modin_df2, **join_kwargs)
            pandas_result = pandas_df.join(pandas_df2, **join_kwargs)
            # sorting in `join` is implemented through range partitioning technique
            # therefore the order of the rows after it does not match the pandas,
            # so additional sorting is needed in order to get the same result as for pandas
            compare = df_equals_and_sort if sort else df_equals
            compare(modin_result, pandas_result)

    frame_data = {
        "col1": [0, 1, 2, 3],
        "col2": [4, 5, 6, 7],
        "col3": [8, 9, 0, 1],
        "col4": [2, 4, 5, 6],
    }
    modin_df = pd.DataFrame(frame_data)
    pandas_df = pandas.DataFrame(frame_data)

    frame_data2 = {"col5": [0], "col6": [1]}
    modin_df2 = pd.DataFrame(frame_data2)
    pandas_df2 = pandas.DataFrame(frame_data2)

    # Index-on-index join with a single other frame.
    for how in ("left", "right", "outer", "inner"):
        df_equals(
            modin_df.join(modin_df2, how=how),
            pandas_df.join(pandas_df2, how=how),
        )

    frame_data3 = {"col7": [1, 2, 3, 5, 6, 7, 8]}
    modin_df3 = pd.DataFrame(frame_data3)
    pandas_df3 = pandas.DataFrame(frame_data3)

    # Index-on-index join with a list of other frames.
    for how in ("left", "outer", "inner"):
        df_equals(
            modin_df.join([modin_df2, modin_df3], how=how),
            pandas_df.join([pandas_df2, pandas_df3], how=how),
        )


@pytest.mark.parametrize("how", ["left", "inner", "right"])
def test_join_empty(how):
    """Compare joining a frame with a zero-row slice of itself on column ``1``."""
    values = np.random.randint(0, 100, size=(64, 64))
    modin_df, pandas_df = create_test_dfs(values)

    def join_with_empty(df):
        return df.join(df.iloc[:0], on=1, how=how, lsuffix="_caller")

    eval_general(modin_df, pandas_df, join_with_empty)


def test_join_cross_6786():
    """Compare a ``how="cross"`` join against a relabelled single-column frame."""
    rows = [[7, 8, 9], [10, 11, 12]]
    modin_df, pandas_df = create_test_dfs(rows, columns=["x", "y", "z"])

    def cross_join(df):
        # The right-hand side is column "x" with row labels replaced by "p"/"q".
        other = df[["x"]].set_axis(["p", "q"], axis=0)
        return df.join(other, how="cross", lsuffix="p")

    modin_join = cross_join(modin_df)
    pandas_join = cross_join(pandas_df)
    df_equals(modin_join, pandas_join)


def test_join_5203():
    """Joining a list of frames with ``on=`` must raise the same ValueError as pandas."""
    data = np.ones([2, 4])
    frame_pairs = [
        create_test_dfs(data, columns=["a", "b", "c", "d"]) for _ in range(3)
    ]
    modin_dfs = [modin_df for modin_df, _ in frame_pairs]
    pandas_dfs = [pandas_df for _, pandas_df in frame_pairs]

    expected_message = "Joining multiple DataFrames only supported for joining on index"
    for first, second, third in (modin_dfs, pandas_dfs):
        with pytest.raises(ValueError, match=expected_message):
            first.join([second, third], how="inner", on="a")


def test_join_6602():
    """Joining a frame with a renamed Series must not emit the 'Distributing dict' warning."""
    league_names = pd.Series(
        ["Major League Baseball", "National Basketball Association"],
        index=["MLB", "NBA"],
    )
    team_columns = {
        "name": ["Mariners", "Lakers"] * 50,
        "league_abbreviation": ["MLB", "NBA"] * 50,
    }
    teams = pd.DataFrame(team_columns)

    with warnings.catch_warnings():
        # check that join doesn't show UserWarning
        warnings.filterwarnings(
            action="error",
            message="Distributing <class 'dict'> object",
            category=UserWarning,
        )
        indexed_teams = teams.set_index("league_abbreviation")
        indexed_teams.join(league_names.rename("league_name"))


@pytest.mark.parametrize(
    "test_data, test_data2",
    [
        (
            np.random.randint(0, 100, size=(64, 64)),
            np.random.randint(0, 100, size=(128, 64)),
        ),
        (
            np.random.randint(0, 100, size=(128, 64)),
            np.random.randint(0, 100, size=(64, 64)),
        ),
        (
            np.random.randint(0, 100, size=(64, 64)),
            np.random.randint(0, 100, size=(64, 128)),
        ),
        (
            np.random.randint(0, 100, size=(64, 128)),
            np.random.randint(0, 100, size=(64, 64)),
        ),
    ],
)
def test_merge(test_data, test_data2):
    """Compare ``DataFrame.merge`` with pandas across join types and key arguments.

    Results are compared after sorting rows on both sides
    (see https://github.com/modin-project/modin/issues/2246).
    """

    def build_frames(values):
        # Build a (modin, pandas) pair with "col<i>" columns and a 1-based
        # index named "key".
        row_count, column_count = values.shape
        modin_frame = pd.DataFrame(
            values,
            columns=["col{}".format(i) for i in range(column_count)],
            index=pd.Index(list(range(1, row_count + 1)), name="key"),
        )
        pandas_frame = pandas.DataFrame(
            values,
            columns=["col{}".format(i) for i in range(column_count)],
            index=pandas.Index(list(range(1, row_count + 1)), name="key"),
        )
        return modin_frame, pandas_frame

    def check_merge(modin_left, modin_right, pandas_left, pandas_right, **merge_kwargs):
        # Run the same merge on both libraries and compare the sorted results.
        modin_result = modin_left.merge(modin_right, **merge_kwargs)
        pandas_result = pandas_left.merge(pandas_right, **merge_kwargs)
        # FIXME: https://github.com/modin-project/modin/issues/2246
        df_equals_and_sort(modin_result, pandas_result)

    modin_df, pandas_df = build_frames(test_data)
    modin_df2, pandas_df2 = build_frames(test_data2)

    # Each ``on`` specification is paired with one ``sort`` setting.
    on_and_sort = [("col33", False), (["col33", "col34"], True)]
    for how in ["left", "inner", "right"]:
        for on, sort in on_and_sort:
            key_variants = (
                {"on": on},
                {"left_on": "key", "right_on": "key"},
            )
            for key_kwargs in key_variants:
                check_merge(
                    modin_df,
                    modin_df2,
                    pandas_df,
                    pandas_df2,
                    how=how,
                    sort=sort,
                    **key_kwargs,
                )

    # Test for issue #1771
    modin_df = pd.DataFrame({"name": np.arange(40)})
    modin_df2 = pd.DataFrame({"name": [39], "position": [0]})
    pandas_df = pandas.DataFrame({"name": np.arange(40)})
    pandas_df2 = pandas.DataFrame({"name": [39], "position": [0]})
    check_merge(modin_df, modin_df2, pandas_df, pandas_df2, on="name", how="inner")

    frame_data = {
        "col1": [0, 1, 2, 3],
        "col2": [4, 5, 6, 7],
        "col3": [8, 9, 0, 1],
        "col4": [2, 4, 5, 6],
    }
    modin_df = pd.DataFrame(frame_data)
    pandas_df = pandas.DataFrame(frame_data)

    frame_data2 = {"col1": [0, 1, 2], "col2": [1, 5, 6]}
    modin_df2 = pd.DataFrame(frame_data2)
    pandas_df2 = pandas.DataFrame(frame_data2)

    key_variants = [
        # Defaults
        {},
        # left_on and right_index
        {"left_on": "col1", "right_index": True},
        # left_index and right_on
        {"left_index": True, "right_on": "col1"},
        # left_on and right_on col1
        {"left_on": "col1", "right_on": "col1"},
        # left_on and right_on col2
        {"left_on": "col2", "right_on": "col2"},
        # left_index and right_index
        {"left_index": True, "right_index": True},
    ]
    for how in ["outer", "inner"]:
        for key_kwargs in key_variants:
            check_merge(
                modin_df, modin_df2, pandas_df, pandas_df2, how=how, **key_kwargs
            )

    # Cannot merge a Series without a name
    ps = pandas.Series(frame_data2.get("col1"))
    ms = pd.Series(frame_data2.get("col1"))

    def merge_unnamed_series(df):
        # Pick the Series that belongs to the same library as ``df``.
        return df.merge(ms if isinstance(df, pd.DataFrame) else ps)

    eval_general(
        modin_df,
        pandas_df,
        merge_unnamed_series,
        # FIXME: https://github.com/modin-project/modin/issues/2246
        comparator=df_equals_and_sort,
        expected_exception=ValueError("Cannot merge a Series without a name"),
    )

    # merge a Series with a name
    ps = pandas.Series(frame_data2.get("col1"), name="col1")
    ms = pd.Series(frame_data2.get("col1"), name="col1")

    def merge_named_series(df):
        # Pick the Series that belongs to the same library as ``df``.
        return df.merge(ms if isinstance(df, pd.DataFrame) else ps)

    eval_general(
        modin_df,
        pandas_df,
        merge_named_series,
        # FIXME: https://github.com/modin-project/modin/issues/2246
        comparator=df_equals_and_sort,
    )

    with pytest.raises(TypeError):
        modin_df.merge("Non-valid type")


@pytest.mark.parametrize("how", ["left", "inner", "right"])
def test_merge_empty(how):
    """Compare merging a frame with a zero-row slice of itself."""
    values = np.random.randint(0, 100, size=(64, 64))
    modin_df, pandas_df = create_test_dfs(values)

    def merge_with_empty(df):
        return df.merge(df.iloc[:0], how=how)

    eval_general(modin_df, pandas_df, merge_with_empty)


def test_merge_with_mi_columns():
    """Compare ``merge`` on a tuple-labelled column shared by both frames."""

    def frame_data(second_label):
        # The two inputs differ only in the second level of the middle column.
        return {
            ("col0", "a"): [1, 2, 3, 4],
            ("col0", second_label): [2, 3, 4, 5],
            ("col1", "a"): [3, 4, 5, 6],
        }

    modin_df1, pandas_df1 = create_test_dfs(frame_data("b"))
    modin_df2, pandas_df2 = create_test_dfs(frame_data("c"))

    def merge_on_shared_column(dfs):
        left, right = dfs
        return left.merge(right, on=[("col0", "a")])

    eval_general(
        (modin_df1, modin_df2),
        (pandas_df1, pandas_df2),
        merge_on_shared_column,
    )


@pytest.mark.parametrize("has_index_cache", [True, False])
def test_merge_on_index(has_index_cache):
    """
    Test ``.merge()`` when the keys are index levels, columns, or a mix of both.

    Both frames get a two-level index via ``set_index``. The merges are run
    either with the Modin frames' index cache materialized or with it reset,
    depending on ``has_index_cache``.
    """
    modin_df1, pandas_df1 = create_test_dfs(
        {
            "idx_key1": [1, 2, 3, 4],
            "idx_key2": [2, 3, 4, 5],
            "idx_key3": [3, 4, 5, 6],
            "data_col1": [10, 2, 3, 4],
            "col_key1": [3, 4, 5, 6],
            "col_key2": [3, 4, 5, 6],
        }
    )

    # Left frame: index levels are (idx_key1, idx_key2); idx_key3 stays a column.
    modin_df1 = modin_df1.set_index(["idx_key1", "idx_key2"])
    pandas_df1 = pandas_df1.set_index(["idx_key1", "idx_key2"])

    modin_df2, pandas_df2 = create_test_dfs(
        {
            "idx_key1": [4, 3, 2, 1],
            "idx_key2": [5, 4, 3, 2],
            "idx_key3": [6, 5, 4, 3],
            "data_col2": [10, 2, 3, 4],
            "col_key1": [6, 5, 4, 3],
            "col_key2": [6, 5, 4, 3],
        }
    )

    # Right frame: index levels are (idx_key2, idx_key3); idx_key1 stays a column.
    modin_df2 = modin_df2.set_index(["idx_key2", "idx_key3"])
    pandas_df2 = pandas_df2.set_index(["idx_key2", "idx_key3"])

    def setup_cache():
        """Put the index cache of both Modin frames into the state under test."""
        if has_index_cache:
            modin_df1.index  # triggering index materialization
            modin_df2.index
            assert modin_df1._query_compiler.frame_has_index_cache
            assert modin_df2._query_compiler.frame_has_index_cache
        else:
            # Propagate deferred indices to partitions
            # The change in index is not automatically handled by Modin. See #3941.
            modin_df1.index = modin_df1.index
            modin_df1._to_pandas()
            # Then set the frame's index cache to None.
            modin_df1._query_compiler.set_frame_index_cache(None)
            modin_df2.index = modin_df2.index
            modin_df2._to_pandas()
            modin_df2._query_compiler.set_frame_index_cache(None)

    # Same key name(s) on both sides; the cache state is re-established
    # before every merge.
    for on in (
        ["col_key1", "idx_key1"],
        ["col_key1", "idx_key2"],
        ["col_key1", "idx_key3"],
        ["idx_key1"],
        ["idx_key2"],
        ["idx_key3"],
    ):
        setup_cache()
        eval_general(
            (modin_df1, modin_df2),
            (pandas_df1, pandas_df2),
            lambda dfs: dfs[0].merge(dfs[1], on=on),
        )

    # Different key names on each side.
    for left_on, right_on in (
        (["idx_key1"], ["col_key1"]),
        (["col_key1"], ["idx_key3"]),
        (["idx_key1"], ["idx_key3"]),
        (["idx_key2"], ["idx_key2"]),
        (["col_key1", "idx_key2"], ["col_key2", "idx_key2"]),
    ):
        setup_cache()
        eval_general(
            (modin_df1, modin_df2),
            (pandas_df1, pandas_df2),
            lambda dfs: dfs[0].merge(dfs[1], left_on=left_on, right_on=right_on),
        )


@pytest.mark.parametrize(
    "left_index", [[], ["key"], ["key", "b"], ["key", "b", "c"], ["b"], ["b", "c"]]
)
@pytest.mark.parametrize(
    "right_index", [[], ["key"], ["key", "e"], ["key", "e", "f"], ["e"], ["e", "f"]]
)
def test_merge_on_single_index(left_index, right_index):
    """
    Test ``.merge()`` on the single key ``"key"``, which may be a column or an index level in either frame.
    """
    left_data = {
        "b": [3, 4, 4, 5],
        "key": [1, 1, 2, 2],
        "c": [2, 3, 2, 2],
        "d": [2, 1, 3, 1],
    }
    modin_df1, pandas_df1 = create_test_dfs(left_data)
    if left_index:
        # Move the requested columns of the left frame into its index.
        modin_df1 = modin_df1.set_index(left_index)
        pandas_df1 = pandas_df1.set_index(left_index)

    right_data = {
        "e": [3, 4, 4, 5],
        "f": [2, 3, 2, 2],
        "key": [1, 1, 2, 2],
        "h": [2, 1, 3, 1],
    }
    modin_df2, pandas_df2 = create_test_dfs(right_data)
    if right_index:
        # Move the requested columns of the right frame into its index.
        modin_df2 = modin_df2.set_index(right_index)
        pandas_df2 = pandas_df2.set_index(right_index)

    def merge_on_key(dfs):
        left, right = dfs
        return left.merge(right, on="key")

    eval_general(
        (modin_df1, modin_df2),
        (pandas_df1, pandas_df2),
        merge_on_key,
    )


@pytest.mark.parametrize("axis", [0, 1])
@pytest.mark.parametrize("ascending", [False, True])
@pytest.mark.parametrize("na_position", ["first", "last"], ids=["first", "last"])
def test_sort_index(axis, ascending, na_position):
    """Compare ``sort_index`` with pandas on an axis whose labels contain NaNs."""
    source = test_data["float_nan_data"]
    modin_df = pd.DataFrame(source)
    pandas_df = pandas.DataFrame(source)

    # Change index value so sorting will actually make a difference
    if axis == 0:
        row_count = len(modin_df.index)
        for frame in (modin_df, pandas_df):
            frame.index = [
                (pos - row_count / 2) % row_count for pos in range(row_count)
            ]

    def with_nan_labels(frame):
        # Replace every even-positioned label on ``axis`` with NaN.
        labels = frame.axes[axis]
        return frame.set_axis(
            [np.nan if pos % 2 == 0 else labels[pos] for pos in range(len(labels))],
            axis=axis,
            copy=False,
        )

    # Add NaNs to sorted index
    modin_df = with_nan_labels(modin_df)
    pandas_df = with_nan_labels(pandas_df)

    def sort_by_labels(df):
        return df.sort_index(axis=axis, ascending=ascending, na_position=na_position)

    eval_general(modin_df, pandas_df, sort_by_labels)


@pytest.mark.parametrize("axis", ["rows", "columns"])
def test_sort_index_inplace(axis):
    """
    Test ``sort_index(inplace=True)`` with ``axis`` given by name.
    """
    data = test_data["int_data"]
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    # Both frames are mutated in place, then compared with each other.
    modin_df.sort_index(axis=axis, inplace=True)
    pandas_df.sort_index(axis=axis, inplace=True)

    df_equals(modin_df, pandas_df)


@pytest.mark.parametrize(
    "sort_remaining", bool_arg_values, ids=arg_keys("sort_remaining", bool_arg_keys)
)
def test_sort_multiindex(sort_remaining):
    """
    Test ``sort_index`` on frames whose rows and columns both use a generated multi-index.
    """
    data = test_data["int_data"]
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    # Install the same generated index on both frames, one axis at a time.
    for axis_name in ("index", "columns"):
        multi = generate_multiindex(len(getattr(modin_df, axis_name)))
        setattr(modin_df, axis_name, multi)
        setattr(pandas_df, axis_name, multi)

    for sort_kwargs in ({"level": 0}, {"axis": 0}, {"axis": 1}):
        expect_warning = not df_or_series_using_native_execution(modin_df)
        with warns_that_defaulting_to_pandas_if(expect_warning):
            modin_sorted = modin_df.sort_index(
                sort_remaining=sort_remaining, **sort_kwargs
            )
            pandas_sorted = pandas_df.sort_index(
                sort_remaining=sort_remaining, **sort_kwargs
            )
            df_equals(modin_sorted, pandas_sorted)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize(
    "by",
    [
        pytest.param(
            "first",
            marks=pytest.mark.exclude_by_default,
        ),
        pytest.param(
            "first,last",
            marks=pytest.mark.exclude_by_default,
        ),
        "first,last,middle",
    ],
)
@pytest.mark.parametrize("axis", axis_values, ids=axis_keys)
@pytest.mark.parametrize(
    "ascending",
    [False, True] + ["list_first_True", "list_first_False"],
    ids=arg_keys(
        "ascending", ["False", "True"] + ["list_first_True", "list_first_False"]
    ),
)
@pytest.mark.parametrize(
    "inplace", bool_arg_values, ids=arg_keys("inplace", bool_arg_keys)
)
@pytest.mark.parametrize(
    "kind",
    [
        pytest.param(
            "mergesort",
            marks=pytest.mark.exclude_by_default,
        ),
        "quicksort",
        pytest.param(
            "heapsort",
            marks=pytest.mark.exclude_by_default,
        ),
    ],
)
@pytest.mark.parametrize("na_position", ["first", "last"], ids=["first", "last"])
@pytest.mark.parametrize(
    "ignore_index",
    bool_arg_values,
    ids=arg_keys("ignore_index", bool_arg_keys),
)
@pytest.mark.parametrize("key", [None, rotate_decimal_digits_or_symbols])
def test_sort_values(
    data, by, axis, ascending, inplace, kind, na_position, ignore_index, key
):
    """
    Test ``sort_values`` over a grid of its keyword arguments.

    Parameters
    ----------
    data : dict
        Column data used to build both frames.
    by : str
        Comma-separated spec of the labels to sort by. Each item is one of
        ``"first"``, ``"last"``, ``"middle"`` or ``"multiindex_level<N>"``.
    axis : int or str
        Axis passed to ``sort_values``.
    ascending : bool or str
        Either a plain boolean, or ``"list_first_True"`` / ``"list_first_False"``
        to request an alternating list of booleans starting with that value.
    inplace, kind, na_position, ignore_index, key
        Forwarded to ``sort_values`` unchanged.
    """
    # A single check covers every ``ascending is None`` case; the former
    # follow-up skip for ``ascending is None and key is not None`` could
    # never be reached after this one and has been dropped.
    if ascending is None:
        pytest.skip("None is not a valid value for ascending.")
    if (axis == 1 or axis == "columns") and ignore_index:
        pytest.skip("Pandas bug #39426 which is fixed in Pandas 1.3")

    if "multiindex" in by:
        index = generate_multiindex(len(data[list(data.keys())[0]]), nlevels=2)
        columns = generate_multiindex(len(data.keys()), nlevels=2)
        data = {columns[ind]: data[name] for ind, name in enumerate(data)}
    else:
        index = None
        columns = None

    modin_df = pd.DataFrame(data, index=index, columns=columns)
    pandas_df = pandas.DataFrame(data, index=index, columns=columns)

    # Labels to sort by come from the axis orthogonal to the one being sorted.
    index = modin_df.index if axis == 1 or axis == "columns" else modin_df.columns

    # Parse "by" spec
    by_list = []
    for b in by.split(","):
        if b == "first":
            by_list.append(index[0])
        elif b == "last":
            by_list.append(index[-1])
        elif b == "middle":
            by_list.append(index[len(index) // 2])
        elif b.startswith("multiindex_level"):
            by_list.append(index.names[int(b[len("multiindex_level") :])])
        else:
            raise ValueError(f'Unknown "by" specifier: {b}')

    # Create "ascending" list: alternate True/False, one entry per sort label.
    if ascending in ["list_first_True", "list_first_False"]:
        start = 0 if ascending == "list_first_False" else 1
        ascending = [i & 1 > 0 for i in range(start, len(by_list) + start)]

    eval_general(
        modin_df,
        pandas_df,
        lambda df: df.sort_values(
            by_list,
            axis=axis,
            ascending=ascending,
            inplace=inplace,
            kind=kind,
            na_position=na_position,
            ignore_index=ignore_index,
            key=key,
        ),
        __inplace__=inplace,
    )


def test_sort_values_descending_with_only_two_bins():
    """
    Test a descending sort of a frame built by concatenating two four-row frames.

    Test case from https://github.com/modin-project/modin/issues/5781
    """
    halves = [pd.DataFrame({"a": values}) for values in ([1, 2, 3, 4], [5, 6, 7, 8])]

    modin_df = pd.concat(halves)
    pandas_df = modin_df._to_pandas()

    if StorageFormat.get() == "Pandas":
        partitions = modin_df._query_compiler._modin_frame._partitions
        assert partitions.shape == (2, 1)

    eval_general(
        modin_df,
        pandas_df,
        lambda frame: frame.sort_values(by="a", ascending=False),
    )


@pytest.mark.parametrize("ignore_index", [True, False])
def test_sort_values_preserve_index_names(ignore_index):
    """
    Test that ``sort_values`` yields the same index and column names as pandas.
    """
    values = np.random.choice(128, 128, replace=False).reshape((128, 1))
    modin_df, pandas_df = create_test_dfs(values)

    for frame in (pandas_df, modin_df):
        frame.index.names = ["custom_name"]
        frame.columns.names = ["custom_name"]
    # workaround for #1618 to actually propagate index change
    modin_df.index = modin_df.index
    modin_df.columns = modin_df.columns

    def comparator(df1, df2):
        # Check the axis names explicitly before comparing the frames.
        assert df1.index.names == df2.index.names
        assert df1.columns.names == df2.columns.names
        df_equals(df1, df2)

    eval_general(
        modin_df,
        pandas_df,
        lambda frame: frame.sort_values(frame.columns[0], ignore_index=ignore_index),
        comparator=comparator,
    )


@pytest.mark.parametrize("ascending", [True, False])
def test_sort_values_with_one_partition(ascending):
    """
    Test sorting a 2x2 string frame that is expected to occupy one partition.

    Test case from https://github.com/modin-project/modin/issues/5859
    """
    strings = np.array([["hello", "goodbye"], ["hello", "Hello"]])
    modin_df, pandas_df = create_test_dfs(strings)

    if StorageFormat.get() == "Pandas":
        partitions = modin_df._query_compiler._modin_frame._partitions
        assert partitions.shape == (1, 1)

    eval_general(
        modin_df,
        pandas_df,
        lambda frame: frame.sort_values(by=1, ascending=ascending),
    )


def test_sort_overpartitioned_df():
    """
    Test ``sort_values(by=0)`` on frames assembled from many single-row frames.
    """

    def check_sort(rows):
        # The modin frame is a concatenation of one transposed frame per row,
        # the pandas frame is built from the rows directly.
        modin_df = pd.concat([pd.DataFrame(row).T for row in rows]).reset_index(
            drop=True
        )
        pandas_df = pandas.DataFrame(rows)
        eval_general(modin_df, pandas_df, lambda frame: frame.sort_values(by=0))

    # First we test when the final df will have only 1 row and column partition.
    check_sort([[4, 5, 6], [1, 2, 3]])

    # Next we test when the final df will only have 1 row, but starts with multiple column
    # partitions.
    check_sort([list(range(100)), list(range(100, 200))])

    # Next we test when the final df will have multiple row partitions.
    check_sort(np.random.choice(650, 650, replace=False).reshape((65, 10)))

    saved_npartitions = NPartitions.get()
    NPartitions.put(24)
    try:
        # Next we test when there's only one row per partition.
        check_sort(np.random.choice(650, 650, replace=False).reshape((65, 10)))

        # And again, when there's more than one column partition.
        check_sort(np.random.choice(6500, 6500, replace=False).reshape((65, 100)))

        # Additionally, we should test when we have a number of partitions
        # that doesn't divide cleanly into our desired number of partitions.
        # In this case, we start with 17 partitions, and want 2.
        NPartitions.put(21)
        check_sort(np.random.choice(6500, 6500, replace=False).reshape((65, 100)))
    finally:
        # Always restore the partition count that was configured on entry.
        NPartitions.put(saved_npartitions)


def test_sort_values_with_duplicates():
    """
    Test ``sort_values`` with duplicated values and duplicated index labels.
    """
    modin_df = pd.DataFrame({"col": [2, 1, 1]}, index=[1, 1, 0])
    pandas_df = pandas.DataFrame({"col": [2, 1, 1]}, index=[1, 1, 0])
    sort_column = modin_df.columns[0]

    # Out-of-place sort.
    df_equals(
        modin_df.sort_values(sort_column, inplace=False),
        pandas_df.sort_values(sort_column, inplace=False),
    )

    # In-place sort.
    for frame in (modin_df, pandas_df):
        frame.sort_values(sort_column, inplace=True)
    df_equals(modin_df, pandas_df)


def test_sort_values_with_string_index():
    """
    Test ``sort_values`` on a frame whose index labels are strings.
    """
    modin_df = pd.DataFrame({"col": [25, 17, 1]}, index=["ccc", "bbb", "aaa"])
    pandas_df = pandas.DataFrame({"col": [25, 17, 1]}, index=["ccc", "bbb", "aaa"])
    sort_column = modin_df.columns[0]

    # Out-of-place sort.
    df_equals(
        modin_df.sort_values(sort_column, inplace=False),
        pandas_df.sort_values(sort_column, inplace=False),
    )

    # In-place sort.
    for frame in (modin_df, pandas_df):
        frame.sort_values(sort_column, inplace=True)
    df_equals(modin_df, pandas_df)


@pytest.mark.skipif(
    StorageFormat.get() != "Pandas",
    reason="We only need to test this case where sort does not default to pandas.",
)
@pytest.mark.parametrize("ascending", [True, False], ids=["True", "False"])
@pytest.mark.parametrize("na_position", ["first", "last"], ids=["first", "last"])
def test_sort_values_with_only_one_non_na_row_in_partition(ascending, na_position):
    """
    Test ``sort_values`` when the tail of the frame is NaN except for its last row.
    """
    column_names = [f"col {i}" for i in range(100)]
    pandas_df = pandas.DataFrame(np.random.rand(1000, 100), columns=column_names)

    # Need to ensure that one of the partitions has all NA values except for one row
    pandas_df.iloc[340:] = np.nan
    pandas_df.iloc[-1] = -4.0

    modin_df = pd.DataFrame(pandas_df)

    def sort_by_col_3(frame):
        return frame.sort_values("col 3", ascending=ascending, na_position=na_position)

    eval_general(modin_df, pandas_df, sort_by_col_3)


@pytest.mark.skipif(
    Engine.get() not in ("Ray", "Unidist", "Dask"),
    reason="We only need to test this case where sort does not default to pandas.",
)
def test_sort_values_with_sort_key_on_partition_boundary():
    """
    Test ``sort_values`` by the column located right after the first column partition.
    """
    column_names = [f"col {i}" for i in range(100)]
    modin_df = pd.DataFrame(np.random.rand(1000, 100), columns=column_names)

    # The first entry of ``column_widths`` is used as a positional column index.
    first_width = modin_df._query_compiler._modin_frame.column_widths[0]
    sort_key = modin_df.columns[first_width]

    eval_general(
        modin_df,
        modin_df._to_pandas(),
        lambda frame: frame.sort_values(sort_key),
    )


def test_where():
    """
    Test ``DataFrame.where`` with frame, Series and scalar values for ``other``.
    """

    def assert_elementwise_equal(modin_result, pandas_result):
        assert all((to_pandas(modin_result) == pandas_result).all())

    columns = list("abcdefghij")

    frame_data = random_state.randn(100, 10)
    modin_df, pandas_df = create_test_dfs(frame_data, columns=columns)
    pandas_cond_df = pandas_df % 5 < 2
    modin_cond_df = modin_df % 5 < 2

    # `other` is the negated frame
    expected = pandas_df.where(pandas_cond_df, -pandas_df)
    actual = modin_df.where(modin_cond_df, -modin_df)
    assert_elementwise_equal(actual, expected)

    # test case when other is Series
    series_data = random_state.randn(len(pandas_df))
    modin_series = pd.Series(series_data)
    pandas_series = pandas.Series(series_data)
    expected = pandas_df.where(pandas_cond_df, pandas_series, axis=0)
    actual = modin_df.where(modin_cond_df, modin_series, axis=0)
    df_equals(actual, expected)

    # Test that we choose the right values to replace when `other` == `True`
    # everywhere.
    all_true = np.full(shape=pandas_df.shape, fill_value=True)
    modin_true, pandas_true = create_test_dfs(all_true, columns=columns)
    expected = pandas_df.where(pandas_cond_df, pandas_true)
    actual = modin_df.where(modin_cond_df, modin_true)
    df_equals(actual, expected)

    # `other` is a pandas row, applied along axis 1
    row = pandas_df.loc[3]
    expected = pandas_df.where(pandas_cond_df, row, axis=1)
    actual = modin_df.where(modin_cond_df, row, axis=1)
    assert_elementwise_equal(actual, expected)

    # `other` is a pandas column, applied along axis 0
    column = pandas_df["e"]
    expected = pandas_df.where(pandas_cond_df, column, axis=0)
    actual = modin_df.where(modin_cond_df, column, axis=0)
    assert_elementwise_equal(actual, expected)

    # `other` is a scalar
    expected = pandas_df.where(pandas_df < 2, True)
    actual = modin_df.where(modin_df < 2, True)
    assert_elementwise_equal(actual, expected)


def test_where_different_axis_order():
    """
    Test ``where`` when ``cond``, ``df`` and ``other`` order their axes differently.
    """

    def last_to_front(labels):
        # Move the final label to the first position, keeping the rest in order.
        return labels[-1:].append(labels[:-1])

    data = test_data["float_nan_data"]
    pandas_df = pandas.DataFrame(data)

    # Condition frame: both axes reversed.
    pandas_cond_df = (pandas_df % 5 < 2).reindex(
        columns=pandas_df.columns[::-1], index=pandas_df.index[::-1]
    )
    # Replacement frame: both axes rotated by one position.
    pandas_other_df = (-pandas_df).reindex(
        columns=last_to_front(pandas_df.columns),
        index=last_to_front(pandas_df.index),
    )

    modin_df = pd.DataFrame(pandas_df)
    modin_cond_df = pd.DataFrame(pandas_cond_df)
    modin_other_df = pd.DataFrame(pandas_other_df)

    expected = pandas_df.where(pandas_cond_df, pandas_other_df)
    actual = modin_df.where(modin_cond_df, modin_other_df)
    df_equals(actual, expected)


@pytest.mark.parametrize("align_axis", ["index", "columns"])
@pytest.mark.parametrize("keep_shape", [False, True])
@pytest.mark.parametrize("keep_equal", [False, True])
def test_compare(align_axis, keep_shape, keep_equal):
    """
    Test ``compare`` for DataFrames and Series, in both argument orders.
    """
    kwargs = {
        "align_axis": align_axis,
        "keep_shape": keep_shape,
        "keep_equal": keep_equal,
    }

    def check_both_directions(modin_a, modin_b, pandas_a, pandas_b):
        # a.compare(b), then b.compare(a)
        for modin_left, modin_right, pandas_left, pandas_right in (
            (modin_a, modin_b, pandas_a, pandas_b),
            (modin_b, modin_a, pandas_b, pandas_a),
        ):
            modin_result = modin_left.compare(modin_right, **kwargs)
            pandas_result = pandas_left.compare(pandas_right, **kwargs)
            assert to_pandas(modin_result).equals(pandas_result)

    columns = list("abcdefghij")
    frame_data1 = random_state.randn(100, 10)
    frame_data2 = random_state.randn(100, 10)
    pandas_df = pandas.DataFrame(frame_data1, columns=list("abcdefghij"))
    pandas_df2 = pandas.DataFrame(frame_data2, columns=list("abcdefghij"))
    modin_df = pd.DataFrame(frame_data1, columns=columns)
    modin_df2 = pd.DataFrame(frame_data2, columns=list("abcdefghij"))
    check_both_directions(modin_df, modin_df2, pandas_df, pandas_df2)

    series_data1 = ["a", "b", "c", "d", "e"]
    series_data2 = ["a", "a", "c", "b", "e"]
    pandas_series1 = pandas.Series(series_data1)
    pandas_series2 = pandas.Series(series_data2)
    modin_series1 = pd.Series(series_data1)
    modin_series2 = pd.Series(series_data2)
    check_both_directions(modin_series1, modin_series2, pandas_series1, pandas_series2)


@pytest.mark.parametrize(
    "params",
    [
        {"ascending": True},
        {"normalize": True},
        pytest.param(
            {"sort": False},
            marks=(
                pytest.mark.xfail(
                    reason="Known issue with sort=False in `groupby()` "
                    + "(https://github.com/modin-project/modin/issues/3571)",
                    strict=True,
                )
                if Engine.get() in ("Python", "Ray", "Dask", "Unidist")
                and StorageFormat.get() != "Base"
                else []
            ),
        ),
    ],
)
def test_value_counts(params):
    """
    Test ``value_counts`` on a single column with the keyword arguments in ``params``.
    """
    rows = [[4, 1, 3, 2], [2, 5, 6, 5], [4, 3, 3, 5]]
    column_names = ["col1", "col2", "col3", "col4"]
    modin_df, pandas_df = create_test_dfs(rows, columns=column_names)

    eval_general(
        modin_df,
        pandas_df,
        lambda frame: frame["col1"].value_counts(**params),
    )


def test_value_counts_with_nulls():
    """
    Test ``value_counts(dropna=False)`` on a column that contains ``None``.
    """
    rows = [[5, 6, None, 7, 7], [None, None, 5, 8]]
    modin_df, pandas_df = create_test_dfs(rows)
    eval_general(
        modin_df,
        pandas_df,
        lambda frame: frame[0].value_counts(dropna=False),
    )


def test_value_counts_with_multiindex():
    """
    Test ``value_counts`` on a column of a frame constructed with a two-level index.
    """
    level_one = ["a", "a", "b", "b"]
    level_two = [1, 2, 1, 2]
    index = pd.MultiIndex.from_arrays(arrays=[level_one, level_two], names=("l1", "l2"))

    modin_df, pandas_df = create_test_dfs([[1, 2, 2, 4]], index=index)
    eval_general(modin_df, pandas_df, lambda frame: frame[0].value_counts())


================================================
FILE: modin/tests/pandas/dataframe/test_map_metadata.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

from decimal import Decimal

import matplotlib
import numpy as np
import pandas
import pytest

import modin.pandas as pd
from modin.config import MinRowPartitionSize, NPartitions, StorageFormat
from modin.core.dataframe.pandas.metadata import LazyProxyCategoricalDtype
from modin.core.storage_formats.pandas.utils import split_result_of_axis_func_pandas
from modin.pandas.testing import assert_index_equal, assert_series_equal
from modin.tests.pandas.utils import (
    RAND_HIGH,
    RAND_LOW,
    arg_keys,
    axis_keys,
    axis_values,
    bool_arg_keys,
    bool_arg_values,
    create_test_dfs,
    default_to_pandas_ignore_string,
    df_equals,
    df_is_empty,
    eval_general,
    indices_keys,
    indices_values,
    name_contains,
    numeric_dfs,
    random_state,
    sort_if_range_partitioning,
    test_data,
    test_data_keys,
    test_data_values,
    test_data_with_duplicates_keys,
    test_data_with_duplicates_values,
    test_func_keys,
    test_func_values,
)
from modin.tests.test_utils import (
    current_execution_is_native,
    df_or_series_using_native_execution,
    warns_that_defaulting_to_pandas_if,
)
from modin.utils import get_current_execution

# NOTE(review): sets the ``NPartitions`` config to 4 when this module is imported;
# presumably this fixes the partition count for the tests below — confirm.
NPartitions.put(4)

# Force matplotlib to not use any Xwindows backend.
matplotlib.use("Agg")

# Our configuration in pytest.ini requires that we explicitly catch all
# instances of defaulting to pandas, but some test modules, like this one,
# have too many such instances.
pytestmark = pytest.mark.filterwarnings(default_to_pandas_ignore_string)


def eval_insert(modin_df, pandas_df, **kwargs):
    """
    Run ``DataFrame.insert`` on both frames in place through ``eval_general``.

    Parameters
    ----------
    modin_df : modin.pandas.DataFrame
    pandas_df : pandas.DataFrame
    **kwargs : dict
        Arguments for ``insert``. ``loc`` defaults to ``0`` and ``column`` to
        ``"New column"``. A ``col`` entry is treated as ``column`` when
        ``column`` itself is not supplied.
    """
    overrides = dict(kwargs)
    if "col" in overrides and "column" not in overrides:
        overrides["column"] = overrides.pop("col")

    # Explicitly passed arguments take precedence over the defaults.
    insert_kwargs = {"loc": 0, "column": "New column", **overrides}

    eval_general(
        modin_df,
        pandas_df,
        operation=lambda df, **kwargs: df.insert(**kwargs),
        __inplace__=True,
        **insert_kwargs,
    )


def test_indexing():
    """
    Test ``[]`` and ``.loc`` selections on a small frame with string labels.
    """
    data = {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
    labels = ["a", "b", "c"]
    modin_df = pd.DataFrame(data, index=labels)
    pandas_df = pandas.DataFrame(data, index=labels)

    # Each selector is applied to the modin frame first, then to the pandas frame.
    selectors = (
        lambda df: df,
        lambda df: df["b"],
        lambda df: df[["b"]],
        lambda df: df[["b", "a"]],
        lambda df: df.loc["b"],
        lambda df: df.loc[["b"]],
        lambda df: df.loc[["b", "a"]],
        lambda df: df.loc[["b", "a"], ["a", "c"]],
        lambda df: df.loc[:, ["a", "c"]],
        lambda df: df.loc[:, ["c"]],
        lambda df: df.loc[[]],
    )
    for select in selectors:
        modin_result = select(modin_df)
        pandas_result = select(pandas_df)
        df_equals(modin_result, pandas_result)


def test_empty_df():
    """
    Test empty frames built from an index, from columns and from nothing, then
    test assigning a first column to an empty frame.
    """
    # NOTE(review): the three constructor checks run twice in a row, unchanged
    # from the previous version of this test; the second pass looks redundant.
    for _ in range(2):
        only_index = pd.DataFrame(index=["a", "b"])
        df_is_empty(only_index)
        assert_index_equal(only_index.index, pd.Index(["a", "b"]))
        assert len(only_index.columns) == 0

        only_columns = pd.DataFrame(columns=["a", "b"])
        df_is_empty(only_columns)
        assert len(only_columns.index) == 0
        assert_index_equal(only_columns.columns, pd.Index(["a", "b"]))

        nothing = pd.DataFrame()
        df_is_empty(nothing)
        assert len(nothing.index) == 0
        assert len(nothing.columns) == 0

    def check_first_column_assignment(modin_values, pandas_values):
        # Assign column "a" to a fresh empty frame of each library and compare.
        df = pd.DataFrame()
        pd_df = pandas.DataFrame()
        df["a"] = modin_values
        pd_df["a"] = pandas_values
        df_equals(df, pd_df)

    check_first_column_assignment([1, 2, 3, 4, 5], [1, 2, 3, 4, 5])
    check_first_column_assignment(list("ABCDEF"), list("ABCDEF"))
    check_first_column_assignment(
        pd.Series([1, 2, 3, 4, 5]), pandas.Series([1, 2, 3, 4, 5])
    )


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_abs(request, data):
    """
    Test ``abs``: same result as pandas, or the same exception type when pandas raises.
    """
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    try:
        expected = pandas_df.abs()
    except Exception as err:
        # pandas failed, so modin has to fail with the same exception type.
        with pytest.raises(type(err)):
            modin_df.abs()
        return

    df_equals(modin_df.abs(), expected)


@pytest.mark.parametrize("axis", [None, 0, 1])
@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_add_prefix(data, axis):
    """
    Test ``add_prefix`` by comparing the resulting columns and dtypes with pandas.
    """
    prefix = "TEST"
    modin_prefixed = pd.DataFrame(data).add_prefix(prefix, axis=axis)
    pandas_prefixed = pandas.DataFrame(data).add_prefix(prefix, axis=axis)

    df_equals(modin_prefixed.columns, pandas_prefixed.columns)
    # TODO(https://github.com/modin-project/modin/issues/3804):
    # make df_equals always check dtypes.
    df_equals(modin_prefixed.dtypes, pandas_prefixed.dtypes)


@pytest.mark.parametrize("axis", [None, 0, 1])
@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_add_suffix(data, axis):
    """
    Test ``add_suffix`` by comparing the resulting columns with pandas.
    """
    suffix = "TEST"
    modin_suffixed = pd.DataFrame(data).add_suffix(suffix, axis=axis)
    pandas_suffixed = pandas.DataFrame(data).add_suffix(suffix, axis=axis)

    df_equals(modin_suffixed.columns, pandas_suffixed.columns)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("testfunc", test_func_values, ids=test_func_keys)
@pytest.mark.parametrize(
    "na_action", [None, "ignore"], ids=["no_na_action", "ignore_na"]
)
def test_applymap(data, testfunc, na_action):
    """
    Test ``applymap`` with ``testfunc`` and ``na_action``, and check that an
    integer in place of the function raises ``ValueError``.
    """
    modin_df, pandas_df = create_test_dfs(data)

    # An integer is not a valid mapping function.
    with pytest.raises(ValueError):
        modin_df.applymap(2)

    eval_general(
        modin_df,
        pandas_df,
        lambda frame: frame.applymap(testfunc, na_action),
    )


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("testfunc", test_func_values, ids=test_func_keys)
def test_applymap_numeric(request, data, testfunc):
    """
    Test ``applymap`` for the parametrizations whose test name matches ``numeric_dfs``.
    """
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    # Every other parametrization passes without checking anything.
    if not name_contains(request.node.name, numeric_dfs):
        return

    try:
        expected = pandas_df.applymap(testfunc)
    except Exception as err:
        # pandas failed, so modin has to fail with the same exception type.
        with pytest.raises(type(err)):
            modin_df.applymap(testfunc)
        return

    df_equals(modin_df.applymap(testfunc), expected)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_at(data):
    """
    Test reading and writing single values through the ``.at`` accessor.
    """
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)
    first_column = modin_df.columns[0]

    # Scalar
    modin_scalar = modin_df.at[0, first_column]
    pandas_scalar = pandas_df.at[0, first_column]
    df_equals(modin_scalar, pandas_scalar)

    # Series
    modin_from_row = modin_df.loc[0].at[first_column]
    pandas_from_row = pandas_df.loc[0].at[first_column]
    df_equals(modin_from_row, pandas_from_row)

    # Write Item
    modin_target = modin_df.copy()
    pandas_target = pandas_df.copy()
    modin_target.at[1, first_column] = modin_df.at[0, first_column]
    pandas_target.at[1, first_column] = pandas_df.at[0, first_column]
    df_equals(modin_target, pandas_target)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_axes(data):
    """
    Test that the entries of ``axes`` match pandas pairwise.
    """
    modin_axes = pd.DataFrame(data).axes
    pandas_axes = pandas.DataFrame(data).axes

    assert all(
        np.array_equal(modin_axis, pandas_axis)
        for modin_axis, pandas_axis in zip(modin_axes, pandas_axes)
    )


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_copy(data):
    """
    Test deep and shallow ``copy`` of a modin frame.
    """
    # Deep copy tests
    source = pd.DataFrame(data)
    deep = source.copy(deep=True)

    assert deep is not source
    assert deep.index is not source.index
    assert deep.columns is not source.columns
    assert deep.dtypes is not source.dtypes

    partitions_are_comparable = (
        get_current_execution() != "BaseOnPython" and not current_execution_is_native()
    )
    if partitions_are_comparable:
        assert np.array_equal(
            deep._query_compiler._modin_frame._partitions,
            source._query_compiler._modin_frame._partitions,
        )
    df_equals(deep, source)

    # Shallow copy tests
    source = pd.DataFrame(data)
    shallow = source.copy(deep=False)

    assert shallow is not source
    assert shallow.index is source.index
    assert shallow.columns is source.columns
    # FIXME: we're different from pandas here as modin doesn't copy dtypes for a shallow copy
    # https://github.com/modin-project/modin/issues/5602
    # assert modin_df_cp.dtypes is not modin_df.dtypes

    # Overwrite the first column of the source, then compare it with the shallow copy.
    source[source.columns[0]] = 0
    df_equals(source, shallow)


def test_copy_empty_dataframe():
    """
    Test that copying a zero-row slice keeps the dtypes of the source frame.
    """
    source = pd.DataFrame(range(3))
    empty_copy = source[:0].copy()
    assert empty_copy.dtypes.equals(source.dtypes)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_dtypes(data):
    """
    Test that ``dtypes`` matches pandas.
    """
    modin_dtypes = pd.DataFrame(data).dtypes
    pandas_dtypes = pandas.DataFrame(data).dtypes

    df_equals(modin_dtypes, pandas_dtypes)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("key", indices_values, ids=indices_keys)
def test_get(data, key):
    """
    Test ``get`` without and with an explicit ``default``.
    """
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    # Without a default.
    df_equals(modin_df.get(key), pandas_df.get(key))

    # With a string default.
    modin_result = modin_df.get(key, default="default")
    pandas_result = pandas_df.get(key, default="default")
    df_equals(modin_result, pandas_result)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize(
    "dummy_na", bool_arg_values, ids=arg_keys("dummy_na", bool_arg_keys)
)
@pytest.mark.parametrize(
    "drop_first", bool_arg_values, ids=arg_keys("drop_first", bool_arg_keys)
)
def test_get_dummies(request, data, dummy_na, drop_first):
    """
    Test ``get_dummies``: same result as pandas, or the same exception type
    when pandas raises.
    """
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)
    options = {"dummy_na": dummy_na, "drop_first": drop_first}

    try:
        expected = pandas.get_dummies(pandas_df, **options)
    except Exception as err:
        # pandas failed, so modin has to fail with the same exception type.
        with pytest.raises(type(err)):
            pd.get_dummies(modin_df, **options)
        return

    df_equals(pd.get_dummies(modin_df, **options), expected)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_isna(data):
    """
    Test that ``isna`` matches pandas.
    """
    expected = pandas.DataFrame(data).isna()
    actual = pd.DataFrame(data).isna()

    df_equals(actual, expected)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_isnull(data):
    """
    Test that ``isnull`` matches pandas.
    """
    expected = pandas.DataFrame(data).isnull()
    actual = pd.DataFrame(data).isnull()

    df_equals(actual, expected)


def test_astype():
    """
    Test ``astype`` with scalar dtypes, a per-column mapping, an unknown column
    and a dtype Series with duplicated labels.
    """
    td = pandas.DataFrame(test_data["int_data"])[["col1", "index", "col3", "col4"]]
    modin_df = pd.DataFrame(td.values, index=td.index, columns=td.columns)
    expected_df = pandas.DataFrame(td.values, index=td.index, columns=td.columns)

    target_dtypes = (
        np.int32,
        np.float64,
        str,
        # pandas nullable dtype
        "Float64",
        "category",
        # per-column mapping covering only some of the columns
        {"col1": np.int32, "index": np.int64, "col3": str},
    )
    for target in target_dtypes:
        modin_casted = modin_df.astype(target)
        expected_casted = expected_df.astype(target)
        df_equals(modin_casted, expected_casted)

    # Frame built without data, filled through chained indexing, then cast to int.
    modin_df = pd.DataFrame(index=["row1"], columns=["col1"])
    modin_df["col1"]["row1"] = 11
    modin_casted = modin_df.astype(int)
    expected_df = pandas.DataFrame(index=["row1"], columns=["col1"])
    expected_df["col1"]["row1"] = 11
    expected_casted = expected_df.astype(int)
    df_equals(modin_casted, expected_casted)

    with pytest.raises(KeyError):
        modin_df.astype({"not_exists": np.uint8})

    def astype_with_duplicated_labels(df):
        # Build the dtype Series with the library that matches the frame.
        series_type = pd.Series if isinstance(df, pd.DataFrame) else pandas.Series
        return df.astype(series_type([str, str], index=["col1", "col1"]))

    # The dtypes series must have a unique index.
    eval_general(
        modin_df,
        expected_df,
        astype_with_duplicated_labels,
        expected_exception=ValueError(
            "cannot reindex on an axis with duplicate labels"
        ),
    )


@pytest.mark.parametrize("errors", ["raise", "ignore"])
def test_astype_errors(errors):
    """Compare ``astype("int", errors=...)`` on a column mixing strings and ints."""
    modin_df, pandas_df = create_test_dfs({"a": ["a", 2, -1]})
    if errors == "raise":
        pytest.xfail(reason="https://github.com/modin-project/modin/issues/7025")
    # dtypes are excluded from the comparison when errors are ignored, see
    # https://github.com/modin-project/modin/issues/5962
    compare_dtypes = errors != "ignore"
    eval_general(
        modin_df,
        pandas_df,
        lambda df: df.astype("int", errors=errors),
        comparator_kwargs={"check_dtypes": compare_dtypes},
        expected_exception=None,
    )


@pytest.mark.parametrize("has_dtypes", [False, True])
def test_astype_copy(has_dtypes):
    """Check ``astype(..., copy=False)`` on a frame and on a no-op series cast."""
    values = [1]
    modin_df = pd.DataFrame(values)
    pandas_df = pandas.DataFrame(values)
    drop_dtypes_cache = not has_dtypes
    if drop_dtypes_cache:
        modin_df._query_compiler.set_frame_dtypes_cache(None)
    eval_general(modin_df, pandas_df, lambda df: df.astype(str, copy=False))

    # trivial case where copying can be avoided, behavior should match pandas
    source_series = pd.Series([1, 2])
    if drop_dtypes_cache:
        # NOTE(review): the cache is reset on ``modin_df`` a second time here,
        # not on the series created just above; confirm this is intended.
        modin_df._query_compiler.set_frame_dtypes_cache(None)
    casted_series = source_series.astype("int64", copy=False)
    # The write below must be visible through both names for the check to pass.
    casted_series[0] = 10
    df_equals(source_series, casted_series)


@pytest.mark.parametrize("dtypes_are_dict", [True, False])
def test_astype_dict_or_series_multiple_column_partitions(dtypes_are_dict):
    """
    Cast with a dict or a pandas Series of dtypes that covers only part of the columns.

    The dtypes mapping is deliberately complex in that:
    - It applies to columns spanning multiple column partitions
    - Within a partition frame df:
      - dtypes.index is not a subset of df.columns
      - df.columns is not a subset of dtypes.index
    """
    modin_df, pandas_df = create_test_dfs(test_data["int_data"])
    new_dtypes = {} if dtypes_are_dict else pandas.Series()
    # Columns whose position is a multiple of three are left without a target dtype.
    target_by_remainder = {1: "string", 2: float}
    for position, column in enumerate(pandas_df.columns):
        remainder = position % 3
        if remainder in target_by_remainder:
            new_dtypes[column] = target_by_remainder[remainder]
    eval_general(modin_df, pandas_df, lambda df: df.astype(new_dtypes))


def test_astype_category():
    """Cast to categorical via the ``"category"`` alias and an explicit ``CategoricalDtype``."""
    data = {"col1": ["A", "A", "B", "B", "A"], "col2": [1, 2, 3, 4, 5]}
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)
    explicit_dtype = pd.CategoricalDtype(categories=["A", "B"])

    # Each target is applied once per column (dict form) and once frame-wide.
    targets = (
        {"col1": "category"},
        "category",
        {"col1": explicit_dtype},
        explicit_dtype,
    )
    for target in targets:
        modin_result = modin_df.astype(target)
        pandas_result = pandas_df.astype(target)
        df_equals(modin_result, pandas_result)
        assert modin_result.dtypes.equals(pandas_result.dtypes)


def test_astype_category_large():
    """Same checks as the small categorical test, on a 10,000-row frame."""
    series_length = 10_000
    data = {
        "col1": ["str{0}".format(i) for i in range(0, series_length)],
        "col2": list(range(0, series_length)),
    }
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)
    # Only the first two of the string values are listed as categories.
    explicit_dtype = pd.CategoricalDtype(categories=["str0", "str1"])

    targets = (
        {"col1": "category"},
        "category",
        {"col1": explicit_dtype},
        explicit_dtype,
    )
    for target in targets:
        modin_result = modin_df.astype(target)
        pandas_result = pandas_df.astype(target)
        df_equals(modin_result, pandas_result)
        assert modin_result.dtypes.equals(pandas_result.dtypes)


def test_astype_int64_to_astype_category_github_issue_6259():
    """Chain ``astype("Int64")`` and ``astype("category")`` on a single column."""
    modin_df, pandas_df = create_test_dfs(
        {"c0": [0, 1, 2, 3, 4], "par": ["foo", "boo", "bar", "foo", "boo"]},
        index=["a", "b", "c", "d", "e"],
    )
    eval_general(
        modin_df,
        pandas_df,
        lambda df: df["c0"].astype("Int64").astype("category"),
    )


@pytest.mark.skipif(
    get_current_execution() == "BaseOnPython" or current_execution_is_native(),
    reason="BaseOnPython and NativeQueryCompiler don't have proxy categories",
)
class TestCategoricalProxyDtype:
    """This class contains tests and test utilities for the ``LazyProxyCategoricalDtype`` class."""

    @staticmethod
    def _get_lazy_proxy():
        """
        Build a dataframe containing a column that has a proxy type and return
        this proxy together with an original dtype that this proxy is emulating.

        Returns
        -------
        (LazyProxyCategoricalDtype, pandas.CategoricalDtype, modin.pandas.DataFrame)

        Raises
        ------
        NotImplementedError
            If the current storage format is not "Pandas".
        """
        nchunks = 3
        pandas_df = pandas.DataFrame({"a": [1, 1, 2, 2, 3, 2], "b": [1, 2, 3, 4, 5, 6]})
        # The dtype pandas itself produces for column "a"; used as the reference value.
        original_dtype = pandas_df.astype({"a": "category"}).dtypes["a"]

        # Split the six rows along axis 0; presumably this yields `nchunks`
        # chunks of two rows each (see `length_list`).
        chunks = split_result_of_axis_func_pandas(
            axis=0,
            num_splits=nchunks,
            result=pandas_df,
            min_block_size=MinRowPartitionSize.get(),
            length_list=[2, 2, 2],
        )

        if StorageFormat.get() == "Pandas":
            df = pd.concat([pd.DataFrame(chunk) for chunk in chunks])
            # The concatenated frame is expected to keep one row partition per chunk.
            assert df._query_compiler._modin_frame._partitions.shape == (nchunks, 1)

            df = df.astype({"a": "category"})
            return df.dtypes["a"], original_dtype, df
        else:
            raise NotImplementedError()

    def test_update_proxy(self):
        """Verify that ``LazyProxyCategoricalDtype._update_proxy`` method works as expected."""
        lazy_proxy, _, _ = self._get_lazy_proxy()
        new_parent = pd.DataFrame({"a": [10, 20, 30]})._query_compiler._modin_frame

        assert isinstance(lazy_proxy, LazyProxyCategoricalDtype)
        # When we try to create a new proxy from the same arguments it should return itself
        assert (
            lazy_proxy._update_proxy(lazy_proxy._parent, lazy_proxy._column_name)
            is lazy_proxy
        )

        # When any of the arguments is changing we should create a new proxy
        proxy_with_new_column = lazy_proxy._update_proxy(
            lazy_proxy._parent, "other_column"
        )
        assert proxy_with_new_column is not lazy_proxy and isinstance(
            proxy_with_new_column, LazyProxyCategoricalDtype
        )

        # When any of the arguments is changing we should create a new proxy
        proxy_with_new_parent = lazy_proxy._update_proxy(
            new_parent, lazy_proxy._column_name
        )
        assert proxy_with_new_parent is not lazy_proxy and isinstance(
            proxy_with_new_parent, LazyProxyCategoricalDtype
        )

        lazy_proxy.categories  # trigger materialization
        # `._update_proxy` now should produce pandas Categoricals instead of a proxy as it already has materialized data.
        # An exact type comparison is used here because a proxy also passes
        # ``isinstance(..., pandas.CategoricalDtype)``.
        assert (
            type(lazy_proxy._update_proxy(lazy_proxy._parent, lazy_proxy._column_name))
            == pandas.CategoricalDtype
        )

    def test_update_proxy_implicit(self):
        """
        Verify that a lazy proxy correctly updates its parent when passed from one parent to another.
        """
        lazy_proxy, _, parent = self._get_lazy_proxy()
        parent_frame = parent._query_compiler._modin_frame

        if StorageFormat.get() == "Pandas":
            # The proxy must reference the frame of the dataframe it was taken from
            assert lazy_proxy._parent is parent_frame
        else:
            raise NotImplementedError(
                f"The test is not implemented for {StorageFormat.get()} storage format"
            )

        # Making a copy of the dataframe, the new proxy should now start pointing to the new parent
        new_parent = parent.copy()
        new_parent_frame = new_parent._query_compiler._modin_frame
        new_lazy_proxy = new_parent_frame.dtypes[lazy_proxy._column_name]

        if StorageFormat.get() == "Pandas":
            # Make sure that the old proxy is still pointing to the old parent
            assert lazy_proxy._parent is parent_frame
            assert new_lazy_proxy._parent is new_parent_frame
        else:
            raise NotImplementedError(
                f"The test is not implemented for {StorageFormat.get()} storage format"
            )

    def test_if_proxy_lazy(self):
        """Verify that proxy is able to pass simple comparison checks without triggering materialization."""
        lazy_proxy, actual_dtype, _ = self._get_lazy_proxy()

        assert isinstance(lazy_proxy, LazyProxyCategoricalDtype)
        assert not lazy_proxy._is_materialized

        # None of the checks in this group is expected to materialize the proxy;
        # the final assert of the group verifies that.
        assert lazy_proxy == "category"
        assert isinstance(lazy_proxy, pd.CategoricalDtype)
        assert isinstance(lazy_proxy, pandas.CategoricalDtype)
        assert str(lazy_proxy) == "category"
        assert str(lazy_proxy) == str(actual_dtype)
        assert not lazy_proxy.ordered
        assert not lazy_proxy._is_materialized

        # The checks below do materialize the categories
        assert lazy_proxy == actual_dtype
        assert actual_dtype == lazy_proxy
        assert repr(lazy_proxy) == repr(actual_dtype)
        assert lazy_proxy.categories.equals(actual_dtype.categories)
        assert lazy_proxy._is_materialized

    def test_proxy_as_dtype(self):
        """Verify that proxy can be used as an actual dtype."""
        lazy_proxy, actual_dtype, _ = self._get_lazy_proxy()

        assert isinstance(lazy_proxy, LazyProxyCategoricalDtype)
        assert not lazy_proxy._is_materialized

        modin_df2, pandas_df2 = create_test_dfs({"c": [2, 2, 3, 4, 5, 6]})
        # The modin frame is cast with the proxy, the pandas frame with the
        # reference dtype; each is passed to the callable as a (frame, dtype) pair.
        eval_general(
            (modin_df2, lazy_proxy),
            (pandas_df2, actual_dtype),
            lambda args: args[0].astype({"c": args[1]}),
        )

    def test_proxy_with_pandas_constructor(self):
        """Verify that users still can use pandas' constructor using `type(cat)(...)` notation."""
        lazy_proxy, _, _ = self._get_lazy_proxy()
        assert isinstance(lazy_proxy, LazyProxyCategoricalDtype)

        new_cat_values = pandas.Index([3, 4, 5])
        new_category_dtype = type(lazy_proxy)(categories=new_cat_values, ordered=True)
        # Constructing a new dtype must leave the source proxy unmaterialized,
        # while the new object already carries the categories it was given.
        assert not lazy_proxy._is_materialized
        assert new_category_dtype._is_materialized
        assert new_category_dtype.categories.equals(new_cat_values)
        assert new_category_dtype.ordered


def test_infer_objects_single_partition():
    """Run ``infer_objects`` on a frame whose only string row has been sliced away."""
    data = {"a": ["s", 2, 3]}
    modin_result = pd.DataFrame(data).iloc[1:].infer_objects()
    pandas_result = pandas.DataFrame(data).iloc[1:].infer_objects()

    df_equals(modin_result, pandas_result)
    assert modin_result.dtypes.equals(pandas_result.dtypes)


@pytest.mark.parametrize(
    "infer_objects", bool_arg_values, ids=arg_keys("infer_objects", bool_arg_keys)
)
@pytest.mark.parametrize(
    "convert_string", bool_arg_values, ids=arg_keys("convert_string", bool_arg_keys)
)
@pytest.mark.parametrize(
    "convert_integer", bool_arg_values, ids=arg_keys("convert_integer", bool_arg_keys)
)
@pytest.mark.parametrize(
    "convert_boolean", bool_arg_values, ids=arg_keys("convert_boolean", bool_arg_keys)
)
@pytest.mark.parametrize(
    "convert_floating", bool_arg_values, ids=arg_keys("convert_floating", bool_arg_keys)
)
@pytest.mark.exclude_in_sanity
def test_convert_dtypes_single_partition(
    infer_objects, convert_string, convert_integer, convert_boolean, convert_floating
):
    """Compare the dtypes produced by ``convert_dtypes`` for every flag combination."""
    # Sanity check, copied from pandas documentation:
    # https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.convert_dtypes.html
    object_dtype = np.dtype("O")
    float_dtype = np.dtype("float")
    data = {
        "a": pd.Series([1, 2, 3], dtype=np.dtype("int32")),
        "b": pd.Series(["x", "y", "z"], dtype=object_dtype),
        "c": pd.Series([True, False, np.nan], dtype=object_dtype),
        "d": pd.Series(["h", "i", np.nan], dtype=object_dtype),
        "e": pd.Series([10, np.nan, 20], dtype=float_dtype),
        "f": pd.Series([np.nan, 100.5, 200], dtype=float_dtype),
    }
    conversion_flags = dict(
        infer_objects=infer_objects,
        convert_string=convert_string,
        convert_integer=convert_integer,
        convert_boolean=convert_boolean,
        convert_floating=convert_floating,
    )
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)
    modin_dtypes = modin_df.convert_dtypes(**conversion_flags).dtypes
    pandas_dtypes = pandas_df.convert_dtypes(**conversion_flags).dtypes
    # Only the resulting dtypes are compared, not the values.
    assert modin_dtypes.equals(pandas_dtypes)


@pytest.mark.parametrize("dtype_backend", ["numpy_nullable", "pyarrow"])
def test_convert_dtypes_dtype_backend(dtype_backend):
    """Compare values and dtypes of ``convert_dtypes`` for each ``dtype_backend``."""
    object_dtype = np.dtype("O")
    float_dtype = np.dtype("float")
    data = {
        "a": pd.Series([1, 2, 3], dtype=np.dtype("int32")),
        "b": pd.Series(["x", "y", "z"], dtype=object_dtype),
        "c": pd.Series([True, False, np.nan], dtype=object_dtype),
        "d": pd.Series(["h", "i", np.nan], dtype=object_dtype),
        "e": pd.Series([10, np.nan, 20], dtype=float_dtype),
        "f": pd.Series([np.nan, 100.5, 200], dtype=float_dtype),
    }

    def values_and_dtypes_equal(df1, df2):
        # Compare the frames themselves and, separately, their dtypes.
        df_equals(df1, df2)
        df_equals(df1.dtypes, df2.dtypes)

    modin_df, pandas_df = create_test_dfs(data)
    eval_general(
        modin_df,
        pandas_df,
        lambda df: df.convert_dtypes(dtype_backend=dtype_backend),
        comparator=values_and_dtypes_equal,
    )


@pytest.mark.skipif(
    current_execution_is_native(),
    reason="NativeQueryCompiler does not contain partitions.",
)
def test_convert_dtypes_multiple_row_partitions():
    """Run ``convert_dtypes`` on a frame concatenated from two converted parts."""
    # Column 0 should have string dtype in the first part
    # and an int dtype in the second part
    string_part = pd.DataFrame(["a"]).convert_dtypes()
    int_part = pd.DataFrame([1]).convert_dtypes()
    modin_df = pd.concat([string_part, int_part])
    if StorageFormat.get() == "Pandas":
        partitions = modin_df._query_compiler._modin_frame._partitions
        assert partitions.shape == (2, 1)
    pandas_df = pandas.DataFrame(["a", 1], index=[0, 0])
    # The initial dataframes should be the same
    df_equals(modin_df, pandas_df)
    # TODO(https://github.com/modin-project/modin/pull/3805): delete
    # this assert once df_equals checks dtypes
    assert modin_df.dtypes.equals(pandas_df.dtypes)

    modin_converted = modin_df.convert_dtypes()
    pandas_converted = pandas_df.convert_dtypes()
    df_equals(modin_converted, pandas_converted)
    assert modin_converted.dtypes.equals(pandas_converted.dtypes)


def test_convert_dtypes_5653():
    """Check that strings followed by an all-``None`` part convert to ``"string"``."""
    string_part = pd.DataFrame({"col1": ["a", "b", "c", "d"]})
    none_part = pd.DataFrame({"col1": [None] * 4})
    modin_df = pd.concat([string_part, none_part])
    if StorageFormat.get() == "Pandas":
        partitions = modin_df._query_compiler._modin_frame._partitions
        assert partitions.shape == (2, 1)
    converted = modin_df.convert_dtypes()
    assert len(converted.dtypes) == 1
    assert converted.dtypes.iloc[0] == "string"


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("axis", axis_values, ids=axis_keys)
@pytest.mark.parametrize("bound_type", ["list", "series"], ids=["list", "series"])
@pytest.mark.exclude_in_sanity
def test_clip(request, data, axis, bound_type):
    """Compare ``clip`` with scalar, array and series bounds against pandas."""
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    # Nothing is checked for datasets that are not listed in `numeric_dfs`.
    if not name_contains(request.node.name, numeric_dfs):
        return

    # Array-like bounds must be as long as the axis they are aligned with.
    if pandas.DataFrame()._get_axis_number(axis):
        ind_len = len(modin_df.columns)
    else:
        ind_len = len(modin_df.index)

    # set bounds
    lower, upper = np.sort(random_state.randint(RAND_LOW, RAND_HIGH, 2))

    # test only upper scalar bound
    df_equals(
        modin_df.clip(None, upper, axis=axis),
        pandas_df.clip(None, upper, axis=axis),
    )

    # test lower and upper scalar bound
    df_equals(
        modin_df.clip(lower, upper, axis=axis),
        pandas_df.clip(lower, upper, axis=axis),
    )

    lower = random_state.randint(RAND_LOW, RAND_HIGH, ind_len)
    upper = random_state.randint(RAND_LOW, RAND_HIGH, ind_len)

    if bound_type == "series":
        modin_lower, pandas_lower = pd.Series(lower), pandas.Series(lower)
        modin_upper, pandas_upper = pd.Series(upper), pandas.Series(upper)
    else:
        modin_lower = pandas_lower = lower
        modin_upper = pandas_upper = upper

    # test lower and upper list bound on each column
    df_equals(
        modin_df.clip(modin_lower, modin_upper, axis=axis),
        pandas_df.clip(pandas_lower, pandas_upper, axis=axis),
    )

    # test only upper list bound on each column
    df_equals(
        modin_df.clip(np.nan, modin_upper, axis=axis),
        pandas_df.clip(np.nan, pandas_upper, axis=axis),
    )

    # A list bound together with axis=None is expected to be rejected.
    with pytest.raises(ValueError):
        modin_df.clip(lower=[1, 2, 3], axis=None)


def test_clip_4485():
    """Clip a one-element frame with a one-element list as the lower bound."""
    df_equals(
        pd.DataFrame([1]).clip([3]),
        pandas.DataFrame([1]).clip([3]),
    )


def test_drop():
    """Exercise ``DataFrame.drop`` with labels, missing labels, duplicates and a MultiIndex."""
    frame_data = {"A": [1, 2, 3, 4], "B": [0, 1, 2, 3]}
    pandas_simple = pandas.DataFrame(frame_data)
    modin_simple = pd.DataFrame(frame_data)
    df_equals(modin_simple.drop("A", axis=1), pandas_simple[["B"]])
    df_equals(modin_simple.drop(["A", "B"], axis="columns"), pandas_simple[[]])
    df_equals(modin_simple.drop([0, 1, 3], axis=0), pandas_simple.loc[[2], :])
    df_equals(modin_simple.drop([0, 3], axis="index"), pandas_simple.loc[[1, 2], :])

    # Labels that do not exist must raise unless errors are ignored.
    missing_label_calls = (
        (5, {}),
        ("C", {"axis": 1}),
        ([1, 5], {}),
        (["A", "C"], {"axis": 1}),
    )
    for labels, drop_kwargs in missing_label_calls:
        with pytest.raises(KeyError):
            modin_simple.drop(labels, **drop_kwargs)

    # errors = 'ignore'
    df_equals(modin_simple.drop(5, errors="ignore"), pandas_simple)
    df_equals(
        modin_simple.drop([0, 5], errors="ignore"), pandas_simple.loc[[1, 2, 3], :]
    )
    df_equals(modin_simple.drop("C", axis=1, errors="ignore"), pandas_simple)
    df_equals(
        modin_simple.drop(["A", "C"], axis=1, errors="ignore"), pandas_simple[["B"]]
    )

    # non-unique
    pandas_dup = pandas.DataFrame(
        zip(range(3), range(-3, 1), list("abc")), columns=["a", "a", "b"]
    )
    modin_dup = pd.DataFrame(pandas_dup)
    df_equals(modin_dup.drop("a", axis=1), pandas_dup[["b"]])
    df_equals(modin_dup.drop("b", axis="columns"), pandas_dup["a"])
    df_equals(modin_dup.drop([]), pandas_dup)

    # Same data with unique columns but a non-unique index.
    pandas_dup = pandas_dup.set_index(pandas.Index(["X", "Y", "X"]))
    pandas_dup.columns = list("abc")
    modin_dup = pd.DataFrame(pandas_dup)
    df_equals(modin_dup.drop("X", axis="rows"), pandas_dup.loc[["Y"], :])
    df_equals(modin_dup.drop(["X", "Y"], axis=0), pandas_dup.loc[[], :])

    # inplace cache issue
    frame_data = random_state.randn(10, 3)
    pandas_random = pandas.DataFrame(frame_data, columns=list("abc"))
    modin_random = pd.DataFrame(frame_data, columns=list("abc"))
    expected = pandas_random[~(pandas_random.b > 0)]
    positive_b_index = pandas_random[pandas_random.b > 0].index
    modin_random.drop(labels=positive_b_index, inplace=True)
    df_equals(modin_random, expected)

    midx = pd.MultiIndex(
        levels=[["lama", "cow", "falcon"], ["speed", "weight", "length"]],
        codes=[[0, 0, 0, 1, 1, 1, 2, 2, 2], [0, 1, 2, 0, 1, 2, 0, 1, 2]],
    )
    multiindex_df = pd.DataFrame(
        data=[
            [45, 30],
            [200, 100],
            [1.5, 1],
            [30, 20],
            [250, 150],
            [1.5, 0.8],
            [320, 250],
            [1, 0.8],
            [0.3, 0.2],
        ],
        index=midx,
        columns=["big", "small"],
    )
    # Only the warning behaviour is checked here; the result is discarded.
    expect_default_to_pandas = not df_or_series_using_native_execution(multiindex_df)
    with warns_that_defaulting_to_pandas_if(expect_default_to_pandas):
        multiindex_df.drop(index="length", level=1)


def test_drop_api_equivalence():
    """Check that the labels/axis and index/columns spellings of ``drop`` agree."""
    # equivalence of the labels/axis and index/columns API's
    modin_df = pd.DataFrame(
        [[1, 2, 3], [3, 4, 5], [5, 6, 7]],
        index=["a", "b", "c"],
        columns=["d", "e", "f"],
    )

    df_equals(modin_df.drop("a"), modin_df.drop(index="a"))
    df_equals(modin_df.drop("d", axis=1), modin_df.drop(columns="d"))
    df_equals(modin_df.drop(labels="e", axis=1), modin_df.drop(columns="e"))
    df_equals(modin_df.drop(["a"], axis=0), modin_df.drop(index=["a"]))
    df_equals(
        modin_df.drop(["a"], axis=0).drop(["d"], axis=1),
        modin_df.drop(index=["a"], columns=["d"]),
    )

    # Mixing `labels` with `index`/`columns`, or passing no labels at all,
    # is expected to be rejected.
    invalid_calls = (
        {"labels": "a", "index": "b"},
        {"labels": "a", "columns": "b"},
        {"axis": 1},
    )
    for drop_kwargs in invalid_calls:
        with pytest.raises(ValueError):
            modin_df.drop(**drop_kwargs)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_drop_transpose(data):
    """Drop columns, rows, and both from the transposed frame."""
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    drop_variants = (
        {"columns": [0, 1, 2]},
        {"index": ["col3", "col1"]},
        {"columns": [0, 1, 2], "index": ["col3", "col1"]},
    )
    for drop_kwargs in drop_variants:
        modin_result = modin_df.T.drop(**drop_kwargs)
        pandas_result = pandas_df.T.drop(**drop_kwargs)
        df_equals(modin_result, pandas_result)


def test_droplevel():
    """Call ``droplevel`` on both axes of a doubly multi-indexed frame.

    No result is asserted; the calls are only required to complete.
    """
    base = pd.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
    df = base.set_index([0, 1]).rename_axis(["a", "b"])
    df.columns = pd.MultiIndex.from_tuples(
        [("c", "e"), ("d", "f")], names=["level_1", "level_2"]
    )
    df.droplevel("a")
    df.droplevel("level_2", axis=1)


@pytest.mark.parametrize(
    "data", test_data_with_duplicates_values, ids=test_data_with_duplicates_keys
)
@pytest.mark.parametrize(
    "keep", ["last", "first", False], ids=["last", "first", "False"]
)
@pytest.mark.parametrize(
    "subset",
    [None, "col1", "name", ("col1", "col3"), ["col1", "col3", "col7"]],
    ids=["None", "string", "name", "tuple", "list"],
)
@pytest.mark.parametrize("ignore_index", [True, False], ids=["True", "False"])
@pytest.mark.exclude_in_sanity
def test_drop_duplicates(data, keep, subset, ignore_index):
    """Compare ``drop_duplicates`` with pandas, both out of place and in place."""
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)
    shared_kwargs = {"keep": keep, "subset": subset, "ignore_index": ignore_index}

    # Out of place: if pandas raises, modin must raise the same exception type.
    try:
        pandas_df.drop_duplicates(inplace=False, **shared_kwargs)
    except Exception as err:
        with pytest.raises(type(err)):
            modin_df.drop_duplicates(inplace=False, **shared_kwargs)
    else:
        sort_if_range_partitioning(
            pandas_df.drop_duplicates(inplace=False, **shared_kwargs),
            modin_df.drop_duplicates(inplace=False, **shared_kwargs),
        )

    # In place: same contract, the frames themselves are compared afterwards.
    try:
        pandas_df.drop_duplicates(inplace=True, **shared_kwargs)
    except Exception as err:
        with pytest.raises(type(err)):
            modin_df.drop_duplicates(inplace=True, **shared_kwargs)
    else:
        modin_df.drop_duplicates(inplace=True, **shared_kwargs)
        sort_if_range_partitioning(modin_df, pandas_df)


def test_drop_duplicates_with_missing_index_values():
    """Sort and de-duplicate a frame whose integer index has gaps."""
    columns = ["value", "time", "id"]
    # Index labels 4..41 with the runs 16..19 and 28..31 missing.
    index = [*range(4, 16), *range(20, 28), *range(32, 42)]
    rows = [
        ["3", 1279213398000.0, 88.0],
        ["3", 1279204682000.0, 88.0],
        ["0", 1245772835000.0, 448.0],
        ["0", 1270564258000.0, 32.0],
        ["0", 1267106669000.0, 118.0],
        ["7", 1300621123000.0, 5.0],
        ["0", 1251130752000.0, 957.0],
        ["0", 1311683506000.0, 62.0],
        ["9", 1283692698000.0, 89.0],
        ["9", 1270234253000.0, 64.0],
        ["0", 1285088818000.0, 50.0],
        ["0", 1218212725000.0, 695.0],
        ["2", 1383933968000.0, 348.0],
        ["0", 1368227625000.0, 257.0],
        ["1", 1454514093000.0, 446.0],
        ["1", 1428497427000.0, 134.0],
        ["1", 1459184936000.0, 568.0],
        ["1", 1502293302000.0, 599.0],
        ["1", 1491833358000.0, 829.0],
        ["1", 1485431534000.0, 806.0],
        ["8", 1351800505000.0, 101.0],
        ["0", 1357247721000.0, 916.0],
        ["0", 1335804423000.0, 370.0],
        ["24", 1327547726000.0, 720.0],
        ["0", 1332334140000.0, 415.0],
        ["0", 1309543100000.0, 30.0],
        ["18", 1309541141000.0, 30.0],
        ["0", 1298979435000.0, 48.0],
        ["14", 1276098160000.0, 59.0],
        ["0", 1233936302000.0, 109.0],
    ]

    pandas_df = pandas.DataFrame(rows, index=index, columns=columns)
    modin_df = pd.DataFrame(rows, index=index, columns=columns)

    sort_keys = ["id", "time"]
    modin_result = modin_df.sort_values(sort_keys).drop_duplicates(["id"])
    pandas_result = pandas_df.sort_values(sort_keys).drop_duplicates(["id"])
    sort_if_range_partitioning(modin_result, pandas_result)


def test_drop_duplicates_after_sort():
    """De-duplicate on ``value`` after sorting by ``value`` and ``time``."""
    records = [
        {"value": 1, "time": 2},
        {"value": 1, "time": 1},
        {"value": 2, "time": 1},
        {"value": 2, "time": 2},
    ]
    modin_df = pd.DataFrame(records)
    pandas_df = pandas.DataFrame(records)

    sort_keys = ["value", "time"]
    modin_result = modin_df.sort_values(sort_keys).drop_duplicates(["value"])
    pandas_result = pandas_df.sort_values(sort_keys).drop_duplicates(["value"])
    sort_if_range_partitioning(modin_result, pandas_result)


def test_drop_duplicates_with_repeated_index_values():
    """De-duplicate a frame in which every row carries the same index label."""
    # This tests for issue #4467: https://github.com/modin-project/modin/issues/4467
    modin_df, pandas_df = create_test_dfs([[0], [1], [0]], index=[0, 0, 0])
    eval_general(
        modin_df,
        pandas_df,
        lambda df: df.drop_duplicates(),
        comparator=sort_if_range_partitioning,
    )


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("axis", axis_values, ids=axis_keys)
@pytest.mark.parametrize("how", ["any", "all"], ids=["any", "all"])
def test_dropna(data, axis, how):
    """Check argument validation of ``dropna`` and compare a valid call with pandas."""
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    # (expected exception type, keyword arguments that should trigger it)
    invalid_calls = (
        (ValueError, {"how": "invalid"}),
        (TypeError, {"how": None, "thresh": None}),
        (KeyError, {"subset": ["NotExists"], "how": how}),
    )
    for exception_type, bad_kwargs in invalid_calls:
        with pytest.raises(exception_type):
            modin_df.dropna(axis=axis, **bad_kwargs)

    df_equals(
        modin_df.dropna(axis=axis, how=how),
        pandas_df.dropna(axis=axis, how=how),
    )


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_dropna_inplace(data):
    """Compare in-place ``dropna`` on modin with pandas for several argument sets."""
    # Default arguments: modin drops in place, pandas returns a new frame.
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)
    pandas_result = pandas_df.dropna()
    modin_df.dropna(inplace=True)
    df_equals(modin_df, pandas_result)

    # Remaining cases: both libraries drop in place on freshly built frames.
    for dropna_kwargs in ({"thresh": 2}, {"axis": 1, "how": "any"}):
        modin_df = pd.DataFrame(data)
        pandas_df = pandas.DataFrame(data)
        pandas_df.dropna(inplace=True, **dropna_kwargs)
        modin_df.dropna(inplace=True, **dropna_kwargs)
        df_equals(modin_df, pandas_df)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_dropna_multiple_axes(data):
    """Check that ``dropna`` rejects a list or a tuple of axes with ``TypeError``."""
    modin_df = pd.DataFrame(data)

    for axes in ([0, 1], (0, 1)):
        with pytest.raises(TypeError):
            modin_df.dropna(how="all", axis=axes)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_dropna_subset(request, data):
    """Compare ``dropna`` restricted to the first two columns, then the first two rows."""
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    # Nothing is checked for the empty dataset.
    if "empty_data" in request.node.name:
        return

    column_subset = modin_df.columns[0:2]
    for how in ("all", "any"):
        df_equals(
            modin_df.dropna(how=how, subset=column_subset),
            pandas_df.dropna(how=how, subset=column_subset),
        )

    row_subset = modin_df.index[0:2]
    for how in ("all", "any"):
        df_equals(
            modin_df.dropna(how=how, axis=1, subset=row_subset),
            pandas_df.dropna(how=how, axis=1, subset=row_subset),
        )


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("axis,subset", [(0, list("EF")), (1, [4, 5])])
def test_dropna_subset_error(data, axis, subset):
    """Run ``dropna`` with the parametrized axis/subset pair on both libraries.

    ``KeyError(["E", "F"])`` is passed as the expected exception for every
    parametrization.
    """
    modin_df, pandas_df = create_test_dfs(data)
    eval_general(
        modin_df,
        pandas_df,
        lambda df: df.dropna(axis=axis, subset=subset),
        expected_exception=KeyError(["E", "F"]),
    )


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("astype", ["category", "int32", "float"])
def test_insert_dtypes(data, astype, request):
    """Insert the first column cast to ``astype`` as a new column named "TypeSaver"."""
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    # categories with NaN works incorrect for now
    if astype == "category":
        if pandas_df.iloc[:, 0].isnull().any():
            return

    if "int32-float_nan_data" in request.node.callspec.id:
        pytest.xfail(reason="https://github.com/modin-project/modin/issues/7026")
    eval_insert(
        modin_df,
        pandas_df,
        col="TypeSaver",
        value=lambda df: df.iloc[:, 0].astype(astype),
        expected_exception=None,
    )


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("loc", [-3, 0, 3])
def test_insert_loc(data, loc):
    """Insert the first column at position ``loc``; ``loc == -3`` is expected to fail."""
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)
    expected_exception = ValueError("unbounded slice") if loc == -3 else None
    eval_insert(
        modin_df,
        pandas_df,
        loc=loc,
        value=lambda df: df.iloc[:, 0],
        expected_exception=expected_exception,
    )


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_insert(data):
    """Exercise ``insert`` through ``eval_insert`` with valid and invalid arguments.

    NOTE(review): every call reuses the same frame pair, and the expected
    messages are built from the frames' columns/length at the moment of each
    call, so the order of the calls below must be kept.
    """
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    def first_column(df):
        return df[df.columns[0]]

    def first_column_as_frame(df):
        return df[[df.columns[0]]]

    # Valid inserts
    eval_insert(modin_df, pandas_df, col="Duplicate", value=first_column)
    eval_insert(modin_df, pandas_df, col="Scalar", value=100)

    empty_frame_columns = list("ab")
    eval_insert(
        pd.DataFrame(columns=empty_frame_columns),
        pandas.DataFrame(columns=empty_frame_columns),
        col="Series insert",
        value=first_column,
    )
    eval_insert(
        modin_df,
        pandas_df,
        col="DataFrame insert",
        value=first_column_as_frame,
    )
    eval_insert(
        modin_df,
        pandas_df,
        col="Different indices",
        value=lambda df: first_column_as_frame(df).set_index(df.index[::-1]),
    )
    eval_insert(
        modin_df,
        pandas_df,
        col="2d list insert",
        value=lambda df: [[1, 2]] * len(df),
    )

    # Bad inserts
    whole_frame_error = ValueError(
        f"Expected a one-dimensional object, got a DataFrame with {len(pandas_df.columns)} columns instead."
    )
    eval_insert(
        modin_df,
        pandas_df,
        col="Bad Column",
        value=lambda df: df,
        expected_exception=whole_frame_error,
    )

    too_short_error = ValueError(
        f"Length of values ({len(pandas_df)-1}) does not match length of index ({len(pandas_df)})"
    )
    eval_insert(
        modin_df,
        pandas_df,
        col="Too Short",
        value=lambda df: list(first_column(df))[:-1],
        expected_exception=too_short_error,
    )

    eval_insert(
        modin_df,
        pandas_df,
        col=lambda df: df.columns[0],
        value=first_column,
        expected_exception=ValueError("cannot insert 2d list insert, already exists"),
    )

    bad_loc_error = IndexError(
        f"index {len(pandas_df.columns) + 100} is out of bounds for axis 0 with size {len(pandas_df.columns)}"
    )
    eval_insert(
        modin_df,
        pandas_df,
        loc=lambda df: len(df.columns) + 100,
        col="Bad Loc",
        value=100,
        expected_exception=bad_loc_error,
    )


def test_insert_4407():
    """Insert 2D numpy arrays of several shapes and compare frames and dtypes."""
    modin_df, pandas_df = create_test_dfs({"col1": [1, 2, 3], "col2": [2, 3, 4]})

    def comparator(df1, df2):
        # Compare the dtypes first, then the frames themselves.
        assert_series_equal(df1.dtypes, df2.dtypes, check_index=False)
        return df_equals(df1, df2)

    values_to_insert = (
        pandas_df.to_numpy(),
        np.array([[1]] * 3),
        np.array([[1, 2, 3], [4, 5, 6]]),
    )
    # Positions missing from this mapping expect no exception (``None``).
    expected_by_position = {
        0: ValueError("Expected a 1D array, got an array with shape (3, 2)"),
        # FIXME: https://github.com/modin-project/modin/issues/7080
        2: False,
    }

    for idx, value in enumerate(values_to_insert):
        eval_insert(
            modin_df,
            pandas_df,
            loc=0,
            col=f"test_col{idx}",
            value=value,
            comparator=comparator,
            expected_exception=expected_by_position.get(idx),
        )


def test_insert_modin_array():
    """Inserting a ``modin.numpy`` array must give the same frame as the numpy array."""
    from modin.numpy import array

    data = {"col1": [1, 2, 3], "col2": [2, 3, 4]}
    np_value = np.array([7, 7, 7])
    md_np_value = array(np_value)

    frame_with_numpy = pd.DataFrame(data)
    frame_with_modin_array = pd.DataFrame(data)

    frame_with_numpy.insert(1, "new_col", np_value)
    frame_with_modin_array.insert(1, "new_col", md_np_value)

    df_equals(frame_with_numpy, frame_with_modin_array)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_ndim(data):
    """Check that ``ndim`` of the modin frame equals ``ndim`` of the pandas frame."""
    modin_ndim = pd.DataFrame(data).ndim
    pandas_ndim = pandas.DataFrame(data).ndim

    assert modin_ndim == pandas_ndim


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_notna(data):
    """Compare ``notna()`` results of the modin and pandas frames."""
    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)

    modin_result = modin_df.notna()
    pandas_result = pandas_df.notna()
    df_equals(modin_result, pandas_result)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_notnull(data):
    """Compare ``notnull()`` results of the modin and pandas frames."""
    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)

    modin_result = modin_df.notnull()
    pandas_result = pandas_df.notnull()
    df_equals(modin_result, pandas_result)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_round(data):
    """Compare ``round()`` and ``round(1)`` between the modin and pandas frames."""
    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)

    # First with the default number of decimals, then with one decimal.
    for round_args in ((), (1,)):
        df_equals(modin_df.round(*round_args), pandas_df.round(*round_args))


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("axis", axis_values, ids=axis_keys)
def test_set_axis(data, axis):
    """Check ``set_axis`` with relabelled axis values for ``copy=True`` and ``copy=False``.

    Every label along ``axis`` is replaced with ``"<label>_<position>"``, so the
    relabelled frame is expected to differ from a copy taken before the call.
    """
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    # Resolve ``axis`` to its number so it can index into ``shape``.
    x = pandas.DataFrame()._get_axis_number(axis)
    index = modin_df.columns if x else modin_df.index
    labels = ["{0}_{1}".format(index[i], i) for i in range(modin_df.shape[x])]

    eval_general(
        modin_df, pandas_df, lambda df: df.set_axis(labels, axis=axis, copy=True)
    )

    modin_df_copy = modin_df.copy()
    modin_df = modin_df.set_axis(labels, axis=axis, copy=False)

    # Check that the copy and original are different: ``df_equals`` must fail.
    with pytest.raises(AssertionError):
        df_equals(modin_df, modin_df_copy)

    pandas_df = pandas_df.set_axis(labels, axis=axis)
    df_equals(modin_df, pandas_df)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("drop", bool_arg_values, ids=arg_keys("drop", bool_arg_keys))
@pytest.mark.parametrize(
    "append", bool_arg_values, ids=arg_keys("append", bool_arg_keys)
)
def test_set_index(request, data, drop, append):
    """Check ``set_index`` on the first column, both out-of-place and in-place.

    Nothing is checked for test ids that contain ``"empty_data"``.
    """
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    if "empty_data" not in request.node.name:
        key = modin_df.columns[0]
        modin_result = modin_df.set_index(key, drop=drop, append=append, inplace=False)
        pandas_result = pandas_df.set_index(
            key, drop=drop, append=append, inplace=False
        )
        df_equals(modin_result, pandas_result)

        modin_df_copy = modin_df.copy()
        modin_df.set_index(key, drop=drop, append=append, inplace=True)

        # Check that the copy and original are different: ``df_equals`` must fail.
        with pytest.raises(AssertionError):
            df_equals(modin_df, modin_df_copy)

        pandas_df.set_index(key, drop=drop, append=append, inplace=True)
        df_equals(modin_df, pandas_df)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_shape(data):
    """Check that ``shape`` of the modin frame equals ``shape`` of the pandas frame."""
    modin_shape = pd.DataFrame(data).shape
    pandas_shape = pandas.DataFrame(data).shape

    assert modin_shape == pandas_shape


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_size(data):
    """Check that ``size`` of the modin frame equals ``size`` of the pandas frame."""
    modin_size = pd.DataFrame(data).size
    pandas_size = pandas.DataFrame(data).size

    assert modin_size == pandas_size


def test_squeeze():
    """Compare ``squeeze()`` on frames of several shapes, then a row of a date-indexed frame."""
    # Different data for different cases
    squeeze_cases = (
        {
            "col1": [0, 1, 2, 3],
            "col2": [4, 5, 6, 7],
            "col3": [8, 9, 10, 11],
            "col4": [12, 13, 14, 15],
            "col5": [0, 0, 0, 0],
        },
        {"col1": [0, 1, 2, 3]},
        {
            "col1": [0],
            "col2": [4],
            "col3": [8],
            "col4": [12],
            "col5": [0],
        },
        {"col1": [2]},
        {"col1": ["string"]},
    )
    for frame_data in squeeze_cases:
        pandas_squeezed = pandas.DataFrame(frame_data).squeeze()
        modin_squeezed = pd.DataFrame(frame_data).squeeze()
        df_equals(modin_squeezed, pandas_squeezed)

    dates = [
        pd.Timestamp(day)
        for day in ("2019-01-02", "2019-01-03", "2019-01-04", "2019-01-05")
    ]
    data = [dates, [1, 1, 1, 2]]
    row_labels = ["date", "value"]
    df = pd.DataFrame(data, index=row_labels).T
    pf = pandas.DataFrame(data, index=row_labels).T
    for frame in (df, pf):
        frame.set_index("date", inplace=True)
    df_equals(df.iloc[0], pf.iloc[0])


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_transpose(data):
    """Compare transposed frames and several operations applied after transposing."""
    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)

    transposed_operations = (
        lambda df: df.T,
        lambda df: df.transpose(),
        # Test for map across full axis for select indices
        lambda df: df.T.dropna(),
        # Test for map across full axis
        lambda df: df.T.nunique(),
        # Test for map across blocks
        lambda df: df.T.notna(),
    )
    for operation in transposed_operations:
        df_equals(operation(modin_df), operation(pandas_df))


@pytest.mark.parametrize(
    "data, other_data",
    [
        ({"A": [1, 2, 3], "B": [400, 500, 600]}, {"B": [4, 5, 6], "C": [7, 8, 9]}),
        ({"C": [1, 2, 3], "B": [400, 500, 600]}, {"B": [4, 5, 6], "A": [7, 8, 9]}),
        (
            {"A": ["a", "b", "c"], "B": ["x", "y", "z"]},
            {"B": ["d", "e", "f", "g", "h", "i"]},
        ),
        ({"A": [1, 2, 3], "B": [400, 500, 600]}, {"B": [4, np.nan, 6]}),
    ],
)
@pytest.mark.parametrize("errors", ["raise", "ignore"])
def test_update(data, other_data, errors):
    """Run in-place ``update`` with another frame of the same library for each ``errors`` mode."""
    modin_df, pandas_df = create_test_dfs(data)
    other_modin_df, other_pandas_df = create_test_dfs(other_data)

    expected_exception = ValueError("Data overlaps.") if errors == "raise" else None

    def update_from_other(df):
        # Pair each frame with the "other" frame of its own library.
        other = other_modin_df if isinstance(df, pd.DataFrame) else other_pandas_df
        return df.update(other, errors=errors)

    eval_general(
        modin_df,
        pandas_df,
        update_from_other,
        __inplace__=True,
        expected_exception=expected_exception,
    )


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test___neg__(request, data):
    """Compare ``__neg__`` with pandas, including the exception type when pandas raises."""
    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)

    try:
        expected = pandas_df.__neg__()
    except Exception as err:
        # pandas failed, so modin has to fail with the same exception type.
        with pytest.raises(type(err)):
            modin_df.__neg__()
        return

    df_equals(modin_df.__neg__(), expected)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test___invert__(data, request):
    """Apply ``~`` to a modin/pandas frame pair through ``eval_general``."""
    # FIXME: https://github.com/modin-project/modin/issues/7081
    is_float_nan_case = "float_nan_data" in request.node.callspec.id
    expected_exception = False if is_float_nan_case else None

    modin_df, pandas_df = create_test_dfs(data)
    eval_general(
        modin_df,
        pandas_df,
        lambda df: ~df,
        expected_exception=expected_exception,
    )


def test___invert___bool():
    """Compare ``~`` on a single-value boolean frame between modin and pandas."""
    data = [False]
    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)

    modin_inverted = ~modin_df
    pandas_inverted = ~pandas_df
    df_equals(modin_inverted, pandas_inverted)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test___delitem__(request, data):
    """Delete the first and then the last column with ``__delitem__`` and compare.

    Nothing is checked for test ids that contain ``"empty_data"``.
    """
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    if "empty_data" in request.node.name:
        return

    first_label = pandas_df.columns[0]

    # Work on copies of the freshly built frames.
    modin_df = modin_df.copy()
    pandas_df = pandas_df.copy()
    modin_df.__delitem__(first_label)
    pandas_df.__delitem__(first_label)
    df_equals(modin_df, pandas_df)

    # Issue 2027
    last_label = pandas_df.iloc[:, -1].name
    modin_df.__delitem__(last_label)
    pandas_df.__delitem__(last_label)
    df_equals(modin_df, pandas_df)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test___nonzero__(data):
    """Check that ``__nonzero__`` on a modin frame raises ``ValueError``."""
    frame = pd.DataFrame(data)

    # Always raises ValueError
    with pytest.raises(ValueError):
        frame.__nonzero__()


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test___abs__(request, data):
    """Compare ``abs()`` with pandas, including the exception type when pandas raises."""
    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)

    try:
        expected = abs(pandas_df)
    except Exception as err:
        # pandas failed, so modin has to fail with the same exception type.
        with pytest.raises(type(err)):
            abs(modin_df)
        return

    df_equals(abs(modin_df), expected)


def test___round__():
    """Run ``__round__`` on frames built from the first test data set."""
    data = test_data_values[0]
    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)

    def call_round(df):
        return df.__round__()

    eval_general(modin_df, pandas_df, call_round)


@pytest.mark.parametrize(
    "get_index",
    [
        pytest.param(lambda idx: None, id="None_idx"),
        pytest.param(lambda idx: ["a", "b", "c"], id="No_intersection_idx"),
        pytest.param(lambda idx: idx, id="Equal_idx"),
        pytest.param(lambda idx: idx[::-1], id="Reversed_idx"),
    ],
)
@pytest.mark.parametrize(
    "get_columns",
    [
        pytest.param(lambda idx: None, id="None_idx"),
        pytest.param(lambda idx: ["a", "b", "c"], id="No_intersection_idx"),
        pytest.param(lambda idx: idx, id="Equal_idx"),
        pytest.param(lambda idx: idx[::-1], id="Reversed_idx"),
    ],
)
@pytest.mark.parametrize("dtype", [None, "str"])
@pytest.mark.exclude_in_sanity
def test_constructor_from_modin_series(get_index, get_columns, dtype):
    """Build frames from a dict of column Series with optional index/columns/dtype."""
    modin_df, pandas_df = create_test_dfs(test_data_values[0])

    def columns_as_series(df):
        # Map ``new_col<i>`` to the i-th column of ``df``.
        return {f"new_col{i}": df.iloc[:, i] for i in range(df.shape[1])}

    modin_data = columns_as_series(modin_df)
    pandas_data = columns_as_series(pandas_df)

    constructor_kwargs = {
        "index": get_index(modin_df.index),
        "columns": get_columns(list(modin_data)),
        "dtype": dtype,
    }

    new_modin = pd.DataFrame(modin_data, **constructor_kwargs)
    new_pandas = pandas.DataFrame(pandas_data, **constructor_kwargs)
    df_equals(new_modin, new_pandas)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_constructor(data):
    """Build frames from raw ``data`` and from a dict of Series, comparing each pair."""
    # Directly from the raw data.
    df_equals(pandas.DataFrame(data), pd.DataFrame(data))

    # From a mapping of column name to a Series of the matching library.
    pandas_series_by_name = {k: pandas.Series(v) for k, v in data.items()}
    pandas_df = pandas.DataFrame(pandas_series_by_name)
    modin_series_by_name = {k: pd.Series(v) for k, v in data.items()}
    modin_df = pd.DataFrame(modin_series_by_name)
    df_equals(pandas_df, modin_df)


def test_pyarrow_constructor():
    """Build frames of decimals with a pyarrow ``decimal128`` dtype and compare them."""
    pa = pytest.importorskip("pyarrow")

    decimal_dtype = pd.ArrowDtype(pa.decimal128(3, scale=2))
    data = [[Decimal("3.19"), None], [None, Decimal("-1.23")]]

    modin_df, pandas_df = create_test_dfs(data, dtype=decimal_dtype)
    df_equals(modin_df, pandas_df)


@pytest.mark.parametrize(
    "data",
    [
        np.arange(1, 10000, dtype=np.float32),
        [
            pd.Series([1, 2, 3], dtype="int32"),
            pandas.Series([4, 5, 6], dtype="int64"),
            np.array([7, 8, 9], dtype=np.float32),
        ],
        pandas.Categorical([1, 2, 3, 4, 5]),
    ],
)
def test_constructor_dtypes(data):
    """Build a modin/pandas frame pair from ``data`` and compare the two frames."""
    frame_pair = create_test_dfs(data)
    df_equals(*frame_pair)


def test_constructor_columns_and_index():
    """Re-construct frames from existing frames/columns with ``index``/``columns`` selections."""
    rows = [[1, 1, 10], [2, 4, 20], [3, 7, 30]]
    row_labels = [1, 2, 3]
    column_labels = ["id", "max_speed", "health"]

    modin_df = pd.DataFrame(rows, index=row_labels, columns=column_labels)
    pandas_df = pandas.DataFrame(rows, index=row_labels, columns=column_labels)
    df_equals(modin_df, pandas_df)

    # Construct from a whole frame with labels that already exist in it.
    frame_selections = (
        {},
        {"columns": ["max_speed", "health"]},
        {"index": [1, 2]},
        {"index": [1, 2], "columns": ["health"]},
    )
    for selection in frame_selections:
        df_equals(
            pd.DataFrame(modin_df, **selection),
            pandas.DataFrame(pandas_df, **selection),
        )

    # Construct from the first column only.
    column_selections = (
        {"index": [1, 2, 3]},
        {"columns": ["NO_EXIST"]},
    )
    for selection in column_selections:
        df_equals(
            pd.DataFrame(modin_df.iloc[:, 0], **selection),
            pandas.DataFrame(pandas_df.iloc[:, 0], **selection),
        )

    # These calls on a whole modin frame are expected to raise NotImplementedError.
    with pytest.raises(NotImplementedError):
        pd.DataFrame(modin_df, index=[1, 2, 99999])
    with pytest.raises(NotImplementedError):
        pd.DataFrame(modin_df, columns=["NO_EXIST"])


def test_constructor_from_index():
    """Build a modin/pandas frame pair from a named ``Index`` and compare the frames."""
    named_index = pd.Index([1, 2, 3], name="pricing_date")
    frame_pair = create_test_dfs(named_index)
    df_equals(*frame_pair)


def test_insert_datelike_string_issue_7371():
    """Assign a date-like string scalar as a new column and compare with pandas."""
    # When a new value is inserted into a frame, we call pandas.api.types.pandas_dtype(value) to
    # extract the dtype of an object like a pandas Series or numpy array. When a scalar value is passed,
    # this usually raises a TypeError, so we construct a local pandas Series from the object and
    # extract the dtype from there.
    # When the passed value is a date-like string, pandas will instead raise a ValueError because
    # it tries to parse it as a numpy structured dtype. After fixing GH#7371, we now catch
    # ValueError in addition to TypeError to handle this case.
    data = {"a": [0]}
    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)

    modin_df["c"] = "2020-01-01"
    pandas_df["c"] = "2020-01-01"

    df_equals(modin_df, pandas_df)


================================================
FILE: modin/tests/pandas/dataframe/test_pickle.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import pickle

import numpy as np
import pytest

import modin.pandas as pd
from modin.config import PersistentPickle
from modin.tests.pandas.utils import create_test_dfs, df_equals


@pytest.fixture
def modin_df_non_empty():
    """Return a modin frame with two integer columns of 1000 rows each."""
    columns = {"col1": np.arange(1000), "col2": np.arange(2000, 3000)}
    return pd.DataFrame(columns)


@pytest.fixture
def modin_df_empty():
    """Return a modin frame constructed without any data."""
    empty_frame = pd.DataFrame()
    return empty_frame


@pytest.fixture
def modin_column(modin_df_non_empty):
    """Return the ``col1`` column of the non-empty modin frame fixture."""
    first_column = modin_df_non_empty["col1"]
    return first_column


@pytest.fixture(params=[True, False])
def persistent(request):
    """Set ``PersistentPickle`` to the fixture parameter and restore the old value afterwards."""
    enabled = request.param
    previous_value = PersistentPickle.get()
    PersistentPickle.put(enabled)
    yield enabled
    # Put back whatever was configured before this fixture ran.
    PersistentPickle.put(previous_value)


@pytest.mark.parametrize("modin_df_name", ["modin_df_non_empty", "modin_df_empty"])
def test_dataframe_pickle(request, modin_df_name):
    """Round-trip the named frame fixture through ``pickle`` and compare with the original."""
    modin_df = request.getfixturevalue(modin_df_name)

    serialized = pickle.dumps(modin_df)
    restored = pickle.loads(serialized)

    df_equals(modin_df, restored)


def test__reduce__():
    """Apply a lambda that closes over a frame and compare the modin and pandas results."""
    # `DataFrame.__reduce__` will be called implicitly when lambda expressions are
    # pre-processed for the distributed engine.
    league_names = ["Major League Baseball", "National Basketball Association"]
    abbr_md, abbr_pd = create_test_dfs(league_names, index=["MLB", "NBA"])

    teams_md, teams_pd = create_test_dfs(
        {
            "name": ["Mariners", "Lakers"] * 500,
            "league_abbreviation": ["MLB", "NBA"] * 500,
        }
    )

    abbreviations_md = teams_md.set_index("name").league_abbreviation
    result_md = abbreviations_md.apply(lambda abbr: abbr_md[0].loc[abbr]).rename(
        "league"
    )

    abbreviations_pd = teams_pd.set_index("name").league_abbreviation
    result_pd = abbreviations_pd.apply(lambda abbr: abbr_pd[0].loc[abbr]).rename(
        "league"
    )

    df_equals(result_md, result_pd)


def test_column_pickle(modin_column, modin_df_non_empty, persistent):
    """Round-trip a single column through ``pickle`` and check the size of the dump."""
    column_dump = pickle.dumps(modin_column)
    restored_column = pickle.loads(column_dump)
    df_equals(modin_column.to_frame(), restored_column.to_frame())

    if not persistent:
        return

    # make sure we don't pickle the whole frame if doing persistent storage
    whole_frame_dump = pickle.dumps(modin_df_non_empty)
    assert len(column_dump) < len(whole_frame_dump)


================================================
FILE: modin/tests/pandas/dataframe/test_reduce.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import matplotlib
import numpy as np
import pandas
import pytest

import modin.pandas as pd
from modin.config import NPartitions
from modin.tests.pandas.utils import (
    arg_keys,
    axis_keys,
    axis_values,
    bool_arg_keys,
    bool_arg_values,
    create_test_dfs,
    default_to_pandas_ignore_string,
    df_equals,
    df_equals_with_non_stable_indices,
    eval_general,
    int_arg_keys,
    int_arg_values,
    test_data,
    test_data_diff_dtype,
    test_data_keys,
    test_data_large_categorical_dataframe,
    test_data_values,
)

# Set modin's ``NPartitions`` configuration value to 4 when this module is imported.
NPartitions.put(4)

# Force matplotlib to not use any Xwindows backend.
matplotlib.use("Agg")

# Our configuration in pytest.ini requires that we explicitly catch all
# instances of defaulting to pandas, but some test modules, like this one,
# have too many such instances.
# ``pytestmark`` applies the warning filter below to every test in this module.
pytestmark = pytest.mark.filterwarnings(default_to_pandas_ignore_string)


@pytest.mark.parametrize("method", ["all", "any"])
@pytest.mark.parametrize("is_transposed", [False, True])
@pytest.mark.parametrize("skipna", [False, True])
@pytest.mark.parametrize("axis", axis_values, ids=axis_keys)
@pytest.mark.parametrize("data", [test_data["float_nan_data"]])
def test_all_any(data, axis, skipna, is_transposed, method):
    """Run ``all``/``any`` on the frame (optionally transposed) with ``bool_only=None``."""

    def reduce_frame(df):
        frame = df.T if is_transposed else df
        reducer = getattr(frame, method)
        return reducer(axis=axis, skipna=skipna, bool_only=None)

    modin_df, pandas_df = create_test_dfs(data)
    eval_general(modin_df, pandas_df, reduce_frame)


@pytest.mark.parametrize("method", ["all", "any"])
@pytest.mark.parametrize(
    "bool_only", bool_arg_values, ids=arg_keys("bool_only", bool_arg_keys)
)
def test_all_any_specific(bool_only, method):
    """Run ``all``/``any`` with ``bool_only`` on the mixed-dtype test data."""

    def reduce_frame(df):
        return getattr(df, method)(bool_only=bool_only)

    modin_df, pandas_df = create_test_dfs(test_data_diff_dtype)
    eval_general(modin_df, pandas_df, reduce_frame)


@pytest.mark.parametrize("axis", axis_values, ids=axis_keys)
@pytest.mark.parametrize(
    "data", [test_data["float_nan_data"], test_data_large_categorical_dataframe]
)
def test_count(data, axis):
    """Run ``count`` along ``axis`` on a modin/pandas frame pair."""

    def count_along_axis(df):
        return df.count(axis=axis)

    modin_df, pandas_df = create_test_dfs(data)
    eval_general(modin_df, pandas_df, count_along_axis)


@pytest.mark.parametrize("axis", axis_values, ids=axis_keys)
@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("dropna", [True, False])
def test_nunique(data, axis, dropna):
    """Run ``nunique`` along ``axis`` with the given ``dropna`` setting."""

    def count_unique(df):
        return df.nunique(axis=axis, dropna=dropna)

    modin_df, pandas_df = create_test_dfs(data)
    eval_general(modin_df, pandas_df, count_unique)


@pytest.mark.parametrize("numeric_only", [False, True])
def test_count_specific(numeric_only):
    """Run ``count`` with ``numeric_only`` on the mixed-dtype test data."""

    def count_values(df):
        return df.count(numeric_only=numeric_only)

    modin_df, pandas_df = create_test_dfs(test_data_diff_dtype)
    eval_general(modin_df, pandas_df, count_values)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_count_dtypes(data):
    """Run ``count(axis=0)`` on the result of ``isna()``."""
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    def count_isna_mask(df):
        return df.isna().count(axis=0)

    eval_general(modin_df, pandas_df, count_isna_mask)


@pytest.mark.parametrize("percentiles", [None, 0.10, 0.11, 0.44, 0.78, 0.99])
@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_describe(data, percentiles):
    """Run ``describe`` with no percentiles or with a single-element percentile list."""
    # A scalar parameter is wrapped into a list; ``None`` is passed through unchanged.
    percentile_arg = None if percentiles is None else [percentiles]

    def describe_frame(df):
        return df.describe(percentiles=percentile_arg)

    modin_df, pandas_df = create_test_dfs(data)
    eval_general(modin_df, pandas_df, describe_frame)


@pytest.mark.parametrize("has_numeric_column", [False, True])
def test_2195(has_numeric_column):
    """Run ``describe`` on categorical and datetime columns, optionally with a numeric one."""
    n_rows = 10**2
    data = {
        "categorical": pd.Categorical(["d"] * n_rows),
        "date": [np.datetime64("2000-01-01")] * n_rows,
    }
    if has_numeric_column:
        data["numeric"] = [5] * n_rows

    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    def describe_frame(df):
        return df.describe()

    eval_general(modin_df, pandas_df, describe_frame)


# Issue: https://github.com/modin-project/modin/issues/4641
def test_describe_column_partition_has_different_index():
    """Run ``describe(include="all")`` on integer data with an added string column."""
    pandas_df = pandas.DataFrame(test_data["int_data"])
    # We add a string column to test the case where partitions with mixed data
    # types have different 'describe' rows, which causes an index mismatch.
    pandas_df["string_column"] = "abc"
    modin_df = pd.DataFrame(pandas_df)

    def describe_all(df):
        return df.describe(include="all")

    eval_general(modin_df, pandas_df, describe_all)


@pytest.mark.parametrize(
    "exclude,include",
    [
        ([np.float64], None),
        (np.float64, None),
        (None, [np.timedelta64, np.datetime64, np.object_, np.bool_]),
        (None, "all"),
        (None, np.number),
    ],
)
def test_describe_specific(exclude, include):
    """Run ``describe`` with ``exclude``/``include`` after dropping ``str_col``."""

    def describe_without_str_col(df):
        trimmed = df.drop("str_col", axis=1)
        return trimmed.describe(exclude=exclude, include=include)

    modin_df, pandas_df = create_test_dfs(test_data_diff_dtype)
    eval_general(modin_df, pandas_df, describe_without_str_col)


@pytest.mark.parametrize("data", [test_data["int_data"]])
def test_describe_str(data):
    """Compare ``describe`` on frames whose values were all converted to ``str``.

    If the full results differ, only the ``count``, ``unique`` and ``freq``
    rows are required to match.
    """
    # ``DataFrame.applymap`` is deprecated since pandas 2.1 in favor of
    # ``DataFrame.map``, which applies the function elementwise in the same way.
    modin_df = pd.DataFrame(data).map(str)
    pandas_df = pandas.DataFrame(data).map(str)

    # Compute each summary once so the fallback below can reuse it.
    modin_described = modin_df.describe()
    pandas_described = pandas_df.describe()

    try:
        df_equals(modin_described, pandas_described)
    except AssertionError:
        # We have to do this because we choose the highest count slightly differently
        # than pandas. Because there is no true guarantee which one will be first,
        # If they don't match, make sure that the `freq` is the same at least.
        stable_rows = ["count", "unique", "freq"]
        df_equals(
            modin_described.loc[stable_rows],
            pandas_described.loc[stable_rows],
        )


def test_describe_dtypes():
    """Run ``describe`` on a frame with three string columns and one integer column."""
    letters = list("abc")
    data = {f"col{i}": list(letters) for i in (1, 2, 3)}
    data["col4"] = [1, 2, 3]

    modin_df, pandas_df = create_test_dfs(data)
    eval_general(modin_df, pandas_df, lambda df: df.describe())


@pytest.mark.parametrize("method", ["idxmin", "idxmax"])
@pytest.mark.parametrize("is_transposed", [False, True])
@pytest.mark.parametrize("skipna", [False, True])
@pytest.mark.parametrize("axis", axis_values, ids=axis_keys)
@pytest.mark.parametrize("data", [test_data["float_nan_data"]])
def test_idxmin_idxmax(data, axis, skipna, is_transposed, method):
    """Run ``idxmin``/``idxmax`` on the frame (optionally transposed)."""

    def locate_extreme(df):
        frame = df.T if is_transposed else df
        locator = getattr(frame, method)
        return locator(axis=axis, skipna=skipna)

    modin_df, pandas_df = create_test_dfs(data)
    eval_general(modin_df, pandas_df, locate_extreme)


@pytest.mark.parametrize("axis", [0, 1])
def test_idxmin_idxmax_string_columns(axis):
    """Run ``idxmax`` and then ``idxmin`` on a single-row frame of strings."""
    # https://github.com/modin-project/modin/issues/7093
    modin_df, pandas_df = create_test_dfs([["a", "b"]])

    def call_with_axis(method_name):
        # Build an operation that calls ``method_name`` with the current ``axis``.
        return lambda df: getattr(df, method_name)(axis=axis)

    eval_general(modin_df, pandas_df, call_with_axis("idxmax"))
    eval_general(modin_df, pandas_df, call_with_axis("idxmin"))


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_last_valid_index(data):
    """Check that ``last_valid_index()`` agrees between the modin and pandas frames."""
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    modin_last = modin_df.last_valid_index()
    pandas_last = pandas_df.last_valid_index()
    assert modin_last == pandas_last


@pytest.mark.parametrize("index", bool_arg_values, ids=arg_keys("index", bool_arg_keys))
@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_memory_usage(data, index):
    """Run ``memory_usage`` with the given ``index`` flag on a modin/pandas frame pair."""

    def measure_memory(df):
        return df.memory_usage(index=index)

    modin_df, pandas_df = create_test_dfs(data)
    eval_general(modin_df, pandas_df, measure_memory)


@pytest.mark.parametrize("method", ["min", "max", "mean"])
@pytest.mark.parametrize("is_transposed", [False, True])
@pytest.mark.parametrize("numeric_only", [False, True])
@pytest.mark.parametrize("skipna", [False, True])
@pytest.mark.parametrize("axis", axis_values, ids=axis_keys)
@pytest.mark.parametrize("data", [test_data["float_nan_data"]])
def test_min_max_mean(data, axis, skipna, numeric_only, is_transposed, method):
    """Run ``min``/``max``/``mean`` on the frame (optionally transposed)."""

    def reduce_frame(df):
        frame = df.T if is_transposed else df
        reducer = getattr(frame, method)
        return reducer(axis=axis, skipna=skipna, numeric_only=numeric_only)

    modin_df, pandas_df = create_test_dfs(data)
    eval_general(modin_df, pandas_df, reduce_frame)


@pytest.mark.parametrize("method", ["prod", "product"])
@pytest.mark.parametrize("is_transposed", [False, True])
@pytest.mark.parametrize("skipna", [False, True])
@pytest.mark.parametrize("axis", axis_values, ids=axis_keys)
@pytest.mark.parametrize("data", [test_data["float_nan_data"]])
def test_prod(
    data,
    axis,
    skipna,
    is_transposed,
    method,
):
    """Run ``prod``/``product`` on the frame, then ``prod()`` on a two-level-index frame."""

    def multiply_frame(df, *args, **kwargs):
        frame = df.T if is_transposed else df
        return getattr(frame, method)(axis=axis, skipna=skipna)

    modin_df, pandas_df = create_test_dfs(data)
    eval_general(modin_df, pandas_df, multiply_frame)

    # test for issue #1953
    arrays = [["1", "1", "2", "2"], ["1", "2", "3", "4"]]
    modin_df = pd.DataFrame([[1, 2, 3, 4] for _ in range(4)], index=arrays)
    pandas_df = pandas.DataFrame([[1, 2, 3, 4] for _ in range(4)], index=arrays)
    df_equals(modin_df.prod(), pandas_df.prod())


@pytest.mark.parametrize("is_transposed", [False, True])
@pytest.mark.parametrize("skipna", [False, True])
@pytest.mark.parametrize("axis", axis_values, ids=axis_keys)
@pytest.mark.parametrize("data", [test_data["float_nan_data"]])
def test_sum(data, axis, skipna, is_transposed, request):
    """Run ``sum`` on the frame (optionally transposed), then on a two-level-index frame."""

    def sum_frame(df):
        frame = df.T if is_transposed else df
        return frame.sum(axis=axis, skipna=skipna)

    modin_df, pandas_df = create_test_dfs(data)
    eval_general(modin_df, pandas_df, sum_frame)

    # test for issue #1953
    arrays = [["1", "1", "2", "2"], ["1", "2", "3", "4"]]
    modin_df = pd.DataFrame([[1, 2, 3, 4] for _ in range(4)], index=arrays)
    pandas_df = pandas.DataFrame([[1, 2, 3, 4] for _ in range(4)], index=arrays)
    df_equals(modin_df.sum(), pandas_df.sum())


@pytest.mark.parametrize("dtype", ["int64", "Int64", "int64[pyarrow]"])
def test_dtype_consistency(dtype):
    """Check that ``sum()`` keeps the dtype the frame was constructed with."""
    # test for issue #6781
    summed = pd.DataFrame([1, 2, 3, 4], dtype=dtype).sum()
    expected_dtype = pandas.api.types.pandas_dtype(dtype)
    assert summed.dtype == expected_dtype


@pytest.mark.parametrize("fn", ["prod", "sum"])
@pytest.mark.parametrize("numeric_only", [False, True])
@pytest.mark.parametrize(
    "min_count", int_arg_values, ids=arg_keys("min_count", int_arg_keys)
)
def test_sum_prod_specific(fn, min_count, numeric_only):
    """Run ``sum``/``prod`` with ``min_count`` and ``numeric_only`` on mixed-dtype data."""
    # Parameter combinations that are marked as expected failures.
    if numeric_only and fn == "sum":
        pytest.xfail(reason="https://github.com/modin-project/modin/issues/7029")
    if min_count == 5 and not numeric_only:
        pytest.xfail(reason="https://github.com/modin-project/modin/issues/7029")

    expected_exception = None
    if not numeric_only:
        if fn == "prod":
            # FIXME: https://github.com/modin-project/modin/issues/7029
            expected_exception = False
        elif fn == "sum":
            expected_exception = TypeError(
                'can only concatenate str (not "int") to str'
            )

    def reduce_frame(df):
        return getattr(df, fn)(min_count=min_count, numeric_only=numeric_only)

    modin_df, pandas_df = create_test_dfs(test_data_diff_dtype)
    eval_general(
        modin_df,
        pandas_df,
        reduce_frame,
        expected_exception=expected_exception,
    )


@pytest.mark.parametrize("backend", [None, "pyarrow"])
def test_sum_prod_min_count(backend):
    """Compare ``prod`` when ``min_count`` exceeds the number of rows in the frame."""
    modin_df, pandas_df = create_test_dfs(
        test_data["float_nan_data"], backend=backend
    )
    # One more than the row count.
    oversized_min_count = len(pandas_df) + 1
    eval_general(
        modin_df,
        pandas_df,
        lambda df: df.prod(min_count=oversized_min_count),
    )


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_sum_single_column(data):
    """Compare ``sum`` along both axes for a frame reduced to its first column."""
    modin_first_col, pandas_first_col = (
        frame_cls(data).iloc[:, [0]] for frame_cls in (pd.DataFrame, pandas.DataFrame)
    )
    # Default axis first, then the explicit row-wise reduction.
    df_equals(modin_first_col.sum(), pandas_first_col.sum())
    df_equals(modin_first_col.sum(axis=1), pandas_first_col.sum(axis=1))


def test_sum_datetime64():
    """``sum`` on a frame with a datetime column must raise the expected ``TypeError``."""
    dates = pandas.date_range(start="1/1/2018", end="1/08/2018")
    frame_data = {"A": dates, "B": [1, 2, 3, 4, 5, 6, 7, 8]}
    modin_df, pandas_df = create_test_dfs(frame_data)

    unsupported_reduction = TypeError(
        "'DatetimeArray' with dtype datetime64[ns] does not support reduction 'sum'"
    )
    eval_general(
        modin_df,
        pandas_df,
        lambda df: df.sum(),
        expected_exception=unsupported_reduction,
    )


def test_min_datetime64():
    """Compare ``min`` along both axes for a frame mixing datetime and integer columns."""
    dates = pandas.date_range(start="1/1/2018", end="1/08/2018")
    frame_data = {"A": dates, "B": [1, 2, 3, 4, 5, 6, 7, 8]}
    modin_df, pandas_df = create_test_dfs(frame_data)

    # Column-wise minimum (default axis).
    eval_general(modin_df, pandas_df, lambda df: df.min())

    # Row-wise minimum mixes timestamps with integers.
    # pandas raises: `TypeError: '<=' not supported between instances of 'Timestamp' and 'int'`
    # while modin raises quite general: `TypeError("Cannot compare Numeric and Non-Numeric Types")`
    eval_general(
        modin_df,
        pandas_df,
        lambda df: df.min(axis=1),
        expected_exception=False,
    )


@pytest.mark.parametrize(
    "fn", ["max", "min", "median", "mean", "skew", "kurt", "sem", "std", "var"]
)
@pytest.mark.parametrize("axis", [0, 1, None])
@pytest.mark.parametrize("numeric_only", [False, True])
def test_reduce_specific(fn, numeric_only, axis):
    """Compare reductions on ``test_data_diff_dtype`` for ``numeric_only``/``axis`` values."""
    # Comparison operator named in the pandas error message for each of min/max.
    comparison_ops = {"max": ">=", "min": "<="}
    other_reductions = ("skew", "kurt", "sem", "std", "var", "median", "mean")

    if numeric_only:
        expected_exception = None
    elif fn in comparison_ops and axis == 0:
        expected_exception = TypeError(
            f"'{comparison_ops[fn]}' not supported between instances of 'str' and 'float'"
        )
    elif fn in comparison_ops or fn in other_reductions:
        # FIXME: https://github.com/modin-project/modin/issues/7030
        expected_exception = False
    else:
        expected_exception = None

    modin_df, pandas_df = create_test_dfs(test_data_diff_dtype)

    def apply_reduction(df):
        return getattr(df, fn)(numeric_only=numeric_only, axis=axis)

    eval_general(
        modin_df,
        pandas_df,
        apply_reduction,
        expected_exception=expected_exception,
    )


@pytest.mark.parametrize("subset_len", [1, 2])
@pytest.mark.parametrize("sort", bool_arg_values, ids=bool_arg_keys)
@pytest.mark.parametrize("normalize", bool_arg_values, ids=bool_arg_keys)
@pytest.mark.parametrize("dropna", bool_arg_values, ids=bool_arg_keys)
@pytest.mark.parametrize("ascending", [False, True])
def test_value_counts(subset_len, sort, normalize, dropna, ascending):
    """Compare ``DataFrame.value_counts`` over a column subset for all flag combinations."""

    def comparator(md_res, pd_res):
        if subset_len == 1:
            # 'pandas.DataFrame.value_counts' always returns frames with MultiIndex,
            # even when 'subset_len == 1' it returns MultiIndex with 'nlevels == 1'.
            # This behavior is expensive to mimic, so Modin 'value_counts' returns frame
            # with non-multi index in that case. That's why we flatten indices here.
            assert md_res.index.nlevels == pd_res.index.nlevels == 1
            md_res.index = md_res.index.get_level_values(0)
            pd_res.index = pd_res.index.get_level_values(0)

        if not sort:
            df_equals(md_res.sort_index(), pd_res.sort_index())
            return

        # We sort indices for the result because of:
        # https://github.com/modin-project/modin/issues/1650
        df_equals_with_non_stable_indices(md_res, pd_res)

    modin_df, pandas_df = create_test_dfs(test_data_values[0])

    # We're picking columns with different index signs to involve columns from different partitions
    columns = pandas_df.columns
    subset = []
    for position in range(subset_len):
        signed_position = -position if position % 2 else position
        subset.append(columns[signed_position])

    def count_values(df):
        return df.value_counts(
            subset=subset,
            sort=sort,
            normalize=normalize,
            dropna=dropna,
            ascending=ascending,
        )

    eval_general(modin_df, pandas_df, count_values, comparator=comparator)


def test_value_counts_categorical():
    """Compare ``value_counts`` on two identical categorical columns (issue #3571)."""
    labels = ["a"] * 50000 + ["b"] * 10000 + ["c"] * 1000
    values = np.array(labels)
    # Fixed seed keeps the shuffled order reproducible.
    rng = np.random.RandomState(seed=42)
    rng.shuffle(values)

    frame_data = {"col1": values, "col2": values}
    modin_df, pandas_df = create_test_dfs(frame_data, dtype="category")
    eval_general(
        modin_df,
        pandas_df,
        lambda df: df.value_counts(),
        comparator=df_equals,
    )


================================================
FILE: modin/tests/pandas/dataframe/test_udf.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import matplotlib
import numpy as np
import pandas
import pytest
from pandas.core.dtypes.common import is_list_like

import modin.pandas as pd
from modin.config import MinRowPartitionSize, NPartitions
from modin.core.storage_formats.pandas.query_compiler_caster import (
    _assert_casting_functions_wrap_same_implementation,
)
from modin.tests.pandas.utils import (
    UNIVERSAL_UNARY_NUMPY_FUNCTIONS_FOR_FLOATS,
    agg_func_except_keys,
    agg_func_except_values,
    agg_func_keys,
    agg_func_values,
    arg_keys,
    bool_arg_keys,
    bool_arg_values,
    create_test_dfs,
    default_to_pandas_ignore_string,
    df_equals,
    eval_general,
    query_func_keys,
    query_func_values,
    random_state,
    test_data,
    test_data_keys,
    test_data_values,
    udf_func_keys,
    udf_func_values,
)
from modin.tests.test_utils import (
    current_execution_is_native,
    warns_that_defaulting_to_pandas_if,
)
from modin.utils import get_current_execution

# Set the `NPartitions` config value to 4 for the tests in this module.
NPartitions.put(4)

# Force matplotlib to not use any Xwindows backend.
matplotlib.use("Agg")

# Our configuration in pytest.ini requires that we explicitly catch all
# instances of defaulting to pandas, but some test modules, like this one,
# have too many such instances.
# TODO(https://github.com/modin-project/modin/issues/3655): catch all instances
# of defaulting to pandas.
pytestmark = pytest.mark.filterwarnings(default_to_pandas_ignore_string)


def test_agg_dict():
    """Compare ``agg`` given a per-column dict and given named-aggregation keywords."""
    modin_df, pandas_df = create_test_dfs(test_data_values[0])
    first_col, last_col = pandas_df.columns[0], pandas_df.columns[-1]

    # Mapping of column label -> function name(s).
    per_column_funcs = {first_col: "sum", last_col: ("sum", "count")}
    eval_general(modin_df, pandas_df, lambda df: df.agg(per_column_funcs))

    # Named aggregation: new label -> (column, function name), passed as keywords.
    named_aggregations = {
        "new_col1": (first_col, "sum"),
        "new_col2": (last_col, "count"),
    }
    eval_general(modin_df, pandas_df, lambda df: df.agg(**named_aggregations))


@pytest.mark.parametrize("axis", [0, 1])
@pytest.mark.parametrize(
    "func",
    agg_func_values + agg_func_except_values,
    ids=agg_func_keys + agg_func_except_keys,
)
@pytest.mark.parametrize("op", ["agg", "apply"])
def test_agg_apply(axis, func, op, request):
    """Compare ``agg``/``apply`` with integer ``axis`` on float data containing NaNs."""
    # The expected error is selected from the id of the parametrized case.
    case_id = request.node.callspec.id
    if "sum sum" in case_id:
        expected_exception = pandas.errors.SpecificationError(
            "Function names must be unique if there is no new column names assigned"
        )
    elif "should raise AssertionError" in case_id:
        # FIXME: https://github.com/modin-project/modin/issues/7031
        expected_exception = False
    else:
        expected_exception = None

    modin_df, pandas_df = create_test_dfs(test_data["float_nan_data"])
    eval_general(
        modin_df,
        pandas_df,
        lambda df: getattr(df, op)(func, axis),
        expected_exception=expected_exception,
    )


@pytest.mark.parametrize("axis", ["rows", "columns"])
@pytest.mark.parametrize(
    "func",
    agg_func_values + agg_func_except_values,
    ids=agg_func_keys + agg_func_except_keys,
)
@pytest.mark.parametrize("op", ["agg", "apply"])
def test_agg_apply_axis_names(axis, func, op, request):
    """Compare ``agg``/``apply`` with string ``axis`` names on integer data."""
    # The expected error is selected from the id of the parametrized case.
    case_id = request.node.callspec.id
    if "sum sum" in case_id:
        expected_exception = pandas.errors.SpecificationError(
            "Function names must be unique if there is no new column names assigned"
        )
    elif "should raise AssertionError" in case_id:
        # FIXME: https://github.com/modin-project/modin/issues/7031
        expected_exception = False
    else:
        expected_exception = None

    modin_df, pandas_df = create_test_dfs(test_data["int_data"])
    eval_general(
        modin_df,
        pandas_df,
        lambda df: getattr(df, op)(func, axis),
        expected_exception=expected_exception,
    )


def test_aggregate_alias():
    """Check that ``DataFrame.agg`` and ``DataFrame.aggregate`` wrap the same implementation."""
    short_name = pd.DataFrame.agg
    long_name = pd.DataFrame.aggregate
    _assert_casting_functions_wrap_same_implementation(short_name, long_name)


def test_aggregate_error_checking():
    """Check the defaulting-to-pandas warning for dict and ``"arcsin"`` aggregations."""
    modin_df = pd.DataFrame(test_data["float_nan_data"])
    # The warning is only expected when the current execution is not native.
    expect_warning = not current_execution_is_native()

    with warns_that_defaulting_to_pandas_if(expect_warning):
        per_column_funcs = {modin_df.columns[0]: "sum", modin_df.columns[1]: "mean"}
        modin_df.aggregate(per_column_funcs)

    with warns_that_defaulting_to_pandas_if(expect_warning):
        modin_df.aggregate("arcsin")


@pytest.mark.parametrize(
    "func",
    agg_func_values + agg_func_except_values,
    ids=agg_func_keys + agg_func_except_keys,
)
def test_apply_key_error(func):
    """Check that ``apply`` with a dict keyed by a missing label raises ``KeyError``.

    Cases where ``func`` is neither list-like, callable nor a string are marked
    as expected failures; the reason is given in the ``xfail`` message below.
    """
    if not (is_list_like(func) or callable(func) or isinstance(func, str)):
        # The message fragments carry explicit trailing spaces so the joined
        # text reads as separate words.
        pytest.xfail(
            reason="Because index materialization is expensive Modin first "
            "checks the validity of the function itself and only then the engine level "
            "checks the validity of the indices. Pandas order of such checks is reversed, "
            "so we get different errors when both (function and index) are invalid."
        )
    eval_general(
        *create_test_dfs(test_data["int_data"]),
        lambda df: df.apply({"row": func}, axis=1),
        expected_exception=KeyError("Column(s) ['row'] do not exist"),
    )


@pytest.mark.parametrize("axis", [0, 1])
@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("func", ["kurt", "count", "sum", "mean", "all", "any"])
def test_apply_text_func(data, func, axis):
    """Compare ``apply`` with a function name on frames indexed by a random MultiIndex."""
    first_column = next(iter(data.values()))
    n_rows = len(first_column)  # length of the first data column

    # Two randomly drawn index levels, one value per row.
    level_arrays = [
        np.random.choice(candidates, n_rows) for candidates in ([0, 1, 2], [3, 4, 5])
    ]
    index = pd.MultiIndex.from_arrays(level_arrays)

    modin_df, pandas_df = create_test_dfs(data, index=index)

    def apply_by_name(df, *args, **kwargs):
        return df.apply(func, *args, **kwargs)

    eval_general(modin_df, pandas_df, apply_by_name, axis=axis)


@pytest.mark.parametrize(
    "column", ["A", ["A", "C"]], ids=arg_keys("column", ["A", ["A", "C"]])
)
@pytest.mark.parametrize(
    "ignore_index", bool_arg_values, ids=arg_keys("ignore_index", bool_arg_keys)
)
def test_explode_single_partition(column, ignore_index):
    """Compare ``explode`` on a small frame mixing lists, empty lists and scalars."""
    # Columns "A" and "C" hold the items explode() has to deal with: list-like
    # elements that must be expanded, empty lists, and items that aren't
    # list-like at all. Column "B" is a plain scalar column.
    list_column = [[0, 1, 2], "foo", [], [3, 4]]
    other_list_column = [["a", "b", "c"], np.nan, [], ["d", "e"]]
    data = {"A": list_column, "B": 1, "C": other_list_column}

    modin_df, pandas_df = create_test_dfs(data)
    eval_general(
        modin_df,
        pandas_df,
        lambda df: df.explode(column, ignore_index=ignore_index),
    )


@pytest.mark.parametrize(
    "column", ["A", ["A", "C"]], ids=arg_keys("column", ["A", ["A", "C"]])
)
@pytest.mark.parametrize(
    "ignore_index", bool_arg_values, ids=arg_keys("ignore_index", bool_arg_keys)
)
def test_explode_all_partitions(column, ignore_index):
    """Compare ``explode`` on a frame sized from ``NPartitions`` and ``MinRowPartitionSize``."""
    # Test explode with enough rows to fill all partitions. explode should
    # expand every row in the input data into two rows. It's especially
    # important that the input data has list-like elements that must be
    # expanded at the boundaries of the partitions, e.g. at row 31.
    num_rows = NPartitions.get() * MinRowPartitionSize.get()
    numeric_pairs = [[3, 4]] * num_rows
    string_pairs = [["a", "b"]] * num_rows

    modin_df, pandas_df = create_test_dfs({"A": numeric_pairs, "C": string_pairs})
    eval_general(
        modin_df,
        pandas_df,
        lambda df: df.explode(column, ignore_index=ignore_index),
    )


@pytest.mark.parametrize("axis", ["rows", "columns"])
@pytest.mark.parametrize("args", [(1,), ("_A",)])
def test_apply_args(axis, args):
    """Compare ``apply`` when extra positional ``args`` are given for the function."""

    def apply_func(series, y):
        # Plain addition first; on TypeError fall back to string concatenation.
        try:
            combined = series + y
        except TypeError:
            combined = series.map(str) + str(y)
        return combined

    modin_df, pandas_df = create_test_dfs(test_data["int_data"])
    eval_general(
        modin_df,
        pandas_df,
        lambda df: df.apply(apply_func, axis=axis, args=args),
    )


def test_apply_metadata():
    """Compare frames after assigning the result of a row-wise ``apply`` as a new column."""

    def add(a, b, c):
        return a + b + c

    def add_row_values(row):
        return add(row["A"], row["B"], row["C"])

    data = {"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}

    modin_df = pd.DataFrame(data)
    modin_df["add"] = modin_df.apply(lambda row: add_row_values(row), axis=1)

    pandas_df = pandas.DataFrame(data)
    pandas_df["add"] = pandas_df.apply(lambda row: add_row_values(row), axis=1)

    df_equals(modin_df, pandas_df)


@pytest.mark.parametrize("func", udf_func_values, ids=udf_func_keys)
@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_apply_udf(data, func):
    """Compare ``apply`` with each UDF, giving ``other`` to ``eval_general`` as a callable."""
    modin_df, pandas_df = create_test_dfs(data)

    def apply_udf(df, *args, **kwargs):
        return df.apply(func, *args, **kwargs)

    eval_general(modin_df, pandas_df, apply_udf, other=lambda df: df)


def test_apply_dict_4828():
    """Compare dict-based ``apply`` on a plain frame and on concatenated frames."""

    def square_label_zero(df, **apply_kwargs):
        # The dict maps label 0 to a squaring function.
        return df.apply({0: (lambda x: x**2)}, **apply_kwargs)

    def check_both_axes(modin_df, pandas_df):
        # Default axis first, then axis=1.
        eval_general(modin_df, pandas_df, lambda df: square_label_zero(df))
        eval_general(modin_df, pandas_df, lambda df: square_label_zero(df, axis=1))

    data = [[2, 4], [1, 3]]
    modin_base, pandas_base = create_test_dfs(data)
    check_both_axes(modin_base, pandas_base)

    # several partitions along axis 0
    modin_rows, pandas_rows = create_test_dfs(data, index=[2, 3])
    modin_tall = pd.concat([modin_base, modin_rows], axis=0)
    pandas_tall = pandas.concat([pandas_base, pandas_rows], axis=0)
    check_both_axes(modin_tall, pandas_tall)

    # several partitions along axis 1
    modin_cols, pandas_cols = create_test_dfs(data, columns=[2, 3])
    modin_wide = pd.concat([modin_base, modin_cols], axis=1)
    pandas_wide = pandas.concat([pandas_base, pandas_cols], axis=1)
    check_both_axes(modin_wide, pandas_wide)


def test_apply_modin_func_4635():
    """Compare ``apply`` when given Modin methods versus the matching pandas methods."""
    modin_df, pandas_df = create_test_dfs([1])
    df_equals(modin_df.apply(pd.Series.sum), pandas_df.apply(pandas.Series.sum))

    frame_data = {"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3]}
    modin_df, pandas_df = (df.set_index(["a"]) for df in create_test_dfs(frame_data))

    # Groupby-apply receives `DataFrame.sample` of the respective library.
    modin_sampled = modin_df.groupby("a", group_keys=False).apply(
        pd.DataFrame.sample, n=1
    )
    pandas_sampled = pandas_df.groupby("a", group_keys=False).apply(
        pandas.DataFrame.sample, n=1
    )
    df_equals(modin_sampled, pandas_sampled)


@pytest.mark.parametrize(
    "apply_function",
    (
        lambda df, function: function(df),
        lambda df, function: df.apply(function, axis=0),
        lambda df, function: df.apply(function, axis=1),
    ),
)
@pytest.mark.parametrize("function", UNIVERSAL_UNARY_NUMPY_FUNCTIONS_FOR_FLOATS)
def test_apply_unary_numpy_universal_function_issue_7645(function, apply_function):
    """Compare each listed function applied directly and through ``apply`` on both axes."""
    modin_df, pandas_df = create_test_dfs(test_data["float_nan_data"])

    def run(df):
        return apply_function(df, function)

    eval_general(modin_df, pandas_df, run)


def test_eval_df_use_case():
    """Compare ``eval`` for a Series result, a non-inplace and an inplace assignment."""
    frame_data = {"a": random_state.randn(10), "b": random_state.randn(10)}
    pandas_df = pandas.DataFrame(frame_data)
    modin_df = pd.DataFrame(frame_data)

    eval_options = {"engine": "python", "parser": "pandas"}
    series_expr = "arctan2(sin(a), b)"
    assignment_expr = "e = arctan2(sin(a), b)"

    # test eval for series results
    pandas_series = pandas_df.eval(series_expr, **eval_options)
    modin_series = modin_df.eval(series_expr, **eval_options)
    assert isinstance(modin_series, pd.Series)
    df_equals(modin_series, pandas_series)

    # Test not inplace assignments
    pandas_assigned = pandas_df.eval(assignment_expr, **eval_options)
    modin_assigned = modin_df.eval(assignment_expr, **eval_options)
    df_equals(modin_assigned, pandas_assigned)

    # Test inplace assignments
    pandas_df.eval(assignment_expr, inplace=True, **eval_options)
    modin_df.eval(assignment_expr, inplace=True, **eval_options)
    # TODO: Use a series equality validator.
    df_equals(modin_df, pandas_df)


def test_eval_df_arithmetic_subexpression():
    """Compare inplace ``eval`` of an assignment with an arithmetic sub-expression."""
    frame_data = {"a": random_state.randn(10), "b": random_state.randn(10)}
    pandas_df = pandas.DataFrame(frame_data)
    modin_df = pd.DataFrame(frame_data)

    expression = "not_e = sin(a + b)"
    eval_options = {"engine": "python", "parser": "pandas", "inplace": True}
    pandas_df.eval(expression, **eval_options)
    modin_df.eval(expression, **eval_options)
    # TODO: Use a series equality validator.
    df_equals(modin_df, pandas_df)


def test_eval_groupby_transform():
    """Check ``eval`` of a groupby-transform expression on a two-group frame."""
    # see #5511 for details
    groups = ["A"] * 500 + ["B"] * 500
    modin_df = pd.DataFrame({"num": range(1, 1001), "group": groups})
    group_minimums = modin_df.eval("num.groupby(group).transform('min')")
    assert group_minimums.unique().tolist() == [1, 501]


def test_eval_scalar():
    """Check that ``eval`` of the constant expression ``"1"`` compares equal to 1."""
    # see #4477 for details
    modin_df = pd.DataFrame([[2]])
    evaluated = modin_df.eval("1")
    assert evaluated == 1


@pytest.mark.parametrize("engine", ("numexpr", "python"))
def test_eval_not_inplace_does_not_change_input_dataframe(engine):
    """Check that ``eval(..., inplace=False)`` leaves both input frames unchanged."""
    modin_df, pandas_df = create_test_dfs({"a": [1, 2, 3]})
    # Snapshot of the input taken before any evaluation.
    pandas_before = pandas_df.copy()

    expression = "b = a + 1"
    modin_result = modin_df.eval(expression, inplace=False, engine=engine)
    pandas_result = pandas_df.eval(expression, inplace=False, engine=engine)

    df_equals(modin_df, pandas_before)
    df_equals(pandas_df, pandas_before)
    df_equals(modin_result, pandas_result)


# Module-level value referenced by name (as `@TEST_VAR`) in the expressions built by
# `test_eval_and_query_with_local_and_global_var`.
TEST_VAR = 2


@pytest.mark.parametrize("method", ["query", "eval"])
@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("local_var", [2])
@pytest.mark.parametrize("engine", ["python", "numexpr"])
def test_eval_and_query_with_local_and_global_var(method, data, engine, local_var):
    """Compare ``query``/``eval`` for expressions using ``@local_var`` and ``@TEST_VAR``."""
    # NOTE: `local_var` is never read directly in this body; the expressions below
    # refer to it by name through the `@local_var` syntax.
    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)
    # `eval` gets an arithmetic expression, `query` gets a comparison.
    op = "+" if method == "eval" else "<"
    for expr in (f"col1 {op} @local_var", f"col1 {op} @TEST_VAR"):
        df_equals(
            getattr(modin_df, method)(expr, engine=engine),
            getattr(pandas_df, method)(expr, engine=engine),
        )


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_filter(data):
    """Compare ``filter`` by ``items``, ``regex`` and ``like``; check invalid argument sets."""
    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)

    items = ["col1", "col5"]
    regex = "4$|3$"
    like = "col"

    df_equals(modin_df.filter(items=items), pandas_df.filter(items=items))

    for axis in (0, 1):
        df_equals(
            modin_df.filter(regex=regex, axis=axis),
            pandas_df.filter(regex=regex, axis=axis),
        )

    df_equals(modin_df.filter(like=like), pandas_df.filter(like=like))

    # Giving two selectors at once, or none at all, must raise.
    with pytest.raises(TypeError):
        modin_df.filter(items=items, regex=regex)

    with pytest.raises(TypeError):
        modin_df.filter()


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_pipe(data):
    """Compare a chain of ``pipe`` calls with direct calls and with pandas."""
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    n_rows = len(modin_df.index)
    a, b, c = 2 % n_rows, 0, 3 % n_rows
    n_columns = len(modin_df.columns)
    dropped_column = modin_df.columns[3 % n_columns]

    def h(x):
        return x.drop(columns=[dropped_column])

    def g(x, arg1=0):
        # Concatenate the frame with itself `arg1` times, using the library
        # that matches the type of the frame.
        for _ in range(arg1):
            library = pd if isinstance(x, pd.DataFrame) else pandas
            x = library.concat((x, x))
        return x

    def f(x, arg2=0, arg3=0):
        return x.drop([arg2, arg3])

    def piped(df):
        return df.pipe(h).pipe(g, arg1=a).pipe(f, arg2=b, arg3=c)

    # Piping must match nesting the same calls by hand.
    df_equals(f(g(h(modin_df), arg1=a), arg2=b, arg3=c), piped(modin_df))
    # Modin must match pandas.
    df_equals(piped(modin_df), piped(pandas_df))


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("funcs", query_func_values, ids=query_func_keys)
@pytest.mark.parametrize("engine", ["python", "numexpr"])
def test_query(data, funcs, engine):
    """Compare ``query`` results, dtypes, and raised exception types with pandas."""
    if get_current_execution() == "BaseOnPython" and funcs != "col3 > col4":
        pytest.xfail(
            reason="In this case, we are faced with the problem of handling empty data frames - #4934"
        )
    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)

    try:
        expected = pandas_df.query(funcs, engine=engine)
    except Exception as pandas_error:
        # Modin must fail with the same exception type as pandas.
        with pytest.raises(type(pandas_error)):
            modin_df.query(funcs, engine=engine)
        return

    actual = modin_df.query(funcs, engine=engine)
    # `dtypes` must be evaluated after `query` so we need to check cache
    assert actual._query_compiler.frame_has_dtypes_cache
    df_equals(actual, expected)
    df_equals(actual.dtypes, expected.dtypes)


def test_query_named_index():
    """Compare ``query`` when the expression refers to the named index ``col1``."""
    modin_df, pandas_df = create_test_dfs(test_data["int_data"])
    expression = "col1 % 2 == 0 | col3 % 2 == 1"
    eval_general(
        modin_df.set_index("col1"),
        pandas_df.set_index("col1"),
        lambda df: df.query(expression),
    )


def test_query_named_multiindex():
    """Compare ``query`` when the expression refers to both named index levels."""
    index_columns = ["col1", "col3"]
    modin_df, pandas_df = create_test_dfs(test_data["int_data"])
    expression = "col1 % 2 == 1 | col3 % 2 == 1"
    eval_general(
        modin_df.set_index(index_columns),
        pandas_df.set_index(index_columns),
        lambda df: df.query(expression),
    )


def test_query_multiindex_without_names():
    """Compare ``query`` addressing unnamed index levels as ``ilevel_0``/``ilevel_1``."""

    def make_df(without_index):
        # Move two columns into the index, then strip the level names.
        indexed = without_index.set_index(["col1", "col3"])
        indexed.index.names = [None, None]
        return indexed

    modin_df, pandas_df = create_test_dfs(test_data["int_data"])
    expression = "ilevel_0 % 2 == 0 | ilevel_1 % 2 == 1 | col4 % 2 == 1"
    eval_general(
        make_df(modin_df),
        make_df(pandas_df),
        lambda df: df.query(expression),
    )


def test_empty_query():
    """Check that ``query`` with an empty expression raises ``ValueError``."""
    modin_df = pd.DataFrame([1, 2, 3, 4, 5])
    empty_expression = ""

    with pytest.raises(ValueError):
        modin_df.query(empty_expression)


@pytest.mark.parametrize("engine", ["python", "numexpr"])
def test_query_after_insert(engine):
    """Compare ``query`` on a column that was inserted from an ``eval`` result.

    The same steps (insert ``z``, query on it, reset the index, rename the
    columns) are run on a Modin frame and on a pandas frame, and both the
    renamed results and the queried frames are compared.
    """
    data = {"x": [-1, 0, 1, None], "y": [1, 2, None, 3]}

    modin_df = pd.DataFrame(data)
    modin_df["z"] = modin_df.eval("x / y")
    modin_df = modin_df.query("z >= 0", engine=engine)
    modin_result = modin_df.reset_index(drop=True)
    modin_result.columns = ["a", "b", "c"]

    # The reference frame must come from pandas itself; building it with
    # `pd.DataFrame` would compare Modin against Modin.
    pandas_df = pandas.DataFrame(data)
    pandas_df["z"] = pandas_df.eval("x / y")
    pandas_df = pandas_df.query("z >= 0", engine=engine)
    pandas_result = pandas_df.reset_index(drop=True)
    pandas_result.columns = ["a", "b", "c"]

    df_equals(modin_result, pandas_result)
    df_equals(modin_df, pandas_df)


@pytest.mark.parametrize("engine", ["python", "numexpr"])
def test_query_with_element_access_issue_4580(engine):
    """Compare ``query`` with an expression that indexes into a column (``a[0]``)."""
    pandas_df = pandas.DataFrame({"a": [0, 1, 2]})
    # get two row partitions by concatenating
    first_row = pd.DataFrame(pandas_df[:1])
    remaining_rows = pd.DataFrame(pandas_df[1:])
    modin_df = pd.concat([first_row, remaining_rows])
    eval_general(
        modin_df,
        pandas_df,
        lambda df: df.query("a == a[0]", engine=engine),
    )


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize(
    "func", [lambda x: x + 1, [np.sqrt, np.exp]], ids=["lambda", "list_udfs"]
)
def test_transform(data, func, request):
    """Compare ``transform`` with a lambda; the list-of-functions case is an expected failure."""
    case_id = request.node.callspec.id
    if "list_udfs" in case_id:
        pytest.xfail(reason="https://github.com/modin-project/modin/issues/6998")

    modin_df, pandas_df = create_test_dfs(data)
    eval_general(modin_df, pandas_df, lambda df: df.transform(func))


================================================
FILE: modin/tests/pandas/dataframe/test_window.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import matplotlib
import numpy as np
import pandas
import pytest

import modin.pandas as pd
from modin.config import NPartitions
from modin.tests.pandas.utils import (
    arg_keys,
    axis_keys,
    axis_values,
    bool_arg_keys,
    bool_arg_values,
    create_test_dfs,
    df_equals,
    eval_general,
    int_arg_keys,
    int_arg_values,
    is_native_shallow_copy,
    name_contains,
    no_numeric_dfs,
    quantiles_keys,
    quantiles_values,
    random_state,
    test_data,
    test_data_keys,
    test_data_values,
    test_data_with_duplicates_keys,
    test_data_with_duplicates_values,
)

# Set the `NPartitions` config value to 4 for the tests in this module.
NPartitions.put(4)

# Force matplotlib to not use any Xwindows backend.
matplotlib.use("Agg")


@pytest.mark.parametrize("axis", [0, 1])
@pytest.mark.parametrize("skipna", [False, True])
@pytest.mark.parametrize("method", ["cumprod", "cummin", "cummax", "cumsum"])
def test_cumprod_cummin_cummax_cumsum(axis, skipna, method):
    """Compare cumulative methods on float data with NaNs for each ``axis``/``skipna``."""
    modin_df, pandas_df = create_test_dfs(test_data["float_nan_data"])

    def cumulative(df):
        return getattr(df, method)(axis=axis, skipna=skipna)

    eval_general(modin_df, pandas_df, cumulative)


@pytest.mark.parametrize("axis", ["rows", "columns"])
@pytest.mark.parametrize("method", ["cumprod", "cummin", "cummax", "cumsum"])
def test_cumprod_cummin_cummax_cumsum_transposed(axis, method):
    """Compare cumulative methods applied to the transpose of integer data."""
    modin_df, pandas_df = create_test_dfs(test_data["int_data"])

    def cumulative_on_transposed(df):
        return getattr(df.T, method)(axis=axis)

    eval_general(modin_df, pandas_df, cumulative_on_transposed)


@pytest.mark.parametrize("axis", [0, 1])
@pytest.mark.parametrize("method", ["cummin", "cummax"])
def test_cummin_cummax_int_and_float(axis, method):
    """Compare ``cummin``/``cummax`` on a frame with one int and one float column."""
    n_rows = 1000
    int_column = list(range(n_rows))
    float_column = [i * 0.1 for i in range(n_rows)]

    modin_df, pandas_df = create_test_dfs({"col1": int_column, "col2": float_column})
    eval_general(modin_df, pandas_df, lambda df: getattr(df, method)(axis=axis))


@pytest.mark.parametrize("axis", [0, 1])
@pytest.mark.parametrize(
    "periods", int_arg_values, ids=arg_keys("periods", int_arg_keys)
)
def test_diff(axis, periods):
    """Compare ``diff`` on float data with NaNs for each ``axis``/``periods`` pair."""
    modin_df, pandas_df = create_test_dfs(test_data["float_nan_data"])

    def difference(df):
        return df.diff(axis=axis, periods=periods)

    eval_general(modin_df, pandas_df, difference)


def test_diff_with_datetime_types():
    """Compare ``diff`` on a frame with a datetime column, then on the resulting frame."""
    numeric_part = pandas.DataFrame(
        [[1, 2.0, 3], [4, 5.0, 6], [7, np.nan, 9], [10, 11.3, 12], [13, 14.5, 15]]
    )
    timestamps = pandas.date_range("2018-01-01", periods=5, freq="h").values
    pandas_df = pandas.concat([numeric_part, pandas.Series(timestamps)], axis=1)
    modin_df = pd.DataFrame(pandas_df)

    # Test `diff` with datetime type.
    pandas_first_diff = pandas_df.diff()
    modin_first_diff = modin_df.diff()
    df_equals(modin_first_diff, pandas_first_diff)

    # Test `diff` with timedelta type.
    pandas_second_diff = pandas_first_diff.diff()
    modin_second_diff = modin_first_diff.diff()
    df_equals(modin_second_diff, pandas_second_diff)


def test_diff_error_handling():
    """Check the errors ``diff`` raises for a string ``periods`` and for object columns."""
    column_names = ["col 0", "col 1", "col 2"]
    modin_df = pd.DataFrame([["a", "b", "c"]], columns=column_names)

    bad_periods_message = "periods must be an int. got <class 'str'> instead"
    with pytest.raises(ValueError, match=bad_periods_message):
        modin_df.diff(axis=0, periods="1")

    bad_operand_message = "unsupported operand type for -: got object"
    with pytest.raises(TypeError, match=bad_operand_message):
        modin_df.diff()


@pytest.mark.parametrize("axis", ["rows", "columns"])
def test_diff_transposed(axis):
    """Compare ``diff`` applied to the transpose of integer data."""
    modin_df, pandas_df = create_test_dfs(test_data["int_data"])

    def diff_on_transposed(df):
        return df.T.diff(axis=axis)

    eval_general(modin_df, pandas_df, diff_on_transposed)


@pytest.mark.parametrize(
    "data", test_data_with_duplicates_values, ids=test_data_with_duplicates_keys
)
@pytest.mark.parametrize(
    "keep", ["last", "first", False], ids=["last", "first", "False"]
)
def test_duplicated(data, keep):
    """Compare ``duplicated`` over all columns and over a random subset of columns."""
    import random

    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)

    expected = pandas_df.duplicated(keep=keep)
    actual = modin_df.duplicated(keep=keep)
    df_equals(actual, expected)

    # Random, non-empty selection of column labels.
    candidate_columns = list(pandas_df.columns)
    subset_size = random.randint(1, len(pandas_df.columns))
    subset = random.sample(candidate_columns, subset_size)

    expected = pandas_df.duplicated(keep=keep, subset=subset)
    actual = modin_df.duplicated(keep=keep, subset=subset)
    df_equals(actual, expected)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_ffill(data):
    """Compare ``ffill`` with default arguments."""
    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)
    modin_filled = modin_df.ffill()
    pandas_filled = pandas_df.ffill()
    df_equals(modin_filled, pandas_filled)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize(
    "method",
    ["backfill", "bfill", "pad", "ffill", None],
    ids=["backfill", "bfill", "pad", "ffill", "None"],
)
@pytest.mark.parametrize("axis", axis_values, ids=axis_keys)
@pytest.mark.parametrize("limit", int_arg_values, ids=int_arg_keys)
def test_fillna(data, method, axis, limit):
    """Compare ``fillna(0, ...)`` results, or the raised exception type, with pandas."""
    # Only axis values other than 1/"columns" are exercised here; the remaining
    # cases are not tested until pandas-17399 gets fixed.
    if axis == 1 or axis == "columns":
        return

    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)
    fillna_kwargs = {"method": method, "axis": axis, "limit": limit}

    try:
        expected = pandas_df.fillna(0, **fillna_kwargs)
    except Exception as pandas_error:
        # Modin must fail with the same exception type as pandas.
        with pytest.raises(type(pandas_error)):
            modin_df.fillna(0, **fillna_kwargs)
    else:
        actual = modin_df.fillna(0, **fillna_kwargs)
        df_equals(actual, expected)


def test_fillna_sanity():
    """Compare ``fillna`` for dict values, Series values and pad/bfill on datetimes."""
    # with different dtype
    string_rows = [
        ["a", "a", np.nan, "a"],
        ["b", "b", np.nan, "b"],
        ["c", "c", np.nan, "c"],
    ]
    fill_values = {2: "foo"}
    pandas_df = pandas.DataFrame(string_rows)

    expected = pandas_df.fillna(fill_values)
    modin_df = pd.DataFrame(string_rows).fillna(fill_values)
    df_equals(modin_df, expected)

    # Same fill, performed in place on both frames.
    modin_df = pd.DataFrame(pandas_df)
    pandas_df.fillna(fill_values, inplace=True)
    modin_df.fillna(fill_values, inplace=True)
    df_equals(modin_df, expected)

    # Fill one datetime column from another, given as a pandas Series.
    date_data = {
        "Date": [pandas.NaT, pandas.Timestamp("2014-1-1")],
        "Date2": [pandas.Timestamp("2013-1-1"), pandas.NaT],
    }
    pandas_df = pandas.DataFrame(date_data)
    expected = pandas_df.fillna(value={"Date": pandas_df["Date2"]})
    modin_df = pd.DataFrame(date_data).fillna(value={"Date": pandas_df["Date2"]})
    df_equals(modin_df, expected)

    # Timezone-aware timestamp followed by NaT, filled with "pad".
    trailing_nat = {"A": [pandas.Timestamp("2012-11-11 00:00:00+01:00"), pandas.NaT]}
    pandas_df = pandas.DataFrame(trailing_nat)
    modin_df = pd.DataFrame(trailing_nat)
    df_equals(modin_df.fillna(method="pad"), pandas_df.fillna(method="pad"))

    # NaT followed by a timezone-aware timestamp, filled with "bfill".
    leading_nat = {"A": [pandas.NaT, pandas.Timestamp("2012-11-11 00:00:00+01:00")]}
    pandas_df = pandas.DataFrame(leading_nat)
    modin_df = pd.DataFrame(leading_nat).fillna(method="bfill")
    df_equals(modin_df, pandas_df.fillna(method="bfill"))


def test_fillna_downcast():
    """Check ``fillna(..., downcast="infer")`` with a scalar and a dict value."""
    frame_data = {"a": [1.0, np.nan]}

    # infer int64 from float64, first for a scalar fill value and then for
    # a dict fill value
    for fill_value in (0, {"a": 0}):
        expected = pandas.DataFrame(frame_data).fillna(fill_value, downcast="infer")
        actual = pd.DataFrame(frame_data).fillna(fill_value, downcast="infer")
        df_equals(actual, expected)


def test_fillna_4660():
    """Fill column ``c`` (holding ``pd.NA``) from column ``b`` of the same frame."""
    modin_df, pandas_df = create_test_dfs(
        {"a": ["a"], "b": ["b"], "c": [pd.NA]}, index=["row1"]
    )

    def fill_c_from_b(df):
        return df["c"].fillna(df["b"])

    eval_general(modin_df, pandas_df, fill_c_from_b)


@pytest.mark.xfail(
    condition=is_native_shallow_copy(),
    reason="native pandas backend does not deep copy inputs by default",
    strict=True,
)
def test_fillna_inplace():
    """Check in-place ``fillna`` on a Modin frame built from a pandas frame.

    After the pandas source is filled in place, the Modin frame created from
    it beforehand is expected to differ from it; once the Modin frame is
    filled in place as well, both must match again.
    """
    frame_data = random_state.randn(10, 4)
    df = pandas.DataFrame(frame_data)
    # Use a single indexer: chained assignment (``df[1][:4] = ...``) does not
    # update ``df`` when pandas copy-on-write is enabled.
    df.iloc[:4, 1] = np.nan
    df.iloc[-4:, 3] = np.nan

    modin_df = pd.DataFrame(df)
    df.fillna(value=0, inplace=True)
    # the Modin frame must not have been affected by the pandas in-place fill
    with pytest.raises(AssertionError):
        df_equals(modin_df, df)

    modin_df.fillna(value=0, inplace=True)
    df_equals(modin_df, df)

    # an in-place fill returns None
    modin_df = pd.DataFrame(df).fillna(value={0: 0}, inplace=True)
    assert modin_df is None

    df.iloc[:4, 1] = np.nan
    df.iloc[-4:, 3] = np.nan
    modin_df = pd.DataFrame(df)
    df.fillna(method="ffill", inplace=True)
    with pytest.raises(AssertionError):
        df_equals(modin_df, df)

    modin_df.fillna(method="ffill", inplace=True)
    df_equals(modin_df, df)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("limit", [1, 2, 0.5, -1, -2, 1.5])
def test_frame_fillna_limit(data, limit):
    """Compare ``fillna(..., limit=...)`` with pandas for several fill sources.

    Float limits are turned into that fraction of the frame length and negative
    limits are counted back from the frame length before being used.
    """
    pandas_df = pandas.DataFrame(data)

    # Shuffled fill sources: a Series (and the equivalent dict) built from the
    # column labels, and a frame whose every column holds the index values.
    replace_pandas_series = pandas_df.columns.to_series().sample(frac=1)
    replace_dict = replace_pandas_series.to_dict()
    replace_pandas_df = pandas.DataFrame(
        {col: pandas_df.index.to_series() for col in pandas_df.columns},
        index=pandas_df.index,
    ).sample(frac=1)
    # (Modin fill value, pandas fill value) pairs in the order they are checked
    fill_sources = (
        (replace_dict, replace_dict),
        (pd.Series(replace_pandas_series), replace_pandas_series),
        (pd.DataFrame(replace_pandas_df), replace_pandas_df),
    )

    index = pandas_df.index
    # keep only the first two rows; the remaining rows become NaN
    head_only = pandas_df[:2].reindex(index)
    modin_df = pd.DataFrame(head_only)

    if isinstance(limit, float):
        limit = int(len(modin_df) * limit)
    if limit is not None and limit < 0:
        limit = len(modin_df) + limit

    def assert_value_fills_match(modin_frame, pandas_frame):
        # every value-based fill source must give the same result as pandas
        for modin_value, pandas_value in fill_sources:
            df_equals(
                modin_frame.fillna(modin_value, limit=limit),
                pandas_frame.fillna(pandas_value, limit=limit),
            )

    df_equals(
        modin_df.fillna(method="pad", limit=limit),
        head_only.fillna(method="pad", limit=limit),
    )
    assert_value_fills_match(modin_df, head_only)

    # keep only the last two rows; the leading rows become NaN
    tail_only = pandas_df[-2:].reindex(index)
    modin_df = pd.DataFrame(tail_only)
    df_equals(
        modin_df.fillna(method="backfill", limit=limit),
        tail_only.fillna(method="backfill", limit=limit),
    )
    assert_value_fills_match(modin_df, tail_only)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_frame_pad_backfill_limit(data):
    """Check ``pad`` and ``backfill`` fills with ``limit=2`` against pandas."""
    pandas_df = pandas.DataFrame(data)
    index = pandas_df.index

    # first two rows are padded forward, last two rows are filled backward
    for kept_rows, fill_method in (
        (slice(None, 2), "pad"),
        (slice(-2, None), "backfill"),
    ):
        sparse_df = pandas_df[kept_rows].reindex(index)
        modin_df = pd.DataFrame(sparse_df)
        df_equals(
            modin_df.fillna(method=fill_method, limit=2),
            sparse_df.fillna(method=fill_method, limit=2),
        )


def test_fillna_dtype_conversion():
    """Check ``fillna`` with fill values of a different dtype than the data."""
    # make sure that fillna on an empty frame works
    empty_kwargs = {"index": range(3), "columns": ["A", "B"], "dtype": "float64"}
    pandas_df = pandas.DataFrame(**empty_kwargs)
    modin_df = pd.DataFrame(**empty_kwargs)
    df_equals(modin_df.fillna("nan"), pandas_df.fillna("nan"))

    frame_data = {"A": [1, np.nan], "B": [1.0, 2.0]}
    pandas_df = pandas.DataFrame(frame_data)
    modin_df = pd.DataFrame(frame_data)
    for fill_value in ("", 1, np.nan, 1.0):
        df_equals(modin_df.fillna(fill_value), pandas_df.fillna(fill_value))


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_fillna_skip_certain_blocks(data):
    """Check that filling with NaN gives the same frame as pandas."""
    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)

    # don't try to fill boolean, int blocks
    modin_filled = modin_df.fillna(np.nan)
    pandas_filled = pandas_df.fillna(np.nan)
    df_equals(modin_filled, pandas_filled)


def test_fillna_dict_series():
    """Check per-column fills given as a dict or as a Series."""
    frame_data = {
        "a": [np.nan, 1, 2, np.nan, np.nan],
        "b": [1, 2, 3, np.nan, np.nan],
        "c": [np.nan, 1, 2, 3, 4],
    }
    pandas_df = pandas.DataFrame(frame_data)
    modin_df = pd.DataFrame(frame_data)

    # only existing columns in the dict
    modin_filled = modin_df.fillna({"a": 0, "b": 5})
    pandas_filled = pandas_df.fillna({"a": 0, "b": 5})
    df_equals(modin_filled, pandas_filled)

    # the dict also names a column ("d") that the frame does not have
    modin_filled = modin_df.fillna({"a": 0, "b": 5, "d": 7})
    pandas_filled = pandas_df.fillna({"a": 0, "b": 5, "d": 7})
    df_equals(modin_filled, pandas_filled)

    # Series treated same as dict
    modin_filled = modin_df.fillna(modin_df.max())
    pandas_filled = pandas_df.fillna(pandas_df.max())
    df_equals(modin_filled, pandas_filled)


def test_fillna_dataframe():
    """Check ``fillna`` when the fill value is another DataFrame."""
    row_labels = list("VWXYZ")
    frame_data = {
        "a": [np.nan, 1, 2, np.nan, np.nan],
        "b": [1, 2, 3, np.nan, np.nan],
        "c": [np.nan, 1, 2, 3, 4],
    }
    pandas_df = pandas.DataFrame(frame_data, index=row_labels)
    modin_df = pd.DataFrame(frame_data, index=list("VWXYZ"))

    # df2 may have different index and columns
    pandas_filler = pandas.DataFrame(
        {"a": [np.nan, 10, 20, 30, 40], "b": [50, 60, 70, 80, 90], "foo": ["bar"] * 5},
        index=list("VWXuZ"),
    )
    modin_filler = pd.DataFrame(pandas_filler)

    # only those columns and indices which are shared get filled
    modin_filled = modin_df.fillna(modin_filler)
    pandas_filled = pandas_df.fillna(pandas_filler)
    df_equals(modin_filled, pandas_filled)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_fillna_columns(data):
    """Check method-based ``fillna`` along ``axis=1`` against pandas.

    Both fill directions are exercised; previously the forward-fill
    assertion was simply repeated twice.
    """
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    for fill_method in ("ffill", "bfill"):
        df_equals(
            modin_df.fillna(method=fill_method, axis=1),
            pandas_df.fillna(method=fill_method, axis=1),
        )


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_fillna_invalid_method(data):
    """Check that the misspelled fill method ``"ffil"`` raises ``ValueError``."""
    frame = pd.DataFrame(data)
    misspelled_method = "ffil"

    with pytest.raises(ValueError):
        frame.fillna(method=misspelled_method)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_fillna_invalid_value(data):
    """Check that unsupported fill value types raise ``TypeError``."""
    modin_df = pd.DataFrame(data)

    # list
    with pytest.raises(TypeError):
        modin_df.fillna([1, 2])
    # tuple
    with pytest.raises(TypeError):
        modin_df.fillna((1, 2))
    # frame with series
    first_column = modin_df.iloc[:, 0]
    with pytest.raises(TypeError):
        first_column.fillna(modin_df)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_fillna_col_reordering(data):
    """Check that a forward fill gives the same frame as pandas."""
    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)

    modin_filled = modin_df.fillna(method="ffill")
    pandas_filled = pandas_df.fillna(method="ffill")
    df_equals(modin_filled, pandas_filled)


def test_fillna_datetime_columns():
    """Fill frames mixing numeric, datetime and object columns with ``"?"``."""

    def assert_question_mark_fill_matches(frame_data):
        # both frames get their own datetime index, as built by ``pd.date_range``
        pandas_df = pandas.DataFrame(
            frame_data, index=pd.date_range("20130110", periods=3)
        )
        modin_df = pd.DataFrame(frame_data, index=pd.date_range("20130110", periods=3))
        df_equals(modin_df.fillna("?"), pandas_df.fillna("?"))

    # datetime column without missing values
    assert_question_mark_fill_matches(
        {
            "A": [-1, -2, np.nan],
            "B": pd.date_range("20130101", periods=3),
            "C": ["foo", "bar", None],
            "D": ["foo2", "bar2", None],
        }
    )

    # datetime column ending with NaT
    assert_question_mark_fill_matches(
        {
            "A": [-1, -2, np.nan],
            "B": [
                pandas.Timestamp("2013-01-01"),
                pandas.Timestamp("2013-01-02"),
                pandas.NaT,
            ],
            "C": ["foo", "bar", None],
            "D": ["foo2", "bar2", None],
        }
    )


@pytest.mark.parametrize("axis", [0, 1])
@pytest.mark.parametrize("skipna", [False, True])
@pytest.mark.parametrize("method", ["median", "skew"])
def test_median_skew(axis, skipna, method):
    """Compare ``median``/``skew`` on the ``float_nan_data`` dataset."""
    modin_df, pandas_df = create_test_dfs(test_data["float_nan_data"])

    def reduce_frame(df):
        return getattr(df, method)(axis=axis, skipna=skipna)

    eval_general(modin_df, pandas_df, reduce_frame)


@pytest.mark.parametrize("axis", ["rows", "columns"])
@pytest.mark.parametrize("method", ["median", "skew"])
def test_median_skew_transposed(axis, method):
    """Compare ``median``/``skew`` on the transposed ``int_data`` dataset."""
    modin_df, pandas_df = create_test_dfs(test_data["int_data"])

    def reduce_transposed(df):
        return getattr(df.T, method)(axis=axis)

    eval_general(modin_df, pandas_df, reduce_transposed)


@pytest.mark.parametrize("method", ["median", "skew", "std", "var", "sem"])
def test_median_skew_std_var_sem_1953(method):
    """Run the reductions on a frame whose index is built from two arrays."""
    # See #1953 for details
    index_arrays = [["1", "1", "2", "2"], ["1", "2", "3", "4"]]
    rows = [[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]]
    modin_df = pd.DataFrame(rows, index=index_arrays)
    pandas_df = pandas.DataFrame(rows, index=index_arrays)

    def reduce_frame(df):
        return getattr(df, method)()

    eval_general(modin_df, pandas_df, reduce_frame)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("axis", axis_values, ids=axis_keys)
@pytest.mark.parametrize("numeric_only", [False, True])
def test_mode(data, axis, numeric_only):
    """Compare ``DataFrame.mode`` on Modin and pandas, including raised errors."""
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    try:
        pandas_result = pandas_df.mode(axis=axis, numeric_only=numeric_only)
    except Exception as err:
        # Mirror whatever exception type pandas raised instead of assuming
        # TypeError, as the other pandas-mirroring tests in this module do.
        with pytest.raises(type(err)):
            modin_df.mode(axis=axis, numeric_only=numeric_only)
    else:
        modin_result = modin_df.mode(axis=axis, numeric_only=numeric_only)
        df_equals(modin_result, pandas_result)


def test_nlargest():
    """Check ``nlargest(3, "population")`` on a small frame with tied values."""
    countries = [
        "Italy",
        "France",
        "Malta",
        "Maldives",
        "Brunei",
        "Iceland",
        "Nauru",
        "Tuvalu",
        "Anguilla",
    ]
    columns = {
        "population": [
            59000000,
            65000000,
            434000,
            434000,
            434000,
            337000,
            11300,
            11300,
            11300,
        ],
        "GDP": [1937894, 2583560, 12011, 4520, 12128, 17036, 182, 38, 311],
        "alpha-2": ["IT", "FR", "MT", "MV", "BN", "IS", "NR", "TV", "AI"],
    }
    modin_df = pd.DataFrame(data=columns, index=countries)
    pandas_df = pandas.DataFrame(data=columns, index=countries)

    modin_top = modin_df.nlargest(3, "population")
    pandas_top = pandas_df.nlargest(3, "population")
    df_equals(modin_top, pandas_top)


def test_nsmallest():
    """Check ``nsmallest`` with one column and with two columns and ``keep="all"``."""
    countries = [
        "Italy",
        "France",
        "Malta",
        "Maldives",
        "Brunei",
        "Iceland",
        "Nauru",
        "Tuvalu",
        "Anguilla",
    ]
    columns = {
        "population": [
            59000000,
            65000000,
            434000,
            434000,
            434000,
            337000,
            11300,
            11300,
            11300,
        ],
        "GDP": [1937894, 2583560, 12011, 4520, 12128, 17036, 182, 38, 311],
        "alpha-2": ["IT", "FR", "MT", "MV", "BN", "IS", "NR", "TV", "AI"],
    }
    modin_df = pd.DataFrame(data=columns, index=countries)
    pandas_df = pandas.DataFrame(data=columns, index=countries)

    # single sort column
    modin_bottom = modin_df.nsmallest(n=3, columns="population")
    pandas_bottom = pandas_df.nsmallest(n=3, columns="population")
    df_equals(modin_bottom, pandas_bottom)

    # two sort columns, keeping all tied rows
    modin_bottom = modin_df.nsmallest(n=2, columns=["population", "GDP"], keep="all")
    pandas_bottom = pandas_df.nsmallest(
        n=2, columns=["population", "GDP"], keep="all"
    )
    df_equals(modin_bottom, pandas_bottom)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("axis", axis_values, ids=axis_keys)
@pytest.mark.parametrize(
    "dropna", bool_arg_values, ids=arg_keys("dropna", bool_arg_keys)
)
def test_nunique(data, axis, dropna):
    """Compare ``nunique`` on a frame and on its transpose with pandas."""
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    def assert_nunique_matches(modin_frame, pandas_frame):
        modin_result = modin_frame.nunique(axis=axis, dropna=dropna)
        pandas_result = pandas_frame.nunique(axis=axis, dropna=dropna)
        df_equals(modin_result, pandas_result)

    assert_nunique_matches(modin_df, pandas_df)
    assert_nunique_matches(modin_df.T, pandas_df.T)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("q", quantiles_values, ids=quantiles_keys)
def test_quantile(request, data, q):
    """Compare ``quantile`` on a frame and on its transpose with pandas.

    For test ids matched by ``no_numeric_dfs`` the Modin call is expected to
    raise ``ValueError`` instead.
    """
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)
    has_numeric_data = not name_contains(request.node.name, no_numeric_dfs)

    # the plain frames are checked first, then their transposes
    for transposed in (False, True):
        modin_frame = modin_df.T if transposed else modin_df
        pandas_frame = pandas_df.T if transposed else pandas_df

        if not has_numeric_data:
            with pytest.raises(ValueError):
                modin_frame.quantile(q)
            continue

        df_equals(modin_frame.quantile(q), pandas_frame.quantile(q))
        df_equals(modin_frame.quantile(q, axis=1), pandas_frame.quantile(q, axis=1))

        try:
            pandas_result = pandas_frame.quantile(q, axis=1, numeric_only=False)
        except Exception as err:
            # pandas failed: Modin must fail with the same exception type
            with pytest.raises(type(err)):
                modin_frame.quantile(q, axis=1, numeric_only=False)
        else:
            modin_result = modin_frame.quantile(q, axis=1, numeric_only=False)
            df_equals(modin_result, pandas_result)


def test_quantile_7157():
    """Run ``quantile(numeric_only=True)`` on string columns joined with floats."""
    # for details: https://github.com/modin-project/modin/issues/7157
    n_rows = 100
    n_fcols = 10
    n_mcols = 5

    feature_names = [f"feat_{i}" for i in range(n_fcols)]
    df1_md, df1_pd = create_test_dfs(
        random_state.rand(n_rows, n_fcols),
        columns=feature_names,
    )
    # NOTE(review): every iteration reuses the key "test_string1", so this
    # builds a single string column regardless of n_mcols - confirm whether
    # distinct column names were intended.
    df2_md, df2_pd = create_test_dfs(
        {
            "test_string1": ["test_string2" for _ in range(n_rows)]
            for _ in range(n_mcols)
        }
    )
    df3_md = pd.concat([df2_md, df1_md], axis=1)
    df3_pd = pandas.concat([df2_pd, df1_pd], axis=1)

    # scalar quantile, then a one-element and a two-element tuple
    for quantiles in (0.25, (0.25,), (0.25, 0.75)):
        eval_general(
            df3_md, df3_pd, lambda df: df.quantile(quantiles, numeric_only=True)
        )


@pytest.mark.parametrize("axis", ["rows", "columns"])
@pytest.mark.parametrize(
    "na_option", ["keep", "top", "bottom"], ids=["keep", "top", "bottom"]
)
def test_rank_transposed(axis, na_option):
    """Compare ``rank`` on the transposed ``int_data`` dataset with pandas.

    The frame is transposed before ranking, in line with the other
    ``*_transposed`` tests of this module; previously the test ranked the
    frame as-is despite its name.
    """
    eval_general(
        *create_test_dfs(test_data["int_data"]),
        lambda df: df.T.rank(axis=axis, na_option=na_option),
    )


@pytest.mark.parametrize("skipna", [False, True])
@pytest.mark.parametrize("ddof", int_arg_values, ids=arg_keys("ddof", int_arg_keys))
def test_sem_float_nan_only(skipna, ddof):
    """Compare ``sem`` on the ``float_nan_data`` dataset with pandas."""
    modin_df, pandas_df = create_test_dfs(test_data["float_nan_data"])

    def standard_error(df):
        return df.sem(skipna=skipna, ddof=ddof)

    eval_general(modin_df, pandas_df, standard_error)


@pytest.mark.parametrize("axis", ["rows", "columns"])
@pytest.mark.parametrize("ddof", int_arg_values, ids=arg_keys("ddof", int_arg_keys))
def test_sem_int_only(axis, ddof):
    """Compare ``sem`` on the ``int_data`` dataset with pandas."""
    modin_df, pandas_df = create_test_dfs(test_data["int_data"])

    def standard_error(df):
        return df.sem(axis=axis, ddof=ddof)

    eval_general(modin_df, pandas_df, standard_error)


@pytest.mark.parametrize("axis", [0, 1])
@pytest.mark.parametrize("skipna", [False, True])
@pytest.mark.parametrize("method", ["std", "var"])
def test_std_var(axis, skipna, method):
    """Compare ``std``/``var`` on the ``float_nan_data`` dataset with pandas."""
    modin_df, pandas_df = create_test_dfs(test_data["float_nan_data"])

    def reduce_frame(df):
        return getattr(df, method)(axis=axis, skipna=skipna)

    eval_general(modin_df, pandas_df, reduce_frame)


@pytest.mark.parametrize("axis", [0, 1, None])
def test_rank(axis):
    """Compare ``rank`` with pandas; ``axis=None`` is expected to raise."""
    # only axis=None comes with an expected exception
    expected_exception = (
        ValueError("No axis named None for object type DataFrame")
        if axis is None
        else None
    )
    modin_df, pandas_df = create_test_dfs(test_data["float_nan_data"])
    eval_general(
        modin_df,
        pandas_df,
        lambda df: df.rank(axis=axis),
        expected_exception=expected_exception,
    )


@pytest.mark.parametrize("axis", ["rows", "columns"])
@pytest.mark.parametrize("ddof", int_arg_values, ids=arg_keys("ddof", int_arg_keys))
@pytest.mark.parametrize("method", ["std", "var"])
def test_std_var_transposed(axis, ddof, method):
    """Compare ``std``/``var`` on the transposed ``int_data`` dataset."""
    modin_df, pandas_df = create_test_dfs(test_data["int_data"])

    def reduce_transposed(df):
        return getattr(df.T, method)(axis=axis, ddof=ddof)

    eval_general(modin_df, pandas_df, reduce_transposed)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_values(data):
    """Check that ``.values`` of a Modin frame equals the pandas one."""
    modin_values = pd.DataFrame(data).values
    pandas_values = pandas.DataFrame(data).values

    np.testing.assert_equal(modin_values, pandas_values)


================================================
FILE: modin/tests/pandas/extensions/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.


================================================
FILE: modin/tests/pandas/extensions/conftest.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import pytest

from modin.config import Backend, Engine, Execution, StorageFormat
from modin.core.execution.dispatching.factories import factories
from modin.core.execution.dispatching.factories.factories import BaseFactory, NativeIO
from modin.core.storage_formats.pandas.native_query_compiler import NativeQueryCompiler
from modin.pandas.api.extensions.extensions import _NON_EXTENDABLE_ATTRIBUTES


class Test1QueryCompiler(NativeQueryCompiler):
    # Native query compiler that reports a test-only storage format and engine.

    @property
    def storage_format(self):
        return "Test1_Storage_Format"

    @property
    def engine(self):
        return "Test1_Engine"


class Test1IO(NativeIO):
    # IO class for the test backend: same as NativeIO except that it is
    # bound to Test1QueryCompiler.
    query_compiler_cls = Test1QueryCompiler


class Test1Factory(BaseFactory):
    # Factory for the test backend; it only selects which IO class to use.

    @classmethod
    def prepare(cls):
        # Point this factory at the test IO class.
        cls.io_cls = Test1IO


@pytest.fixture
def Backend1():
    """Install the test factory, register ``Backend1`` if needed, return its name."""
    backend_name = "Backend1"
    # the factory attribute is (re)assigned on every use of the fixture
    factories.Test1_Storage_FormatOnTest1_EngineFactory = Test1Factory

    if backend_name in Backend.choices:
        # already registered by an earlier use of this fixture
        return backend_name

    # NOTE(review): the options are added as "Test1_storage_format" and
    # "Test1_engine" while the Execution below uses "Test1_Storage_Format" and
    # "Test1_Engine" - presumably add_option normalizes the case; confirm.
    StorageFormat.add_option("Test1_storage_format")
    Engine.add_option("Test1_engine")
    test_execution = Execution(
        storage_format="Test1_Storage_Format", engine="Test1_Engine"
    )
    Backend.register_backend(backend_name, test_execution)
    return backend_name


@pytest.fixture(
    # sort the set of non-extendable attributes to make the sequence of test
    # cases deterministic for pytest-xdist.
    params=sorted(_NON_EXTENDABLE_ATTRIBUTES),
)
def non_extendable_attribute_name(request) -> str:
    """Yield each name in ``_NON_EXTENDABLE_ATTRIBUTES``, one per test case."""
    return request.param


================================================
FILE: modin/tests/pandas/extensions/test_api_reexport.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.


import pandas

import modin.pandas as pd


def test_extensions_does_not_overwrite_pandas_api():
    """Check that ``pd.api`` still mirrors ``pandas.api`` once extensions load."""
    # Ensure that importing modin.pandas.api.extensions does not overwrite our re-export
    # of pandas.api submodules.
    import modin.pandas.api.extensions as ext

    # Top-level submodules should remain the same
    modin_submodules = set(pd.api.__all__)
    pandas_submodules = set(pandas.api.__all__)
    assert modin_submodules == pandas_submodules

    # Methods we define, like ext.register_dataframe_accessor should be different
    modin_register = ext.register_dataframe_accessor
    pandas_register = pandas.api.extensions.register_dataframe_accessor
    assert modin_register is not pandas_register

    # Methods from other submodules, like pd.api.types.is_bool_dtype, should be the same
    assert pd.api.types.is_bool_dtype is pandas.api.types.is_bool_dtype


================================================
FILE: modin/tests/pandas/extensions/test_base_extensions.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import re

import pytest

import modin.pandas as pd
from modin.pandas.api.extensions import register_base_accessor
from modin.tests.pandas.utils import df_equals


@pytest.mark.parametrize("data_class", [pd.DataFrame, pd.Series])
class TestOverrideMethodForOneBackend:
    """Tests for extensions that are registered for the ``Backend1`` backend only."""

    def test_add_simple_method(self, Backend1, data_class):
        expected_string_val = "Some string value"
        method_name = "new_method"
        modin_object = data_class([1, 2, 3]).set_backend(Backend1)

        def my_method_implementation(self):
            return expected_string_val

        register_base_accessor(name=method_name, backend=Backend1)(
            my_method_implementation
        )

        assert hasattr(data_class, method_name)
        assert getattr(modin_object, method_name)() == expected_string_val

        # the extension must not be reachable from the "pandas" backend
        missing_attribute_message = re.escape(
            f"{data_class.__name__} object has no attribute {method_name}"
        )
        with pytest.raises(AttributeError, match=missing_attribute_message):
            getattr(modin_object.set_backend("pandas"), method_name)()

    def test_add_non_method(self, Backend1, data_class):
        expected_val = 4
        attribute_name = "four"
        register = register_base_accessor(name=attribute_name, backend=Backend1)
        register(expected_val)

        backend_object = data_class().set_backend(Backend1)
        assert backend_object.four == expected_val
        pandas_object = data_class().set_backend("pandas")
        assert not hasattr(pandas_object, attribute_name)

    def test_method_uses_existing_methods(self, Backend1, data_class):
        modin_object = data_class([1, 2, 3]).set_backend(Backend1)
        method_name = "self_accessor"
        expected_result = modin_object.sum() / modin_object.count()

        def my_average(self):
            return self.sum() / self.count()

        register_base_accessor(name=method_name, backend=Backend1)(my_average)

        if data_class is not pd.DataFrame:
            assert modin_object.self_accessor() == expected_result
        else:
            df_equals(modin_object.self_accessor(), expected_result)

    def test_override_existing_method(self, Backend1, data_class):
        modin_object = data_class([3, 2, 1])

        def my_copy(self, *args, **kwargs):
            return self + 1

        register_base_accessor(name="copy", backend=Backend1)(my_copy)

        overridden_copy = modin_object.set_backend(Backend1).copy()
        df_equals(overridden_copy, modin_object + 1)


@pytest.mark.parametrize("data_class", [pd.DataFrame, pd.Series])
@pytest.mark.parametrize("backend", ["pandas", "python_test"])
class TestOverrideMethodForAllBackends:
    """Tests for extensions that are registered without naming a backend."""

    def test_add_simple_method(self, backend, data_class):
        expected_string_val = "Some string value"
        method_name = "new_method"

        def my_method_implementation(self):
            return expected_string_val

        register_base_accessor(name=method_name)(my_method_implementation)

        modin_object = data_class([1, 2, 3]).set_backend(backend)

        # reachable both through getattr and through plain attribute access
        assert getattr(modin_object, method_name)() == expected_string_val
        assert modin_object.new_method() == expected_string_val

    def test_add_non_method(self, data_class, backend):
        expected_val = 4
        attribute_name = "four"
        register = register_base_accessor(name=attribute_name)
        register(expected_val)

        backend_object = data_class().set_backend(backend)
        assert backend_object.four == expected_val

    def test_method_uses_existing_methods(self, data_class, backend):
        modin_object = data_class([1, 2, 3]).set_backend(backend)
        method_name = "self_accessor"
        expected_result = modin_object.sum() / modin_object.count()

        def my_average(self):
            return self.sum() / self.count()

        register_base_accessor(name=method_name)(my_average)

        if data_class is not pd.DataFrame:
            assert modin_object.self_accessor() == expected_result
        else:
            df_equals(modin_object.self_accessor(), expected_result)

    def test_override_existing_method(self, data_class, backend):
        modin_object = data_class([3, 2, 1])

        def my_copy(self, *args, **kwargs):
            return self + 1

        register_base_accessor(name="copy")(my_copy)

        overridden_copy = modin_object.set_backend(backend).copy()
        df_equals(overridden_copy, modin_object + 1)


class TestDunders:
    """
    Check that special "dunder" methods such as __len__ can be overridden.

    Python looks these methods up on the type, i.e. it calls
    DataFrame.__len__(obj) rather than getattr(obj, "__len__")().
    source: https://docs.python.org/3/reference/datamodel.html#special-lookup
    """

    @pytest.mark.parametrize("data_class", [pd.DataFrame, pd.Series])
    def test_len(self, Backend1, data_class):
        def always_get_1(self):
            return 1

        register_base_accessor(name="__len__", backend=Backend1)(always_get_1)

        # an object that was not moved to Backend1 keeps its real length
        modin_object = data_class([1, 2, 3])
        assert len(modin_object) == 3

        # on Backend1 both len() and the explicit call use the override
        backend_object = modin_object.set_backend(Backend1)
        assert len(backend_object) == 1
        assert backend_object.__len__() == 1


@pytest.mark.parametrize("data_class", [pd.DataFrame, pd.Series])
class TestProperty:
    """Tests for registering ``property`` objects as extensions."""

    def test_override_loc_for_one_backend(self, Backend1, data_class):
        modin_object = data_class([1, 2, 3])

        def my_loc(self):
            return self.index[0]

        register_base_accessor(name="loc", backend=Backend1)(property(my_loc))

        backend_object = modin_object.set_backend(Backend1)
        assert isinstance(backend_object.loc, int)
        assert backend_object.loc == 0

    @pytest.mark.parametrize("backend", ["pandas", "python_test"])
    def test_override_loc_for_all_backends(self, backend, data_class):
        def my_loc(self):
            return self.index[0]

        register_base_accessor(name="loc", backend=None)(property(my_loc))

        modin_object = data_class([1, 2, 3])

        backend_object = modin_object.set_backend(backend)
        assert isinstance(backend_object.loc, int)
        assert backend_object.loc == 0

    def test_add_deletable_property(self, Backend1, data_class):
        # register a public property `public_property_name` that is backed by
        # a private attribute `private_property_name`.

        public_property_name = "property_name"
        private_property_name = "_property_name"

        def get_property(self):
            return getattr(self, private_property_name)

        def set_property(self, value):
            setattr(self, private_property_name, value)

        def del_property(self):
            delattr(self, private_property_name)

        full_property = property(get_property, set_property, del_property)
        register_base_accessor(name=public_property_name, backend=Backend1)(
            full_property
        )

        modin_object = data_class({"a": [1, 2, 3], "b": [4, 5, 6]})
        # the property only exists once the object is moved to Backend1
        assert not hasattr(modin_object, public_property_name)
        backend_object = modin_object.set_backend(Backend1)
        setattr(backend_object, public_property_name, "value")
        assert getattr(backend_object, public_property_name) == "value"
        delattr(backend_object, public_property_name)
        # check that the deletion works.
        assert not hasattr(backend_object, private_property_name)

    @pytest.mark.parametrize("backend", ["pandas", "python_test"])
    def test_add_deletable_property_for_all_backends(self, data_class, backend):
        # register a public property `public_property_name` that is backed by
        # a private attribute `private_property_name`.

        public_property_name = "property_name"
        private_property_name = "_property_name"

        def get_property(self):
            return getattr(self, private_property_name)

        def set_property(self, value):
            setattr(self, private_property_name, value)

        def del_property(self):
            delattr(self, private_property_name)

        full_property = property(get_property, set_property, del_property)
        register_base_accessor(name=public_property_name)(full_property)

        modin_object = data_class({"a": [1, 2, 3], "b": [4, 5, 6]}).set_backend(backend)
        setattr(modin_object, public_property_name, "value")
        assert getattr(modin_object, public_property_name) == "value"
        delattr(modin_object, public_property_name)
        # check that the deletion works.
        assert not hasattr(modin_object, private_property_name)

    def test_get_property_that_raises_attribute_error_on_get_modin_issue_7562(
        self, data_class
    ):
        def get_property(self):
            raise AttributeError

        failing_property = property(fget=get_property)
        register_base_accessor(name="extension_property")(failing_property)
        modin_object = data_class()
        # the AttributeError raised by the getter must reach the caller
        with pytest.raises(AttributeError):
            getattr(modin_object, "extension_property")

    def test_non_settable_extension_property(self, Backend1, data_class):
        modin_object = data_class([0])
        property_name = "property_name"
        # getter only: no setter is defined
        read_only_property = property(fget=(lambda self: 4))
        register_base_accessor(name=property_name, backend=Backend1)(
            read_only_property
        )

        assert not hasattr(modin_object, property_name)
        backend_object = modin_object.set_backend(Backend1)
        assert getattr(backend_object, property_name) == 4
        with pytest.raises(AttributeError):
            setattr(backend_object, property_name, "value")

    def test_delete_non_deletable_extension_property(self, Backend1, data_class):
        modin_object = data_class([0])
        property_name = "property_name"
        # getter only: no deleter is defined
        read_only_property = property(fget=(lambda self: "value"))
        register_base_accessor(name=property_name, backend=Backend1)(
            read_only_property
        )

        assert not hasattr(modin_object, property_name)
        backend_object = modin_object.set_backend(Backend1)
        assert hasattr(backend_object, property_name)
        with pytest.raises(AttributeError):
            delattr(backend_object, property_name)


@pytest.mark.parametrize("data_class", [pd.DataFrame, pd.Series])
def test_deleting_extension_that_is_not_property_raises_attribute_error(
    Backend1, data_class
):
    """
    Check that `delattr` on a registered plain-method extension raises AttributeError.
    """
    method_name = "new_method"
    returned_text = "Some string value"

    def return_fixed_text(self):
        return returned_text

    register_base_accessor(name=method_name, backend=Backend1)(return_fixed_text)

    on_backend1 = data_class([0]).set_backend(Backend1)
    assert hasattr(data_class, method_name)
    with pytest.raises(AttributeError):
        delattr(on_backend1, method_name)


def test_disallowed_extensions(Backend1, non_extendable_attribute_name):
    """
    Check that registering an extension under a reserved name raises ValueError.
    """
    expected_message = re.escape(
        f"Cannot register an extension with the reserved name {non_extendable_attribute_name}."
    )
    # Both the decorator factory call and the decorator application stay inside
    # the `raises` block, so the error is caught whichever of them raises it.
    with pytest.raises(ValueError, match=expected_message):
        decorator = register_base_accessor(
            name=non_extendable_attribute_name, backend=Backend1
        )
        decorator("unused_value")


================================================
FILE: modin/tests/pandas/extensions/test_dataframe_extensions.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import re
from unittest import mock

import pandas
import pytest

import modin.pandas as pd
from modin.config import AutoSwitchBackend, Backend
from modin.config import context as config_context
from modin.pandas.api.extensions import register_dataframe_accessor

# Captured once at import time: the `__init__` entry stored under the `None`
# key of `pd.DataFrame._extensions`. The tests below wrap it in a mock so that
# overriding `__init__` still delegates to this implementation.
# NOTE(review): the `None` key presumably holds the implementation used for
# all backends — confirm against the extensions registry.
default___init__ = pd.DataFrame._extensions[None]["__init__"]


def test_dataframe_extension_simple_method(Backend1):
    """
    Check that a plain method registered for one backend is callable on a frame using it.
    """
    method_name = "new_method"
    returned_text = "Some string value"
    frame = pd.DataFrame([1, 2, 3]).set_backend(Backend1)

    def return_fixed_text(self):
        return returned_text

    register_dataframe_accessor(name=method_name, backend=Backend1)(
        return_fixed_text
    )

    assert hasattr(pd.DataFrame, method_name)
    assert frame.new_method() == returned_text


def test_dataframe_extension_non_method(Backend1):
    """
    Check that a non-callable value can be registered as a dataframe attribute.
    """
    attribute_name = "four"
    registered_value = 4
    decorator = register_dataframe_accessor(name=attribute_name, backend=Backend1)
    decorator(registered_value)
    frame = pd.DataFrame([1, 2, 3]).set_backend(Backend1)

    assert getattr(frame, attribute_name) == registered_value


def test_dataframe_extension_accessing_existing_methods(Backend1):
    """
    Check that an extension method can call existing dataframe methods on `self`.
    """
    frame = pd.DataFrame([1, 2, 3]).set_backend(Backend1)
    accessor_name = "self_accessor"
    # Computed before registering, using the built-in methods directly.
    mean_from_builtins = frame.sum() / frame.count()

    def average(self):
        total = self.sum()
        return total / self.count()

    register_dataframe_accessor(name=accessor_name, backend=Backend1)(average)

    assert frame.self_accessor().equals(mean_from_builtins)


def test_dataframe_extension_overrides_existing_method(Backend1):
    """
    Check that an extension registered as `sort_values` replaces the built-in method.
    """
    frame = pd.DataFrame([3, 2, 1])
    first_after_builtin_sort = frame.sort_values(0).iloc[0, 0]
    assert first_after_builtin_sort == 1

    def identity_sort_values(self):
        return self

    register_dataframe_accessor(name="sort_values", backend=Backend1)(
        identity_sort_values
    )

    # The override returns the frame untouched, so the first value stays 3.
    on_backend1 = frame.set_backend(Backend1)
    assert on_backend1.sort_values().iloc[0, 0] == 3


@pytest.mark.parametrize(
    "method_name",
    [
        "pow",
        "__pow__",
        "__ipow__",
    ],
)
def test_dataframe_extension_overrides_pow_github_issue_7495(method_name):
    """
    Check that `pow`, `__pow__` and `__ipow__` can each be overridden on the Pandas backend.
    """

    def always_four(*args, **kwargs):
        return 4

    register_dataframe_accessor(method_name, backend="Pandas")(always_four)
    frame = pd.DataFrame([1]).set_backend("Pandas")
    assert getattr(frame, method_name)() == 4


def test_override_pow_and__pow__to_different_implementations():
    """
    Check that `pow` and `__pow__` can be overridden independently of each other.
    """

    def pow_override(*args, **kwargs):
        return "pow_result"

    def dunder_pow_override(*args, **kwargs):
        return "__pow___result"

    register_dataframe_accessor("pow", backend="Pandas")(pow_override)
    register_dataframe_accessor("__pow__", backend="Pandas")(dunder_pow_override)

    frame = pd.DataFrame([1]).set_backend("pandas")
    assert frame.pow() == "pow_result"
    assert frame.__pow__() == "__pow___result"


def test_dataframe_extension_method_uses_superclass_method(Backend1):
    """
    Check that an extension can delegate to the superclass method via `super()`.
    """
    frame = pd.DataFrame([3, 2, 1])
    assert frame.sort_values(0).iloc[0, 0] == 1

    def descending_sort_values(self, by):
        parent = super(pd.DataFrame, self)
        return parent.sort_values(by=by, ascending=False)

    register_dataframe_accessor(name="sort_values", backend=Backend1)(
        descending_sort_values
    )

    reordered = frame.set_backend(Backend1).sort_values(by=0)
    assert reordered.iloc[0, 0] == 3


class TestOverride__init__:
    """
    Tests for registering an extension under the name `__init__`.

    Each test registers a `mock.Mock` that wraps `default___init__`, so the
    constructor calls can be inspected while construction still goes through
    the wrapped implementation.
    """

    def test_override_one_backend_and_pass_no_query_compilers(self):
        """
        Override `__init__` for the current backend and construct from plain data.
        """
        default_backend = Backend.get()
        backend_init = mock.Mock(wraps=default___init__)
        register_dataframe_accessor(name="__init__", backend=default_backend)(
            backend_init
        )
        output_df = pd.DataFrame([1], index=["a"], columns=["b"])
        assert output_df.get_backend() == default_backend
        backend_init.assert_has_calls(
            [
                mock.call(output_df, [1], index=["a"], columns=["b"]),
                # There's a second, internal call to the dataframe constructor that
                # uses a different dataframe as `self`.
                mock.call(mock.ANY, query_compiler=output_df._query_compiler),
            ]
        )

    def test_override_one_backend_and_pass_query_compiler_kwarg(self):
        """
        Override `__init__` for "Pandas" and construct from a `query_compiler` kwarg.
        """
        backend = "Pandas"
        backend_init = mock.Mock(wraps=default___init__)
        register_dataframe_accessor(name="__init__", backend=backend)(backend_init)

        with config_context(Backend=backend):
            input_df = pd.DataFrame()

        # Discard the calls recorded while building `input_df`, so that only the
        # construction below is checked.
        backend_init.reset_mock()
        output_df = pd.DataFrame(query_compiler=input_df._query_compiler)
        assert output_df.get_backend() == backend
        backend_init.assert_called_once_with(
            output_df, query_compiler=input_df._query_compiler
        )

    @pytest.mark.parametrize("input_backend", ["Python_Test", "Pandas"])
    def test_override_all_backends_and_pass_query_compiler_kwarg(self, input_backend):
        """
        Override `__init__` without a backend argument and construct from a `query_compiler` kwarg.
        """
        backend_init = mock.Mock(wraps=default___init__)
        register_dataframe_accessor(name="__init__")(backend_init)

        with config_context(Backend=input_backend):
            input_df = pd.DataFrame()

        # Discard the calls recorded while building `input_df`, so that only the
        # construction below is checked.
        backend_init.reset_mock()
        output_df = pd.DataFrame(query_compiler=input_df._query_compiler)
        assert output_df.get_backend() == input_backend
        backend_init.assert_called_once_with(
            output_df, query_compiler=input_df._query_compiler
        )


class TestDunders:
    """
    Check that special "dunder" methods such as __len__ can be overridden.

    Python invokes these through the type, e.g. DataFrame.__len__(obj), and
    not through getattr(obj, "__len__")(), so both spellings are exercised.
    source: https://docs.python.org/3/reference/datamodel.html#special-lookup
    """

    def test_len(self, Backend1):
        """
        Override `__len__` for Backend1 and check both `len(obj)` and `obj.__len__()`.
        """

        def length_of_one(self):
            return 1

        register_dataframe_accessor(name="__len__", backend=Backend1)(length_of_one)

        original = pd.DataFrame([1, 2, 3])
        assert len(original) == 3
        switched = original.set_backend(Backend1)
        assert len(switched) == 1
        assert switched.__len__() == 1

    def test_repr(self, Backend1):
        """
        Override `__repr__` for Backend1 and check both `repr(obj)` and `obj.__repr__()`.
        """

        def fixed_repr(self) -> str:
            return "dataframe_string"

        register_dataframe_accessor(name="__repr__", backend=Backend1)(fixed_repr)

        original = pd.DataFrame([1, 2, 3])
        assert repr(original) == repr(original.modin.to_pandas())
        switched = original.set_backend(Backend1)
        assert repr(switched) == "dataframe_string"
        assert switched.__repr__() == "dataframe_string"


class TestProperty:
    """
    Tests for registering `property` objects as dataframe extensions.
    """

    def test_override_columns(self, Backend1):
        """
        Override the `columns` getter and setter for Backend1.
        """
        original = pd.DataFrame([["a", "b"]])

        @property
        def reversed_columns(self):
            return self._query_compiler.columns[::-1]

        @reversed_columns.setter
        def reversed_columns(self, new_columns):
            self._query_compiler.columns = [
                f"{label}_custom" for label in new_columns
            ]

        register_dataframe_accessor(name="columns", backend=Backend1)(
            reversed_columns
        )

        assert list(original.columns) == [0, 1]
        switched = original.set_backend(Backend1)
        assert list(switched.columns) == [1, 0]
        switched.columns = [2, 3]
        # The setter appends "_custom"; the getter then reverses the order.
        assert list(switched.columns) == ["3_custom", "2_custom"]

    def test_search_for_missing_attribute_in_overridden_columns(self, Backend1):
        """
        Look up a missing attribute on a frame whose `columns` getter is overridden.

        The lookup is expected to consult the overridden getter exactly once
        before raising AttributeError.
        """
        column_name = "column_name"

        def plain_columns(self):
            return self._query_compiler.columns

        getter_spy = mock.Mock(wraps=plain_columns)
        register_dataframe_accessor(name="columns", backend=Backend1)(
            property(fget=getter_spy)
        )

        frame = pd.DataFrame({column_name: ["a"]}).set_backend(Backend1)

        expected_message = "'DataFrame' object has no attribute 'non_existent_column'"
        with pytest.raises(AttributeError, match=expected_message):
            getattr(frame, "non_existent_column")
        getter_spy.assert_called_once_with(frame)

    def test_add_deletable_property(self, Backend1):
        """
        Register a property with getter, setter and deleter for Backend1.
        """
        public_property_name = "property_name"
        private_property_name = "_property_name"

        # The public property is backed by the private attribute: each
        # accessor below forwards to `private_property_name` on the instance.

        def read_private(self):
            return getattr(self, private_property_name)

        def write_private(self, value):
            setattr(self, private_property_name, value)

        def remove_private(self):
            delattr(self, private_property_name)

        backed_property = property(
            fget=read_private, fset=write_private, fdel=remove_private
        )
        register_dataframe_accessor(name=public_property_name, backend=Backend1)(
            backed_property
        )

        original = pd.DataFrame([0])
        assert not hasattr(original, public_property_name)
        switched = original.set_backend(Backend1)
        setattr(switched, public_property_name, "value")
        assert hasattr(switched, private_property_name)
        assert getattr(switched, private_property_name) == "value"
        delattr(switched, public_property_name)
        # Deleting the public property must have removed the private attribute.
        assert not hasattr(switched, private_property_name)

    def test_non_settable_extension_property(self, Backend1):
        """
        Check that assigning to a getter-only extension property raises AttributeError.
        """
        original = pd.DataFrame([0])
        property_name = "property_name"

        @property
        def always_four(self):
            return 4

        register_dataframe_accessor(name=property_name, backend=Backend1)(
            always_four
        )

        assert not hasattr(original, property_name)
        switched = original.set_backend(Backend1)
        assert getattr(switched, property_name) == 4
        with pytest.raises(AttributeError):
            setattr(switched, property_name, "value")

    def test_delete_non_deletable_extension_property(self, Backend1):
        """
        Check that deleting an extension property without a deleter raises AttributeError.
        """
        property_name = "property_name"

        @property
        def fixed_value(self):
            return "value"

        register_dataframe_accessor(name=property_name, backend=Backend1)(
            fixed_value
        )

        original = pd.DataFrame([0])
        assert not hasattr(original, property_name)
        switched = original.set_backend(Backend1)
        assert hasattr(switched, property_name)
        with pytest.raises(AttributeError):
            delattr(switched, property_name)


def test_deleting_extension_that_is_not_property_raises_attribute_error(Backend1):
    """
    Check that `delattr` on a registered plain-method extension raises AttributeError.
    """
    method_name = "new_method"
    returned_text = "Some string value"

    def return_fixed_text(self):
        return returned_text

    register_dataframe_accessor(name=method_name, backend=Backend1)(
        return_fixed_text
    )

    frame = pd.DataFrame([1, 2, 3]).set_backend(Backend1)
    assert hasattr(pd.DataFrame, method_name)
    assert frame.new_method() == returned_text
    with pytest.raises(AttributeError):
        delattr(frame, method_name)


def test_disallowed_extensions(Backend1, non_extendable_attribute_name):
    """
    Check that registering a dataframe extension under a reserved name raises ValueError.
    """
    expected_message = re.escape(
        f"Cannot register an extension with the reserved name {non_extendable_attribute_name}."
    )
    # Both the decorator factory call and the decorator application stay inside
    # the `raises` block, so the error is caught whichever of them raises it.
    with pytest.raises(ValueError, match=expected_message):
        decorator = register_dataframe_accessor(
            name=non_extendable_attribute_name, backend=Backend1
        )
        decorator("unused_value")


def test_correct_backend_with_pin(Backend1):
    """
    Check that a pinned frame dispatches to the extension of its own backend.

    An earlier implementation kept the correct backend on the frame but picked
    the wrong extension method when dispatching, which this test guards against.
    """
    assert not AutoSwitchBackend.get()

    def fake_repr(self):
        return "fake_repr"

    register_dataframe_accessor(name="__repr__", backend=Backend1)(fake_repr)

    with config_context(Backend="Python_Test"):
        frame = pd.DataFrame([1])
        assert frame.get_backend() == "Python_Test"
        native_repr = repr(pandas.DataFrame([1]))
        assert repr(frame) == native_repr
        frame.set_backend(Backend1, inplace=True)
        frame.pin_backend(inplace=True)
        assert frame.get_backend() == Backend1
        assert repr(frame) == "fake_repr"


def test_get_extension_from_dataframe_that_is_on_non_default_backend_when_auto_switch_is_false(
    Backend1,
):
    """
    Check that a frame moved to "Pandas" uses the "Pandas" extension while Backend1 is configured.
    """
    assert not AutoSwitchBackend.get()
    with config_context(Backend=Backend1):
        moved_frame = pd.DataFrame([1, 2]).move_to("Pandas")

        def fixed_sum(df):
            return "small_sum_result"

        register_dataframe_accessor("sum", backend="Pandas")(fixed_sum)
        assert moved_frame.sum() == "small_sum_result"


================================================
FILE: modin/tests/pandas/extensions/test_groupby_extensions.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

from functools import cached_property

import pytest

import modin.pandas as pd
from modin.config import AutoSwitchBackend, Backend
from modin.config import context as config_context
from modin.pandas.api.extensions import (
    register_dataframe_groupby_accessor,
    register_series_groupby_accessor,
)
from modin.pandas.groupby import DataFrameGroupBy, SeriesGroupBy
from modin.tests.pandas.utils import default_to_pandas_ignore_string, df_equals
from modin.tests.test_utils import (
    current_execution_is_native,
    warns_that_defaulting_to_pandas_if,
)


@pytest.mark.parametrize(
    "get_groupby,register_accessor",
    (
        (lambda df: df.groupby("col0"), register_dataframe_groupby_accessor),
        (lambda df: df.groupby("col0")["col1"], register_series_groupby_accessor),
    ),
)
@config_context(Backend="Pandas")
@pytest.mark.parametrize("extension_backend", [None, "Pandas"])
@pytest.mark.parametrize("method_name", ["new_method", "sum"])
def test_add_simple_method(
    get_groupby, register_accessor, extension_backend, method_name
):
    """
    Register a plain method on a groupby class and call it through a groupby object.

    Covers both a new name and an existing one ("sum"), registered either
    without a backend or for "Pandas".
    """
    returned_text = "expected_string_val"
    frame = pd.DataFrame({"col0": [1, 2, 3], "col1": [4, 5, 6]})

    def return_fixed_text(self):
        return returned_text

    register_accessor(method_name, backend=extension_backend)(return_fixed_text)

    grouped = get_groupby(frame)
    assert hasattr(grouped, method_name)
    bound_method = getattr(grouped, method_name)
    assert bound_method() == returned_text


def test_dataframe_accessor_for_method_that_series_groupby_does_not_override():
    """
    Test sum(), which SeriesGroupBy inherits from DataFrameGroupBy unchanged.

    An extension registered for DataFrameGroupBy.sum() is expected to change
    the result of sum() on both DataFrameGroupBy and SeriesGroupBy objects.
    """
    # SeriesGroupBy.sum is the inherited DataFrameGroupBy.sum wrapped in a
    # method for handling extensions and casting; unwrap it to compare.
    inherited_sum = SeriesGroupBy.sum._wrapped_method_for_casting
    assert DataFrameGroupBy.sum is inherited_sum

    frame = pd.DataFrame({"col0": [1, 2, 3], "col1": [4, 5, 6]})
    accessor_result = "test_result"

    def fixed_sum(self, *args, **kwargs):
        return accessor_result

    register_dataframe_groupby_accessor("sum", backend=Backend.get())(fixed_sum)

    assert frame.groupby("col0").sum() == accessor_result
    assert frame.groupby("col0")["col1"].sum() == accessor_result


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
def test_dataframe_accessor_for_method_that_series_groupby_overrides():
    """
    Test describe(), which SeriesGroupBy defines separately from DataFrameGroupBy.

    An extension registered for DataFrameGroupBy.describe() is expected to
    leave SeriesGroupBy.describe() untouched.
    """
    # SeriesGroupBy must have its own describe() for this test to be meaningful.
    series_describe = SeriesGroupBy.describe._wrapped_method_for_casting
    assert DataFrameGroupBy.describe is not series_describe

    frame = pd.DataFrame({"col0": [1, 2, 3], "col1": [4, 5, 6]})
    series_result_before = frame.groupby("col0")["col1"].describe()
    accessor_result = "test_result"

    def fixed_describe(self, *args, **kwargs):
        return accessor_result

    register_dataframe_groupby_accessor("describe", backend=Backend.get())(
        fixed_describe
    )

    assert frame.groupby("col0").describe() == accessor_result
    series_result_after = frame.groupby("col0")["col1"].describe()
    df_equals(series_result_after, series_result_before)


@pytest.mark.parametrize(
    "get_groupby,register_accessor",
    (
        (lambda df: df.groupby("col0"), register_dataframe_groupby_accessor),
        (lambda df: df.groupby("col0")["col1"], register_series_groupby_accessor),
    ),
)
class TestProperty:
    """
    Tests for registering `property` objects as groupby extensions.

    Every test runs twice: once for DataFrameGroupBy and once for
    SeriesGroupBy. `get_groupby` builds the groupby object under test from a
    dataframe and `register_accessor` is the matching registration function.
    """

    @pytest.mark.parametrize("df_backend", ["Pandas", "Python_Test"])
    def test_add_read_only_property_for_all_backends(
        self, df_backend, get_groupby, register_accessor
    ):
        """
        Register a getter-only property without a backend and check it on each backend.
        """
        expected_string_val = "expected_string_val"
        property_name = "new_property"

        # Use the parametrized accessor and groupby factory throughout, so the
        # SeriesGroupBy parametrization exercises SeriesGroupBy rather than
        # repeating the DataFrameGroupBy checks.
        @register_accessor(property_name)
        @property
        def new_property(self):
            return expected_string_val

        with config_context(Backend=df_backend):
            df = pd.DataFrame({"col0": [1, 2, 3], "col1": [4, 5, 6]})
            assert get_groupby(df).new_property == expected_string_val

            # The property has no deleter.
            with pytest.raises(AttributeError):
                del get_groupby(df).new_property

            # The property has no setter.
            with pytest.raises(AttributeError):
                get_groupby(df).new_property = "new_value"

    def test_override_ngroups_getter_for_one_backend(
        self, get_groupby, register_accessor
    ):
        """
        Override the `ngroups` getter for "Pandas" only.
        """
        accessor_ngroups = -1
        property_name = "ngroups"

        @register_accessor(property_name, backend="Pandas")
        @property
        def ngroups(self):
            return accessor_ngroups

        pandas_df = pd.DataFrame({"col0": [1, 2, 3], "col1": [4, 5, 6]}).move_to(
            "pandas"
        )
        groupby = get_groupby(pandas_df)
        assert groupby.ngroups == accessor_ngroups

        # Check that the accessor doesn't work on the Python_Test backend.
        python_test_df = pandas_df.move_to("Python_Test")
        groupby = get_groupby(python_test_df)
        with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):
            assert groupby.ngroups == 3

    def test_add_ngroups_setter_and_deleter_for_one_backend(
        self, get_groupby, register_accessor
    ):
        """
        Replace `ngroups` for "Pandas" with a property backed by `_ngroups`.
        """

        def _get_ngroups(self):
            return self._ngroups

        def _delete_ngroups(self):
            delattr(self, "_ngroups")

        def _set_ngroups(self, value):
            self._ngroups = value

        register_accessor("ngroups", backend="Pandas")(
            property(fget=_get_ngroups, fset=_set_ngroups, fdel=_delete_ngroups)
        )

        python_test_df = pd.DataFrame({"col0": [1, 2, 3], "col1": [4, 5, 6]}).move_to(
            "python_test"
        )

        python_test_groupby = get_groupby(python_test_df)

        # On the other backend the original read-only `ngroups` is still used.
        with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):
            assert python_test_groupby.ngroups == 3

        with pytest.raises(AttributeError):
            python_test_groupby.ngroups = 4

        with pytest.raises(AttributeError):
            del python_test_groupby.ngroups

        pandas_groupby = get_groupby(python_test_df.move_to("Pandas"))

        # The overriding getter reads `_ngroups`, which has not been set yet.
        assert not hasattr(pandas_groupby, "ngroups")

        pandas_groupby.ngroups = -1

        assert pandas_groupby.ngroups == -1

        # Deleting ngroups should delete the private attribute _ngroups.
        del pandas_groupby.ngroups

        # now getting ngroups should raise an AttributeError because the
        # private attribute _ngroups is missing.
        assert not hasattr(pandas_groupby, "ngroups")

    def test_add_deletable_property_for_one_backend(
        self, get_groupby, register_accessor
    ):
        """
        Register a new property with getter, setter and deleter for "Pandas" only.
        """
        public_property_name = "property_name"
        private_property_name = "_property_name"

        # register a public property `public_property_name` that is backed by
        # a private attribute `private_property_name`.

        def get_property(self):
            return getattr(self, private_property_name)

        def set_property(self, value):
            setattr(self, private_property_name, value)

        def del_property(self):
            # Note that deleting the public property deletes the private
            # attribute, not the public property itself.
            delattr(self, private_property_name)

        register_accessor(name=public_property_name, backend="Pandas")(
            property(get_property, set_property, del_property)
        )

        python_test_df = pd.DataFrame({"col0": [1, 2, 3], "col1": [4, 5, 6]}).move_to(
            "python_test"
        )

        python_test_groupby = get_groupby(python_test_df)

        assert not hasattr(python_test_groupby, public_property_name)

        pandas_df = python_test_df.move_to("pandas")
        pandas_groupby = get_groupby(pandas_df)

        setattr(pandas_groupby, public_property_name, "value")
        assert getattr(pandas_groupby, public_property_name) == "value"
        delattr(pandas_groupby, public_property_name)
        assert not hasattr(pandas_groupby, private_property_name)

    @pytest.mark.filterwarnings(default_to_pandas_ignore_string)
    def test_override_cached_property(self, get_groupby, register_accessor):
        """
        Override `groups` for "Pandas" with a `functools.cached_property`.
        """

        @cached_property
        def groups(self):
            return {"group": pd.Index(["test"])}

        register_accessor("groups", backend="Pandas")(groups)
        pandas_df = pd.DataFrame({"col0": [1], "col1": [2]}).move_to("pandas")
        assert get_groupby(pandas_df).groups == {"group": pd.Index(["test"])}


def test_deleting_extension_that_is_not_property_raises_attribute_error():
    """
    Check that `delattr` on a registered plain-method groupby extension raises AttributeError.
    """
    method_name = "new_method"
    returned_text = "Some string value"

    def return_fixed_text(self):
        return returned_text

    register_dataframe_groupby_accessor(name=method_name)(return_fixed_text)

    frame = pd.DataFrame({"col0": [1, 2, 3], "col1": [4, 5, 6]})
    grouped = frame.groupby("col0")
    assert hasattr(DataFrameGroupBy, method_name)
    bound_method = getattr(grouped, method_name)
    assert bound_method() == returned_text
    with pytest.raises(AttributeError):
        delattr(grouped, method_name)


@pytest.mark.skipif(Backend.get() == "Pandas", reason="already on pandas backend")
def test_get_extension_from_dataframe_that_is_on_non_default_backend_when_auto_switch_is_false():
    """
    Check that a groupby on a frame moved to "Pandas" uses the "Pandas" extension.
    """
    assert not AutoSwitchBackend.get()
    moved_frame = pd.DataFrame([1, 2]).move_to("Pandas")

    def fixed_sum(df):
        return "small_sum_result"

    register_dataframe_groupby_accessor("sum", backend="Pandas")(fixed_sum)
    grouped = moved_frame.groupby(0)
    assert grouped.sum() == "small_sum_result"


================================================
FILE: modin/tests/pandas/extensions/test_pd_extensions.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import re
from types import FunctionType

import pandas
import pytest

import modin.pandas as pd
from modin.config import context as config_context
from modin.pandas.api.extensions import register_pd_accessor
from modin.tests.pandas.utils import df_equals, eval_general


@pytest.fixture(
    params=sorted(
        [
            name
            for name, member in vars(pd).items()
            if isinstance(member, FunctionType)
            and member.__module__ == pd.general.__name__
        ]
    )
)
def pd_general_function(request):
    """
    Yield, one per test run, the name of each `modin.pandas` function defined in `pd.general`.
    """
    return request.param


@pytest.fixture(
    params=sorted(
        [
            name
            for name, member in vars(pd).items()
            if isinstance(member, FunctionType)
            and member.__module__ == pd.io.__name__
        ]
    )
)
def pd_io_function(request):
    """
    Yield, one per test run, the name of each `modin.pandas` function defined in `pd.io`.
    """
    return request.param


class TestRegisterForAllBackends:
    """
    Tests for `register_pd_accessor` called without a `backend` argument.
    """

    def test_add_new_function(self):
        """
        Register a new module-level function and call it through `pd`.
        """
        method_name = "new_method"
        returned_text = "Some string value"

        def return_fixed_text():
            return returned_text

        register_pd_accessor(method_name)(return_fixed_text)

        assert pd.new_method() == returned_text

    def test_add_new_non_method(self):
        """
        Register a non-callable value as a module-level attribute.
        """
        attribute_name = "four"
        registered_value = 4
        decorator = register_pd_accessor(attribute_name)
        decorator(registered_value)
        assert getattr(pd, attribute_name) == registered_value

    def test_override_io_function(self, pd_io_function):
        """
        Override each `pd.io` function and check the override is the one called.
        """
        sentinel = object()

        def return_sentinel():
            return sentinel

        register_pd_accessor(pd_io_function)(return_sentinel)
        overridden = getattr(pd, pd_io_function)
        assert overridden() == sentinel

    def test_override_general_function(self, pd_general_function):
        """
        Override each `pd.general` function and check the override is the one called.
        """
        sentinel = object()

        def return_sentinel():
            return sentinel

        register_pd_accessor(pd_general_function)(return_sentinel)
        overridden = getattr(pd, pd_general_function)
        assert overridden() == sentinel


class TestRegisterForOneBackend:
    """
    Tests for `register_pd_accessor` called with an explicit `backend` argument.
    """

    def test_add_new_function(self):
        """
        Register a new function for "Pandas" and check it is missing under "Python_Test".
        """
        backend = "Pandas"
        expected_string_val = "Some string value"
        method_name = "new_method"

        @register_pd_accessor(method_name, backend=backend)
        def my_method_implementation():
            return expected_string_val

        with config_context(Backend=backend):
            assert getattr(pd, method_name)() == expected_string_val
        with config_context(Backend="Python_Test"):
            with pytest.raises(
                AttributeError,
                match=re.escape(
                    f"module 'modin.pandas' has no attribute {method_name}"
                ),
            ):
                getattr(pd, method_name)()

    def test_override_function(self):
        """
        Override `to_datetime` for "Pandas" and call it with a non-modin input.
        """
        backend = "Pandas"
        expected_string_val = "Some string value"

        @register_pd_accessor("to_datetime", backend=backend)
        def my_method_implementation(*args, **kwargs):
            return expected_string_val

        with config_context(Backend=backend):
            # Since there are no query compiler inputs to to_datetime(), use
            # the to_datetime() implementation for Backend.get()
            assert pd.to_datetime(1) == expected_string_val

        with config_context(Backend="Python_Test"):
            # There are no query compiler inputs to to_datetime(), and
            # the current Backend.get() does not have a to_datetime() extension,
            # so fall back to the default to_datetime() implementation, which
            # should return the same result as pandas.to_datetime().
            eval_general(pd, pandas, lambda lib: lib.to_datetime(1))

    def test_add_new_non_method(self):
        """
        Register a non-callable value for "Pandas" and check it is missing under "Python_Test".
        """
        backend = "Pandas"
        expected_val = 4
        attribute_name = "four"
        register_pd_accessor(attribute_name, backend=backend)(expected_val)
        with config_context(Backend=backend):
            assert pd.four == expected_val
        with config_context(Backend="Python_Test"):
            assert not hasattr(pd, attribute_name)

    def test_to_datetime_dispatches_to_implementation_for_input(self):
        """
        Check that `to_datetime` picks the implementation matching the backend of its input.
        """

        @register_pd_accessor("to_datetime", backend="Pandas")
        def pandas_to_datetime(*args, **kwargs):
            return "pandas_to_datetime_result"

        with config_context(Backend="Pandas"):
            pandas_backend_series = pd.Series(1)

        with config_context(Backend="Python_Test"):
            # Despite the `_df` suffix, this is a Series.
            python_backend_df = pd.Series(1)

        # Both calls run outside any backend context, so the inputs created
        # above are what distinguishes them.
        assert pd.to_datetime(pandas_backend_series) == "pandas_to_datetime_result"
        df_equals(
            pd.to_datetime(python_backend_df),
            pandas.to_datetime(python_backend_df._to_pandas()),
        )

    def test_concat_with_two_different_backends(self):
        """
        Check which `concat` override is used when the inputs are on different backends.
        """
        with config_context(Backend="Pandas"):
            modin_on_pandas_df = pd.DataFrame({"a": [1, 2, 3]})
        with config_context(Backend="Python_Test"):
            modin_on_python_df = pd.DataFrame({"a": [4, 5, 6]})

        @register_pd_accessor("concat", backend="Pandas")
        def pandas_concat(*args, **kwargs):
            return "pandas_concat_result"

        @register_pd_accessor("concat", backend="Python_Test")
        def python_concat(*args, **kwargs):
            return "python_concat_result"

        # If the backends are different, we dispatch to the concat() override
        # for the backend of the first argument.
        assert (
            pd.concat([modin_on_pandas_df, modin_on_python_df])
            == "pandas_concat_result"
        )

        # With inplace casting we need to reset the original dataframes
        modin_on_pandas_df.move_to("Pandas", inplace=True)
        modin_on_python_df.move_to("Python_Test", inplace=True)

        assert (
            pd.concat([modin_on_python_df, modin_on_pandas_df])
            == "python_concat_result"
        )

    def test_index_class_override(self):
        """
        Replace `pd.Index` with a stand-in class for "Python_Test" only.
        """

        class FakeIndex:
            # Minimal stand-in: accepts and ignores one constructor argument.
            def __init__(self, _values):
                pass

            def fake_method(self) -> str:
                return "python_fake_index"

        register_pd_accessor("Index", backend="Python_Test")(FakeIndex)

        with config_context(Backend="Pandas"):
            # Should return an actual native pandas index object
            df_equals(pd.Index([1]).to_series(), pd.Series([1], index=[1]))

        with config_context(Backend="Python_Test"):
            # Should just return a string
            assert pd.Index([1]).fake_method() == "python_fake_index"


================================================
FILE: modin/tests/pandas/extensions/test_series_extensions.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import re
from unittest import mock

import pytest

import modin.pandas as pd
from modin.config import Backend
from modin.config import context as config_context
from modin.pandas.api.extensions import register_series_accessor

# Keep a reference to the ``__init__`` entry stored under the ``None`` key of
# ``pd.Series._extensions`` so that the tests below can wrap it in a mock.
# NOTE(review): the ``None`` key presumably means "all backends" -- confirm.
default___init__ = pd.Series._extensions[None]["__init__"]


def test_series_extension_simple_method(Backend1):
    """A function registered for ``Backend1`` is callable as a method on a Series of that backend."""
    method_name = "new_method"
    expected_string_val = "Some string value"
    # The series is intentionally created before the extension is registered.
    backend_series = pd.Series([1, 2, 3]).set_backend(Backend1)

    @register_series_accessor(name=method_name, backend=Backend1)
    def return_expected_string(self):
        return expected_string_val

    assert hasattr(pd.Series, method_name)
    actual = backend_series.new_method()
    assert actual == expected_string_val


def test_series_extension_non_method(Backend1):
    """A plain (non-callable) value can be registered and read back as a Series attribute."""
    attribute_name, expected_val = "four", 4
    register = register_series_accessor(name=attribute_name, backend=Backend1)
    register(expected_val)

    backend_series = pd.Series([1, 2, 3]).set_backend(Backend1)
    assert backend_series.four == expected_val


def test_series_extension_accessing_existing_methods(Backend1):
    """An extension method can call the regular Series methods through ``self``."""
    backend_series = pd.Series([1, 2, 3]).set_backend(Backend1)
    method_name = "self_accessor"
    # Compute the reference value with the built-in methods before the
    # extension exists.
    total = backend_series.sum()
    count = backend_series.count()
    expected_result = total / count

    @register_series_accessor(name=method_name, backend=Backend1)
    def average_via_self(self):
        return self.sum() / self.count()

    actual = backend_series.self_accessor()
    assert actual == expected_result


def test_series_extension_overrides_existing_method(Backend1):
    """Registering ``sort_values`` for ``Backend1`` replaces the built-in implementation there."""
    unsorted = pd.Series([3, 2, 1])
    # Sanity check: the stock sort_values really sorts.
    assert unsorted.sort_values().iloc[0] == 1

    @register_series_accessor(name="sort_values", backend=Backend1)
    def identity_sort_values(self):
        return self

    # The override returns the series untouched, so the first element is
    # still the original first element.
    backend_series = unsorted.set_backend(Backend1)
    assert backend_series.sort_values().iloc[0] == 3


def test_series_extension_method_uses_superclass_method(Backend1):
    """An override may delegate to the superclass implementation via ``super(pd.Series, self)``."""
    named_series = pd.Series([3, 2, 1], name="name")
    # Sanity check: the stock sort_values sorts ascending.
    assert named_series.sort_values().iloc[0] == 1

    @register_series_accessor(name="sort_values", backend=Backend1)
    def descending_sort_values(self):
        parent = super(pd.Series, self)
        return parent.sort_values(by="name", ascending=False)

    backend_series = named_series.set_backend(Backend1)
    assert backend_series.sort_values().iloc[0] == 3


class TestOverride__init__:
    """Tests for replacing ``Series.__init__`` through the extensions system."""

    def test_override_one_backend_and_pass_no_query_compilers(self):
        """The override registered for the configured backend receives the plain constructor arguments."""
        configured_backend = Backend.get()
        init_spy = mock.Mock(wraps=default___init__)
        register = register_series_accessor(
            name="__init__", backend=configured_backend
        )
        register(init_spy)

        output_series = pd.Series([1], index=["a"])

        assert output_series.get_backend() == configured_backend
        expected_calls = [mock.call(output_series, [1], index=["a"])]
        init_spy.assert_has_calls(expected_calls)

    def test_override_one_backend_and_pass_query_compiler_kwarg(self):
        """Constructing from a Pandas query compiler goes through the Pandas ``__init__`` override."""
        init_spy = mock.Mock(wraps=default___init__)
        register_series_accessor(name="__init__", backend="Pandas")(init_spy)

        with config_context(Backend="Pandas"):
            input_series = pd.Series()

        # Discard the call recorded while building input_series so that only
        # the construction below is checked.
        init_spy.reset_mock()
        output_series = pd.Series(query_compiler=input_series._query_compiler)

        assert output_series.get_backend() == "Pandas"
        init_spy.assert_called_once_with(
            output_series, query_compiler=input_series._query_compiler
        )

    @pytest.mark.parametrize("input_backend", ["Python_Test", "Pandas"])
    def test_override_all_backends_and_pass_query_compiler_kwarg(self, input_backend):
        """An override registered without a backend is used whatever backend the query compiler has."""
        init_spy = mock.Mock(wraps=default___init__)
        register_series_accessor(name="__init__")(init_spy)

        with config_context(Backend=input_backend):
            input_series = pd.Series()

        # Discard the call recorded while building input_series.
        init_spy.reset_mock()
        output_series = pd.Series(query_compiler=input_series._query_compiler)

        assert output_series.get_backend() == input_backend
        init_spy.assert_called_once_with(
            output_series, query_compiler=input_series._query_compiler
        )


class TestDunders:
    """
    Check that special "dunder" methods such as __len__ can be overridden.

    Python looks these methods up on the type, i.e. it calls
    Series.__len__(obj) rather than getattr(obj, "__len__")(), so both the
    builtin and the explicit attribute call are exercised.
    source: https://docs.python.org/3/reference/datamodel.html#special-lookup
    """

    def test_len(self, Backend1):
        """``len()`` and ``__len__()`` both use the override, but only on ``Backend1``."""

        @register_series_accessor(name="__len__", backend=Backend1)
        def constant_length(self):
            return 1

        original = pd.Series([1, 2, 3])
        assert len(original) == 3

        moved = original.set_backend(Backend1)
        assert len(moved) == 1
        assert moved.__len__() == 1

    def test_repr(self, Backend1):
        """``repr()`` and ``__repr__()`` both use the override, but only on ``Backend1``."""

        @register_series_accessor(name="__repr__", backend=Backend1)
        def constant_repr(self) -> str:
            return "series_string"

        original = pd.Series([1, 2, 3])
        # Without the override the repr matches the converted pandas object.
        assert repr(original) == repr(original.modin.to_pandas())

        moved = original.set_backend(Backend1)
        assert repr(moved) == "series_string"
        assert moved.__repr__() == "series_string"


class TestProperty:
    """Tests for registering ``property`` objects as Series extensions."""

    def test_override_index(self, Backend1):
        """A property extension can replace both the getter and the setter of ``index``."""
        series = pd.Series(["a", "b"])

        def get_index(self):
            # Reversed on purpose so the override is distinguishable.
            return self._query_compiler.index[::-1]

        def set_index(self, new_index):
            self._query_compiler.index = [f"{v}_custom" for v in new_index]

        custom_index = property(fget=get_index, fset=set_index)
        register_series_accessor(name="index", backend=Backend1)(custom_index)

        # The original series is unaffected by the override.
        assert list(series.index) == [0, 1]

        backend_series = series.set_backend(Backend1)
        assert list(backend_series.index) == [1, 0]

        backend_series.index = [2, 3]
        assert list(backend_series.index) == ["3_custom", "2_custom"]

    def test_add_deletable_property(self, Backend1):
        """A property with getter, setter and deleter supports get, set and delete."""
        # The public property `public_property_name` is backed by the private
        # attribute `private_property_name`.
        public_property_name = "property_name"
        private_property_name = "_property_name"

        def read_backing(self):
            return getattr(self, private_property_name)

        def write_backing(self, value):
            setattr(self, private_property_name, value)

        def drop_backing(self):
            delattr(self, private_property_name)

        full_property = property(read_backing, write_backing, drop_backing)
        register_series_accessor(name=public_property_name, backend=Backend1)(
            full_property
        )

        series = pd.Series([0])
        assert not hasattr(series, public_property_name)

        backend_series = series.set_backend(Backend1)
        setattr(backend_series, public_property_name, "value")
        assert hasattr(backend_series, private_property_name)
        assert getattr(backend_series, public_property_name) == "value"

        delattr(backend_series, public_property_name)
        # Deleting through the property must remove the backing attribute.
        assert not hasattr(backend_series, private_property_name)

    def test_non_settable_extension_property(self, Backend1):
        """Assigning to a getter-only property raises ``AttributeError``."""
        property_name = "property_name"
        read_only = property(lambda self: 4)
        register_series_accessor(name=property_name, backend=Backend1)(read_only)

        series = pd.Series([0])
        assert not hasattr(series, property_name)

        backend_series = series.set_backend(Backend1)
        assert getattr(backend_series, property_name) == 4
        with pytest.raises(AttributeError):
            setattr(backend_series, property_name, "value")

    def test_delete_non_deletable_extension_property(self, Backend1):
        """Deleting a getter-only property raises ``AttributeError``."""
        property_name = "property_name"
        read_only = property(lambda self: "value")
        register_series_accessor(name=property_name, backend=Backend1)(read_only)

        series = pd.Series([0])
        assert not hasattr(series, property_name)

        backend_series = series.set_backend(Backend1)
        with pytest.raises(AttributeError):
            delattr(backend_series, property_name)


def test_deleting_extension_that_is_not_property_raises_attribute_error(Backend1):
    """``delattr`` on an extension that is a plain method raises ``AttributeError``."""
    method_name = "new_method"
    expected_string_val = "Some string value"
    backend_series = pd.Series([1, 2, 3]).set_backend(Backend1)

    @register_series_accessor(name=method_name, backend=Backend1)
    def return_expected_string(self):
        return expected_string_val

    # The extension is installed and works ...
    assert hasattr(pd.Series, method_name)
    assert backend_series.new_method() == expected_string_val
    # ... but it cannot be deleted from an instance.
    with pytest.raises(AttributeError):
        delattr(backend_series, method_name)


def test_disallowed_extensions(Backend1, non_extendable_attribute_name):
    """Registering an extension under a reserved name raises ``ValueError`` with the expected message."""
    expected_message = re.escape(
        f"Cannot register an extension with the reserved name {non_extendable_attribute_name}."
    )
    with pytest.raises(ValueError, match=expected_message):
        register = register_series_accessor(
            name=non_extendable_attribute_name, backend=Backend1
        )
        register("unused_value")


def test_wrapped_extension(Backend1):
    """
    Override a method with a wrapper around the original implementation.

    The original is obtained from the class attribute. Because the
    QueryCompilerCaster ABC automatically wraps all methods with a dispatch to
    the appropriate backend, the ``__wrapped__`` property of the
    originally-defined attribute must be used to avoid infinite recursion.
    """
    undispatched_item = pd.Series.item.__wrapped__

    @register_series_accessor(name="item", backend=Backend1)
    def transformed_item(self):
        base_value = undispatched_item(self)
        return (base_value + 2) * 5

    single = pd.Series([3])
    assert single.item() == 3
    # (3 + 2) * 5 == 25 on the backend that carries the override.
    assert single.set_backend(Backend1).item() == 25


================================================
FILE: modin/tests/pandas/integrations/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.


================================================
FILE: modin/tests/pandas/integrations/test_lazy_import.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import lazy_import

# Register pandas and pyarrow as lazy modules *before* modin is imported;
# that ordering is why the modin import below carries an E402 suppression.
# NOTE(review): presumably this verifies that modin works when these packages
# are loaded lazily -- confirm against the lazy_import documentation.
pandas = lazy_import.lazy_module("pandas")
pyarrow = lazy_import.lazy_module("pyarrow")
from modin import pandas as pd  # noqa: E402


def test_dataframe_constructor():
    """Smoke test: building a small mixed-type DataFrame must not raise."""
    data = {"col1": [1, 2, 3], "col2": list("abc")}
    pd.DataFrame(data)


================================================
FILE: modin/tests/pandas/internals/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.


================================================
FILE: modin/tests/pandas/internals/test_benchmark_mode.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import unittest.mock as mock

import pytest

import modin.pandas as pd
from modin.config import Engine

engine = Engine.get()

# We have to explicitly mock subclass implementations of wait_partitions.
# Engines without an entry in the table use the PandasDataframePartitionManager
# path given as the lookup default.
wait_method = {
    "Ray": (
        "modin.core.execution.ray.implementations."
        "pandas_on_ray.partitioning."
        "PandasOnRayDataframePartitionManager.wait_partitions"
    ),
    "Dask": (
        "modin.core.execution.dask.implementations."
        "pandas_on_dask.partitioning."
        "PandasOnDaskDataframePartitionManager.wait_partitions"
    ),
    "Unidist": (
        "modin.core.execution.unidist.implementations."
        "pandas_on_unidist.partitioning."
        "PandasOnUnidistDataframePartitionManager.wait_partitions"
    ),
}.get(
    engine,
    "modin.core.dataframe.pandas.partitioning."
    "partition_manager.PandasDataframePartitionManager.wait_partitions",
)


@pytest.mark.parametrize("set_benchmark_mode", [False], indirect=True)
def test_turn_off(set_benchmark_mode):
    """With benchmark mode off, ``dropna`` must not call ``wait_partitions``."""
    frame = pd.DataFrame([0])
    with mock.patch(wait_method) as patched_wait:
        frame.dropna()
    patched_wait.assert_not_called()


@pytest.mark.parametrize("set_benchmark_mode", [True], indirect=True)
def test_turn_on(set_benchmark_mode):
    """With benchmark mode on, ``dropna`` must call ``wait_partitions``."""
    frame = pd.DataFrame([0])
    with mock.patch(wait_method) as patched_wait:
        frame.dropna()
    patched_wait.assert_called()


================================================
FILE: modin/tests/pandas/native_df_interoperability/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
This module contains tests for interoperability between Modin dataframes using "native" execution and Modin dataframes using other execution modes.
"""


================================================
FILE: modin/tests/pandas/native_df_interoperability/conftest.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import itertools

import pytest


def _get_native_bool_descriptor(v: bool) -> str:
    """Return the label used in test ids: ``"native"`` for a truthy flag, ``"default"`` otherwise."""
    if v:
        return "native"
    return "default"


@pytest.fixture(
    params=list(itertools.product([True, False], repeat=2)),
    ids=lambda param: "_".join(map(_get_native_bool_descriptor, param)),
)
def df_mode_pair(request):
    """
    Yield every combination of two boolean "native" flags.

    Each parameter is a 2-tuple of booleans; the test id joins the
    "native"/"default" label of each flag with an underscore.
    """
    return request.param


================================================
FILE: modin/tests/pandas/native_df_interoperability/test_binary.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import matplotlib
import pytest

from modin.config import NPartitions
from modin.tests.pandas.native_df_interoperability.utils import (
    create_test_df_in_defined_mode,
    eval_general_interop,
)
from modin.tests.pandas.utils import (
    default_to_pandas_ignore_string,
    df_equals,
    test_data,
    test_data_keys,
    test_data_values,
)

# Set Modin's NPartitions config value to 4 when this module is imported.
NPartitions.put(4)

# Force matplotlib to not use any Xwindows backend.
matplotlib.use("Agg")

# Our configuration in pytest.ini requires that we explicitly catch all
# instances of defaulting to pandas, but some test modules, like this one,
# have too many such instances.
pytestmark = pytest.mark.filterwarnings(default_to_pandas_ignore_string)


@pytest.mark.parametrize(
    "other",
    [
        lambda df, axis: 4,
        lambda df, axis: df.iloc[0] if axis == "columns" else list(df[df.columns[0]]),
        lambda df, axis: {
            label: idx + 1
            for idx, label in enumerate(df.axes[0 if axis == "rows" else 1])
        },
        lambda df, axis: {
            label if idx % 2 else f"random_key{idx}": idx + 1
            for idx, label in enumerate(df.axes[0 if axis == "rows" else 1][::-1])
        },
    ],
    ids=[
        "scalar",
        "series_or_list",
        "dictionary_keys_equal_columns",
        "dictionary_keys_unequal_columns",
    ],
)
@pytest.mark.parametrize("axis", ["rows", "columns"])
@pytest.mark.parametrize(
    "op",
    [
        "add",
        "radd",
        "sub",
        "rsub",
        "mod",
        "rmod",
        "pow",
        "rpow",
        "truediv",
        "rtruediv",
        "mul",
        "rmul",
        "floordiv",
        "rfloordiv",
    ],
)
@pytest.mark.parametrize("backend", [None, "pyarrow"])
def test_math_functions(other, axis, op, backend, df_mode_pair):
    """Compare binary arithmetic methods against pandas for every native/default operand pairing."""
    data = test_data["float_nan_data"]

    # NOTE(review): the earlier comments here referred to the "series_or_list"
    # case, but the xfail applies to every `other` variant for these ops.
    if axis == "rows" and op in ("floordiv", "rfloordiv", "rmod"):
        pytest.xfail(reason="different behavior")

    if backend == "pyarrow" and op in ("mod", "rmod"):
        pytest.skip(reason="These functions are not implemented in pandas itself")

    def apply_op(df1, df2):
        method = getattr(df1, op)
        return method(other(df2, axis), axis=axis)

    eval_general_interop(data, backend, apply_op, df_mode_pair)


@pytest.mark.parametrize("other", [lambda df: 2, lambda df: df])
def test___divmod__(other, df_mode_pair):
    """Compare ``divmod`` against pandas with a scalar and with a frame as the right operand."""
    data = test_data["float_nan_data"]

    def apply_divmod(df1, df2):
        return divmod(df1, other(df2))

    eval_general_interop(data, None, apply_divmod, df_mode_pair)


@pytest.mark.parametrize("other", ["as_left", 4])
@pytest.mark.parametrize("op", ["eq", "ge", "gt", "le", "lt", "ne"])
@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_comparison(data, op, other, request, df_mode_pair):
    """
    Compare the comparison methods (eq, ge, ...) against pandas.

    ``other == "as_left"`` compares the first frame with the second frame;
    any other value is passed to the comparison method as-is.

    ``other`` is only parametrized with ``"as_left"`` and ``4``, so the former
    branches for ``other == "a"`` (an xfail and an expected ``TypeError``)
    could never run and have been removed; no exception is expected for the
    remaining parameters. ``request`` is kept in the signature for
    compatibility even though it is no longer consulted.
    """

    def operation(df1, df2):
        return getattr(df1, op)(df2 if other == "as_left" else other)

    eval_general_interop(
        data,
        None,
        operation,
        df_mode_pair,
        expected_exception=None,
    )


@pytest.mark.parametrize(
    "frame1_data,frame2_data,expected_pandas_equals",
    [
        pytest.param({}, {}, True, id="two_empty_dataframes"),
        pytest.param([[1]], [[0]], False, id="single_unequal_values"),
        pytest.param([[None]], [[None]], True, id="single_none_values"),
        pytest.param(
            [[1, 2], [3, 4]],
            [[1, 2], [3, 4]],
            True,
            id="equal_two_by_two_dataframes",
        ),
        pytest.param(
            [[1, 2], [3, 4]],
            [[5, 2], [3, 4]],
            False,
            id="unequal_two_by_two_dataframes",
        ),
    ],
)
def test_equals(frame1_data, frame2_data, expected_pandas_equals, df_mode_pair):
    """``equals`` on the modin frame agrees with pandas, for modin and pandas right-hand operands."""
    first_native, second_native = df_mode_pair
    modin_df1, pandas_df1 = create_test_df_in_defined_mode(
        frame1_data, native=first_native
    )
    modin_df2, pandas_df2 = create_test_df_in_defined_mode(
        frame2_data, native=second_native
    )

    # First make sure the test data itself behaves as the parametrization
    # claims under plain pandas.
    pandas_equals = pandas_df1.equals(pandas_df2)
    assert pandas_equals == expected_pandas_equals, (
        "Test expected pandas to say the dataframes were"
        + f"{'' if expected_pandas_equals else ' not'} equal, but they were"
        + f"{' not' if expected_pandas_equals else ''} equal."
    )

    for right_operand in (modin_df2, pandas_df2):
        assert modin_df1.equals(right_operand) == pandas_equals


@pytest.mark.parametrize("empty_operand", ["right", "left", "both"])
def test_empty_df(empty_operand, df_mode_pair):
    """Adding frames where one or both operands are empty matches pandas."""
    filled_native, empty_native = df_mode_pair
    modin_df, pandas_df = create_test_df_in_defined_mode(
        [0, 1, 2, 0, 1, 2], native=filled_native
    )
    modin_df_empty, pandas_df_empty = create_test_df_in_defined_mode(
        native=empty_native
    )

    # Choose the (left, right) operands for both libraries, then add once.
    if empty_operand == "right":
        modin_lhs, modin_rhs = modin_df, modin_df_empty
        pandas_lhs, pandas_rhs = pandas_df, pandas_df_empty
    elif empty_operand == "left":
        modin_lhs, modin_rhs = modin_df_empty, modin_df
        pandas_lhs, pandas_rhs = pandas_df_empty, pandas_df
    else:
        modin_lhs, modin_rhs = modin_df_empty, modin_df_empty
        pandas_lhs, pandas_rhs = pandas_df_empty, pandas_df_empty

    modin_res = modin_lhs + modin_rhs
    pandas_res = pandas_lhs + pandas_rhs

    df_equals(modin_res, pandas_res)


================================================
FILE: modin/tests/pandas/native_df_interoperability/test_compiler_caster.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import contextlib
import json
import logging
from io import StringIO
from types import MappingProxyType
from typing import Iterator, Optional
from unittest import mock

import pandas
import pytest
from pytest import param

import modin.pandas as pd
from modin.config import context as config_context
from modin.config.envvars import (
    Backend,
    Engine,
    Execution,
    NativePandasMaxRows,
    NativePandasTransferThreshold,
)
from modin.core.execution.dispatching.factories import factories
from modin.core.execution.dispatching.factories.factories import BaseFactory
from modin.core.io.io import BaseIO
from modin.core.storage_formats.base.query_compiler import QCCoercionCost
from modin.core.storage_formats.base.query_compiler_calculator import (
    BackendCostCalculator,
)
from modin.core.storage_formats.pandas.native_query_compiler import NativeQueryCompiler
from modin.core.storage_formats.pandas.query_compiler_caster import (
    _GENERAL_EXTENSIONS,
    register_function_for_post_op_switch,
    register_function_for_pre_op_switch,
)
from modin.logging import DEFAULT_LOGGER_NAME
from modin.logging.metrics import add_metric_handler, clear_metric_handler
from modin.pandas.api.extensions import register_pd_accessor
from modin.tests.pandas.utils import (
    create_test_dfs,
    default_to_pandas_ignore_string,
    df_equals,
    eval_general,
)

# Some modin methods warn about defaulting to pandas at the API layer. That's
# expected and not an error as it would be normally.
pytestmark = pytest.mark.filterwarnings(default_to_pandas_ignore_string)

# Row-count thresholds shared by the tests in this module.
# BIG_DATA_CLOUD_MIN_NUM_ROWS is the value BaseTestAutoMover uses for
# _MAX_SIZE_THIS_ENGINE_CAN_HANDLE.
# NOTE(review): SMALL_DATA_NUM_ROWS is presumably the size of "small" test
# inputs -- confirm against its usages further down the file.
BIG_DATA_CLOUD_MIN_NUM_ROWS = 10
SMALL_DATA_NUM_ROWS = 5


class CalculatorTestQc(NativeQueryCompiler):
    """
    A subclass of NativeQueryCompiler with simpler cost functions.

    We MAY eventually want to stop overriding the superclass's cost functions.
    """

    @classmethod
    def move_to_me_cost(cls, other_qc, api_cls_name, operation, arguments):
        """Return zero cost if ``other_qc`` is already an instance of ``cls``, otherwise ``None``."""
        return QCCoercionCost.COST_ZERO if isinstance(other_qc, cls) else None

    def stay_cost(self, api_cls_name, operation, arguments):
        """Staying on this compiler always costs nothing."""
        return QCCoercionCost.COST_ZERO

    def move_to_cost(self, other_qc_type, api_cls_name, operation, arguments):
        """Return zero cost if this object is an instance of ``other_qc_type``, otherwise ``None``."""
        if not isinstance(self, other_qc_type):
            return None
        return QCCoercionCost.COST_ZERO


class CloudQC(CalculatorTestQc):
    """Represents a cloud-hosted query compiler"""

    def get_backend(self):
        return "Cloud"

    @classmethod
    def max_cost(cls):
        return QCCoercionCost.COST_IMPOSSIBLE

    def move_to_cost(self, other_qc_cls, api_cls_name, op, arguments):
        """
        Return the cost of moving to ``other_qc_cls``.

        Also sanity-checks that the caller supplied an operation name and one
        of the API class names this test knows about. Target classes missing
        from the table yield ``None``.
        """
        assert op is not None
        known_api_cls_names = (
            None,
            "_iLocIndexer",
            "_LocationIndexerBase",
            "Series",
            "DataFrame",
            "BasePandasDataset",
        )
        assert api_cls_name in known_api_cls_names

        cost_by_target = {
            CloudQC: QCCoercionCost.COST_ZERO,
            CloudQCHighSelf: QCCoercionCost.COST_LOW,
            ClusterQC: QCCoercionCost.COST_MEDIUM,
            DefaultQC: QCCoercionCost.COST_MEDIUM,
            LocalMachineQC: QCCoercionCost.COST_HIGH,
            PicoQC: QCCoercionCost.COST_IMPOSSIBLE,
            OmniscientEagerQC: None,
            OmniscientLazyQC: None,
        }
        return cost_by_target.get(other_qc_cls)

    def stay_cost(self, api_cls_name, op, arguments):
        return QCCoercionCost.COST_ZERO


class CloudQCHighSelf(CloudQC):
    """A ``CloudQC`` variant that reports a high cost for staying where it is."""

    def get_backend(self):
        return "Cloud_High_Self"

    def stay_cost(self, api_cls_name, op, arguments):
        return QCCoercionCost.COST_HIGH


class ClusterQC(CalculatorTestQc):
    """Represents a local network cluster query compiler"""

    def get_backend(self):
        return "Cluster"

    @classmethod
    def max_cost(cls):
        return QCCoercionCost.COST_HIGH

    def move_to_cost(self, other_qc_cls, api_cls_name, op, arguments):
        """Return the cost of moving to ``other_qc_cls``; targets not in the table yield ``None``."""
        cost_by_target = {
            CloudQC: QCCoercionCost.COST_MEDIUM,
            CloudQCHighSelf: QCCoercionCost.COST_MEDIUM,
            ClusterQC: QCCoercionCost.COST_ZERO,
            DefaultQC: None,  # cluster qc knows nothing about default qc
            LocalMachineQC: QCCoercionCost.COST_MEDIUM,
            PicoQC: QCCoercionCost.COST_HIGH,
        }
        return cost_by_target.get(other_qc_cls)


class LocalMachineQC(CalculatorTestQc):
    """Represents a local machine query compiler"""

    def get_backend(self):
        return "Local_Machine"

    @classmethod
    def max_cost(cls):
        return QCCoercionCost.COST_MEDIUM

    def move_to_cost(self, other_qc_cls, api_cls_name, op, arguments):
        """Return the cost of moving to ``other_qc_cls``; targets not in the table yield ``None``."""
        cost_by_target = {
            CloudQC: QCCoercionCost.COST_MEDIUM,
            CloudQCHighSelf: QCCoercionCost.COST_MEDIUM,
            ClusterQC: QCCoercionCost.COST_LOW,
            LocalMachineQC: QCCoercionCost.COST_ZERO,
            PicoQC: QCCoercionCost.COST_MEDIUM,
        }
        return cost_by_target.get(other_qc_cls)


class PicoQC(CalculatorTestQc):
    """Represents a query compiler with very few resources"""

    def get_backend(self):
        return "Pico"

    @classmethod
    def max_cost(cls):
        return QCCoercionCost.COST_LOW

    def move_to_cost(self, other_qc_cls, api_cls_name, op, arguments):
        """Return the cost of moving to ``other_qc_cls``; targets not in the table yield ``None``."""
        cost_by_target = {
            CloudQC: QCCoercionCost.COST_LOW,
            CloudQCHighSelf: QCCoercionCost.COST_LOW,
            ClusterQC: QCCoercionCost.COST_LOW,
            LocalMachineQC: QCCoercionCost.COST_LOW,
            PicoQC: QCCoercionCost.COST_ZERO,
        }
        return cost_by_target.get(other_qc_cls)


class AdversarialQC(CalculatorTestQc):
    """Represents a query compiler which returns non-sensical costs"""

    def get_backend(self):
        return "Adversarial"

    def move_to_cost(self, other_qc_cls, api_cls_name, op, arguments):
        """Return deliberately odd raw-integer costs; targets not in the table yield ``None``."""
        cost_by_target = {
            CloudQC: -1000,
            CloudQCHighSelf: -1000,
            ClusterQC: 10000,
            AdversarialQC: QCCoercionCost.COST_ZERO,
        }
        return cost_by_target.get(other_qc_cls)


class OmniscientEagerQC(CalculatorTestQc):
    """Represents a query compiler which knows a lot, and wants to steal work"""

    def get_backend(self):
        return "Eager"

    def move_to_cost(self, other_qc_cls, api_cls_name, op, arguments):
        """Keep other workloads from getting my workload: only a move to this exact class is free."""
        return (
            QCCoercionCost.COST_ZERO
            if other_qc_cls is OmniscientEagerQC
            else QCCoercionCost.COST_IMPOSSIBLE
        )

    @classmethod
    def move_to_me_cost(cls, other_qc, api_cls_name, operation, arguments):
        """Try to force other workloads to my engine by always reporting zero cost."""
        return QCCoercionCost.COST_ZERO


class OmniscientLazyQC(CalculatorTestQc):
    """Represents a query compiler which knows a lot, and wants to avoid work"""

    def get_backend(self):
        return "Lazy"

    def move_to_cost(self, other_qc_cls, api_cls_name, op, arguments):
        """Encourage other engines to take my workload by always reporting zero cost."""
        return QCCoercionCost.COST_ZERO

    @classmethod
    def move_to_me_cost(cls, other_qc, api_cls_name, operation, arguments):
        """Try to keep other workloads away: only instances of ``cls`` may move here."""
        return (
            QCCoercionCost.COST_ZERO
            if isinstance(other_qc, cls)
            else QCCoercionCost.COST_IMPOSSIBLE
        )


class DefaultQC(CalculatorTestQc):
    """Represents a query compiler with no costing information"""

    def get_backend(self):
        """Return the backend name reported by this compiler."""
        return "Test_Casting_Default"


class DefaultQC2(CalculatorTestQc):
    """A second, distinct test query compiler with no costing information."""

    def get_backend(self):
        return "Test_Casting_Default_2"


class BaseTestAutoMover(NativeQueryCompiler):
    """Common base for the native-backed test compilers used in auto-switch tests."""

    # Row-count limit for this engine; subclasses override it.
    _MAX_SIZE_THIS_ENGINE_CAN_HANDLE = BIG_DATA_CLOUD_MIN_NUM_ROWS

    def __init__(self, pandas_frame):
        super().__init__(pandas_frame)


class CloudForBigDataQC(BaseTestAutoMover):
    """Cloud-hosted test query compiler that prefers to keep only big data on the cloud."""

    # This engine accepts larger datasets than the local one, but each
    # operation is configured to be more expensive.
    _MAX_SIZE_THIS_ENGINE_CAN_HANDLE = BIG_DATA_CLOUD_MIN_NUM_ROWS * 10
    _OPERATION_INITIALIZATION_OVERHEAD = QCCoercionCost.COST_MEDIUM
    _OPERATION_PER_ROW_OVERHEAD = 10

    def __init__(self, pandas_frame):
        super().__init__(pandas_frame)

    def get_backend(self) -> str:
        return "Big_Data_Cloud"

    @classmethod
    def max_cost(cls):
        return QCCoercionCost.COST_IMPOSSIBLE * 10

    def stay_cost(self, api_cls_name, operation, arguments):
        # read_json is reported as impossible to keep here; every other
        # operation uses the inherited estimate.
        if operation != "read_json":
            return super().stay_cost(api_cls_name, operation, arguments)
        return QCCoercionCost.COST_IMPOSSIBLE

    @classmethod
    def move_to_me_cost(cls, other_qc, api_cls_name, operation, arguments):
        is_frame_constructor = operation == "__init__" and api_cls_name in (
            "DataFrame",
            "Series",
        )
        if not is_frame_constructor:
            return super().move_to_me_cost(
                other_qc, api_cls_name, operation, arguments
            )

        source_qc = arguments.get("query_compiler")
        if source_qc is None:
            # Moving the in-memory __init__ inputs to the cloud is expensive.
            return QCCoercionCost.COST_HIGH

        # A dataframe or series built from an existing query compiler must not
        # end up on a different backend than that query compiler.
        if isinstance(source_qc, cls):
            return QCCoercionCost.COST_ZERO
        return QCCoercionCost.COST_IMPOSSIBLE


class LocalForSmallDataQC(BaseTestAutoMover):
    """Local test query compiler that prefers small data."""

    # Per-row work is cheap here, but the engine has an upper bound on size.
    _OPERATION_PER_ROW_OVERHEAD = 1
    _MAX_SIZE_THIS_ENGINE_CAN_HANDLE = BIG_DATA_CLOUD_MIN_NUM_ROWS

    def __init__(self, pandas_frame):
        super().__init__(pandas_frame)

    @classmethod
    def max_cost(cls):
        return QCCoercionCost.COST_IMPOSSIBLE * 10

    def get_backend(self) -> str:
        return "Small_Data_Local"


def register_backend(name, qc):
    """
    Register a test backend called ``name`` whose IO class uses ``qc``.

    A factory named ``<name>OnNativeFactory`` is attached to ``factories``,
    ``name`` is added as an ``Engine`` option, and the backend is registered
    with a ``"Native"`` execution.
    """

    class TestCasterIO(BaseIO):
        query_compiler_cls = qc
        _should_warn_on_default_to_pandas: bool = False

    class TestCasterFactory(BaseFactory):
        @classmethod
        def prepare(cls):
            cls.io_cls = TestCasterIO

    # Populate io_cls right away so the factory is usable once registered.
    TestCasterFactory.prepare()

    setattr(factories, f"{name}OnNativeFactory", TestCasterFactory)
    Engine.add_option(name)
    execution = Execution(name, "Native")
    Backend.register_backend(name, execution)


# Maps every test backend name to the query compiler class that implements it.
# Each key matches the string returned by that class's get_backend().
ALL_BACKENDS = {
    "Pico": PicoQC,
    "Cluster": ClusterQC,
    "Cloud": CloudQC,
    "Cloud_High_Self": CloudQCHighSelf,
    "Local_Machine": LocalMachineQC,
    "Adversarial": AdversarialQC,
    "Eager": OmniscientEagerQC,
    "Lazy": OmniscientLazyQC,
    "Test_Casting_Default": DefaultQC,
    "Test_Casting_Default_2": DefaultQC2,
    "Big_Data_Cloud": CloudForBigDataQC,
    "Small_Data_Local": LocalForSmallDataQC,
}

# Register all test backends once, at import time.
for backend, qc in ALL_BACKENDS.items():
    register_backend(backend, qc)

# Backends that backend_test_context() activates when no explicit choices are
# given. The first entry is used as the default backend in that case.
DEFAULT_TEST_BACKENDS = (
    "Pico",
    "Cluster",
    "Cloud",
    "Cloud_High_Self",
    "Local_Machine",
    "Lazy",
)


@pytest.fixture(autouse=True)
def turn_on_auto_switch_backend():
    """Run every test in this module with ``AutoSwitchBackend`` set to True."""
    auto_switch_enabled = config_context(AutoSwitchBackend=True)
    with auto_switch_enabled:
        yield


@contextlib.contextmanager
def backend_test_context(
    *, test_backend: Optional[str] = None, choices: Optional[tuple] = None
) -> Iterator[None]:
    """
    Temporarily set the active backends and the default backend.

    Parameters
    ----------
    test_backend : str, optional
        Backend to make the default. Falls back to the first entry of the
        active choices.
    choices : tuple, optional
        Backends to activate. Falls back to ``DEFAULT_TEST_BACKENDS``.

    Notes
    -----
    The previously active backends and default backend are restored on exit,
    even if the body raises.
    """
    # By default only a select set of custom test backends is considered.
    # N-ary operations look at _all_ active backends, so activating too many at
    # once can produce unexpected results. A QC explicitly created for an
    # inactive backend should still be accepted by the QC calculator.
    active_choices = DEFAULT_TEST_BACKENDS if choices is None else choices
    default_backend = active_choices[0] if test_backend is None else test_backend

    saved_default = Backend.get()
    saved_choices = Backend.get_active_backends()
    try:
        Backend.set_active_backends(active_choices)
        Backend.put(default_backend)
        yield
    finally:
        Backend.set_active_backends(saved_choices)
        Backend.put(saved_default)


@pytest.fixture()
def cloud_df():
    """Modin DataFrame of ``[0, 1, 2]`` backed by a ``CloudQC``."""
    native_frame = pandas.DataFrame([0, 1, 2])
    return pd.DataFrame(query_compiler=CloudQC(native_frame))


@pytest.fixture()
def cloud_high_self_df():
    """Modin DataFrame of ``[0, 1, 2]`` backed by a ``CloudQCHighSelf``."""
    native_frame = pandas.DataFrame([0, 1, 2])
    return pd.DataFrame(query_compiler=CloudQCHighSelf(native_frame))


@pytest.fixture()
def cluster_df():
    """Modin DataFrame of ``[0, 1, 2]`` backed by a ``ClusterQC``."""
    native_frame = pandas.DataFrame([0, 1, 2])
    return pd.DataFrame(query_compiler=ClusterQC(native_frame))


@pytest.fixture()
def local_df():
    """Modin DataFrame of ``[0, 1, 2]`` backed by a ``LocalMachineQC``."""
    native_frame = pandas.DataFrame([0, 1, 2])
    return pd.DataFrame(query_compiler=LocalMachineQC(native_frame))


@pytest.fixture()
def pico_df():
    """Modin DataFrame of ``[0, 1, 2]`` backed by a ``PicoQC``."""
    native_frame = pandas.DataFrame([0, 1, 2])
    return pd.DataFrame(query_compiler=PicoQC(native_frame))


@pytest.fixture()
def adversarial_df():
    """Modin DataFrame of ``[0, 1, 2]`` backed by an ``AdversarialQC``."""
    native_frame = pandas.DataFrame([0, 1, 2])
    return pd.DataFrame(query_compiler=AdversarialQC(native_frame))


@pytest.fixture()
def eager_df():
    """Modin DataFrame of ``[0, 1, 2]`` backed by an ``OmniscientEagerQC``."""
    native_frame = pandas.DataFrame([0, 1, 2])
    return pd.DataFrame(query_compiler=OmniscientEagerQC(native_frame))


@pytest.fixture()
def lazy_df():
    """Modin DataFrame of ``[0, 1, 2]`` backed by an ``OmniscientLazyQC``."""
    native_frame = pandas.DataFrame([0, 1, 2])
    return pd.DataFrame(query_compiler=OmniscientLazyQC(native_frame))


@pytest.fixture()
def default_df():
    """Modin DataFrame of ``[0, 1, 2]`` backed by a ``DefaultQC``."""
    native_frame = pandas.DataFrame([0, 1, 2])
    return pd.DataFrame(query_compiler=DefaultQC(native_frame))


@pytest.fixture()
def default2_df():
    """Modin DataFrame of ``[0, 1, 2]`` backed by a ``DefaultQC2``."""
    native_frame = pandas.DataFrame([0, 1, 2])
    return pd.DataFrame(query_compiler=DefaultQC2(native_frame))


def test_two_same_backend(pico_df):
    """Concatenating a Pico frame with itself leaves everything on Pico."""
    combined = pd.concat([pico_df, pico_df], axis=1)
    assert pico_df.get_backend() == "Pico"
    assert combined.get_backend() == "Pico"


def test_cast_to_second_backend_with_concat(pico_df, cluster_df, caplog):
    """Concat of Pico and Cluster copies lands on Cluster and logs one INFO record."""
    with caplog.at_level(level=logging.INFO, logger=DEFAULT_LOGGER_NAME):
        # Pass copies: concat may merge backends in place on its inputs.
        combined = pd.concat([pico_df.copy(), cluster_df.copy()], axis=1)

    # The originals are untouched; the result is on Cluster.
    assert pico_df.get_backend() == "Pico"
    assert cluster_df.get_backend() == "Cluster"
    assert combined.get_backend() == "Cluster"

    records = caplog.records
    assert len(records) == 1
    only_record = records[0]
    assert only_record.name == DEFAULT_LOGGER_NAME
    assert only_record.levelno == logging.INFO
    assert only_record.message.startswith(
        "BackendCostCalculator results for pd.concat: "
    )


def test_cast_to_second_backend_with_concat_uses_second_backend_api_override(
    pico_df, cluster_df
):
    """A Cluster-specific ``concat`` override is used when Cluster is the second input."""
    register_pd_accessor(name="concat", backend="Cluster")(
        lambda *args, **kwargs: "custom_concat_result"
    )
    # Pass copies: concat may merge backends in place on its inputs.
    concat_result = pd.concat([pico_df.copy(), cluster_df.copy()], axis=1)
    assert concat_result == "custom_concat_result"
    assert pico_df.get_backend() == "Pico"
    assert cluster_df.get_backend() == "Cluster"


def test_moving_pico_to_cluster_in_place_calls_set_backend_only_once_github_issue_7490(
    pico_df, cluster_df
):
    """An in-place ``set_backend`` call reaches ``DataFrame.set_backend`` exactly once."""
    # Capture the real bound method before patching so the mock can delegate to it.
    real_set_backend = pico_df.set_backend
    with mock.patch.object(
        pd.DataFrame, "set_backend", wraps=real_set_backend
    ) as set_backend_spy:
        pico_df.set_backend(cluster_df.get_backend(), inplace=True)
    assert pico_df.get_backend() == "Cluster"
    set_backend_spy.assert_called_once_with("Cluster", inplace=True)


def test_cast_to_second_backend_with___init__(pico_df, cluster_df):
    """A DataFrame built from a Pico column then a Cluster column lands on Cluster."""
    pico_column = pico_df.iloc[:, 0]
    cluster_column = cluster_df.iloc[:, 0]
    built = pd.DataFrame({"pico": pico_column, "cluster": cluster_column})
    # pico stays despite in-place casting by iloc
    assert pico_df.get_backend() == "Pico"
    assert cluster_df.get_backend() == "Cluster"
    # result should be on cluster
    assert built.get_backend() == "Cluster"


def test_cast_to_first_backend(pico_df, cluster_df):
    """Concat with Cluster first lands on Cluster and casts the Pico input in place."""
    combined = pd.concat([cluster_df, pico_df], axis=1)
    # pico_df was cast in place by concat
    assert pico_df.get_backend() == "Cluster"
    assert cluster_df.get_backend() == "Cluster"
    # result should be on cluster
    assert combined.get_backend() == cluster_df.get_backend()


def test_cast_to_first_backend_with_concat_uses_first_backend_api_override(
    pico_df, cluster_df
):
    """A Cluster-specific ``concat`` override is used when Cluster is the first input."""
    register_pd_accessor(name="concat", backend="Cluster")(
        lambda *args, **kwargs: "custom_concat_result"
    )
    concat_result = pd.concat([cluster_df, pico_df], axis=1)
    assert concat_result == "custom_concat_result"
    # pico was cast in place by concat
    assert pico_df.get_backend() == "Cluster"
    assert cluster_df.get_backend() == "Cluster"


def test_cast_to_first_backend_with___init__(pico_df, cluster_df):
    """A DataFrame built from a Cluster column then a Pico column lands on Cluster."""
    cluster_column = cluster_df.iloc[:, 0]
    pico_column = pico_df.iloc[:, 0]
    built = pd.DataFrame({"cluster": cluster_column, "pico": pico_column})
    # Pico not cast in place by iloc
    assert pico_df.get_backend() == "Pico"
    assert cluster_df.get_backend() == "Cluster"
    # result should be on cluster
    assert built.get_backend() == "Cluster"


def test_self_cost_causes_move(cloud_high_self_df, cluster_df):
    """
    Check that the cost of staying on a backend is taken into account.

    Staying on cloud_high_self costs HIGH and moving from it to cluster costs
    MEDIUM; staying on cluster costs ZERO and moving from it to
    cloud_high_self costs MEDIUM.

    For one dataframe on each backend, the candidate totals are therefore:

    * ``cloud_high_self`` as the final backend:
      ``stay_cost(cloud_high_self) + move_cost(cluster->cloud_high_self)``,
      i.e. ``HIGH + MEDIUM``;
    * ``cluster`` as the final backend:
      ``stay_cost(cluster) + move_cost(cloud_high_self->cluster)``,
      i.e. ``ZERO + MEDIUM``.

    ``cluster`` is cheaper, so it must be selected regardless of input order.
    """
    orderings = (
        [cloud_high_self_df, cluster_df],
        [cluster_df, cloud_high_self_df],
    )
    for frames in orderings:
        combined = pd.concat(frames)
        assert combined.get_backend() == "Cluster"


@pytest.mark.parametrize(
    "df1, df2, df3, df4, expected_result_backend",
    [
        # no-op
        ("cloud_df", "cloud_df", "cloud_df", "cloud_df", "Cloud"),
        # moving all dfs to cloud is 1250, moving to cluster is 1000
        # regardless of how they are ordered
        ("pico_df", "local_df", "cluster_df", "cloud_df", "Cluster"),
        ("cloud_df", "local_df", "cluster_df", "pico_df", "Cluster"),
        ("cloud_df", "cluster_df", "local_df", "pico_df", "Cluster"),
        ("cloud_df", "cloud_df", "local_df", "pico_df", "Cloud"),
        # Still move everything to cloud
        ("pico_df", "pico_df", "pico_df", "cloud_df", "Cloud"),
        ("pico_df", "pico_df", "local_df", "cloud_df", "Cloud"),
    ],
)
def test_mixed_dfs(df1, df2, df3, df4, expected_result_backend, request):
    """
    Concatenate four frames from mixed backends and check the result backend.

    Each ``dfN`` is a fixture name resolved through ``request``. An
    ``expected_result_backend`` of None means the concat must raise ValueError.
    """
    frames = [request.getfixturevalue(name) for name in (df1, df2, df3, df4)]
    if expected_result_backend is None:
        with pytest.raises(ValueError):
            pd.concat(axis=1, objs=frames)
        return
    combined = pd.concat(axis=1, objs=frames)
    assert combined.get_backend() == expected_result_backend


def test_adversarial_high(adversarial_df, cluster_df):
    """Concat of an Adversarial frame with a Cluster frame raises ValueError."""
    frames = [adversarial_df, cluster_df]
    with pytest.raises(ValueError):
        pd.concat(frames, axis=1)


def test_adversarial_low(adversarial_df, cloud_df):
    """Concat of an Adversarial frame with a Cloud frame raises ValueError."""
    frames = [adversarial_df, cloud_df]
    with pytest.raises(ValueError):
        pd.concat(frames, axis=1)


def test_two_two_qc_types_default_rhs(default_df, cluster_df):
    """With no mutual cost knowledge, concat defaults to the first (caller) backend."""
    # None of the query compilers know about each other here,
    # so we default to the caller.
    combined = pd.concat([default_df, cluster_df], axis=1)
    assert default_df.get_backend() == "Test_Casting_Default"
    # in place cast to default by concat
    assert cluster_df.get_backend() == "Test_Casting_Default"
    # should move to default
    assert combined.get_backend() == default_df.get_backend()


def test_two_two_qc_types_default_lhs(default_df, cluster_df):
    """With Cluster first and no mutual cost knowledge, concat stays on Cluster."""
    # None of the query compilers know about each other here,
    # so we default to the caller.
    combined = pd.concat([cluster_df, default_df], axis=1)
    # in place cast to Cluster by concat
    assert default_df.get_backend() == "Cluster"
    assert cluster_df.get_backend() == "Cluster"
    # should move to cluster
    assert combined.get_backend() == cluster_df.get_backend()


def test_two_two_qc_types_default_2_rhs(default_df, cloud_df):
    """Concat of default then Cloud moves to Cloud."""
    # Cloud knows a bit about costing, so we prefer moving to there.
    combined = pd.concat([default_df, cloud_df], axis=1)
    # inplace cast to Cloud by concat
    assert default_df.get_backend() == "Cloud"
    assert cloud_df.get_backend() == "Cloud"
    # should move to cloud
    assert combined.get_backend() == cloud_df.get_backend()


def test_two_two_qc_types_default_2_lhs(default_df, cloud_df):
    """Concat of Cloud then default moves to Cloud."""
    # Cloud knows a bit about costing, so we prefer moving to there.
    combined = pd.concat([cloud_df, default_df], axis=1)
    # inplace cast to Cloud by concat
    assert default_df.get_backend() == "Cloud"
    assert cloud_df.get_backend() == "Cloud"
    # should move to cloud
    assert combined.get_backend() == cloud_df.get_backend()


def test_default_to_caller(default_df, default2_df):
    """When no query compiler has cost information, the result stays on the first input's backend."""
    # No qc knows anything; default to caller. The last pair is the no-change case.
    input_pairs = (
        (default_df, default2_df),
        (default2_df, default_df),
        (default_df, default_df),
    )
    for caller, other in input_pairs:
        combined = pd.concat([caller, other], axis=1)
        assert combined.get_backend() == caller.get_backend()


def test_no_qc_to_calculate():
    """``calculate()`` raises ValueError when no query compilers are supplied."""
    empty_calculator = BackendCostCalculator(
        operation_arguments=MappingProxyType({}),
        api_cls_name=None,
        operation="operation0",
        query_compilers=[],
        preop_switch=False,
    )
    with pytest.raises(ValueError):
        empty_calculator.calculate()


def test_qc_default_self_cost(default_df, default2_df):
    """The default ``move_to_cost`` is None for another QC type and COST_ZERO for its own."""
    cost_to_other_type = default_df._query_compiler.move_to_cost(
        other_qc_type=type(default2_df._query_compiler),
        api_cls_name=None,
        operation="operation0",
        arguments=MappingProxyType({}),
    )
    assert cost_to_other_type is None

    cost_to_own_type = default_df._query_compiler.move_to_cost(
        other_qc_type=type(default_df._query_compiler),
        api_cls_name=None,
        operation="operation0",
        arguments=MappingProxyType({}),
    )
    assert cost_to_own_type is QCCoercionCost.COST_ZERO


def test_qc_casting_changed_operation(pico_df, cloud_df):
    """Adding frames from two backends matches native pandas addition in both operand orders."""
    native_cloud = cloud_df._to_pandas()
    native_pico = pico_df._to_pandas()
    expected = native_cloud + native_pico
    # test both directions
    pico_plus_cloud = pico_df + cloud_df
    cloud_plus_pico = cloud_df + pico_df
    assert pico_plus_cloud._to_pandas().equals(expected)
    assert cloud_plus_pico._to_pandas().equals(expected)


def test_qc_mixed_loc(pico_df, cloud_df):
    """Indexing one frame with values read from a frame on another backend works."""
    # Each lookup selects a column, then an element, using keys taken from
    # the Pico and Cloud frames in different combinations.
    assert pico_df[pico_df[0][0]][cloud_df[0][1]] == 1
    assert pico_df[cloud_df[0][0]][pico_df[0][1]] == 1
    assert cloud_df[pico_df[0][0]][pico_df[0][1]] == 1


def test_merge_in_place(default_df, lazy_df, cloud_df):
    """
    Check in-place casting of ``merge`` arguments and that it can be disabled.

    By default the lazy frame is cast in place to the default frame's backend.
    With ``BackendMergeCastInPlace=False`` both inputs keep their own backend.
    """
    # lazy_df tries to pawn off work on other engines
    df = default_df.merge(lazy_df)
    # Backend names are strings: compare by value, not by identity, so the
    # assertions do not depend on string interning.
    assert df.get_backend() == default_df.get_backend()
    # Both arguments now have the same qc type
    assert lazy_df.get_backend() == default_df.get_backend()

    with config_context(BackendMergeCastInPlace=False):
        # Put both frames back on their own backends before merging again.
        lazy_df = lazy_df.move_to("Lazy")
        cloud_df = cloud_df.move_to("Cloud")
        df = cloud_df.merge(lazy_df)
        assert df.get_backend() == cloud_df.get_backend()
        assert lazy_df.get_backend() == "Lazy"
        assert cloud_df.get_backend() == "Cloud"


def test_information_asymmetry(default_df, cloud_df, eager_df, lazy_df):
    """A compiler with one-sided cost knowledge controls the direction of the cast."""
    # Normally the default query compiler would be chosen here. Eager knows
    # about default but not the other way around, so eager has a special
    # ability to control the directionality of the cast.
    for left in (default_df, cloud_df):
        merged = left.merge(eager_df)
        assert merged.get_backend() == eager_df.get_backend()

    # lazy_df tries to pawn off work on other engines
    for left in (default_df, cloud_df):
        merged = left.merge(lazy_df)
        assert merged.get_backend() == left.get_backend()


def test_setitem_in_place_with_self_switching_backend(cloud_df, local_df):
    """An ``iloc`` scalar assignment computed from a cloud value updates the local frame in place."""
    new_value = cloud_df.iloc[1, 0] + local_df.iloc[1, 0]
    local_df.iloc[1, 0] = new_value
    # compute happens in cloud, but we have to make sure that we propagate the
    # in-place update to the local_df
    expected = pandas.DataFrame([0, 2, 2])
    df_equals(local_df, expected)
    assert local_df.get_backend() == "Local_Machine"
    assert cloud_df.get_backend() == "Cloud"


@pytest.mark.parametrize("pin_local", [True, False], ids=["pinned", "unpinned"])
def test_switch_local_to_cloud_with_iloc___setitem__(local_df, cloud_df, pin_local):
    """
    Assign a cloud column into a local frame through ``iloc.__setitem__``.

    When the local frame is pinned it must stay on ``Local_Machine``;
    otherwise it is expected to end up on ``Cloud``.
    """
    if pin_local:
        local_df = local_df.pin_backend()
    local_df.iloc[:, 0] = cloud_df.iloc[:, 0] + 1
    expected_pandas = local_df._to_pandas()
    expected_pandas.iloc[:, 0] = cloud_df._to_pandas().iloc[:, 0] + 1
    df_equals(local_df, expected_pandas)
    # The conditional expression must be parenthesized. Without parentheses
    # the statement parses as `assert (x == "Local_Machine") if pin_local
    # else "Cloud"`, which in the unpinned case asserts a non-empty string
    # and therefore always passes.
    expected_backend = "Local_Machine" if pin_local else "Cloud"
    assert local_df.get_backend() == expected_backend


def test_single_backend_merge_no_good_options():
    """
    Data stays on the only active backend even if it is too large for it.

    This test should force the creation of a dataframe which is too large for
    the backend and verify that it stays there because there are no other
    options.
    """
    only_backend = backend_test_context(
        test_backend="Small_Data_Local",
        choices=["Small_Data_Local"],
    )
    with only_backend:
        frame = pd.DataFrame({"a": [1] * 100})
        frame["two"] = pd.to_datetime(frame["a"])
        assert frame.get_backend() == "Small_Data_Local"


def test_stay_or_move_evaluation(cloud_high_self_df, default_df):
    """
    Compare stay and move costs before and after moving to the default backend.

    On ``cloud_high_self_df`` staying must cost more than moving to the
    default compiler. After the move, the default compiler must report a stay
    cost but no (``None``) cost for moving back.
    """
    default_cls = type(default_df._get_query_compiler())
    cloud_cls = type(cloud_high_self_df._get_query_compiler())
    empty_arguments = MappingProxyType({})

    stay_cost = cloud_high_self_df._get_query_compiler().stay_cost(
        "Series", "myop", arguments=empty_arguments
    )
    move_cost = cloud_high_self_df._get_query_compiler().move_to_cost(
        default_cls, "Series", "myop", arguments=empty_arguments
    )
    # Assert the comparison itself instead of `assert False` in an else
    # branch, so a failure reports both cost values and `df` is always bound.
    assert stay_cost > move_cost
    df = cloud_high_self_df.move_to("Test_Casting_Default")

    stay_cost = df._get_query_compiler().stay_cost(
        "Series", "myop", arguments=empty_arguments
    )
    move_cost = df._get_query_compiler().move_to_cost(
        cloud_cls, "Series", "myop", arguments=empty_arguments
    )
    assert stay_cost is not None
    assert move_cost is None


def test_max_shape(cloud_df):
    """The query compiler's ``_max_shape()`` equals the frame's ``shape``."""
    # default implementation matches df.shape
    reported_max_shape = cloud_df._query_compiler._max_shape()
    assert cloud_df.shape == reported_max_shape


class TestSwitchBackendPostOpDependingOnDataSize:
    """
    Tests for switching backends after an operation, based on result size.

    Every test activates only the ``Big_Data_Cloud`` and ``Small_Data_Local``
    backends, with ``Big_Data_Cloud`` as the default, and registers a post-op
    switch through ``register_function_for_post_op_switch``. Statement order
    matters: several tests assert the backend before registering the switch
    and again afterwards.
    """

    def test_read_json(self):
        """``read_json`` only moves small results off the cloud once a post-op switch is registered."""
        with backend_test_context(
            test_backend="Big_Data_Cloud",
            choices=("Big_Data_Cloud", "Small_Data_Local"),
        ):
            big_json = json.dumps({"col0": list(range(BIG_DATA_CLOUD_MIN_NUM_ROWS))})
            small_json = json.dumps(
                {"col0": list(range(BIG_DATA_CLOUD_MIN_NUM_ROWS - 1))}
            )
            # Before registration both results stay on the default backend.
            assert pd.read_json(StringIO(big_json)).get_backend() == "Big_Data_Cloud"
            assert pd.read_json(StringIO(small_json)).get_backend() == "Big_Data_Cloud"
            register_function_for_post_op_switch(
                class_name=None, backend="Big_Data_Cloud", method="read_json"
            )
            # After registration only the small result moves.
            assert pd.read_json(StringIO(big_json)).get_backend() == "Big_Data_Cloud"
            assert (
                pd.read_json(StringIO(small_json)).get_backend() == "Small_Data_Local"
            )

    @backend_test_context(
        test_backend="Big_Data_Cloud",
        choices=("Big_Data_Cloud", "Small_Data_Local"),
    )
    def test_read_json_logging_for_post_op_switch(self, caplog):
        """A post-op switch that happens logs two INFO records: the consideration and the decision to move."""
        register_function_for_post_op_switch(
            class_name=None, backend="Big_Data_Cloud", method="read_json"
        )
        with caplog.at_level(level=logging.INFO, logger=DEFAULT_LOGGER_NAME):
            assert (
                pd.read_json(
                    StringIO(
                        json.dumps(
                            {"col0": list(range(BIG_DATA_CLOUD_MIN_NUM_ROWS - 1))}
                        )
                    )
                ).get_backend()
                == "Small_Data_Local"
            )
        log_records = caplog.records
        assert len(log_records) == 2

        # First record: the candidate backend that was considered.
        assert log_records[0].name == DEFAULT_LOGGER_NAME
        assert log_records[0].levelno == logging.INFO
        assert log_records[0].message.startswith(
            "After modin.pandas function read_json, considered moving to backend Small_Data_Local with"
        )

        # Second record: the decision to move.
        assert log_records[1].name == DEFAULT_LOGGER_NAME
        assert log_records[1].levelno == logging.INFO
        assert log_records[1].message.startswith(
            "Chose to move to backend Small_Data_Local"
        )

    @backend_test_context(
        test_backend="Big_Data_Cloud",
        choices=("Big_Data_Cloud", "Small_Data_Local"),
    )
    def test_read_json_logging_for_post_op_not_switch(self, caplog):
        """A post-op switch that is declined logs two INFO records: the consideration and the decision to stay."""
        register_function_for_post_op_switch(
            class_name=None, backend="Big_Data_Cloud", method="read_json"
        )
        with caplog.at_level(level=logging.INFO, logger=DEFAULT_LOGGER_NAME):
            assert (
                pd.read_json(
                    StringIO(
                        json.dumps({"col0": list(range(BIG_DATA_CLOUD_MIN_NUM_ROWS))})
                    )
                ).get_backend()
                == "Big_Data_Cloud"
            )
        log_records = caplog.records
        assert len(log_records) == 2

        # First record: the candidate backend that was considered.
        assert log_records[0].name == DEFAULT_LOGGER_NAME
        assert log_records[0].levelno == logging.INFO
        assert log_records[0].message.startswith(
            "After modin.pandas function read_json, considered moving to backend Small_Data_Local with"
        )

        # Second record: the decision not to switch.
        assert log_records[1].name == DEFAULT_LOGGER_NAME
        assert log_records[1].levelno == logging.INFO
        assert log_records[1].message.startswith(
            "Chose not to switch backends after operation read_json"
        )

    @backend_test_context(
        test_backend="Big_Data_Cloud",
        choices=("Big_Data_Cloud", "Small_Data_Local"),
    )
    def test_progress_bar_shows_modin_pandas_for_general_functions(self):
        """Test that progress bar messages show 'modin.pandas.read_json' instead of 'None.read_json' for general functions."""
        with mock.patch("tqdm.auto.trange") as mock_trange:
            # Stand in for the progress bar with a plain range.
            mock_trange.return_value = range(2)

            # Register a post-op switch for read_json (general function with class_name=None)
            register_function_for_post_op_switch(
                class_name=None, backend="Big_Data_Cloud", method="read_json"
            )

            # Create a small dataset that will trigger backend switch and show progress bar
            json_input = json.dumps(
                {"col0": list(range(BIG_DATA_CLOUD_MIN_NUM_ROWS - 1))}
            )

            # This should trigger a backend switch and show progress bar
            result_df = pd.read_json(StringIO(json_input))
            assert result_df.get_backend() == "Small_Data_Local"

            # Verify that trange was called with correct progress bar message
            mock_trange.assert_called_once()
            call_args = mock_trange.call_args
            desc = call_args[1]["desc"]  # Get the 'desc' keyword argument

            assert desc.startswith(
                "Transfer: Big_Dat... → Small_D...  |    read_json    ≃ (9, 1)    "
            )

    def test_agg(self):
        """``DataFrame.sum`` moves its small result to the local backend once a post-op switch is registered."""
        with backend_test_context(
            test_backend="Big_Data_Cloud",
            choices=("Big_Data_Cloud", "Small_Data_Local"),
        ):
            df = pd.DataFrame([[1, 2], [3, 4]])
            # Before registration the result stays on the cloud.
            assert df.get_backend() == "Big_Data_Cloud"
            assert df.sum().get_backend() == "Big_Data_Cloud"
            register_function_for_post_op_switch(
                class_name="DataFrame", backend="Big_Data_Cloud", method="sum"
            )
            # After registration the input stays put but the result moves.
            assert df.get_backend() == "Big_Data_Cloud"
            assert df.sum().get_backend() == "Small_Data_Local"

    def test_agg_pinned(self):
        """Disabling ``AutoSwitchBackend`` or pinning the frame prevents the post-op switch of ``sum``."""
        # The operation in test_agg would naturally cause an automatic switch, but the
        # absence of AutoSwitchBackend or the presence of a pin on the frame prevent this
        # switch from happening.
        with backend_test_context(
            test_backend="Big_Data_Cloud",
            choices=("Big_Data_Cloud", "Small_Data_Local"),
        ):
            register_function_for_post_op_switch(
                class_name="DataFrame", backend="Big_Data_Cloud", method="sum"
            )
            # No pin or config, should switch
            df = pd.DataFrame([[1, 2], [3, 4]])
            assert df.get_backend() == "Big_Data_Cloud"
            assert df.sum().get_backend() == "Small_Data_Local"
            # config set to false, should not switch
            with config_context(AutoSwitchBackend=False):
                df = pd.DataFrame([[1, 2], [3, 4]])
                assert df.get_backend() == "Big_Data_Cloud"
                assert df.sum().get_backend() == "Big_Data_Cloud"
            # no config, but data is pinned
            df = pd.DataFrame([[1, 2], [3, 4]]).pin_backend()
            assert df.get_backend() == "Big_Data_Cloud"
            assert df.sum().get_backend() == "Big_Data_Cloud"
            # a frame-level pin remains valid across a transformation
            df_copy = df + 1
            assert df_copy.get_backend() == "Big_Data_Cloud"
            assert df_copy.sum().get_backend() == "Big_Data_Cloud"
            # unpinning df allows a switch again
            df = df.unpin_backend()
            assert df.get_backend() == "Big_Data_Cloud"
            assert df.sum().get_backend() == "Small_Data_Local"
            df_copy = df + 1
            assert df_copy.get_backend() == "Big_Data_Cloud"
            assert df_copy.sum().get_backend() == "Small_Data_Local"
            # check in-place pin/unpin operations
            df.pin_backend(inplace=True)
            assert df.get_backend() == "Big_Data_Cloud"
            assert df.sum().get_backend() == "Big_Data_Cloud"
            df.unpin_backend(inplace=True)
            assert df.get_backend() == "Big_Data_Cloud"
            assert df.sum().get_backend() == "Small_Data_Local"

    @pytest.mark.parametrize(
        "num_groups, expected_backend",
        [
            (BIG_DATA_CLOUD_MIN_NUM_ROWS - 1, "Small_Data_Local"),
            (BIG_DATA_CLOUD_MIN_NUM_ROWS, "Big_Data_Cloud"),
        ],
    )
    @pytest.mark.parametrize(
        "groupby_class,operation",
        [
            param(
                "DataFrameGroupBy",
                lambda df: df.groupby("col0").sum(),
                id="DataFrameGroupBy",
            ),
            param(
                "SeriesGroupBy",
                lambda df: df.groupby("col0")["col1"].sum(),
                id="SeriesGroupBy",
            ),
        ],
    )
    def test_dataframe_groupby_agg_switches_for_small_result(
        self, num_groups, expected_backend, operation, groupby_class
    ):
        """A groupby ``sum`` result moves to the local backend only when the number of groups is below the threshold."""
        with backend_test_context(
            test_backend="Big_Data_Cloud",
            choices=("Big_Data_Cloud", "Small_Data_Local"),
        ):
            # Every row has a distinct "col0" value, so there are num_groups groups.
            modin_df, pandas_df = create_test_dfs(
                {
                    "col0": list(range(num_groups)),
                    "col1": list(range(1, num_groups + 1)),
                }
            )

            # Before registration the result stays on the cloud.
            assert modin_df.get_backend() == "Big_Data_Cloud"
            assert operation(modin_df).get_backend() == "Big_Data_Cloud"

            register_function_for_post_op_switch(
                class_name=groupby_class, backend="Big_Data_Cloud", method="sum"
            )

            assert modin_df.get_backend() == "Big_Data_Cloud"
            modin_result = operation(modin_df)
            pandas_result = operation(pandas_df)
            df_equals(modin_result, pandas_result)
            assert modin_result.get_backend() == expected_backend
            # The input frame itself is never moved by the post-op switch.
            assert modin_df.get_backend() == "Big_Data_Cloud"

    @pytest.mark.parametrize(
        "groupby_class,operation",
        [
            param(
                "DataFrameGroupBy",
                lambda groupby: groupby.sum(),
                id="DataFrameGroupBy",
            ),
            param(
                "SeriesGroupBy",
                lambda groupby: groupby["col1"].sum(),
                id="SeriesGroupBy",
            ),
        ],
    )
    @pytest.mark.parametrize(
        "auto_switch_backend",
        [True, False],
        ids=lambda param: f"auto_switch_backend_{param}",
    )
    def test_auto_switch_config_can_disable_groupby_agg_auto_switch(
        self,
        operation,
        groupby_class,
        auto_switch_backend,
    ):
        """The ``AutoSwitchBackend`` config decides whether a small groupby ``sum`` result is moved."""
        num_groups = BIG_DATA_CLOUD_MIN_NUM_ROWS - 1
        with backend_test_context(
            test_backend="Big_Data_Cloud",
            choices=("Big_Data_Cloud", "Small_Data_Local"),
        ), config_context(AutoSwitchBackend=auto_switch_backend):
            # Build matching modin and pandas groupby objects from the same data.
            modin_groupby, pandas_groupby = (
                df.groupby("col0")
                for df in create_test_dfs(
                    {
                        "col0": list(range(num_groups)),
                        "col1": list(range(1, num_groups + 1)),
                    }
                )
            )

            # Before registration the result stays on the cloud.
            assert modin_groupby.get_backend() == "Big_Data_Cloud"
            assert operation(modin_groupby).get_backend() == "Big_Data_Cloud"

            register_function_for_post_op_switch(
                class_name=groupby_class, backend="Big_Data_Cloud", method="sum"
            )

            assert modin_groupby.get_backend() == "Big_Data_Cloud"
            modin_result = operation(modin_groupby)
            pandas_result = operation(pandas_groupby)
            df_equals(modin_result, pandas_result)
            assert modin_result.get_backend() == (
                "Small_Data_Local" if auto_switch_backend else "Big_Data_Cloud"
            )
            # The groupby object itself is never moved by the post-op switch.
            assert modin_groupby.get_backend() == "Big_Data_Cloud"

    @pytest.mark.parametrize(
        "groupby_class,groupby_operation,agg_operation",
        [
            param(
                "DataFrameGroupBy",
                lambda df: df.groupby("col0"),
                lambda groupby: groupby.sum(),
                id="DataFrameGroupBy",
            ),
            param(
                "SeriesGroupBy",
                lambda df: df.groupby("col0")["col1"],
                lambda groupby: groupby.sum(),
                id="SeriesGroupBy",
            ),
        ],
    )
    @backend_test_context(
        test_backend="Big_Data_Cloud",
        choices=("Big_Data_Cloud", "Small_Data_Local"),
    )
    def test_pinned_dataframe_prevents_groupby_backend_switch(
        self, groupby_class, groupby_operation, agg_operation
    ):
        """Test that pinning a DataFrame prevents groupby operations from switching backends."""
        modin_df, pandas_df = create_test_dfs(
            {
                "col0": list(range(BIG_DATA_CLOUD_MIN_NUM_ROWS - 1)),
                "col1": list(range(1, BIG_DATA_CLOUD_MIN_NUM_ROWS)),
            }
        )

        assert modin_df.get_backend() == "Big_Data_Cloud"

        # Pin the DataFrame
        modin_df.pin_backend(inplace=True)
        assert modin_df.is_backend_pinned()

        # Create groupby object - should inherit pin status from dataframe
        modin_groupby = groupby_operation(modin_df)
        pandas_groupby = groupby_operation(pandas_df)
        assert modin_groupby.is_backend_pinned()  # Inherited from DataFrame

        # Register a post-op switch that would normally move to Small_Data_Local
        register_function_for_post_op_switch(
            class_name=groupby_class, backend="Big_Data_Cloud", method="sum"
        )

        # The operation should stay on Big_Data_Cloud due to inherited pinning
        modin_result = agg_operation(modin_groupby)
        pandas_result = agg_operation(pandas_groupby)
        df_equals(modin_result, pandas_result)
        assert modin_result.get_backend() == "Big_Data_Cloud"

    @pytest.mark.parametrize(
        "groupby_class,groupby_operation,agg_operation",
        [
            param(
                "DataFrameGroupBy",
                lambda df: df.groupby("col0"),
                lambda groupby: groupby.sum(),
                id="DataFrameGroupBy",
            ),
            param(
                "SeriesGroupBy",
                lambda df: df.groupby("col0")["col1"],
                lambda groupby: groupby.sum(),
                id="SeriesGroupBy",
            ),
        ],
    )
    @pytest.mark.parametrize("inplace", [True, False], ids=["inplace", "not_inplace"])
    @backend_test_context(
        test_backend="Big_Data_Cloud",
        choices=("Big_Data_Cloud", "Small_Data_Local"),
    )
    def test_pinned_groupby_prevents_backend_switch(
        self, groupby_class, groupby_operation, agg_operation, inplace
    ):
        """Test that pinning a GroupBy object prevents operations from switching backends."""
        modin_df, pandas_df = create_test_dfs(
            {
                "col0": list(range(BIG_DATA_CLOUD_MIN_NUM_ROWS - 1)),
                "col1": list(range(1, BIG_DATA_CLOUD_MIN_NUM_ROWS)),
            }
        )

        assert modin_df.get_backend() == "Big_Data_Cloud"

        # Create groupby object and pin it
        modin_groupby = groupby_operation(modin_df)
        pandas_groupby = groupby_operation(pandas_df)

        if inplace:
            modin_groupby.pin_backend(inplace=True)
            assert modin_groupby.is_backend_pinned()
        else:
            # A non-in-place pin returns a pinned object and leaves the
            # original groupby unpinned.
            pinned_groupby = modin_groupby.pin_backend(inplace=False)
            assert not modin_groupby.is_backend_pinned()
            assert pinned_groupby.is_backend_pinned()
            modin_groupby = pinned_groupby

        # Register a post-op switch that would normally move to Small_Data_Local
        register_function_for_post_op_switch(
            class_name=groupby_class, backend="Big_Data_Cloud", method="sum"
        )

        # The operation should stay on Big_Data_Cloud due to pinning
        modin_result = agg_operation(modin_groupby)
        pandas_result = agg_operation(pandas_groupby)
        df_equals(modin_result, pandas_result)
        assert modin_result.get_backend() == "Big_Data_Cloud"


class TestSwitchBackendPreOp:
    @pytest.mark.parametrize(
        "data_size, expected_backend",
        [
            param(
                BIG_DATA_CLOUD_MIN_NUM_ROWS - 1,
                "Small_Data_Local",
                id="small_data_should_move_to_small_engine",
            ),
            param(
                BIG_DATA_CLOUD_MIN_NUM_ROWS,
                "Big_Data_Cloud",
                id="big_data_should_stay_in_cloud",
            ),
        ],
    )
    def test_describe_switches_depending_on_data_size(
        self, data_size, expected_backend
    ):
        """Check that a pre-op switch on ``describe`` moves the input by data size.

        ``describe()`` is called once before and once after registering the
        pre-op switch. After registration, both the input frame and the result
        are expected to report ``expected_backend``.
        """

        # Mock the default describe() implementation so that we can check which
        # backend its input was on when it was called. Inspecting the mock's
        # call_args_list is not enough, because call_args_list keeps a reference
        # to the input dataframe, whose backend may change in place.
        def record_backend_then_delegate(self, *args, **kwargs):
            # 1) Record the backend of the input at call time.
            mock_describe._last_input_backend = self.get_backend()
            # 2) Return mock.DEFAULT so that the mock falls back to the wrapped
            #    original describe() implementation.
            return mock.DEFAULT

        mock_describe = mock.Mock(
            wraps=pd.DataFrame._extensions[None]["describe"],
            side_effect=record_backend_then_delegate,
        )
        with backend_test_context(
            test_backend="Big_Data_Cloud",
            choices=("Big_Data_Cloud", "Small_Data_Local"),
        ):
            frame = pd.DataFrame(list(range(data_size)))
            with mock.patch.dict(
                pd.DataFrame._extensions[None], {"describe": mock_describe}
            ):
                # Until the pre-op switch is registered, describe() should not
                # trigger an automatic switch.
                assert frame.get_backend() == "Big_Data_Cloud"
                summary = frame.describe()
                df_equals(summary, frame._to_pandas().describe())
                assert summary.get_backend() == "Big_Data_Cloud"
                assert frame.get_backend() == "Big_Data_Cloud"
                mock_describe.assert_called_once()
                assert mock_describe._last_input_backend == "Big_Data_Cloud"

                mock_describe.reset_mock()

                register_function_for_pre_op_switch(
                    class_name="DataFrame", backend="Big_Data_Cloud", method="describe"
                )

                # With the pre-op switch registered, the same call should now
                # trigger the automatic switch.
                assert frame.get_backend() == "Big_Data_Cloud"
                summary = frame.describe()
                df_equals(summary, frame._to_pandas().describe())
                assert summary.get_backend() == expected_backend
                assert frame.get_backend() == expected_backend
                mock_describe.assert_called_once()
                assert mock_describe._last_input_backend == expected_backend

    def test_read_json_with_extensions(self):
        """Check which backend's ``read_json`` extension runs before and after a pre-op switch.

        A mocked ``read_json`` extension is registered for each backend. The
        mocks record which implementation was called.
        """
        json_input = json.dumps({"col0": [1]})

        def make_read_json_mock(target_backend):
            # Delegate to the default read_json implementation, then move the
            # result so that the extension produces a dataframe with the
            # backend it is registered for.
            def read_and_move(*args, **kwargs):
                default_read_json = _GENERAL_EXTENSIONS[None]["read_json"]
                return default_read_json(*args, **kwargs).move_to(target_backend)

            reader = mock.Mock(wraps=read_and_move)
            reader.__name__ = "read_json"
            return reader

        pandas_read_json = make_read_json_mock("Small_Data_Local")
        cloud_read_json = make_read_json_mock("Big_Data_Cloud")

        register_pd_accessor("read_json", backend="Small_Data_Local")(pandas_read_json)
        register_pd_accessor("read_json", backend="Big_Data_Cloud")(cloud_read_json)

        with backend_test_context(
            test_backend="Big_Data_Cloud",
            choices=("Big_Data_Cloud", "Small_Data_Local"),
        ):
            # Without a registered switch, only the cloud extension is used.
            loaded = pd.read_json(StringIO(json_input))
            assert loaded.get_backend() == "Big_Data_Cloud"
            pandas_read_json.assert_not_called()
            cloud_read_json.assert_called_once()

            register_function_for_pre_op_switch(
                class_name=None, backend="Big_Data_Cloud", method="read_json"
            )

            for reader in (pandas_read_json, cloud_read_json):
                reader.reset_mock()

            # After registration, the read is served by the small-data extension.
            loaded = pd.read_json(StringIO(json_input))

            assert loaded.get_backend() == "Small_Data_Local"
            pandas_read_json.assert_called_once()
            cloud_read_json.assert_not_called()

    def test_read_json_without_extensions(self):
        """Check that a pre-op switch on ``read_json`` changes the result's backend.

        This test registers no backend-specific ``read_json`` extensions itself.
        """
        payload = json.dumps({"col0": [1]})

        def read_payload():
            # Each read gets its own StringIO over the same JSON text.
            return pd.read_json(StringIO(payload))

        with backend_test_context(
            test_backend="Big_Data_Cloud",
            choices=("Big_Data_Cloud", "Small_Data_Local"),
        ):
            assert read_payload().get_backend() == "Big_Data_Cloud"

            register_function_for_pre_op_switch(
                class_name=None, backend="Big_Data_Cloud", method="read_json"
            )

            assert read_payload().get_backend() == "Small_Data_Local"

    @pytest.mark.parametrize(
        "data_size, expected_backend",
        [
            param(
                BIG_DATA_CLOUD_MIN_NUM_ROWS - 1,
                "Small_Data_Local",
                id="small_data_should_move_to_small_engine",
            ),
            param(
                BIG_DATA_CLOUD_MIN_NUM_ROWS,
                "Big_Data_Cloud",
                id="big_data_should_stay_in_cloud",
            ),
        ],
    )
    def test_iloc_setitem_switches_depending_on_data_size(
        self, data_size, expected_backend
    ):
        """Check that ``iloc.__setitem__`` switches backends only once registered.

        The first assignment runs before the pre-op switch is registered and
        must leave the frame on ``Big_Data_Cloud``. The second runs after
        registration and must leave it on ``expected_backend``.
        """
        with backend_test_context(
            test_backend="Big_Data_Cloud",
            choices=("Big_Data_Cloud", "Small_Data_Local"),
        ):
            md_df, pd_df = create_test_dfs(list(range(data_size)))

            def assign_first_cell(value):
                # Apply the same in-place assignment to both frames and compare.
                eval_general(
                    md_df,
                    pd_df,
                    lambda df: df.iloc.__setitem__((0, 0), value),
                    __inplace__=True,
                )

            assert md_df.get_backend() == "Big_Data_Cloud"
            assign_first_cell(-1)
            assert md_df.get_backend() == "Big_Data_Cloud"

            register_function_for_pre_op_switch(
                class_name="_iLocIndexer",
                backend="Big_Data_Cloud",
                method="__setitem__",
            )
            assign_first_cell(0)
            assert md_df.get_backend() == expected_backend

    def test_iloc_pinned(self):
        """Check how ``AutoSwitchBackend`` and pinning gate the ``iloc`` assignment switch.

        With the pre-op switch registered, an ``iloc`` assignment on this frame
        switches it to ``Small_Data_Local``. Disabling ``AutoSwitchBackend`` or
        pinning the frame keeps it on ``Big_Data_Cloud``.
        """
        row_count = BIG_DATA_CLOUD_MIN_NUM_ROWS - 1

        def make_frame():
            return pd.DataFrame(list(range(row_count)))

        with backend_test_context(
            test_backend="Big_Data_Cloud",
            choices=("Big_Data_Cloud", "Small_Data_Local"),
        ):
            register_function_for_pre_op_switch(
                class_name="_iLocIndexer",
                backend="Big_Data_Cloud",
                method="__setitem__",
            )

            # Neither pinned nor disabled through config: the assignment switches.
            frame = make_frame()
            assert frame.get_backend() == "Big_Data_Cloud"
            frame.iloc[0, 0] = -1
            assert frame.get_backend() == "Small_Data_Local"

            # AutoSwitchBackend turned off: the assignment must not switch.
            with config_context(AutoSwitchBackend=False):
                frame = make_frame()
                assert frame.get_backend() == "Big_Data_Cloud"
                frame.iloc[0, 0] = -2
                assert frame.get_backend() == "Big_Data_Cloud"

            # Default config again, but the frame itself is pinned.
            frame = make_frame().pin_backend()
            assert frame.get_backend() == "Big_Data_Cloud"
            frame.iloc[0, 0] = -3
            assert frame.get_backend() == "Big_Data_Cloud"

            # A frame-level pin remains valid across a transformation.
            derived = frame + 1
            assert derived.get_backend() == "Big_Data_Cloud"
            derived.iloc[0, 0] = -4
            assert derived.get_backend() == "Big_Data_Cloud"

            # Unpinning the frame allows a switch again.
            frame.unpin_backend(inplace=True)
            assert frame.get_backend() == "Big_Data_Cloud"
            frame.iloc[0, 0] = -5
            assert frame.get_backend() == "Small_Data_Local"

            # An in-place set_backend operation clears the pin
            frame.move_to("Big_Data_Cloud", inplace=True)

            # Exercise the in-place pin/unpin operations.
            frame.pin_backend(inplace=True)
            assert frame.get_backend() == "Big_Data_Cloud"
            frame.iloc[0, 0] = -6
            assert frame.get_backend() == "Big_Data_Cloud"
            frame.unpin_backend(inplace=True)
            assert frame.get_backend() == "Big_Data_Cloud"
            frame.iloc[0, 0] = -7
            assert frame.get_backend() == "Small_Data_Local"

    @pytest.mark.parametrize(
        "args, kwargs, expected_backend",
        (
            param((), {}, "Small_Data_Local", id="no_args_or_kwargs"),
            param(([1],), {}, "Small_Data_Local", id="small_list_data_in_arg"),
            param(
                (list(range(BIG_DATA_CLOUD_MIN_NUM_ROWS)),),
                {},
                "Small_Data_Local",
                id="big_list_data_in_arg",
            ),
            param((), {"data": [1]}, "Small_Data_Local", id="list_data_in_kwarg"),
            param(
                (),
                {"data": pandas.Series([1])},
                "Small_Data_Local",
                id="series_data_in_kwarg",
            ),
            param(
                (),
                {"query_compiler": CloudForBigDataQC(pandas.DataFrame([0, 1, 2]))},
                "Big_Data_Cloud",
                id="cloud_query_compiler_in_kwarg",
            ),
            param(
                (),
                {"query_compiler": LocalForSmallDataQC(pandas.DataFrame([0, 1, 2]))},
                "Small_Data_Local",
                id="small_query_compiler_in_kwarg",
            ),
        ),
    )
    @pytest.mark.parametrize("data_class", [pd.DataFrame, pd.Series])
    def test___init___with_in_memory_data_uses_native_query_compiler(
        self, args, kwargs, expected_backend, data_class
    ):
        """Check the backend of an object built while ``__init__`` is a pre-op switch point."""
        # The switch is registered before the backend test context is entered.
        register_function_for_pre_op_switch(
            class_name=data_class.__name__,
            method="__init__",
            backend="Big_Data_Cloud",
        )
        with backend_test_context(
            test_backend="Big_Data_Cloud",
            choices=("Big_Data_Cloud", "Small_Data_Local"),
        ):
            constructed = data_class(*args, **kwargs)
            assert constructed.get_backend() == expected_backend

    @pytest.mark.parametrize("data_class", [pd.DataFrame, pd.Series])
    @backend_test_context(
        test_backend="Big_Data_Cloud", choices=("Big_Data_Cloud", "Small_Data_Local")
    )
    @pytest.mark.parametrize(
        "auto_switch_backend,expected_backend",
        [
            (True, "Small_Data_Local"),
            (False, "Big_Data_Cloud"),
        ],
    )
    def test_auto_switch_backend_disabled_prevents___init__auto_switch(
        self, auto_switch_backend, expected_backend, data_class
    ):
        """Check that the ``AutoSwitchBackend`` setting decides whether ``__init__`` switches."""
        register_function_for_pre_op_switch(
            class_name=data_class.__name__,
            method="__init__",
            backend="Big_Data_Cloud",
        )
        with config_context(AutoSwitchBackend=auto_switch_backend):
            constructed = data_class([1, 2, 3])
            assert constructed.get_backend() == expected_backend

    @pytest.mark.parametrize(
        "num_input_rows, expected_backend",
        [
            param(
                BIG_DATA_CLOUD_MIN_NUM_ROWS - 1,
                "Small_Data_Local",
            ),
            (BIG_DATA_CLOUD_MIN_NUM_ROWS, "Big_Data_Cloud"),
        ],
    )
    @pytest.mark.parametrize(
        "groupby_class,operation",
        [
            param(
                "DataFrameGroupBy",
                lambda df: df.groupby("col0").apply(lambda x: x + 1),
                id="DataFrameGroupBy",
            ),
            param(
                "SeriesGroupBy",
                lambda df: df.groupby("col0")["col1"].apply(lambda x: x + 1),
                id="SeriesGroupBy",
            ),
        ],
    )
    def test_groupby_apply_switches_for_small_input(
        self, num_input_rows, expected_backend, operation, groupby_class
    ):
        """Check that a pre-op switch on groupby ``apply`` depends on the input size.

        ``operation`` is run once before the switch is registered and once
        after it. The second result must report ``expected_backend``.
        """
        with backend_test_context(
            test_backend="Big_Data_Cloud",
            choices=("Big_Data_Cloud", "Small_Data_Local"),
        ):
            data = {
                "col0": list(range(num_input_rows)),
                "col1": list(range(1, num_input_rows + 1)),
            }
            md_frame, native_frame = create_test_dfs(data)
            assert md_frame.get_backend() == "Big_Data_Cloud"
            # Nothing is registered yet, so the result stays on the cloud backend.
            assert operation(md_frame).get_backend() == "Big_Data_Cloud"

            register_function_for_pre_op_switch(
                class_name=groupby_class, backend="Big_Data_Cloud", method="apply"
            )

            md_applied = operation(md_frame)
            native_applied = operation(native_frame)
            df_equals(md_applied, native_applied)
            assert md_applied.get_backend() == expected_backend
            if groupby_class == "DataFrameGroupBy":
                assert md_frame.get_backend() == expected_backend
            elif groupby_class == "SeriesGroupBy":
                # The original dataframe does not move with the SeriesGroupBy
                assert md_frame.get_backend() == "Big_Data_Cloud"

    def test_T_switches(self):
        """Check that ``df.T`` triggers a switch once ``transpose`` is registered (GH#7653)."""
        with backend_test_context(
            test_backend="Big_Data_Cloud",
            choices=("Big_Data_Cloud", "Small_Data_Local"),
        ):
            data = {"col0": list(range(BIG_DATA_CLOUD_MIN_NUM_ROWS - 1))}
            md_frame, native_frame = create_test_dfs(data)
            assert md_frame.get_backend() == "Big_Data_Cloud"
            # Only `transpose` is registered; that should be enough for the T
            # property to trigger a switch as well.
            register_function_for_pre_op_switch(
                class_name="DataFrame", backend="Big_Data_Cloud", method="transpose"
            )
            md_transposed = md_frame.T
            native_transposed = native_frame.T
            df_equals(md_transposed, native_transposed)
            assert md_transposed.get_backend() == "Small_Data_Local"

    def test_concat_switch_point(self, pico_df, cloud_df, cloud_high_self_df):
        """Check that ``concat`` as a switch point considers backends beyond its arguments."""
        # First scenario: "Eager" is among the choices but is not the backend
        # of either argument.
        with backend_test_context(
            test_backend="Cloud", choices=(*DEFAULT_TEST_BACKENDS, "Eager")
        ):
            register_function_for_pre_op_switch(
                class_name=None, backend="Cloud", method="concat"
            )
            combined = pd.concat([cloud_df, pico_df])
            # concat causes in-place switching
            # the Eager backend will always steal everything
            assert pico_df.get_backend() == "Eager"
            assert cloud_df.get_backend() == "Eager"
            assert combined.get_backend() == "Eager"
            # Move the fixture frames back to the backends named after them.
            pico_df.move_to("Pico", inplace=True)
            cloud_df.move_to("Cloud", inplace=True)

        # Second scenario: both arguments are the same Cloud_High_Self frame.
        with backend_test_context(
            test_backend="Cloud_High_Self", choices=("Cloud_High_Self", "Cloud")
        ):
            register_function_for_pre_op_switch(
                class_name=None, backend="Cloud_High_Self", method="concat"
            )
            combined = pd.concat([cloud_high_self_df, cloud_high_self_df])
            assert cloud_high_self_df.get_backend() == "Cloud"
            assert combined.get_backend() == "Cloud"

    @pytest.mark.parametrize("consider_all_backends", [True, False])
    def test_consider_all_backends_flag(
        self, pico_df, cloud_df, cloud_high_self_df, consider_all_backends
    ):
        """Check the effect of ``BackendJoinConsiderAllBackends`` on ``concat``.

        When concat is a switch point, backends other than those present in
        the arguments should be considered only if the flag is set.
        """
        with backend_test_context(
            test_backend="Cloud", choices=(*DEFAULT_TEST_BACKENDS, "Eager")
        ):
            with config_context(BackendJoinConsiderAllBackends=consider_all_backends):
                register_function_for_pre_op_switch(
                    class_name=None, backend="Cloud", method="concat"
                )
                combined = pd.concat([cloud_df, pico_df])
                # concat causes in-place switching, so the inputs and the result
                # all end up on the same backend.
                landing_backend = "Eager" if consider_all_backends else "Cloud"
                assert pico_df.get_backend() == landing_backend
                assert cloud_df.get_backend() == landing_backend
                assert combined.get_backend() == landing_backend


def test_move_to_clears_pin():
    """Check that the pin status is reset to false after a ``move_to`` call."""
    with backend_test_context(
        test_backend="Big_Data_Cloud",
        choices=("Big_Data_Cloud", "Small_Data_Local"),
    ):
        frame = pd.DataFrame(list(range(10)))

        # In-place variant: pin, move, and expect the pin to be gone.
        frame.pin_backend(inplace=True)
        assert frame.is_backend_pinned()
        frame.move_to("Small_Data_Local", inplace=True)
        assert not frame.is_backend_pinned()

        # Out-of-place variant: the moved copy is unpinned but can be pinned again.
        pinned_copy = frame.pin_backend()
        moved = pinned_copy.move_to("Big_Data_Cloud")
        assert not moved.is_backend_pinned()
        repinned = moved.pin_backend()
        assert repinned.is_backend_pinned()


@pytest.mark.parametrize(
    "pin_backends, expected_backend",
    [
        # no backend pinned
        param(
            [("Small_Data_Local", False), ("Big_Data_Cloud", False)],
            "Small_Data_Local",
            id="no_pin",
        ),
        # one backend is pinned, so move there
        param(
            [("Small_Data_Local", True), ("Big_Data_Cloud", False)],
            "Small_Data_Local",
            id="one_pin",
        ),
        # two identical pinned backends
        param(
            [
                ("Big_Data_Cloud", False),
                ("Small_Data_Local", True),
                ("Small_Data_Local", True),
            ],
            "Small_Data_Local",
            id="two_pin",
        ),
        # conflicting pins raises ValueError
        param(
            [("Small_Data_Local", True), ("Big_Data_Cloud", True)],
            None,
            id="conflict_pin",
        ),
    ],
)
def test_concat_with_pin(pin_backends, expected_backend):
    """Check how ``concat`` combines inputs with different backends and pin states.

    ``pin_backends`` lists ``(backend, should_pin)`` pairs, one per input
    frame. An ``expected_backend`` of ``None`` means the concat is expected to
    raise ``ValueError``.
    """
    with backend_test_context(
        test_backend="Big_Data_Cloud",
        choices=("Big_Data_Cloud", "Small_Data_Local"),
    ):
        dfs = []
        for backend, should_pin in pin_backends:
            moved = pd.DataFrame([1] * 10).move_to(backend)
            dfs.append(moved._set_backend_pinned(should_pin))

        if expected_backend is None:
            with pytest.raises(
                ValueError,
                match="Cannot combine arguments that are pinned to conflicting backends",
            ):
                pd.concat(dfs)
            return

        result = pd.concat(dfs)
        # The result is expected to be pinned exactly when some input was.
        result_pinned = result.is_backend_pinned()
        assert result_pinned == any(df.is_backend_pinned() for df in dfs)
        assert result.get_backend() == expected_backend
        native_inputs = [pandas.DataFrame([1] * 10)] * len(pin_backends)
        df_equals(result, pandas.concat(native_inputs))


@pytest.mark.parametrize(
    "groupby_operation",
    [
        param(
            lambda df: df.groupby("col0"),
            id="DataFrameGroupBy",
        ),
        param(
            lambda df: df.groupby("col0")["col1"],
            id="SeriesGroupBy",
        ),
    ],
)
def test_pin_groupby_in_place(groupby_operation):
    """Check that groupby objects can be pinned and unpinned with ``inplace=True``."""
    num_rows = BIG_DATA_CLOUD_MIN_NUM_ROWS - 1
    source = pd.DataFrame(
        {
            "col0": list(range(num_rows)),
            "col1": list(range(1, num_rows + 1)),
        }
    )

    grouped = groupby_operation(source)
    # A freshly created groupby starts out unpinned.
    assert not grouped.is_backend_pinned()

    grouped.pin_backend(inplace=True)
    assert grouped.is_backend_pinned()

    grouped.unpin_backend(inplace=True)
    assert not grouped.is_backend_pinned()


@pytest.mark.parametrize(
    "groupby_operation",
    [
        param(
            lambda df: df.groupby("col0"),
            id="DataFrameGroupBy",
        ),
        param(
            lambda df: df.groupby("col0")["col1"],
            id="SeriesGroupBy",
        ),
    ],
)
def test_pin_groupby_not_in_place(groupby_operation):
    """Check that ``pin_backend(inplace=False)`` returns a pinned groupby and leaves the source unpinned."""
    empty_frame = pd.DataFrame(columns=["col0", "col1"])
    source_groupby = groupby_operation(empty_frame)
    assert not source_groupby.is_backend_pinned()

    pinned_groupby = source_groupby.pin_backend(inplace=False)
    # The source keeps its state; only the returned object is pinned.
    assert not source_groupby.is_backend_pinned()
    assert pinned_groupby.is_backend_pinned()


@pytest.mark.parametrize(
    "groupby_operation",
    [
        param(
            lambda df: df.groupby("col0"),
            id="DataFrameGroupBy",
        ),
        param(
            lambda df: df.groupby("col0")["col1"],
            id="SeriesGroupBy",
        ),
    ],
)
def test_unpin_groupby_not_in_place(groupby_operation):
    """Check that ``unpin_backend(inplace=False)`` returns an unpinned groupby and leaves the source pinned."""
    empty_frame = pd.DataFrame(columns=["col0", "col1"])
    source_groupby = groupby_operation(empty_frame)
    source_groupby.pin_backend(inplace=True)
    assert source_groupby.is_backend_pinned()

    unpinned_groupby = source_groupby.unpin_backend(inplace=False)
    # The source keeps its pin; only the returned object is unpinned.
    assert source_groupby.is_backend_pinned()
    assert not unpinned_groupby.is_backend_pinned()


@pytest.mark.parametrize(
    "data_type,data_factory,groupby_factory",
    [
        param(
            "DataFrame",
            lambda: pd.DataFrame(
                {
                    "col0": list(range(BIG_DATA_CLOUD_MIN_NUM_ROWS - 1)),
                    "col1": list(range(1, BIG_DATA_CLOUD_MIN_NUM_ROWS)),
                }
            ),
            lambda obj: obj.groupby("col0"),
            id="DataFrame",
        ),
        param(
            "Series",
            lambda: pd.Series(list(range(1, BIG_DATA_CLOUD_MIN_NUM_ROWS)), name="data"),
            lambda obj: obj.groupby([0] * (BIG_DATA_CLOUD_MIN_NUM_ROWS - 1)),
            id="Series",
        ),
    ],
)
def test_groupby_pinning_reflects_parent_object_pin_status(
    data_type, data_factory, groupby_factory
):
    """Check that a groupby picks up its parent's pin status at creation time.

    A groupby created from a pinned DataFrame/Series starts out pinned. Its pin
    can then be changed without affecting the parent, and a groupby created
    before the parent was pinned stays unpinned until it is pinned itself.
    """
    parent = data_factory()

    # This groupby is created while the parent is still unpinned.
    early_groupby = groupby_factory(parent)

    assert not early_groupby.is_backend_pinned()
    assert not parent.is_backend_pinned()

    # Pin the parent, then create a second groupby from it.
    parent.pin_backend(inplace=True)
    late_groupby = groupby_factory(parent)

    # The second groupby should start out pinned, like its parent.
    assert late_groupby.is_backend_pinned()
    assert parent.is_backend_pinned()

    # Unpinning the groupby must not unpin the parent.
    late_groupby.unpin_backend(inplace=True)
    assert not late_groupby.is_backend_pinned()
    assert parent.is_backend_pinned()

    # The earlier groupby was not affected by pinning the parent, and it can
    # still be pinned on its own.
    assert not early_groupby.is_backend_pinned()
    early_groupby.pin_backend(inplace=True)
    assert early_groupby.is_backend_pinned()


def test_second_init_only_calls_from_pandas_once_github_issue_7559():
    """Check that building a second dataframe calls ``from_pandas`` exactly once."""
    with config_context(Backend="Big_Data_Cloud"):
        # Create a dataframe once first so that we can initialize the dummy
        # query compiler for the Big_Data_Cloud backend.
        pd.DataFrame([1])

        compiler_cls = factories.Big_Data_CloudOnNativeFactory.io_cls.query_compiler_cls
        # Wrap the real method so the call is still carried out while counted.
        from_pandas_spy = mock.patch.object(
            compiler_cls,
            "from_pandas",
            wraps=compiler_cls.from_pandas,
        )
        with from_pandas_spy as mock_from_pandas:
            pd.DataFrame([1])
            mock_from_pandas.assert_called_once()


def test_native_config():
    """Check that ``NativeQueryCompiler`` takes its limits from the modin config.

    The threshold and max-size methods must follow ``NativePandasTransferThreshold``
    and ``NativePandasMaxRows``, including temporary overrides, while the
    class-level constants keep their values.
    """

    def make_frame():
        return pandas.DataFrame([0, 1, 2])

    compiler = NativeQueryCompiler(make_frame())

    # Native Query Compiler gets a special configuration
    assert compiler._TRANSFER_THRESHOLD == 0
    assert compiler._transfer_threshold() == NativePandasTransferThreshold.get()
    assert compiler._MAX_SIZE_THIS_ENGINE_CAN_HANDLE == 1
    assert compiler._engine_max_size() == NativePandasMaxRows.get()

    # Remember the current values to compare against after the override ends.
    initial_max_size = compiler._engine_max_size()
    initial_threshold = compiler._transfer_threshold()

    with config_context(NativePandasMaxRows=123, NativePandasTransferThreshold=321):
        # Both a new compiler and the existing one must see the override.
        overridden = NativeQueryCompiler(make_frame())
        assert overridden._transfer_threshold() == 321
        assert overridden._engine_max_size() == 123
        assert compiler._engine_max_size() == 123
        assert compiler._transfer_threshold() == 321

        # sub class configuration is unchanged
        class AQC(NativeQueryCompiler):
            pass

        sub_compiler = AQC(make_frame())
        assert sub_compiler._TRANSFER_THRESHOLD == 0
        assert sub_compiler._MAX_SIZE_THIS_ENGINE_CAN_HANDLE == 1

    assert compiler._engine_max_size() == initial_max_size
    assert compiler._transfer_threshold() == initial_threshold


def test_cast_metrics(pico_df, cluster_df):
    """Check the number of ``modin.hybrid.merge*`` metrics emitted by a cross-backend concat."""
    try:
        merge_metric_count = 0

        def count_merge_metrics(metric: str, value) -> None:
            # Only metrics with the hybrid merge prefix are counted.
            nonlocal merge_metric_count
            if metric.startswith("modin.hybrid.merge"):
                merge_metric_count += 1

        add_metric_handler(count_merge_metrics)
        combined = pd.concat([pico_df, cluster_df], axis=1)
        assert combined.get_backend() == "Cluster"  # result should be on cluster
        assert merge_metric_count == 7
    finally:
        # Always remove the handler again.
        clear_metric_handler(count_merge_metrics)


def test_switch_metrics(pico_df, cluster_df):
    """Check the number of ``modin.hybrid.auto*`` metrics emitted by a pre-op switch on ``describe``."""
    with backend_test_context(
        test_backend="Big_Data_Cloud",
        choices=("Big_Data_Cloud", "Small_Data_Local"),
    ):
        try:
            auto_metric_count = 0

            def count_auto_metrics(metric: str, value) -> None:
                # Only metrics with the hybrid auto-switch prefix are counted.
                nonlocal auto_metric_count
                if metric.startswith("modin.hybrid.auto"):
                    auto_metric_count += 1

            add_metric_handler(count_auto_metrics)

            register_function_for_pre_op_switch(
                class_name="DataFrame",
                backend="Big_Data_Cloud",
                method="describe",
            )
            frame = pd.DataFrame([1] * 10)
            assert frame.get_backend() == "Big_Data_Cloud"
            frame.describe()
            assert auto_metric_count == 8
        finally:
            # Always remove the handler again.
            clear_metric_handler(count_auto_metrics)


================================================
FILE: modin/tests/pandas/native_df_interoperability/test_copy_on_write.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

# Tests interactions between a modin frame and a parent or child native pandas frame when one
# object's metadata or data is modified.
# Only valid on the native pandas backend.

import functools

import pandas
import pytest

import modin.pandas as pd
from modin.config import Backend
from modin.config import context as config_context


@pytest.fixture(scope="module", autouse=True)
def mutation_cow_test():
    """Skip the tests in this module unless the configured backend is ``"Pandas"``."""
    if Backend.get() == "Pandas":
        return
    pytest.skip(
        reason="tests are only meaningful with pandas backend",
        allow_module_level=True,
    )


@pytest.fixture(scope="function")
def copy_on_write(request):
    """Toggle pandas copy-on-write for a test and yield the chosen setting.

    Indirect fixture: the value comes from ``request.param``. While the test
    runs, the modin backend is set to ``"Pandas"`` with ``NativePandasDeepCopy``
    turned off.
    """
    with config_context(Backend="Pandas", NativePandasDeepCopy=False):
        with pandas.option_context("mode.copy_on_write", request.param):
            yield request.param


def get_mutation_fixtures(data, **kwargs):
    """Build a decorator that parametrizes a mutation test.

    The decorated test is parametrized over ``copy_on_write`` (indirectly,
    through the fixture of that name) and over ``df_factory``. Each factory
    returns a pair of frames built from ``data`` and ``kwargs``:

    * ``native_first`` creates a modin DF from a native DF.
    * ``modin_first`` creates a native DF by calling ``to_pandas`` on a modin DF.
    """

    def wrapper(f):
        # The factories are created per decorated test so that parametrized
        # runs don't affect each other. They appear as parametrize values, so
        # their names are kept as they are.
        def native_first():
            native_df = pandas.DataFrame(data, **kwargs)
            modin_df = pd.DataFrame(native_df)
            return native_df, modin_df

        def modin_first():
            modin_df = pd.DataFrame(data, **kwargs)
            native_df = modin_df.modin.to_pandas()
            return modin_df, native_df

        @functools.wraps(f)
        def test_runner(*call_args, **call_kwargs):
            return f(*call_args, **call_kwargs)

        # Apply the marks in the same order as stacked decorators would:
        # copy_on_write first (innermost), then df_factory.
        with_cow = pytest.mark.parametrize(
            "copy_on_write",
            [pytest.param(True, id="CoW"), pytest.param(False, id="no_CoW")],
            indirect=True,
        )(test_runner)
        return pytest.mark.parametrize("df_factory", [native_first, modin_first])(
            with_cow
        )

    return wrapper


@pytest.mark.parametrize(
    "axis", [pytest.param(0, id="index"), pytest.param(1, id="columns")]
)
@get_mutation_fixtures({"A": [0, 1], "B": [2, 3]})
def test_set_axis_name(axis, copy_on_write, df_factory):
    """Check whether setting an axis name on one frame shows up on the other."""
    first, second = df_factory()

    first.axes[axis].name = "x"
    assert first.axes[axis].name == "x"
    # The new name reaches the other frame only when copy-on-write is disabled.
    if not copy_on_write:
        assert second.axes[axis].name == "x"
    else:
        assert second.axes[axis].name is None

    second.axes[axis].name = "y"
    expected_first_name = "x" if copy_on_write else "y"
    assert first.axes[axis].name == expected_first_name
    assert second.axes[axis].name == "y"


@pytest.mark.parametrize(
    "axis", [pytest.param(0, id="index"), pytest.param(1, id="columns")]
)
@get_mutation_fixtures({"A": [0, 1], "B": [2, 3]}, index=["A", "B"])
def test_rename_axis(axis, copy_on_write, df_factory):
    """Check that in-place renames never propagate between the two frames, regardless of CoW."""
    first, second = df_factory()

    first.rename({"A": "aprime"}, axis=axis, inplace=True)
    assert first.axes[axis].tolist() == ["aprime", "B"]
    assert second.axes[axis].tolist() == ["A", "B"]

    # Renaming in the other direction leaves the first frame as it was.
    second.rename({"B": "bprime"}, axis=axis, inplace=True)
    assert first.axes[axis].tolist() == ["aprime", "B"]
    assert second.axes[axis].tolist() == ["A", "bprime"]


@get_mutation_fixtures({"A": [0, 1], "B": [2, 3]})
def test_locset(copy_on_write, df_factory):
    """Check that a ``.loc`` assignment reaches the other frame only without copy-on-write."""
    first, second = df_factory()

    first.loc[0, "A"] = -1
    assert first.loc[0, "A"] == -1
    untouched_a = 0
    assert second.loc[0, "A"] == (untouched_a if copy_on_write else -1)

    second.loc[1, "B"] = 999
    untouched_b = 3
    assert first.loc[1, "B"] == (untouched_b if copy_on_write else 999)
    assert second.loc[1, "B"] == 999


@get_mutation_fixtures({"A": [0, 1], "B": [2, 3]})
def test_add_column(copy_on_write, df_factory):
    """Check that a column added to one frame never appears on the other."""
    first, second = df_factory()

    first["C"] = [4, 5]
    assert first["C"].tolist() == [4, 5]
    # Even with CoW disabled, the new column is not added to the other frame.
    assert second.columns.tolist() == ["A", "B"]

    second["D"] = [6, 7]
    assert second["D"].tolist() == [6, 7]
    assert first.columns.tolist() == ["A", "B", "C"]


@get_mutation_fixtures({"A": [0, 1], "B": [2, 3]})
def test_add_row(copy_on_write, df_factory):
    """Adding a row to one frame must not add it to the other frame."""
    first, second = df_factory()

    first.loc[9] = [4, 5]
    assert first.loc[9].tolist() == [4, 5]
    # The other frame keeps its original index even when CoW is disabled.
    assert second.index.tolist() == [0, 1]

    second.loc[10] = [6, 7]
    assert second.loc[10].tolist() == [6, 7]
    # ... and the first frame does not gain row 10 either.
    assert first.index.tolist() == [0, 1, 9]


# Chained assignment is expected to emit warnings here; they are silenced so that
# the test can assert on the resulting values instead.
@pytest.mark.filterwarnings("ignore::FutureWarning")
@pytest.mark.filterwarnings("ignore::pandas.errors.ChainedAssignmentError")
@get_mutation_fixtures({"A": [0, 1], "B": [2, 3]})
def test_chained_assignment(copy_on_write, df_factory):
    """Check the effect of chained assignment (``df[col][row] = value``) on both frames.

    The expectations encoded by the assertions below:

    * on the frame being assigned to, the write has no effect when copy-on-write
      is enabled and that frame is a native ``pandas.DataFrame``; otherwise the
      new value is observed;
    * on the other frame, the new value is observed only when copy-on-write is
      disabled and that other frame is not a native ``pandas.DataFrame``.
    """
    df1, df2 = df_factory()
    # Whether the chained write on df1 is expected to leave df1 unchanged.
    is_assign_noop = copy_on_write and isinstance(df1, pandas.DataFrame)
    # NOTE: keep the chained form exactly as written; it is the behavior under test.
    df1["A"][0] = -1
    assert df1["A"][0] == (0 if is_assign_noop else -1)
    assert df2["A"][0] == (
        0 if copy_on_write or isinstance(df2, pandas.DataFrame) else -1
    )
    # Same check in the opposite direction: write through df2, observe through df1.
    is_assign_noop = copy_on_write and isinstance(df2, pandas.DataFrame)
    df2["B"][1] = 999
    assert df1["B"][1] == (
        3 if copy_on_write or isinstance(df1, pandas.DataFrame) else 999
    )
    assert df2["B"][1] == (3 if is_assign_noop else 999)


@get_mutation_fixtures({"A": [0, 1], "B": [2, 3]})
def test_column_reassign(copy_on_write, df_factory):
    """Replacing a whole column on one frame must leave the other frame untouched."""
    first, second = df_factory()

    first["A"] = first["A"] - 1
    assert first["A"].tolist() == [-1, 0]
    # The asserted values do not depend on ``copy_on_write``.
    assert second["A"].tolist() == [0, 1]

    second["B"] = second["B"] + 1
    assert first["B"].tolist() == [2, 3]
    assert second["B"].tolist() == [3, 4]


@pytest.mark.parametrize("always_deep", [True, False])
def test_explicit_copy(always_deep):
    """``copy(deep=True)`` must yield an independent frame for either config value."""
    with config_context(NativePandasDeepCopy=always_deep):
        source = pd.DataFrame([[0]])
        # Shallow copies are deliberately not covered here, since modin semantics
        # don't line up perfectly with native pandas for them.
        deep_copy = source.copy(deep=True)

        source.loc[0, 0] = -1
        assert source.loc[0, 0] == -1
        # The explicit deep copy still holds the original value.
        assert deep_copy.loc[0, 0] == 0


================================================
FILE: modin/tests/pandas/native_df_interoperability/test_default.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import matplotlib
import numpy as np
import pandas
import pytest
from numpy.testing import assert_array_equal

import modin.pandas as pd
from modin.config import NPartitions
from modin.pandas.io import to_pandas
from modin.tests.pandas.native_df_interoperability.utils import (
    create_test_df_in_defined_mode,
    create_test_series_in_defined_mode,
    eval_general_interop,
)
from modin.tests.pandas.utils import (
    default_to_pandas_ignore_string,
    df_equals,
    test_data,
    test_data_diff_dtype,
    test_data_keys,
    test_data_large_categorical_dataframe,
    test_data_values,
)
from modin.tests.test_utils import (
    df_or_series_using_native_execution,
    warns_that_defaulting_to_pandas_if,
)

# Set modin's ``NPartitions`` config value to 4 when this module is imported.
NPartitions.put(4)

# Force matplotlib to not use any Xwindows backend.
matplotlib.use("Agg")

# Our configuration in pytest.ini requires that we explicitly catch all
# instances of defaulting to pandas, but some test modules, like this one,
# have too many such instances.
# ``pytestmark`` applies the marks below to every test in this module.
pytestmark = [
    pytest.mark.filterwarnings(default_to_pandas_ignore_string),
    # The remaining marks ignore specific FutureWarnings to keep the test output clean.
    pytest.mark.filterwarnings(
        "ignore:.*bool is now deprecated and will be removed:FutureWarning"
    ),
    pytest.mark.filterwarnings(
        "ignore:first is deprecated and will be removed:FutureWarning"
    ),
    pytest.mark.filterwarnings(
        "ignore:last is deprecated and will be removed:FutureWarning"
    ),
]


@pytest.mark.parametrize(
    "op, make_args",
    [
        ("align", lambda df: {"other": df}),
        ("corrwith", lambda df: {"other": df}),
        ("ewm", lambda df: {"com": 0.5}),
        ("from_dict", lambda df: {"data": None}),
        ("from_records", lambda df: {"data": to_pandas(df)}),
        ("hist", lambda df: {"column": "int_col"}),
        ("interpolate", None),
        ("mask", lambda df: {"cond": df != 0}),
        ("pct_change", None),
        ("to_xarray", None),
        ("flags", None),
        ("set_flags", lambda df: {"allows_duplicate_labels": False}),
    ],
)
def test_ops_defaulting_to_pandas(op, make_args, df_mode_pair):
    """Run ``op`` on a frame and check the default-to-pandas warning expectation.

    The frame the operation is invoked on is created with ``df_mode_pair[0]``;
    the frame handed to ``make_args`` is created with ``df_mode_pair[1]``.
    """

    def build_frame(native):
        # Only the first element of the returned pair is used by this test.
        frame, _ = create_test_df_in_defined_mode(
            test_data_diff_dtype,
            post_fn=lambda df: df.drop(["str_col", "bool_col"], axis=1),
            native=native,
        )
        return frame

    caller_df = build_frame(df_mode_pair[0])
    argument_df = build_frame(df_mode_pair[1])

    expect_warning = not df_or_series_using_native_execution(caller_df)
    with warns_that_defaulting_to_pandas_if(expect_warning):
        operation = getattr(caller_df, op)
        if make_args is None:
            try:
                operation()
            except TypeError:
                # Non-callable attributes end up here; there is nothing to call.
                pass
        else:
            operation(**make_args(argument_df))


@pytest.mark.parametrize(
    "data",
    test_data_values + [test_data_large_categorical_dataframe],
    ids=test_data_keys + ["categorical_ints"],
)
def test_to_numpy(data):
    """``.values`` of a modin frame must equal ``.values`` of the pandas frame."""
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)
    assert_array_equal(modin_df.values, pandas_df.values)


def test_array_ufunc():
    """Apply ``np.sqrt`` to a frame and a series created with ``native=True``."""
    frame, expected_frame = create_test_df_in_defined_mode(
        [[1, 2], [3, 4]], native=True
    )
    df_equals(np.sqrt(frame), np.sqrt(expected_frame))

    series, expected_series = create_test_series_in_defined_mode(
        [1, 2, 3, 4, 9], native=True
    )
    df_equals(np.sqrt(series), np.sqrt(expected_series))


def test_asfreq(df_mode_pair):
    """Check the default-to-pandas warning expectation for ``DataFrame.asfreq``."""
    minute_index = pd.date_range("1/1/2000", periods=4, freq="min")
    values, _ = create_test_series_in_defined_mode(
        [0.0, None, 2.0, 3.0], index=minute_index, native=df_mode_pair[0]
    )
    frame, _ = create_test_df_in_defined_mode({"s": values}, native=df_mode_pair[1])

    should_warn = not df_or_series_using_native_execution(frame)
    with warns_that_defaulting_to_pandas_if(should_warn):
        # Only the warning behavior is under test, so the result is discarded.
        frame.asfreq(freq="30S")


def test_assign(df_mode_pair):
    """Compare ``DataFrame.assign`` results when the new columns come from another frame.

    ``assign`` returns a new frame and leaves the caller untouched, and
    ``eval_general_interop`` is not called with ``__inplace__=True`` here, so the
    helpers must return the assigned frame; otherwise there is no result to compare.
    """
    data = test_data_values[0]

    def assign_one_column(df1, df2):
        return df1.assign(new_column=pd.Series(df2.iloc[:, 0]))

    eval_general_interop(data, None, assign_one_column, df_mode_pair)

    def assign_multiple_columns(df1, df2):
        return df1.assign(
            new_column=pd.Series(df2.iloc[:, 0]), new_column2=pd.Series(df2.iloc[:, 1])
        )

    eval_general_interop(data, None, assign_multiple_columns, df_mode_pair)


def test_combine_first(df_mode_pair):
    """Compare ``combine_first`` between frames created with the two modes of the pair."""
    modin_left, pandas_left = create_test_df_in_defined_mode(
        {"A": [None, 0], "B": [None, 4]}, native=df_mode_pair[0]
    )
    modin_right, pandas_right = create_test_df_in_defined_mode(
        {"A": [1, 1], "B": [3, 3]}, native=df_mode_pair[1]
    )

    modin_result = modin_left.combine_first(modin_right)
    pandas_result = pandas_left.combine_first(pandas_right)
    # dtypes are not compared because of
    # https://github.com/modin-project/modin/issues/5959
    df_equals(modin_result, pandas_result, check_dtypes=False)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_dot(data, df_mode_pair):
    """Check ``DataFrame.dot`` against pandas for series and dataframe operands."""
    modin_df, pandas_df = create_test_df_in_defined_mode(data, native=df_mode_pair[0])
    col_len = len(modin_df.columns)

    # Test series input
    modin_series, pandas_series = create_test_series_in_defined_mode(
        np.arange(col_len),
        index=pandas_df.columns,
        native=df_mode_pair[1],
    )
    modin_result = modin_df.dot(modin_series)
    pandas_result = pandas_df.dot(pandas_series)
    df_equals(modin_result, pandas_result)

    def dot_func(df1, df2):
        return df1.dot(df2.T)

    # Test dataframe input
    eval_general_interop(data, None, dot_func, df_mode_pair)

    # Test when input series index doesn't line up with columns.
    # The series is built outside of ``pytest.raises`` so that only the ``dot``
    # call itself can satisfy the expected ``ValueError``.
    modin_series_without_index, _ = create_test_series_in_defined_mode(
        np.arange(col_len), native=df_mode_pair[1]
    )
    with pytest.raises(ValueError):
        modin_df.dot(modin_series_without_index)

    # Test case when left dataframe has size (n x 1)
    # and right dataframe has size (1 x n)
    eval_general_interop(pandas_series, None, dot_func, df_mode_pair)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_matmul(data, df_mode_pair):
    """Check the ``@`` operator against pandas for array, series and dataframe operands."""
    modin_df, pandas_df = create_test_df_in_defined_mode(data, native=df_mode_pair[0])
    col_len = len(modin_df.columns)

    # Test array input
    arr = np.arange(col_len)
    modin_result = modin_df @ arr
    pandas_result = pandas_df @ arr
    df_equals(modin_result, pandas_result)

    # Test bad dimensions
    with pytest.raises(ValueError):
        modin_df @ np.arange(col_len + 10)

    # Test series input
    modin_series, pandas_series = create_test_series_in_defined_mode(
        np.arange(col_len),
        index=pandas_df.columns,
        native=df_mode_pair[1],
    )
    modin_result = modin_df @ modin_series
    pandas_result = pandas_df @ pandas_series
    df_equals(modin_result, pandas_result)

    # Test dataframe input
    def matmul_func(df1, df2):
        return df1 @ df2.T

    eval_general_interop(data, None, matmul_func, df_mode_pair)

    # Test when input series index doesn't line up with columns.
    # The series is built outside of ``pytest.raises`` so that only the ``@``
    # operation itself can satisfy the expected ``ValueError``.
    modin_series_without_index, _ = create_test_series_in_defined_mode(
        np.arange(col_len), native=df_mode_pair[1]
    )
    with pytest.raises(ValueError):
        modin_df @ modin_series_without_index


@pytest.mark.parametrize("data", [test_data["int_data"]], ids=["int_data"])
@pytest.mark.parametrize(
    "index",
    [
        pytest.param(lambda _, df: df.columns[0], id="single_index_col"),
        pytest.param(
            lambda _, df: [*df.columns[0:2], *df.columns[-7:-4]],
            id="multiple_index_cols",
        ),
        pytest.param(None, id="default_index"),
    ],
)
@pytest.mark.parametrize(
    "columns",
    [
        pytest.param(lambda _, df: df.columns[len(df.columns) // 2], id="single_col"),
        pytest.param(
            lambda _, df: [
                *df.columns[(len(df.columns) // 2) : (len(df.columns) // 2 + 4)],
                df.columns[-7],
            ],
            id="multiple_cols",
        ),
        pytest.param(None, id="default_columns"),
    ],
)
@pytest.mark.parametrize(
    "values",
    [
        pytest.param(lambda _, df: df.columns[-1], id="single_value_col"),
        pytest.param(lambda _, df: df.columns[-4:-1], id="multiple_value_cols"),
    ],
)
@pytest.mark.parametrize(
    "aggfunc",
    [
        pytest.param(lambda df, _: np.mean(df), id="callable_tree_reduce_func"),
        pytest.param("mean", id="tree_reduce_func"),
        pytest.param("nunique", id="full_axis_func"),
    ],
)
def test_pivot_table_data(data, index, columns, values, aggfunc, request, df_mode_pair):
    """Compare ``pivot_table`` (followed by ``sort_index``) across parameter combinations."""
    callspec_id = request.node.callspec.id

    # Parameter combinations marked as expected failures (see the linked issue).
    xfail_patterns = (
        "callable_tree_reduce_func-single_value_col-multiple_cols-multiple_index_cols",
        "callable_tree_reduce_func-multiple_value_cols-multiple_cols-multiple_index_cols",
        "tree_reduce_func-single_value_col-multiple_cols-multiple_index_cols",
        "tree_reduce_func-multiple_value_cols-multiple_cols-multiple_index_cols",
        "full_axis_func-single_value_col-multiple_cols-multiple_index_cols",
        "full_axis_func-multiple_value_cols-multiple_cols-multiple_index_cols",
    )
    if any(pattern in callspec_id for pattern in xfail_patterns):
        pytest.xfail(reason="https://github.com/modin-project/modin/issues/7011")

    if "default_columns-default_index" in callspec_id:
        expected_exception = ValueError("No group keys passed!")
    elif "callable_tree_reduce_func" in callspec_id and "int_data" in callspec_id:
        expected_exception = TypeError("'numpy.float64' object is not callable")
    else:
        expected_exception = None

    # Sort along axis 1 when an ``index`` parameter is given, along axis 0 otherwise.
    sort_axis = int(index is not None)

    def pivot_and_sort(df, _, *args, **kwargs):
        # The second frame is not used by this operation.
        return df.pivot_table(*args, **kwargs).sort_index(axis=sort_axis)

    eval_general_interop(
        data,
        None,
        operation=pivot_and_sort,
        df_mode_pair=df_mode_pair,
        index=index,
        columns=columns,
        values=values,
        aggfunc=aggfunc,
        expected_exception=expected_exception,
    )


================================================
FILE: modin/tests/pandas/native_df_interoperability/test_default_to_pandas_without_warnings.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

# While other modin backends raise a warning when defaulting to pandas, it does not make sense to
# do so when we're running on the native pandas backend already. These tests ensure such warnings
# are not raised with the pandas backend.

import numpy as np
import pandas
import pytest

import modin.pandas as pd
from modin.config import Backend
from modin.tests.pandas.utils import df_equals

# Marks applied by pytest to every test in this module.
pytestmark = [
    # Skip the module's tests unless the configured backend is "Pandas".
    # ``skipif`` takes only the condition(s) and ``reason``; ``allow_module_level``
    # is a parameter of ``pytest.skip()`` and has no effect on the mark.
    pytest.mark.skipif(
        Backend.get() != "Pandas",
        reason="warnings only suppressed on native pandas backend",
    ),
    # Error if a default to pandas warning is detected.
    pytest.mark.filterwarnings("error:is not supported by NativeOnNative:UserWarning"),
]


def test_crosstab_no_warning():
    """``pd.crosstab`` must match pandas; the module-level marks turn warnings into errors."""
    # Example from pandas docs
    # https://pandas.pydata.org/docs/reference/api/pandas.crosstab.html
    a = np.array(
        "foo foo foo foo bar bar bar bar foo foo foo".split(),
        dtype=object,
    )
    b = np.array(
        "one one one two one one one two two two one".split(),
        dtype=object,
    )
    c = np.array(
        "dull dull shiny dull dull shiny shiny dull shiny shiny shiny".split(),
        dtype=object,
    )
    row_names = ["a"]
    col_names = ["b", "c"]

    modin_result = pd.crosstab(a, [b, c], rownames=row_names, colnames=col_names)
    pandas_result = pandas.crosstab(a, [b, c], rownames=row_names, colnames=col_names)
    df_equals(modin_result, pandas_result)


def test_json_normalize_no_warning():
    """``pd.json_normalize`` must match pandas; the module-level marks turn warnings into errors."""
    # Example from pandas docs
    # https://pandas.pydata.org/docs/reference/api/pandas.json_normalize.html
    records = [
        {"id": 1, "name": {"first": "Coleen", "last": "Volk"}},
        {"name": {"given": "Mark", "family": "Regner"}},
        {"id": 2, "name": "Faye Raker"},
    ]
    modin_result = pd.json_normalize(records)
    pandas_result = pandas.json_normalize(records)
    df_equals(modin_result, pandas_result)


================================================
FILE: modin/tests/pandas/native_df_interoperability/test_general.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import pandas
import pytest

import modin.pandas as pd
from modin.tests.pandas.native_df_interoperability.utils import (
    create_test_df_in_defined_mode,
    create_test_series_in_defined_mode,
)
from modin.tests.pandas.utils import default_to_pandas_ignore_string, df_equals

# Our configuration in pytest.ini requires that we explicitly catch all
# instances of defaulting to pandas, but some test modules, like this one,
# have too many such instances.
# ``pytestmark`` applies the warning filter below to every test in this module.
pytestmark = pytest.mark.filterwarnings(default_to_pandas_ignore_string)


def test_cut(df_mode_pair):
    """Compare ``pd.cut`` with ``pandas.cut`` when values and bins use the pair's two modes."""
    modin_x, pandas_x = create_test_series_in_defined_mode(
        [1, 3], native=df_mode_pair[0]
    )
    modin_bins, pandas_bins = create_test_series_in_defined_mode(
        [0, 2], native=df_mode_pair[1]
    )

    modin_result = pd.cut(modin_x, modin_bins)
    pandas_result = pandas.cut(pandas_x, pandas_bins)
    df_equals(modin_result, pandas_result)


def test_qcut(df_mode_pair):
    """Compare ``pd.qcut`` with ``pandas.qcut`` when values and quantiles use the pair's two modes."""
    modin_x, pandas_x = create_test_series_in_defined_mode(
        [1, 2, 3, 4], native=df_mode_pair[0]
    )
    modin_quantiles, pandas_quantiles = create_test_series_in_defined_mode(
        [0, 0.5, 1], native=df_mode_pair[1]
    )

    modin_result = pd.qcut(modin_x, modin_quantiles)
    pandas_result = pandas.qcut(pandas_x, pandas_quantiles)
    df_equals(modin_result, pandas_result)


def test_merge_ordered(df_mode_pair):
    """Compare ``pd.merge_ordered`` with ``pandas.merge_ordered`` for frames in the pair's two modes."""
    left_data = {
        "key": ["a", "c", "e", "a", "c", "e"],
        "lvalue": [1, 2, 3, 1, 2, 3],
        "group": ["a", "a", "a", "b", "b", "b"],
    }
    right_data = {"key": ["b", "c", "d"], "rvalue": [1, 2, 3]}
    modin_left, pandas_left = create_test_df_in_defined_mode(
        left_data, native=df_mode_pair[0]
    )
    modin_right, pandas_right = create_test_df_in_defined_mode(
        right_data, native=df_mode_pair[1]
    )

    modin_result = pd.merge_ordered(
        modin_left, modin_right, fill_method="ffill", left_by="group"
    )
    pandas_result = pandas.merge_ordered(
        pandas_left, pandas_right, fill_method="ffill", left_by="group"
    )
    df_equals(modin_result, pandas_result)


def test_merge_asof(df_mode_pair):
    """Compare ``pd.merge_asof`` with ``pandas.merge_asof`` for frames in the pair's two modes."""
    left_data = {"a": [1, 5, 10], "left_val": ["a", "b", "c"]}
    right_data = {"a": [1, 2, 3, 6, 7], "right_val": [1, 2, 3, 6, 7]}
    modin_left, pandas_left = create_test_df_in_defined_mode(
        left_data, native=df_mode_pair[0]
    )
    modin_right, pandas_right = create_test_df_in_defined_mode(
        right_data, native=df_mode_pair[1]
    )

    modin_result = pd.merge_asof(modin_left, modin_right, on="a")
    pandas_result = pandas.merge_asof(pandas_left, pandas_right, on="a")
    df_equals(modin_result, pandas_result)


================================================
FILE: modin/tests/pandas/native_df_interoperability/test_indexing.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.
from itertools import product

import matplotlib
import numpy as np
import pandas
import pytest

import modin.pandas as pd
from modin.config import NPartitions
from modin.tests.pandas.native_df_interoperability.utils import (
    create_test_df_in_defined_mode,
    create_test_series_in_defined_mode,
    eval_general_interop,
)
from modin.tests.pandas.utils import (
    RAND_HIGH,
    RAND_LOW,
    assert_dtypes_equal,
    default_to_pandas_ignore_string,
    df_equals,
    eval_general,
    test_data,
    test_data_keys,
    test_data_values,
)

# Set modin's ``NPartitions`` config value to 4 when this module is imported.
NPartitions.put(4)

# Force matplotlib to not use any Xwindows backend.
matplotlib.use("Agg")

# Our configuration in pytest.ini requires that we explicitly catch all
# instances of defaulting to pandas, but some test modules, like this one,
# have too many such instances.
# TODO(https://github.com/modin-project/modin/issues/3655): catch all instances
# of defaulting to pandas.
# ``pytestmark`` applies the warning filter below to every test in this module.
pytestmark = pytest.mark.filterwarnings(default_to_pandas_ignore_string)


def eval_setitem(md_df, pd_df, value, col=None, loc=None, expected_exception=None):
    """Evaluate ``df[col] = value`` via ``eval_general`` and via ``eval_general_interop``.

    Parameters
    ----------
    md_df, pd_df
        Frames handed to ``eval_general``; ``pd_df`` is also passed as the data
        argument of ``eval_general_interop``.
    value
        Value to assign, or a callable that receives a frame and returns the value.
    col
        Column key to assign to. Overridden when ``loc`` is given.
    loc
        Optional position in ``pd_df.columns`` from which the column key is taken.
    expected_exception
        Forwarded unchanged to both evaluation helpers.
    """
    if loc is not None:
        col = pd_df.columns[loc]

    if callable(value):
        value_getter = value
    else:

        def value_getter(*args, **kwargs):
            return value

    def set_from_self(df):
        return df.__setitem__(col, value_getter(df))

    def set_from_other(df1, df2):
        return df1.__setitem__(col, value_getter(df2))

    eval_general(
        md_df,
        pd_df,
        set_from_self,
        __inplace__=True,
        expected_exception=expected_exception,
    )
    # Repeat the assignment for every (True/False, True/False) mode combination.
    for mode_pair in product([True, False], repeat=2):
        eval_general_interop(
            pd_df,
            None,
            set_from_other,
            mode_pair,
            __inplace__=True,
            expected_exception=expected_exception,
        )


def eval_loc(md_df, pd_df, value, key):
    """Evaluate ``df.loc[key] = value`` on both frames via ``eval_general``.

    ``value`` may be a 2-tuple ``(md_value, pd_value)`` for the case where the
    value assigned to the pandas frame differs from the one assigned otherwise.
    """
    if isinstance(value, tuple):
        assert len(value) == 2
        md_value, pd_value = value
    else:
        md_value = pd_value = value

    def assign(df):
        # Native pandas frames receive ``pd_value``; anything else gets ``md_value``.
        if isinstance(df, pandas.DataFrame):
            chosen = pd_value
        else:
            chosen = md_value
        return df.loc.__setitem__(key, chosen)

    eval_general(md_df, pd_df, assign, __inplace__=True)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize(
    "key_func",
    [
        # test for the case from https://github.com/modin-project/modin/issues/4308
        lambda df: "non_existing_column",
        lambda df: df.columns[0],
        lambda df: df.index,
        lambda df: [df.index, df.columns[0]],
        lambda df: (
            pandas.Series(list(range(len(df.index))))
            if isinstance(df, pandas.DataFrame)
            else pd.Series(list(range(len(df))))
        ),
    ],
    ids=[
        "non_existing_column",
        "first_column_name",
        "original_index",
        "list_of_index_and_first_column_name",
        "series_of_integers",
    ],
)
@pytest.mark.parametrize(
    "drop_kwargs",
    [{"drop": True}, {"drop": False}, {}],
    ids=["drop_True", "drop_False", "no_drop_param"],
)
def test_set_index(data, key_func, drop_kwargs, request, df_mode_pair):
    """Compare ``set_index`` where the key is derived from the second frame of the pair."""
    test_name = request.node.name
    if (
        "list_of_index_and_first_column_name" in test_name
        and "drop_False" in test_name
    ):
        pytest.xfail(
            reason="KeyError: https://github.com/modin-project/modin/issues/5636"
        )

    if "non_existing_column" in request.node.callspec.id:
        expected_exception = KeyError(
            "None of ['non_existing_column'] are in the columns"
        )
    else:
        expected_exception = None

    def set_index_from_other(df1, df2):
        return df1.set_index(key_func(df2), **drop_kwargs)

    eval_general_interop(
        data,
        None,
        set_index_from_other,
        expected_exception=expected_exception,
        df_mode_pair=df_mode_pair,
    )


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_loc(data, df_mode_pair):
    """Select rows and columns through ``.loc`` with boolean series as keys."""
    modin_df, pandas_df = create_test_df_in_defined_mode(data, native=df_mode_pair[0])

    # Every third row and every fifth column is selected.
    row_mask = [i % 3 == 0 for i in range(len(modin_df.index))]
    col_mask = [i % 5 == 0 for i in range(len(modin_df.columns))]

    # NOTE(review): both key series are created with ``df_mode_pair[0]``, the same
    # mode as the frame, so ``df_mode_pair[1]`` is unused here - confirm whether
    # mixed modes were meant to be covered.
    row_key, _ = create_test_series_in_defined_mode(row_mask, native=df_mode_pair[0])
    col_key, _ = create_test_series_in_defined_mode(
        col_mask, index=modin_df.columns, native=df_mode_pair[0]
    )

    modin_result = modin_df.loc[row_key, col_key]
    pandas_result = pandas_df.loc[
        pandas.Series(row_mask), pandas.Series(col_mask, index=modin_df.columns)
    ]
    df_equals(modin_result, pandas_result)


@pytest.mark.parametrize("left, right", [(2, 1), (6, 1), (lambda df: 70, 1), (90, 70)])
def test_loc_insert_row(left, right, df_mode_pair):
    """Assign a row taken from the second frame to ``df1.loc[left]``."""
    # This test case comes from
    # https://github.com/modin-project/modin/issues/3764
    if right == 70:
        pytest.xfail(reason="https://github.com/modin-project/modin/issues/7024")

    def assign_row(df1, df2):
        df1.loc[left] = df2.loc[right]
        return df1

    eval_general_interop(
        [[1, 2, 3], [4, 5, 6]],
        None,
        assign_row,
        expected_exception=None,
        df_mode_pair=df_mode_pair,
    )


@pytest.fixture
def loc_iter_dfs_interop(df_mode_pair):
    """Build two identically-filled 3x3 frame pairs, one per mode in ``df_mode_pair``.

    Column number ``i`` holds the value ``i`` in every row. The fixture returns
    ``(md_df1, pd_df1, md_df2, pd_df2)``.
    """
    columns = ["col1", "col2", "col3"]
    index = ["row1", "row2", "row3"]

    def build_pair(native):
        # A fresh data dict is created for every call.
        return create_test_df_in_defined_mode(
            {name: ([position] * len(index)) for position, name in enumerate(columns)},
            columns=columns,
            index=index,
            native=native,
        )

    md_df1, pd_df1 = build_pair(df_mode_pair[0])
    md_df2, pd_df2 = build_pair(df_mode_pair[1])
    return md_df1, pd_df1, md_df2, pd_df2


@pytest.mark.parametrize("reverse_order", [False, True])
@pytest.mark.parametrize("axis", [0, 1])
def test_loc_iter_assignment(loc_iter_dfs_interop, reverse_order, axis):
    """Assign through ``.loc`` using a list of labels on one axis and a full slice on the other."""
    if reverse_order and axis:
        pytest.xfail(
            "Due to internal sorting of lookup values assignment order is lost, see GH-#2552"
        )

    md_df1, pd_df1, md_df2, pd_df2 = loc_iter_dfs_interop

    # All labels of the chosen axis except the last one, in the requested order.
    labels = sorted(pd_df1.axes[axis][:-1], reverse=reverse_order)
    select = (slice(None), labels) if axis else (labels, slice(None))

    pd_df1.loc[select] = pd_df1.loc[select] + pd_df2.loc[select]
    md_df1.loc[select] = md_df1.loc[select] + md_df2.loc[select]
    df_equals(md_df1, pd_df1)


def test_loc_series(df_mode_pair):
    """Assign through ``.loc`` with a boolean mask computed from the second frame."""
    data = {"a": [1, 2], "b": [3, 4]}
    md_target, pd_target = create_test_df_in_defined_mode(
        data, native=df_mode_pair[0]
    )
    md_other, pd_other = create_test_df_in_defined_mode(
        {"a": [1, 2], "b": [3, 4]}, native=df_mode_pair[1]
    )

    # Rows are selected by the second frame; values are computed from the first.
    pd_target.loc[pd_other["a"] > 1, "b"] = np.log(pd_target["b"])
    md_target.loc[md_other["a"] > 1, "b"] = np.log(md_target["b"])

    df_equals(pd_target, md_target)


def test_reindex_like(df_mode_pair):
    """Compare ``reindex_like`` where the template frame uses the other mode of the pair."""
    # Template frame: four daily rows, three columns.
    template_data = [
        [24.3, 75.7, "high"],
        [31, 87.8, "high"],
        [22, 71.6, "medium"],
        [35, 95, "medium"],
    ]
    template_columns = ["temp_celsius", "temp_fahrenheit", "windspeed"]
    template_index = pd.date_range(start="2014-02-12", end="2014-02-15", freq="D")

    # Frame to be reindexed: three of those days, two of those columns.
    source_data = [[28, "low"], [30, "low"], [35.1, "medium"]]
    source_columns = ["temp_celsius", "windspeed"]
    source_index = pd.DatetimeIndex(["2014-02-12", "2014-02-13", "2014-02-15"])

    modin_template, pandas_template = create_test_df_in_defined_mode(
        template_data,
        columns=template_columns,
        index=template_index,
        native=df_mode_pair[0],
    )
    modin_source, pandas_source = create_test_df_in_defined_mode(
        source_data,
        columns=source_columns,
        index=source_index,
        native=df_mode_pair[1],
    )

    modin_result = modin_source.reindex_like(modin_template)
    pandas_result = pandas_source.reindex_like(pandas_template)
    df_equals(modin_result, pandas_result)


def test_reindex_multiindex(df_mode_pair):
    """Reindex a frame with a MultiIndex built from the levels of a second frame."""
    # Keep this order of the two random draws.
    data1 = np.random.randint(1, 20, (5, 5))
    data2 = np.random.randint(10, 25, 6)
    index = np.array(["AUD", "BRL", "CAD", "EUR", "INR"])
    pandas_midx = pandas.MultiIndex.from_product(
        [["Bank_1", "Bank_2"], ["AUD", "CAD", "EUR"]], names=["Bank", "Curency"]
    )
    modin_df1, pandas_df1 = create_test_df_in_defined_mode(
        data=data1, index=index, columns=index, native=df_mode_pair[0]
    )
    modin_df2, pandas_df2 = create_test_df_in_defined_mode(
        data=data2, index=pandas_midx, native=df_mode_pair[1]
    )

    modin_df2.columns = ["Notional"]
    pandas_df2.columns = ["Notional"]
    md_midx = pd.MultiIndex.from_product([modin_df2.index.levels[0], modin_df1.index])
    pd_midx = pandas.MultiIndex.from_product(
        [pandas_df2.index.levels[0], pandas_df1.index]
    )

    reindex_variants = (
        {},  # reindex without axis, index, or columns
        {"axis": 0},  # reindex with only axis
        {"axis": 0, "level": 0},  # reindex with axis and level
    )
    for extra_kwargs in reindex_variants:
        modin_result = modin_df1.reindex(md_midx, fill_value=0, **extra_kwargs)
        pandas_result = pandas_df1.reindex(pd_midx, fill_value=0, **extra_kwargs)
        df_equals(modin_result, pandas_result)


def test_getitem_empty_mask(df_mode_pair):
    """Index a concatenation of three frames with an all-``False`` boolean mask."""
    # modin-project/modin#517
    modin_frames = []
    pandas_frames = []
    # The first and third frame use ``df_mode_pair[0]``, the second ``df_mode_pair[1]``.
    for native in (df_mode_pair[0], df_mode_pair[1], df_mode_pair[0]):
        values = np.random.randint(0, 100, size=(100, 4))
        modin_part, pandas_part = create_test_df_in_defined_mode(
            values, columns=list("ABCD"), native=native
        )
        modin_frames.append(modin_part)
        pandas_frames.append(pandas_part)

    modin_data = pd.concat(modin_frames)
    pandas_data = pandas.concat(pandas_frames)

    modin_result = modin_data[[False] * len(modin_data.index)]
    pandas_result = pandas_data[[False] * len(modin_data.index)]
    df_equals(modin_result, pandas_result)


def test___setitem__mask(df_mode_pair):
    """Assign a scalar through a boolean DataFrame mask taken from a second frame.

    The frame being modified is created with ``native=df_mode_pair[0]`` and the
    frame the mask is derived from with ``native=df_mode_pair[1]``.
    """
    # DataFrame mask:
    data = test_data["int_data"]
    modin_df1, pandas_df1 = create_test_df_in_defined_mode(data, native=df_mode_pair[0])
    # The mask source takes the second element of the pair; building both
    # frames from ``df_mode_pair[0]`` would never combine the two modes.
    modin_df2, pandas_df2 = create_test_df_in_defined_mode(data, native=df_mode_pair[1])

    mean = int((RAND_HIGH + RAND_LOW) / 2)
    pandas_df1[pandas_df2 > mean] = -50
    modin_df1[modin_df2 > mean] = -50

    df_equals(modin_df1, pandas_df1)


@pytest.mark.parametrize(
    "data",
    [
        {},
        {"id": [], "max_speed": [], "health": []},
        {"id": [1], "max_speed": [2], "health": [3]},
        {"id": [4, 40, 400], "max_speed": [111, 222, 333], "health": [33, 22, 11]},
    ],
    ids=["empty_frame", "empty_cols", "1_length_cols", "2_length_cols"],
)
@pytest.mark.parametrize(
    "value",
    [[11, 22], [11, 22, 33]],
    ids=["2_length_val", "3_length_val"],
)
@pytest.mark.parametrize("convert_to_series", [False, True])
@pytest.mark.parametrize("new_col_id", [123, "new_col"], ids=["integer", "string"])
def test_setitem_on_empty_df(data, value, convert_to_series, new_col_id, df_mode_pair):
    """Insert a new column into a (possibly empty) frame via ``__setitem__``.

    Parameters
    ----------
    data : dict
        Data used to build the frame under test (``native=df_mode_pair[0]``).
    value : list
        Values assigned to the new column.
    convert_to_series : bool
        Whether ``value`` is wrapped into a Series before assignment. For the
        Modin frame the Series is created with ``native=df_mode_pair[1]``.
    new_col_id : int or str
        Label of the inserted column.
    df_mode_pair : tuple
        Pair of ``native`` flags forwarded to the frame/series factories.
    """
    modin_df, pandas_df = create_test_df_in_defined_mode(data, native=df_mode_pair[0])

    def applyier(df):
        if convert_to_series:
            if isinstance(df, pandas.DataFrame):
                converted_value = pandas.Series(value)
            else:
                # The factory returns a (modin, pandas) pair; the Modin frame must
                # receive the Modin series, otherwise ``native=df_mode_pair[1]``
                # has no effect on what is being tested.
                converted_value, _ = create_test_series_in_defined_mode(
                    value, native=df_mode_pair[1]
                )
        else:
            converted_value = value
        df[new_col_id] = converted_value
        return df

    expected_exception = None
    if not convert_to_series:
        values_length = len(value)
        index_length = len(pandas_df.index)
        expected_exception = ValueError(
            f"Length of values ({values_length}) does not match length of index ({index_length})"
        )

    eval_general(
        modin_df,
        pandas_df,
        applyier,
        expected_exception=expected_exception,
        check_for_execution_propagation=False,
        no_check_for_execution_propagation_reason=(
            "https://github.com/modin-project/modin/issues/7428"
        ),
        __inplace__=True,
    )
    # Because of https://github.com/modin-project/modin/issues/7600,
    # df_equals does not check dtypes equality for empty frames.
    assert_dtypes_equal(modin_df, pandas_df)


def test_setitem_on_empty_df_4407(df_mode_pair):
    """Add a timestamp-labelled column to a frame whose columns are an empty date range.

    After the assignment the Modin and pandas frames must be equal and their
    ``columns.freq`` attributes must match.
    """
    empty_columns = pd.date_range(end="1/1/2018", periods=0, freq="D")
    new_label = pd.date_range(end="1/1/2018", periods=1, freq="h")[0]

    modin_df, pandas_df = create_test_df_in_defined_mode(
        {}, columns=empty_columns, native=df_mode_pair[0]
    )
    modin_value, pandas_value = create_test_series_in_defined_mode(
        [1], native=df_mode_pair[1]
    )

    modin_df[new_label] = modin_value
    pandas_df[new_label] = pandas_value

    df_equals(modin_df, pandas_df)
    assert modin_df.columns.freq == pandas_df.columns.freq


def test_setitem_2d_insertion(df_mode_pair):
    """Assign a two-column frame to a list of column labels via ``__setitem__``.

    Covers keys equal to the value's columns, keys in a different order, keys
    with different labels, and a key/value length mismatch that must raise.
    """

    def build_value_picker(modin_value, pandas_value):
        """Build a function that returns either Modin or pandas DataFrame depending on the passed frame."""

        def picker(source_df, *args, **kwargs):
            if isinstance(source_df, (pd.DataFrame, pd.Series)):
                return modin_value
            return pandas_value

        return picker

    modin_df, pandas_df = create_test_df_in_defined_mode(
        test_data["int_data"], native=df_mode_pair[0]
    )

    n_rows = len(modin_df)
    modin_value, pandas_value = create_test_df_in_defined_mode(
        {"new_value1": np.arange(n_rows), "new_value2": np.arange(n_rows)},
        native=df_mode_pair[1],
    )
    # The picker holds references to the value frames, so the column renames
    # below are visible through it.
    pick_value = build_value_picker(modin_value, pandas_value)

    # Easy case - key and value.columns are equal
    eval_setitem(modin_df, pandas_df, pick_value, col=["new_value1", "new_value2"])

    # Key and value.columns have equal values but in different order
    renamed = ["new_value3", "new_value4"]
    modin_value.columns, pandas_value.columns = renamed, renamed
    eval_setitem(modin_df, pandas_df, pick_value, col=["new_value4", "new_value3"])

    # Key and value.columns have different values
    renamed = ["new_value5", "new_value6"]
    modin_value.columns, pandas_value.columns = renamed, renamed
    eval_setitem(modin_df, pandas_df, pick_value, col=["__new_value5", "__new_value6"])

    # Key and value.columns have different lengths, testing that both raise the same exception
    single_column_picker = build_value_picker(
        modin_value.iloc[:, [0]], pandas_value.iloc[:, [0]]
    )
    eval_setitem(
        modin_df,
        pandas_df,
        single_column_picker,
        col=["new_value7", "new_value8"],
        expected_exception=ValueError("Columns must be same length as key"),
    )


@pytest.mark.parametrize("does_value_have_different_columns", [True, False])
def test_setitem_2d_update(does_value_have_different_columns, df_mode_pair):
    """Overwrite several existing columns at once with columns of another frame.

    The source frame holds the same data multiplied by 10 and, depending on
    ``does_value_have_different_columns``, carries ``*_new`` column labels.
    Columns are selected positionally through lists and slices.
    """

    def test(dfs, iloc):
        """Update columns on the given numeric indices."""
        target, source = dfs
        target_cols = target.columns[iloc].tolist()
        source_cols = source.columns[iloc].tolist()
        target[target_cols] = source[source_cols]
        return target

    modin_df, pandas_df = create_test_df_in_defined_mode(
        test_data["int_data"], native=df_mode_pair[0]
    )
    modin_df2, pandas_df2 = create_test_df_in_defined_mode(
        test_data["int_data"], native=df_mode_pair[1]
    )
    modin_df2 *= 10
    pandas_df2 *= 10

    if does_value_have_different_columns:
        renamed = [f"{col}_new" for col in modin_df.columns]
        modin_df2.columns = renamed
        pandas_df2.columns = renamed

    modin_dfs = (modin_df, modin_df2)
    pandas_dfs = (pandas_df, pandas_df2)

    # The selectors are applied in this exact order because ``test`` mutates
    # the target frame in place.
    positional_selectors = [
        [0, 1, 2],
        [0, -1],
        slice(1, None),  # (start=1, stop=None)
        slice(None, -2),  # (start=None, stop=-2)
        [0, 1, 5, 6, 9, 10, -2, -1],
        [5, 4, 0, 10, 1, -1],
        slice(None, None, 2),  # (start=None, stop=None, step=2)
    ]
    for selector in positional_selectors:
        eval_general(modin_dfs, pandas_dfs, test, iloc=selector)


def test___setitem__single_item_in_series(df_mode_pair):
    """Assign a one-element Series into the ``[:1]`` slice of another Series.

    Regression test for https://github.com/modin-project/modin/issues/3860.
    The target is created with ``native=df_mode_pair[0]`` and the assigned
    value with ``native=df_mode_pair[1]``.
    """
    modin_target, pandas_target = create_test_series_in_defined_mode(
        99, native=df_mode_pair[0]
    )
    modin_source, pandas_source = create_test_series_in_defined_mode(
        100, native=df_mode_pair[1]
    )

    first_item = slice(None, 1)
    modin_target[first_item] = modin_source
    pandas_target[first_item] = pandas_source

    df_equals(modin_target, pandas_target)


@pytest.mark.parametrize(
    "value",
    [
        1,
        np.int32(1),
        1.0,
        "str val",
        pandas.Timestamp("1/4/2018"),
        np.datetime64(0, "ms"),
        True,
    ],
)
def test_loc_boolean_assignment_scalar_dtypes(value, df_mode_pair):
    """Assign a scalar to the rows selected by a boolean Series through ``.loc``.

    The frame mixes integer, float, string, object, datetime and boolean
    columns; ``value`` is written into the rows where the mask is ``True``.
    """
    # NOTE(review): both the frame and the mask are created with
    # ``df_mode_pair[1]``, so ``df_mode_pair[0]`` is unused here - confirm
    # whether the frame was meant to use ``df_mode_pair[0]``.
    native = df_mode_pair[1]
    frame_data = {
        "a": [1, 2, 3],
        "b": [3.0, 5.0, 6.0],
        "c": ["a", "b", "c"],
        "d": [1.0, "c", 2.0],
        "e": pandas.to_datetime(["1/1/2018", "1/2/2018", "1/3/2018"]),
        "f": [True, False, True],
    }
    modin_df, pandas_df = create_test_df_in_defined_mode(frame_data, native=native)
    modin_mask, pandas_mask = create_test_series_in_defined_mode(
        [False, True, True], native=native
    )

    modin_df.loc[modin_mask] = value
    pandas_df.loc[pandas_mask] = value
    df_equals(modin_df, pandas_df)


# This is a very subtle bug that comes from:
# https://github.com/modin-project/modin/issues/4945
def test_lazy_eval_index(df_mode_pair):
    """Filter one frame by a mask from another, copy it, then overwrite a column."""

    def func(df1, df2):
        mask = df2["col0"] < 6
        filtered = df1[mask].copy()
        # The index is not copied over by the step above, so it has to be
        # recomputed at some point. Our implementation of __setitem__ builds
        # a mask and inserts the value from the right-hand side into the new
        # DataFrame. It is possible that there are no new partitions, in
        # which case an empty index would end up being computed.
        incremented = filtered["col0"].apply(lambda x: x + 1)
        filtered["col0"] = incremented
        return filtered

    eval_general_interop({"col0": [0, 1]}, None, func, df_mode_pair=df_mode_pair)


def test_index_of_empty_frame(df_mode_pair):
    """Check the index of an empty frame produced by a ``query`` that matches no rows.

    Both frames get a ``RangeIndex`` named ``"index name"``; the query compares
    the first column against ``RAND_HIGH`` and the result is asserted to be
    empty before the indices are compared.
    """
    # Test on an empty frame produced by Modin's logic
    data = test_data_values[0]

    def named_range_index():
        # A fresh index object is built for every frame.
        n_rows = len(next(iter(data.values())))
        return pandas.RangeIndex(n_rows, name="index name")

    md_df1, pd_df1 = create_test_df_in_defined_mode(
        data, index=named_range_index(), native=df_mode_pair[0]
    )
    md_df2, pd_df2 = create_test_df_in_defined_mode(
        data, index=named_range_index(), native=df_mode_pair[1]
    )

    # The second frame only supplies the column label used in the query text.
    md_query = f"{md_df2.columns[0]} > {RAND_HIGH}"
    pd_query = f"{pd_df2.columns[0]} > {RAND_HIGH}"
    md_res = md_df1.query(md_query)
    pd_res = pd_df1.query(pd_query)

    assert md_res.empty and pd_res.empty
    df_equals(md_res.index, pd_res.index)


================================================
FILE: modin/tests/pandas/native_df_interoperability/test_iter.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import warnings

import matplotlib
import pytest

import modin.pandas as pd
from modin.config import NPartitions
from modin.pandas.utils import SET_DATAFRAME_ATTRIBUTE_WARNING
from modin.tests.pandas.native_df_interoperability.utils import (
    create_test_df_in_defined_mode,
    create_test_series_in_defined_mode,
)
from modin.tests.pandas.utils import df_equals, eval_general

NPartitions.put(4)

# Force matplotlib to not use any Xwindows backend.
matplotlib.use("Agg")


def test___setattr__mutating_column(df_mode_pair):
    """Check attribute-style assignment on a DataFrame (use case from issue #4577).

    Assigning to an attribute named after an existing column must update that
    column. Assigning to a name that is not a column must emit
    ``SET_DATAFRAME_ATTRIBUTE_WARNING`` once and create a plain attribute
    instead of a column.
    """
    # Use case from issue #4577
    modin_df, pandas_df = create_test_df_in_defined_mode(
        [[1]], columns=["col0"], native=df_mode_pair[0]
    )
    # Replacing a column with a list should mutate the column in place.
    pandas_df.col0 = [3]
    modin_df.col0 = [3]
    modin_ser, pandas_ser = create_test_series_in_defined_mode(
        [3], native=df_mode_pair[1]
    )
    df_equals(modin_df, pandas_df)
    # Check that the col0 attribute reflects the value update.
    df_equals(modin_df.col0, pandas_df.col0)

    pandas_df.col0 = pandas_ser
    modin_df.col0 = modin_ser

    # Check that the col0 attribute reflects this update
    df_equals(modin_df, pandas_df)

    pandas_df.loc[0, "col0"] = 4
    modin_df.loc[0, "col0"] = 4

    # Check that the col0 attribute reflects update via loc
    df_equals(modin_df, pandas_df)
    assert modin_df.col0.equals(modin_df["col0"])

    # Check that attempting to add a new col via attributes raises warning
    # and adds the provided list as a new attribute and not a column.
    with pytest.warns(
        UserWarning,
        match=SET_DATAFRAME_ATTRIBUTE_WARNING,
    ):
        modin_df.col1 = [4]

    # Turn the warning into an error: none of the assignments below may emit it.
    with warnings.catch_warnings():
        warnings.filterwarnings(
            action="error",
            message=SET_DATAFRAME_ATTRIBUTE_WARNING,
        )
        modin_df.col1 = [5]
        modin_df.new_attr = 6
        modin_df.col0 = 7

    assert "new_attr" in dir(
        modin_df
    ), "Modin attribute was not correctly added to the df."
    # The message describes the failure this assertion actually guards against:
    # the attribute leaking into the frame's columns.
    assert (
        "new_attr" not in modin_df
    ), "New attribute was incorrectly added to columns."
    assert modin_df.new_attr == 6, "Modin attribute value was set incorrectly."
    assert isinstance(
        modin_df.col0, pd.Series
    ), "Scalar was not broadcasted properly to an existing column."


def test_isin_with_modin_objects(df_mode_pair):
    """Call ``DataFrame.isin`` with a Series and with a DataFrame argument.

    The checks run twice: first with the default index on the caller, then
    with the caller re-created on index ``[10, 11]`` so that it does not match
    the index of the argument.
    """

    def check_isin(modin_pair, pandas_pair):
        eval_general(modin_pair, pandas_pair, lambda srs: srs[0].isin(srs[1]))

    frame_data = {"a": [1, 2], "b": [3, 4]}
    modin_df1, pandas_df1 = create_test_df_in_defined_mode(
        frame_data, native=df_mode_pair[0]
    )
    modin_series, pandas_series = create_test_series_in_defined_mode(
        [1, 4, 5, 6], native=df_mode_pair[1]
    )

    check_isin((modin_df1, modin_series), (pandas_df1, pandas_series))

    modin_df2 = modin_series.to_frame("a")
    pandas_df2 = pandas_series.to_frame("a")

    check_isin((modin_df1, modin_df2), (pandas_df1, pandas_df2))

    # Check case when indices are not matching
    modin_df1, pandas_df1 = create_test_df_in_defined_mode(
        frame_data,
        index=[10, 11],
        native=df_mode_pair[0],
    )

    check_isin((modin_df1, modin_series), (pandas_df1, pandas_series))
    check_isin((modin_df1, modin_df2), (pandas_df1, pandas_df2))


================================================
FILE: modin/tests/pandas/native_df_interoperability/test_join_sort.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import matplotlib
import numpy as np
import pandas
import pytest
from pytest import param

import modin.pandas as pd
from modin.config import NPartitions
from modin.pandas.io import to_pandas
from modin.tests.pandas.native_df_interoperability.utils import (
    create_test_df_in_defined_mode,
    create_test_series_in_defined_mode,
    eval_general_interop,
)
from modin.tests.pandas.utils import (
    default_to_pandas_ignore_string,
    df_equals,
    eval_general,
    random_state,
    test_data_keys,
    test_data_values,
)

NPartitions.put(4)

# Force matplotlib to not use any Xwindows backend.
matplotlib.use("Agg")

# Our configuration in pytest.ini requires that we explicitly catch all
# instances of defaulting to pandas, but some test modules, like this one,
# have too many such instances.
pytestmark = pytest.mark.filterwarnings(default_to_pandas_ignore_string)

# Initialize env for storage format detection in @pytest.mark.*
pd.DataFrame()


def df_equals_and_sort(df1, df2):
    """Compare two dataframes with ``df_equals()`` after sorting their rows.

    Each frame is sorted by all of its columns with ``ignore_index=True``, so
    the comparison does not depend on the original row order or index.
    """
    sorted_frames = [
        frame.sort_values(by=frame.columns.tolist(), ignore_index=True)
        for frame in (df1, df2)
    ]
    df_equals(*sorted_frames)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_combine(data, df_mode_pair):
    """Compare ``DataFrame.combine`` between Modin and pandas.

    The caller is created with ``native=df_mode_pair[0]`` and the ``other``
    operand (incremented by one) with ``native=df_mode_pair[1]``. The combining
    function picks the column with fewer non-NA values, falling back to the
    second operand's column on ties.
    """
    modin_df_1, pandas_df_1 = create_test_df_in_defined_mode(
        data, native=df_mode_pair[0]
    )
    modin_df_2, pandas_df_2 = create_test_df_in_defined_mode(
        data, native=df_mode_pair[1]
    )

    def pick_sparser(s1, s2):
        return s1 if s1.count() < s2.count() else s2

    modin_result = modin_df_1.combine(modin_df_2 + 1, pick_sparser)
    pandas_result = pandas_df_1.combine(pandas_df_2 + 1, pick_sparser)
    # Compare the results: merely calling ``combine`` would only prove that it
    # does not raise.
    df_equals(modin_result, pandas_result)


@pytest.mark.parametrize(
    "test_data, test_data2",
    [
        (
            np.random.randint(0, 100, size=(64, 64)),
            np.random.randint(0, 100, size=(128, 64)),
        ),
        (
            np.random.randint(0, 100, size=(128, 64)),
            np.random.randint(0, 100, size=(64, 64)),
        ),
        (
            np.random.randint(0, 100, size=(64, 64)),
            np.random.randint(0, 100, size=(64, 128)),
        ),
        (
            np.random.randint(0, 100, size=(64, 128)),
            np.random.randint(0, 100, size=(64, 64)),
        ),
    ],
)
def test_join(test_data, test_data2, df_mode_pair):
    """Compare ``DataFrame.join`` between Modin and pandas.

    The first part joins two frames built from the parametrized arrays (with
    ``native`` taken from ``df_mode_pair[0]`` and ``df_mode_pair[1]``) for
    every ``how`` and for paired ``on``/``sort`` settings. The second part
    joins small frames created directly through the ``pd``/``pandas``
    constructors, both with a single frame and with a list of frames.
    """

    def build_frames(array, native):
        # Columns are named col0..colN and the index is 1-based and named "key".
        return create_test_df_in_defined_mode(
            array,
            columns=["col{}".format(i) for i in range(array.shape[1])],
            index=pd.Index(list(range(1, array.shape[0] + 1)), name="key"),
            native=native,
        )

    modin_df, pandas_df = build_frames(test_data, df_mode_pair[0])
    modin_df2, pandas_df2 = build_frames(test_data2, df_mode_pair[1])

    hows = ["inner", "left", "right", "outer"]
    ons = ["col33", "col34"]
    sorts = [False, True]
    assert len(ons) == len(sorts), "the loop below is designed for this condition"
    for how in hows:
        for on, sort in zip(ons, sorts):
            join_kwargs = {
                "how": how,
                "on": on,
                "sort": sort,
                "lsuffix": "_caller",
                "rsuffix": "_other",
            }
            modin_result = modin_df.join(modin_df2, **join_kwargs)
            pandas_result = pandas_df.join(pandas_df2, **join_kwargs)
            # sorting in `join` is implemented through range partitioning technique
            # therefore the order of the rows after it does not match the pandas,
            # so additional sorting is needed in order to get the same result as for pandas
            compare = df_equals_and_sort if sort else df_equals
            compare(modin_result, pandas_result)

    frame_data = {
        "col1": [0, 1, 2, 3],
        "col2": [4, 5, 6, 7],
        "col3": [8, 9, 0, 1],
        "col4": [2, 4, 5, 6],
    }
    modin_df = pd.DataFrame(frame_data)
    pandas_df = pandas.DataFrame(frame_data)

    frame_data2 = {"col5": [0], "col6": [1]}
    modin_df2 = pd.DataFrame(frame_data2)
    pandas_df2 = pandas.DataFrame(frame_data2)

    # Join with a single frame.
    for how in ["left", "right", "outer", "inner"]:
        df_equals(
            modin_df.join(modin_df2, how=how),
            pandas_df.join(pandas_df2, how=how),
        )

    frame_data3 = {"col7": [1, 2, 3, 5, 6, 7, 8]}
    modin_df3 = pd.DataFrame(frame_data3)
    pandas_df3 = pandas.DataFrame(frame_data3)

    # Join with a list of frames.
    for how in ["left", "outer", "inner"]:
        df_equals(
            modin_df.join([modin_df2, modin_df3], how=how),
            pandas_df.join([pandas_df2, pandas_df3], how=how),
        )


def test_join_cross_6786(df_mode_pair):
    """Cross-join a frame with a single-column frame whose index was relabelled.

    The right-hand operand is column ``"x"`` of the second frame with its index
    replaced by ``["p", "q"]``; the join uses ``how="cross"`` and
    ``lsuffix="p"``.
    """

    def cross_join(left, right):
        relabelled = right[["x"]].set_axis(["p", "q"], axis=0)
        return left.join(relabelled, how="cross", lsuffix="p")

    data = [[7, 8, 9], [10, 11, 12]]
    labels = ["x", "y", "z"]
    modin_df_1, pandas_df_1 = create_test_df_in_defined_mode(
        data, columns=labels, native=df_mode_pair[0]
    )
    modin_df_2, pandas_df_2 = create_test_df_in_defined_mode(
        data, columns=labels, native=df_mode_pair[1]
    )

    modin_join = cross_join(modin_df_1, modin_df_2)
    pandas_join = cross_join(pandas_df_1, pandas_df_2)
    df_equals(modin_join, pandas_join)


@pytest.mark.parametrize(
    "test_data, test_data2",
    [
        (
            np.random.randint(0, 100, size=(64, 64)),
            np.random.randint(0, 100, size=(128, 64)),
        ),
        (
            np.random.randint(0, 100, size=(128, 64)),
            np.random.randint(0, 100, size=(64, 64)),
        ),
        (
            np.random.randint(0, 100, size=(64, 64)),
            np.random.randint(0, 100, size=(64, 128)),
        ),
        (
            np.random.randint(0, 100, size=(64, 128)),
            np.random.randint(0, 100, size=(64, 64)),
        ),
    ],
)
@pytest.mark.parametrize(
    "merge_with_on, merge_with_left_on_right_on",
    [
        param(
            lambda df1, df2, *, lib, how, sort, on=None: df1.merge(
                df2, how=how, on=on, sort=sort
            ),
            lambda df1, df2, *, lib, how, sort: df1.merge(
                df2, how=how, left_on="key", right_on="key", sort=sort
            ),
            id="merge_with_dataframe_method",
        ),
        param(
            lambda df1, df2, *, lib, how, sort, on=None: lib.merge(
                df1,
                df2,
                how=how,
                on=on,
                sort=sort,
            ),
            lambda df1, df2, *, lib, how, sort: lib.merge(
                df1, df2, how=how, left_on="key", right_on="key", sort=sort
            ),
            id="merge_with_general_function",
        ),
    ],
)
def test_merge(
    test_data,
    test_data2,
    df_mode_pair,
    merge_with_on,
    merge_with_left_on_right_on,
):
    """Compare merge results between Modin and pandas.

    ``merge_with_on`` and ``merge_with_left_on_right_on`` are callables that
    merge either through the DataFrame method or through the library-level
    ``merge`` function. Every ``how`` is combined with paired ``on``/``sort``
    settings, and results are compared after sorting their rows.
    """

    def build_frames(array, native):
        # Columns are named col0..colN and the index is 1-based and named "key".
        return create_test_df_in_defined_mode(
            array,
            columns=["col{}".format(i) for i in range(array.shape[1])],
            index=pd.Index(list(range(1, array.shape[0] + 1)), name="key"),
            native=native,
        )

    modin_df, pandas_df = build_frames(test_data, df_mode_pair[0])
    modin_df2, pandas_df2 = build_frames(test_data2, df_mode_pair[1])

    hows = ["left", "inner", "right"]
    ons = ["col33", ["col33", "col34"]]
    sorts = [False, True]
    assert len(ons) == len(sorts), "the loop below is designed for this condition"
    for how in hows:
        for on, sort in zip(ons, sorts):
            modin_result = merge_with_on(
                modin_df, modin_df2, how=how, on=on, sort=sort, lib=pd
            )
            pandas_result = merge_with_on(
                pandas_df, pandas_df2, how=how, on=on, sort=sort, lib=pandas
            )
            # FIXME: https://github.com/modin-project/modin/issues/2246
            df_equals_and_sort(modin_result, pandas_result)

            modin_result = merge_with_left_on_right_on(
                modin_df, modin_df2, how=how, sort=sort, lib=pd
            )
            pandas_result = merge_with_left_on_right_on(
                pandas_df, pandas_df2, how=how, sort=sort, lib=pandas
            )
            # FIXME: https://github.com/modin-project/modin/issues/2246
            df_equals_and_sort(modin_result, pandas_result)


@pytest.mark.parametrize("how", ["left", "inner", "right"])
def test_merge_empty(
    how,
    df_mode_pair,
):
    """Merge a 64x64 random integer frame with a zero-row slice of another frame."""

    def merge_with_empty(df1, df2):
        # ``iloc[:0]`` keeps the columns of ``df2`` but none of its rows.
        return df1.merge(df2.iloc[:0], how=how)

    eval_general_interop(
        np.random.randint(0, 100, size=(64, 64)),
        None,
        merge_with_empty,
        df_mode_pair,
    )


def test_merge_with_mi_columns(df_mode_pair):
    """Merge two frames with tuple column labels on the ``("col0", "a")`` column."""
    shared = {
        ("col0", "a"): [1, 2, 3, 4],
        ("col1", "a"): [3, 4, 5, 6],
    }

    def frame_data(second_label):
        # Key order matches the column order the frames are expected to have.
        return {
            ("col0", "a"): shared[("col0", "a")],
            second_label: [2, 3, 4, 5],
            ("col1", "a"): shared[("col1", "a")],
        }

    modin_df1, pandas_df1 = create_test_df_in_defined_mode(
        frame_data(("col0", "b")), native=df_mode_pair[0]
    )
    modin_df2, pandas_df2 = create_test_df_in_defined_mode(
        frame_data(("col0", "c")), native=df_mode_pair[1]
    )

    def merge_on_first_column(dfs):
        left, right = dfs[0], dfs[1]
        return left.merge(right, on=[("col0", "a")])

    eval_general(
        (modin_df1, modin_df2),
        (pandas_df1, pandas_df2),
        merge_on_first_column,
    )


def test_where(df_mode_pair):
    """Compare ``DataFrame.where`` between Modin and pandas for several kinds of ``other``.

    ``other`` is, in turn: the negated second frame, a Series aligned on
    ``axis=0``, an all-``True`` frame, a pandas row, a pandas column, and the
    scalar ``True``.
    """

    def assert_same_values(modin_result, pandas_result):
        # Element-wise equality after converting the Modin result to pandas.
        assert all((to_pandas(modin_result) == pandas_result).all())

    columns = list("abcdefghij")

    frame_data = random_state.randn(100, 10)
    modin_df_1, pandas_df_1 = create_test_df_in_defined_mode(
        frame_data, columns=columns, native=df_mode_pair[0]
    )
    modin_df_2, pandas_df_2 = create_test_df_in_defined_mode(
        frame_data, columns=columns, native=df_mode_pair[1]
    )
    pandas_cond_df = pandas_df_2 % 5 < 2
    modin_cond_df = modin_df_2 % 5 < 2

    pandas_result = pandas_df_1.where(pandas_cond_df, -pandas_df_2)
    modin_result = modin_df_1.where(modin_cond_df, -modin_df_2)
    assert_same_values(modin_result, pandas_result)

    # test case when other is Series
    modin_other, pandas_other = create_test_series_in_defined_mode(
        random_state.randn(len(pandas_df_1)), native=df_mode_pair[0]
    )
    pandas_result = pandas_df_1.where(pandas_cond_df, pandas_other, axis=0)
    modin_result = modin_df_1.where(modin_cond_df, modin_other, axis=0)
    df_equals(modin_result, pandas_result)

    # Test that we choose the right values to replace when `other` == `True`
    # everywhere.
    modin_other, pandas_other = create_test_df_in_defined_mode(
        np.full(shape=pandas_df_1.shape, fill_value=True),
        columns=columns,
        native=df_mode_pair[0],
    )
    pandas_result = pandas_df_1.where(pandas_cond_df, pandas_other)
    modin_result = modin_df_1.where(modin_cond_df, modin_other)
    df_equals(modin_result, pandas_result)

    # A pandas row (aligned on axis=1) and a pandas column (aligned on axis=0)
    # are passed as `other` to both implementations.
    for other, axis in ((pandas_df_1.loc[3], 1), (pandas_df_1["e"], 0)):
        pandas_result = pandas_df_1.where(pandas_cond_df, other, axis=axis)
        modin_result = modin_df_1.where(modin_cond_df, other, axis=axis)
        assert_same_values(modin_result, pandas_result)

    pandas_result = pandas_df_1.where(pandas_df_2 < 2, True)
    modin_result = modin_df_1.where(modin_df_2 < 2, True)
    assert_same_values(modin_result, pandas_result)


@pytest.mark.parametrize("align_axis", ["index", "columns"])
@pytest.mark.parametrize("keep_shape", [False, True])
@pytest.mark.parametrize("keep_equal", [False, True])
def test_compare(align_axis, keep_shape, keep_equal, df_mode_pair):
    """Compare ``compare()`` results between Modin and pandas.

    Both DataFrame and Series operands are checked, in both call directions.
    In each pair the first operand is created with ``native=df_mode_pair[0]``
    and the second with ``native=df_mode_pair[1]``.

    Parameters
    ----------
    align_axis : {"index", "columns"}
    keep_shape : bool
    keep_equal : bool
        Forwarded unchanged to ``compare()``.
    df_mode_pair : tuple
        Pair of ``native`` flags forwarded to the frame/series factories.
    """
    kwargs = {
        "align_axis": align_axis,
        "keep_shape": keep_shape,
        "keep_equal": keep_equal,
    }

    def check_both_directions(modin_pair, pandas_pair):
        # ``compare`` is not symmetric, so each pair is checked as a.compare(b)
        # and then as b.compare(a).
        for first, second in ((0, 1), (1, 0)):
            modin_result = modin_pair[first].compare(modin_pair[second], **kwargs)
            pandas_result = pandas_pair[first].compare(pandas_pair[second], **kwargs)
            assert to_pandas(modin_result).equals(pandas_result)

    frame_data1 = random_state.randn(100, 10)
    frame_data2 = random_state.randn(100, 10)
    modin_df, pandas_df = create_test_df_in_defined_mode(
        frame_data1, columns=list("abcdefghij"), native=df_mode_pair[0]
    )
    # The second frame takes the second element of the pair; building both
    # frames from ``df_mode_pair[0]`` would never combine the two modes.
    modin_df2, pandas_df2 = create_test_df_in_defined_mode(
        frame_data2, columns=list("abcdefghij"), native=df_mode_pair[1]
    )
    check_both_directions((modin_df, modin_df2), (pandas_df, pandas_df2))

    series_data1 = ["a", "b", "c", "d", "e"]
    series_data2 = ["a", "a", "c", "b", "e"]
    modin_series1, pandas_series1 = create_test_series_in_defined_mode(
        series_data1, native=df_mode_pair[0]
    )
    modin_series2, pandas_series2 = create_test_series_in_defined_mode(
        series_data2, native=df_mode_pair[1]
    )
    check_both_directions(
        (modin_series1, modin_series2), (pandas_series1, pandas_series2)
    )


================================================
FILE: modin/tests/pandas/native_df_interoperability/test_map_metadata.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.


import matplotlib
import numpy as np
import pandas
import pytest

import modin.pandas as pd
from modin.config import NPartitions
from modin.tests.pandas.native_df_interoperability.utils import (
    create_test_df_in_defined_mode,
    create_test_series_in_defined_mode,
)
from modin.tests.pandas.utils import (
    RAND_HIGH,
    RAND_LOW,
    axis_keys,
    axis_values,
    default_to_pandas_ignore_string,
    df_equals,
    eval_general,
    name_contains,
    numeric_dfs,
    random_state,
    test_data,
    test_data_keys,
    test_data_values,
)

NPartitions.put(4)

# Force matplotlib to not use any Xwindows backend.
matplotlib.use("Agg")

# Our configuration in pytest.ini requires that we explicitly catch all
# instances of defaulting to pandas, but some test modules, like this one,
# have too many such instances.
pytestmark = pytest.mark.filterwarnings(default_to_pandas_ignore_string)


def eval_insert(modin_df, pandas_df, **kwargs):
    """Run ``df.insert(**kwargs)`` in place on both frames through ``eval_general``.

    ``loc`` defaults to ``0`` and ``column`` to ``"New column"``. A ``col``
    keyword is accepted as an alias for ``column`` when ``column`` itself is
    not given.
    """
    if "col" in kwargs and "column" not in kwargs:
        kwargs["column"] = kwargs.pop("col")

    # Caller-supplied keywords override the defaults.
    insert_kwargs = {"loc": 0, "column": "New column", **kwargs}

    def do_insert(df, **kwargs):
        return df.insert(**kwargs)

    eval_general(
        modin_df,
        pandas_df,
        operation=do_insert,
        __inplace__=True,
        **insert_kwargs,
    )


def test_empty_df(df_mode_pair):
    """Add a Series as column ``"a"`` to a frame created from ``None`` data."""
    modin_frame, pandas_frame = create_test_df_in_defined_mode(
        None, native=df_mode_pair[0]
    )
    modin_column, pandas_column = create_test_series_in_defined_mode(
        [1, 2, 3, 4, 5], native=df_mode_pair[1]
    )

    modin_frame["a"] = modin_column
    pandas_frame["a"] = pandas_column

    df_equals(modin_frame, pandas_frame)


def test_astype(df_mode_pair):
    """Check that ``astype`` with a dtype Series that has a duplicated index raises.

    Both Modin and pandas are expected to raise
    ``ValueError("cannot reindex on an axis with duplicate labels")``.
    """
    source = pandas.DataFrame(test_data["int_data"])[["col1", "index", "col3", "col4"]]
    modin_df, pandas_df = create_test_df_in_defined_mode(
        source.values,
        index=source.index,
        columns=source.columns,
        native=df_mode_pair[0],
    )

    def astype_func(df):
        # The dtype series is rebuilt on every call; "col1" appears twice in
        # its index on purpose.
        md_ser, pd_ser = create_test_series_in_defined_mode(
            [str, str], index=["col1", "col1"], native=df_mode_pair[1]
        )
        dtypes = md_ser if isinstance(df, pd.DataFrame) else pd_ser
        return df.astype(dtypes)

    # The dtypes series must have a unique index.
    duplicate_labels_error = ValueError(
        "cannot reindex on an axis with duplicate labels"
    )
    eval_general(
        modin_df,
        pandas_df,
        astype_func,
        expected_exception=duplicate_labels_error,
    )


###########################################################################


def test_convert_dtypes_5653(df_mode_pair):
    """
    ``convert_dtypes`` on a concat of a string frame and an all-``None`` frame.

    The result must expose a single column whose dtype compares equal to
    ``"string"``.
    """
    strings_df = create_test_df_in_defined_mode(
        {"col1": ["a", "b", "c", "d"]}, native=df_mode_pair[0]
    )[0]
    nulls_df = create_test_df_in_defined_mode(
        {"col1": [None, None, None, None]}, native=df_mode_pair[1]
    )[0]

    modin_df = pd.concat([strings_df, nulls_df])
    query_compiler = modin_df._query_compiler
    if query_compiler.storage_format == "Pandas":
        # Each concatenated part is expected to stay in its own row partition.
        assert query_compiler._modin_frame._partitions.shape == (2, 1)

    modin_df = modin_df.convert_dtypes()
    result_dtypes = modin_df.dtypes
    assert len(result_dtypes) == 1
    assert result_dtypes.iloc[0] == "string"


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("axis", axis_values, ids=axis_keys)
@pytest.mark.parametrize("bound_type", ["list", "series"], ids=["list", "series"])
@pytest.mark.exclude_in_sanity
def test_clip(request, data, axis, bound_type, df_mode_pair):
    """
    Compare ``DataFrame.clip`` with per-label bounds between Modin and pandas.

    Bounds are random integers, passed either as plain arrays or as Series
    (the lower bound created in the mode of ``df_mode_pair[1]``, the upper
    bound in the mode of ``df_mode_pair[0]``).
    """
    modin_df, pandas_df = create_test_df_in_defined_mode(data, native=df_mode_pair[0])

    # Nothing is checked for datasets that are not numeric.
    if not name_contains(request.node.name, numeric_dfs):
        return

    # One bound value per label along the axis being clipped.
    if pandas.DataFrame()._get_axis_number(axis):
        bound_len = len(modin_df.columns)
    else:
        bound_len = len(modin_df.index)

    lower = random_state.randint(RAND_LOW, RAND_HIGH, bound_len)
    upper = random_state.randint(RAND_LOW, RAND_HIGH, bound_len)

    if bound_type != "series":
        modin_lower = pandas_lower = lower
        modin_upper = pandas_upper = upper
    else:
        modin_lower, pandas_lower = create_test_series_in_defined_mode(
            lower, native=df_mode_pair[1]
        )
        modin_upper, pandas_upper = create_test_series_in_defined_mode(
            upper, native=df_mode_pair[0]
        )

    # test lower and upper list bound on each column
    df_equals(
        modin_df.clip(modin_lower, modin_upper, axis=axis),
        pandas_df.clip(pandas_lower, pandas_upper, axis=axis),
    )

    # test only upper list bound on each column
    df_equals(
        modin_df.clip(np.nan, modin_upper, axis=axis),
        pandas_df.clip(np.nan, pandas_upper, axis=axis),
    )

    # A list-like bound without an axis is rejected.
    with pytest.raises(ValueError):
        modin_df.clip(lower=[1, 2, 3], axis=None)


@pytest.mark.parametrize(
    "data, other_data",
    [
        ({"A": [1, 2, 3], "B": [400, 500, 600]}, {"B": [4, 5, 6], "C": [7, 8, 9]}),
        ({"C": [1, 2, 3], "B": [400, 500, 600]}, {"B": [4, 5, 6], "A": [7, 8, 9]}),
        (
            {"A": ["a", "b", "c"], "B": ["x", "y", "z"]},
            {"B": ["d", "e", "f", "g", "h", "i"]},
        ),
        ({"A": [1, 2, 3], "B": [400, 500, 600]}, {"B": [4, np.nan, 6]}),
    ],
)
@pytest.mark.parametrize("errors", ["raise", "ignore"])
def test_update(data, other_data, errors, df_mode_pair):
    """
    Compare in-place ``DataFrame.update`` between Modin and pandas.

    With ``errors="raise"`` both libraries are expected to fail with
    ``ValueError("Data overlaps.")``.
    """
    modin_df, pandas_df = create_test_df_in_defined_mode(data, native=df_mode_pair[0])
    other_modin_df, other_pandas_df = create_test_df_in_defined_mode(
        other_data, native=df_mode_pair[1]
    )

    expected_exception = ValueError("Data overlaps.") if errors == "raise" else None

    def update_in_place(df):
        # Update each frame from the "other" frame of its own library.
        other = other_modin_df if isinstance(df, pd.DataFrame) else other_pandas_df
        return df.update(other, errors=errors)

    eval_general(
        modin_df,
        pandas_df,
        update_in_place,
        __inplace__=True,
        expected_exception=expected_exception,
    )


@pytest.mark.parametrize(
    "get_index",
    [
        pytest.param(lambda idx: None, id="None_idx"),
        pytest.param(lambda idx: ["a", "b", "c"], id="No_intersection_idx"),
        pytest.param(lambda idx: idx, id="Equal_idx"),
        pytest.param(lambda idx: idx[::-1], id="Reversed_idx"),
    ],
)
@pytest.mark.parametrize(
    "get_columns",
    [
        pytest.param(lambda idx: None, id="None_idx"),
        pytest.param(lambda idx: ["a", "b", "c"], id="No_intersection_idx"),
        pytest.param(lambda idx: idx, id="Equal_idx"),
        pytest.param(lambda idx: idx[::-1], id="Reversed_idx"),
    ],
)
@pytest.mark.parametrize("dtype", [None, "str"])
@pytest.mark.exclude_in_sanity
def test_constructor_from_modin_series(get_index, get_columns, dtype, df_mode_pair):
    """
    Build a DataFrame from a dict of Series and compare Modin with pandas.

    ``get_index`` / ``get_columns`` derive the ``index`` / ``columns``
    constructor arguments from the source frame's index and the new column
    names respectively.
    """
    modin_df, pandas_df = create_test_df_in_defined_mode(
        test_data_values[0], native=df_mode_pair[0]
    )

    # Re-key every column of the source frames under a fresh name.
    modin_data = {}
    for i in range(modin_df.shape[1]):
        modin_data[f"new_col{i}"] = modin_df.iloc[:, i]
    pandas_data = {}
    for i in range(pandas_df.shape[1]):
        pandas_data[f"new_col{i}"] = pandas_df.iloc[:, i]

    index = get_index(modin_df.index)
    columns = get_columns(list(modin_data))
    constructor_kwargs = {"index": index, "columns": columns, "dtype": dtype}

    new_modin = pd.DataFrame(modin_data, **constructor_kwargs)
    new_pandas = pandas.DataFrame(pandas_data, **constructor_kwargs)
    df_equals(new_modin, new_pandas)


================================================
FILE: modin/tests/pandas/native_df_interoperability/test_pickle.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.


import numpy as np
import pytest

import modin.pandas as pd
from modin.config import PersistentPickle
from modin.tests.pandas.native_df_interoperability.utils import (
    create_test_df_in_defined_mode,
)
from modin.tests.pandas.utils import df_equals


@pytest.fixture
def modin_df():
    """Provide a 1000-row frame with two integer ``arange`` columns."""
    columns = {
        "col1": np.arange(1000),
        "col2": np.arange(2000, 3000),
    }
    return pd.DataFrame(columns)


@pytest.fixture
def modin_column(modin_df):
    """Provide the ``"col1"`` column of the ``modin_df`` fixture."""
    first_column = modin_df["col1"]
    return first_column


@pytest.fixture(params=[True, False])
def persistent(request):
    """
    Run the test once per ``PersistentPickle`` value (``True`` and ``False``).

    The value that was configured before the test is put back afterwards.
    """
    previous_value = PersistentPickle.get()
    requested_value = request.param
    PersistentPickle.put(requested_value)
    yield requested_value
    # Teardown: restore whatever was configured before this test ran.
    PersistentPickle.put(previous_value)


def test__reduce__(df_mode_pair):
    """Apply a lambda that closes over another DataFrame and compare results."""
    # `DataFrame.__reduce__` will be called implicitly when lambda expressions are
    # pre-processed for the distributed engine.
    league_names = ["Major League Baseball", "National Basketball Association"]
    abbr_md, abbr_pd = create_test_df_in_defined_mode(
        league_names, index=["MLB", "NBA"], native=df_mode_pair[0]
    )

    teams = {
        "name": ["Mariners", "Lakers"] * 500,
        "league_abbreviation": ["MLB", "NBA"] * 500,
    }
    teams_md, teams_pd = create_test_df_in_defined_mode(
        teams, native=df_mode_pair[1]
    )

    # The lambdas capture the lookup frames, which is what exercises the
    # implicit pickling mentioned above.
    md_abbreviations = teams_md.set_index("name").league_abbreviation
    result_md = md_abbreviations.apply(lambda abbr: abbr_md[0].loc[abbr]).rename(
        "league"
    )

    pd_abbreviations = teams_pd.set_index("name").league_abbreviation
    result_pd = pd_abbreviations.apply(lambda abbr: abbr_pd[0].loc[abbr]).rename(
        "league"
    )

    df_equals(result_md, result_pd)


================================================
FILE: modin/tests/pandas/native_df_interoperability/test_window.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import matplotlib
import numpy as np
import pandas

import modin.pandas as pd
from modin.config import NPartitions
from modin.tests.pandas.native_df_interoperability.utils import (
    create_test_df_in_defined_mode,
)
from modin.tests.pandas.utils import df_equals

NPartitions.put(4)

# Force matplotlib to not use any Xwindows backend.
matplotlib.use("Agg")


def test_fillna_4660(df_mode_pair):
    """Fill an all-NA column of one frame from a column of another frame."""

    def make_frames(native):
        # A fresh data dict per call, so the two pairs share no input objects.
        return create_test_df_in_defined_mode(
            {"a": ["a"], "b": ["b"], "c": [pd.NA]},
            index=["row1"],
            native=native,
        )

    modin_df_1, pandas_df_1 = make_frames(df_mode_pair[0])
    modin_df_2, pandas_df_2 = make_frames(df_mode_pair[1])

    df_equals(
        modin_df_1["c"].fillna(modin_df_2["b"]),
        pandas_df_1["c"].fillna(pandas_df_2["b"]),
    )


def test_fillna_dict_series(df_mode_pair):
    """``fillna`` with a dict of per-column values and with a Series."""
    nan = np.nan
    frame_data = {
        "a": [nan, 1, 2, nan, nan],
        "b": [1, 2, 3, nan, nan],
        "c": [nan, 1, 2, 3, 4],
    }
    df = pandas.DataFrame(frame_data)
    modin_df = pd.DataFrame(frame_data)
    modin_df_1, pandas_df_1 = create_test_df_in_defined_mode(
        frame_data, native=df_mode_pair[0]
    )
    modin_df_2, pandas_df_2 = create_test_df_in_defined_mode(
        frame_data, native=df_mode_pair[1]
    )

    # The second mapping also names a column ("d") that the frame lacks.
    for fill_values in ({"a": 0, "b": 5}, {"a": 0, "b": 5, "d": 7}):
        df_equals(
            modin_df.fillna(dict(fill_values)),
            df.fillna(dict(fill_values)),
        )

    # Series treated same as dict
    modin_filled = modin_df_1.fillna(modin_df_2.max())
    pandas_filled = pandas_df_1.fillna(pandas_df_2.max())
    df_equals(modin_filled, pandas_filled)


def test_fillna_dataframe(df_mode_pair):
    """``fillna`` with another DataFrame that only partially overlaps."""
    nan = np.nan
    target_data = {
        "a": [nan, 1, 2, nan, nan],
        "b": [1, 2, 3, nan, nan],
        "c": [nan, 1, 2, 3, 4],
    }
    filler_data = {
        "a": [nan, 10, 20, 30, 40],
        "b": [50, 60, 70, 80, 90],
        "foo": ["bar"] * 5,
    }

    modin_df_1, pandas_df_1 = create_test_df_in_defined_mode(
        target_data, index=list("VWXYZ"), native=df_mode_pair[0]
    )
    modin_df_2, pandas_df_2 = create_test_df_in_defined_mode(
        filler_data, index=list("VWXuZ"), native=df_mode_pair[1]
    )

    # only those columns and indices which are shared get filled
    modin_filled = modin_df_1.fillna(modin_df_2)
    pandas_filled = pandas_df_1.fillna(pandas_df_2)
    df_equals(modin_filled, pandas_filled)


================================================
FILE: modin/tests/pandas/native_df_interoperability/utils.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

from contextlib import contextmanager, nullcontext

from modin import set_execution
from modin.config import Engine, StorageFormat
from modin.config import context as config_context
from modin.config.envvars import Backend
from modin.tests.pandas.utils import (
    NoModinException,
    create_test_dfs,
    create_test_series,
    df_equals,
)
from modin.tests.test_utils import current_execution_is_native
from modin.utils import try_cast_to_pandas


@contextmanager
def switch_to_native_execution():
    """
    Temporarily switch execution to ``("Native", "Native")``.

    The engine and storage format that were configured on entry are set again
    on exit, whether or not the body raised.
    """
    previous = {"engine": Engine.get(), "storage_format": StorageFormat.get()}
    try:
        set_execution("Native", "Native")
        yield
    finally:
        set_execution(**previous)


def create_test_df_in_defined_mode(
    *args, post_fn=None, backend=None, native=None, **kwargs
):
    """
    Create a (Modin, pandas) DataFrame pair via ``create_test_dfs``.

    Parameters
    ----------
    *args, **kwargs
        Forwarded to ``create_test_dfs``.
    post_fn, backend
        Forwarded to ``create_test_dfs`` under the same names.
    native : bool
        If True, the frames are created under native execution with the
        ``Backend`` config set to "Pandas"; otherwise the current ``Backend``
        value is used and execution is left as is.

    Returns
    -------
    tuple
        ``(modin_df, pandas_df)``.

    Raises
    ------
    ValueError
        If `native` is not a bool.
    """
    assert not current_execution_is_native(), "already in native dataframe mode."

    if not isinstance(native, bool):
        raise ValueError("`native` should be True or False.")

    # Use the default backend unless native
    if native:
        hybrid_backend = "Pandas"
        execution_ctx = switch_to_native_execution()
    else:
        hybrid_backend = Backend.get()
        execution_ctx = nullcontext()

    with execution_ctx, config_context(
        AutoSwitchBackend=False, Backend=hybrid_backend
    ):
        modin_df, pandas_df = create_test_dfs(
            *args, post_fn=post_fn, backend=backend, **kwargs
        )
    return modin_df, pandas_df


def create_test_series_in_defined_mode(
    vals, sort=False, backend=None, native=None, **kwargs
):
    """
    Create a (Modin, pandas) Series pair via ``create_test_series``.

    Parameters
    ----------
    vals
        Forwarded to ``create_test_series`` as its first argument.
    sort, backend, **kwargs
        Forwarded to ``create_test_series``.
    native : bool
        If True, the series are created under native execution with the
        ``Backend`` config set to "Pandas"; otherwise the current ``Backend``
        value is used and execution is left as is.

    Returns
    -------
    tuple
        ``(modin_ser, pandas_ser)``.

    Raises
    ------
    ValueError
        If `native` is not a bool.
    """
    assert not current_execution_is_native(), "already in native dataframe mode."

    if not isinstance(native, bool):
        raise ValueError("`native` should be True or False.")

    # Use the default backend unless native
    if native:
        hybrid_backend = "Pandas"
        execution_ctx = switch_to_native_execution()
    else:
        hybrid_backend = Backend.get()
        execution_ctx = nullcontext()

    with execution_ctx, config_context(
        AutoSwitchBackend=False, Backend=hybrid_backend
    ):
        modin_ser, pandas_ser = create_test_series(
            vals, sort=sort, backend=backend, **kwargs
        )
    return modin_ser, pandas_ser


def eval_general_interop(
    data,
    backend,
    operation,
    df_mode_pair,
    comparator=df_equals,
    __inplace__=False,
    expected_exception=None,
    check_kwargs_callable=True,
    md_extra_kwargs=None,
    comparator_kwargs=None,
    **kwargs,
):
    """
    Run a two-frame operation on Modin and pandas inputs and compare the results.

    Two (Modin, pandas) frame pairs are built from `data`, each in the mode
    selected by the matching element of `df_mode_pair`. `operation` is called
    as ``operation(df1, df2, **kwargs)`` on the pandas pair and on the Modin
    pair, and the outcomes are handed to `comparator`.

    Parameters
    ----------
    data
        Data used to build both frame pairs.
    backend
        Forwarded to ``create_test_df_in_defined_mode`` as ``backend``.
    operation : callable
        Operation under test, taking the two frames plus keyword arguments.
    df_mode_pair : tuple of (bool, bool)
        ``native`` flags for the first and the second frame pair.
    comparator : callable, default: df_equals
        Called with the Modin result, the pandas result and `comparator_kwargs`.
    __inplace__ : bool, default: False
        If True, the first input frames are compared after the operation
        instead of the operation's return values.
    expected_exception : Exception, False or None, default: None
        Exception instance both libraries are expected to raise (type and
        ``args`` are checked). ``False`` disables the message check. With
        ``None``, an exception raised by both libraries is re-raised so that
        unexpected failures are not hidden.
    check_kwargs_callable : bool, default: True
        If True, every callable value in `kwargs` is first called with the
        two frames of each library, and its results are used as that
        library's keyword value.
    md_extra_kwargs : dict, optional
        Keyword arguments passed to the Modin call only.
    comparator_kwargs : dict, optional
        Extra keyword arguments for `comparator`.
    **kwargs
        Keyword arguments for `operation`.

    Raises
    ------
    NoModinException
        If pandas raises but Modin does not.
    """
    df1_native, df2_native = df_mode_pair
    modin_df1, pandas_df1 = create_test_df_in_defined_mode(
        data, backend=backend, native=df1_native
    )
    modin_df2, pandas_df2 = create_test_df_in_defined_mode(
        data, backend=backend, native=df2_native
    )
    md_kwargs, pd_kwargs = {}, {}

    def execute_callable(fn, inplace=False, md_kwargs=None, pd_kwargs=None):
        # ``None`` defaults instead of shared mutable ``{}`` defaults.
        md_kwargs = {} if md_kwargs is None else md_kwargs
        pd_kwargs = {} if pd_kwargs is None else pd_kwargs
        try:
            pd_result = fn(pandas_df1, pandas_df2, **pd_kwargs)
        except Exception as pd_e:
            try:
                if inplace:
                    _ = fn(modin_df1, modin_df2, **md_kwargs)
                    try_cast_to_pandas(modin_df1)  # force materialization
                else:
                    try_cast_to_pandas(
                        fn(modin_df1, modin_df2, **md_kwargs)
                    )  # force materialization
            except Exception as md_e:
                assert isinstance(
                    md_e, type(pd_e)
                ), "Got Modin Exception type {}, but pandas Exception type {} was expected".format(
                    type(md_e), type(pd_e)
                )
                if expected_exception:
                    if Engine.get() == "Ray":
                        from ray.exceptions import RayTaskError

                        # unwrap ray exceptions from remote worker
                        if isinstance(md_e, RayTaskError):
                            md_e = md_e.args[0]
                    assert (
                        type(md_e) is type(expected_exception)
                        and md_e.args == expected_exception.args
                    ), f"not acceptable Modin's exception: [{repr(md_e)}] expected {expected_exception}"
                    assert (
                        pd_e.args == expected_exception.args
                    ), f"not acceptable Pandas' exception: [{repr(pd_e)}]"
                elif expected_exception is False:
                    # The only way to disable exception message checking.
                    pass
                else:
                    # It's not enough that Modin and pandas have the same types of exceptions;
                    # we need to explicitly specify the instance of an exception
                    # (using `expected_exception`) in tests so that we can check exception messages.
                    # This allows us to eliminate situations where exceptions are thrown
                    # that we don't expect, which could hide different bugs.
                    raise pd_e
            else:
                raise NoModinException(
                    f"Modin doesn't throw an exception, while pandas does: [{repr(pd_e)}]"
                )
        else:
            md_result = fn(modin_df1, modin_df2, **md_kwargs)
            return (md_result, pd_result) if not inplace else (modin_df1, pandas_df1)

    for key, value in kwargs.items():
        if check_kwargs_callable and callable(value):
            values = execute_callable(value)
            # that means, that callable raised an exception
            if values is None:
                return
            else:
                md_value, pd_value = values
        else:
            md_value, pd_value = value, value

        md_kwargs[key] = md_value
        pd_kwargs[key] = pd_value

    # Merge the Modin-only arguments after the loop: doing it inside the loop
    # silently dropped them whenever no ``**kwargs`` were passed. They still
    # take precedence over same-named entries from ``kwargs``.
    if md_extra_kwargs:
        assert isinstance(md_extra_kwargs, dict)
        md_kwargs.update(md_extra_kwargs)

    values = execute_callable(
        operation, md_kwargs=md_kwargs, pd_kwargs=pd_kwargs, inplace=__inplace__
    )
    if values is not None:
        comparator(*values, **(comparator_kwargs or {}))


================================================
FILE: modin/tests/pandas/test_api.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import inspect

import numpy as np
import pandas
import pytest

import modin.pandas as pd

_MODIN_EXTRA_ATTRIBUTES = (
    # modin - namespace for accessing additional Modin functions that are not available in Pandas
    "modin",
    # get_backend - get storage and engine backend for the current DataFrame
    "get_backend",
    # set_backend - set storage and engine backend for the current DataFrame
    "set_backend",
    # move_to - set storage and engine backend for the current DataFrame
    "move_to",
    # is_backend_pinned, pin_backend, unpin_backend - change automatic switching behavior
    "is_backend_pinned",
    "pin_backend",
    "unpin_backend",
)


def test_top_level_api_equality():
    """
    Check that the top-level ``modin.pandas`` namespace mirrors ``pandas``.

    Three things are verified, in this order:

    1. every public pandas name (minus ``ignore_pandas``) exists in Modin and
       Modin exposes no public name pandas lacks (minus ``ignore_modin``);
    2. for every shared callable with an introspectable signature, Modin has
       all pandas parameters with the same defaults;
    3. Modin adds no parameters of its own, except ``query_compiler``.
    """
    modin_dir = [obj for obj in dir(pd) if obj[0] != "_"]
    pandas_dir = [obj for obj in dir(pandas) if obj[0] != "_"]
    missing_from_modin = set(pandas_dir) - set(modin_dir)
    extra_in_modin = set(modin_dir) - set(pandas_dir)
    ignore_pandas = [
        "annotations",
        "np",
        "tests",
        "pandas",
        "core",
        "compat",
        "util",
        "offsets",
        "datetime",
        "api",
        "tseries",
        "to_msgpack",  # This one is experimental, and doesn't look finished
        "Panel",  # This is deprecated and throws a warning every time.
    ]

    ignore_modin = [
        "indexing",
        "iterator",
        "series",
        "accessor",
        "base",
        "utils",
        "dataframe",
        "groupby",
        "general",
        "datetime",
        "warnings",
        "os",
        "series_utils",
        "window",
    ]

    unexpected_missing = missing_from_modin - set(ignore_pandas)
    assert not unexpected_missing, "Differences found in API: {}".format(
        unexpected_missing
    )

    unexpected_extra = extra_in_modin - set(ignore_modin)
    assert not unexpected_extra, "Differences found in API: {}".format(
        unexpected_extra
    )

    allowed_different = ["Interval", "datetime", "StringDtype"]

    # Collect, once, every shared name whose two signatures are not equal.
    mismatched = []
    for m in set(pandas_dir) - set(ignore_pandas):
        if m in allowed_different:
            continue
        try:
            pandas_sig = dict(inspect.signature(getattr(pandas, m)).parameters)
            modin_sig = dict(inspect.signature(getattr(pd, m)).parameters)
        except (TypeError, ValueError):
            # Not callable, or no signature available: nothing to compare.
            continue
        if pandas_sig != modin_sig:
            mismatched.append((m, pandas_sig, modin_sig))

    # Check that we have all keywords and defaults in pandas
    missing_params = []
    for m, pandas_sig, modin_sig in mismatched:
        diff = {
            i: pandas_sig[i]
            for i in pandas_sig
            if i not in modin_sig
            or pandas_sig[i].default != modin_sig[i].default
            # NaN never compares equal to itself, so treat two NaN defaults
            # as matching.
            and not (
                pandas_sig[i].default is np.nan and modin_sig[i].default is np.nan
            )
        }
        if diff:
            missing_params.append((m, diff))

    assert not missing_params, "Missing params found in API: {}".format(
        missing_params
    )

    # Check that we have no extra keywords or defaults
    extra_params = []
    for m, pandas_sig, modin_sig in mismatched:
        diff = {
            i: modin_sig[i]
            for i in modin_sig
            if i not in pandas_sig and i != "query_compiler"
        }
        if diff:
            extra_params.append((m, diff))

    assert not extra_params, "Extra params found in API: {}".format(extra_params)


def test_dataframe_api_equality():
    """
    Check that ``modin.pandas.DataFrame`` exposes the same public API as pandas.

    Public attribute names must match (pandas' ``timetuple`` and the
    Modin-only ``_MODIN_EXTRA_ATTRIBUTES`` are tolerated) and shared methods
    must have matching parameters.
    """
    modin_dir = [obj for obj in dir(pd.DataFrame) if obj[0] != "_"]
    pandas_dir = [obj for obj in dir(pandas.DataFrame) if obj[0] != "_"]

    ignore_in_pandas = ["timetuple"]
    missing_from_modin = set(pandas_dir) - set(modin_dir) - set(ignore_in_pandas)
    # Report the offending names themselves, not just how many there are.
    assert not missing_from_modin, "Differences found in API: {}".format(
        missing_from_modin
    )

    # Modin-only attributes listed in `_MODIN_EXTRA_ATTRIBUTES` are allowed,
    # so they are excluded from the reported difference as well.
    extra_in_modin = set(modin_dir) - set(_MODIN_EXTRA_ATTRIBUTES) - set(pandas_dir)
    assert not extra_in_modin, "Differences found in API: {}".format(
        extra_in_modin
    )

    assert_parameters_eq(
        (pandas.DataFrame, pd.DataFrame),
        modin_dir,
        allowed_different=_MODIN_EXTRA_ATTRIBUTES,
    )


def test_series_str_api_equality():
    """Check that the ``Series.str`` accessor matches pandas name-for-name."""
    modin_dir = [name for name in dir(pd.Series.str) if name[0] != "_"]
    pandas_names = {name for name in dir(pandas.Series.str) if name[0] != "_"}
    modin_names = set(modin_dir)

    missing_from_modin = pandas_names - modin_names
    assert not len(missing_from_modin), "Differences found in API: {}".format(
        missing_from_modin
    )

    extra_in_modin = modin_names - pandas_names
    assert not len(extra_in_modin), "Differences found in API: {}".format(
        extra_in_modin
    )

    accessors = (pandas.Series.str, pd.Series.str)
    assert_parameters_eq(accessors, modin_dir, [])


def test_series_dt_api_equality():
    """Check that the ``Series.dt`` accessor matches pandas name-for-name."""
    modin_dir = [name for name in dir(pd.Series.dt) if name[0] != "_"]
    pandas_names = {name for name in dir(pandas.Series.dt) if name[0] != "_"}
    modin_names = set(modin_dir)

    # should be deleted, but for some reason the check fails
    # https://github.com/pandas-dev/pandas/pull/33595
    ignore = {"week", "weekofyear"}

    missing_from_modin = pandas_names - modin_names - ignore
    assert not len(missing_from_modin), "Differences found in API: {}".format(
        missing_from_modin
    )

    extra_in_modin = modin_names - pandas_names
    assert not len(extra_in_modin), "Differences found in API: {}".format(
        extra_in_modin
    )

    accessors = (pandas.Series.dt, pd.Series.dt)
    assert_parameters_eq(accessors, modin_dir, [])


def test_series_cat_api_equality():
    """Check that the ``Series.cat`` accessor matches pandas name-for-name."""
    modin_dir = [obj for obj in dir(pd.Series.cat) if obj[0] != "_"]
    pandas_dir = [obj for obj in dir(pandas.Series.cat) if obj[0] != "_"]

    missing_from_modin = set(pandas_dir) - set(modin_dir)
    # Report the missing names themselves rather than only their count.
    assert not len(missing_from_modin), "Differences found in API: {}".format(
        missing_from_modin
    )
    extra_in_modin = set(modin_dir) - set(pandas_dir)
    assert not len(extra_in_modin), "Differences found in API: {}".format(
        extra_in_modin
    )
    # all methods of `pandas.Series.cat` don't have any information about parameters,
    # just method(*args, **kwargs)
    assert_parameters_eq((pandas.core.arrays.Categorical, pd.Series.cat), modin_dir, [])


@pytest.mark.parametrize("obj", ["DataFrame", "Series"])
def test_sparse_accessor_api_equality(obj):
    """Check that the ``sparse`` accessor of `obj` matches pandas name-for-name."""
    modin_dir = [x for x in dir(getattr(pd, obj).sparse) if x[0] != "_"]
    pandas_dir = [x for x in dir(getattr(pandas, obj).sparse) if x[0] != "_"]

    missing_from_modin = set(pandas_dir) - set(modin_dir)
    # Report the missing names themselves rather than only their count.
    assert not len(missing_from_modin), "Differences found in API: {}".format(
        missing_from_modin
    )
    extra_in_modin = set(modin_dir) - set(pandas_dir)
    assert not len(extra_in_modin), "Differences found in API: {}".format(
        extra_in_modin
    )


@pytest.mark.parametrize("obj", ["SeriesGroupBy", "DataFrameGroupBy"])
def test_groupby_api_equality(obj):
    """
    Check that Modin's groupby class `obj` exposes the same public API as pandas.

    Public attribute names must match (modulo the ignore sets below) and
    shared methods must have matching parameters.
    """
    modin_dir = [x for x in dir(getattr(pd.groupby, obj)) if x[0] != "_"]
    pandas_dir = [x for x in dir(getattr(pandas.core.groupby, obj)) if x[0] != "_"]
    # These attributes are not mentioned in the pandas documentation,
    # but we might want to implement them someday.
    ignore_missing = {"keys", "level", "grouper"}
    missing_from_modin = set(pandas_dir) - set(modin_dir) - ignore_missing
    # Report the missing names themselves rather than only their count.
    assert not len(missing_from_modin), "Differences found in API: {}".format(
        missing_from_modin
    )
    # FIXME: wrong inheritance
    ignore_extra = (
        {"boxplot", "corrwith", "dtypes"} if obj == "SeriesGroupBy" else {"boxplot"}
    ) | set(_MODIN_EXTRA_ATTRIBUTES)
    # `ignore_extra` already contains `_MODIN_EXTRA_ATTRIBUTES`, so a single
    # subtraction is enough.
    extra_in_modin = set(modin_dir) - set(pandas_dir) - ignore_extra
    assert not len(extra_in_modin), "Differences found in API: {}".format(
        extra_in_modin
    )
    assert_parameters_eq(
        (getattr(pandas.core.groupby, obj), getattr(pd.groupby, obj)),
        modin_dir,
        ignore_extra,
    )


def test_series_api_equality():
    """
    Check that ``modin.pandas.Series`` exposes the same public API as pandas.

    pandas' ``timetuple`` and the Modin-only ``_MODIN_EXTRA_ATTRIBUTES`` are
    tolerated.
    """
    modin_dir = [name for name in dir(pd.Series) if name[0] != "_"]
    pandas_names = {name for name in dir(pandas.Series) if name[0] != "_"}
    modin_names = set(modin_dir)

    ignore = {"timetuple"}
    missing_from_modin = pandas_names - modin_names - ignore
    assert not len(missing_from_modin), "Differences found in API: {}".format(
        missing_from_modin
    )

    extra_in_modin = modin_names - set(_MODIN_EXTRA_ATTRIBUTES) - pandas_names
    assert not len(extra_in_modin), "Differences found in API: {}".format(
        extra_in_modin
    )

    series_classes = (pandas.Series, pd.Series)
    assert_parameters_eq(
        series_classes, modin_dir, allowed_different=_MODIN_EXTRA_ATTRIBUTES
    )


def _signature_parameters(obj, name):
    """Return ``{param_name: Parameter}`` for ``obj.<name>``, or None if it has no usable signature."""
    try:
        return dict(inspect.signature(getattr(obj, name)).parameters)
    except (TypeError, ValueError):
        # TypeError: the attribute is not a supported callable.
        # ValueError: the callable exists but no signature can be provided.
        return None


def assert_parameters_eq(objects, attributes, allowed_different):
    """
    Assert that the named attributes have matching parameters in pandas and Modin.

    Parameters
    ----------
    objects : tuple
        ``(pandas_obj, modin_obj)`` whose attributes are compared.
    attributes : iterable of str
        Attribute names to look up on both objects.
    allowed_different : container of str
        Attribute names that are skipped.

    Raises
    ------
    AssertionError
        If a Modin attribute lacks a pandas parameter or has a different
        default for it ("Missing params"), or if it has a parameter pandas
        does not have ("Extra params"). Missing parameters are reported first.

    Notes
    -----
    Attributes for which either side has no introspectable signature are
    skipped.
    """
    pandas_obj, modin_obj = objects
    missing_params = []
    extra_params = []

    for m in attributes:
        if m in allowed_different:
            continue
        pandas_sig = _signature_parameters(pandas_obj, m)
        if pandas_sig is None:
            continue
        modin_sig = _signature_parameters(modin_obj, m)
        if modin_sig is None:
            continue
        if pandas_sig == modin_sig:
            continue

        # Check that Modin functions/methods have all params as pandas,
        # with the same defaults.
        missing = {
            i: pandas_sig[i]
            for i in pandas_sig
            if i not in modin_sig
            or pandas_sig[i].default != modin_sig[i].default
            # NaN never compares equal to itself, so treat two NaN defaults
            # as matching.
            and not (
                pandas_sig[i].default is np.nan and modin_sig[i].default is np.nan
            )
        }
        if missing:
            missing_params.append((m, missing))

        # Check that Modin functions/methods don't have extra params
        extra = {i: modin_sig[i] for i in modin_sig if i not in pandas_sig}
        if extra:
            extra_params.append((m, extra))

    assert not missing_params, "Missing params found in API: {}".format(
        missing_params
    )
    assert not extra_params, "Extra params found in API: {}".format(extra_params)


================================================
FILE: modin/tests/pandas/test_backend.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import platform
import re
from unittest.mock import patch

import pandas
import pytest
import tqdm.auto

import modin.pandas as pd
from modin.config import Backend
from modin.config import context as config_context
from modin.tests.pandas.utils import (
    create_test_dfs,
    default_to_pandas_ignore_string,
    df_equals,
)

# Mark applied to the ray-based parametrizations below so they are skipped
# when the tests run on Windows; the reason string links the tracking issue.
WINDOWS_RAY_SKIP_MARK = pytest.mark.skipif(
    platform.system() == "Windows",
    reason=(
        "Some windows tests with engine != ray use 2 cores, but that "
        "doesn't work with ray due to "
        "https://github.com/modin-project/modin/issues/7387"
    ),
)

# Several modin methods emit a "defaulting to pandas" warning at the API
# layer. In this module that is expected, so the warning is filtered rather
# than being treated as the error it would normally be.
pytestmark = pytest.mark.filterwarnings(default_to_pandas_ignore_string)


def test_new_dataframe_uses_default_backend():
    """Check that a freshly built DataFrame reports the configured ``Backend``."""
    # Only the currently configured `Backend` is exercised here instead of
    # looping over every possible value in one pytest process: switching to
    # the MPI backend inside a process that is not set up to run MPI
    # (presumably one launched with plain `pytest` rather than `mpiexec`)
    # would cause errors. CI is assumed to run this file once for every
    # possible `Backend`.
    new_frame_backend = pd.DataFrame([1]).get_backend()
    configured_backend = Backend.get()
    assert new_frame_backend == configured_backend


@pytest.mark.parametrize("setter_method", ["set_backend", "move_to"])
@pytest.mark.parametrize(
    "inplace_kwargs",
    [
        pytest.param({"inplace": True}, id="inplace"),
        pytest.param({"inplace": False}, id="not_inplace"),
        pytest.param({}, id="no_inplace_kwargs"),
    ],
)
@pytest.mark.parametrize(
    "starting_backend, new_backend, expected_result_backend",
    [
        pytest.param(Backend.get(), "pandas", "Pandas", id="current_to_pandas"),
        pytest.param("pandas", Backend.get(), Backend.get(), id="pandas_to_current"),
        pytest.param(
            Backend.get(), "python_test", "Python_Test", id="current_to_python"
        ),
        pytest.param(
            "python_test", Backend.get(), Backend.get(), id="python_to_current"
        ),
        pytest.param("python_test", "pandas", "Pandas", id="python_to_pandas1"),
        pytest.param("PYTHON_test", "PANDAS", "Pandas", id="python_to_pandas2"),
        pytest.param("pandas", "python_test", "Python_Test", id="pandas_to_python"),
        pytest.param("pandas", "pandas", "Pandas", id="pandas_to_pandas"),
        pytest.param(
            "python_test", "python_test", "Python_Test", id="python_to_python"
        ),
        pytest.param(
            "ray",
            "dask",
            "Dask",
            id="ray_to_dask",
            marks=WINDOWS_RAY_SKIP_MARK,
        ),
        pytest.param(
            "dask",
            "ray",
            "Ray",
            id="dask_to_ray",
            marks=WINDOWS_RAY_SKIP_MARK,
        ),
        pytest.param(
            "ray",
            "python_test",
            "Python_Test",
            id="ray_to_python",
            marks=WINDOWS_RAY_SKIP_MARK,
        ),
        pytest.param("dask", "python_test", "Python_Test", id="dask_to_python"),
        pytest.param(
            "python_test",
            "ray",
            "Ray",
            id="python_to_ray",
            marks=WINDOWS_RAY_SKIP_MARK,
        ),
        pytest.param("python_test", "dask", "Dask", id="python_to_dask"),
        pytest.param("ray", "ray", "Ray", id="ray_to_ray", marks=WINDOWS_RAY_SKIP_MARK),
        pytest.param("dask", "dask", "Dask", id="dask_to_dask"),
    ],
)
@pytest.mark.parametrize(
    "data_class",
    [
        pytest.param(pd.DataFrame, id="dataframe"),
        pytest.param(pd.Series, id="series"),
    ],
)
def test_set_valid_backend(
    setter_method,
    inplace_kwargs,
    starting_backend,
    new_backend,
    data_class,
    expected_result_backend,
):
    """
    Switch a DataFrame/Series between valid backends and verify the outcome.

    Parameters
    ----------
    setter_method : str
        Name of the method used to switch ("set_backend" or "move_to").
    inplace_kwargs : dict
        Keyword arguments forwarded to the setter (with or without ``inplace``).
    starting_backend : str
        Backend configured while the object is created.
    new_backend : str
        Backend name handed to the setter.
    data_class : type
        ``pd.DataFrame`` or ``pd.Series``.
    expected_result_backend : str
        Value that ``get_backend()`` is expected to report afterwards.
    """
    trange_patch = patch.object(tqdm.auto, "trange", return_value=range(2))
    with trange_patch as mock_trange, config_context(Backend=starting_backend):
        source_obj = data_class([1])
        # Take the pandas snapshot for comparison while still on `starting_backend`.
        expected_pandas = source_obj.modin.to_pandas()

        switch = getattr(source_obj, setter_method)
        returned = switch(new_backend, **inplace_kwargs)

        if inplace_kwargs.get("inplace", False):
            # An in-place switch returns nothing and mutates the source object.
            assert returned is None
            switched_obj = source_obj
        else:
            assert returned is not None
            switched_obj = returned

        assert switched_obj.get_backend() == expected_result_backend
        df_equals(switched_obj, expected_pandas)

        # Changing the backend of one object must leave the global Backend alone.
        normalized_start = Backend.normalize(starting_backend)
        assert Backend.get() == normalized_start

        if normalized_start != Backend.normalize(expected_result_backend):
            # trange is constructed exactly once for a real switch; the number
            # of iterations consumed from it can't easily be asserted on.
            mock_trange.assert_called_once()
        else:
            mock_trange.assert_not_called()


def test_same_backend():
    """Setting the backend an object already uses must not start a progress bar."""
    backend_name = "Python_Test"
    with patch.object(
        tqdm.auto, "trange", return_value=range(2)
    ) as mock_trange, config_context(Backend=backend_name):
        df = pd.DataFrame([1])

        # Non-inplace call: a result is returned and no progress bar is created.
        switched_copy = df.set_backend(backend_name)
        mock_trange.assert_not_called()
        assert switched_copy.get_backend() == backend_name

        # In-place call: nothing is returned and still no progress bar.
        inplace_result = df.set_backend(backend_name, inplace=True)
        mock_trange.assert_not_called()
        assert inplace_result is None
        assert df.get_backend() == backend_name


def test_set_nonexistent_backend():
    """An unknown backend name raises ValueError that lists ``Backend.choices``."""
    quoted_choices = [f"'{choice}'" for choice in Backend.choices]
    expected_message = (
        "Unknown backend 'does_not_exist'. "
        f"Available backends are: {', '.join(quoted_choices)}"
    )
    with pytest.raises(ValueError, match=re.escape(expected_message)):
        pd.DataFrame([1]).set_backend("does_not_exist")


@pytest.mark.parametrize("backend", [None, 1, [], {}])
def test_wrong_backend_type(backend):
    """Non-string backend values raise TypeError naming the value and its type."""
    expected_message = (
        "Backend value should be a string, but instead it is "
        f"{backend!r} of type {type(backend)}"
    )
    with pytest.raises(TypeError, match=re.escape(expected_message)):
        pd.DataFrame([1]).set_backend(backend)


def test_get_backend_docstrings():
    """``get_backend`` docstrings differ only by "Series" vs "DataFrame"."""
    frame_doc = pd.DataFrame.get_backend.__doc__
    series_doc = pd.Series.get_backend.__doc__
    assert frame_doc != series_doc
    assert frame_doc == series_doc.replace("Series", "DataFrame")


@pytest.mark.parametrize("setter_method", ["set_backend", "move_to"])
def test_set_backend_docstrings(setter_method):
    """Setter docstrings differ only by "Series" vs "DataFrame"."""
    frame_doc = getattr(pd.DataFrame, setter_method).__doc__
    series_doc = getattr(pd.Series, setter_method).__doc__
    assert frame_doc != series_doc
    assert frame_doc == series_doc.replace("Series", "DataFrame")


class TestGroupbySetBackend:
    """Backend-switching tests for groupby objects built from DataFrames and Series."""

    @pytest.mark.parametrize("setter_method", ["set_backend", "move_to"])
    @pytest.mark.parametrize(
        "inplace_kwargs",
        [
            pytest.param({"inplace": True}, id="inplace"),
            pytest.param({"inplace": False}, id="not_inplace"),
            pytest.param({}, id="no_inplace_kwargs"),
        ],
    )
    @pytest.mark.parametrize(
        "starting_backend, new_backend",
        [
            pytest.param(Backend.get(), "Pandas", id="current_to_pandas"),
            pytest.param("Pandas", Backend.get(), id="pandas_to_current"),
            pytest.param(Backend.get(), "Python_Test", id="current_to_python"),
            pytest.param("Python_Test", Backend.get(), id="python_to_current"),
            pytest.param("Python_Test", "Pandas", id="python_to_pandas"),
            pytest.param("Pandas", "Python_Test", id="pandas_to_python"),
        ],
    )
    @pytest.mark.parametrize(
        "by_level_factory",
        [
            pytest.param(lambda df: ("C", None), id="by_string_column"),
            pytest.param(lambda df: (["C", "D"], None), id="by_list_of_strings"),
            pytest.param(lambda df: (df["C"], None), id="by_series"),
            pytest.param(lambda df: (["C", df["D"]], None), id="by_list_mixed"),
            pytest.param(lambda df: (pandas.Grouper(key="C"), None), id="by_grouper"),
            pytest.param(lambda df: (None, 0), id="level_scalar"),
            pytest.param(lambda df: (None, [0, 1]), id="level_list"),
            # NOTE(review): this factory is textually identical to
            # `by_list_mixed` above; kept so the set of test IDs is unchanged.
            pytest.param(
                lambda df: (["C", df["D"]], None), id="by_mixed_string_series"
            ),
        ],
    )
    def test_dataframe(
        self,
        setter_method,
        inplace_kwargs,
        starting_backend,
        new_backend,
        by_level_factory,
    ):
        """
        Switch the backend of a DataFrame groupby built from various ``by``/``level`` inputs.

        ``by_level_factory`` receives the frame being grouped and returns the
        ``(by, level)`` pair passed on to ``groupby``.
        """
        inplace = inplace_kwargs.get("inplace", False)

        def group(frame):
            by, level = by_level_factory(frame)
            return frame.groupby(by=by, level=level)

        column_data = {
            "A": [1, 2, 3, 4, 5, 6],
            "B": [10, 20, 30, 40, 50, 60],
            "C": ["x", "y", "x", "y", "x", "y"],
            "D": ["p", "p", "q", "q", "r", "r"],
        }

        with config_context(Backend=starting_backend):
            # Two-level index so that the level-based groupings have levels 0 and 1.
            two_level_index = pd.MultiIndex.from_tuples(
                [(outer, inner) for outer in ("foo", "bar", "baz") for inner in (1, 2)],
                names=["first", "second"],
            )
            modin_frame, pandas_frame = create_test_dfs(
                pandas.DataFrame(data=column_data, index=two_level_index)
            )

            # Groupby object whose backend is about to be switched.
            grouped = group(modin_frame)
            switch = getattr(grouped, setter_method)
            outcome = switch(new_backend, **inplace_kwargs)

            if inplace:
                assert outcome is None
                switched = grouped
                # The frame held by the groupby must have moved as well.
                assert grouped._df.get_backend() == new_backend
            else:
                assert outcome is not grouped
                switched = outcome
                # The frame held by the original groupby must stay where it was.
                assert grouped._df.get_backend() == starting_backend

            # The resulting groupby reports the new backend.
            assert switched.get_backend() == new_backend

            # The groupby must still compute correctly after the switch; the
            # reference comes from a fresh pandas groupby to avoid mixed
            # backend states.
            expected_sum = group(pandas_frame).sum()
            df_equals(switched.sum(), expected_sum)
            if not inplace:
                df_equals(grouped.sum(), expected_sum)

    @pytest.mark.parametrize("setter_method", ["set_backend", "move_to"])
    @pytest.mark.parametrize(
        "inplace_kwargs",
        [
            pytest.param({"inplace": True}, id="inplace"),
            pytest.param({"inplace": False}, id="not_inplace"),
            pytest.param({}, id="no_inplace_kwargs"),
        ],
    )
    @pytest.mark.parametrize(
        "starting_backend, new_backend",
        [
            pytest.param(Backend.get(), "Pandas", id="current_to_pandas"),
            pytest.param("Pandas", Backend.get(), id="pandas_to_current"),
            pytest.param(Backend.get(), "Python_Test", id="current_to_python"),
            pytest.param("Python_Test", Backend.get(), id="python_to_current"),
            pytest.param("Python_Test", "Pandas", id="python_to_pandas"),
            pytest.param("Pandas", "Python_Test", id="pandas_to_python"),
        ],
    )
    @pytest.mark.parametrize(
        "by_level_factory",
        [
            pytest.param(lambda series: (None, 0), id="by_index_level_0"),
            pytest.param(
                lambda series: (None, [0, 1]),
                id="by_index_levels_list",
            ),
            pytest.param(
                lambda series: (pandas.Grouper(level=0), None),
                id="by_grouper_level",
            ),
            # NOTE(review): the next two factories are textually identical to
            # `by_index_level_0` and `by_index_levels_list` above; kept so the
            # set of test IDs is unchanged.
            pytest.param(lambda series: (None, 0), id="level_scalar"),
            pytest.param(lambda series: (None, [0, 1]), id="level_list"),
            pytest.param(lambda series: (series, None), id="by_self"),
            pytest.param(lambda series: (series % 2, None), id="by_self_modulo_2"),
        ],
    )
    def test_series(
        self,
        setter_method,
        inplace_kwargs,
        starting_backend,
        new_backend,
        by_level_factory,
    ):
        """
        Switch the backend of a Series groupby built from various ``by``/``level`` inputs.

        ``by_level_factory`` receives the series being grouped and returns the
        ``(by, level)`` pair passed on to ``groupby``.
        """
        inplace = inplace_kwargs.get("inplace", False)
        values = [1, 2, 1, 3, 4, 5]

        def group(series):
            by, level = by_level_factory(series)
            return series.groupby(by=by, level=level)

        with config_context(Backend=starting_backend):
            # MultiIndex so that level-based grouping is possible.
            two_level_index = pd.MultiIndex.from_tuples(
                [(outer, inner) for outer in ("foo", "bar", "baz") for inner in (1, 2)],
                names=["first", "second"],
            )
            pandas_series = pandas.Series(values, index=two_level_index)
            modin_series = pd.Series(values, index=two_level_index)

            # Groupby object whose backend is about to be switched.
            grouped = group(modin_series)
            switch = getattr(grouped, setter_method)
            outcome = switch(new_backend, **inplace_kwargs)

            if inplace:
                assert outcome is None
                switched = grouped
                # The series held by the groupby must have moved as well.
                assert grouped._df.get_backend() == new_backend
            else:
                assert outcome is not grouped
                switched = outcome
                # The series held by the original groupby must stay where it was.
                assert grouped._df.get_backend() == starting_backend

            assert switched.get_backend() == new_backend

            expected_sum = group(pandas_series).sum()
            df_equals(switched.sum(), expected_sum)
            if not inplace:
                df_equals(grouped.sum(), expected_sum)


# Tests for fallback progress printing when tqdm is not available
@pytest.mark.parametrize(
    "switch_operation,expected_output",
    [
        (
            None,
            "Transfer: Python_... → Pandas      |                 ≃ (3, 1)    ",
        ),
        (
            "test_operation",
            "Transfer: Python_... → Pandas      | test_operation  ≃ (3, 1)    ",
        ),
    ],
)
@patch("tqdm.auto.trange", side_effect=ImportError("tqdm not available"))
@config_context(Backend="python_test")
def test_fallback_progress_printing(
    mock_trange, capsys, switch_operation, expected_output
):
    """
    Check the plain-text progress line written when ``tqdm.auto.trange`` fails.

    ``trange`` is patched to raise ImportError; the switch is then expected to
    report its progress on stderr and leave stdout empty.
    """
    frame = pd.DataFrame([1, 2, 3])
    frame.set_backend("pandas", switch_operation=switch_operation)

    streams = capsys.readouterr()
    # The progress line goes to stderr only.
    assert expected_output in streams.err
    assert streams.out == ""


@config_context(Backend="python_test")
def test_bigger_df_progress_message():
    """Check the progress description produced for a 121 x 144 frame."""
    # Insignificant digits in the size get truncated
    n_rows, n_cols = 121, 144
    df = pd.DataFrame([[1] * n_cols] * n_rows)
    with patch.object(tqdm.auto, "trange", return_value=range(2)) as mock_trange:
        df.set_backend("pandas")
        mock_trange.assert_called_once()
        # The description is passed to trange as the `desc` keyword argument.
        desc = mock_trange.call_args.kwargs["desc"]
        expected_prefix = (
            "Transfer: Python_... → Pandas      |                 ≃ (1e+02, 1e+02)"
        )
        assert desc.startswith(expected_prefix)


@patch("tqdm.auto.trange", side_effect=ImportError("tqdm not available"))
@config_context(Backend="python_test")
def test_fallback_progress_printing_silent_when_disabled(mock_trange, capsys):
    """With ShowBackendSwitchProgress off, the fallback printer writes nothing."""
    frame = pd.DataFrame([1, 2, 3])

    with config_context(ShowBackendSwitchProgress=False):
        frame.set_backend("pandas")

    streams = capsys.readouterr()
    assert streams.out == ""
    assert streams.err == ""


@config_context(Backend="python_test")
def test_tqdm_progress_bar_disabled_when_backend_switch_progress_false(capsys):
    """With ShowBackendSwitchProgress off, ``tqdm.auto.trange`` is never called."""
    frame = pd.DataFrame([1, 2, 3])

    with config_context(ShowBackendSwitchProgress=False), patch(
        "tqdm.auto.trange"
    ) as mock_trange:
        frame.set_backend("pandas")

    # Neither the progress bar nor any console output may appear.
    mock_trange.assert_not_called()
    streams = capsys.readouterr()
    assert streams.out == ""
    assert streams.err == ""


================================================
FILE: modin/tests/pandas/test_concat.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import numpy as np
import pandas
import pytest

import modin.pandas as pd
from modin.config import NPartitions, StorageFormat
from modin.pandas.io import from_pandas
from modin.utils import get_current_execution

from .utils import (
    create_test_dfs,
    default_to_pandas_ignore_string,
    df_equals,
    generate_dfs,
    generate_multiindex_dfs,
    generate_none_dfs,
)

# Set the `NPartitions` config value to 4 when this module is imported.
NPartitions.put(4)

# Filter warnings matching `default_to_pandas_ignore_string` for every test
# in this module.
pytestmark = pytest.mark.filterwarnings(default_to_pandas_ignore_string)

# Initialize env for storage format detection in @pytest.mark.*
pd.DataFrame()


def test_df_concat():
    """``pd.concat`` on the frames from ``generate_dfs`` matches ``pandas.concat``."""
    first, second = generate_dfs()

    modin_result = pd.concat([first, second])
    pandas_result = pandas.concat([first, second])
    df_equals(modin_result, pandas_result)


def test_concat():
    """Concatenating two modin frames matches concatenating their pandas sources."""
    pandas_df1, pandas_df2 = generate_dfs()
    modin_df1 = from_pandas(pandas_df1)
    modin_df2 = from_pandas(pandas_df2)

    modin_result = pd.concat([modin_df1, modin_df2])
    pandas_result = pandas.concat([pandas_df1, pandas_df2])
    df_equals(modin_result, pandas_result)


def test_concat_with_series():
    """Concatenate two frames plus a pandas Series along axis 0 and axis 1."""
    pandas_df1, pandas_df2 = generate_dfs()
    modin_df1, modin_df2 = from_pandas(pandas_df1), from_pandas(pandas_df2)
    extra_series = pandas.Series([1, 2, 3, 4], name="new_col")

    for axis in (0, 1):
        df_equals(
            pd.concat([modin_df1, modin_df2, extra_series], axis=axis),
            pandas.concat([pandas_df1, pandas_df2, extra_series], axis=axis),
        )


def test_concat_on_index():
    """Concatenate with ``axis`` given as "index", "rows" and 0 in turn."""
    pandas_df1, pandas_df2 = generate_dfs()
    modin_df1, modin_df2 = from_pandas(pandas_df1), from_pandas(pandas_df2)

    for axis in ("index", "rows", 0):
        df_equals(
            pd.concat([modin_df1, modin_df2], axis=axis),
            pandas.concat([pandas_df1, pandas_df2], axis=axis),
        )


@pytest.mark.parametrize("no_dup_cols", [True, False])
@pytest.mark.parametrize("different_len", [True, False])
def test_concat_on_column(no_dup_cols, different_len):
    """
    Concatenate along the column axis.

    Parameters
    ----------
    no_dup_cols : bool
        When true, columns shared with the second frame are dropped from the
        first one before concatenating.
    different_len : bool
        When true, the first frame is doubled in length beforehand.
    """
    df, df2 = generate_dfs()
    if no_dup_cols:
        shared_columns = set(df.columns) & set(df2.columns)
        df = df.drop(shared_columns, axis="columns")
    if different_len:
        df = pandas.concat([df, df], ignore_index=True)

    modin_df, modin_df2 = from_pandas(df), from_pandas(df2)

    # Both spellings of the column axis are exercised.
    for axis in (1, "columns"):
        df_equals(
            pd.concat([modin_df, modin_df2], axis=axis),
            pandas.concat([df, df2], axis=axis),
        )

    # Two Series of ones concatenated column-wise with `ignore_index=True`;
    # values and dtypes must both match pandas.
    modin_ones = [pd.Series(np.ones(10)), pd.Series(np.ones(10))]
    modin_result = pd.concat(modin_ones, axis=1, ignore_index=True)
    pandas_ones = [pandas.Series(np.ones(10)), pandas.Series(np.ones(10))]
    pandas_result = pandas.concat(pandas_ones, axis=1, ignore_index=True)
    df_equals(modin_result, pandas_result)
    assert modin_result.dtypes.equals(pandas_result.dtypes)


def test_invalid_axis_errors():
    """``pd.concat`` with ``axis=2`` raises ValueError."""
    modin_frames = [from_pandas(frame) for frame in generate_dfs()]

    with pytest.raises(ValueError):
        pd.concat(modin_frames, axis=2)


def test_mixed_concat():
    """Concatenate a mix of two ``from_pandas`` frames and one unconverted copy."""
    pandas_df1, pandas_df2 = generate_dfs()
    pandas_df3 = pandas_df1.copy()

    # The third entry is deliberately left unconverted.
    mixed_frames = [from_pandas(pandas_df1), from_pandas(pandas_df2), pandas_df3]

    modin_result = pd.concat(mixed_frames)
    pandas_result = pandas.concat([pandas_df1, pandas_df2, pandas_df3])
    df_equals(modin_result, pandas_result)


def test_mixed_inner_concat():
    """Inner-join concat over two ``from_pandas`` frames and one unconverted copy."""
    pandas_df1, pandas_df2 = generate_dfs()
    pandas_df3 = pandas_df1.copy()

    mixed_frames = [from_pandas(pandas_df1), from_pandas(pandas_df2), pandas_df3]

    modin_result = pd.concat(mixed_frames, join="inner")
    pandas_result = pandas.concat([pandas_df1, pandas_df2, pandas_df3], join="inner")
    df_equals(
        modin_result,
        pandas_result,
        # dtype comparison is disabled because of
        # https://github.com/modin-project/modin/issues/5963
        check_dtypes=False,
    )


def test_mixed_none_concat():
    """Mixed concat over the frames produced by ``generate_none_dfs``."""
    pandas_df1, pandas_df2 = generate_none_dfs()
    pandas_df3 = pandas_df1.copy()

    mixed_frames = [from_pandas(pandas_df1), from_pandas(pandas_df2), pandas_df3]

    modin_result = pd.concat(mixed_frames)
    pandas_result = pandas.concat([pandas_df1, pandas_df2, pandas_df3])
    df_equals(modin_result, pandas_result)


def test_ignore_index_concat():
    """``ignore_index=True`` gives the same result through modin and pandas."""
    first, second = generate_dfs()

    modin_result = pd.concat([first, second], ignore_index=True)
    pandas_result = pandas.concat([first, second], ignore_index=True)
    df_equals(modin_result, pandas_result)


def test_concat_non_subscriptable_keys():
    """Pass dict views (``.values()`` / ``.keys()``) as ``objs`` and ``keys``."""
    frame_data = np.random.randint(0, 100, size=(2**10, 2**6))
    modin_base = pd.DataFrame(frame_data).add_prefix("col")
    pandas_base = pandas.DataFrame(frame_data).add_prefix("col")

    labels = ("c", "b")
    modin_by_label = {label: modin_base.copy() for label in labels}
    pandas_by_label = {label: pandas_base.copy() for label in labels}

    # Dict views are iterable but not subscriptable.
    modin_result = pd.concat(modin_by_label.values(), keys=modin_by_label.keys())
    pandas_result = pandas.concat(
        pandas_by_label.values(), keys=pandas_by_label.keys()
    )
    df_equals(modin_result, pandas_result)


def test_concat_series_only():
    """Concatenate a 1000-element Series with itself."""
    values = list(range(1000))
    modin_series = pd.Series(values)
    pandas_series = pandas.Series(values)

    modin_result = pd.concat([modin_series, modin_series])
    pandas_result = pandas.concat([pandas_series, pandas_series])
    df_equals(modin_result, pandas_result)


def test_concat_5776():
    """Column-wise concat of a dict of data-less Series that only carry an index."""
    labels = ["a", "b"]
    modin_data = {label: pd.Series(index=range(3)) for label in labels}
    pandas_data = {label: pandas.Series(index=range(3)) for label in labels}

    modin_result = pd.concat(modin_data, axis="columns")
    pandas_result = pandas.concat(pandas_data, axis="columns")
    df_equals(modin_result, pandas_result)


def test_concat_6840():
    """
    Concatenate the results of ``groupby(..., as_index=False)[col].agg("sum")``.

    The same two aggregated frames are built once with modin and once with
    pandas, and the concatenated results are compared.
    """
    rows_per_frame = (
        [["a", 1], ["b", 2], ["b", 4]],
        [["a", 3], ["a", 4], ["b", 1]],
    )

    def summed_by_letter(lib, rows):
        # Build the frame with `lib` and sum "number" within each "letter".
        frame = lib.DataFrame(rows, columns=["letter", "number"])
        return frame.groupby("letter", as_index=False)["number"].agg("sum")

    modin_parts = [summed_by_letter(pd, rows) for rows in rows_per_frame]
    pandas_parts = [summed_by_letter(pandas, rows) for rows in rows_per_frame]

    df_equals(
        pd.concat(modin_parts),
        pandas.concat(pandas_parts),
    )


def test_concat_with_empty_frame():
    """Concat involving empty frames: empty + Series, and two index-only frames."""
    # Case 1: an empty frame followed by a two-element Series.
    row_data = {0: "a", 1: "b"}
    modin_result = pd.concat([pd.DataFrame(), pd.Series(row_data)])
    pandas_result = pandas.concat([pandas.DataFrame(), pandas.Series(row_data)])
    df_equals(modin_result, pandas_result)

    # Case 2: two frames created with only an index, along both axes.
    md_empty1, pd_empty1 = create_test_dfs(index=[1, 2, 3])
    md_empty2, pd_empty2 = create_test_dfs(index=[2, 3, 4])

    for axis in (0, 1):
        df_equals(
            pd.concat([md_empty1, md_empty2], axis=axis),
            pandas.concat([pd_empty1, pd_empty2], axis=axis),
        )


@pytest.mark.parametrize("axis", [0, 1])
@pytest.mark.parametrize("names", [False, True])
def test_concat_multiindex(axis, names):
    """
    Concatenate the frames from ``generate_multiindex_dfs`` with ``keys``.

    Parameters
    ----------
    axis : int
        Axis passed to ``generate_multiindex_dfs`` and to ``concat``.
    names : bool
        Whether to pass explicit level names; when true, one name is
        generated for each existing level plus one more.
    """
    pd_df1, pd_df2 = generate_multiindex_dfs(axis=axis)
    md_df1, md_df2 = from_pandas(pd_df1), from_pandas(pd_df2)

    if names:
        level_count = pd_df1.axes[axis].nlevels + 1
        level_names = [str(i) for i in np.arange(level_count)]
    else:
        level_names = None

    concat_kwargs = {"keys": ["first", "second"], "axis": axis, "names": level_names}
    df_equals(
        pd.concat([md_df1, md_df2], **concat_kwargs),
        pandas.concat([pd_df1, pd_df2], **concat_kwargs),
    )


@pytest.mark.parametrize("axis", [0, 1])
def test_concat_dictionary(axis):
    """Pass a dict of frames keyed "A"/"B" as ``objs``."""
    pandas_df, pandas_df2 = generate_dfs()
    modin_df, modin_df2 = from_pandas(pandas_df), from_pandas(pandas_df2)

    modin_result = pd.concat({"A": modin_df, "B": modin_df2}, axis=axis)
    pandas_result = pandas.concat({"A": pandas_df, "B": pandas_df2}, axis=axis)
    df_equals(modin_result, pandas_result)


@pytest.mark.parametrize("sort", [False, True])
@pytest.mark.parametrize("join", ["inner", "outer"])
@pytest.mark.parametrize("axis", [0, 1])
def test_sort_order(sort, join, axis):
    """
    Check that the label order after concat matches pandas for every ``sort``/``join``.

    Parameters
    ----------
    sort : bool
        Forwarded to ``concat`` as ``sort``.
    join : str
        Forwarded to ``concat`` as ``join`` ("inner" or "outer").
    axis : int
        Forwarded to ``concat`` as ``axis``.
    """
    # Both frames list their columns in non-sorted order on purpose.
    pandas_df = pandas.DataFrame({"c": [3], "d": [4]}, columns=["d", "c"])
    pandas_df2 = pandas.DataFrame({"a": [1], "b": [2]}, columns=["b", "a"])
    modin_df, modin_df2 = from_pandas(pandas_df), from_pandas(pandas_df2)

    # `axis` is forwarded so that each parametrized case exercises the axis
    # it is labelled with instead of always falling back to the default.
    concat_kwargs = {"join": join, "sort": sort, "axis": axis}
    pandas_concat = pandas.concat([pandas_df, pandas_df2], **concat_kwargs)
    modin_concat = pd.concat([modin_df, modin_df2], **concat_kwargs)
    df_equals(
        pandas_concat,
        modin_concat,
        # https://github.com/modin-project/modin/issues/5963
        check_dtypes=join != "inner",
    )
    assert list(pandas_concat.columns) == list(modin_concat.columns)


@pytest.mark.parametrize(
    "data1, index1, data2, index2",
    [
        (None, None, None, None),
        (None, None, {"A": [1, 2, 3]}, pandas.Index([1, 2, 3], name="Test")),
        ({"A": [1, 2, 3]}, pandas.Index([1, 2, 3], name="Test"), None, None),
        ({"A": [1, 2, 3]}, None, None, None),
        (None, None, {"A": [1, 2, 3]}, None),
        (None, pandas.Index([1, 2, 3], name="Test"), None, None),
        (None, None, None, pandas.Index([1, 2, 3], name="Test")),
    ],
)
@pytest.mark.parametrize("axis", [0, 1])
@pytest.mark.parametrize("join", ["inner", "outer"])
def test_concat_empty(data1, index1, data2, index2, axis, join):
    """
    Concatenate two frames where data and/or index may be ``None``.

    Each ``(data, index)`` pair is fed to the DataFrame constructor of pandas
    and of modin; the concatenated results are then compared.
    """

    def build_and_concat(lib):
        left = lib.DataFrame(data1, index=index1)
        right = lib.DataFrame(data2, index=index2)
        return lib.concat((left, right), axis=axis, join=join)

    pandas_result = build_and_concat(pandas)
    modin_result = build_and_concat(pd)
    df_equals(
        pandas_result,
        modin_result,
        # https://github.com/modin-project/modin/issues/5963
        check_dtypes=join != "inner",
    )


def test_concat_empty_df_series():
    """Concat a frame with a Series when one of the two is constructed empty."""
    # Each case holds the positional arguments for the DataFrame and the
    # Series constructor; an empty tuple means "construct with no arguments".
    cases = (
        (({"A": [1, 2, 3]},), ()),
        ((), ([1, 2, 3],)),
    )
    for frame_args, series_args in cases:
        pandas_result = pandas.concat(
            (pandas.DataFrame(*frame_args), pandas.Series(*series_args))
        )
        modin_result = pd.concat((pd.DataFrame(*frame_args), pd.Series(*series_args)))
        df_equals(
            pandas_result,
            modin_result,
            # https://github.com/modin-project/modin/issues/5964
            check_dtypes=False,
        )


@pytest.mark.skipif(
    StorageFormat.get() != "Base",
    reason="https://github.com/modin-project/modin/issues/5696",
)
@pytest.mark.parametrize("col_type", [None, "str"])
@pytest.mark.parametrize("df1_cols", [0, 90, 100])
@pytest.mark.parametrize("df2_cols", [0, 90, 100])
@pytest.mark.parametrize("df1_rows", [0, 100])
@pytest.mark.parametrize("df2_rows", [0, 100])
@pytest.mark.parametrize("idx_type", [None, "str"])
@pytest.mark.parametrize("ignore_index", [True, False])
@pytest.mark.parametrize("sort", [True, False])
@pytest.mark.parametrize("join", ["inner", "outer"])
def test_concat_different_num_cols(
    col_type,
    df1_cols,
    df2_cols,
    df1_rows,
    df2_rows,
    idx_type,
    ignore_index,
    sort,
    join,
):
    """
    Concatenate two generated frames whose row and column counts may differ.

    Parameters
    ----------
    col_type : str or None
        "str" turns the generated cell values into strings.
    df1_cols, df2_cols : int
        Number of columns in the first / second frame.
    df1_rows, df2_rows : int
        Number of rows in the first / second frame.
    idx_type : str or None
        "str" gives each frame a named string index; otherwise no index is passed.
    ignore_index, sort, join
        Forwarded to ``concat``.
    """

    def as_label(val):
        return f"str_{val}"

    def build_frame(frame_type, ncols, nrows):
        # Column n holds the consecutive integers that follow those of column
        # n - 1, starting from 1 in the first column.
        columns = {}
        for col_no in range(1, ncols + 1):
            first_value = (col_no - 1) * nrows + 1
            cells = range(first_value, first_value + nrows)
            if col_type == "str":
                cells = map(as_label, cells)
            columns[f"Col_{col_no}"] = list(cells)

        if idx_type == "str":
            index = pandas.Index(
                map(as_label, range(1, nrows + 1)), name=f"Index_{nrows}"
            )
        else:
            index = None
        return frame_type(data=columns, index=index)

    def build_and_concat(frame_type, lib):
        first = build_frame(frame_type, df1_cols, df1_rows)
        second = build_frame(frame_type, df2_cols, df2_rows)
        return lib.concat(
            [first, second], ignore_index=ignore_index, sort=sort, join=join
        )

    modin_result = build_and_concat(pd.DataFrame, pd)
    pandas_result = build_and_concat(pandas.DataFrame, pandas)

    # Empty slicing causes this bug:
    # https://github.com/modin-project/modin/issues/5974
    has_empty_dimension = 0 in (df1_cols, df2_cols, df1_rows, df2_rows)
    hits_empty_slicing_bug = (
        get_current_execution() == "BaseOnPython" and has_empty_dimension
    )
    df_equals(pandas_result, modin_result, check_dtypes=not hits_empty_slicing_bug)


================================================
FILE: modin/tests/pandas/test_expanding.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import numpy as np
import pandas
import pytest

import modin.pandas as pd
from modin.config import NPartitions
from modin.tests.test_utils import (
    current_execution_is_native,
    warns_that_defaulting_to_pandas_if,
)

from .utils import (
    create_test_dfs,
    create_test_series,
    df_equals,
    eval_general,
    test_data,
    test_data_keys,
    test_data_values,
)

# Set the `NPartitions` config value to 4 when this module is imported.
NPartitions.put(4)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("min_periods", [None, 5])
@pytest.mark.parametrize("axis", [0, 1])
@pytest.mark.parametrize(
    "method, kwargs",
    [
        ("count", {}),
        ("sum", {}),
        ("mean", {}),
        ("median", {}),
        ("skew", {}),
        ("kurt", {}),
        ("var", {"ddof": 0}),
        ("std", {"ddof": 0}),
        ("min", {}),
        ("max", {}),
        ("rank", {}),
        ("sem", {"ddof": 0}),
        ("quantile", {"q": 0.1}),
    ],
)
def test_dataframe(data, min_periods, axis, method, kwargs):
    """Run one expanding-window method on a Modin/pandas frame pair via ``eval_general``."""

    def apply_expanding(df):
        # Build the expanding window first, then look the method up by name.
        window = df.expanding(min_periods=min_periods, axis=axis)
        return getattr(window, method)(**kwargs)

    modin_df, pandas_df = create_test_dfs(data)
    eval_general(modin_df, pandas_df, apply_expanding)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("min_periods", [None, 5])
@pytest.mark.parametrize("axis", [0, 1])
@pytest.mark.parametrize("method", ["corr", "cov"])
def test_dataframe_corr_cov(data, min_periods, axis, method):
    """Run expanding ``corr``/``cov`` (no ``other``) on a Modin/pandas frame pair."""

    def apply_pairwise(df):
        window = df.expanding(min_periods=min_periods, axis=axis)
        return getattr(window, method)()

    # Frame creation stays inside the context, so the same statements are covered
    # by the warning check as before.
    with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):
        modin_df, pandas_df = create_test_dfs(data)
        eval_general(modin_df, pandas_df, apply_pairwise)


@pytest.mark.parametrize("method", ["corr", "cov"])
def test_dataframe_corr_cov_with_self(method):
    """Run expanding ``corr``/``cov`` where ``other`` is the frame itself."""

    def apply_with_other(df, other):
        return getattr(df.expanding(), method)(other=other)

    modin_frame, pandas_frame = create_test_dfs(test_data["float_nan_data"])
    with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):
        # ``other`` is the pandas frame by default; the Modin run gets the Modin
        # frame through ``md_extra_kwargs``.
        eval_general(
            modin_frame,
            pandas_frame,
            apply_with_other,
            other=pandas_frame,
            md_extra_kwargs={"other": modin_frame},
        )


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("min_periods", [None, 5])
def test_dataframe_agg(data, min_periods):
    """Compare ``Expanding.aggregate`` between Modin and pandas DataFrames."""
    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)

    # aggregates are only supported on axis 0
    window_kwargs = {"min_periods": min_periods, "axis": 0}
    pandas_window = pandas_df.expanding(**window_kwargs)
    modin_window = modin_df.expanding(**window_kwargs)

    # Single reducer.
    modin_single = modin_window.aggregate(np.sum)
    pandas_single = pandas_window.aggregate(np.sum)
    df_equals(modin_single, pandas_single)

    # List of reducers; each call receives its own list object.
    reducers = (np.sum, np.mean)
    pandas_multi = pandas_window.aggregate(list(reducers))
    modin_multi = modin_window.aggregate(list(reducers))
    df_equals(pandas_multi, modin_multi)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("min_periods", [None, 5])
@pytest.mark.parametrize(
    "method, kwargs",
    [
        ("count", {}),
        ("sum", {}),
        ("mean", {}),
        ("median", {}),
        ("skew", {}),
        ("kurt", {}),
        ("corr", {}),
        ("cov", {}),
        ("var", {"ddof": 0}),
        ("std", {"ddof": 0}),
        ("min", {}),
        ("max", {}),
        ("rank", {}),
        ("sem", {"ddof": 0}),
        ("quantile", {"q": 0.1}),
    ],
)
def test_series(data, min_periods, method, kwargs):
    """Run one expanding-window method on a Modin/pandas series pair via ``eval_general``."""

    def apply_expanding(series):
        window = series.expanding(min_periods=min_periods)
        return getattr(window, method)(**kwargs)

    modin_series, pandas_series = create_test_series(data)
    eval_general(modin_series, pandas_series, apply_expanding)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("min_periods", [None, 5])
def test_series_agg(data, min_periods):
    """Compare ``Expanding.aggregate`` between Modin and pandas Series."""
    modin_series, pandas_series = create_test_series(data)
    pandas_window = pandas_series.expanding(min_periods=min_periods)
    modin_window = modin_series.expanding(min_periods=min_periods)

    # Single reducer.
    modin_single = modin_window.aggregate(np.sum)
    pandas_single = pandas_window.aggregate(np.sum)
    df_equals(modin_single, pandas_single)

    # List of reducers; each call receives its own list object.
    reducers = (np.sum, np.mean)
    pandas_multi = pandas_window.aggregate(list(reducers))
    modin_multi = modin_window.aggregate(list(reducers))
    df_equals(pandas_multi, modin_multi)


@pytest.mark.parametrize("method", ["corr", "cov"])
def test_series_corr_cov_with_self(method):
    """Run expanding ``corr``/``cov`` on a series where ``other`` is the series itself."""

    def apply_with_other(series, other):
        return getattr(series.expanding(), method)(other=other)

    modin_series, pandas_series = create_test_series(test_data["float_nan_data"])
    # ``other`` is the pandas series by default; the Modin run gets the Modin
    # series through ``md_extra_kwargs``.
    eval_general(
        modin_series,
        pandas_series,
        apply_with_other,
        other=pandas_series,
        md_extra_kwargs={"other": modin_series},
    )


================================================
FILE: modin/tests/pandas/test_general.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import numpy as np
import pandas
import pytest
from numpy.testing import assert_array_equal

import modin.pandas as pd
from modin.pandas.io import to_pandas
from modin.pandas.testing import assert_frame_equal
from modin.tests.test_utils import (
    current_execution_is_native,
    df_or_series_using_native_execution,
    warns_that_defaulting_to_pandas_if,
)
from modin.utils import get_current_execution

from .utils import (
    bool_arg_keys,
    bool_arg_values,
    create_test_dfs,
    df_equals,
    eval_general,
    is_native_shallow_copy,
    sort_if_range_partitioning,
    sort_index_for_equal_values,
    test_data_keys,
    test_data_values,
)

# Module-level pytest marker, applied to every test in this module: use the
# "default" warning action for UserWarnings whose message matches the
# `DataFrame.insert`-on-empty-DataFrame text below.
# NOTE(review): presumably this relaxes a stricter project-wide warning filter — confirm.
pytestmark = pytest.mark.filterwarnings(
    "default:`DataFrame.insert` for empty DataFrame is not currently supported.*:UserWarning"
)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("append_na", [True, False])
@pytest.mark.parametrize("op", ["isna", "isnull", "notna", "notnull"])
def test_isna_isnull_notna_notnull(data, append_na, op):
    """Compare the top-level null-check functions between Modin and pandas."""
    pandas_df = pandas.DataFrame(data)
    modin_df = pd.DataFrame(pandas_df)
    if append_na:
        # Add an all-None and an all-NaN column to the pandas frame, then the Modin one.
        for frame in (pandas_df, modin_df):
            frame["NONE_COL"] = None
            frame["NAN_COL"] = np.nan

    pandas_op = getattr(pandas, op)
    modin_op = getattr(pd, op)

    # DataFrame input.
    expected = pandas_op(pandas_df)
    actual = modin_op(modin_df)
    df_equals(actual, expected)

    # Series input.
    sample = [1, np.nan, 2]
    actual = modin_op(pd.Series(sample))
    expected = pandas_op(pandas.Series(sample))
    df_equals(actual, expected)

    # Scalar input.
    assert pd.isna(np.nan) == pandas.isna(np.nan)


def test_merge():
    """Compare ``pd.merge`` with ``pandas.merge`` over several key/index combinations."""
    left_data = {
        "col1": [0, 1, 2, 3],
        "col2": [4, 5, 6, 7],
        "col3": [8, 9, 0, 1],
        "col4": [2, 4, 5, 6],
    }
    right_data = {"col1": [0, 1, 2], "col2": [1, 5, 6]}

    modin_df, pandas_df = pd.DataFrame(left_data), pandas.DataFrame(left_data)
    modin_df2, pandas_df2 = pd.DataFrame(right_data), pandas.DataFrame(right_data)

    # Each entry is (extra merge kwargs, flag). The flag is True when the
    # default-to-pandas warning is checked for every ``how``, and False when it is
    # checked only for ``how="outer"``.
    checked_cases = (
        ({}, False),
        ({"left_on": "col1", "right_index": True}, True),
        ({"left_index": True, "right_on": "col1"}, True),
        ({"left_on": "col1", "right_on": "col1"}, False),
        ({"left_on": "col2", "right_on": "col2"}, False),
    )
    both_indexes = {"left_index": True, "right_index": True}

    for how in ("outer", "inner"):
        for merge_kwargs, warns_for_any_how in checked_cases:
            expect_default = (
                warns_for_any_how or how == "outer"
            ) and not df_or_series_using_native_execution(modin_df)
            with warns_that_defaulting_to_pandas_if(expect_default):
                modin_result = pd.merge(modin_df, modin_df2, how=how, **merge_kwargs)
            pandas_result = pandas.merge(
                pandas_df, pandas_df2, how=how, **merge_kwargs
            )
            df_equals(modin_result, pandas_result)

        # left_index and right_index: runs outside any warning-checking context
        modin_result = pd.merge(modin_df, modin_df2, how=how, **both_indexes)
        pandas_result = pandas.merge(pandas_df, pandas_df2, how=how, **both_indexes)
        df_equals(modin_result, pandas_result)

    # A Series on the left raises ValueError here.
    modin_series = pd.Series(left_data["col1"])
    with pytest.raises(ValueError):
        pd.merge(modin_series, modin_df2)

    # A non-frame object on the left raises TypeError.
    with pytest.raises(TypeError):
        pd.merge("Non-valid type", modin_df2)


def test_merge_ordered():
    """Check ``pd.merge_ordered`` returns a Modin frame and rejects plain dicts."""
    data_a = {
        "key": list("aceace"),
        "lvalue": [1, 2, 3, 1, 2, 3],
        "group": list("aaabbb"),
    }
    data_b = {"key": list("bcd"), "rvalue": [1, 2, 3]}
    options = {"fill_method": "ffill", "left_by": "group"}

    modin_left = pd.DataFrame(data_a)
    modin_right = pd.DataFrame(data_b)

    with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):
        merged = pd.merge_ordered(modin_left, modin_right, **options)
        assert isinstance(merged, pd.DataFrame)

    # Raw dictionaries are not accepted as operands.
    with pytest.raises(TypeError):
        pd.merge_ordered(data_a, data_b, **options)


@pytest.mark.parametrize("right_index", [None, [0] * 5], ids=["default", "non_unique"])
def test_merge_asof(right_index):
    """Check ``pd.merge_asof`` returns a Modin frame for several option sets."""
    left = pd.DataFrame({"a": [1, 5, 10], "left_val": ["a", "b", "c"]})
    right = pd.DataFrame(
        {"a": [1, 2, 3, 6, 7], "right_val": [1, 2, 3, 6, 7]}, index=right_index
    )

    # Column-keyed merges: defaults first, then one option changed at a time.
    for extra_options in (
        {},
        {"allow_exact_matches": False},
        {"direction": "forward"},
        {"direction": "nearest"},
    ):
        with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):
            merged = pd.merge_asof(left, right, on="a", **extra_options)
            assert isinstance(merged, pd.DataFrame)

    # Index-keyed merge.
    left = pd.DataFrame({"left_val": ["a", "b", "c"]}, index=[1, 5, 10])
    right = pd.DataFrame({"right_val": [1, 2, 3, 6, 7]}, index=[1, 2, 3, 6, 7])

    with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):
        merged = pd.merge_asof(left, right, left_index=True, right_index=True)
        assert isinstance(merged, pd.DataFrame)

    # Raw dictionaries are not accepted as operands.
    raw_left = {"left_val": ["a", "b", "c"]}
    raw_right = {"right_val": [1, 2, 3, 6, 7]}
    with pytest.raises(ValueError):
        pd.merge_asof(raw_left, raw_right, left_index=True, right_index=True)


def test_merge_asof_on_variations():
    """Each way of naming the ``merge_asof`` join key gives the same result as pandas."""
    left = {"a": [1, 5, 10], "left_val": ["a", "b", "c"]}
    left_index = [6, 8, 12]
    right = {"a": [1, 2, 3, 6, 7], "right_val": ["d", "e", "f", "g", "h"]}
    right_index = [6, 7, 8, 9, 15]

    pandas_left = pandas.DataFrame(left, index=left_index)
    pandas_right = pandas.DataFrame(right, index=right_index)
    modin_left = pd.DataFrame(left, index=left_index)
    modin_right = pd.DataFrame(right, index=right_index)

    key_variants = (
        {"on": "a"},
        {"left_on": "a", "right_on": "a"},
        {"left_on": "a", "right_index": True},
        {"left_index": True, "right_on": "a"},
        {"left_index": True, "right_index": True},
    )
    for key_kwargs in key_variants:
        pandas_merged = pandas.merge_asof(pandas_left, pandas_right, **key_kwargs)
        with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):
            modin_merged = pd.merge_asof(modin_left, modin_right, **key_kwargs)
        df_equals(pandas_merged, modin_merged)


def test_merge_asof_suffixes():
    """``merge_asof`` suffix handling gives the same results and errors as pandas."""
    left = {"a": [1, 5, 10]}
    right = {"a": [2, 3, 6]}
    pandas_left, pandas_right = pandas.DataFrame(left), pandas.DataFrame(right)
    modin_left, modin_right = pd.DataFrame(left), pd.DataFrame(right)
    index_join = {"left_index": True, "right_index": True}

    for suffixes in [("a", "b"), (False, "c"), ("d", False)]:
        pandas_merged = pandas.merge_asof(
            pandas_left, pandas_right, suffixes=suffixes, **index_join
        )
        with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):
            modin_merged = pd.merge_asof(
                modin_left, modin_right, suffixes=suffixes, **index_join
            )
        df_equals(pandas_merged, modin_merged)

    # Disabling both suffixes raises ValueError in pandas and in Modin.
    no_suffixes = (False, False)
    with pytest.raises(ValueError):
        pandas.merge_asof(
            pandas_left, pandas_right, suffixes=no_suffixes, **index_join
        )
    with pytest.raises(ValueError), warns_that_defaulting_to_pandas_if(
        not current_execution_is_native()
    ):
        pd.merge_asof(modin_left, modin_right, suffixes=no_suffixes, **index_join)


def test_merge_asof_bad_arguments():
    """Invalid ``merge_asof`` argument combinations raise in pandas and in Modin."""
    left = {"a": [1, 5, 10], "b": [5, 7, 9]}
    right = {"a": [2, 3, 6], "b": [6, 5, 20]}
    pandas_left, pandas_right = pandas.DataFrame(left), pandas.DataFrame(right)
    modin_left, modin_right = pd.DataFrame(left), pd.DataFrame(right)

    def pandas_rejects(expected_error=ValueError, **merge_kwargs):
        # pandas must raise ``expected_error`` for this combination.
        with pytest.raises(expected_error):
            pandas.merge_asof(pandas_left, pandas_right, **merge_kwargs)

    def modin_rejects(**merge_kwargs):
        # Modin must raise ValueError; the default-to-pandas warning check wraps
        # the same call.
        with pytest.raises(ValueError), warns_that_defaulting_to_pandas_if(
            not current_execution_is_native()
        ):
            pd.merge_asof(modin_left, modin_right, **merge_kwargs)

    unusable = "can't do with by"

    # Can't mix by with left_by/right_by; can't mix on with left_on/right_on
    for bad_kwargs in (
        {"on": "a", "by": "b", "left_by": unusable},
        {"by": "b", "on": "a", "right_by": unusable},
        {"on": "a", "left_on": unusable},
        {"on": "a", "right_on": unusable},
    ):
        pandas_rejects(**bad_kwargs)
        modin_rejects(**bad_kwargs)

    # Can't mix left_index with left_on or on, similarly for right.
    # These combinations are checked on the Modin side only.
    for bad_kwargs in (
        {"on": "a", "right_index": True},
        {"left_on": "a", "right_on": "a", "right_index": True},
        {"on": "a", "left_index": True},
        {"left_on": "a", "right_on": "a", "left_index": True},
    ):
        modin_rejects(**bad_kwargs)

    # Need both left and right
    # Pandas bug, didn't validate inputs sufficiently: only a generic Exception
    # is required from pandas for the one-sided cases.
    for one_sided in ({"left_on": "a"}, {"right_on": "a"}):
        pandas_rejects(Exception, **one_sided)
        modin_rejects(**one_sided)

    # No key arguments at all.
    pandas_rejects()
    modin_rejects()


def test_merge_asof_merge_options():
    """Compare ``merge_asof`` grouping, tolerance, direction and exact-match options."""
    quote_times = [
        "2016-05-25 13:30:00.023",
        "2016-05-25 13:30:00.023",
        "2016-05-25 13:30:00.030",
        "2016-05-25 13:30:00.041",
        "2016-05-25 13:30:00.048",
        "2016-05-25 13:30:00.049",
        "2016-05-25 13:30:00.072",
        "2016-05-25 13:30:00.075",
    ]
    trade_times = [
        "2016-05-25 13:30:00.023",
        "2016-05-25 13:30:00.038",
        "2016-05-25 13:30:00.048",
        "2016-05-25 13:30:00.048",
        "2016-05-25 13:30:00.048",
    ]
    modin_quotes = pd.DataFrame(
        {
            "time": [pd.Timestamp(stamp) for stamp in quote_times],
            "ticker": ["GOOG", "MSFT", "MSFT", "MSFT", "GOOG", "AAPL", "GOOG", "MSFT"],
            "bid": [720.50, 51.95, 51.97, 51.99, 720.50, 97.99, 720.50, 52.01],
            "ask": [720.93, 51.96, 51.98, 52.00, 720.93, 98.01, 720.88, 52.03],
        }
    )
    modin_trades = pd.DataFrame(
        {
            "time": [pd.Timestamp(stamp) for stamp in trade_times],
            "ticker2": ["MSFT", "MSFT", "GOOG", "GOOG", "AAPL"],
            "price": [51.95, 51.95, 720.77, 720.92, 98.0],
            "quantity": [75, 155, 100, 100, 100],
        }
    )
    pandas_quotes, pandas_trades = to_pandas(modin_quotes), to_pandas(modin_trades)

    def check_options(**options):
        # Only the Modin call runs inside the warning-checking context; the pandas
        # reference is computed afterwards, while building the comparison.
        with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):
            modin_result = pd.merge_asof(
                modin_quotes, modin_trades, on="time", **options
            )
        df_equals(
            pandas.merge_asof(pandas_quotes, pandas_trades, on="time", **options),
            modin_result,
        )

    # left_by + right_by
    check_options(left_by="ticker", right_by="ticker2")

    # Give both trade frames a "ticker" column so plain ``by`` can be used below.
    pandas_trades["ticker"] = pandas_trades["ticker2"]
    modin_trades["ticker"] = modin_trades["ticker2"]

    for extra_options in (
        {},  # Just by
        {"tolerance": pd.Timedelta("2ms")},  # Tolerance
        {"direction": "forward"},  # Direction
        # Allow exact matches
        {"tolerance": pd.Timedelta("10ms"), "allow_exact_matches": False},
    ):
        check_options(by="ticker", **extra_options)


def test_pivot():
    """Check ``pd.pivot`` result type, Series rejection and parity with pandas."""
    source = pd.DataFrame(
        {
            "foo": ["one", "one", "one", "two", "two", "two"],
            "bar": ["A", "B", "C", "A", "B", "C"],
            "baz": [1, 2, 3, 4, 5, 6],
            "zoo": ["x", "y", "z", "q", "w", "t"],
        }
    )

    pivoted = pd.pivot(source, index="foo", columns="bar", values="baz")
    assert isinstance(pivoted, pd.DataFrame)

    # A Series as the data argument raises ValueError.
    with pytest.raises(ValueError):
        pd.pivot(source["bar"], index="foo", columns="bar", values="baz")

    # FIXME: Failed for some reason on 'BaseOnPython' and 'NativeOnNative'
    # https://github.com/modin-project/modin/issues/6240
    if get_current_execution() != "BaseOnPython" and not current_execution_is_native():
        for pivot_kwargs in ({"columns": "bar"}, {"index": "foo", "columns": "bar"}):
            df_equals(
                pd.pivot(source, **pivot_kwargs),
                pandas.pivot(source._to_pandas(), **pivot_kwargs),
            )


def test_pivot_values_is_none():
    """Check ``pd.pivot`` returns a Modin frame when ``values`` is not passed."""
    columns = {
        "foo": ["one", "one", "one", "two", "two", "two"],
        "bar": ["A", "B", "C", "A", "B", "C"],
        "baz": [1, 2, 3, 4, 5, 6],
        "zoo": ["x", "y", "z", "q", "w", "t"],
    }
    source = pd.DataFrame(columns)
    pivoted = pd.pivot(source, index="foo", columns="bar")
    assert isinstance(pivoted, pd.DataFrame)


def test_pivot_table():
    """Check ``pd.pivot_table`` returns a Modin frame and rejects a Series."""
    sizes = ["small", "large", "large", "small", "small"]
    sizes += ["large", "small", "small", "large"]
    source = pd.DataFrame(
        {
            "A": ["foo", "foo", "foo", "foo", "foo", "bar", "bar", "bar", "bar"],
            "B": ["one", "one", "one", "two", "two", "one", "one", "two", "two"],
            "C": sizes,
            "D": [1, 2, 2, 3, 3, 4, 5, 6, 7],
            "E": [2, 4, 5, 5, 6, 6, 8, 9, 9],
        }
    )

    table = pd.pivot_table(
        source, values="D", index=["A", "B"], columns=["C"], aggfunc=np.sum
    )
    assert isinstance(table, pd.DataFrame)

    # A Series as the data argument raises ValueError.
    single_column = source["C"]
    with pytest.raises(ValueError):
        pd.pivot_table(
            single_column,
            values="D",
            index=["A", "B"],
            columns=["C"],
            aggfunc=np.sum,
        )


def test_unique():
    """Compare ``pd.unique`` with ``pandas.unique`` for several input kinds."""

    def compare(*results):
        return sort_if_range_partitioning(*results, comparator=assert_array_equal)

    def check_unique(make_values):
        # ``make_values`` builds the input from the given library module, so the
        # Modin input is built and processed before the pandas one.
        modin_result = pd.unique(make_values(pd))
        pandas_result = pandas.unique(make_values(pandas))
        compare(modin_result, pandas_result)
        assert modin_result.shape == pandas_result.shape

    input_factories = [
        # plain list
        lambda lib: [2, 1, 3, 3],
        # integer Series
        lambda lib: lib.Series([2] + [1] * 5),
        # Series of naive timestamps
        lambda lib: lib.Series([lib.Timestamp("20160101"), lib.Timestamp("20160101")]),
        # Series of tz-aware timestamps
        lambda lib: lib.Series(
            [
                lib.Timestamp("20160101", tz="US/Eastern"),
                lib.Timestamp("20160101", tz="US/Eastern"),
            ]
        ),
        # Index of tz-aware timestamps
        lambda lib: lib.Index(
            [
                lib.Timestamp("20160101", tz="US/Eastern"),
                lib.Timestamp("20160101", tz="US/Eastern"),
            ]
        ),
        # categorical Series
        lambda lib: lib.Series(lib.Categorical(list("baabc"))),
    ]
    for make_values in input_factories:
        check_unique(make_values)


@pytest.mark.parametrize("normalize, bins, dropna", [(True, 3, False)])
def test_value_counts(normalize, bins, dropna):
    """Compare ``pd.value_counts`` with pandas for ``normalize``, ``bins``, ``dropna``."""
    values = np.array([3, 1, 2, 3, 4, np.nan])

    def sorted_counts(lib, ascending, **options):
        # We sort indices for Modin and pandas result because of issue #1650
        counts = lib.value_counts(values, ascending=ascending, **options)
        return sort_index_for_equal_values(counts, ascending)

    # normalize
    modin_result = sorted_counts(pd, False, normalize=normalize)
    pandas_result = sorted_counts(pandas, False, normalize=normalize)
    df_equals(modin_result, pandas_result)

    # bins: only the Modin call is wrapped in the warning check
    with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):
        modin_result = sorted_counts(pd, False, bins=bins)
    pandas_result = sorted_counts(pandas, False, bins=bins)
    df_equals(modin_result, pandas_result)

    # dropna, ascending order
    modin_result = sorted_counts(pd, True, dropna=dropna)
    pandas_result = sorted_counts(pandas, True, dropna=dropna)
    df_equals(modin_result, pandas_result)


def test_to_datetime():
    """Compare ``pd.to_datetime`` with pandas for frame, series, scalar and list input."""
    # DataFrame input for to_datetime
    date_parts = {"year": [2015, 2016], "month": [2, 3], "day": [4, 5]}
    modin_df, pandas_df = pd.DataFrame(date_parts), pandas.DataFrame(date_parts)
    df_equals(pd.to_datetime(modin_df), pandas.to_datetime(pandas_df))

    # Series input for to_datetime
    date_strings = ["3/11/2000", "3/12/2000", "3/13/2000"] * 1000
    modin_s, pandas_s = pd.Series(date_strings), pandas.Series(date_strings)
    df_equals(pd.to_datetime(modin_s), pandas.to_datetime(pandas_s))

    # Scalar epoch values in two different units
    for value, unit in ((1490195805, "s"), (1490195805433502912, "ns")):
        assert pd.to_datetime(value, unit=unit) == pandas.to_datetime(value, unit=unit)

    # List of day offsets relative to an explicit origin
    offsets = [1, 2, 3]
    modin_index = pd.to_datetime(offsets, unit="D", origin=pd.Timestamp("2000-01-01"))
    pandas_index = pandas.to_datetime(
        offsets, unit="D", origin=pandas.Timestamp("2000-01-01")
    )
    assert modin_index.equals(pandas_index)


def test_to_datetime_inplace_side_effect():
    """Compare ``to_datetime`` on a frame column between Modin and pandas (GH#3063)."""
    # See GH#3063
    columns = {
        "time": list(range(1617993360, 1618193360)),
        "value": list(range(215441, 415441)),
    }
    modin_df, pandas_df = pd.DataFrame(columns), pandas.DataFrame(columns)

    modin_converted = pd.to_datetime(modin_df["time"], unit="s")
    pandas_converted = pandas.to_datetime(pandas_df["time"], unit="s")
    df_equals(modin_converted, pandas_converted)


@pytest.mark.parametrize(
    "data, errors, downcast",
    [
        (["1.0", "2", -3], "raise", None),
        (["1.0", "2", -3], "raise", "float"),
        (["1.0", "2", -3], "raise", "signed"),
        (["apple", "1.0", "2", -3], "ignore", None),
        (["apple", "1.0", "2", -3], "coerce", None),
    ],
)
def test_to_numeric(data, errors, downcast):
    """Compare ``pd.to_numeric`` with pandas for each ``errors``/``downcast`` pair."""
    modin_series, pandas_series = pd.Series(data), pandas.Series(data)
    options = {"errors": errors, "downcast": downcast}

    modin_result = pd.to_numeric(modin_series, **options)
    pandas_result = pandas.to_numeric(pandas_series, **options)
    df_equals(modin_result, pandas_result)


@pytest.mark.parametrize("retbins", bool_arg_values, ids=bool_arg_keys)
def test_qcut(retbins):
    """Compare ``pd.qcut`` with pandas for Series input and for a plain range."""
    # test case from https://github.com/modin-project/modin/issues/5610
    pandas_series = pandas.Series(range(10))
    modin_series = pd.Series(range(10))
    pandas_result = pandas.qcut(pandas_series, 4, retbins=retbins)
    # NOTE that qcut() defaults to pandas at the API layer.
    with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):
        modin_result = pd.qcut(modin_series, 4, retbins=retbins)

    # With ``retbins`` the binned values are element 0 and the bin edges element 1;
    # without it the result is the binned values themselves.
    if retbins:
        modin_binned, pandas_binned = modin_result[0], pandas_result[0]
    else:
        modin_binned, pandas_binned = modin_result, pandas_result
    df_equals(modin_binned, pandas_binned)
    df_equals(modin_binned.cat.categories, pandas_binned.cat.categories)
    if retbins:
        assert_array_equal(modin_result[1], pandas_result[1])

    # test case for fallback to pandas, taken from pandas docs
    pandas_fallback = pandas.qcut(range(5), 4)
    modin_fallback = pd.qcut(range(5), 4)
    df_equals(modin_fallback, pandas_fallback)


@pytest.mark.parametrize(
    "bins, labels",
    [
        pytest.param(
            [-int(1e18), -1000, 0, 1000, 2000, int(1e18)],
            [
                "-inf_to_-1000",
                "-1000_to_0",
                "0_to_1000",
                "1000_to_2000",
                "2000_to_inf",
            ],
            id="bin_list_spanning_entire_range_with_custom_labels",
        ),
        pytest.param(
            [-int(1e18), -1000, 0, 1000, 2000, int(1e18)],
            None,
            id="bin_list_spanning_entire_range_with_default_labels",
        ),
        pytest.param(
            [-1000, 0, 1000, 2000], None, id="bin_list_not_spanning_entire_range"
        ),
        pytest.param(
            10,
            [f"custom_label{i}" for i in range(9)],
            id="int_bin_10_with_custom_labels",
        ),
        pytest.param(1, None, id="int_bin_1_with_default_labels"),
        pytest.param(-1, None, id="int_bin_-1_with_default_labels"),
        pytest.param(111, None, id="int_bin_111_with_default_labels"),
    ],
)
@pytest.mark.parametrize("retbins", bool_arg_values, ids=bool_arg_keys)
def test_cut(retbins, bins, labels):
    """Compare ``pd.cut`` with ``pandas.cut``, including the exception type on failure."""
    # `eval_general` is not used here for two reasons. It expects the operation to
    # be supported by Modin, so it errors out on the defaulting-to-pandas
    # UserWarning. Silencing that with @pytest.mark.filterwarnings("ignore") would
    # not help either, because pd.cut sometimes returns an np.ndarray, which
    # `eval_general` does not know how to handle.
    cut_kwargs = {"retbins": retbins, "bins": bins, "labels": labels}

    try:
        pd_result = pandas.cut(pandas.Series(range(1000)), **cut_kwargs)
    except Exception as pd_e:
        # pandas failed, so Modin must fail with the same exception type.
        with pytest.raises(Exception) as md_e:
            with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):
                pd.cut(pd.Series(range(1000)), **cut_kwargs)
        assert isinstance(
            md_e.value, type(pd_e)
        ), f"Got Modin Exception type {type(md_e.value)}, but pandas Exception type {type(pd_e)} was expected"
        return

    with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):
        md_result = pd.cut(pd.Series(range(1000)), **cut_kwargs)

    if isinstance(pd_result, tuple):
        assert isinstance(
            md_result, tuple
        ), "Modin returned single value, but pandas returned tuple of values"
        # Compare element-wise: Series through `df_equals`, anything else as arrays.
        for pd_res, md_res in zip(pd_result, md_result):
            if isinstance(pd_res, pandas.Series):
                compare = df_equals
            else:
                compare = np.testing.assert_array_equal
            compare(pd_res, md_res)
    else:
        df_equals(md_result, pd_result)


def test_cut_fallback():
    """Check ``pd.cut`` on a non-Modin input, where Modin falls back to pandas."""
    expected = pandas.cut(range(5), 4)
    # The fallback to pandas happens at the API layer; the defaulting warning
    # is only expected when the current execution is not native.
    with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):
        actual = pd.cut(range(5), 4)
    df_equals(actual, expected)


@pytest.mark.parametrize(
    "data", [test_data_values[0], []], ids=["test_data_values[0]", "[]"]
)
def test_to_pandas_indices(data):
    """Check that ``_to_pandas()`` keeps MultiIndex row and column labels intact."""

    def build_labels(length):
        # One more tuple than needed is generated and the entry keyed by 0
        # is dropped afterwards.
        pairs = [(i, i * 2) for i in np.arange(length + 1)]
        return pandas.MultiIndex.from_tuples(pairs, names=["A", "B"]).drop(0)

    md_df = pd.DataFrame(data)
    # Both label sets are built before either one is assigned.
    row_labels = build_labels(len(md_df))
    col_labels = build_labels(len(md_df.columns))

    md_df.index = row_labels
    md_df.columns = col_labels

    pd_df = md_df._to_pandas()

    for axis in (0, 1):
        md_axis = md_df.axes[axis]
        pd_axis = pd_df.axes[axis]
        assert md_axis.equals(pd_axis), f"Indices at axis {axis} are different!"
        # Level comparison only applies when the axis object offers it.
        if hasattr(md_axis, "equal_levels"):
            assert md_axis.equal_levels(
                pd_axis
            ), f"Levels of indices at axis {axis} are different!"


def test_to_pandas_read_only_issue():
    """Check that the frame returned by ``_to_pandas()`` can be modified in place."""
    nan = np.nan
    rows = [
        [nan, 2, nan, 0],
        [3, 4, nan, 1],
        [nan, nan, nan, nan],
        [nan, 3, nan, 4],
    ]
    modin_frame = pd.DataFrame(rows, columns=list("ABCD"))
    native_frame = modin_frame._to_pandas()
    # Must not raise `ValueError: putmask: output array is read-only`.
    native_frame.fillna(0, inplace=True)


def test_to_numpy_read_only_issue():
    """Check that the array returned by ``to_numpy()`` can be modified in place."""
    nan = np.nan
    rows = [
        [nan, 2, nan, 0],
        [3, 4, nan, 1],
        [nan, nan, nan, nan],
        [nan, 3, nan, 4],
    ]
    values = pd.DataFrame(rows, columns=list("ABCD")).to_numpy()
    # Must not raise `ValueError: putmask: output array is read-only`.
    np.putmask(values, np.isnan(values), 0)


def test_create_categorical_dataframe_with_duplicate_column_name():
    """
    Build a Modin frame from a pandas frame with two categorical columns sharing a name.

    Regression test for https://github.com/modin-project/modin/issues/4312
    """
    source_columns = {
        "a": pandas.Categorical([1, 2]),
        "b": [4, 5],
        "c": pandas.Categorical([7, 8]),
    }
    pd_df = pandas.DataFrame(source_columns)
    # Rename so that both categorical columns are called "a".
    pd_df.columns = ["a", "b", "a"]
    md_df = pd.DataFrame(pd_df)

    # assert_frame_equal is used instead of the common modin util df_equals
    # because the dtypes of the new categoricals must be checked with
    # check_dtype=True.
    # TODO(https://github.com/modin-project/modin/issues/3804): Make
    # df_equals set check_dtype=True and use df_equals instead.
    strict_checks = dict(
        check_dtype=True,
        check_index_type=True,
        check_column_type=True,
        check_names=True,
        check_categorical=True,
    )
    assert_frame_equal(md_df._to_pandas(), pd_df, **strict_checks)


@pytest.mark.skipif(
    get_current_execution() != "BaseOnPython",
    reason="This test make sense only on BaseOnPython execution.",
)
@pytest.mark.parametrize(
    "func, regex",
    [
        (lambda df: df.mean(), r"DataFrame\.mean"),
        (lambda df: df + df, r"DataFrame\.add"),
        (lambda df: df.index, r"DataFrame\.get_axis\(0\)"),
        (
            lambda df: df.drop(columns="col1").squeeze().repeat(2),
            r"Series\.repeat",
        ),
        (lambda df: df.groupby("col1").prod(), r"GroupBy\.prod"),
        (lambda df: df.rolling(1).count(), r"Rolling\.count"),
    ],
)
def test_default_to_pandas_warning_message(func, regex):
    """Check that `func` emits a UserWarning whose message matches `regex`."""
    frame = pd.DataFrame({"col1": [1, 2, 3], "col2": [4, 5, 6]})

    with pytest.warns(UserWarning, match=regex):
        func(frame)


def test_empty_dataframe():
    """Filter a frame that has columns but no rows with a boolean mask."""
    frame = pd.DataFrame(columns=["a", "b"])
    # NOTE that we default to pandas at the API layer.
    with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):
        mask = (frame.a == 1) & (frame.b == 2)
        frame[mask]


def test_empty_series():
    """Check that ``pd.to_numeric`` accepts an empty Series."""
    pd.to_numeric(pd.Series([]))


@pytest.mark.parametrize(
    "arg",
    [[1, 2], ["a"], 1, "a"],
    ids=["list_of_ints", "list_of_invalid_strings", "scalar", "invalid_scalar"],
)
def test_to_timedelta(arg, request):
    """
    Compare ``to_timedelta`` between Modin and pandas for lists and scalars.

    This test case comes from
    https://github.com/modin-project/modin/issues/4966
    """
    # Expected failure per parametrization id; ids that are not listed here
    # are expected to succeed.
    failures_by_id = {
        "list_of_invalid_strings": ValueError(
            "Could not convert 'a' to NumPy timedelta"
        ),
        "invalid_scalar": ValueError("unit abbreviation w/o a number"),
    }
    expected_exception = failures_by_id.get(request.node.callspec.id)

    def convert(lib):
        return lib.to_timedelta(arg)

    eval_general(pd, pandas, convert, expected_exception=expected_exception)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_series_to_timedelta(data):
    """Compare ``to_timedelta`` applied to a Series between Modin and pandas."""

    def to_timedelta_frame(lib):
        # For dict inputs only the first value is used as the series data.
        if isinstance(data, dict):
            values = next(iter(data.values()))
        else:
            values = data
        converted = lib.to_timedelta(lib.Series(values))
        return converted.to_frame(name="timedelta")

    eval_general(pd, pandas, to_timedelta_frame)


@pytest.mark.parametrize(
    "key",
    [["col0"], "col0", "col1"],
    ids=["valid_list_of_string", "valid_string", "invalid_string"],
)
def test_get(key):
    """Compare ``DataFrame.get`` between Modin and pandas for present and absent keys."""

    def lookup(df):
        return df.get(key)

    modin_df, pandas_df = create_test_dfs({"col0": [0, 1]})
    eval_general(modin_df, pandas_df, lookup)


@pytest.mark.xfail(
    condition=is_native_shallow_copy(),
    reason="native pandas backend does not deep copy inputs by default",
    strict=True,
)
def test_df_immutability():
    """
    Verify that modifications of the source data don't propagate to Modin's DataFrame objects.
    """
    source = pandas.DataFrame({"a": [1]})
    modin_copy = pd.DataFrame(source)

    # Mutate the source after the Modin frame has been built from it.
    source.iloc[0, 0] = 100

    stored_value = modin_copy._to_pandas().iloc[0, 0]
    assert stored_value == 1


def test_np_array_function():
    """Check ``np.where`` when one or more of its arguments are Modin objects."""
    # The condition is a numpy array, the second argument is a modin object.
    picked = np.where(np.array([1, 0]), pd.Series([9, 9]), [-1, -1])
    assert_array_equal(picked, np.array([9, -1]))

    # Both the condition and the second argument are modin objects.
    picked = np.where(pd.DataFrame([[1, 0]]), pd.Series([9, 9]), [-1, -1])
    assert_array_equal(picked, np.array([[9, -1]]))


================================================
FILE: modin/tests/pandas/test_groupby.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import datetime
import itertools
from unittest import mock

import numpy as np
import pandas
import pandas._libs.lib as lib
import pytest

import modin.pandas as pd
from modin.config import (
    IsRayCluster,
    NPartitions,
    RangePartitioning,
    StorageFormat,
    context,
)
from modin.core.dataframe.algebra.default2pandas.groupby import GroupBy
from modin.core.dataframe.pandas.partitioning.axis_partition import (
    PandasDataframeAxisPartition,
)
from modin.core.storage_formats.pandas.query_compiler_caster import (
    _assert_casting_functions_wrap_same_implementation,
)
from modin.pandas.io import from_pandas
from modin.pandas.utils import is_scalar
from modin.tests.test_utils import (
    current_execution_is_native,
    df_or_series_using_native_execution,
    warns_that_defaulting_to_pandas_if,
)
from modin.utils import (
    MODIN_UNNAMED_SERIES_LABEL,
    get_current_execution,
    hashable,
    try_cast_to_pandas,
)

from .utils import (
    assert_set_of_rows_identical,
    check_df_columns_have_nans,
    create_test_dfs,
    create_test_series,
    default_to_pandas_ignore_string,
    df_equals,
    dict_equals,
    eval_general,
    generate_multiindex,
    modin_df_almost_equals_pandas,
    test_data,
    test_data_values,
    test_groupby_data,
    try_modin_df_almost_equals_compare,
    value_equals,
)

# Set Modin's ``NPartitions`` config value to 4 for this module.
NPartitions.put(4)

# Our configuration in pytest.ini requires that we explicitly catch all
# instances of defaulting to pandas, but some test modules, like this one,
# have too many such instances.
# TODO(https://github.com/modin-project/modin/issues/3655): catch all instances
# of defaulting to pandas.
pytestmark = [
    pytest.mark.filterwarnings(default_to_pandas_ignore_string),
    # Turn every FutureWarning into an error so that none goes unnoticed ...
    pytest.mark.filterwarnings("error::FutureWarning"),
    # ... except for this expected Ray warning due to https://github.com/ray-project/ray/issues/54868
    pytest.mark.filterwarnings(
        "ignore:.*In future versions of Ray, Ray will no longer override accelerator visible devices env var if num_gpus=0 or num_gpus=None:FutureWarning"
    ),
    # Known FutureWarnings that are ignored to keep the test output clean.
    pytest.mark.filterwarnings(
        "ignore:DataFrame.groupby with axis=1 is deprecated:FutureWarning"
    ),
    pytest.mark.filterwarnings(
        "ignore:DataFrameGroupBy.dtypes is deprecated:FutureWarning"
    ),
    pytest.mark.filterwarnings(
        "ignore:DataFrameGroupBy.diff with axis=1 is deprecated:FutureWarning"
    ),
    pytest.mark.filterwarnings(
        "ignore:DataFrameGroupBy.pct_change with axis=1 is deprecated:FutureWarning"
    ),
    pytest.mark.filterwarnings(
        "ignore:The 'fill_method' keyword being not None and the 'limit' keyword "
        + "in (DataFrame|DataFrameGroupBy).pct_change are deprecated:FutureWarning"
    ),
    pytest.mark.filterwarnings(
        "ignore:DataFrameGroupBy.shift with axis=1 is deprecated:FutureWarning"
    ),
    pytest.mark.filterwarnings(
        "ignore:(DataFrameGroupBy|SeriesGroupBy).fillna is deprecated:FutureWarning"
    ),
    pytest.mark.filterwarnings(
        "ignore:(DataFrame|Series).fillna with 'method' is deprecated:FutureWarning"
    ),
    # FIXME: the warnings below are emitted inconsistently between modin and pandas
    pytest.mark.filterwarnings(
        "ignore:A grouping was used that is not in the columns of the DataFrame and so was excluded from the result:FutureWarning"
    ),
    pytest.mark.filterwarnings(
        "ignore:The default of observed=False is deprecated:FutureWarning"
    ),
    pytest.mark.filterwarnings(
        "ignore:When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future:FutureWarning"
    ),
    pytest.mark.filterwarnings(
        "ignore:.*DataFrame.idxmax with all-NA values, or any-NA and skipna=False, is deprecated:FutureWarning"
    ),
    pytest.mark.filterwarnings(
        "ignore:.*DataFrame.idxmin with all-NA values, or any-NA and skipna=False, is deprecated:FutureWarning"
    ),
    pytest.mark.filterwarnings(
        "ignore:.*In a future version of pandas, the provided callable will be used directly.*:FutureWarning"
    ),
    pytest.mark.filterwarnings(
        "ignore:(DataFrameGroupBy|SeriesGroupBy).apply operated on the grouping columns:FutureWarning"
    ),
]


def get_external_groupers(df, columns, drop_from_original_df=False, add_plus_one=False):
    """
    Build a ``by`` argument that mixes column names with external groupers.

    Parameters
    ----------
    df : pandas.DataFrame or modin.pandas.DataFrame
        Frame the external groupers are taken from.
    columns : list[tuple[bool, str]]
        Columns to group on as ``(lookup, name)`` pairs. When ``lookup`` is
        ``True`` a copy of ``df[name]`` is used as the grouper, otherwise the
        column name itself is kept, e.g. ``[(True, "a"), (False, "b")]``
        yields ``[<copy of df["a"]>, "b"]``.
    drop_from_original_df : bool, default: False
        Whether to drop the columns used as external groupers from `df`.
    add_plus_one : bool, default: False
        Whether to use ``df[name] + 1`` for external groupers (so they won't be
        considered as sibling with `df`).

    Returns
    -------
    new_df : pandas.DataFrame or modin.pandas.DataFrame
        `df` itself when nothing was dropped, otherwise a new dataframe without
        the columns that became external groupers.
    by : list
        Groupers to pass to `df.groupby(by)`.
    """
    result_df = df
    groupers = []
    for is_external, col in columns:
        if not is_external:
            # Plain label: groupby resolves it against the frame later.
            groupers.append(col)
            continue

        # Always read from the untouched `df`, since `result_df` may have
        # lost this column already.
        grouper = df[col].copy()
        grouper = grouper + 1 if add_plus_one else grouper
        groupers.append(grouper)

        if drop_from_original_df:
            result_df = result_df.drop(columns=[col])
    return result_df, groupers


def modin_groupby_equals_pandas(modin_groupby, pandas_groupby):
    """
    Assert that a Modin groupby object matches its pandas counterpart.

    Compares ``indices``, ``groups`` and then every ``(key, group)`` pair
    produced by iterating over both objects.
    """

    def get_indices(grp):
        return grp.indices

    def get_groups(grp):
        return grp.groups

    eval_general(modin_groupby, pandas_groupby, get_indices, comparator=dict_equals)
    # FIXME: https://github.com/modin-project/modin/issues/7032
    eval_general(
        modin_groupby,
        pandas_groupby,
        get_groups,
        comparator=dict_equals,
        expected_exception=False,
    )

    # zip_longest pads the shorter side with None, so a differing number of
    # groups makes the indexing below fail instead of passing silently.
    paired_groups = itertools.zip_longest(modin_groupby, pandas_groupby)
    for md_group, pd_group in paired_groups:
        value_equals(md_group[0], pd_group[0])
        df_equals(md_group[1], pd_group[1])


def eval_aggregation(md_df, pd_df, operation=None, by=None, *args, **kwargs):
    """
    Run ``df.groupby(by).agg(operation)`` on both frames through ``eval_general``.

    `by` defaults to the first column of `md_df` and `operation` to an empty
    dict. Extra positional and keyword arguments are handed to
    ``eval_general`` unchanged.
    """
    by = md_df.columns[0] if by is None else by
    operation = {} if operation is None else operation

    def aggregate(df, *agg_args, **agg_kwargs):
        return df.groupby(by=by).agg(operation, *agg_args, **agg_kwargs)

    return eval_general(md_df, pd_df, aggregate, *args, **kwargs)


def build_types_asserter(comparator):
    """
    Wrap `comparator` with checks on the origin of the compared objects.

    The returned callable asserts that both objects agree on being scalar or
    not, that the first one comes from a ``modin`` module and the second one
    from a ``pandas`` module, and then forwards everything to `comparator`.
    """

    def wrapper(obj1, obj2, *args, **kwargs):
        error_str = f"obj1 and obj2 has incorrect types: {type(obj1)} and {type(obj2)}"
        scalar_mismatch = is_scalar(obj1) ^ is_scalar(obj2)
        assert not scalar_mismatch, error_str
        # The top-level package of each object's module identifies the
        # library that produced it.
        for obj, expected_package in ((obj1, "modin"), (obj2, "pandas")):
            assert obj.__module__.split(".")[0] == expected_package, error_str
        comparator(obj1, obj2, *args, **kwargs)

    return wrapper


@pytest.mark.parametrize("as_index", [True, False])
def test_mixed_dtypes_groupby(as_index):
    """
    Compare many groupby operations between Modin and pandas on mixed dtypes.

    The frame holds seeded random integers in which every other column is
    converted to single-character strings. Each grouper in ``by_values`` is
    used to build a Modin and a pandas groupby object, and the same sequence
    of operations is then evaluated on both.

    Parameters
    ----------
    as_index : bool
        Forwarded to ``groupby(as_index=...)`` for both libraries.
    """
    frame_data = np.random.RandomState(42).randint(97, 198, size=(2**6, 2**4))
    pandas_df = pandas.DataFrame(frame_data).add_prefix("col")
    # Convert every other column to string
    for col in pandas_df.iloc[
        :, [i for i in range(len(pandas_df.columns)) if i % 2 == 0]
    ]:
        pandas_df[col] = [str(chr(i)) for i in pandas_df[col]]
    modin_df = from_pandas(pandas_df)

    # Number of rows requested from `head` and `tail` below.
    n = 1

    # Each entry is a tuple: its first item is the grouper for the Modin
    # frame and its last item is the grouper for the pandas frame, so
    # one-element tuples use the same grouper for both.
    by_values = [
        ("col1",),
        (lambda x: x % 2,),
        (modin_df["col0"].copy(), pandas_df["col0"].copy()),
        ("col3",),
    ]

    for by in by_values:
        if isinstance(by[0], str) and by[0] == "col3":
            # For "col3" the column is first moved into the index and the
            # grouping is then done by that index name.
            modin_groupby = modin_df.set_index(by[0]).groupby(
                by=by[0], as_index=as_index
            )
            pandas_groupby = pandas_df.set_index(by[0]).groupby(
                by=by[-1], as_index=as_index
            )
            # difference in behaviour between .groupby().ffill() and
            # .groupby.fillna(method='ffill') on duplicated indices
            # caused by https://github.com/pandas-dev/pandas/issues/43412
            # is hurting the tests, for now sort the frames
            md_sorted_grpby = (
                modin_df.set_index(by[0])
                .sort_index()
                .groupby(by=by[0], as_index=as_index)
            )
            pd_sorted_grpby = (
                pandas_df.set_index(by[0])
                .sort_index()
                .groupby(by=by[0], as_index=as_index)
            )
        else:
            modin_groupby = modin_df.groupby(by=by[0], as_index=as_index)
            pandas_groupby = pandas_df.groupby(by=by[-1], as_index=as_index)
            md_sorted_grpby, pd_sorted_grpby = modin_groupby, pandas_groupby

        modin_groupby_equals_pandas(modin_groupby, pandas_groupby)
        eval_ngroups(modin_groupby, pandas_groupby)
        eval_general(
            md_sorted_grpby,
            pd_sorted_grpby,
            lambda df: df.ffill(),
            comparator=lambda *dfs: df_equals(*sort_if_experimental_groupby(*dfs)),
        )
        # FIXME: https://github.com/modin-project/modin/issues/7032
        eval_general(
            modin_groupby,
            pandas_groupby,
            lambda df: df.sem(),
            modin_df_almost_equals_pandas,
            expected_exception=False,
        )
        eval_general(
            modin_groupby, pandas_groupby, lambda df: df.sample(random_state=1)
        )
        eval_general(
            modin_groupby,
            pandas_groupby,
            lambda df: df.ewm(com=0.5).std(),
            expected_exception=pandas.errors.DataError(
                "Cannot aggregate non-numeric type: object"
            ),
        )
        eval_shift(
            modin_groupby,
            pandas_groupby,
            comparator=(
                # We should sort the result before comparison for transform functions
                # in case of range-partitioning groupby (https://github.com/modin-project/modin/issues/5924).
                # This test though produces so much NaN values in the result, so it's impossible to sort,
                # using manual comparison of set of rows instead
                assert_set_of_rows_identical
                if RangePartitioning.get()
                else None
            ),
        )
        eval_mean(modin_groupby, pandas_groupby, numeric_only=True)
        eval_any(modin_groupby, pandas_groupby)
        eval_min(modin_groupby, pandas_groupby)
        eval_general(modin_groupby, pandas_groupby, lambda df: df.idxmax())
        eval_ndim(modin_groupby, pandas_groupby)
        eval_cumsum(modin_groupby, pandas_groupby, numeric_only=True)
        eval_general(
            modin_groupby,
            pandas_groupby,
            lambda df: df.pct_change(),
            modin_df_almost_equals_pandas,
            # FIXME: https://github.com/modin-project/modin/issues/7032
            expected_exception=False,
        )
        eval_cummax(modin_groupby, pandas_groupby, numeric_only=True)

        # TODO Add more apply functions
        apply_functions = [lambda df: df.sum(), min]
        for func in apply_functions:
            eval_apply(modin_groupby, pandas_groupby, func)

        eval_dtypes(modin_groupby, pandas_groupby)
        eval_general(
            modin_groupby,
            pandas_groupby,
            lambda df: df.first(),
            comparator=lambda *dfs: df_equals(*sort_if_experimental_groupby(*dfs)),
        )
        eval_cummin(modin_groupby, pandas_groupby, numeric_only=True)
        eval_general(
            md_sorted_grpby,
            pd_sorted_grpby,
            lambda df: df.bfill(),
            comparator=lambda *dfs: df_equals(*sort_if_experimental_groupby(*dfs)),
        )
        # numeric_only=False doesn't work
        eval_general(
            modin_groupby, pandas_groupby, lambda df: df.idxmin(numeric_only=True)
        )
        eval_prod(modin_groupby, pandas_groupby, numeric_only=True)
        # std, var and skew are only compared when as_index is True.
        if as_index:
            eval_std(modin_groupby, pandas_groupby, numeric_only=True)
            eval_var(modin_groupby, pandas_groupby, numeric_only=True)
            eval_skew(modin_groupby, pandas_groupby, numeric_only=True)

        # Aggregations given as callables, as names and as per-column dicts.
        agg_functions = [
            lambda df: df.sum(),
            "min",
            min,
            "max",
            max,
            sum,
            {"col2": "sum"},
            {"col2": sum},
            {"col2": "max", "col4": "sum", "col5": "min"},
            {"col2": max, "col4": sum, "col5": "min"},
            # Intersection of 'by' and agg cols for TreeReduce impl
            {"col0": "count", "col1": "count", "col2": "count"},
            # Intersection of 'by' and agg cols for FullAxis impl
            {"col0": "nunique", "col1": "nunique", "col2": "nunique"},
        ]
        for func in agg_functions:
            eval_agg(modin_groupby, pandas_groupby, func)

        eval_general(modin_groupby, pandas_groupby, lambda df: df.last())
        eval_max(modin_groupby, pandas_groupby)
        eval_len(modin_groupby, pandas_groupby)
        eval_sum(modin_groupby, pandas_groupby)
        if not RangePartitioning.get():
            # `ngroup` fails with experimental (range-partitioning) groupby
            # https://github.com/modin-project/modin/issues/6083
            eval_ngroup(modin_groupby, pandas_groupby)
        eval_nunique(modin_groupby, pandas_groupby)
        eval_value_counts(modin_groupby, pandas_groupby)
        eval_median(modin_groupby, pandas_groupby, numeric_only=True)
        eval_general(
            modin_groupby,
            pandas_groupby,
            lambda df: df.head(n),
            comparator=lambda *dfs: df_equals(*sort_if_experimental_groupby(*dfs)),
        )
        eval_cumprod(modin_groupby, pandas_groupby, numeric_only=True)
        # numeric_only=False doesn't work
        eval_general(
            modin_groupby,
            pandas_groupby,
            lambda df: df.cov(numeric_only=True),
            modin_df_almost_equals_pandas,
        )

        transform_functions = [lambda df: df, lambda df: df + df]
        for func in transform_functions:
            eval_transform(modin_groupby, pandas_groupby, func)

        pipe_functions = [lambda dfgb: dfgb.sum()]
        for func in pipe_functions:
            eval_pipe(modin_groupby, pandas_groupby, func)

        eval_general(
            modin_groupby,
            pandas_groupby,
            lambda df: df.corr(numeric_only=True),
            modin_df_almost_equals_pandas,
        )
        eval_fillna(modin_groupby, pandas_groupby)
        eval_count(modin_groupby, pandas_groupby)
        eval_general(
            modin_groupby,
            pandas_groupby,
            lambda df: df.tail(n),
            comparator=lambda *dfs: df_equals(*sort_if_experimental_groupby(*dfs)),
        )
        eval_quantile(modin_groupby, pandas_groupby)
        eval_general(modin_groupby, pandas_groupby, lambda df: df.take([0]))
        eval___getattr__(modin_groupby, pandas_groupby, "col2")
        eval_groups(modin_groupby, pandas_groupby)


class GetColumn:
    """
    Placeholder for "the column called ``name`` of whatever frame is given".

    Calling an instance with a frame ``df`` returns ``df[name]``.
    """

    def __init__(self, name):
        # Label that is looked up in the object passed to ``__call__``.
        self.name = name

    def __call__(self, df):
        column = df[self.name]
        return column


def test_aggregate_alias():
    """
    Check that ``aggregate`` and ``agg`` wrap the same implementation.

    This is an optimization: if the check fails, ``groupby().aggregate``
    has to be tested explicitly.
    """
    from modin.pandas.groupby import DataFrameGroupBy, SeriesGroupBy

    for groupby_cls in (DataFrameGroupBy, SeriesGroupBy):
        _assert_casting_functions_wrap_same_implementation(
            groupby_cls.aggregate, groupby_cls.agg
        )


@pytest.mark.parametrize(
    "by",
    [
        [1, 2, 1, 2],
        lambda x: x % 3,
        "col1",
        ["col1"],
        # col2 contains NaN, is it necessary to test functions like size()
        "col2",
        ["col2"],  # 5
        pytest.param(
            ["col1", "col2"],
            marks=pytest.mark.xfail(reason="Excluded because of bug #1554"),
        ),
        pytest.param(
            ["col2", "col4"],
            marks=pytest.mark.xfail(reason="Excluded because of bug #1554"),
        ),
        pytest.param(
            ["col4", "col2"],
            marks=pytest.mark.xfail(reason="Excluded because of bug #1554"),
        ),
        pytest.param(
            ["col3", "col4", "col2"],
            marks=pytest.mark.xfail(reason="Excluded because of bug #1554"),
        ),
        # but cum* functions produce undefined results with NaNs so we need to test the same combinations without NaN too
        ["col5"],  # 10
        ["col1", "col5"],
        ["col5", "col4"],
        ["col4", "col5"],
        ["col5", "col4", "col1"],
        ["col1", pd.Series([1, 5, 7, 8])],  # 15
        [pd.Series([1, 5, 7, 8])],
        [
            pd.Series([1, 5, 7, 8]),
            pd.Series([1, 5, 7, 8]),
            pd.Series([1, 5, 7, 8]),
            pd.Series([1, 5, 7, 8]),
            pd.Series([1, 5, 7, 8]),
        ],
        ["col1", GetColumn("col5")],
        [GetColumn("col1"), GetColumn("col5")],
        [GetColumn("col1")],  # 20
    ],
)
@pytest.mark.parametrize("as_index", [True, False], ids=lambda v: f"as_index={v}")
@pytest.mark.parametrize(
    "col1_category", [True, False], ids=lambda v: f"col1_category={v}"
)
def test_simple_row_groupby(by, as_index, col1_category):
    """
    Compare a broad set of row-wise groupby operations between Modin and pandas.

    A small 4-row frame (with NaNs in ``col2`` and ``col3``) is grouped by ``by``
    in both libraries, and the results of many groupby methods are compared.

    Parameters
    ----------
    by : list, callable or str
        Grouping specification. List entries may be column labels, Modin Series
        or ``GetColumn`` placeholders that are resolved against the frame.
    as_index : bool
        Value passed as ``as_index`` to both ``groupby`` calls.
    col1_category : bool
        Whether ``col1`` is converted to an ordered categorical before grouping.
    """
    pandas_df = pandas.DataFrame(
        {
            "col1": [0, 1, 2, 3],
            "col2": [4, 5, np.nan, 7],
            "col3": [np.nan, np.nan, 12, 10],
            "col4": [17, 13, 16, 15],
            "col5": [-4, -5, -6, -7],
        }
    )

    if col1_category:
        pandas_df = pandas_df.astype({"col1": "category"})
        # As of pandas 1.4.0 operators like min cause TypeErrors to be raised on unordered
        # categorical columns. We need to specify the categorical column as ordered to bypass this.
        pandas_df["col1"] = pandas_df["col1"].cat.as_ordered()

    modin_df = from_pandas(pandas_df)
    # Number of rows requested from ``head``/``tail`` further below.
    n = 1

    def maybe_get_columns(df, by):
        # Resolve ``GetColumn`` placeholders inside a list ``by`` against ``df``;
        # every other value is passed through untouched.
        if isinstance(by, list):
            return [o(df) if isinstance(o, GetColumn) else o for o in by]
        else:
            return by

    modin_groupby = modin_df.groupby(
        by=maybe_get_columns(modin_df, by), as_index=as_index
    )

    # pandas cannot group by Modin objects, so cast them before resolving placeholders.
    pandas_by = maybe_get_columns(pandas_df, try_cast_to_pandas(by))
    pandas_groupby = pandas_df.groupby(by=pandas_by, as_index=as_index)

    modin_groupby_equals_pandas(modin_groupby, pandas_groupby)
    eval_ngroups(modin_groupby, pandas_groupby)
    eval_shift(modin_groupby, pandas_groupby)
    eval_general(modin_groupby, pandas_groupby, lambda df: df.ffill())
    if as_index:
        eval_general(modin_groupby, pandas_groupby, lambda df: df.nth(0))
    else:
        # FIXME: df.groupby(as_index=False).nth() does not produce correct index in Modin,
        #        it should maintain values from df.index, not create a new one or re-order it;
        #        it also produces completely wrong result for multi-column `by` :(
        if not isinstance(pandas_by, list) or len(pandas_by) <= 1:
            eval_general(
                modin_groupby,
                pandas_groupby,
                lambda df: df.nth(0).sort_values("col1").reset_index(drop=True),
            )

    # ``expected_exception`` is re-assigned before each call below: ``None`` by default,
    # and a ``TypeError`` with the operation-specific message when ``col1`` is categorical.
    expected_exception = None
    if col1_category:
        expected_exception = TypeError(
            "category dtype does not support aggregation 'sem'"
        )
    eval_general(
        modin_groupby,
        pandas_groupby,
        lambda df: df.sem(),
        modin_df_almost_equals_pandas,
        expected_exception=expected_exception,
    )
    eval_mean(modin_groupby, pandas_groupby, numeric_only=True)
    eval_any(modin_groupby, pandas_groupby)
    eval_min(modin_groupby, pandas_groupby)
    # FIXME: https://github.com/modin-project/modin/issues/7033
    eval_general(
        modin_groupby, pandas_groupby, lambda df: df.idxmax(), expected_exception=False
    )
    eval_ndim(modin_groupby, pandas_groupby)
    if not check_df_columns_have_nans(modin_df, by):
        # cum* functions produce undefined results for columns with NaNs so we run them only when "by" columns contain no NaNs

        expected_exception = None
        if col1_category:
            expected_exception = TypeError(
                "category type does not support cumsum operations"
            )
        eval_general(
            modin_groupby,
            pandas_groupby,
            lambda df: df.cumsum(),
            expected_exception=expected_exception,
        )
        expected_exception = None
        if col1_category:
            expected_exception = TypeError(
                "category type does not support cummax operations"
            )
        eval_general(
            modin_groupby,
            pandas_groupby,
            lambda df: df.cummax(),
            expected_exception=expected_exception,
        )
        expected_exception = None
        if col1_category:
            expected_exception = TypeError(
                "category type does not support cummin operations"
            )
        eval_general(
            modin_groupby,
            pandas_groupby,
            lambda df: df.cummin(),
            expected_exception=expected_exception,
        )
        expected_exception = None
        if col1_category:
            expected_exception = TypeError(
                "category type does not support cumprod operations"
            )
        eval_general(
            modin_groupby,
            pandas_groupby,
            lambda df: df.cumprod(),
            expected_exception=expected_exception,
        )
        expected_exception = None
        if col1_category:
            expected_exception = TypeError(
                "category type does not support cumcount operations"
            )
        eval_general(
            modin_groupby,
            pandas_groupby,
            lambda df: df.cumcount(),
            expected_exception=expected_exception,
        )

    eval_general(
        modin_groupby,
        pandas_groupby,
        lambda df: df.pct_change(
            periods=2, fill_method="bfill", limit=1, freq=None, axis=1
        ),
        modin_df_almost_equals_pandas,
    )

    apply_functions = [
        lambda df: df.sum(numeric_only=True),
        lambda df: pandas.Series([1, 2, 3, 4], name="result"),
        min,
    ]
    for func in apply_functions:
        eval_apply(modin_groupby, pandas_groupby, func)

    eval_dtypes(modin_groupby, pandas_groupby)
    eval_general(modin_groupby, pandas_groupby, lambda df: df.first())
    eval_general(modin_groupby, pandas_groupby, lambda df: df.bfill())
    # FIXME: https://github.com/modin-project/modin/issues/7033
    eval_general(
        modin_groupby, pandas_groupby, lambda df: df.idxmin(), expected_exception=False
    )
    expected_exception = None
    if col1_category:
        expected_exception = TypeError("category type does not support prod operations")
    eval_general(
        modin_groupby,
        pandas_groupby,
        lambda grp: grp.prod(),
        expected_exception=expected_exception,
    )

    # std/var/skew are only compared for ``as_index=True``.
    if as_index:
        eval_std(modin_groupby, pandas_groupby, numeric_only=True)
        eval_var(modin_groupby, pandas_groupby, numeric_only=True)
        eval_skew(modin_groupby, pandas_groupby, numeric_only=True)

    agg_functions = [
        lambda df: df.sum(),
        "min",
        "max",
        min,
        sum,
        # Intersection of 'by' and agg cols for TreeReduce impl
        {"col1": "count", "col2": "count"},
        # Intersection of 'by' and agg cols for FullAxis impl
        {"col1": "nunique", "col2": "nunique"},
    ]
    for func in agg_functions:
        # Pandas raises an exception when 'by' contains categorical key and `as_index=False`
        # because of this bug: https://github.com/pandas-dev/pandas/issues/36698
        # Modin correctly processes the result
        is_pandas_bug_case = not as_index and col1_category and isinstance(func, dict)
        expected_exception = None
        if col1_category:
            # FIXME: https://github.com/modin-project/modin/issues/7033
            expected_exception = False
        if not is_pandas_bug_case:
            eval_general(
                modin_groupby,
                pandas_groupby,
                lambda grp: grp.agg(func),
                expected_exception=expected_exception,
            )

    eval_general(modin_groupby, pandas_groupby, lambda df: df.last())
    eval_general(modin_groupby, pandas_groupby, lambda df: df.rank())
    eval_max(modin_groupby, pandas_groupby)
    eval_len(modin_groupby, pandas_groupby)
    expected_exception = None
    if col1_category:
        expected_exception = TypeError("category type does not support sum operations")
    eval_general(
        modin_groupby,
        pandas_groupby,
        lambda df: df.sum(),
        expected_exception=expected_exception,
    )

    eval_ngroup(modin_groupby, pandas_groupby)
    # Pandas raising exception when 'by' contains categorical key and `as_index=False`
    # because of a bug: https://github.com/pandas-dev/pandas/issues/36698
    # Modin correctly processes the result
    if not (col1_category and not as_index):
        eval_general(
            modin_groupby,
            pandas_groupby,
            lambda df: df.nunique(),
        )
    expected_exception = None
    if col1_category:
        expected_exception = TypeError(
            "category dtype does not support aggregation 'median'"
        )
    # TypeError: category type does not support median operations
    eval_general(
        modin_groupby,
        pandas_groupby,
        lambda df: df.median(),
        modin_df_almost_equals_pandas,
        expected_exception=expected_exception,
    )

    eval_general(modin_groupby, pandas_groupby, lambda df: df.head(n))
    eval_general(
        modin_groupby,
        pandas_groupby,
        lambda df: df.cov(),
        modin_df_almost_equals_pandas,
    )

    if not check_df_columns_have_nans(modin_df, by):
        # Pandas groupby.transform does not work correctly with NaN values in grouping columns. See Pandas bug 17093.
        transform_functions = [lambda df: df + 4, lambda df: -df - 10]
        for idx, func in enumerate(transform_functions):
            # The expected message depends on which of the two transforms is applied.
            expected_exception = None
            if col1_category:
                if idx == 0:
                    expected_exception = TypeError(
                        "unsupported operand type(s) for +: 'Categorical' and 'int'"
                    )
                elif idx == 1:
                    expected_exception = TypeError(
                        "bad operand type for unary -: 'Categorical'"
                    )
            eval_general(
                modin_groupby,
                pandas_groupby,
                lambda df: df.transform(func),
                expected_exception=expected_exception,
            )

    pipe_functions = [lambda dfgb: dfgb.sum()]
    for func in pipe_functions:
        expected_exception = None
        if col1_category:
            expected_exception = TypeError(
                "category type does not support sum operations"
            )
        eval_general(
            modin_groupby,
            pandas_groupby,
            lambda df: df.pipe(func),
            expected_exception=expected_exception,
        )

    eval_general(
        modin_groupby,
        pandas_groupby,
        lambda df: df.corr(),
        modin_df_almost_equals_pandas,
    )
    eval_fillna(modin_groupby, pandas_groupby)
    eval_count(modin_groupby, pandas_groupby)
    # ``size`` is skipped for the "BaseOnPython" execution and for native execution.
    if get_current_execution() != "BaseOnPython" and not current_execution_is_native():
        eval_general(
            modin_groupby,
            pandas_groupby,
            lambda df: df.size(),
        )
    eval_general(modin_groupby, pandas_groupby, lambda df: df.tail(n))
    eval_quantile(modin_groupby, pandas_groupby)
    eval_general(modin_groupby, pandas_groupby, lambda df: df.take([0]))
    if isinstance(by, list) and not any(
        isinstance(o, (pd.Series, pandas.Series)) for o in by
    ):
        # Not yet supported for non-original-column-from-dataframe Series in by:
        eval___getattr__(modin_groupby, pandas_groupby, "col3")
        # FIXME: https://github.com/modin-project/modin/issues/7033
        eval___getitem__(
            modin_groupby, pandas_groupby, "col3", expected_exception=False
        )
    eval_groups(modin_groupby, pandas_groupby)
    # Intersection of the selection and 'by' columns is not yet supported
    non_by_cols = (
        # Potential selection starts only from the second column, because the first may
        # be categorical in this test, which is not yet supported
        [col for col in pandas_df.columns[1:] if col not in modin_groupby._internal_by]
        if isinstance(by, list)
        else ["col3", "col4"]
    )
    # FIXME: https://github.com/modin-project/modin/issues/7033
    eval___getitem__(
        modin_groupby, pandas_groupby, non_by_cols, expected_exception=False
    )
    # When GroupBy.__getitem__ meets an intersection of the selection and 'by' columns
    # it throws a warning with the suggested workaround. The following code tests
    # that this workaround works as expected.
    if len(modin_groupby._internal_by) != 0:
        if not isinstance(by, list):
            by = [by]
        # Replace every 'by' entry that refers to a frame column (by label or via
        # ``GetColumn``) with a copy of that column taken from ``modin_df``.
        by_from_workaround = [
            (
                modin_df[getattr(col, "name", col)].copy()
                if (hashable(col) and col in modin_groupby._internal_by)
                or isinstance(col, GetColumn)
                else col
            )
            for col in by
        ]
        # GroupBy result with 'as_index=False' depends on the 'by' origin, since we forcibly changed
        # the origin of 'by' for modin by doing a copy, set 'as_index=True' to compare results.
        modin_groupby = modin_df.groupby(
            maybe_get_columns(modin_df, by_from_workaround), as_index=True
        )
        pandas_groupby = pandas_df.groupby(pandas_by, as_index=True)
        eval___getitem__(
            modin_groupby,
            pandas_groupby,
            list(modin_groupby._internal_by) + non_by_cols[:1],
        )


def test_single_group_row_groupby():
    """
    Compare Modin and pandas groupby results when every row falls into one group.

    All four rows of a NaN-free integer frame are grouped under the same key
    ``"1"``, and the outputs of many groupby methods are compared.
    """
    pandas_df = pandas.DataFrame(
        {
            "col1": [0, 1, 2, 3],
            "col2": [4, 5, 36, 7],
            "col3": [3, 8, 12, 10],
            "col4": [17, 3, 16, 15],
            "col5": [-4, 5, -6, -7],
        }
    )

    modin_df = from_pandas(pandas_df)

    # The same key for every row, so there is exactly one distinct group key.
    by = ["1", "1", "1", "1"]
    # Passed to ``head``/``tail`` below; larger than the number of rows (4).
    n = 6

    modin_groupby = modin_df.groupby(by=by)
    pandas_groupby = pandas_df.groupby(by=by)

    modin_groupby_equals_pandas(modin_groupby, pandas_groupby)
    eval_ngroups(modin_groupby, pandas_groupby)
    eval_shift(modin_groupby, pandas_groupby)
    eval_skew(modin_groupby, pandas_groupby)
    eval_general(modin_groupby, pandas_groupby, lambda df: df.ffill())
    eval_general(
        modin_groupby,
        pandas_groupby,
        lambda df: df.sem(),
        modin_df_almost_equals_pandas,
    )
    eval_mean(modin_groupby, pandas_groupby)
    eval_any(modin_groupby, pandas_groupby)
    eval_min(modin_groupby, pandas_groupby)
    eval_general(modin_groupby, pandas_groupby, lambda df: df.idxmax())
    eval_ndim(modin_groupby, pandas_groupby)
    eval_cumsum(modin_groupby, pandas_groupby)
    eval_general(
        modin_groupby,
        pandas_groupby,
        lambda df: df.pct_change(),
        modin_df_almost_equals_pandas,
    )
    eval_cummax(modin_groupby, pandas_groupby)

    apply_functions = [lambda df: df.sum(), lambda df: -df]
    for func in apply_functions:
        eval_apply(modin_groupby, pandas_groupby, func)

    eval_dtypes(modin_groupby, pandas_groupby)
    eval_general(modin_groupby, pandas_groupby, lambda df: df.first())
    eval_cummin(modin_groupby, pandas_groupby)
    eval_general(modin_groupby, pandas_groupby, lambda df: df.bfill())
    eval_general(modin_groupby, pandas_groupby, lambda df: df.idxmin())
    eval_prod(modin_groupby, pandas_groupby)
    eval_std(modin_groupby, pandas_groupby)

    # Aggregations given as a callable, method names, builtins and per-column dicts.
    agg_functions = [
        lambda df: df.sum(),
        "min",
        "max",
        max,
        sum,
        {"col2": "sum"},
        {"col2": "max", "col4": "sum", "col5": "min"},
    ]
    for func in agg_functions:
        eval_agg(modin_groupby, pandas_groupby, func)

    eval_general(modin_groupby, pandas_groupby, lambda df: df.last())
    eval_rank(modin_groupby, pandas_groupby)
    eval_max(modin_groupby, pandas_groupby)
    eval_var(modin_groupby, pandas_groupby)
    eval_len(modin_groupby, pandas_groupby)
    eval_sum(modin_groupby, pandas_groupby)
    eval_ngroup(modin_groupby, pandas_groupby)
    eval_nunique(modin_groupby, pandas_groupby)
    eval_value_counts(modin_groupby, pandas_groupby)
    eval_median(modin_groupby, pandas_groupby)
    eval_general(modin_groupby, pandas_groupby, lambda df: df.head(n))
    eval_cumprod(modin_groupby, pandas_groupby)
    eval_general(
        modin_groupby,
        pandas_groupby,
        lambda df: df.cov(),
        modin_df_almost_equals_pandas,
    )

    transform_functions = [lambda df: df + 4, lambda df: -df - 10]
    for func in transform_functions:
        eval_transform(modin_groupby, pandas_groupby, func)

    pipe_functions = [lambda dfgb: dfgb.sum()]
    for func in pipe_functions:
        eval_pipe(modin_groupby, pandas_groupby, func)

    eval_general(
        modin_groupby,
        pandas_groupby,
        lambda df: df.corr(),
        modin_df_almost_equals_pandas,
    )
    eval_fillna(modin_groupby, pandas_groupby)
    eval_count(modin_groupby, pandas_groupby)
    eval_size(modin_groupby, pandas_groupby)
    eval_general(modin_groupby, pandas_groupby, lambda df: df.tail(n))
    eval_quantile(modin_groupby, pandas_groupby)
    eval_general(modin_groupby, pandas_groupby, lambda df: df.take([0]))
    eval___getattr__(modin_groupby, pandas_groupby, "col2")
    eval_groups(modin_groupby, pandas_groupby)


@pytest.mark.parametrize("is_by_category", [True, False])
def test_large_row_groupby(is_by_category):
    """
    Compare Modin and pandas groupby results on a 100x4 frame of random integers.

    Rows are grouped by the string form of the values in column ``"A"``.

    Parameters
    ----------
    is_by_category : bool
        Whether the grouping keys are wrapped into a ``pandas.Categorical``.
    """
    pandas_df = pandas.DataFrame(
        np.random.randint(0, 8, size=(100, 4)), columns=list("ABCD")
    )

    modin_df = from_pandas(pandas_df)

    # Group keys are an external list of strings built from column "A".
    by = [str(i) for i in pandas_df["A"].tolist()]

    if is_by_category:
        by = pandas.Categorical(by)

    # Number of rows requested from ``head``/``tail`` below.
    n = 4

    modin_groupby = modin_df.groupby(by=by)
    pandas_groupby = pandas_df.groupby(by=by)

    modin_groupby_equals_pandas(modin_groupby, pandas_groupby)
    eval_ngroups(modin_groupby, pandas_groupby)
    eval_shift(modin_groupby, pandas_groupby)
    eval_skew(modin_groupby, pandas_groupby)
    eval_general(modin_groupby, pandas_groupby, lambda df: df.ffill())
    eval_general(
        modin_groupby,
        pandas_groupby,
        lambda df: df.sem(),
        modin_df_almost_equals_pandas,
    )
    eval_mean(modin_groupby, pandas_groupby)
    eval_any(modin_groupby, pandas_groupby)
    eval_min(modin_groupby, pandas_groupby)
    eval_general(modin_groupby, pandas_groupby, lambda df: df.idxmax())
    eval_ndim(modin_groupby, pandas_groupby)
    eval_cumsum(modin_groupby, pandas_groupby)

    # ``diff`` with a positive period, a negative period, and along ``axis=1``.
    eval_general(
        modin_groupby,
        pandas_groupby,
        lambda df: df.diff(periods=2),
    )
    eval_general(
        modin_groupby,
        pandas_groupby,
        lambda df: df.diff(periods=-1),
    )
    eval_general(
        modin_groupby,
        pandas_groupby,
        lambda df: df.diff(axis=1),
    )

    eval_general(
        modin_groupby,
        pandas_groupby,
        lambda df: df.pct_change(),
        modin_df_almost_equals_pandas,
    )
    eval_cummax(modin_groupby, pandas_groupby)

    apply_functions = [lambda df: df.sum(), lambda df: -df]
    for func in apply_functions:
        eval_apply(modin_groupby, pandas_groupby, func)

    eval_dtypes(modin_groupby, pandas_groupby)
    eval_general(modin_groupby, pandas_groupby, lambda df: df.first())
    eval_cummin(modin_groupby, pandas_groupby)
    eval_general(modin_groupby, pandas_groupby, lambda df: df.bfill())
    eval_general(modin_groupby, pandas_groupby, lambda df: df.idxmin())
    # eval_prod(modin_groupby, pandas_groupby) causes overflows
    eval_std(modin_groupby, pandas_groupby)

    # Aggregations given as a callable, method names, builtins and per-column dicts.
    agg_functions = [
        lambda df: df.sum(),
        "min",
        "max",
        min,
        sum,
        {"A": "sum"},
        {"A": lambda df: df.sum()},
        {"A": "max", "B": "sum", "C": "min"},
    ]
    for func in agg_functions:
        eval_agg(modin_groupby, pandas_groupby, func)

    eval_general(modin_groupby, pandas_groupby, lambda df: df.last())
    eval_rank(modin_groupby, pandas_groupby)
    eval_max(modin_groupby, pandas_groupby)
    eval_var(modin_groupby, pandas_groupby)
    eval_len(modin_groupby, pandas_groupby)
    eval_sum(modin_groupby, pandas_groupby)
    eval_ngroup(modin_groupby, pandas_groupby)
    eval_nunique(modin_groupby, pandas_groupby)
    eval_value_counts(modin_groupby, pandas_groupby)
    eval_median(modin_groupby, pandas_groupby)
    eval_general(modin_groupby, pandas_groupby, lambda df: df.head(n))
    # eval_cumprod(modin_groupby, pandas_groupby) causes overflows
    eval_general(
        modin_groupby,
        pandas_groupby,
        lambda df: df.cov(),
        modin_df_almost_equals_pandas,
    )

    transform_functions = [lambda df: df + 4, lambda df: -df - 10]
    for func in transform_functions:
        eval_transform(modin_groupby, pandas_groupby, func)

    pipe_functions = [lambda dfgb: dfgb.sum()]
    for func in pipe_functions:
        eval_pipe(modin_groupby, pandas_groupby, func)

    eval_general(
        modin_groupby,
        pandas_groupby,
        lambda df: df.corr(),
        modin_df_almost_equals_pandas,
    )
    eval_fillna(modin_groupby, pandas_groupby)
    eval_count(modin_groupby, pandas_groupby)
    eval_size(modin_groupby, pandas_groupby)
    eval_general(modin_groupby, pandas_groupby, lambda df: df.tail(n))
    eval_quantile(modin_groupby, pandas_groupby)
    eval_general(modin_groupby, pandas_groupby, lambda df: df.take([0]))
    eval_groups(modin_groupby, pandas_groupby)


def test_simple_col_groupby():
    """
    Compare Modin and pandas groupby results when grouping along ``axis=1``.

    The five columns of a small integer frame are assigned to groups through a
    list of five keys. Several checks are disabled below, with the reason given
    in the comment next to each of them.
    """
    pandas_df = pandas.DataFrame(
        {
            "col1": [0, 3, 2, 3],
            "col2": [4, 1, 6, 7],
            "col3": [3, 8, 2, 10],
            "col4": [1, 13, 6, 15],
            "col5": [-4, 5, 6, -7],
        }
    )

    modin_df = from_pandas(pandas_df)

    # One group key per column (the frame has five columns).
    by = [1, 2, 3, 2, 1]

    modin_groupby = modin_df.groupby(axis=1, by=by)
    pandas_groupby = pandas_df.groupby(axis=1, by=by)

    modin_groupby_equals_pandas(modin_groupby, pandas_groupby)
    eval_ngroups(modin_groupby, pandas_groupby)
    eval_shift(modin_groupby, pandas_groupby)
    eval_skew(modin_groupby, pandas_groupby)
    eval_general(modin_groupby, pandas_groupby, lambda df: df.ffill())
    eval_general(
        modin_groupby,
        pandas_groupby,
        lambda df: df.sem(),
        modin_df_almost_equals_pandas,
    )
    eval_mean(modin_groupby, pandas_groupby)
    eval_any(modin_groupby, pandas_groupby)
    eval_min(modin_groupby, pandas_groupby)
    eval_ndim(modin_groupby, pandas_groupby)

    eval_general(modin_groupby, pandas_groupby, lambda df: df.idxmax())
    eval_general(modin_groupby, pandas_groupby, lambda df: df.idxmin())
    eval_quantile(modin_groupby, pandas_groupby)

    # https://github.com/pandas-dev/pandas/issues/21127
    # eval_cumsum(modin_groupby, pandas_groupby)
    # eval_cummax(modin_groupby, pandas_groupby)
    # eval_cummin(modin_groupby, pandas_groupby)
    # eval_cumprod(modin_groupby, pandas_groupby)

    eval_general(
        modin_groupby,
        pandas_groupby,
        lambda df: df.pct_change(),
        modin_df_almost_equals_pandas,
    )
    apply_functions = [lambda df: -df, lambda df: df.sum(axis=1)]
    for func in apply_functions:
        eval_apply(modin_groupby, pandas_groupby, func)

    eval_general(modin_groupby, pandas_groupby, lambda df: df.first())
    eval_general(modin_groupby, pandas_groupby, lambda df: df.bfill())
    eval_prod(modin_groupby, pandas_groupby)
    eval_std(modin_groupby, pandas_groupby)
    eval_general(modin_groupby, pandas_groupby, lambda df: df.last())
    eval_max(modin_groupby, pandas_groupby)
    eval_var(modin_groupby, pandas_groupby)
    eval_len(modin_groupby, pandas_groupby)
    eval_sum(modin_groupby, pandas_groupby)

    # Pandas fails on this case with ValueError
    # eval_ngroup(modin_groupby, pandas_groupby)
    # eval_nunique(modin_groupby, pandas_groupby)
    # NotImplementedError: DataFrameGroupBy.value_counts only handles axis=0
    # eval_value_counts(modin_groupby, pandas_groupby)
    eval_median(modin_groupby, pandas_groupby)
    eval_general(
        modin_groupby,
        pandas_groupby,
        lambda df: df.cov(),
        modin_df_almost_equals_pandas,
    )

    transform_functions = [lambda df: df + 4, lambda df: -df - 10]
    for func in transform_functions:
        eval_transform(modin_groupby, pandas_groupby, func)

    pipe_functions = [lambda dfgb: dfgb.sum()]
    for func in pipe_functions:
        eval_pipe(modin_groupby, pandas_groupby, func)

    eval_general(
        modin_groupby,
        pandas_groupby,
        lambda df: df.corr(),
        modin_df_almost_equals_pandas,
    )
    eval_fillna(modin_groupby, pandas_groupby)
    eval_count(modin_groupby, pandas_groupby)
    eval_size(modin_groupby, pandas_groupby)
    eval_general(modin_groupby, pandas_groupby, lambda df: df.take([0]))

    # https://github.com/pandas-dev/pandas/issues/54858
    # eval_groups(modin_groupby, pandas_groupby)


@pytest.mark.parametrize(
    "by", [np.random.randint(0, 100, size=2**8), lambda x: x % 3, None]
)
@pytest.mark.parametrize("as_index_series_or_dataframe", [0, 1, 2])
def test_series_groupby(by, as_index_series_or_dataframe):
    """
    Compare Modin and pandas results for groupby objects over a single column.

    Parameters
    ----------
    by : np.ndarray, callable or None
        Grouping specification passed to ``groupby``.
    as_index_series_or_dataframe : {0, 1, 2}
        Test mode. ``0`` and ``1`` group a Series of 2**8 random integers with
        ``as_index=False`` and ``as_index=True`` respectively. ``2`` groups a
        small DataFrame with ``as_index=True`` and then selects ``"col1"`` from
        the groupby object.
    """
    if as_index_series_or_dataframe <= 1:
        as_index = as_index_series_or_dataframe == 1
        series_data = np.random.randint(97, 198, size=2**8)
        modin_series = pd.Series(series_data)
        pandas_series = pandas.Series(series_data)
    else:
        as_index = True
        # In this mode the "series" variables actually hold DataFrames.
        pandas_series = pandas.DataFrame(
            {
                "col1": [0, 1, 2, 3],
                "col2": [4, 5, 6, 7],
                "col3": [3, 8, 12, 10],
                "col4": [17, 13, 16, 15],
                "col5": [-4, -5, -6, -7],
            }
        )
        modin_series = from_pandas(pandas_series)
        # The parametrized array has 2**8 elements and ``None`` is not a usable key,
        # so generate keys that match the length of this 4-row frame instead.
        if isinstance(by, np.ndarray) or by is None:
            by = np.random.randint(0, 100, size=len(pandas_series.index))

    # Number of rows requested from ``head``/``tail`` below.
    n = 1

    try:
        pandas_groupby = pandas_series.groupby(by, as_index=as_index)
        if as_index_series_or_dataframe == 2:
            pandas_groupby = pandas_groupby["col1"]
    except Exception as err:
        # If pandas rejects the arguments, Modin must raise the same exception type.
        with pytest.raises(type(err)):
            modin_series.groupby(by, as_index=as_index)
    else:
        modin_groupby = modin_series.groupby(by, as_index=as_index)
        if as_index_series_or_dataframe == 2:
            modin_groupby = modin_groupby["col1"]

        modin_groupby_equals_pandas(modin_groupby, pandas_groupby)
        eval_ngroups(modin_groupby, pandas_groupby)
        eval_shift(modin_groupby, pandas_groupby)
        eval_general(modin_groupby, pandas_groupby, lambda df: df.ffill())
        eval_general(
            modin_groupby,
            pandas_groupby,
            lambda df: df.sem(),
            modin_df_almost_equals_pandas,
        )
        eval_general(
            modin_groupby, pandas_groupby, lambda df: df.sample(random_state=1)
        )
        eval_general(modin_groupby, pandas_groupby, lambda df: df.ewm(com=0.5).std())
        eval_general(
            modin_groupby, pandas_groupby, lambda df: df.is_monotonic_decreasing
        )
        eval_general(
            modin_groupby, pandas_groupby, lambda df: df.is_monotonic_increasing
        )
        eval_general(modin_groupby, pandas_groupby, lambda df: df.nlargest())
        eval_general(modin_groupby, pandas_groupby, lambda df: df.nsmallest())
        eval_general(modin_groupby, pandas_groupby, lambda df: df.unique())
        eval_general(modin_groupby, pandas_groupby, lambda df: df.dtype)
        eval_mean(modin_groupby, pandas_groupby)
        eval_any(modin_groupby, pandas_groupby)
        eval_min(modin_groupby, pandas_groupby)
        eval_general(modin_groupby, pandas_groupby, lambda df: df.idxmax())
        eval_ndim(modin_groupby, pandas_groupby)
        eval_cumsum(modin_groupby, pandas_groupby)
        eval_general(
            modin_groupby,
            pandas_groupby,
            lambda df: df.pct_change(),
            modin_df_almost_equals_pandas,
        )
        eval_general(
            modin_groupby,
            pandas_groupby,
            lambda df: df.diff(periods=2),
        )
        eval_general(
            modin_groupby,
            pandas_groupby,
            lambda df: df.diff(periods=-1),
        )
        eval_cummax(modin_groupby, pandas_groupby)

        apply_functions = [lambda df: df.sum(), min]
        for func in apply_functions:
            eval_apply(modin_groupby, pandas_groupby, func)

        eval_general(modin_groupby, pandas_groupby, lambda df: df.first())
        eval_cummin(modin_groupby, pandas_groupby)
        eval_general(modin_groupby, pandas_groupby, lambda df: df.bfill())
        eval_general(modin_groupby, pandas_groupby, lambda df: df.idxmin())
        eval_prod(modin_groupby, pandas_groupby)
        # std/var/skew are only compared for ``as_index=True``.
        if as_index:
            eval_std(modin_groupby, pandas_groupby)
            eval_var(modin_groupby, pandas_groupby)
            eval_skew(modin_groupby, pandas_groupby)

        # Aggregations given as a callable, method names, builtins, NumPy functions
        # and lists of several aggregations.
        agg_functions = [
            lambda df: df.sum(),
            "min",
            "max",
            max,
            sum,
            np.mean,
            ["min", "max"],
            [np.mean, np.std, np.var, np.max, np.min],
        ]
        for func in agg_functions:
            eval_agg(modin_groupby, pandas_groupby, func)

        eval_general(modin_groupby, pandas_groupby, lambda df: df.last())
        eval_rank(modin_groupby, pandas_groupby)
        eval_max(modin_groupby, pandas_groupby)
        eval_len(modin_groupby, pandas_groupby)
        eval_sum(modin_groupby, pandas_groupby)
        eval_size(modin_groupby, pandas_groupby)
        eval_ngroup(modin_groupby, pandas_groupby)
        eval_nunique(modin_groupby, pandas_groupby)
        eval_value_counts(modin_groupby, pandas_groupby)
        eval_median(modin_groupby, pandas_groupby)
        eval_general(modin_groupby, pandas_groupby, lambda df: df.head(n))
        eval_cumprod(modin_groupby, pandas_groupby)

        transform_functions = [lambda df: df + 4, lambda df: -df - 10]
        for func in transform_functions:
            eval_transform(modin_groupby, pandas_groupby, func)

        pipe_functions = [lambda dfgb: dfgb.sum()]
        for func in pipe_functions:
            eval_pipe(modin_groupby, pandas_groupby, func)

        eval_fillna(modin_groupby, pandas_groupby)
        eval_count(modin_groupby, pandas_groupby)
        eval_general(modin_groupby, pandas_groupby, lambda df: df.tail(n))
        eval_quantile(modin_groupby, pandas_groupby)
        eval_general(modin_groupby, pandas_groupby, lambda df: df.take([0]))
        eval_groups(modin_groupby, pandas_groupby)


def test_agg_udf_6600():
    """
    Check ``SeriesGroupBy.agg`` with a user-defined function.

    The UDF is passed on its own, inside a single-element list, and in a list
    together with a built-in aggregation name. Presumably a regression test for
    Modin issue 6600 (inferred from the test name only).
    """
    modin_teams, pandas_teams = create_test_dfs(
        {
            "name": ["Mariners", "Lakers"] * 50,
            "league_abbreviation": ["MLB", "NBA"] * 50,
        }
    )

    def my_first_item(s):
        # Return the first element of the group.
        return s.iloc[0]

    aggregations = (
        my_first_item,
        [my_first_item],
        ["nunique", my_first_item],
    )
    for agg in aggregations:
        eval_general(
            modin_teams,
            pandas_teams,
            operation=lambda df: df.groupby("league_abbreviation").name.agg(agg),
        )


def test_multi_column_groupby():
    """
    Check grouping a 1000-row random frame by two column labels.

    Verifies that ``count`` matches pandas, that iterating over the Modin
    groupby emits a ``UserWarning`` and yields tuple keys, and that grouping
    by the labels ``"row0"``/``"row1"`` along ``axis=1`` raises ``KeyError``.
    """
    n_rows = 1000
    # Columns are generated in the order col1..col5, one random draw per column.
    column_data = {
        f"col{i}": np.random.randint(0, 100, size=n_rows) for i in range(1, 6)
    }
    row_labels = [f"row{i}" for i in range(n_rows)]
    pandas_df = pandas.DataFrame(column_data, index=row_labels)
    modin_df = from_pandas(pandas_df)

    group_cols = ["col1", "col2"]
    modin_counts = modin_df.groupby(group_cols).count()
    pandas_counts = pandas_df.groupby(group_cols).count()
    df_equals(modin_counts, pandas_counts)

    with pytest.warns(UserWarning):
        for key, _ in modin_df.groupby(group_cols):
            assert isinstance(key, tuple)

    row_keys = ["row0", "row1"]
    with pytest.raises(KeyError):
        modin_df.groupby(row_keys, axis=1).count()


def sort_if_experimental_groupby(*dfs):
    """
    Bring frames to a comparable row order when range-partitioning is enabled.

    Should be applied before comparing results of ``groupby.transform``, because
    the experimental implementation changes the order of rows:
    https://github.com/modin-project/modin/issues/5924

    Parameters
    ----------
    *dfs : objects to compare
        Frames or Series produced by Modin and/or pandas.

    Returns
    -------
    tuple or list
        The inputs unchanged (as a tuple) when ``RangePartitioning`` is
        disabled; otherwise a list of pandas objects, Series sorted by index
        and frames sorted by their values.
    """
    if not RangePartitioning.get():
        return dfs

    ordered = []
    for frame in try_cast_to_pandas(dfs):
        if frame.ndim == 1:
            # Series case
            ordered.append(frame.sort_index())
        else:
            # filtering out index names in order to avoid:
            # ValueError: 'col' is both an index level and a column label, which is ambiguous.
            sortable_cols = frame.columns.difference(
                frame.index.names, sort=False
            ).tolist()
            ordered.append(frame.sort_values(sortable_cols))
    return ordered


def eval_ngroups(modin_groupby, pandas_groupby):
    """Assert that both groupby objects report the same ``ngroups``."""
    modin_ngroups = modin_groupby.ngroups
    pandas_ngroups = pandas_groupby.ngroups
    assert modin_ngroups == pandas_ngroups


def eval_skew(modin_groupby, pandas_groupby, numeric_only=False):
    """Compare ``skew`` results of both groupby objects via ``modin_df_almost_equals_pandas``."""
    modin_result = modin_groupby.skew(numeric_only=numeric_only)
    pandas_result = pandas_groupby.skew(numeric_only=numeric_only)
    modin_df_almost_equals_pandas(modin_result, pandas_result)


def eval_mean(modin_groupby, pandas_groupby, numeric_only=False):
    """Compare ``mean`` results of both groupby objects via ``modin_df_almost_equals_pandas``."""
    modin_result = modin_groupby.mean(numeric_only=numeric_only)
    pandas_result = pandas_groupby.mean(numeric_only=numeric_only)
    modin_df_almost_equals_pandas(modin_result, pandas_result)


def eval_any(modin_groupby, pandas_groupby):
    """Compare ``any()`` results of both groupby objects with ``df_equals``."""
    modin_result = modin_groupby.any()
    pandas_result = pandas_groupby.any()
    df_equals(modin_result, pandas_result)


def eval_min(modin_groupby, pandas_groupby):
    """Compare ``min()`` results of both groupby objects with ``df_equals``."""
    modin_result = modin_groupby.min()
    pandas_result = pandas_groupby.min()
    df_equals(modin_result, pandas_result)


def eval_ndim(modin_groupby, pandas_groupby):
    """Assert that both groupby objects report the same ``ndim``."""
    modin_ndim = modin_groupby.ndim
    pandas_ndim = pandas_groupby.ndim
    assert modin_ndim == pandas_ndim


def eval_cumsum(modin_groupby, pandas_groupby, axis=lib.no_default, numeric_only=False):
    """Compare ``cumsum`` results, sorting them first via ``sort_if_experimental_groupby``."""
    modin_result = modin_groupby.cumsum(axis=axis, numeric_only=numeric_only)
    pandas_result = pandas_groupby.cumsum(axis=axis, numeric_only=numeric_only)
    df_equals(*sort_if_experimental_groupby(modin_result, pandas_result))


def eval_cummax(modin_groupby, pandas_groupby, axis=lib.no_default, numeric_only=False):
    """Compare ``cummax`` results, sorting them first via ``sort_if_experimental_groupby``."""
    modin_result = modin_groupby.cummax(axis=axis, numeric_only=numeric_only)
    pandas_result = pandas_groupby.cummax(axis=axis, numeric_only=numeric_only)
    df_equals(*sort_if_experimental_groupby(modin_result, pandas_result))


def eval_cummin(modin_groupby, pandas_groupby, axis=lib.no_default, numeric_only=False):
    """Compare ``cummin`` results, sorting them first via ``sort_if_experimental_groupby``."""
    modin_result = modin_groupby.cummin(axis=axis, numeric_only=numeric_only)
    pandas_result = pandas_groupby.cummin(axis=axis, numeric_only=numeric_only)
    df_equals(*sort_if_experimental_groupby(modin_result, pandas_result))


def eval_apply(modin_groupby, pandas_groupby, func):
    """Compare the results of ``apply(func)`` on both groupby objects with ``df_equals``."""
    modin_result = modin_groupby.apply(func)
    pandas_result = pandas_groupby.apply(func)
    df_equals(modin_result, pandas_result)


def eval_dtypes(modin_groupby, pandas_groupby):
    """Compare the ``dtypes`` attribute of both groupby objects with ``df_equals``."""
    modin_dtypes = modin_groupby.dtypes
    pandas_dtypes = pandas_groupby.dtypes
    df_equals(modin_dtypes, pandas_dtypes)


def eval_prod(modin_groupby, pandas_groupby, numeric_only=False):
    """Compare ``prod`` results of both groupby objects with ``df_equals``."""
    modin_result = modin_groupby.prod(numeric_only=numeric_only)
    pandas_result = pandas_groupby.prod(numeric_only=numeric_only)
    df_equals(modin_result, pandas_result)


def eval_std(modin_groupby, pandas_groupby, numeric_only=False):
    """Compare ``std`` results of both groupby objects via ``modin_df_almost_equals_pandas``."""
    modin_result = modin_groupby.std(numeric_only=numeric_only)
    pandas_result = pandas_groupby.std(numeric_only=numeric_only)
    modin_df_almost_equals_pandas(modin_result, pandas_result)


def eval_agg(modin_groupby, pandas_groupby, func):
    """Compare the results of ``agg(func)`` on both groupby objects with ``df_equals``."""
    modin_result = modin_groupby.agg(func)
    pandas_result = pandas_groupby.agg(func)
    df_equals(modin_result, pandas_result)


def eval_rank(modin_groupby, pandas_groupby):
    """Compare ``rank()`` results of both groupby objects with ``df_equals``."""
    modin_result = modin_groupby.rank()
    pandas_result = pandas_groupby.rank()
    df_equals(modin_result, pandas_result)


def eval_max(modin_groupby, pandas_groupby):
    """Compare ``max()`` results of both groupby objects with ``df_equals``."""
    modin_result = modin_groupby.max()
    pandas_result = pandas_groupby.max()
    df_equals(modin_result, pandas_result)


def eval_var(modin_groupby, pandas_groupby, numeric_only=False):
    """Compare ``var`` results of both groupby objects via ``modin_df_almost_equals_pandas``."""
    modin_result = modin_groupby.var(numeric_only=numeric_only)
    pandas_result = pandas_groupby.var(numeric_only=numeric_only)
    modin_df_almost_equals_pandas(modin_result, pandas_result)


def eval_len(modin_groupby, pandas_groupby):
    """Assert that ``len()`` of both groupby objects is the same."""
    modin_length = len(modin_groupby)
    pandas_length = len(pandas_groupby)
    assert modin_length == pandas_length


def eval_sum(modin_groupby, pandas_groupby):
    """Compare ``sum()`` results of both groupby objects with ``df_equals``."""
    modin_result = modin_groupby.sum()
    pandas_result = pandas_groupby.sum()
    df_equals(modin_result, pandas_result)


def eval_ngroup(modin_groupby, pandas_groupby):
    """Compare ``ngroup()`` results of both groupby objects with ``df_equals``."""
    modin_result = modin_groupby.ngroup()
    pandas_result = pandas_groupby.ngroup()
    df_equals(modin_result, pandas_result)


def eval_nunique(modin_groupby, pandas_groupby):
    """Compare ``nunique()`` results of both groupby objects with ``df_equals``."""
    modin_result = modin_groupby.nunique()
    pandas_result = pandas_groupby.nunique()
    df_equals(modin_result, pandas_result)


def eval_value_counts(modin_groupby, pandas_groupby):
    """Compare ``value_counts()`` results of both groupby objects with ``df_equals``."""
    modin_result = modin_groupby.value_counts()
    pandas_result = pandas_groupby.value_counts()
    df_equals(modin_result, pandas_result)


def eval_median(modin_groupby, pandas_groupby, numeric_only=False):
    """Compare ``median`` results of both groupby objects via ``modin_df_almost_equals_pandas``."""
    modin_result = modin_groupby.median(numeric_only=numeric_only)
    pandas_result = pandas_groupby.median(numeric_only=numeric_only)
    modin_df_almost_equals_pandas(modin_result, pandas_result)


def eval_cumprod(
    modin_groupby, pandas_groupby, axis=lib.no_default, numeric_only=False
):
    """
    Compare ``cumprod`` results of both groupby objects.

    The comparison is done twice: first without passing ``axis`` at all, then
    with the given ``axis``. Results go through ``sort_if_experimental_groupby``
    before being compared.
    """
    for extra_kwargs in ({}, {"axis": axis}):
        modin_result = modin_groupby.cumprod(numeric_only=numeric_only, **extra_kwargs)
        pandas_result = pandas_groupby.cumprod(
            numeric_only=numeric_only, **extra_kwargs
        )
        df_equals(*sort_if_experimental_groupby(modin_result, pandas_result))


def eval_transform(modin_groupby, pandas_groupby, func):
    """Compare ``transform(func)`` results, sorting them first via ``sort_if_experimental_groupby``."""
    modin_result = modin_groupby.transform(func)
    pandas_result = pandas_groupby.transform(func)
    df_equals(*sort_if_experimental_groupby(modin_result, pandas_result))


def eval_fillna(modin_groupby, pandas_groupby):
    """Compare ``fillna(method="ffill")`` results, sorting them first via ``sort_if_experimental_groupby``."""
    modin_result = modin_groupby.fillna(method="ffill")
    pandas_result = pandas_groupby.fillna(method="ffill")
    df_equals(*sort_if_experimental_groupby(modin_result, pandas_result))


def eval_count(modin_groupby, pandas_groupby):
    """Compare ``count()`` results of both groupby objects with ``df_equals``."""
    modin_result = modin_groupby.count()
    pandas_result = pandas_groupby.count()
    df_equals(modin_result, pandas_result)


def eval_size(modin_groupby, pandas_groupby):
    """Compare ``size()`` results of both groupby objects with ``df_equals``."""
    modin_result = modin_groupby.size()
    pandas_result = pandas_groupby.size()
    df_equals(modin_result, pandas_result)


def eval_pipe(modin_groupby, pandas_groupby, func):
    """Compare the results of ``pipe(func)`` on both groupby objects with ``df_equals``."""
    modin_result = modin_groupby.pipe(func)
    pandas_result = pandas_groupby.pipe(func)
    df_equals(modin_result, pandas_result)


def eval_quantile(modin_groupby, pandas_groupby):
    """
    Compare ``quantile(q=0.4, numeric_only=True)`` of both groupby objects.

    If pandas raises, Modin is required to raise an exception of the same type;
    otherwise the two results are compared with ``df_equals``.
    """
    quantile_kwargs = {"q": 0.4, "numeric_only": True}
    try:
        expected = pandas_groupby.quantile(**quantile_kwargs)
    except Exception as pandas_error:
        with pytest.raises(type(pandas_error)):
            modin_groupby.quantile(**quantile_kwargs)
        return
    df_equals(modin_groupby.quantile(**quantile_kwargs), expected)


def eval___getattr__(modin_groupby, pandas_groupby, item):
    """
    Check that selecting ``item`` from a groupby works both ways.

    ``count()`` is evaluated on the selection obtained through ``grp[item]`` and
    then through ``getattr(grp, item)``; each result is compared with a
    comparator built by ``build_types_asserter(df_equals)``.
    """
    selectors = (
        lambda grp: grp[item].count(),
        lambda grp: getattr(grp, item).count(),
    )
    for selector in selectors:
        eval_general(
            modin_groupby,
            pandas_groupby,
            selector,
            comparator=build_types_asserter(df_equals),
        )


def eval___getitem__(md_grp, pd_grp, item, expected_exception=None):
    """
    Run a set of aggregations on ``grp[item]`` for both groupby objects.

    The checks run in this order: ``mean``, ``count``, ``agg(["mean"])``,
    ``agg(["mean", "count"])`` and finally ``sum`` executed through
    ``_default_to_pandas`` for the Modin object. Every check goes through
    ``eval_general`` with a ``build_types_asserter(df_equals)`` comparator and
    the given ``expected_exception``.
    """

    def check(operation):
        eval_general(
            md_grp,
            pd_grp,
            operation,
            comparator=build_types_asserter(df_equals),
            expected_exception=expected_exception,
        )

    def build_list_agg(fns):
        def test(grp):
            res = grp[item].agg(fns)
            if res.ndim != 2:
                return res
            # `as_index=False` case
            new_axis = ["index"] + fns if "index" in res.columns else fns
            # Modin's frame has an extra level in the result. Aligning columns to compare.
            # https://github.com/modin-project/modin/issues/3490
            return res.set_axis(new_axis, axis=1)

        return test

    def sum_via_default_to_pandas(grp):
        # Defaulting to pandas only for Modin groupby objects
        if isinstance(grp, pd.groupby.DataFrameGroupBy):
            return grp[item]._default_to_pandas(lambda df: df.sum())
        return grp[item].sum()

    check(lambda grp: grp[item].mean())
    check(lambda grp: grp[item].count())
    check(build_list_agg(["mean"]))
    check(build_list_agg(["mean", "count"]))
    # Explicit default-to-pandas test
    check(sum_via_default_to_pandas)


def eval_groups(modin_groupby, pandas_groupby):
    """
    Compare ``groups`` and, unless ``RangePartitioning`` is enabled, ``get_group``.

    Every entry of the Modin ``groups`` mapping must ``equals`` the pandas entry
    stored under the same key.
    """
    for key, modin_labels in modin_groupby.groups.items():
        assert modin_labels.equals(pandas_groupby.groups[key])
    # `.get_group()` doesn't work correctly with experimental groupby:
    # https://github.com/modin-project/modin/issues/6093
    if not RangePartitioning.get():
        for key in pandas_groupby.groups:
            modin_group = modin_groupby.get_group(key)
            pandas_group = pandas_groupby.get_group(key)
            df_equals(modin_group, pandas_group)


def eval_shift(modin_groupby, pandas_groupby, comparator=None):
    """
    Compare ``groupby.shift`` for several argument combinations.

    ``shift()``, ``shift(periods=0)`` and ``shift(periods=-3)`` are always checked.
    The ``fill_value=777`` variants are checked only when the execution is neither
    "BaseOnPython" nor native. If no ``comparator`` is given, results are passed
    through ``sort_if_experimental_groupby`` and compared with ``df_equals``.
    """
    if comparator is None:

        def comparator(df1, df2):
            df_equals(*sort_if_experimental_groupby(df1, df2))

    for shift_kwargs in ({}, {"periods": 0}, {"periods": -3}):
        eval_general(
            modin_groupby,
            pandas_groupby,
            # bind the current kwargs as a default to avoid late binding of the loop variable
            lambda groupby, shift_kwargs=shift_kwargs: groupby.shift(**shift_kwargs),
            comparator=comparator,
        )

    # Disabled for `BaseOnPython` because of the issue with `getitem_array`.
    # groupby.shift internally masks the source frame with a Series boolean mask,
    # doing so ends up in the `getitem_array` method, that is broken for `BaseOnPython`:
    # https://github.com/modin-project/modin/issues/3701
    if get_current_execution() == "BaseOnPython" or current_execution_is_native():
        return

    if not isinstance(pandas_groupby, pandas.core.groupby.DataFrameGroupBy):
        eval_general(
            modin_groupby,
            pandas_groupby,
            lambda groupby: groupby.shift(fill_value=777),
            comparator=comparator,
        )
        return

    pandas_res = pandas_groupby.shift(axis=1, fill_value=777)
    modin_res = modin_groupby.shift(axis=1, fill_value=777)
    # Pandas produces unexpected index order (pandas GH 44269).
    # Here we align index of Modin result with pandas to make test passed.
    from pandas.core import algorithms

    positions, _ = modin_res.index.get_indexer_non_unique(modin_res.index._values)
    modin_res = modin_res.take(algorithms.unique1d(positions))

    comparator(modin_res, pandas_res)


def test_groupby_on_index_values_with_loop():
    """
    Iterate over groupby objects built from the index (and the columns with ``axis=1``).

    For both groupers the groups produced by Modin and pandas must have the same
    keys, and the frames stored under each key must be equal.
    """
    length = 2**6
    data = {
        "a": np.random.randint(0, 100, size=length),
        "b": np.random.randint(0, 100, size=length),
        "c": np.random.randint(0, 100, size=length),
    }
    idx = ["g1" if i % 3 != 0 else "g2" for i in range(length)]
    modin_df = pd.DataFrame(data, index=idx, columns=list("aba"))
    pandas_df = pandas.DataFrame(data, index=idx, columns=list("aba"))

    def assert_same_groups(modin_groupby_obj, pandas_groupby_obj):
        # NOTE(review): kept as comprehensions on purpose; `dict(groupby)` may treat
        # the groupby object as a mapping instead of an iterable of pairs - confirm
        # before simplifying.
        modin_dict = {k: v for k, v in modin_groupby_obj}
        pandas_dict = {k: v for k, v in pandas_groupby_obj}

        # Without this check the loop below would pass vacuously when Modin
        # yields fewer groups than pandas (or no groups at all).
        assert modin_dict.keys() == pandas_dict.keys()

        for key, modin_group in modin_dict.items():
            df_equals(modin_group, pandas_dict[key])

    assert_same_groups(
        modin_df.groupby(modin_df.index), pandas_df.groupby(pandas_df.index)
    )
    assert_same_groups(
        modin_df.groupby(modin_df.columns, axis=1),
        pandas_df.groupby(pandas_df.columns, axis=1),
    )


def test_groupby_getitem_preserves_key_order_issue_6154():
    """Select ``[["col 3", "col 2"]]`` from a groupby and compare ``count()`` with pandas."""
    letters = np.tile(["a", "b", "c", "d", "e"], (1, 10))
    np.random.shuffle(letters[0])
    numbers = np.arange(100).reshape((50, 2))
    values = np.hstack((letters.T, numbers))
    modin_df = pd.DataFrame(values, columns=["col 1", "col 2", "col 3"])
    pandas_df = modin_df._to_pandas()

    def count_selected(df):
        return df.groupby("col 1")[["col 3", "col 2"]].count()

    eval_general(modin_df, pandas_df, count_selected)


@pytest.mark.parametrize(
    "groupby_kwargs",
    [
        pytest.param({"level": 1, "axis": 1}, id="level_idx_axis=1"),
        pytest.param({"level": 1}, id="level_idx"),
        pytest.param({"level": [1, "four"]}, id="level_idx+name"),
        pytest.param({"by": "four"}, id="level_name"),
        pytest.param({"by": ["one", "two"]}, id="level_name_multi_by"),
        pytest.param({"by": ["item0", "one", "two"]}, id="col_name+level_name"),
    ],
)
def test_groupby_multiindex(groupby_kwargs):
    """Group frames with MultiIndex labels by levels and/or columns and compare with pandas."""
    side = 2**6
    frame_data = np.random.randint(0, 100, size=(side, side))
    modin_df = pd.DataFrame(frame_data)
    pandas_df = pandas.DataFrame(frame_data)

    new_index = pandas.Index([f"item{i}" for i in range(len(pandas_df))])
    level_tuples = [(i // 4, i // 2, i) for i in modin_df.columns]
    new_columns = pandas.MultiIndex.from_tuples(
        level_tuples, names=["four", "two", "one"]
    )
    for df in (modin_df, pandas_df):
        df.columns = new_columns
        df.index = new_index

    # For row-wise grouping the MultiIndex has to be on the index, hence the transpose.
    if groupby_kwargs.get("axis", 0) == 0:
        modin_df, pandas_df = modin_df.T, pandas_df.T

    md_grp = modin_df.groupby(**groupby_kwargs)
    pd_grp = pandas_df.groupby(**groupby_kwargs)

    modin_groupby_equals_pandas(md_grp, pd_grp)
    df_equals(md_grp.sum(), pd_grp.sum())
    df_equals(md_grp.size(), pd_grp.size())
    # Grouping on level works incorrectly in case of aggregation:
    # https://github.com/modin-project/modin/issues/2912
    # df_equals(md_grp.quantile(), pd_grp.quantile())
    df_equals(md_grp.first(), pd_grp.first())


@pytest.mark.parametrize("dropna", [True, False])
@pytest.mark.parametrize(
    "groupby_kwargs",
    [
        pytest.param({"level": 1, "axis": 1}, id="level_idx_axis=1"),
        pytest.param({"level": 1}, id="level_idx"),
        pytest.param({"level": [1, "four"]}, id="level_idx+name"),
        pytest.param({"by": "four"}, id="level_name"),
        pytest.param({"by": ["one", "two"]}, id="level_name_multi_by"),
        pytest.param(
            {"by": ["item0", "one", "two"]},
            id="col_name+level_name",
        ),
        pytest.param(
            {"by": ["item0"]},
            id="col_name",
        ),
        pytest.param(
            {"by": ["item0", "item1"]},
            id="col_name_multi_by",
        ),
    ],
)
def test_groupby_with_kwarg_dropna(groupby_kwargs, dropna):
    """
    Compare groupby results with pandas for both values of the ``dropna`` argument.

    The frames get MultiIndex labels and are grouped by levels and/or columns.
    Some checks are skipped for known-broken combinations (see the linked issues).
    """
    modin_df = pd.DataFrame(test_data["float_nan_data"])
    pandas_df = pandas.DataFrame(test_data["float_nan_data"])

    new_index = pandas.Index([f"item{i}" for i in range(len(pandas_df))])
    new_columns = pandas.MultiIndex.from_tuples(
        [(i // 4, i // 2, i) for i in range(len(modin_df.columns))],
        names=["four", "two", "one"],
    )
    modin_df.columns = new_columns
    modin_df.index = new_index
    pandas_df.columns = new_columns
    pandas_df.index = new_index

    if groupby_kwargs.get("axis", 0) == 0:
        modin_df = modin_df.T
        pandas_df = pandas_df.T

    md_grp, pd_grp = (
        modin_df.groupby(**groupby_kwargs, dropna=dropna),
        pandas_df.groupby(**groupby_kwargs, dropna=dropna),
    )
    modin_groupby_equals_pandas(md_grp, pd_grp)

    by_kwarg = groupby_kwargs.get("by", [])
    if isinstance(by_kwarg, str):
        # A single label has to count as exactly one key. Iterating over the raw
        # string would treat each character as a separate "by" entry, so a
        # single-key grouping would be mistaken for a multi-column one.
        by_kwarg = [by_kwarg]
    is_multi_by = len(by_kwarg) > 1
    by_has_column = any(col in modin_df.columns for col in by_kwarg)

    # Disabled because of broken `dropna=False` for TreeReduce implemented aggs:
    # https://github.com/modin-project/modin/issues/3817
    if not (not dropna and is_multi_by and by_has_column):
        df_equals(md_grp.sum(), pd_grp.sum())
        df_equals(md_grp.size(), pd_grp.size())
    # Grouping on level works incorrectly in case of aggregation:
    # https://github.com/modin-project/modin/issues/2912
    # "BaseOnPython" tests are disabled because of the bug:
    # https://github.com/modin-project/modin/issues/3827
    if (
        get_current_execution() != "BaseOnPython"
        and not current_execution_is_native()
        and by_has_column
    ):
        df_equals(md_grp.quantile(), pd_grp.quantile())
    # Default-to-pandas tests are disabled for multi-column 'by' because of the bug:
    # https://github.com/modin-project/modin/issues/3827
    if not (not dropna and is_multi_by):
        df_equals(md_grp.first(), pd_grp.first())
        df_equals(md_grp._default_to_pandas(lambda df: df.sum()), pd_grp.sum())


@pytest.mark.parametrize("groupby_axis", [lib.no_default, 1])
@pytest.mark.parametrize("shift_axis", [lib.no_default, 1])
@pytest.mark.parametrize("groupby_sort", [True, False])
def test_shift_freq(groupby_axis, shift_axis, groupby_sort):
    """Compare ``groupby.shift(freq="s")`` with pandas for datetime-labelled frames."""
    pandas_df = pandas.DataFrame(
        {
            "col1": [1, 0, 2, 3],
            "col2": [4, 5, np.nan, 7],
            "col3": [np.nan, np.nan, 12, 10],
            "col4": [17, 13, 16, 15],
        }
    )
    modin_df = from_pandas(pandas_df)

    timestamps = pandas.date_range("1/12/2020", periods=4, freq="s")
    # The index is replaced regardless of the axes being tested.
    pandas_df.index = modin_df.index = timestamps
    # NOTE(review): both axis parameters are parametrized with `lib.no_default` and `1`,
    # so this branch looks unreachable with the current parametrization - confirm
    # whether `lib.no_default` is supposed to be handled here as well.
    if groupby_axis == 0 and shift_axis == 0:
        groupers = [["col2", "col3"], ["col2"], ["col4"], [0, 1, 0, 2]]
    else:
        pandas_df.columns = modin_df.columns = timestamps
        groupers = [[0, 1, 0, 2]]

    def shift_by_second(groupby):
        return groupby.shift(axis=shift_axis, freq="s")

    for grouper in groupers:
        groupby_kwargs = {"by": grouper, "axis": groupby_axis, "sort": groupby_sort}
        pandas_groupby = pandas_df.groupby(**groupby_kwargs)
        modin_groupby = modin_df.groupby(**groupby_kwargs)
        eval_general(modin_groupby, pandas_groupby, shift_by_second)


@pytest.mark.parametrize(
    "by_and_agg_dict",
    [
        {
            "by": [
                list(test_data["int_data"].keys())[0],
                list(test_data["int_data"].keys())[1],
            ],
            "agg_dict": {
                "max": (list(test_data["int_data"].keys())[2], np.max),
                "min": (list(test_data["int_data"].keys())[2], np.min),
            },
        },
        {
            "by": ["col1"],
            "agg_dict": {
                "max": (list(test_data["int_data"].keys())[0], np.max),
                "min": (list(test_data["int_data"].keys())[-1], np.min),
            },
        },
        {
            "by": [
                list(test_data["int_data"].keys())[0],
                list(test_data["int_data"].keys())[-1],
            ],
            "agg_dict": {
                "max": (list(test_data["int_data"].keys())[1], max),
                "min": (list(test_data["int_data"].keys())[-2], min),
            },
        },
        pytest.param(
            {
                "by": [
                    list(test_data["int_data"].keys())[0],
                    list(test_data["int_data"].keys())[-1],
                ],
                "agg_dict": {
                    "max": (list(test_data["int_data"].keys())[1], max),
                    "min": (list(test_data["int_data"].keys())[-1], min),
                },
            },
            marks=pytest.mark.skip("See Modin issue #3602"),
        ),
    ],
)
@pytest.mark.parametrize("as_index", [True, False])
def test_agg_func_None_rename(by_and_agg_dict, as_index):
    """Check ``groupby.agg`` called only with keyword (renaming) aggregations."""
    modin_df, pandas_df = create_test_dfs(test_data["int_data"])
    by = by_and_agg_dict["by"]
    named_aggs = by_and_agg_dict["agg_dict"]

    modin_grp = modin_df.groupby(by, as_index=as_index)
    modin_result = modin_grp.agg(**named_aggs)
    pandas_grp = pandas_df.groupby(by, as_index=as_index)
    pandas_result = pandas_grp.agg(**named_aggs)

    df_equals(modin_result, pandas_result)


@pytest.mark.parametrize(
    "as_index",
    [
        True,
        pytest.param(
            False,
            marks=pytest.mark.skipif(
                get_current_execution() == "BaseOnPython"
                or RangePartitioning.get()
                or current_execution_is_native(),
                reason="See Pandas issue #39103",
            ),
        ),
    ],
)
@pytest.mark.parametrize("by_length", [1, 3])
@pytest.mark.parametrize(
    "agg_fns",
    [["sum", "min", "max"], ["mean", "quantile"]],
    ids=["reduce", "aggregation"],
)
@pytest.mark.parametrize(
    "intersection_with_by_cols",
    [pytest.param(True, marks=pytest.mark.skip("See Modin issue #3602")), False],
)
def test_dict_agg_rename_mi_columns(
    as_index, by_length, agg_fns, intersection_with_by_cols
):
    """Check renaming (keyword) aggregations on frames with MultiIndex columns."""
    md_df, pd_df = create_test_dfs(test_data["int_data"])
    mi_columns = generate_multiindex(len(md_df.columns), nlevels=4)
    md_df.columns = mi_columns
    pd_df.columns = mi_columns

    by = list(md_df.columns[:by_length])
    # Three columns are aggregated; when an intersection with the "by" columns
    # is requested the window starts at the last "by" column.
    first_agg_col = by_length - 1 if intersection_with_by_cols else by_length
    agg_cols = list(md_df.columns[first_agg_col : first_agg_col + 3])

    agg_dict = {}
    for i, col in enumerate(agg_cols):
        agg_fn = agg_fns[i % len(agg_fns)]
        agg_dict[f"custom-{i}{agg_fn!s}"] = (col, agg_fn)

    md_res = md_df.groupby(by, as_index=as_index).agg(**agg_dict)
    pd_res = pd_df.groupby(by, as_index=as_index).agg(**agg_dict)

    df_equals(md_res, pd_res)


def test_agg_4604():
    """Check a dict aggregation mixing string aggregations with a custom function."""
    data = {"col1": [1, 2], "col2": [3, 4]}
    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)
    # add another partition
    for df in (modin_df, pandas_df):
        df["col3"] = df["col1"]

    # problem only with custom aggregation function
    # (the function name is kept as in the original reproducer)
    def col3(x):
        return np.max(x)

    group_keys = ["col1"]
    aggregations = {"col2": ["sum", "min"], "col3": col3}

    modin_groupby = modin_df.groupby(group_keys)
    pandas_groupby = pandas_df.groupby(group_keys)
    eval_agg(modin_groupby, pandas_groupby, aggregations)


@pytest.mark.parametrize(
    "operation",
    [
        "quantile",
        "mean",
        "sum",
        "median",
        "cumprod",
    ],
)
def test_agg_exceptions(operation):
    """Run aggregations on datetime columns; ``sum`` and ``cumprod`` must raise ``TypeError``."""
    N = 256
    dates_with_gaps = [
        np.datetime64("2010"),
        None,
        np.datetime64("2007"),
        np.datetime64("2010"),
        np.datetime64("2006"),
        np.datetime64("2012"),
        None,
        np.datetime64("2011"),
    ] * (N // 8)
    plain_dates = [
        np.datetime64("2010"),
        np.datetime64("2011"),
        np.datetime64("2011-06-15T00:00"),
        np.datetime64("2009-01-01"),
    ] * (N // 4)
    fill_data = [("nan_column", dates_with_gaps), ("date_column", plain_dates)]

    data = {
        "column_to_by": ["foo", "bar", "baz", "bar"] * (N // 4),
        # Earlier, the type of this column was `object`. In such a situation,
        # when performing aggregation on different column partitions, different
        # exceptions were thrown. The exception that engines return to the main
        # process was non-deterministic, either `TypeError` or `NotImplementedError`.
        "nan_column": [np.nan] * N,
    }
    # Every template column is replicated under a numbered name.
    copies_per_template = N // len(fill_data)
    data.update(
        {
            f"{key}{i}": value
            for key, value in fill_data
            for i in range(copies_per_template)
        }
    )

    def comparator(df1, df2):
        from modin.core.dataframe.algebra.default2pandas.groupby import GroupBy

        if GroupBy.is_transformation_kernel(operation):
            df1, df2 = sort_if_experimental_groupby(df1, df2)

        df_equals(df1, df2)

    expected_messages = {
        "sum": "datetime64 type does not support sum operations",
        "cumprod": "datetime64 type does not support cumprod operations",
    }
    expected_exception = (
        TypeError(expected_messages[operation])
        if operation in expected_messages
        else None
    )

    eval_aggregation(
        *create_test_dfs(data),
        operation=operation,
        comparator=comparator,
        expected_exception=expected_exception,
    )


# The reason is assembled from several pieces; each piece ends with a space so
# that the sentences are not glued together in the test report.
@pytest.mark.skip(
    "Pandas raises a ValueError on empty dictionary aggregation since 1.2.0. "
    "It's unclear whether that was made on purpose or it is a bug. "
    "That question was asked in https://github.com/pandas-dev/pandas/issues/39609 "
    "so the test is disabled until it is answered."
)
@pytest.mark.parametrize(
    "kwargs",
    [
        {
            "Max": ("cnt", np.max),
            "Sum": ("cnt", np.sum),
            "Num": ("c", pd.Series.nunique),
            "Num1": ("c", pandas.Series.nunique),
        },
        {
            "func": {
                "Max": ("cnt", np.max),
                "Sum": ("cnt", np.sum),
                "Num": ("c", pd.Series.nunique),
                "Num1": ("c", pandas.Series.nunique),
            }
        },
    ],
)
def test_to_pandas_convertion(kwargs):
    """
    Run ``eval_aggregation`` with aggregations that reference Modin and pandas functions.

    Currently skipped, see the skip reason above.
    """
    # NOTE(review): the aggregations reference a "cnt" column that is not part of
    # `data` - confirm the fixture before re-enabling the test.
    data = {"a": [1, 2], "b": [3, 4], "c": [5, 6]}
    by = ["a", "b"]

    eval_aggregation(*create_test_dfs(data), by=by, **kwargs)


@pytest.mark.parametrize(
    # When True, do df[name], otherwise just use name
    "columns",
    [
        [(False, "a"), (False, "b"), (False, "c")],
        [(False, "a"), (False, "b")],
        [(True, "b"), (True, "a"), (True, "c")],
        [(True, "a"), (True, "b")],
        [(True, "c"), (False, "a"), (False, "b")],
        [(False, "a"), (True, "c")],
    ],
)
@pytest.mark.parametrize("drop_from_original_df", [True, False])
@pytest.mark.parametrize("as_index", [True, False])
def test_mixed_columns(columns, drop_from_original_df, as_index):
    """Group by a mix of column labels and looked-up columns and compare with pandas."""
    repeats = 64
    data = {
        "a": [1, 1, 2, 2] * repeats,
        "b": [11, 11, 22, 22] * repeats,
        "c": [111, 111, 222, 222] * repeats,
        "data": [1, 2, 3, 4] * repeats,
    }

    md_df, pd_df = create_test_dfs(data)
    md_df, md_by = get_external_groupers(md_df, columns, drop_from_original_df)
    pd_df, pd_by = get_external_groupers(pd_df, columns, drop_from_original_df)

    md_grp = md_df.groupby(md_by, as_index=as_index)
    pd_grp = pd_df.groupby(pd_by, as_index=as_index)

    def apply_sum(grp):
        return grp.apply(lambda df: df.sum(), include_groups=False)

    operations = (lambda grp: grp.size(), lambda grp: grp.sum(), apply_sum)
    for operation in operations:
        df_equals(operation(md_grp), operation(pd_grp))


@pytest.mark.parametrize("as_index", [True, False])
def test_groupby_external_grouper_duplicated_names(as_index):
    """Group by two external Series that are both unnamed or share the same name."""
    data = {
        "a": [1, 1, 2, 2] * 64,
        "b": [11, 11, 22, 22] * 64,
        "c": [111, 111, 222, 222] * 64,
        "data": [1, 2, 3, 4] * 64,
    }

    md_df, pd_df = create_test_dfs(data)

    # First pass: both groupers are unnamed; second pass: both are named "series_name".
    for series_kwargs in ({}, {"name": "series_name"}):
        md_by, pd_by = [], []
        for values in ([1, 1, 2, 2] * 64, [10, 10, 20, 20] * 64):
            md_series, pd_series = create_test_series(values, **series_kwargs)
            md_by.append(md_series)
            pd_by.append(pd_series)

        md_grp = md_df.groupby(md_by, as_index=as_index)
        pd_grp = pd_df.groupby(pd_by, as_index=as_index)

        df_equals(md_grp.sum(), pd_grp.sum())


@pytest.mark.parametrize(
    # When True, use (df[name] + 1), otherwise just use name
    "columns",
    [
        [(True, "a"), (True, "b"), (True, "c")],
        [(True, "a"), (True, "b")],
        [(False, "a"), (False, "b"), (True, "c")],
        [(False, "a"), (True, "c")],
        [(False, "a"), (True, "c"), (False, [1, 1, 2])],
        [(False, "a"), (False, "b"), (False, "c")],
        [(False, "a"), (False, "b"), (False, "c"), (False, [1, 1, 2])],
    ],
)
def test_internal_by_detection(columns):
    """Check that ``_internal_by`` holds exactly the hashable, non-looked-up ``by`` entries."""
    data = {"a": [1, 1, 2], "b": [11, 11, 22], "c": [111, 111, 222]}

    md_df = pd.DataFrame(data)
    _, by = get_external_groupers(md_df, columns, add_plus_one=True)
    md_grp = md_df.groupby(by)

    expected_internal_by = set()
    for is_lookup, col in columns:
        if is_lookup or not hashable(col):
            continue
        expected_internal_by.add(col)

    actual_internal_by = frozenset(md_grp._internal_by)
    assert frozenset(expected_internal_by) == actual_internal_by


@pytest.mark.parametrize(
    # When True, use (df[name] + 1), otherwise just use name
    "columns",
    [
        [(True, "a"), (True, "b"), (True, "c")],
        [(True, "a"), (True, "b")],
        [(False, "a"), (False, "b"), (True, "c")],
        [(False, "a"), (True, "c")],
        [(False, "a"), (True, "c"), (False, [1, 1, 2])],
    ],
)
@pytest.mark.parametrize("as_index", [True, False])
def test_mixed_columns_not_from_df(columns, as_index):
    """
    Unlike the previous test, in this case the Series is not just a column from
    the original DataFrame, so you can't use a fasttrack.
    """
    data = {"a": [1, 1, 2], "b": [11, 11, 22], "c": [111, 111, 222]}

    md_df, pd_df = create_test_dfs(data)
    _, by_md = get_external_groupers(md_df, columns, add_plus_one=True)
    _, by_pd = get_external_groupers(pd_df, columns, add_plus_one=True)

    pd_grp = pd_df.groupby(by_pd, as_index=as_index)
    md_grp = md_df.groupby(by_md, as_index=as_index)

    modin_groupby_equals_pandas(md_grp, pd_grp)

    def apply_sum(grp):
        return grp.apply(lambda df: df.sum(), include_groups=False)

    for operation in (lambda grp: grp.size(), apply_sum, lambda grp: grp.first()):
        eval_general(md_grp, pd_grp, operation)


@pytest.mark.parametrize(
    # When True, do df[obj], otherwise just use the obj
    "columns",
    [
        [(False, "a")],
        [(False, "a"), (False, "b"), (False, "c")],
        [(False, "a"), (False, "b")],
        [(False, "b"), (False, "a")],
        [(True, "a"), (True, "b"), (True, "c")],
        [(True, "a"), (True, "b")],
        [(False, "a"), (False, "b"), (True, "c")],
        [(False, "a"), (True, "c")],
        [(False, "a"), (False, pd.Series([5, 6, 7, 8]))],
    ],
)
def test_unknown_groupby(columns):
    """Both pandas and Modin must raise ``KeyError`` for the given groupers (no "a" column exists)."""
    data = {"b": [11, 11, 22, 200], "c": [111, 111, 222, 7000]}
    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)

    # pandas is checked first, then Modin
    for df in (pandas_df, modin_df):
        with pytest.raises(KeyError):
            df.groupby(by=get_external_groupers(df, columns)[1])


# NOTE: the xfail check inside the test relies on `request.node.name`, i.e. on the
# `id=` values given to the parameters below - keep them in sync when editing.
@pytest.mark.parametrize(
    "func_to_apply",
    [
        lambda df: df.sum(),
        lambda df: df.size(),
        lambda df: df.quantile(),
        lambda df: df.dtypes,
        lambda df: df.apply(lambda df: df.sum()),
        pytest.param(
            lambda df: df.apply(lambda df: pandas.Series([1, 2, 3, 4])),
            marks=pytest.mark.skip("See modin issue #2511"),
        ),
        lambda grp: grp.agg(
            {
                list(test_data_values[0].keys())[1]: (max, min, sum),
                list(test_data_values[0].keys())[-2]: (sum, min, max),
            }
        ),
        lambda grp: grp.agg(
            {
                list(test_data_values[0].keys())[1]: [
                    ("new_sum", "sum"),
                    ("new_min", "min"),
                ],
                list(test_data_values[0].keys())[-2]: np.sum,
            }
        ),
        pytest.param(
            lambda grp: grp.agg(
                {
                    list(test_data_values[0].keys())[1]: [
                        ("new_sum", "sum"),
                        ("new_mean", "mean"),
                    ],
                    list(test_data_values[0].keys())[-2]: "skew",
                }
            ),
            id="renaming_aggs_at_different_partitions",
        ),
        pytest.param(
            lambda grp: grp.agg(
                {
                    list(test_data_values[0].keys())[1]: [
                        ("new_sum", "sum"),
                        ("new_mean", "mean"),
                    ],
                    list(test_data_values[0].keys())[2]: "skew",
                }
            ),
            id="renaming_aggs_at_same_partition",
        ),
        pytest.param(
            lambda grp: grp.agg(
                {
                    list(test_data_values[0].keys())[1]: "mean",
                    list(test_data_values[0].keys())[-2]: "skew",
                }
            ),
            id="custom_aggs_at_different_partitions",
        ),
        pytest.param(
            lambda grp: grp.agg(
                {
                    list(test_data_values[0].keys())[1]: "mean",
                    list(test_data_values[0].keys())[2]: "skew",
                }
            ),
            id="custom_aggs_at_same_partition",
        ),
        pytest.param(
            lambda grp: grp.agg(
                {
                    list(test_data_values[0].keys())[1]: "mean",
                    list(test_data_values[0].keys())[-2]: "sum",
                }
            ),
            id="native_and_custom_aggs_at_different_partitions",
        ),
        pytest.param(
            lambda grp: grp.agg(
                {
                    list(test_data_values[0].keys())[1]: "mean",
                    list(test_data_values[0].keys())[2]: "sum",
                }
            ),
            id="native_and_custom_aggs_at_same_partition",
        ),
        pytest.param(
            lambda grp: grp.agg(
                {
                    list(test_data_values[0].keys())[1]: (max, "mean", sum),
                    list(test_data_values[0].keys())[-1]: (sum, "skew", max),
                }
            ),
            id="Agg_and_by_intersection_TreeReduce_implementation",
        ),
        pytest.param(
            lambda grp: grp.agg(
                {
                    list(test_data_values[0].keys())[1]: (max, "mean", "nunique"),
                    list(test_data_values[0].keys())[-1]: (sum, min, max),
                }
            ),
            id="Agg_and_by_intersection_FullAxis_implementation",
        ),
        pytest.param(
            lambda grp: grp.agg({list(test_data_values[0].keys())[0]: "count"}),
            id="Agg_and_by_intersection_issue_3376",
        ),
    ],
)
@pytest.mark.parametrize("as_index", [True, False])
@pytest.mark.parametrize("by_length", [1, 2])
@pytest.mark.parametrize(
    "categorical_by",
    [pytest.param(True, marks=pytest.mark.skip("See modin issue #2513")), False],
)
def test_multi_column_groupby_different_partitions(
    func_to_apply, as_index, by_length, categorical_by, request
):
    """
    Apply ``func_to_apply`` to Modin and pandas groupby objects and compare the results.

    The "by" columns are taken from both ends of the frame (first column and,
    for ``by_length == 2``, the last one). After the main comparison,
    ``eval___getitem__`` is run for a single column and for a list of two columns.
    """
    # NOTE(review): `and` binds tighter than `or`, so this condition reads as
    # `(not categorical_by and by_length == 1 and "custom_..." in name) or ("renaming_..." in name)`,
    # i.e. the "renaming_aggs_at_same_partition" case is xfailed for every
    # `by_length`/`categorical_by` value - confirm this is intended.
    if (
        not categorical_by
        and by_length == 1
        and "custom_aggs_at_same_partition" in request.node.name
        or "renaming_aggs_at_same_partition" in request.node.name
    ):
        pytest.xfail(
            "After upgrade to pandas 2.1 skew results are different: AssertionError: 1.0 >= 0.0001."
            + " See https://github.com/modin-project/modin/issues/6530 for details."
        )
    data = test_data_values[0]
    md_df, pd_df = create_test_dfs(data)

    # i == 0 selects columns[0]; i == 1 selects columns[-1] (odd indices are negated).
    by = [pd_df.columns[-i if i % 2 else i] for i in range(by_length)]

    if categorical_by:
        # only the first "by" column is converted to categorical
        md_df = md_df.astype({by[0]: "category"})
        pd_df = pd_df.astype({by[0]: "category"})

    md_grp, pd_grp = (
        md_df.groupby(by, as_index=as_index),
        pd_df.groupby(by, as_index=as_index),
    )
    eval_general(
        md_grp,
        pd_grp,
        func_to_apply,
        # 'skew' and 'mean' results are not 100% equal to pandas as they use
        # different formulas and so precision errors come into play. Thus
        # using a custom comparator that allows slight numeric deviations.
        comparator=try_modin_df_almost_equals_compare,
    )
    # FIXME: https://github.com/modin-project/modin/issues/7034
    eval___getitem__(md_grp, pd_grp, md_df.columns[1], expected_exception=False)
    # FIXME: https://github.com/modin-project/modin/issues/7034
    eval___getitem__(
        md_grp, pd_grp, [md_df.columns[1], md_df.columns[2]], expected_exception=False
    )


def test_empty_partitions_after_groupby():
    """Compare a dict-based ``sum`` aggregation with ``DynamicPartitioning`` enabled."""
    data = test_data_values[0]
    md_df, pd_df = create_test_dfs(data)
    by = pd_df.columns[0]

    def sum_second_and_last(grp):
        # Aggregate the second and the last column of the dataset.
        labels = list(test_data_values[0].keys())
        return grp.agg({labels[1]: "sum", labels[-1]: "sum"})

    with context(DynamicPartitioning=True):
        md_grp = md_df.groupby(by)
        pd_grp = pd_df.groupby(by)
        eval_general(md_grp, pd_grp, sum_second_and_last)


@pytest.mark.parametrize(
    "by",
    [
        0,
        1.5,
        "str",
        pandas.Timestamp("2020-02-02"),
        [0, "str"],
        [pandas.Timestamp("2020-02-02"), 1.5],
    ],
)
@pytest.mark.parametrize("as_index", [True, False])
def test_not_str_by(by, as_index):
    """Group a frame whose column labels are an int, a float, a str and a Timestamp."""
    labels = pandas.Index([0, 1.5, "str", pandas.Timestamp("2020-02-02")])
    md_df, pd_df = create_test_dfs({label: np.arange(5) for label in labels})

    md_grp = md_df.groupby(by, as_index=as_index)
    pd_grp = pd_df.groupby(by, as_index=as_index)

    modin_groupby_equals_pandas(md_grp, pd_grp)

    # Each operation is evaluated on both groupby objects and the results compared.
    operations = (
        lambda grp: grp.sum(),
        lambda grp: grp.size(),
        lambda grp: grp.agg(lambda df: df.mean()),
        lambda grp: grp.dtypes,
        lambda grp: grp.first(),
    )
    for operation in operations:
        eval_general(md_grp, pd_grp, operation)


@pytest.mark.parametrize("internal_by_length", [0, 1, 2])
@pytest.mark.parametrize("external_by_length", [0, 1, 2])
@pytest.mark.parametrize("has_categorical_by", [True, False])
@pytest.mark.parametrize(
    "agg_func",
    [
        pytest.param(
            lambda grp: grp.apply(lambda df: df.dtypes), id="modin_dtypes_impl"
        ),
        pytest.param(
            lambda grp: grp.apply(lambda df: df.sum(numeric_only=True)), id="apply_sum"
        ),
        pytest.param(lambda grp: grp.count(), id="count"),
        pytest.param(lambda grp: grp.nunique(), id="nunique"),
        # An integer key is the position of the column it gets replaced with
        # inside the test. Positions 0 and -1 are considered to be the columns
        # to group on.
        pytest.param({1: "sum", 2: "nunique"}, id="dict_agg_no_intersection_with_by"),
        pytest.param(
            {0: "mean", 1: "sum", 2: "nunique"},
            id="dict_agg_has_intersection_with_by",
        ),
        pytest.param(
            {1: "sum", 2: "nunique", -1: "nunique"},
            id="dict_agg_has_intersection_with_categorical_by",
        ),
    ],
)
# `handle_as_index` comes in two flavours: one that accepts a pandas.DataFrame coming from
# the execution kernel and a backend-agnostic one. This parameter selects which one to test.
@pytest.mark.parametrize("use_backend_agnostic_method", [True, False])
def test_handle_as_index(
    internal_by_length,
    external_by_length,
    has_categorical_by,
    agg_func,
    use_backend_agnostic_method,
    request,
):
    """
    Test ``modin.core.dataframe.algebra.default2pandas.groupby.GroupBy.handle_as_index``.

    ``handle_as_index`` is used here to turn a result computed with ``as_index=True``
    into the result that ``as_index=False`` would have produced.

    Testing flow:
        1. Compute the GroupBy result with ``as_index=True`` via Modin.
        2. Convert it to the ``as_index=False`` form using ``handle_as_index``
           (or ``handle_as_index_for_dataframe``).
        3. Compute the GroupBy result with ``as_index=False`` via pandas as the reference.
        4. Compare the converted result with the reference.
    """
    by_length = internal_by_length + external_by_length
    if by_length == 0:
        pytest.skip("No keys to group on were passed, skipping the test.")

    if (
        has_categorical_by
        and by_length > 1
        and (
            isinstance(agg_func, dict)
            or ("nunique" in request.node.callspec.id.split("-"))
        )
    ):
        pytest.skip(
            "The linked bug makes pandas raise an exception when 'by' is categorical: "
            + "https://github.com/pandas-dev/pandas/issues/36698"
        )

    df = pandas.DataFrame(test_groupby_data)
    # External keys are built from a prefixed copy of the frame before any dtype change.
    external_by_cols = GroupBy.validate_by(df.add_prefix("external_"))

    if has_categorical_by:
        df = df.astype({df.columns[-1]: "category"})

    if isinstance(agg_func, dict):
        # Replace positional keys with the actual column labels.
        agg_dict = {df.columns[position]: func for position, func in agg_func.items()}
        selection = list(agg_dict.keys())

        def apply_agg(grp):
            return grp.agg(agg_dict)

    else:
        selection = None
        apply_agg = agg_func

    # Selecting 'by' columns from both sides of the frame so they are located in different partitions
    by_positions = range(-internal_by_length // 2, internal_by_length // 2)
    internal_by = df.columns[by_positions].tolist()
    external_by = external_by_cols[:external_by_length]

    pd_by = internal_by + external_by
    md_by = internal_by + [pd.Series(ser) for ser in external_by]

    grp_result = pd.DataFrame(df).groupby(md_by, as_index=True)
    grp_reference = df.groupby(pd_by, as_index=False)

    agg_result = apply_agg(grp_result)
    agg_reference = apply_agg(grp_reference)

    # Arguments shared by both flavours of the method under test.
    shared_kwargs = {
        "internal_by_cols": internal_by,
        "by_cols_dtypes": df[internal_by].dtypes.values,
        "by_length": len(md_by),
        "selection": selection,
        "drop": len(internal_by) != 0,
    }

    if not use_backend_agnostic_method:
        GroupBy.handle_as_index_for_dataframe(
            result=agg_result, inplace=True, **shared_kwargs
        )
    else:
        reset_index, drop, lvls_to_drop, cols_to_drop = GroupBy.handle_as_index(
            result_cols=agg_result.columns,
            result_index_names=agg_result.index.names,
            **shared_kwargs,
        )

        # Apply the returned instructions manually to the `as_index=True` result.
        if len(lvls_to_drop) > 0:
            agg_result.index = agg_result.index.droplevel(lvls_to_drop)
        if len(cols_to_drop) > 0:
            agg_result = agg_result.drop(columns=cols_to_drop)
        if reset_index:
            agg_result = agg_result.reset_index(drop=drop)

    df_equals(agg_result, agg_reference)


def test_validate_by():
    """Test ``modin.core.dataframe.algebra.default2pandas.groupby.GroupBy.validate_by``."""

    def assert_same(left, right):
        """Require identical types, then compare (element-wise for lists) via ``df_equals``."""
        assert type(left) is type(
            right
        ), f"Both objects must be instances of the same type: {type(left)} != {type(right)}."
        if not isinstance(left, list):
            df_equals(left, right)
            return
        for left_item, right_item in itertools.zip_longest(left, right):
            df_equals(left_item, right_item)

    # Case 1: a Series's query compiler is passed as 'by'.
    #   * At the QC level the Series is a single-column frame labeled `MODIN_UNNAMED_SERIES_LABEL`.
    #   * The valid representation of such a QC is an unnamed Series.
    single_column_frame = pandas.DataFrame({MODIN_UNNAMED_SERIES_LABEL: [1, 2, 3]})
    expected_series = [pandas.Series([1, 2, 3], name=None)]
    assert_same(expected_series, GroupBy.validate_by(single_column_frame))

    # Case 2: several 'by' columns of the grouped frame are passed as a single QueryCompiler.
    #   * When grouping on several columns the 'by' at the QC level is ``df[by]._query_compiler``.
    #   * The valid representation of such a QC is a list of Series.
    expected_columns = [pandas.Series([1, 2, 3], name=f"col{i}") for i in range(3)]
    joined_frame = pandas.concat(expected_columns, axis=1, copy=True)
    assert_same(expected_columns, GroupBy.validate_by(joined_frame))

    # Case 3: a mixed 'by' (two column names and an external Series).
    mixed_by = [
        "col1",
        "col2",
        pandas.DataFrame({MODIN_UNNAMED_SERIES_LABEL: [1, 2, 3]}),
    ]
    validated_by = GroupBy.validate_by(mixed_by)
    expected_by = ["col1", "col2", pandas.Series([1, 2, 3], name=None)]
    assert_same(expected_by, validated_by)


@pytest.mark.skipif(
    get_current_execution() == "BaseOnPython" or current_execution_is_native(),
    reason="The test only make sense for partitioned executions",
)
def test_groupby_with_virtual_partitions():
    """Run ``groupby(...).count()`` on a frame built by concatenating five copies of one frame."""
    # from https://github.com/modin-project/modin/issues/4464
    modin_df, pandas_df = create_test_dfs(test_data["int_data"])

    # Concatenate DataFrames here to make virtual partitions.
    copies = 5
    big_modin_df = pd.concat([modin_df] * copies)
    big_pandas_df = pandas.concat([pandas_df] * copies)

    # Check that the constructed Modin DataFrame has virtual partitions.
    first_partition = big_modin_df._query_compiler._modin_frame._partitions[0][0]
    assert issubclass(type(first_partition), PandasDataframeAxisPartition)

    def count_by_first_column(df):
        return df.groupby(df.columns[0]).count()

    eval_general(big_modin_df, big_pandas_df, count_by_first_column)


@pytest.mark.parametrize("sort", [True, False])
@pytest.mark.parametrize("is_categorical_by", [True, False])
def test_groupby_sort(sort, is_categorical_by):
    """Check the ``sort`` parameter of groupby on a shuffled key column with three unique values."""
    # from issue #3571
    keys = np.array(["a"] * 50000 + ["b"] * 10000 + ["c"] * 1000)
    # Fixed seed keeps the shuffled order reproducible between runs.
    np.random.RandomState(seed=42).shuffle(keys)

    md_df, pd_df = create_test_dfs(
        {"key_col": keys, "data_col": np.arange(len(keys))}
    )

    if is_categorical_by:
        categorical_cast = {"key_col": "category"}
        md_df = md_df.astype(categorical_cast)
        pd_df = pd_df.astype(categorical_cast)

    md_grp = md_df.groupby("key_col", sort=sort)
    pd_grp = pd_df.groupby("key_col", sort=sort)

    modin_groupby_equals_pandas(md_grp, pd_grp)

    operations = (
        lambda grp: grp.sum(numeric_only=True),
        lambda grp: grp.size(),
        lambda grp: grp.agg(lambda df: df.mean()),
        lambda grp: grp.dtypes,
        lambda grp: grp.first(),
    )
    for operation in operations:
        eval_general(md_grp, pd_grp, operation)


def test_groupby_with_frozenlist():
    """Group by ``df.index.names`` of a frame with a two-level index and count."""
    raw_frame = pandas.DataFrame(data={"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3]})
    pandas_df = raw_frame.set_index(["a", "b"])
    modin_df = from_pandas(pandas_df)

    def count_by_index_names(df):
        return df.groupby(df.index.names).count()

    eval_general(modin_df, pandas_df, count_by_index_names)


@pytest.mark.parametrize(
    "by_func",
    [
        lambda df: "timestamp0",
        lambda df: ["timestamp0", "timestamp1"],
        lambda df: ["timestamp0", df["timestamp1"]],
    ],
)
def test_mean_with_datetime(by_func):
    """Compute a groupby ``mean`` when the grouping keys are datetime columns."""
    modin_df, pandas_df = create_test_dfs(
        {
            "timestamp0": [pd.to_datetime(1490195805, unit="s")],
            "timestamp1": [pd.to_datetime(1490195805, unit="s")],
            "numeric": [0],
        }
    )

    def mean_by_key(df):
        # `by_func` builds the key from the frame it is given (Modin or pandas).
        return df.groupby(by=by_func(df)).mean()

    eval_general(modin_df, pandas_df, mean_by_key)


def test_groupby_ohlc():
    """Compare ``ohlc()`` for a single selected column and for list selections."""
    pandas_df = pandas.DataFrame(
        np.random.randint(0, 100, (50, 2)), columns=["stock A", "stock B"]
    )
    # Ten one-minute timestamps repeated five times, one per row.
    minute_stamps = pandas.date_range("1/1/2000", periods=10, freq="min").to_series()
    pandas_df["Date"] = pandas.concat([minute_stamps] * 5).reset_index(drop=True)
    modin_df = pd.DataFrame(pandas_df)

    eval_general(modin_df, pandas_df, lambda df: df.groupby("Date")["stock A"].ohlc())

    # A list selection is expected to emit the default-to-pandas warning
    # unless the frame uses native execution.
    for selected in (["stock A"], ["stock A", "stock B"]):
        pandas_multiindex_result = pandas_df.groupby("Date")[selected].ohlc()
        with warns_that_defaulting_to_pandas_if(
            not df_or_series_using_native_execution(modin_df)
        ):
            modin_multiindex_result = modin_df.groupby("Date")[selected].ohlc()
        df_equals(modin_multiindex_result, pandas_multiindex_result)


@pytest.mark.parametrize(
    "modin_df_recipe",
    ["non_lazy_frame", "frame_with_deferred_index", "lazy_frame"],
)
def test_groupby_on_empty_data(modin_df_recipe):
    """
    Run the ``eval_*`` groupby checks on a frame that has columns but no rows.

    ``modin_df_recipe`` names the ``ModinDfConstructor`` method used to build
    the Modin frame for every check.
    """

    class ModinDfConstructor:
        """Context manager that builds the Modin frame with the method named ``recipe``."""

        def __init__(self, recipe, df_kwargs):
            self._recipe = recipe
            # Active `mock.patch` object, set only by the `lazy_frame` recipe.
            self._mock_obj = None
            self._df_kwargs = df_kwargs

        def non_lazy_frame(self):
            return pd.DataFrame(**self._df_kwargs)

        def frame_with_deferred_index(self):
            df = pd.DataFrame(**self._df_kwargs)
            try:
                # The frame would stop being lazy once index computation is triggered
                df._query_compiler.set_frame_index_cache(None)
            except AttributeError:
                pytest.skip(
                    reason="Selected execution doesn't support deferred indices."
                )

            return df

        def lazy_frame(self):
            donor_obj = pd.DataFrame()._query_compiler

            patcher = mock.patch(
                f"{donor_obj.__module__}.{donor_obj.__class__.__name__}.lazy_shape",
                new_callable=mock.PropertyMock,
            )
            patch_obj = patcher.__enter__()
            # Remember the patcher only once it is active, so `__exit__` never
            # tries to undo a patch that was not applied.
            self._mock_obj = patcher
            try:
                patch_obj.return_value = True

                df = pd.DataFrame(**self._df_kwargs)
                # The frame is lazy until `self.__exit__()` is called
                assert df._query_compiler.lazy_shape
            except BaseException:
                # This method runs inside `__enter__`; a `with` statement does
                # not call `__exit__` when `__enter__` raises, so the patch has
                # to be undone here or it would stay active for later tests.
                self._mock_obj = None
                patcher.__exit__(None, None, None)
                raise
            return df

        def __enter__(self):
            return getattr(self, self._recipe)()

        def __exit__(self, *args, **kwargs):
            if self._mock_obj is not None:
                self._mock_obj.__exit__(*args, **kwargs)

    def run_test(eval_function, *args, **kwargs):
        """Build fresh empty Modin/pandas frames, group by the first column and run the check."""
        df_kwargs = {"columns": ["a", "b", "c"]}
        with ModinDfConstructor(modin_df_recipe, df_kwargs) as modin_df:
            pandas_df = pandas.DataFrame(**df_kwargs)

            modin_grp = modin_df.groupby(modin_df.columns[0])
            pandas_grp = pandas_df.groupby(pandas_df.columns[0])

            eval_function(modin_grp, pandas_grp, *args, **kwargs)

    run_test(eval___getattr__, item="b")
    run_test(eval___getitem__, item="b")
    run_test(eval_agg, func=lambda df: df.mean())
    run_test(eval_any)
    run_test(eval_apply, func=lambda df: df.mean())
    run_test(eval_count)
    run_test(eval_cummax, numeric_only=True)
    run_test(eval_cummin, numeric_only=True)
    run_test(eval_cumprod, numeric_only=True)
    run_test(eval_cumsum, numeric_only=True)
    run_test(eval_dtypes)
    run_test(eval_fillna)
    run_test(eval_groups)
    run_test(eval_len)
    run_test(eval_max)
    run_test(eval_mean)
    run_test(eval_median)
    run_test(eval_min)
    run_test(eval_ndim)
    run_test(eval_ngroup)
    run_test(eval_ngroups)
    run_test(eval_nunique)
    run_test(eval_prod)
    run_test(eval_quantile)
    run_test(eval_rank)
    run_test(eval_size)
    run_test(eval_skew)
    run_test(eval_sum)
    run_test(eval_var)

    if modin_df_recipe != "lazy_frame":
        # TODO: these functions have their specific implementations in the
        # front-end that are unable to operate on empty frames and thus
        # fail on an empty lazy frame.
        # https://github.com/modin-project/modin/issues/5505
        # https://github.com/modin-project/modin/issues/5506
        run_test(eval_pipe, func=lambda df: df.mean())
        run_test(eval_shift)

    # TODO: these functions fail in case of empty data in the pandas itself,
    # we have to modify the `eval_*` functions to be able to check for
    # exceptions equality:
    # https://github.com/modin-project/modin/issues/5441
    # run_test(eval_transform, func=lambda df: df.mean())
    # run_test(eval_std)


def test_skew_corner_cases():
    """
    This test was inspired by https://github.com/modin-project/modin/issues/5545.

    The test verifies that modin acts exactly as pandas when the input data is
    bad for the 'skew' and so some components of the 'skew' formula appear to be invalid:
        ``(count * (count - 1) ** 0.5 / (count - 2)) * (m3 / m2**1.5)``

    Notes: ``mX = sum((col - mean(col)) ^ x)`` and ``count = group_size``.
    """
    corner_cases = (
        # 'm2 == m3 == 0', causing a 0 / 0 division in the second multiplier.
        {"col0": [1, 1, 1], "col1": [10, 10, 10]},
        # 'count < 3', causing a division by zero in the first multiplier.
        {"col0": [1, 1], "col1": [1, 2]},
        # 'count < 3' and 'm3 / m2 != 0'. The case comes from:
        # https://github.com/modin-project/modin/issues/5545
        {"col0": [1, 1], "col1": [171, 137]},
    )
    for case_data in corner_cases:
        modin_df, pandas_df = create_test_dfs(case_data)
        eval_general(modin_df, pandas_df, lambda df: df.groupby("col0").skew())


@pytest.mark.parametrize(
    "by",
    [
        pandas.Grouper(key="time_stamp", freq="3D"),
        [pandas.Grouper(key="time_stamp", freq="1ME"), "count"],
    ],
)
def test_groupby_with_grouper(by):
    """Compute a groupby ``mean`` where ``by`` contains a ``pandas.Grouper``."""
    # See https://github.com/modin-project/modin/issues/5091 for more details
    # Generate larger data so that it can handle partitioning cases
    n_rows = 200
    first_day = pd.Timestamp("2000-01-02")
    data = {
        "id": list(range(n_rows)),
        "time_stamp": [
            first_day + datetime.timedelta(days=offset) for offset in range(n_rows)
        ],
    }
    # 200 extra columns, each alternating between two consecutive integers.
    data.update({f"count_{i}": [i, i + 1] * 100 for i in range(200)})

    modin_df, pandas_df = create_test_dfs(data)
    eval_general(
        modin_df,
        pandas_df,
        lambda df: df.groupby(by).mean(),
        # FIXME: https://github.com/modin-project/modin/issues/7033
        expected_exception=False,
    )


def test_groupby_preserves_by_order():
    """Group by an external Series followed by a column label and compare with pandas."""
    modin_df, pandas_df = create_test_dfs({"col0": [1, 1, 1], "col1": [10, 10, 10]})
    external_values = [100, 100, 100]

    # The external Series comes first in 'by', the column label second.
    modin_by = [pd.Series(external_values), "col0"]
    pandas_by = [pandas.Series(external_values), "col0"]

    modin_res = modin_df.groupby(modin_by).mean()
    pandas_res = pandas_df.groupby(pandas_by).mean()

    df_equals(modin_res, pandas_res)


@pytest.mark.parametrize(
    "method",
    # test all aggregations from pandas.core.groupby.base.reduction_kernels except
    # nth and corrwith, both of which require extra arguments.
    [
        "all",
        "any",
        "count",
        "first",
        "idxmax",
        "idxmin",
        "last",
        "max",
        "mean",
        "median",
        "min",
        "nunique",
        "prod",
        "quantile",
        "sem",
        "size",
        "skew",
        "std",
        "sum",
        "var",
    ],
)
@pytest.mark.skipif(
    StorageFormat.get() != "Pandas",
    reason="only relevant to pandas execution",
)
def test_groupby_agg_with_empty_column_partition_6175(method):
    """Run ``method`` on a groupby whose keys both live in the first of two column partitions."""
    keys_part = pd.DataFrame({"col33": [0, 1], "index": [2, 3]})
    data_part = pd.DataFrame({"col34": [4, 5]})
    df = pd.concat([keys_part, data_part], axis=1)

    # The scenario relies on the frame being split into exactly two column partitions.
    assert df._query_compiler._modin_frame._partitions.shape == (1, 2)

    def aggregate(frame):
        grouped = frame.groupby(["col33", "index"])
        return getattr(grouped, method)()

    eval_general(df, df._to_pandas(), aggregate)


def test_groupby_pct_change_diff_6194():
    """Check that groupby ``pct_change`` and ``diff`` match pandas on a small frame."""
    df = pd.DataFrame(
        {
            "by": ["a", "b", "c", "a", "c"],
            "value": [1, 2, 4, 5, 1],
        }
    )
    # These methods should not crash
    operations = (
        lambda frame: frame.groupby(by="by").pct_change(),
        lambda frame: frame.groupby(by="by").diff(),
    )
    for operation in operations:
        eval_general(df, df._to_pandas(), operation)


def test_groupby_datetime_diff_6628():
    """Check groupby ``diff`` on a datetime column where all rows share one group."""
    weekly_dates = pd.date_range(start="2023-01-01", periods=10, freq="W")
    df = pd.DataFrame({"date": weekly_dates, "group": "A"})

    def diff_within_group(frame):
        return frame.groupby("group").diff()

    eval_general(df, df._to_pandas(), diff_within_group)


def eval_rolling(md_window, pd_window):
    """
    Compare a Modin groupby-rolling window with its pandas counterpart.

    Every operation below is evaluated on both windows through ``eval_general``.
    When ``pd_window.on == "col4"``, ``corr``, ``cov`` and ``sem`` are expected
    to raise the exceptions constructed in this function.
    """
    rolls_on_col4 = pd_window.on == "col4"

    for name in ("count", "sum", "mean", "median", "var", "std", "min", "max"):
        eval_general(md_window, pd_window, lambda window: getattr(window, name)())

    pairwise_exception = (
        ValueError(
            "Length mismatch: Expected axis has 450 elements, new values have 600 elements"
        )
        if rolls_on_col4
        else None
    )
    for name in ("corr", "cov"):
        eval_general(
            md_window,
            pd_window,
            lambda window: getattr(window, name)(),
            expected_exception=pairwise_exception,
        )

    remaining_operations = (
        lambda window: window.skew(),
        lambda window: window.kurt(),
        lambda window: window.apply(lambda df: (df + 10).sum()),
        lambda window: window.agg("sum"),
        lambda window: window.quantile(0.2),
        lambda window: window.rank(),
    )
    for operation in remaining_operations:
        eval_general(md_window, pd_window, operation)

    sem_exception = (
        TypeError(
            "Addition/subtraction of integers and integer-arrays with DatetimeArray is no longer supported."
            + "  Instead of adding/subtracting `n`, use `n * obj.freq`"
        )
        if rolls_on_col4
        else None
    )

    if md_window._as_index:

        def compute_sem(window):
            return window.sem()

    else:
        # There's a mismatch in group columns when 'as_index=False'
        # see: https://github.com/modin-project/modin/issues/6291
        by_cols = list(md_window._groupby_obj._internal_by)

        def compute_sem(window):
            return window.sem().drop(columns=by_cols, errors="ignore")

    eval_general(
        md_window, pd_window, compute_sem, expected_exception=sem_exception
    )


@pytest.mark.parametrize("center", [True, False])
@pytest.mark.parametrize("closed", ["right", "left", "both", "neither"])
@pytest.mark.parametrize("as_index", [True, False])
def test_rolling_int_window(center, closed, as_index):
    """Check groupby-rolling with an integer window of 3 on a frame concatenated from two parts."""
    left_part = pd.DataFrame(
        {
            "by": np.tile(np.arange(15), 10),
            "col1": np.arange(150),
            "col2": np.arange(10, 160),
        }
    )
    right_part = pd.DataFrame({"col3": np.arange(20, 170)})

    md_df = pd.concat([left_part, right_part], axis=1)
    pd_df = md_df._to_pandas()

    if StorageFormat.get() == "Pandas":
        # The concatenated frame is expected to keep two column partitions.
        assert md_df._query_compiler._modin_frame._partitions.shape[1] == 2

    rolling_kwargs = {"center": center, "closed": closed}
    md_window = md_df.groupby("by", as_index=as_index).rolling(3, **rolling_kwargs)
    pd_window = pd_df.groupby("by", as_index=as_index).rolling(3, **rolling_kwargs)
    eval_rolling(md_window, pd_window)


@pytest.mark.parametrize("center", [True, False])
@pytest.mark.parametrize("closed", ["right", "left", "both", "neither"])
@pytest.mark.parametrize("as_index", [True, False])
@pytest.mark.parametrize("on", [None, "col4"])
def test_rolling_timedelta_window(center, closed, as_index, on):
    """Check groupby-rolling with a 3-day window, rolling on the index or on the ``on`` column."""
    start = datetime.date(2020, 1, 1)

    left_part = pd.DataFrame(
        {
            "by": np.tile(np.arange(15), 10),
            "col1": np.arange(150),
            "col2": np.arange(10, 160),
        }
    )
    right_part = pd.DataFrame({"col3": np.arange(20, 170)})

    if on is not None:
        # Datetime-like column the window rolls on.
        right_part[on] = pandas.DatetimeIndex(
            [start + datetime.timedelta(hours=12) * i for i in range(150)]
        )

    md_df = pd.concat([left_part, right_part], axis=1)
    md_df.index = pandas.DatetimeIndex(
        [start + datetime.timedelta(days=1) * i for i in range(150)]
    )

    pd_df = md_df._to_pandas()

    if StorageFormat.get() == "Pandas" and on is None:
        # NOTE(review): this used to be written as
        # ``assert (<n_col_partitions> == 2 if on is None else 3)``, which parses as
        # ``(<n_col_partitions> == 2) if on is None else 3``. With ``on`` set it
        # asserted the truthy constant ``3``, so the partition count has never been
        # verified for that case. The intended expectation was presumably three
        # column partitions - TODO confirm the actual count before asserting it.
        assert md_df._query_compiler._modin_frame._partitions.shape[1] == 2

    window = datetime.timedelta(days=3)
    rolling_kwargs = {"center": center, "closed": closed, "on": on}
    md_window = md_df.groupby("by", as_index=as_index).rolling(
        window, **rolling_kwargs
    )
    pd_window = pd_df.groupby("by", as_index=as_index).rolling(
        window, **rolling_kwargs
    )
    eval_rolling(md_window, pd_window)


@pytest.mark.parametrize(
    "func",
    [
        pytest.param("sum", id="map_reduce_func"),
        pytest.param("median", id="full_axis_func"),
    ],
)
def test_groupby_deferred_index(func):
    """Aggregate after the index was reassigned on a concatenated frame and ``indices`` was read."""
    # the test is copied from the issue:
    # https://github.com/modin-project/modin/issues/6368

    def scenario(lib):
        # `lib` is either `modin.pandas` or `pandas`.
        keys = lib.DataFrame({"a": [1, 1, 2, 2]})
        values = lib.DataFrame({"b": [3, 4, 5, 6], "c": [7, 5, 4, 3]})

        frame = lib.concat([keys, values], axis=1)
        frame.index = [10, 11, 12, 13]

        grouped = frame.groupby("a")
        # Reading the property is part of the reproducer; its value is not used.
        grouped.indices

        aggregation = getattr(grouped, func)
        return aggregation()

    eval_general(pd, pandas, scenario)


# there are two different implementations of partitions aligning for cluster and non-cluster mode,
# here we want to test both of them, so simply modifying the config for this test
@pytest.mark.parametrize(
    "modify_config",
    [
        {RangePartitioning: True, IsRayCluster: True},
        {RangePartitioning: True, IsRayCluster: False},
    ],
    indirect=True,
)
def test_shape_changing_udf(modify_config):
    """Apply UDFs whose results have a different shape/labels than the group they received."""
    modin_df, pandas_df = create_test_dfs(
        {
            "by_col1": ([1] * 50) + ([10] * 50),
            "col2": np.arange(100),
            "col3": np.arange(100),
        }
    )

    def func1(group):
        # changes the original shape and indexing of the 'group'
        new_labels = ["new_col1", "new_col2", "new_col4", "new_col3"]
        return pandas.Series([1, 2, 3, 4], index=new_labels)

    def func2(group):
        # each group has a different shape at the end
        # (we do .to_frame().T as otherwise this scenario doesn't work in pandas)
        if group.iloc[0, 0] == 1:
            row = pandas.Series(
                [1, 2, 3, 4], index=["new_col1", "new_col2", "new_col4", "new_col3"]
            )
        else:
            row = pandas.Series(
                [20, 33, 44], index=["new_col2", "new_col3", "new_col4"]
            )
        return row.to_frame().T

    def func3(group):
        # one of the groups produces an empty dataframe, in the result we should
        # have joined columns of both of these dataframes
        if group.iloc[0, 0] != 1:
            return pandas.DataFrame(columns=["col2", "col3", "col4", "col5"])
        return pandas.DataFrame([[1, 2, 3]], index=["col1", "col2", "col3"])

    for udf in (func1, func2, func3):
        eval_general(
            modin_df.groupby("by_col1"),
            pandas_df.groupby("by_col1"),
            lambda df: df.apply(udf),
        )


@pytest.mark.parametrize("modify_config", [{RangePartitioning: True}], indirect=True)
def test_reshuffling_groupby_on_strings(modify_config):
    """Group by a ``string``-dtype column with ``RangePartitioning`` enabled."""
    # reproducer from https://github.com/modin-project/modin/issues/6509
    modin_df, pandas_df = create_test_dfs(
        {"col1": ["a"] * 50 + ["b"] * 50, "col2": range(100)}
    )

    string_cast = {"col1": "string"}
    modin_df = modin_df.astype(string_cast)
    pandas_df = pandas_df.astype(string_cast)

    md_grp = modin_df.groupby("col1")
    pd_grp = pandas_df.groupby("col1")

    operations = (
        lambda grp: grp.mean(),
        lambda grp: grp.nth(2),
        lambda grp: grp.head(10),
        lambda grp: grp.tail(10),
    )
    for operation in operations:
        eval_general(md_grp, pd_grp, operation)


@pytest.mark.parametrize("modify_config", [{RangePartitioning: True}], indirect=True)
def test_groupby_apply_series_result(modify_config):
    """Apply a UDF that returns a value derived from the group's ``name``."""
    # reproducer from the issue:
    # https://github.com/modin-project/modin/issues/6632
    row_labels = [f"s{i+1}" for i in range(5)]
    df = pd.DataFrame(np.random.randint(5, 10, size=5), index=row_labels)
    df["group"] = [1, 1, 2, 2, 3]

    def apply_name_plus_two(frame):
        return frame.groupby("group").apply(
            lambda x: x.name + 2, include_groups=False
        )

    eval_general(df, df._to_pandas(), apply_name_plus_two)


def test_groupby_named_aggregation():
    """Check a keyword (named) aggregation on a Series grouped by index level 0."""
    modin_ser, pandas_ser = create_test_series([10, 10, 10, 1, 1, 1, 2, 3], name="data")

    def named_max(ser):
        return ser.groupby(level=0).agg(result="max")

    eval_general(modin_ser, pandas_ser, named_max)


def test_groupby_several_column_partitions():
    """Run a multi-column named aggregation on a frame that got an extra column assigned."""
    # see details in #6948
    columns = [
        "l_returnflag",
        "l_linestatus",
        "l_discount",
        "l_extendedprice",
        "l_quantity",
    ]
    modin_df, pandas_df = create_test_dfs(
        np.random.randint(0, 100, size=(1000, len(columns))), columns=columns
    )

    pandas_df["a"] = pandas_df.l_extendedprice * (1 - pandas_df.l_discount)
    # to create another column partition
    modin_df["a"] = modin_df.l_extendedprice * (1 - modin_df.l_discount)

    # Output column name -> (source column, aggregation).
    named_aggs = {
        "sum_qty": ("l_quantity", "sum"),
        "sum_base_price": ("l_extendedprice", "sum"),
        "sum_disc_price": ("a", "sum"),
        "avg_qty": ("l_quantity", "mean"),
        "avg_price": ("l_extendedprice", "mean"),
        "avg_disc": ("l_discount", "mean"),
        "count_order": ("l_returnflag", "count"),
    }

    def aggregate(df):
        grouped = df.groupby(["l_returnflag", "l_linestatus"])
        return grouped.agg(**named_aggs).reset_index()

    eval_general(modin_df, pandas_df, aggregate)


def test_groupby_named_agg():
    """Check ``groupby.agg`` with ``pd.NamedAgg`` specifications (example from pandas docs)."""
    modin_df, pandas_df = create_test_dfs(
        {
            "A": [1, 1, 2, 2],
            "B": [1, 2, 3, 4],
            "C": [0.362838, 0.227877, 1.267767, -0.562860],
        }
    )

    def aggregate(df):
        b_min = pd.NamedAgg(column="B", aggfunc="min")
        c_sum = pd.NamedAgg(column="C", aggfunc="sum")
        return df.groupby("A").agg(b_min=b_min, c_sum=c_sum)

    eval_general(modin_df, pandas_df, aggregate)


### TEST GROUPBY WARNINGS ###


def test_groupby_axis_1_warning():
    """Both Modin and pandas must emit the ``axis=1`` deprecation warning on groupby."""
    modin_df, pandas_df = create_test_dfs(
        {
            "col1": [0, 3, 2, 3],
            "col2": [4, 1, 6, 7],
        }
    )
    expected_message = "DataFrame.groupby with axis=1 is deprecated"

    # Modin is checked first, then pandas.
    for frame in (modin_df, pandas_df):
        with pytest.warns(FutureWarning, match=expected_message):
            frame.groupby(by="col1", axis=1)


def test_groupby_dtypes_warning():
    """Accessing ``.dtypes`` on a groupby must warn for both Modin and pandas."""
    modin_df, pandas_df = create_test_dfs(
        {
            "col1": [0, 3, 2, 3],
            "col2": [4, 1, 6, 7],
        }
    )
    modin_groupby = modin_df.groupby(by="col1")
    pandas_groupby = pandas_df.groupby(by="col1")
    expected_message = "DataFrameGroupBy.dtypes is deprecated"

    # Modin is checked first, then pandas.
    for groupby_obj in (modin_groupby, pandas_groupby):
        with pytest.warns(FutureWarning, match=expected_message):
            groupby_obj.dtypes


def test_groupby_diff_axis_1_warning():
    """``groupby.diff(axis=1)`` must warn for both Modin and pandas."""
    modin_df, pandas_df = create_test_dfs(
        {
            "col1": [0, 3, 2, 3],
            "col2": [4, 1, 6, 7],
        }
    )
    modin_groupby = modin_df.groupby(by="col1")
    pandas_groupby = pandas_df.groupby(by="col1")
    expected_message = "DataFrameGroupBy.diff with axis=1 is deprecated"

    # Modin is checked first, then pandas.
    for groupby_obj in (modin_groupby, pandas_groupby):
        with pytest.warns(FutureWarning, match=expected_message):
            groupby_obj.diff(axis=1)


def test_groupby_pct_change_axis_1_warning():
    """``groupby.pct_change(axis=1)`` must warn for both Modin and pandas."""
    modin_df, pandas_df = create_test_dfs(
        {
            "col1": [0, 3, 2, 3],
            "col2": [4, 1, 6, 7],
        }
    )
    modin_groupby = modin_df.groupby(by="col1")
    pandas_groupby = pandas_df.groupby(by="col1")
    expected_message = "DataFrameGroupBy.pct_change with axis=1 is deprecated"

    # Modin is checked first, then pandas.
    for groupby_obj in (modin_groupby, pandas_groupby):
        with pytest.warns(FutureWarning, match=expected_message):
            groupby_obj.pct_change(axis=1)


def test_groupby_pct_change_parameters_warning():
    """``pct_change`` with ``fill_method``/``limit`` must warn for both Modin and pandas."""
    modin_df, pandas_df = create_test_dfs(
        {
            "col1": [0, 3, 2, 3],
            "col2": [4, 1, 6, 7],
        }
    )
    modin_groupby = modin_df.groupby(by="col1")
    pandas_groupby = pandas_df.groupby(by="col1")

    # Regex: the message may name either DataFrame or DataFrameGroupBy.
    match_string = (
        "The 'fill_method' keyword being not None and the 'limit' keyword "
        "in (DataFrame|DataFrameGroupBy).pct_change are deprecated"
    )

    # Modin is checked first, then pandas.
    for groupby_obj in (modin_groupby, pandas_groupby):
        with pytest.warns(FutureWarning, match=match_string):
            groupby_obj.pct_change(fill_method="bfill", limit=1)


def test_groupby_shift_axis_1_warning():
    """``groupby.shift(axis=1)`` must warn for both pandas and Modin."""
    modin_df, pandas_df = create_test_dfs(
        {
            "col1": [0, 3, 2, 3],
            "col2": [4, 1, 6, 7],
        }
    )
    modin_groupby = modin_df.groupby(by="col1")
    pandas_groupby = pandas_df.groupby(by="col1")
    expected_message = "DataFrameGroupBy.shift with axis=1 is deprecated"

    # Unlike the sibling warning tests, pandas is exercised before Modin here.
    for groupby_obj in (pandas_groupby, modin_groupby):
        with pytest.warns(FutureWarning, match=expected_message):
            groupby_obj.shift(axis=1, fill_value=777)


def test_groupby_fillna_axis_1_warning():
    """``groupby.fillna`` must emit its deprecation warning for both Modin and pandas."""
    modin_df, pandas_df = create_test_dfs(
        {
            "col1": [0, 3, 2, 3],
            "col2": [4, None, 6, None],
        }
    )
    modin_groupby = modin_df.groupby(by="col1")
    pandas_groupby = pandas_df.groupby(by="col1")
    expected_message = "DataFrameGroupBy.fillna is deprecated"

    # Modin is checked first, then pandas.
    for groupby_obj in (modin_groupby, pandas_groupby):
        with pytest.warns(FutureWarning, match=expected_message):
            groupby_obj.fillna(method="ffill")


def test_groupby_agg_provided_callable_warning():
    """Passing the builtins ``sum``/``max`` to ``agg`` must warn for both Modin and pandas."""
    modin_df, pandas_df = create_test_dfs(
        {
            "col1": [0, 3, 2, 3],
            "col2": [4, 1, 6, 7],
        }
    )
    modin_groupby = modin_df.groupby(by="col1")
    pandas_groupby = pandas_df.groupby(by="col1")
    expected_message = (
        "In a future version of pandas, the provided callable will be used directly"
    )

    for func in (sum, max):
        # For every callable: Modin first, then pandas.
        for groupby_obj in (modin_groupby, pandas_groupby):
            with pytest.warns(FutureWarning, match=expected_message):
                groupby_obj.agg(func)


@pytest.mark.parametrize("modify_config", [{RangePartitioning: True}], indirect=True)
@pytest.mark.parametrize("observed", [False])
@pytest.mark.parametrize("as_index", [True])
@pytest.mark.parametrize(
    "func",
    [
        pytest.param(lambda grp: grp.sum(), id="sum"),
        pytest.param(lambda grp: grp.size(), id="size"),
        pytest.param(lambda grp: grp.apply(lambda df: df.sum()), id="apply_sum"),
        pytest.param(
            lambda grp: grp.apply(
                lambda df: (
                    df.sum()
                    if len(df) > 0
                    else pandas.Series([10] * len(df.columns), index=df.columns)
                )
            ),
            id="apply_transform",
        ),
    ],
)
@pytest.mark.parametrize(
    "by_cols, cat_cols",
    [
        ("a", ["a"]),
        ("b", ["b"]),
        ("e", ["e"]),
        (["a", "e"], ["a"]),
        (["a", "e"], ["e"]),
        (["a", "e"], ["a", "e"]),
        (["b", "e"], ["b"]),
        (["b", "e"], ["e"]),
        (["b", "e"], ["b", "e"]),
        (["a", "b", "e"], ["a"]),
        (["a", "b", "e"], ["b"]),
        (["a", "b", "e"], ["e"]),
        (["a", "b", "e"], ["a", "e"]),
        (["a", "b", "e"], ["a", "b", "e"]),
    ],
)
@pytest.mark.parametrize(
    "exclude_values",
    [
        pytest.param(lambda row: ~row["a"].isin(["a", "e"]), id="exclude_from_a"),
        pytest.param(lambda row: ~row["b"].isin([4]), id="exclude_from_b"),
        pytest.param(lambda row: ~row["e"].isin(["x"]), id="exclude_from_e"),
        pytest.param(
            lambda row: ~row["a"].isin(["a", "e"]) & ~row["b"].isin([4]),
            id="exclude_from_a_b",
        ),
        pytest.param(
            lambda row: ~row["b"].isin([4]) & ~row["e"].isin(["x"]),
            id="exclude_from_b_e",
        ),
        pytest.param(
            lambda row: ~row["a"].isin(["a", "e"])
            & ~row["b"].isin([4])
            & ~row["e"].isin(["x"]),
            id="exclude_from_a_b_e",
        ),
    ],
)
def test_range_groupby_categories(
    observed, func, by_cols, cat_cols, exclude_values, as_index, modify_config
):
    """
    Compare groupby results on categorical keys after some category values were filtered out.

    The ``cat_cols`` columns are cast to ``category`` and then rows are dropped with
    the ``exclude_values`` mask, before ``func`` is applied to the groupby object.
    """
    data = {
        "a": ["a", "b", "c", "d", "e", "b", "g", "a"] * 32,
        "b": [1, 2, 3, 4] * 64,
        "c": range(256),
        "d": range(256),
        "e": ["x", "y"] * 128,
    }

    def prepare(frame):
        # Cast first, filter second.
        as_categories = frame.astype(dict.fromkeys(cat_cols, "category"))
        return as_categories[exclude_values]

    def run(frame):
        return func(frame.groupby(by_cols, observed=observed, as_index=as_index))

    md_df, pd_df = (prepare(frame) for frame in create_test_dfs(data))
    md_res, pd_res = run(md_df), run(pd_df)

    # HACK, FIXME: there's a bug in range-partitioning impl that apparently can
    # break the order of rows in the result for multi-column groupbys. Placing the sorting-hack for now
    # https://github.com/modin-project/modin/issues/6875
    df_equals(md_res.sort_index(axis=0), pd_res.sort_index(axis=0))


@pytest.mark.parametrize("cat_cols", [["a"], ["b"], ["a", "b"]])
@pytest.mark.parametrize(
    "columns", [[(False, "a"), (True, "b")], [(True, "a")], [(True, "a"), (True, "b")]]
)
def test_range_groupby_categories_external_grouper(columns, cat_cols):
    """Compare ``groupby(...).count()`` when the groupers come from ``get_external_groupers``."""
    data = {
        "a": [1, 1, 2, 2] * 64,
        "b": [11, 11, 22, 22] * 64,
        "c": [111, 111, 222, 222] * 64,
        "data": [1, 2, 3, 4] * 64,
    }

    def as_categories(frame):
        return frame.astype(dict.fromkeys(cat_cols, "category"))

    md_df, pd_df = (as_categories(frame) for frame in create_test_dfs(data))

    md_df, md_by = get_external_groupers(md_df, columns, drop_from_original_df=True)
    pd_df, pd_by = get_external_groupers(pd_df, columns, drop_from_original_df=True)

    md_grp = md_df.groupby(md_by)
    pd_grp = pd_df.groupby(pd_by)
    eval_general(md_grp, pd_grp, lambda grp: grp.count())


@pytest.mark.parametrize("by", [["a"], ["a", "b"]])
@pytest.mark.parametrize("as_index", [True, False])
@pytest.mark.parametrize("include_groups", [True, False])
def test_include_groups(by, as_index, include_groups):
    """Check that ``include_groups`` controls whether the UDF sees the grouping columns."""
    data = {
        "a": [1, 1, 2, 2] * 64,
        "b": [11, 11, 22, 22] * 64,
        "c": [111, 111, 222, 222] * 64,
        "data": [1, 2, 3, 4] * 64,
    }

    # Either all of the ``by`` columns are passed to the UDF, or none of them.
    expected_overlap = len(by) if include_groups else 0

    def func(df):
        assert len(df.columns.intersection(by)) == expected_overlap
        return df.sum()

    def apply_func(df):
        grouped = df.groupby(by, as_index=as_index)
        return grouped.apply(func, include_groups=include_groups)

    md_df, pd_df = create_test_dfs(data)
    eval_general(md_df, pd_df, apply_func)


@pytest.mark.parametrize("skipna", [True, False])
@pytest.mark.parametrize("how", ["first", "last"])
def test_first_last_skipna(how, skipna):
    """Compare ``groupby.first``/``groupby.last`` for both values of ``skipna``."""
    # Columns "b" and "c" share the same NaN pattern.
    sparse_values = [np.nan, 3.0, np.nan, 4.0, np.nan, np.nan] * 20
    md_df, pd_df = create_test_dfs(
        {
            "a": [2, 1, 1, 2, 3, 3] * 20,
            "b": sparse_values,
            "c": sparse_values,
        }
    )

    # pandas is evaluated before Modin.
    pandas_method = getattr(pd_df.groupby("a"), how)
    pd_res = pandas_method(skipna=skipna)
    modin_method = getattr(md_df.groupby("a"), how)
    md_res = modin_method(skipna=skipna)
    df_equals(md_res, pd_res)


================================================
FILE: modin/tests/pandas/test_io.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import csv
import inspect
import os
import platform
import sys
import unittest.mock as mock
from collections import defaultdict
from io import BytesIO, StringIO
from pathlib import Path
from typing import Dict

import fastparquet
import numpy as np
import pandas
import pandas._libs.lib as lib
import pyarrow as pa
import pyarrow.dataset
import pytest
import sqlalchemy as sa
from packaging import version
from pandas._testing import ensure_clean
from pandas.errors import ParserWarning
from scipy import sparse

from modin.config import (
    AsyncReadMode,
    Engine,
    IsExperimental,
    MinRowPartitionSize,
    ReadSqlEngine,
    StorageFormat,
    TestDatasetSize,
    TestReadFromPostgres,
    TestReadFromSqlServer,
)
from modin.db_conn import ModinDatabaseConnection, UnsupportedDatabaseException
from modin.pandas.io import from_arrow, from_dask, from_map, from_ray, to_pandas
from modin.tests.test_utils import (
    current_execution_is_native,
    warns_that_defaulting_to_pandas_if,
)

from .utils import (
    check_file_leaks,
    create_test_dfs,
    create_test_series,
    default_to_pandas_ignore_string,
    df_equals,
    dummy_decorator,
    eval_general,
    eval_io,
    eval_io_from_str,
    generate_dataframe,
    get_unique_filename,
    json_long_bytes,
    json_long_string,
    json_short_bytes,
    json_short_string,
    parse_dates_values_by_id,
)
from .utils import test_data as utils_test_data
from .utils import (
    time_parsing_csv_path,
)

# Choose the pandas-API module under test: the regular ``modin.pandas`` for the
# "Pandas" storage format, the experimental one for any other storage format.
if StorageFormat.get() == "Pandas":
    import modin.pandas as pd
else:
    import modin.experimental.pandas as pd

# Exception types that ``eval_to_file`` retries on. Ray is optional: when it is
# not importable the tuple is empty, so nothing is caught by the retry loop.
try:
    import ray

    EXCEPTIONS = (ray.exceptions.WorkerCrashedError,)
except ImportError:
    EXCEPTIONS = ()


from modin.config import NPartitions

# Set the NPartitions config value to 4 when this module is imported.
NPartitions.put(4)

# Row counts selectable through the ``TestDatasetSize`` config value.
DATASET_SIZE_DICT = {
    "Small": 64,
    "Normal": 2000,
    "Big": 20000,
}

# Number of rows in the test file
# (falls back to the "Small" size when ``TestDatasetSize`` is not a known key).
NROWS = DATASET_SIZE_DICT.get(TestDatasetSize.get(), DATASET_SIZE_DICT["Small"])

# Small column-name -> values mapping: five integer columns of four rows each.
TEST_DATA = {
    "col1": [0, 1, 2, 3],
    "col2": [4, 5, 6, 7],
    "col3": [8, 9, 10, 11],
    "col4": [12, 13, 14, 15],
    "col5": [0, 0, 0, 0],
}


def assert_files_eq(path1, path2):
    """
    Tell whether two files have byte-identical contents.

    Despite the name, this function does not assert anything itself: it returns
    a boolean that the caller is expected to check.

    Parameters
    ----------
    path1 : str or path-like
        Path to the first file.
    path2 : str or path-like
        Path to the second file.

    Returns
    -------
    bool
        True if both files contain exactly the same bytes, False otherwise.
    """
    # Binary mode so the comparison is not affected by text decoding.
    with open(path1, "rb") as file1, open(path2, "rb") as file2:
        return file1.read() == file2.read()


def setup_clipboard(row_size=NROWS):
    """
    Copy a two-column pandas frame to the clipboard.

    Parameters
    ----------
    row_size : int, default: NROWS
        Number of rows; both ``col1`` and ``col2`` hold ``np.arange(row_size)``.
    """
    columns = {name: np.arange(row_size) for name in ("col1", "col2")}
    pandas.DataFrame(columns).to_clipboard()


def parquet_eval_to_file(tmp_dir, modin_obj, pandas_obj, fn, extension, **fn_kwargs):
    """
    Write both objects with `fn`, read the files back and compare the frames.

    Parameters
    ----------
    tmp_dir : Union[str, Path]
        Temporary directory the files are written to.
    modin_obj : pd.DataFrame
        A Modin DataFrame or a Series to call `fn` on.
    pandas_obj : pandas.DataFrame
        A pandas DataFrame or a Series to call `fn` on.
    fn : str
        Name of the writer method to call on both objects.
    extension : str
        Extension of the test files.
    **fn_kwargs : dict
        Keyword arguments forwarded to `fn`. The ``engine`` entry (``"auto"``
        when absent) is also used when reading the files back.
    """
    modin_path = get_unique_filename(extension=extension, data_dir=tmp_dir)
    pandas_path = get_unique_filename(extension=extension, data_dir=tmp_dir)

    # Modin writes first, then pandas.
    for obj, path in ((modin_obj, modin_path), (pandas_obj, pandas_path)):
        getattr(obj, fn)(path, **fn_kwargs)

    read_engine = fn_kwargs.get("engine", "auto")
    pandas_df = pandas.read_parquet(pandas_path, engine=read_engine)
    modin_df = pd.read_parquet(modin_path, engine=read_engine)
    df_equals(pandas_df, modin_df)


def eval_to_file(tmp_dir, modin_obj, pandas_obj, fn, extension, **fn_kwargs):
    """
    Test `fn` method of `modin_obj` and `pandas_obj`.

    Both objects write to their own file and the two files must be
    byte-identical.

    Parameters
    ----------
    tmp_dir : Union[str, Path]
        Temporary directory.
    modin_obj: Modin DataFrame or Series
        Object to test.
    pandas_obj: Pandas DataFrame or Series
        Object to test.
    fn: str
        Name of the method, that should be tested.
    extension: str
        Extension of the test file.
    **fn_kwargs : dict
        Keyword arguments forwarded to `fn` for both objects.

    Raises
    ------
    Exception
        The last exception from ``EXCEPTIONS`` if all three Modin attempts failed.
    AssertionError
        If the two written files differ.
    """
    unique_filename_modin = get_unique_filename(extension=extension, data_dir=tmp_dir)
    unique_filename_pandas = get_unique_filename(extension=extension, data_dir=tmp_dir)

    # parameter `max_retries=0` is set for `to_csv` function on Ray engine,
    # in order to increase the stability of tests, we repeat the call of
    # the entire function manually
    last_exception = None
    for _ in range(3):
        try:
            getattr(modin_obj, fn)(unique_filename_modin, **fn_kwargs)
        except EXCEPTIONS as err:
            last_exception = err
        else:
            # A successful attempt cancels any failure recorded by an earlier
            # attempt; otherwise a successful retry would still raise below.
            last_exception = None
            break
    # Only raise when every attempt failed.
    if last_exception is not None:
        raise last_exception

    getattr(pandas_obj, fn)(unique_filename_pandas, **fn_kwargs)

    assert assert_files_eq(unique_filename_modin, unique_filename_pandas)


def eval_to_csv_file(tmp_dir, modin_obj, pandas_obj, extension, **kwargs):
    """
    Compare the ``to_csv`` output of `modin_obj` and `pandas_obj`.

    Parameters
    ----------
    tmp_dir : Union[str, Path]
        Temporary directory for the produced files.
    modin_obj : Modin DataFrame or Series
        Object to test.
    pandas_obj : pandas DataFrame or Series
        Object to test.
    extension : str or None
        Extension of the test files. With None, ``to_csv`` is called without a
        path and the returned strings are compared instead.
    **kwargs : dict
        Keyword arguments forwarded to ``to_csv``.
    """
    if extension is None:
        # No target path: ``to_csv`` hands the CSV text back.
        kwargs["mode"] = "t"
        kwargs["compression"] = "infer"
        modin_csv = modin_obj.to_csv(**kwargs)
        pandas_csv = pandas_obj.to_csv(**kwargs)
        if modin_csv == pandas_csv:
            return

        # The texts differ, so dump them to files and compare the parsed data.
        force_read = True
        modin_file = get_unique_filename(extension="csv", data_dir=tmp_dir)
        pandas_file = get_unique_filename(extension="csv", data_dir=tmp_dir)
        for target, text in ((modin_file, modin_csv), (pandas_file, pandas_csv)):
            with open(target, "w") as file:
                file.write(text)
    else:
        force_read = extension != "csv" or kwargs.get("compression", None)
        modin_file = get_unique_filename(extension=extension, data_dir=tmp_dir)
        pandas_file = get_unique_filename(extension=extension, data_dir=tmp_dir)
        modin_obj.to_csv(modin_file, **kwargs)
        pandas_obj.to_csv(pandas_file, **kwargs)

    # Byte-identical files need no further checks (unless a read is forced).
    if not force_read and assert_files_eq(modin_file, pandas_file):
        return

    # If the files are not identical, make sure they can
    # be read by pandas and contains identical data.
    read_kwargs = {}
    if kwargs.get("index", None) is not False:
        read_kwargs["index_col"] = 0
    for option in ("sep", "compression"):
        option_value = kwargs.get(option, None)
        if option_value is not None:
            read_kwargs[option] = option_value
    modin_obj = pandas.read_csv(modin_file, **read_kwargs)
    pandas_obj = pandas.read_csv(pandas_file, **read_kwargs)
    df_equals(pandas_obj, modin_obj)


@pytest.fixture
def make_parquet_dir(tmp_path):
    """Yield a factory that writes several parquet files into ``tmp_path``."""

    def _make_parquet_dir(
        dfs_by_filename: Dict[str, pandas.DataFrame], row_group_size: int
    ):
        # One parquet file per entry, named after its key.
        for filename, df in dfs_by_filename.items():
            target_path = os.path.join(tmp_path, filename)
            df.to_parquet(target_path, row_group_size=row_group_size)
        return tmp_path

    yield _make_parquet_dir


@pytest.mark.usefixtures("TestReadCSVFixture")
@pytest.mark.skipif(
    IsExperimental.get() and StorageFormat.get() == "Pyarrow",
    reason="Segmentation fault; see PR #2347 ffor details",
)
@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestCsv:
    # delimiter tests
    @pytest.mark.parametrize("sep", ["_", ",", "."])
    @pytest.mark.parametrize("decimal", [".", "_"])
    @pytest.mark.parametrize("thousands", [None, ",", "_", " "])
    def test_read_csv_seps(self, make_csv_file, sep, decimal, thousands):
        """Run ``read_csv`` through ``eval_io`` for each separator combination."""
        # The file is generated with the same separators it is later read with.
        csv_path = make_csv_file(
            delimiter=sep,
            thousands_separator=thousands,
            decimal_separator=decimal,
        )
        read_csv_kwargs = {
            "filepath_or_buffer": csv_path,
            "sep": sep,
            "decimal": decimal,
            "thousands": thousands,
        }
        eval_io(fn_name="read_csv", **read_csv_kwargs)

    @pytest.mark.parametrize("sep", [None, "_"])
    @pytest.mark.parametrize("delimiter", [".", "_"])
    def test_read_csv_seps_except(self, make_csv_file, sep, delimiter):
        """Run ``read_csv`` with both ``sep`` and ``delimiter`` passed."""
        csv_path = make_csv_file(delimiter=delimiter)
        conflict_error = ValueError(
            "Specified a sep and a delimiter; you can only specify one."
        )
        read_csv_kwargs = {
            "filepath_or_buffer": csv_path,
            "delimiter": delimiter,
            "sep": sep,
        }
        eval_io(
            fn_name="read_csv",
            expected_exception=conflict_error,
            **read_csv_kwargs,
        )

    @pytest.mark.parametrize(
        "dtype_backend", [lib.no_default, "numpy_nullable", "pyarrow"]
    )
    def test_read_csv_dtype_backend(self, make_csv_file, dtype_backend):
        """Run ``read_csv`` for each ``dtype_backend`` and also compare the dtypes."""

        def comparator(df1, df2):
            # Compare the frames first, then their dtypes.
            df_equals(df1, df2)
            dtypes1, dtypes2 = df1.dtypes, df2.dtypes
            df_equals(dtypes1, dtypes2)

        csv_path = make_csv_file()
        read_csv_kwargs = {
            "filepath_or_buffer": csv_path,
            "dtype_backend": dtype_backend,
        }
        eval_io(fn_name="read_csv", comparator=comparator, **read_csv_kwargs)

    # Column and Index Locations and Names tests
    @pytest.mark.parametrize("header", ["infer", None, 0])
    @pytest.mark.parametrize("index_col", [None, "col1"])
    @pytest.mark.parametrize(
        "names", [lib.no_default, ["col1"], ["c1", "c2", "c3", "c4", "c5", "c6"]]
    )
    @pytest.mark.parametrize(
        "usecols", [None, ["col1"], ["col1", "col2", "col6"], [0, 1, 5]]
    )
    @pytest.mark.parametrize("skip_blank_lines", [True, False])
    def test_read_csv_col_handling(
        self,
        header,
        index_col,
        names,
        usecols,
        skip_blank_lines,
    ):
        """Run ``read_csv`` for combinations of column/index selection parameters."""
        names_not_given = names is lib.no_default
        if names_not_given:
            pytest.skip("some parameters combiantions fails: issue #2312")
        if header in ("infer", None) and not names_not_given:
            pytest.skip(
                "Heterogeneous data in a column is not cast to a common type: issue #3346"
            )

        read_csv_kwargs = {
            "filepath_or_buffer": pytest.csvs_names["test_read_csv_blank_lines"],
            "header": header,
            "index_col": index_col,
            "names": names,
            "usecols": usecols,
            "skip_blank_lines": skip_blank_lines,
        }
        eval_io(
            fn_name="read_csv",
            # FIXME: https://github.com/modin-project/modin/issues/7035
            expected_exception=False,
            **read_csv_kwargs,
        )

    @pytest.mark.parametrize("usecols", [lambda col_name: col_name in ["a", "b", "e"]])
    def test_from_csv_with_callable_usecols(self, usecols):
        """Compare ``read_csv`` results when ``usecols`` is a callable."""
        csv_path = "modin/tests/pandas/data/test_usecols.csv"
        read_csv_kwargs = {"usecols": usecols}
        # pandas reads first, then Modin.
        expected = pandas.read_csv(csv_path, **read_csv_kwargs)
        actual = pd.read_csv(csv_path, **read_csv_kwargs)
        df_equals(actual, expected)

    # General Parsing Configuration
    @pytest.mark.parametrize("dtype", [None, True])
    @pytest.mark.parametrize("engine", [None, "python", "c"])
    @pytest.mark.parametrize(
        "converters",
        [
            None,
            {
                "col1": lambda x: np.int64(x) * 10,
                "col2": pandas.to_datetime,
                "col4": lambda x: x.replace(":", ";"),
            },
        ],
    )
    @pytest.mark.parametrize("skipfooter", [0, 10])
    def test_read_csv_parsing_1(
        self,
        dtype,
        engine,
        converters,
        skipfooter,
    ):
        """Run ``read_csv`` for combinations of dtype, engine, converters and skipfooter."""
        csv_path = pytest.csvs_names["test_read_csv_regular"]

        if dtype:
            # A truthy ``dtype`` means "read every column as object": the
            # column names are taken from a one-row read of the same file.
            header_frame = pandas.read_csv(csv_path, nrows=1)
            dtype = dict.fromkeys(header_frame.columns, "object")

        expected_exception = (
            ValueError("the 'c' engine does not support skipfooter")
            if engine == "c" and skipfooter != 0
            else None
        )

        read_csv_kwargs = {
            "filepath_or_buffer": csv_path,
            "dtype": dtype,
            "engine": engine,
            "converters": converters,
            "skipfooter": skipfooter,
        }
        eval_io(
            fn_name="read_csv",
            expected_exception=expected_exception,
            check_kwargs_callable=not callable(converters),
            **read_csv_kwargs,
        )

    @pytest.mark.parametrize("header", ["infer", None, 0])
    @pytest.mark.parametrize(
        "skiprows",
        [
            2,
            lambda x: x % 2,
            lambda x: x > 25,
            lambda x: x > 128,
            np.arange(10, 50),
            np.arange(10, 50, 2),
        ],
    )
    @pytest.mark.parametrize("nrows", [35, None])
    @pytest.mark.parametrize(
        "names",
        [
            [f"c{col_number}" for col_number in range(4)],
            [f"c{col_number}" for col_number in range(6)],
            None,
        ],
    )
    @pytest.mark.parametrize("encoding", ["latin1", "windows-1251", None])
    def test_read_csv_parsing_2(
        self,
        make_csv_file,
        request,
        header,
        skiprows,
        nrows,
        names,
        encoding,
    ):
        """Run ``read_csv`` for combinations of header, skiprows, nrows, names and encoding."""
        # A dedicated file is generated only when an explicit encoding is requested.
        unique_filename = (
            make_csv_file(encoding=encoding)
            if encoding
            else pytest.csvs_names["test_read_csv_regular"]
        )
        kwargs = {
            "filepath_or_buffer": unique_filename,
            "header": header,
            "skiprows": skiprows,
            "nrows": nrows,
            "names": names,
            "encoding": encoding,
        }

        if Engine.get() != "Python":
            first_row = pandas.read_csv(**{**kwargs, "nrows": 1})
            first_cell = first_row[first_row.columns[0]][first_row.index[0]]
            # in that case first partition will contain str
            if first_cell in ["c1", "col1", "c3", "col3"]:
                pytest.xfail("read_csv incorrect output with float data - issue #2634")

        eval_io(
            fn_name="read_csv",
            expected_exception=None,
            check_kwargs_callable=not callable(skiprows),
            # read_csv kwargs
            **kwargs,
        )

    @pytest.mark.parametrize("true_values", [["Yes"], ["Yes", "true"], None])
    @pytest.mark.parametrize("false_values", [["No"], ["No", "false"], None])
    @pytest.mark.parametrize("skipfooter", [0, 10])
    @pytest.mark.parametrize("nrows", [35, None])
    def test_read_csv_parsing_3(
        self,
        true_values,
        false_values,
        skipfooter,
        nrows,
    ):
        """Run ``read_csv`` for combinations of true/false values, skipfooter and nrows."""
        # TODO: Check #2446 as it was closed
        if (false_values or true_values) and Engine.get() != "Python":
            pytest.xfail("modin and pandas dataframes differs - issue #2446")

        expected_exception = (
            ValueError("'skipfooter' not supported with 'nrows'")
            if skipfooter != 0 and nrows is not None
            else None
        )

        read_csv_kwargs = {
            "filepath_or_buffer": pytest.csvs_names["test_read_csv_yes_no"],
            "true_values": true_values,
            "false_values": false_values,
            "skipfooter": skipfooter,
            "nrows": nrows,
        }
        eval_io(
            fn_name="read_csv",
            expected_exception=expected_exception,
            **read_csv_kwargs,
        )

    def test_read_csv_skipinitialspace(self):
        """Run ``eval_io_from_str`` with ``skipinitialspace=True`` on padded CSV text."""
        # Fields are preceded by a varying number of spaces.
        csv_lines = (
            "col1,col2,col3,col4\n",
            "five,  six,  seven,  eight\n",
            "    five,    six,    seven,    eight\n",
            "five, six,  seven,   eight\n",
        )
        str_initial_spaces = "".join(csv_lines)

        with ensure_clean(".csv") as unique_filename:
            eval_io_from_str(str_initial_spaces, unique_filename, skipinitialspace=True)

    # NA and Missing Data Handling tests
    @pytest.mark.parametrize("na_values", ["custom_nan", "73"])
    @pytest.mark.parametrize("keep_default_na", [True, False])
    @pytest.mark.parametrize("na_filter", [True, False])
    @pytest.mark.parametrize("verbose", [True, False])
    @pytest.mark.parametrize("skip_blank_lines", [True, False])
    def test_read_csv_nans_handling(
        self,
        na_values,
        keep_default_na,
        na_filter,
        verbose,
        skip_blank_lines,
    ):
        """Run ``read_csv`` for combinations of the NA-handling parameters."""
        read_csv_kwargs = {
            "filepath_or_buffer": pytest.csvs_names["test_read_csv_nans"],
            "na_values": na_values,
            "keep_default_na": keep_default_na,
            "na_filter": na_filter,
            "verbose": verbose,
            "skip_blank_lines": skip_blank_lines,
        }
        eval_io(fn_name="read_csv", **read_csv_kwargs)

    # Datetime Handling tests
    @pytest.mark.parametrize(
        "parse_dates", [True, False, ["col2"], ["col2", "col4"], [1, 3]]
    )
    @pytest.mark.parametrize("infer_datetime_format", [True, False])
    @pytest.mark.parametrize("keep_date_col", [True, False])
    @pytest.mark.parametrize(
        "date_parser",
        [lib.no_default, lambda x: pandas.to_datetime(x, format="%Y-%m-%d")],
        ids=["default", "format-Ymd"],
    )
    @pytest.mark.parametrize("dayfirst", [True, False])
    @pytest.mark.parametrize("cache_dates", [True, False])
    def test_read_csv_datetime(
        self,
        parse_dates,
        infer_datetime_format,
        keep_date_col,
        date_parser,
        dayfirst,
        cache_dates,
        request,
    ):
        """Run ``read_csv`` for combinations of the datetime-handling parameters."""
        case_id = request.node.callspec.id
        # The "%Y-%m-%d" parser combined with the last two ``parse_dates``
        # parametrizations is expected to fail with this ValueError.
        failing_parse_dates = ("parse_dates3", "parse_dates4")
        if "format-Ymd" in case_id and any(
            tag in case_id for tag in failing_parse_dates
        ):
            msg = (
                'time data "00:00:00" doesn\'t match format "%Y-%m-%d", at position 0. You might want to try:\n'
                "    - passing `format` if your strings have a consistent format;\n"
                "    - passing `format='ISO8601'` if your strings are all ISO8601 "
                "but not necessarily in exactly the same format;\n"
                "    - passing `format='mixed'`, and the format will be inferred "
                "for each element individually. You might want to use `dayfirst` "
                "alongside this."
            )
            expected_exception = ValueError(msg)
        else:
            expected_exception = None

        read_csv_kwargs = {
            "filepath_or_buffer": pytest.csvs_names["test_read_csv_regular"],
            "parse_dates": parse_dates,
            "infer_datetime_format": infer_datetime_format,
            "keep_date_col": keep_date_col,
            "date_parser": date_parser,
            "dayfirst": dayfirst,
            "cache_dates": cache_dates,
        }
        eval_io(
            fn_name="read_csv",
            check_kwargs_callable=not callable(date_parser),
            expected_exception=expected_exception,
            **read_csv_kwargs,
        )

    @pytest.mark.parametrize("date", ["2023-01-01 00:00:01.000000000", "2023"])
    @pytest.mark.parametrize("dtype", [None, "str", {"id": "int64"}])
    @pytest.mark.parametrize("parse_dates", [None, [], ["date"], [1]])
    def test_read_csv_dtype_parse_dates(self, date, dtype, parse_dates):
        """Run ``read_csv`` with ``dtype`` and ``parse_dates`` combined on a one-row file."""
        csv_text = f"id,date\n1,{date}"
        with ensure_clean(".csv") as filename:
            with open(filename, "w") as csv_file:
                csv_file.write(csv_text)
            read_csv_kwargs = {
                "filepath_or_buffer": filename,
                "dtype": dtype,
                "parse_dates": parse_dates,
            }
            eval_io(fn_name="read_csv", **read_csv_kwargs)

    # Iteration tests
    @pytest.mark.parametrize("iterator", [True, False])
    def test_read_csv_iteration(self, iterator):
        """Compare the Modin and pandas CSV readers chunk by chunk."""
        filename = pytest.csvs_names["test_read_csv_regular"]

        def open_readers(**reader_kwargs):
            # The Modin reader is created first, then the pandas one.
            modin_reader = pd.read_csv(filename, iterator=iterator, **reader_kwargs)
            pandas_reader = pandas.read_csv(
                filename, iterator=iterator, **reader_kwargs
            )
            return modin_reader, pandas_reader

        # Tests __next__ and correctness of reader as an iterator
        # Use larger chunksize to read through file quicker
        rdf_reader, pd_reader = open_readers(chunksize=500)
        for modin_chunk, pandas_chunk in zip(rdf_reader, pd_reader):
            df_equals(modin_chunk, pandas_chunk)

        # Tests that get_chunk works correctly
        rdf_reader, pd_reader = open_readers(chunksize=1)
        df_equals(rdf_reader.get_chunk(1), pd_reader.get_chunk(1))

        # Tests that read works correctly
        rdf_reader, pd_reader = open_readers(chunksize=1)
        df_equals(rdf_reader.read(), pd_reader.read())

        # Tests #6553
        if iterator:
            rdf_reader, pd_reader = open_readers()
            df_equals(rdf_reader.read(), pd_reader.read())

    @pytest.mark.parametrize("pathlike", [False, True])
    def test_read_csv_encoding_976(self, pathlike):
        """Read the issue #976 file (windows-1251, ``;`` separated) from a str or ``Path``."""
        file_name = "modin/tests/pandas/data/issue_976.csv"
        file_name = Path(file_name) if pathlike else file_name

        read_kwargs = dict(
            sep=";",
            names=list(map(str, range(11))),
            encoding="windows-1251",
        )
        # Columns "4" and "5" contain data of various types in partitions
        # (see #1931 for details), so they are dropped before comparing.
        mixed_type_columns = ["4", "5"]
        modin_df = pd.read_csv(file_name, **read_kwargs).drop(
            mixed_type_columns, axis=1
        )
        pandas_df = pandas.read_csv(file_name, **read_kwargs).drop(
            mixed_type_columns, axis=1
        )

        df_equals(modin_df, pandas_df)

    # Quoting, Compression parameters tests
    @pytest.mark.parametrize("compression", ["infer", "gzip", "bz2", "xz", "zip"])
    @pytest.mark.parametrize("encoding", [None, "latin8", "utf16"])
    @pytest.mark.parametrize("engine", [None, "python", "c", "pyarrow"])
    def test_read_csv_compression(self, make_csv_file, compression, encoding, engine):
        """Run ``read_csv`` through ``eval_io`` over compression/encoding/engine combinations."""
        csv_path = make_csv_file(encoding=encoding, compression=compression)

        # For UTF-16 combined with bz2 or xz a BOM-related UnicodeError is expected.
        bom_error_expected = encoding == "utf16" and compression in ("bz2", "xz")
        expected_exception = (
            UnicodeError("UTF-16 stream does not start with BOM")
            if bom_error_expected
            else None
        )

        read_kwargs = {
            "filepath_or_buffer": csv_path,
            "compression": compression,
            "encoding": encoding,
            "engine": engine,
        }
        eval_io(
            fn_name="read_csv",
            expected_exception=expected_exception,
            **read_kwargs,
        )

    @pytest.mark.parametrize(
        "encoding",
        [
            None,
            "ISO-8859-1",
            "latin1",
            "iso-8859-1",
            "cp1252",
            "utf8",
            pytest.param(
                "unicode_escape",
                marks=pytest.mark.skipif(
                    condition=sys.version_info < (3, 9),
                    reason="https://bugs.python.org/issue45461",
                ),
            ),
            "raw_unicode_escape",
            "utf_16_le",
            "utf_16_be",
            "utf32",
            "utf_32_le",
            "utf_32_be",
            "utf-8-sig",
        ],
    )
    def test_read_csv_encoding(self, make_csv_file, encoding):
        """Write a CSV in ``encoding`` and read it back through ``eval_io`` with the same one."""
        read_kwargs = {
            "filepath_or_buffer": make_csv_file(encoding=encoding),
            "encoding": encoding,
        }
        eval_io(fn_name="read_csv", **read_kwargs)

    @pytest.mark.parametrize("thousands", [None, ",", "_", " "])
    @pytest.mark.parametrize("decimal", [".", "_"])
    @pytest.mark.parametrize("lineterminator", [None, "x", "\n"])
    @pytest.mark.parametrize("escapechar", [None, "d", "x"])
    @pytest.mark.parametrize("dialect", ["test_csv_dialect", "use_dialect_name", None])
    def test_read_csv_file_format(
        self,
        make_csv_file,
        thousands,
        decimal,
        lineterminator,
        escapechar,
        dialect,
    ):
        """
        Run ``read_csv`` through ``eval_io`` for file-format options and CSV dialects.

        When ``dialect`` is given, a dialect is registered in the ``csv`` module under
        that name and the file is generated with the dialect's parameters; the dialect
        is passed to ``read_csv`` either by name (``"use_dialect_name"``) or as a
        ``_csv.Dialect`` object. Otherwise the file is generated from the
        thousands/decimal/escapechar/lineterminator parameters.

        The dialect is unregistered when the test finishes so the process-wide
        ``csv`` registry does not leak into other tests.
        """
        # `dialect` may be rebound to a `_csv.Dialect` object below, so keep the
        # registry key separately to be able to unregister it afterwards.
        registered_name = dialect if dialect else None
        if registered_name:
            test_csv_dialect_params = {
                "delimiter": "_",
                "doublequote": False,
                "escapechar": "\\",
                "quotechar": "d",
                "quoting": csv.QUOTE_ALL,
            }
            csv.register_dialect(registered_name, **test_csv_dialect_params)

        try:
            if registered_name:
                if dialect != "use_dialect_name":
                    # otherwise try with dialect name instead of `_csv.Dialect` object
                    dialect = csv.get_dialect(dialect)
                unique_filename = make_csv_file(**test_csv_dialect_params)
            else:
                unique_filename = make_csv_file(
                    thousands_separator=thousands,
                    decimal_separator=decimal,
                    escapechar=escapechar,
                    lineterminator=lineterminator,
                )

            expected_exception = None
            if dialect is None:
                # FIXME: https://github.com/modin-project/modin/issues/7035
                expected_exception = False

            eval_io(
                fn_name="read_csv",
                # read_csv kwargs
                filepath_or_buffer=unique_filename,
                thousands=thousands,
                decimal=decimal,
                lineterminator=lineterminator,
                escapechar=escapechar,
                dialect=dialect,
                expected_exception=expected_exception,
            )
        finally:
            # Undo the global registration even if the comparison above fails.
            if registered_name:
                csv.unregister_dialect(registered_name)

    @pytest.mark.parametrize(
        "quoting",
        [csv.QUOTE_ALL, csv.QUOTE_MINIMAL, csv.QUOTE_NONNUMERIC, csv.QUOTE_NONE],
    )
    @pytest.mark.parametrize("quotechar", ['"', "_", "d"])
    @pytest.mark.parametrize("doublequote", [True, False])
    @pytest.mark.parametrize("comment", [None, "#", "x"])
    def test_read_csv_quoting(
        self,
        make_csv_file,
        quoting,
        quotechar,
        doublequote,
        comment,
    ):
        """Run ``read_csv`` through ``eval_io`` over quoting-related parameters."""
        # An escapechar has to be set in these cases, otherwise an error occurs:
        # "_csv.Error: need to escape, but no escapechar set"
        escape_not_needed = (
            doublequote or quotechar == '"' or quoting == csv.QUOTE_NONE
        )
        escapechar = None if escape_not_needed else "\\"

        # Parameters shared by the file writer and the reader.
        quoting_kwargs = {
            "quoting": quoting,
            "quotechar": quotechar,
            "doublequote": doublequote,
            "escapechar": escapechar,
        }
        csv_path = make_csv_file(comment_col_char=comment, **quoting_kwargs)

        eval_io(
            fn_name="read_csv",
            filepath_or_buffer=csv_path,
            comment=comment,
            **quoting_kwargs,
        )

    # Error Handling parameters tests
    @pytest.mark.skip(reason="https://github.com/modin-project/modin/issues/6239")
    @pytest.mark.parametrize("on_bad_lines", ["error", "warn", "skip", None])
    def test_read_csv_error_handling(self, on_bad_lines):
        """Run ``read_csv`` through ``eval_io`` on the bad-lines file for each ``on_bad_lines``."""
        # When `on_bad_lines` is not None, exceptions are raised both by Modin
        # and pandas and the test passes; only the None case needs the xfail.
        # TODO: Check #2500 as it was closed
        if on_bad_lines is None and Engine.get() != "Python":
            pytest.xfail("read_csv doesn't raise `bad lines` exceptions - issue #2500")

        read_kwargs = {
            "filepath_or_buffer": pytest.csvs_names["test_read_csv_bad_lines"],
            "on_bad_lines": on_bad_lines,
        }
        eval_io(fn_name="read_csv", **read_kwargs)

    @pytest.mark.parametrize("float_precision", [None, "high", "legacy", "round_trip"])
    def test_python_engine_float_precision_except(self, float_precision):
        """Any non-None ``float_precision`` is expected to raise with ``engine="python"``."""
        expected_exception = (
            None
            if float_precision is None
            else ValueError(
                "The 'float_precision' option is not supported with the 'python' engine"
            )
        )
        read_kwargs = {
            "filepath_or_buffer": pytest.csvs_names["test_read_csv_regular"],
            "engine": "python",
            "float_precision": float_precision,
        }
        eval_io(
            fn_name="read_csv",
            expected_exception=expected_exception,
            **read_kwargs,
        )

    @pytest.mark.parametrize("low_memory", [False, True])
    def test_python_engine_low_memory_except(self, low_memory):
        """``low_memory=False`` is expected to raise with ``engine="python"``."""
        expected_exception = (
            None
            if low_memory
            else ValueError(
                "The 'low_memory' option is not supported with the 'python' engine"
            )
        )
        read_kwargs = {
            "filepath_or_buffer": pytest.csvs_names["test_read_csv_regular"],
            "engine": "python",
            "low_memory": low_memory,
        }
        eval_io(
            fn_name="read_csv",
            expected_exception=expected_exception,
            **read_kwargs,
        )

    @pytest.mark.parametrize("delim_whitespace", [True, False])
    def test_delim_whitespace(self, delim_whitespace, tmp_path):
        """Feed whitespace-separated text to ``eval_io_from_str`` with ``delim_whitespace``."""
        # Columns are separated by a varying number of spaces on purpose.
        whitespace_separated_csv = (
            "col1 col2  col3   col4\n"
            "5 6   7  8\n"
            "9  10    11 12\n"
        )
        target_file = get_unique_filename(data_dir=tmp_path)
        eval_io_from_str(
            whitespace_separated_csv,
            target_file,
            delim_whitespace=delim_whitespace,
        )

    # Internal parameters tests
    @pytest.mark.parametrize("engine", ["c"])
    @pytest.mark.parametrize("delimiter", [",", " "])
    @pytest.mark.parametrize("low_memory", [True, False])
    @pytest.mark.parametrize("memory_map", [True, False])
    @pytest.mark.parametrize("float_precision", [None, "high", "round_trip"])
    def test_read_csv_internal(
        self,
        make_csv_file,
        engine,
        delimiter,
        low_memory,
        memory_map,
        float_precision,
    ):
        """Run ``read_csv`` through ``eval_io`` over engine-internal parameters."""
        read_kwargs = {
            "filepath_or_buffer": make_csv_file(delimiter=delimiter),
            "engine": engine,
            "delimiter": delimiter,
            "low_memory": low_memory,
            "memory_map": memory_map,
            "float_precision": float_precision,
        }
        eval_io(fn_name="read_csv", **read_kwargs)

    # Issue related, specific or corner cases
    @pytest.mark.parametrize("nrows", [2, None])
    def test_read_csv_bad_quotes(self, nrows):
        """Feed text whose last field has an unterminated quote to ``eval_io_from_str``."""
        malformed_csv = (
            "1, 2, 3, 4\n"
            "one, two, three, four\n"
            'five, "six", seven, "eight\n'
        )

        with ensure_clean(".csv") as csv_path:
            eval_io_from_str(malformed_csv, csv_path, nrows=nrows)

    def test_read_csv_categories(self):
        """Read the categories file with an ``int64`` column and a ``category`` column."""
        column_dtypes = {"one": "int64", "two": "category"}
        eval_io(
            fn_name="read_csv",
            filepath_or_buffer="modin/tests/pandas/data/test_categories.csv",
            names=list(column_dtypes),
            dtype=column_dtypes,
        )

    @pytest.mark.parametrize("encoding", [None, "utf-8"])
    @pytest.mark.parametrize("encoding_errors", ["strict", "ignore"])
    @pytest.mark.parametrize(
        "parse_dates",
        [pytest.param(value, id=id) for id, value in parse_dates_values_by_id.items()],
    )
    @pytest.mark.parametrize("index_col", [None, 0, 5])
    @pytest.mark.parametrize("header", ["infer", 0])
    @pytest.mark.parametrize(
        "names",
        [
            None,
            [
                "timestamp",
                "year",
                "month",
                "date",
                "symbol",
                "high",
                "low",
                "open",
                "close",
                "spread",
                "volume",
            ],
        ],
    )
    @pytest.mark.exclude_in_sanity
    def test_read_csv_parse_dates(
        self,
        names,
        header,
        index_col,
        parse_dates,
        encoding,
        encoding_errors,
        request,
    ):
        """Run ``read_csv`` through ``eval_io`` on the time-parsing file with ``parse_dates``."""
        if header == "infer" and names is not None:
            pytest.xfail(
                "read_csv with Ray engine works incorrectly with date data and names parameter provided - issue #2509"
            )

        # The expected failure is derived from the id of the parametrized case.
        case_id = request.node.callspec.id
        if "nonexistent_int_column" in case_id:
            expected_exception = IndexError("list index out of range")
        elif "nonexistent_string_column" in case_id:
            expected_exception = ValueError(
                "Missing column provided to 'parse_dates': 'z'"
            )
        else:
            expected_exception = None

        read_kwargs = {
            "filepath_or_buffer": time_parsing_csv_path,
            "names": names,
            "header": header,
            "index_col": index_col,
            "parse_dates": parse_dates,
            "encoding": encoding,
            "encoding_errors": encoding_errors,
        }
        eval_io(
            fn_name="read_csv",
            expected_exception=expected_exception,
            **read_kwargs,
        )

    @pytest.mark.parametrize(
        "storage_options",
        [{"anon": False}, {"anon": True}, {"key": "123", "secret": "123"}, None],
    )
    @pytest.mark.xfail(
        reason="S3 file gone missing, see https://github.com/modin-project/modin/issues/4875"
    )
    def test_read_csv_s3(self, storage_options):
        """Read a CSV from an ``s3://`` URL with the given ``storage_options``."""
        read_kwargs = {
            "filepath_or_buffer": "s3://noaa-ghcn-pds/csv/1788.csv",
            "storage_options": storage_options,
        }
        eval_io(fn_name="read_csv", **read_kwargs)

    @pytest.mark.xfail(
        reason="S3 file gone missing, see https://github.com/modin-project/modin/issues/7571"
    )
    def test_read_csv_s3_issue4658(self):
        """Read the first 10 rows of a CSV from an ``s3://`` URL anonymously."""
        read_kwargs = {
            "filepath_or_buffer": "s3://dask-data/nyc-taxi/2015/yellow_tripdata_2015-01.csv",
            "nrows": 10,
            "storage_options": {"anon": True},
        }
        eval_io(fn_name="read_csv", **read_kwargs)

    @pytest.mark.parametrize("names", [list("XYZ"), None])
    @pytest.mark.parametrize("skiprows", [1, 2, 3, 4, None])
    def test_read_csv_skiprows_names(self, names, skiprows):
        """Run ``read_csv`` through ``eval_io`` on the issue #2239 file with names/skiprows."""
        read_kwargs = {
            "filepath_or_buffer": "modin/tests/pandas/data/issue_2239.csv",
            "names": names,
            "skiprows": skiprows,
        }
        eval_io(fn_name="read_csv", **read_kwargs)

    def _has_pandas_fallback_reason(self):
        """Return True unless the current engine is ``"Python"``."""
        # The Python engine does not use custom IO dispatchers, so specialized
        # error messages won't appear there.
        current_engine = Engine.get()
        return current_engine != "Python"

    def test_read_csv_default_to_pandas(self):
        """Reading from an in-memory buffer should default to pandas on non-native execution."""
        warning_suffix = "buffers" if self._has_pandas_fallback_reason() else ""
        regular_csv = pytest.csvs_names["test_read_csv_regular"]
        # This tests that we default to pandas on a buffer
        with warns_that_defaulting_to_pandas_if(
            not current_execution_is_native(), suffix=warning_suffix
        ), open(regular_csv, "r") as _f:
            pd.read_csv(StringIO(_f.read()))

    def test_read_csv_url(self):
        """Read a CSV file addressed by an ``https://`` URL."""
        csv_url = "https://raw.githubusercontent.com/modin-project/modin/main/modin/tests/pandas/data/blah.csv"
        eval_io(fn_name="read_csv", filepath_or_buffer=csv_url)

    @pytest.mark.parametrize("nrows", [21, 5, None])
    @pytest.mark.parametrize("skiprows", [4, 1, 500, None])
    def test_read_csv_newlines_in_quotes(self, nrows, skiprows):
        """Run ``read_csv`` through ``eval_io`` on ``newlines.csv`` with nrows/skiprows."""
        # With skiprows=500 an EmptyDataError is the expected outcome.
        expected_exception = (
            pandas.errors.EmptyDataError("No columns to parse from file")
            if skiprows == 500
            else None
        )
        eval_io(
            fn_name="read_csv",
            expected_exception=expected_exception,
            cast_to_str=True,
            filepath_or_buffer="modin/tests/pandas/data/newlines.csv",
            nrows=nrows,
            skiprows=skiprows,
        )

    @pytest.mark.parametrize("skiprows", [None, 0, [], [1, 2], np.arange(0, 2)])
    def test_read_csv_skiprows_with_usecols(self, skiprows):
        """Run ``read_csv`` through ``eval_io`` on the issue #4543 file with skiprows + usecols."""
        column_dtypes = {"float_data": "float64"}

        # Only the ndarray `skiprows` case is expected to fail.
        if isinstance(skiprows, np.ndarray):
            expected_exception = ValueError(
                "Usecols do not match columns, columns expected but not found: ['float_data']"
            )
        else:
            expected_exception = None

        read_kwargs = {
            "filepath_or_buffer": "modin/tests/pandas/data/issue_4543.csv",
            "skiprows": skiprows,
            "usecols": column_dtypes.keys(),
            "dtype": column_dtypes,
        }
        eval_io(
            fn_name="read_csv",
            expected_exception=expected_exception,
            **read_kwargs,
        )

    def test_read_csv_sep_none(self):
        """Read the regular CSV with ``sep=None``, passing ``ParserWarning`` as ``modin_warning``."""
        read_kwargs = {
            "filepath_or_buffer": pytest.csvs_names["test_read_csv_regular"],
            "sep": None,
        }
        eval_io(fn_name="read_csv", modin_warning=ParserWarning, **read_kwargs)

    def test_read_csv_incorrect_data(self):
        """Run ``read_csv`` through ``eval_io`` on a ``.json`` file instead of a CSV."""
        json_path = "modin/tests/pandas/data/test_categories.json"
        eval_io(fn_name="read_csv", filepath_or_buffer=json_path)

    @pytest.mark.parametrize(
        "kwargs",
        [
            {"names": [5, 1, 3, 4, 2, 6]},
            {"names": [0]},
            {"names": None, "usecols": [1, 0, 2]},
            {"names": [3, 1, 2, 5], "usecols": [4, 1, 3, 2]},
        ],
    )
    def test_read_csv_names_neq_num_cols(self, kwargs):
        """Run ``read_csv`` through ``eval_io`` on the issue #2074 file with names/usecols sets."""
        csv_path = "modin/tests/pandas/data/issue_2074.csv"
        # `kwargs` carries the parametrized `names` / `usecols` values.
        eval_io(fn_name="read_csv", filepath_or_buffer=csv_path, **kwargs)

    def test_read_csv_wrong_path(self):
        """A nonexistent path is expected to produce ``FileNotFoundError``."""
        missing_file_error = FileNotFoundError(2, "No such file or directory")
        eval_io(
            fn_name="read_csv",
            expected_exception=missing_file_error,
            filepath_or_buffer="/some/wrong/path.csv",
        )

    @pytest.mark.parametrize("extension", [None, "csv", "csv.gz"])
    @pytest.mark.parametrize("sep", [" "])
    @pytest.mark.parametrize("header", [False, True, "sfx-"])
    @pytest.mark.parametrize("mode", ["w", "wb+"])
    @pytest.mark.parametrize("idx_name", [None, "Index"])
    @pytest.mark.parametrize("index", [True, False, "New index"])
    @pytest.mark.parametrize("index_label", [None, False, "New index"])
    @pytest.mark.parametrize("columns", [None, ["col1", "col3", "col5"]])
    @pytest.mark.exclude_in_sanity
    @pytest.mark.skipif(
        condition=Engine.get() == "Unidist" and os.name == "nt",
        reason="https://github.com/modin-project/modin/issues/6846",
    )
    def test_to_csv(
        self,
        tmp_path,
        extension,
        sep,
        header,
        mode,
        idx_name,
        index,
        index_label,
        columns,
    ):
        """Run ``to_csv`` through ``eval_to_csv_file`` over a grid of writer options."""
        pandas_df = generate_dataframe(idx_name=idx_name)
        modin_df = pd.DataFrame(pandas_df)

        if isinstance(header, str):
            # A string `header` is a prefix: build one alias per written column.
            prefix = header
            written_columns = modin_df.columns if columns is None else columns
            header = [f"{prefix}{c}" for c in written_columns]

        to_csv_kwargs = {
            "sep": sep,
            "header": header,
            "mode": mode,
            "index": index,
            "index_label": index_label,
            "columns": columns,
        }
        eval_to_csv_file(
            tmp_path,
            modin_obj=modin_df,
            pandas_obj=pandas_df,
            extension=extension,
            **to_csv_kwargs,
        )

    @pytest.mark.skipif(
        condition=Engine.get() == "Unidist" and os.name == "nt",
        reason="https://github.com/modin-project/modin/issues/6846",
    )
    def test_dataframe_to_csv(self, tmp_path):
        """Write the regular CSV's frame back out through ``eval_to_csv_file``."""
        source_csv = pytest.csvs_names["test_read_csv_regular"]
        pandas_df = pandas.read_csv(source_csv)
        eval_to_csv_file(
            tmp_path,
            modin_obj=pd.DataFrame(pandas_df),
            pandas_obj=pandas_df,
            extension="csv",
        )

    @pytest.mark.skipif(
        condition=Engine.get() == "Unidist" and os.name == "nt",
        reason="https://github.com/modin-project/modin/issues/6846",
    )
    def test_series_to_csv(self, tmp_path):
        """Write a single squeezed column of the regular CSV through ``eval_to_csv_file``."""
        source_csv = pytest.csvs_names["test_read_csv_regular"]
        # Read only "col1" and squeeze the resulting one-column frame.
        single_column_df = pandas.read_csv(source_csv, usecols=["col1"])
        pandas_s = single_column_df.squeeze()
        eval_to_csv_file(
            tmp_path,
            modin_obj=pd.Series(pandas_s),
            pandas_obj=pandas_s,
            extension="csv",
        )

    def test_read_csv_within_decorator(self):
        """Call ``read_csv`` from inside a function wrapped with ``dummy_decorator``."""

        @dummy_decorator()
        def wrapped_read_csv(file, method):
            # Dispatch on the library name; any other value yields None.
            if method == "pandas":
                result = pandas.read_csv(file)
            elif method == "modin":
                result = pd.read_csv(file)
            else:
                result = None
            return result

        regular_csv = pytest.csvs_names["test_read_csv_regular"]
        pandas_df = wrapped_read_csv(regular_csv, method="pandas")
        modin_df = wrapped_read_csv(regular_csv, method="modin")

        df_equals(modin_df, pandas_df)

    @pytest.mark.parametrize(
        "read_mode",
        [
            "r",
            "rb",
        ],
    )
    @pytest.mark.parametrize("buffer_start_pos", [0, 10])
    @pytest.mark.parametrize("set_async_read_mode", [False, True], indirect=True)
    def test_read_csv_file_handle(
        self, read_mode, make_csv_file, buffer_start_pos, set_async_read_mode
    ):
        """Read the same open file handle with pandas and then Modin from one offset."""
        csv_path = make_csv_file()
        frames = []
        with open(csv_path, mode=read_mode) as buffer:
            # pandas goes first, then Modin; the handle is rewound before each read.
            for reader in (pandas.read_csv, pd.read_csv):
                buffer.seek(buffer_start_pos)
                frames.append(reader(buffer))
        pandas_df, modin_df = frames
        df_equals(modin_df, pandas_df)

    @pytest.mark.skipif(
        current_execution_is_native(),
        reason="no partitions",
    )
    def test_unnamed_index(self):
        """
        Check index names stored in the first partition after ``read_csv`` with ``index_col``.

        With ``index_col=0`` the partition's index name must be None; with
        ``index_col=[0, 1]`` the partition's index names must be ``[None, "a"]``.
        """

        def get_internal_df(df):
            # Use the argument rather than a variable from the enclosing scope so
            # the helper inspects exactly the frame it is given.
            partition = df._query_compiler._modin_frame._partitions[0][0]
            return partition.to_pandas()

        path = "modin/tests/pandas/data/issue_3119.csv"
        read_df = pd.read_csv(path, index_col=0)
        assert get_internal_df(read_df).index.name is None
        read_df = pd.read_csv(path, index_col=[0, 1])
        # Compare the whole list: a pairwise `zip` would silently pass if the
        # index had fewer levels than expected.
        assert list(get_internal_df(read_df).index.names) == [None, "a"]

    def test_read_csv_empty_frame(self):
        """Read a single column and use that same column as the index."""
        only_column = "col1"
        eval_io(
            fn_name="read_csv",
            filepath_or_buffer=pytest.csvs_names["test_read_csv_regular"],
            usecols=[only_column],
            index_col=only_column,
        )

    @pytest.mark.parametrize(
        "skiprows",
        [
            [x for x in range(10)],
            [x + 5 for x in range(15)],
            [x for x in range(10) if x % 2 == 0],
            [x + 5 for x in range(15) if x % 2 == 0],
            lambda x: x % 2,
            lambda x: x > 20,
            lambda x: x < 20,
            lambda x: True,
            lambda x: x in [10, 20],
            lambda x: x << 10,
        ],
    )
    @pytest.mark.parametrize("header", ["infer", None, 0, 1, 150])
    def test_read_csv_skiprows_corner_cases(self, skiprows, header):
        """Run ``read_csv`` through ``eval_io`` over list/callable ``skiprows`` and ``header``."""
        read_kwargs = {
            "filepath_or_buffer": pytest.csvs_names["test_read_csv_regular"],
            "skiprows": skiprows,
            "header": header,
            # to avoid issues with heterogeneous data
            "dtype": "str",
        }
        eval_io(
            fn_name="read_csv",
            # Disabled when `skiprows` itself is a callable.
            check_kwargs_callable=not callable(skiprows),
            # FIXME: https://github.com/modin-project/modin/issues/7035
            expected_exception=False,
            **read_kwargs,
        )

    def test_to_csv_with_index(self, tmp_path):
        """Write a wide random frame indexed by its "key" column through ``eval_to_csv_file``."""
        cols = 100
        arows = 20000
        keyrange = 100

        # One row of random keys stacked on top of `cols` rows of normal samples,
        # then transposed so that every stacked row becomes a column.
        key_row = np.random.choice(keyrange, size=(arows))
        value_rows = np.random.normal(size=(cols, arows))
        values = np.vstack([key_row, value_rows]).T

        column_labels = ["key", *(f"avalue{i}" for i in range(1, 1 + cols))]
        modin_df = pd.DataFrame(values, columns=column_labels).set_index("key")
        pandas_df = pandas.DataFrame(values, columns=column_labels).set_index("key")
        eval_to_csv_file(tmp_path, modin_df, pandas_df, "csv")

    @pytest.mark.parametrize("set_async_read_mode", [False, True], indirect=True)
    def test_read_csv_issue_5150(self, set_async_read_mode):
        """Read a 64x64 integer CSV with ``index_col=False`` and compare with pandas."""
        with ensure_clean(".csv") as csv_path:
            source_df = pandas.DataFrame(
                np.random.randint(0, 100, size=(2**6, 2**6))
            )
            source_df.to_csv(csv_path, index=False)
            expected_pandas_df = pandas.read_csv(csv_path, index_col=False)
            actual_pandas_df = pd.read_csv(csv_path, index_col=False)._to_pandas()
            compare_inside_context = AsyncReadMode.get()
            if compare_inside_context:
                # If read operations are asynchronous, then the dataframes
                # check should be inside `ensure_clean` context
                # because the file may be deleted before actual reading starts
                df_equals(expected_pandas_df, actual_pandas_df)
        if not AsyncReadMode.get():
            df_equals(expected_pandas_df, actual_pandas_df)

    @pytest.mark.parametrize("usecols", [None, [0, 1, 2, 3, 4]])
    def test_read_csv_1930(self, usecols):
        """Run ``read_csv`` through ``eval_io`` on the issue #1930 file with five names."""
        read_kwargs = {
            "filepath_or_buffer": "modin/tests/pandas/data/issue_1930.csv",
            "names": ["c1", "c2", "c3", "c4", "c5"],
            "usecols": usecols,
        }
        eval_io(fn_name="read_csv", **read_kwargs)


def _check_relative_io(fn_name, unique_filename, path_arg, storage_default=()):
    """
    Check that reader ``fn_name`` handles a path given relative to the user home (``~``).

    The home-related environment variables are temporarily pointed at the
    directory containing ``unique_filename`` so that ``~/<basename>`` refers to it.

    Parameters
    ----------
    fn_name : str
        Name of the reader function (e.g. ``"read_csv"``).
    unique_filename : str
        Path to an existing file to read.
    path_arg : str
        Name of the reader's keyword argument that takes the path.
    storage_default : collection, default: ()
        Storage formats for which a defaulting-to-pandas warning is expected.
    """
    home_dir, file_basename = os.path.split(unique_filename)
    home_relative_path = f"~/{file_basename}"

    # Windows can be funny at where it searches for ~; besides, Python >= 3.8
    # no longer honors %HOME%, so every candidate variable is pinned.
    pinned_home = dict.fromkeys(("HOME", "USERPROFILE", "HOMEPATH"), home_dir)
    expect_default_to_pandas = (
        Engine.get() == "Python" or StorageFormat.get() in storage_default
    )

    def read_with_pandas(fname):
        return getattr(pandas, fn_name)(**{path_arg: fname})

    with mock.patch.dict(os.environ, pinned_home):
        with warns_that_defaulting_to_pandas_if(expect_default_to_pandas):
            eval_io(fn_name=fn_name, **{path_arg: home_relative_path})
        # Still inside the patched environment: hand `eval_general` the
        # home-relative path and the original path together with a pandas
        # reader, so both spellings of the path should give equivalent results.
        eval_general(home_relative_path, unique_filename, read_with_pandas)


# Leave this test apart from the test classes, which skip the default to pandas
# warning check. We want to make sure we are NOT defaulting to pandas for a
# path relative to user home.
# TODO(https://github.com/modin-project/modin/issues/3655): Get rid of this
# comment once we turn all default to pandas messages into errors.
def test_read_csv_relative_to_user_home(make_csv_file):
    """Check ``read_csv`` with a ``~``-relative path via ``_check_relative_io``."""
    _check_relative_io(
        fn_name="read_csv",
        unique_filename=make_csv_file(),
        path_arg="filepath_or_buffer",
    )


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestTable:
    """Tests for ``read_table`` on tab-delimited files."""

    def test_read_table(self, make_csv_file):
        """Read a tab-delimited file through ``eval_io``."""
        tsv_path = make_csv_file(delimiter="\t")
        eval_io(fn_name="read_table", filepath_or_buffer=tsv_path)

    @pytest.mark.parametrize("set_async_read_mode", [False, True], indirect=True)
    def test_read_table_within_decorator(self, make_csv_file, set_async_read_mode):
        """Call ``read_table`` from inside a function wrapped with ``dummy_decorator``."""

        @dummy_decorator()
        def wrapped_read_table(file, method):
            # Dispatch on the library name; any other value yields None.
            if method == "pandas":
                result = pandas.read_table(file)
            elif method == "modin":
                result = pd.read_table(file)
            else:
                result = None
            return result

        tsv_path = make_csv_file(delimiter="\t")

        pandas_df = wrapped_read_table(tsv_path, method="pandas")
        modin_df = wrapped_read_table(tsv_path, method="modin")

        df_equals(modin_df, pandas_df)

    def test_read_table_empty_frame(self, make_csv_file):
        """Read a single column and use that same column as the index."""
        only_column = "col1"
        read_kwargs = {
            "filepath_or_buffer": make_csv_file(delimiter="\t"),
            "usecols": [only_column],
            "index_col": only_column,
        }
        eval_io(fn_name="read_table", **read_kwargs)


@pytest.mark.parametrize("engine", ["pyarrow", "fastparquet"])
@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestParquet:
    @pytest.mark.parametrize("columns", [None, ["col1"]])
    @pytest.mark.parametrize("row_group_size", [None, 100, 1000, 10_000])
    @pytest.mark.parametrize("path_type", [Path, str])
    def test_read_parquet(
        self, engine, make_parquet_file, columns, row_group_size, path_type
    ):
        """Read one unfiltered parquet file for each parametrized combination.

        The write/read/compare work itself is done by ``_test_read_parquet``.
        """
        unfiltered = None
        self._test_read_parquet(
            engine=engine,
            make_parquet_file=make_parquet_file,
            path_type=path_type,
            row_group_size=row_group_size,
            columns=columns,
            filters=unfiltered,
        )

    def _test_read_parquet(
        self,
        engine,
        make_parquet_file,
        columns,
        filters,
        row_group_size,
        path_type=str,
        range_index_start=0,
        range_index_step=1,
        range_index_name=None,
        expected_exception=None,
    ):
        """Write a temporary parquet file and run ``eval_io`` on ``read_parquet``.

        Parameters
        ----------
        engine
            Forwarded to ``read_parquet`` as ``engine``.
        make_parquet_file : callable
            Fixture used to write the parquet file.
        columns, filters
            Forwarded to ``read_parquet``.
        row_group_size
            Forwarded to ``make_parquet_file``.
        path_type : callable, default: str
            Applied to the temporary filename before it is used.
        range_index_start, range_index_step, range_index_name
            Forwarded to ``make_parquet_file``.
        expected_exception : Exception, optional
            Forwarded to ``eval_io``.
        """
        empty_filters_on_windows = (
            os.name == "nt" and engine == "pyarrow" and filters == []
        )
        if empty_filters_on_windows:
            # pyarrow (and hence pandas on top of pyarrow) raises here and Modin
            # mirrors that, but error cases race with ensure_clean on Windows.
            # TODO: Remove this once https://github.com/modin-project/modin/issues/6460 is fixed.
            pytest.xfail(
                "Skipping empty filters error case to avoid race condition - see #6460"
            )

        with ensure_clean(".parquet") as tmp_name:
            parquet_path = path_type(tmp_name)
            make_parquet_file(
                filename=parquet_path,
                row_group_size=row_group_size,
                range_index_start=range_index_start,
                range_index_step=range_index_step,
                range_index_name=range_index_name,
            )

            eval_io(
                fn_name="read_parquet",
                expected_exception=expected_exception,
                # Everything below goes to read_parquet itself.
                path=parquet_path,
                engine=engine,
                columns=columns,
                filters=filters,
            )

    @pytest.mark.parametrize(
        "dtype_backend", [lib.no_default, "numpy_nullable", "pyarrow"]
    )
    def test_read_parquet_dtype_backend(self, engine, make_parquet_file, dtype_backend):
        """Read a parquet file with each ``dtype_backend`` and compare dtypes too.

        With the fastparquet engine a ``ValueError`` is the expected outcome.
        """

        def frames_and_dtypes_match(left, right):
            # Compare the data first, then the per-column dtypes.
            df_equals(left, right)
            df_equals(left.dtypes, right.dtypes)

        with ensure_clean(".parquet") as parquet_path:
            make_parquet_file(filename=parquet_path, row_group_size=100)

            expected_exception = (
                ValueError(
                    "The 'dtype_backend' argument is not supported for the fastparquet engine"
                )
                if engine == "fastparquet"
                else None
            )

            eval_io(
                fn_name="read_parquet",
                comparator=frames_and_dtypes_match,
                expected_exception=expected_exception,
                # Everything below goes to read_parquet itself.
                path=parquet_path,
                engine=engine,
                dtype_backend=dtype_backend,
            )

    # Tests issue #6778
    def test_read_parquet_no_extension(self, engine, make_parquet_file):
        """Read a parquet file whose name has no ``.parquet`` extension."""
        with ensure_clean(".parquet") as tmp_name:
            # Cut the name right before the ".parquet" extension.
            extension_start = tmp_name.index(".parquet")
            bare_name = tmp_name[:extension_start]

            make_parquet_file(filename=bare_name)
            eval_io(fn_name="read_parquet", path=bare_name, engine=engine)

    @pytest.mark.parametrize(
        "filters",
        [None, [], [("col1", "==", 5)], [("col1", "<=", 215), ("col2", ">=", 35)]],
    )
    def test_read_parquet_filters(self, engine, make_parquet_file, filters):
        """Read a single parquet file with each ``filters`` value.

        Empty filters with the pyarrow engine are expected to raise
        ``ValueError("Malformed filters")``.
        """
        malformed = filters == [] and engine == "pyarrow"
        expected_exception = ValueError("Malformed filters") if malformed else None
        self._test_read_parquet(
            engine=engine,
            make_parquet_file=make_parquet_file,
            path_type=str,
            row_group_size=100,
            columns=None,
            filters=filters,
            expected_exception=expected_exception,
        )

    @pytest.mark.parametrize("columns", [None, ["col1"]])
    @pytest.mark.parametrize(
        "filters",
        [None, [("col1", "<=", 1_000_000)], [("col1", "<=", 75), ("col2", ">=", 35)]],
    )
    @pytest.mark.parametrize(
        "range_index_start",
        [0, 5_000],
    )
    @pytest.mark.parametrize(
        "range_index_step",
        [1, 10],
    )
    @pytest.mark.parametrize(
        "range_index_name",
        [None, "my_index"],
    )
    def test_read_parquet_range_index(
        self,
        engine,
        make_parquet_file,
        columns,
        filters,
        range_index_start,
        range_index_step,
        range_index_name,
    ):
        """Read a single parquet file written with the given range index settings."""
        index_settings = {
            "range_index_start": range_index_start,
            "range_index_step": range_index_step,
            "range_index_name": range_index_name,
        }
        self._test_read_parquet(
            engine=engine,
            make_parquet_file=make_parquet_file,
            columns=columns,
            filters=filters,
            row_group_size=100,
            path_type=str,
            **index_settings,
        )

    def test_read_parquet_list_of_files_5698(self, engine, make_parquet_file):
        """Read a list of three parquet files with one ``read_parquet`` call."""
        on_windows = os.name == "nt"
        if on_windows and engine == "fastparquet":
            pytest.xfail(reason="https://github.com/pandas-dev/pandas/issues/51720")
        with ensure_clean(".parquet") as first:
            with ensure_clean(".parquet") as second:
                with ensure_clean(".parquet") as third:
                    parquet_paths = [first, second, third]
                    for parquet_path in parquet_paths:
                        make_parquet_file(filename=parquet_path)
                    eval_io(fn_name="read_parquet", path=parquet_paths, engine=engine)

    def test_read_parquet_indexing_by_column(self, tmp_path, engine, make_parquet_file):
        """Index into every column of a Modin frame read from a parquet file.

        Specifically tests for https://github.com/modin-project/modin/issues/3527,
        which fails when
        min_partition_size < nrows < min_partition_size * (num_partitions - 1).
        """
        # The minimal row count that is guaranteed to fail.
        row_count = MinRowPartitionSize.get() + 1
        parquet_path = get_unique_filename(extension="parquet", data_dir=tmp_path)
        make_parquet_file(filename=parquet_path, nrows=row_count)

        modin_df = pd.read_parquet(parquet_path, engine=engine)
        for column_name in modin_df.columns:
            # Only the lookup matters here; its result is not inspected.
            modin_df[column_name]

    @pytest.mark.parametrize("columns", [None, ["col1"]])
    @pytest.mark.parametrize(
        "filters",
        [None, [("col1", "<=", 3_215), ("col2", ">=", 35)]],
    )
    @pytest.mark.parametrize("row_group_size", [None, 100, 1000, 10_000])
    @pytest.mark.parametrize(
        "rows_per_file", [[1000] * 40, [0, 0, 40_000], [10_000, 10_000] + [100] * 200]
    )
    @pytest.mark.exclude_in_sanity
    def test_read_parquet_directory(
        self, engine, make_parquet_dir, columns, filters, row_group_size, rows_per_file
    ):
        """Read a directory of parquet files that keep the default range index."""
        default_index = {
            "range_index_start": 0,
            "range_index_step": 1,
            "range_index_name": None,
        }
        self._test_read_parquet_directory(
            engine=engine,
            make_parquet_dir=make_parquet_dir,
            columns=columns,
            filters=filters,
            row_group_size=row_group_size,
            rows_per_file=rows_per_file,
            **default_index,
        )

    def _test_read_parquet_directory(
        self,
        engine,
        make_parquet_dir,
        columns,
        filters,
        range_index_start,
        range_index_step,
        range_index_name,
        row_group_size,
        rows_per_file,
    ):
        """Build a directory of parquet files and run ``eval_io`` on reading it.

        Parameters
        ----------
        engine, columns, filters
            Forwarded to ``read_parquet``.
        make_parquet_dir : callable
            Fixture called with the frames (keyed by file name) and
            ``row_group_size``; its return value is used as the read path.
        range_index_start, range_index_step, range_index_name
            Settings of the ``RangeIndex`` given to every per-file frame.
        row_group_size
            Forwarded to ``make_parquet_dir``.
        rows_per_file : list of int
            Number of rows of each file, in file order.
        """
        column_count = DATASET_SIZE_DICT.get(
            TestDatasetSize.get(), DATASET_SIZE_DICT["Small"]
        )
        keeps_default_index = (
            range_index_start == 0
            and range_index_step == 1
            and range_index_name is None
        )

        frames = {}
        first_row = 0
        for file_number, row_count in enumerate(rows_per_file):
            last_row = first_row + row_count
            frame = pandas.DataFrame(
                {
                    f"col{col_number}": np.arange(first_row, last_row)
                    for col_number in range(1, column_count + 1)
                },
            )
            wanted_index = pandas.RangeIndex(
                start=range_index_start,
                stop=range_index_start + (row_count * range_index_step),
                step=range_index_step,
                name=range_index_name,
            )
            if keeps_default_index:
                # The freshly built frame must already carry this index.
                assert frame.index.equals(wanted_index)
            else:
                frame.index = wanted_index

            frames[f"{file_number}.parquet"] = frame
            first_row = last_row
        directory = make_parquet_dir(frames, row_group_size)

        # PyArrow ignores certain files in a parquet directory by default, for
        # example those starting with '_'. Our previous implementation tried to
        # read every file in the directory, but we now rely on PyArrow to make
        # sure the directory is valid.
        marker_path = os.path.join(directory, "_committed_file")
        with open(marker_path, "w+") as marker:
            marker.write("testingtesting")

        eval_io(
            fn_name="read_parquet",
            path=directory,
            engine=engine,
            columns=columns,
            filters=filters,
        )

    @pytest.mark.parametrize(
        "filters",
        [None, [("col1", "<=", 1_000_000)], [("col1", "<=", 75), ("col2", ">=", 35)]],
    )
    @pytest.mark.parametrize(
        "range_index_start",
        [0, 5_000],
    )
    @pytest.mark.parametrize(
        "range_index_step",
        [1, 10],
    )
    @pytest.mark.parametrize(
        "range_index_name",
        [None, "my_index"],
    )
    @pytest.mark.parametrize("row_group_size", [None, 20])
    def test_read_parquet_directory_range_index(
        self,
        engine,
        make_parquet_dir,
        filters,
        range_index_start,
        range_index_step,
        range_index_name,
        row_group_size,
    ):
        """Read a directory of parquet files written with various range indexes."""
        # rows_per_file is not parametrized; instead one tricky layout is used:
        # uneven, with some empty files, and no size divisible by the
        # row_group_size. The total is smaller than in other tests so that
        # this one runs faster.
        tricky_rows_per_file = [250] + [0] * 10 + [25] * 10
        self._test_read_parquet_directory(
            engine=engine,
            make_parquet_dir=make_parquet_dir,
            columns=None,
            filters=filters,
            row_group_size=row_group_size,
            rows_per_file=tricky_rows_per_file,
            range_index_start=range_index_start,
            range_index_step=range_index_step,
            range_index_name=range_index_name,
        )

    @pytest.mark.parametrize(
        "filters",
        [None, [("col1", "<=", 1_000_000)], [("col1", "<=", 75), ("col2", ">=", 35)]],
    )
    @pytest.mark.parametrize(
        "range_index_start",
        [0, 5_000],
    )
    @pytest.mark.parametrize(
        "range_index_step",
        [1, 10],
    )
    @pytest.mark.parametrize(
        "range_index_name",
        [None, "my_index"],
    )
    def test_read_parquet_directory_range_index_consistent_metadata(
        self,
        engine,
        filters,
        range_index_start,
        range_index_step,
        range_index_name,
        tmp_path,
    ):
        """Read a dataset that ``pyarrow.dataset.write_dataset`` split into files.

        One 500-row frame is written with at most 100 rows per file and at
        most 35 rows per row group.
        """
        column_count = DATASET_SIZE_DICT.get(
            TestDatasetSize.get(), DATASET_SIZE_DICT["Small"]
        )
        frame = pandas.DataFrame(
            {
                f"col{col_number}": np.arange(0, 500)
                for col_number in range(1, column_count + 1)
            },
        )
        wanted_index = pandas.RangeIndex(
            start=range_index_start,
            stop=range_index_start + (len(frame) * range_index_step),
            step=range_index_step,
            name=range_index_name,
        )
        keeps_default_index = (
            range_index_start == 0
            and range_index_step == 1
            and range_index_name is None
        )
        if keeps_default_index:
            # The freshly built frame must already carry this index.
            assert frame.index.equals(wanted_index)
        else:
            frame.index = wanted_index

        directory = get_unique_filename(extension=None, data_dir=tmp_path)

        arrow_table = pa.Table.from_pandas(frame)
        pyarrow.dataset.write_dataset(
            arrow_table,
            directory,
            format="parquet",
            max_rows_per_group=35,
            max_rows_per_file=100,
        )

        # PyArrow ignores certain files in a parquet directory by default, for
        # example those starting with '_'. Our previous implementation tried to
        # read every file in the directory, but we now rely on PyArrow to make
        # sure the directory is valid.
        marker_path = os.path.join(directory, "_committed_file")
        with open(marker_path, "w+") as marker:
            marker.write("testingtesting")

        eval_io(
            fn_name="read_parquet",
            path=directory,
            engine=engine,
            filters=filters,
        )

    @pytest.mark.parametrize("columns", [None, ["col1"]])
    @pytest.mark.parametrize(
        "filters",
        [None, [], [("col1", "==", 5)], [("col1", "<=", 215), ("col2", ">=", 35)]],
    )
    @pytest.mark.parametrize(
        "range_index_start",
        [0, 5_000],
    )
    @pytest.mark.parametrize(
        "range_index_step",
        [1, 10],
    )
    def test_read_parquet_partitioned_directory(
        self,
        tmp_path,
        make_parquet_file,
        columns,
        filters,
        range_index_start,
        range_index_step,
        engine,
    ):
        """Read a parquet dataset that is partitioned by ``col1``.

        Empty filters with the pyarrow engine are expected to raise
        ``ValueError("Malformed filters")``.
        """
        dataset_path = get_unique_filename(extension=None, data_dir=tmp_path)
        make_parquet_file(
            filename=dataset_path,
            partitioned_columns=["col1"],
            range_index_name="my_index",
            range_index_start=range_index_start,
            range_index_step=range_index_step,
        )

        malformed = filters == [] and engine == "pyarrow"
        expected_exception = ValueError("Malformed filters") if malformed else None
        eval_io(
            fn_name="read_parquet",
            expected_exception=expected_exception,
            # Everything below goes to read_parquet itself.
            path=dataset_path,
            engine=engine,
            columns=columns,
            filters=filters,
        )

    @pytest.mark.parametrize(
        "filters",
        [
            None,
            [],
            [("B", "==", "a")],
            [
                ("B", "==", "a"),
                ("A", ">=", 50_000),
                ("idx", "<=", 30_000),
                ("idx_categorical", "==", "y"),
            ],
        ],
    )
    def test_read_parquet_pandas_index(self, engine, filters):
        """Read parquet files written by pandas with a non-RangeIndex index.

        Every ``idx*`` column is used as the index in turn, followed by the
        ``["idx", "A"]`` MultiIndex. Empty filters with the pyarrow engine are
        expected to raise ``ValueError("Malformed filters")``, as in the other
        tests of this class that are parametrized with ``filters=[]``.
        """
        if (
            version.parse(pa.__version__) >= version.parse("12.0.0")
            and version.parse(pd.__version__) < version.parse("2.0.0")
            and engine == "pyarrow"
        ):
            pytest.xfail("incompatible versions; see #6072")

        # The same expectation applies to every read below.
        expected_exception = None
        if filters == [] and engine == "pyarrow":
            expected_exception = ValueError("Malformed filters")

        # Ensure modin can read parquet files written by pandas with a non-RangeIndex object
        pandas_df = pandas.DataFrame(
            {
                "idx": np.random.randint(0, 100_000, size=2000),
                "idx_categorical": pandas.Categorical(["y", "z"] * 1000),
                # Can't do interval index right now because of this bug fix that is planned
                # to be apart of the pandas 1.5.0 release: https://github.com/pandas-dev/pandas/pull/46034
                # "idx_interval": pandas.interval_range(start=0, end=2000),
                "idx_periodrange": pandas.period_range(
                    start="2017-01-01", periods=2000
                ),
                "A": np.random.randint(0, 100_000, size=2000),
                "B": ["a", "b"] * 1000,
                "C": ["c"] * 2000,
            }
        )
        # Older versions of pyarrow do not support Arrow to Parquet
        # schema conversion for duration[ns]
        # https://issues.apache.org/jira/browse/ARROW-6780
        if version.parse(pa.__version__) >= version.parse("8.0.0"):
            pandas_df["idx_timedelta"] = pandas.timedelta_range(
                start="1 day", periods=2000
            )

        # There is a non-deterministic bug in the fastparquet engine when we
        # try to set the index to the datetime column. Please see:
        # https://github.com/dask/fastparquet/issues/796
        if engine == "pyarrow":
            pandas_df["idx_datetime"] = pandas.date_range(
                start="1/1/2018", periods=2000
            )

        for col in pandas_df.columns:
            if col.startswith("idx"):
                # Before this commit, first released in version 2023.1.0, fastparquet relied
                # on pandas private APIs to handle Categorical indices.
                # These private APIs broke in pandas 2.
                # https://github.com/dask/fastparquet/commit/cf60ae0e9a9ca57afc7a8da98d8c0423db1c0c53
                if (
                    col == "idx_categorical"
                    and engine == "fastparquet"
                    and version.parse(fastparquet.__version__)
                    < version.parse("2023.1.0")
                ):
                    continue

                with ensure_clean(".parquet") as unique_filename:
                    pandas_df.set_index(col).to_parquet(unique_filename)
                    # read the same parquet using modin.pandas
                    eval_io(
                        "read_parquet",
                        # read_parquet kwargs
                        path=unique_filename,
                        engine=engine,
                        filters=filters,
                        expected_exception=expected_exception,
                    )

        with ensure_clean(".parquet") as unique_filename:
            pandas_df.set_index(["idx", "A"]).to_parquet(unique_filename)
            eval_io(
                "read_parquet",
                # read_parquet kwargs
                path=unique_filename,
                engine=engine,
                filters=filters,
                expected_exception=expected_exception,
            )

    @pytest.mark.parametrize(
        "filters",
        [
            None,
            [],
            [("B", "==", "a")],
            [("B", "==", "a"), ("A", ">=", 5), ("idx", "<=", 30_000)],
        ],
    )
    def test_read_parquet_pandas_index_partitioned(self, tmp_path, engine, filters):
        """Read a pandas-written dataset indexed by ``idx`` and partitioned by ``A``.

        Empty filters with the pyarrow engine are expected to raise
        ``ValueError("Malformed filters")``.
        """
        # Ensure Modin can read parquet files that pandas wrote with a
        # non-RangeIndex object.
        source_df = pandas.DataFrame(
            {
                "idx": np.random.randint(0, 100_000, size=2000),
                "A": np.random.randint(0, 10, size=2000),
                "B": ["a", "b"] * 1000,
                "C": ["c"] * 2000,
            }
        )
        dataset_path = get_unique_filename(extension="parquet", data_dir=tmp_path)
        indexed_df = source_df.set_index("idx")
        indexed_df.to_parquet(dataset_path, partition_cols=["A"])

        malformed = filters == [] and engine == "pyarrow"
        expected_exception = ValueError("Malformed filters") if malformed else None
        # Read the same dataset back through eval_io.
        eval_io(
            "read_parquet",
            expected_exception=expected_exception,
            # Everything below goes to read_parquet itself.
            path=dataset_path,
            engine=engine,
            filters=filters,
        )

    def test_read_parquet_hdfs(self, engine):
        """Read the checked-in ``hdfs.parquet`` data file."""
        hdfs_sample = "modin/tests/pandas/data/hdfs.parquet"
        eval_io(fn_name="read_parquet", path=hdfs_sample, engine=engine)

    @pytest.mark.parametrize(
        "path_type",
        ["object", "directory", "url"],
    )
    def test_read_parquet_s3(self, s3_resource, path_type, engine, s3_storage_options):
        """Read parquet data from S3 via an open file object, a directory or a URL."""
        object_url = "s3://modin-test/modin-bugs/test_data.parquet"

        if path_type == "object":
            import s3fs

            endpoint = s3_storage_options["client_kwargs"]["endpoint_url"]
            filesystem = s3fs.S3FileSystem(endpoint_url=endpoint)
            with filesystem.open(object_url, "rb") as file_obj:
                eval_io("read_parquet", path=file_obj, engine=engine)
            return

        # The remaining two cases only differ in the location they read.
        if path_type == "directory":
            target = "s3://modin-test/modin-bugs/test_data_dir.parquet"
        else:
            target = object_url
        eval_io(
            "read_parquet",
            path=target,
            storage_options=s3_storage_options,
            engine=engine,
        )

    @pytest.mark.parametrize(
        "filters",
        [None, [], [("idx", "<=", 30_000)], [("idx", "<=", 30_000), ("A", ">=", 5)]],
    )
    def test_read_parquet_without_metadata(self, tmp_path, engine, filters):
        """Test that Modin can read parquet files not written by pandas.

        The data goes pandas -> CSV -> pyarrow table -> parquet. Empty filters
        with the pyarrow engine are expected to raise
        ``ValueError("Malformed filters")``.
        """
        from pyarrow import csv, parquet

        parquet_fname = get_unique_filename(extension="parquet", data_dir=tmp_path)
        # The intermediate file holds CSV data, so give it a matching extension.
        csv_fname = get_unique_filename(extension="csv", data_dir=tmp_path)
        pandas_df = pandas.DataFrame(
            {
                "idx": np.random.randint(0, 100_000, size=2000),
                "A": np.random.randint(0, 10, size=2000),
                "B": ["a", "b"] * 1000,
                "C": ["c"] * 2000,
            }
        )
        pandas_df.to_csv(csv_fname, index=False)
        # read into pyarrow table and write it to a parquet file
        t = csv.read_csv(csv_fname)
        parquet.write_table(t, parquet_fname)

        expected_exception = None
        if filters == [] and engine == "pyarrow":
            expected_exception = ValueError("Malformed filters")
        eval_io(
            "read_parquet",
            # read_parquet kwargs
            path=parquet_fname,
            engine=engine,
            filters=filters,
            expected_exception=expected_exception,
        )

    def test_read_empty_parquet_file(self, tmp_path, engine):
        """Read a directory whose only parquet file holds an empty DataFrame."""
        data_dir = tmp_path / "data"
        data_dir.mkdir()
        empty_df = pandas.DataFrame()
        empty_df.to_parquet(data_dir / "part-00000.parquet", engine=engine)
        eval_io(fn_name="read_parquet", path=data_dir, engine=engine)

    @pytest.mark.parametrize(
        "compression_kwargs",
        [
            pytest.param({}, id="no_compression_kwargs"),
            pytest.param({"compression": None}, id="compression=None"),
            pytest.param({"compression": "gzip"}, id="compression=gzip"),
            pytest.param({"compression": "snappy"}, id="compression=snappy"),
            pytest.param({"compression": "brotli"}, id="compression=brotli"),
        ],
    )
    @pytest.mark.parametrize("extension", ["parquet", ".gz", ".bz2", ".zip", ".xz"])
    def test_to_parquet(self, tmp_path, engine, compression_kwargs, extension):
        """Run ``parquet_eval_to_file`` on ``to_parquet`` for each parametrization."""
        md_frame, pd_frame = create_test_dfs(TEST_DATA)
        parquet_eval_to_file(
            tmp_path,
            modin_obj=md_frame,
            pandas_obj=pd_frame,
            fn="to_parquet",
            extension=extension,
            # Keyword arguments for to_parquet itself.
            engine=engine,
            **compression_kwargs,
        )

    def test_to_parquet_keep_index(self, tmp_path, engine):
        """Write frames with a named index using ``to_parquet(index=True)``."""
        md_frame, pd_frame = create_test_dfs(
            {"c0": [0, 1] * 1000, "c1": [2, 3] * 1000}
        )
        # Give both frames the same index name.
        md_frame.index.name = "foo"
        pd_frame.index.name = "foo"

        parquet_eval_to_file(
            tmp_path,
            modin_obj=md_frame,
            pandas_obj=pd_frame,
            fn="to_parquet",
            extension="parquet",
            # Keyword arguments for to_parquet itself.
            engine=engine,
            index=True,
        )

    def test_to_parquet_s3(self, s3_resource, engine, s3_storage_options):
        """Write to S3 with both libraries and compare what is read back."""
        modin_path = "s3://modin-test/modin-dir/modin_df.parquet"
        pandas_path = "s3://modin-test/pandas-dir/pandas_df.parquet"
        # use utils_test_data because it spans multiple partitions
        mdf, pdf = create_test_dfs(utils_test_data["int_data"])

        pdf.to_parquet(pandas_path, engine=engine, storage_options=s3_storage_options)
        mdf.to_parquet(modin_path, engine=engine, storage_options=s3_storage_options)

        pandas_roundtrip = pandas.read_parquet(
            pandas_path, storage_options=s3_storage_options
        )
        modin_roundtrip = pd.read_parquet(
            modin_path, storage_options=s3_storage_options
        )
        df_equals(pandas_roundtrip, modin_roundtrip)
        # Make sure no local directory was created:
        # https://github.com/modin-project/modin/issues/5888
        assert not os.path.isdir(modin_path)

    def test_read_parquet_2462(self, tmp_path, engine):
        """Read back a directory holding one file with a list-valued column."""
        data_dir = tmp_path / "data"
        data_dir.mkdir()
        source_df = pandas.DataFrame({"col1": [["ad_1", "ad_2"], ["ad_3"]]})
        source_df.to_parquet(data_dir / "part-00000.parquet", engine=engine)
        df_equals(source_df, pd.read_parquet(data_dir, engine=engine))

    def test_read_parquet_5767(self, tmp_path, engine):
        """Read a dataset partitioned by ``b`` whose name contains a ``#``."""
        data_dir = tmp_path / "data"
        data_dir.mkdir()
        dataset_path = data_dir / "modin_issue#0000.parquet"

        source_df = pandas.DataFrame({"a": [1, 2, 3, 4], "b": [1, 1, 2, 2]})
        source_df.to_parquet(dataset_path, engine=engine, partition_cols=["b"])

        modin_df = pd.read_parquet(dataset_path)
        # Column "b" is read as a category by both Modin and pandas, hence the
        # cast before comparing.
        df_equals(source_df, modin_df.astype("int64"))

    @pytest.mark.parametrize("index", [False, True])
    def test_read_parquet_6855(self, tmp_path, engine, index):
        """Round-trip a frame with integer column labels, with and without index."""
        if engine == "fastparquet":
            pytest.skip("integer columns aren't supported")

        data_dir = tmp_path / "data"
        data_dir.mkdir()
        parquet_path = data_dir / "issue6855.parquet"

        source_df = pandas.DataFrame(np.random.rand(10**2, 10))
        source_df.to_parquet(parquet_path, index=index, engine=engine)
        modin_df = pd.read_parquet(parquet_path, engine=engine)
        if not index:
            # In that case pyarrow cannot preserve index dtype, so the labels
            # are cast back to int64 here.
            restored_labels = pandas.Index(modin_df.columns).astype("int64")
            modin_df.columns = restored_labels.to_list()
        df_equals(source_df, modin_df)

    def test_read_parquet_s3_with_column_partitioning(
        self, s3_resource, engine, s3_storage_options
    ):
        """Read the ``issue5159.parquet`` dataset from S3."""
        # https://github.com/modin-project/modin/issues/4636
        dataset_url = "s3://modin-test/modin-bugs/issue5159.parquet"
        eval_io(
            fn_name="read_parquet",
            storage_options=s3_storage_options,
            engine=engine,
            path=dataset_url,
        )


# Leave this test apart from the test classes, which skip the default to pandas
# warning check. We want to make sure we are NOT defaulting to pandas for a
# path relative to user home.
# TODO(https://github.com/modin-project/modin/issues/3655): Get rid of this
# comment once we turn all default to pandas messages into errors.
def test_read_parquet_relative_to_user_home(make_parquet_file):
    """Run the relative-to-home check for ``read_parquet`` on a temporary file."""
    with ensure_clean(".parquet") as parquet_path:
        make_parquet_file(filename=parquet_path)
        _check_relative_io("read_parquet", parquet_path, "path")


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestJson:
    """Tests for ``read_json`` and ``to_json``."""

    @pytest.mark.parametrize("pathlike", [False, True])
    @pytest.mark.parametrize("lines", [False, True])
    def test_read_json(self, make_json_file, lines, pathlike):
        """Read a JSON file given either as a ``str`` or as a ``Path``."""
        unique_filename = make_json_file(lines=lines)
        eval_io(
            fn_name="read_json",
            # read_json kwargs
            path_or_buf=Path(unique_filename) if pathlike else unique_filename,
            lines=lines,
        )

    @pytest.mark.parametrize(
        "dtype_backend", [lib.no_default, "numpy_nullable", "pyarrow"]
    )
    def test_read_json_dtype_backend(self, make_json_file, dtype_backend):
        """Read a line-delimited JSON file with each ``dtype_backend``."""

        def comparator(df1, df2):
            # Compare the data first, then the per-column dtypes.
            df_equals(df1, df2)
            df_equals(df1.dtypes, df2.dtypes)

        eval_io(
            fn_name="read_json",
            # read_json kwargs
            path_or_buf=make_json_file(lines=True),
            lines=True,
            dtype_backend=dtype_backend,
            comparator=comparator,
        )

    @pytest.mark.parametrize(
        "storage_options_extra",
        [{"anon": False}, {"anon": True}, {"key": "123", "secret": "123"}],
    )
    def test_read_json_s3(self, s3_resource, s3_storage_options, storage_options_extra):
        """Read a JSON file from S3 with several extra storage options.

        A ``PermissionError("Forbidden")`` is expected only for anonymous
        access (``anon=True``).
        """
        s3_path = "s3://modin-test/modin-bugs/test_data.json"
        expected_exception = None
        # Check the value of "anon", not just its presence: ``{"anon": False}``
        # explicitly asks for non-anonymous access, so no error is expected.
        if storage_options_extra.get("anon"):
            expected_exception = PermissionError("Forbidden")
        eval_io(
            fn_name="read_json",
            path_or_buf=s3_path,
            lines=True,
            orient="records",
            storage_options=s3_storage_options | storage_options_extra,
            expected_exception=expected_exception,
        )

    def test_read_json_categories(self):
        """Read the categories data file with an explicit ``dtype`` mapping."""
        eval_io(
            fn_name="read_json",
            # read_json kwargs
            path_or_buf="modin/tests/pandas/data/test_categories.json",
            dtype={"one": "int64", "two": "category"},
        )

    def test_read_json_different_columns(self):
        """Read a line-delimited file whose rows have different columns."""
        with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):
            eval_io(
                fn_name="read_json",
                # read_json kwargs
                path_or_buf="modin/tests/pandas/data/test_different_columns_in_rows.json",
                lines=True,
            )

    @pytest.mark.parametrize(
        "data",
        [json_short_string, json_short_bytes, json_long_string, json_long_bytes],
    )
    def test_read_json_string_bytes(self, data):
        """Pass each parametrized ``data`` object directly to ``read_json``."""
        with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):
            modin_df = pd.read_json(data)
        # For I/O objects we need to rewind to reuse the same object.
        if hasattr(data, "seek"):
            data.seek(0)
        df_equals(modin_df, pandas.read_json(data))

    def test_to_json(self, tmp_path):
        """Run ``eval_to_file`` on ``to_json`` for the Modin and pandas frames."""
        modin_df, pandas_df = create_test_dfs(TEST_DATA)
        eval_to_file(
            tmp_path,
            modin_obj=modin_df,
            pandas_obj=pandas_df,
            fn="to_json",
            extension="json",
        )

    @pytest.mark.parametrize(
        "read_mode",
        [
            "r",
            "rb",
        ],
    )
    def test_read_json_file_handle(self, make_json_file, read_mode):
        """Read JSON from an open file handle in text and in binary mode."""
        with open(make_json_file(), mode=read_mode) as buf:
            df_pandas = pandas.read_json(buf)
            # Rewind so that Modin reads the same handle from the start.
            buf.seek(0)
            df_modin = pd.read_json(buf)
            df_equals(df_pandas, df_modin)

    @pytest.mark.skipif(
        current_execution_is_native(),
        reason="no partitions",
    )
    def test_read_json_metadata(self, make_json_file):
        """Check the cached column widths against the actual partition widths."""
        # `lines=True` is for triggering Modin implementation,
        # `orient="records"` should be set if `lines=True`
        df = pd.read_json(
            make_json_file(ncols=80, lines=True), lines=True, orient="records"
        )
        parts_width_cached = df._query_compiler._modin_frame._column_widths_cache
        # Only the first row of partitions is inspected.
        first_row_parts = df._query_compiler._modin_frame._partitions[0]
        parts_width_actual = [len(part.get().columns) for part in first_row_parts]

        assert parts_width_cached == parts_width_actual


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestExcel:
    """Tests comparing Modin's Excel readers and writers against pandas."""

    @check_file_leaks
    @pytest.mark.parametrize("pathlike", [False, True])
    def test_read_excel(self, pathlike, make_excel_file):
        """Read a generated workbook passed either as ``str`` or as ``Path``."""
        unique_filename = make_excel_file()
        eval_io(
            fn_name="read_excel",
            # read_excel kwargs
            io=Path(unique_filename) if pathlike else unique_filename,
        )

    @check_file_leaks
    @pytest.mark.parametrize("skiprows", [2, [1, 3], lambda x: x in [0, 2]])
    def test_read_excel_skiprows(self, skiprows, make_excel_file):
        """Read with ``skiprows`` given as an int, a list and a callable."""
        eval_io(
            fn_name="read_excel",
            # read_excel kwargs
            io=make_excel_file(),
            skiprows=skiprows,
            check_kwargs_callable=False,
        )

    @check_file_leaks
    @pytest.mark.parametrize(
        "dtype_backend", [lib.no_default, "numpy_nullable", "pyarrow"]
    )
    def test_read_excel_dtype_backend(self, make_excel_file, dtype_backend):
        """Read with each ``dtype_backend`` and compare both values and dtypes."""

        def comparator(df1, df2):
            df_equals(df1, df2)
            df_equals(df1.dtypes, df2.dtypes)

        eval_io(
            fn_name="read_excel",
            # read_excel kwargs
            io=make_excel_file(),
            dtype_backend=dtype_backend,
            comparator=comparator,
        )

    @check_file_leaks
    def test_read_excel_engine(self, make_excel_file):
        """Read with an explicit ``engine="openpyxl"``."""
        eval_io(
            fn_name="read_excel",
            modin_warning=(UserWarning if StorageFormat.get() == "Pandas" else None),
            # read_excel kwargs
            io=make_excel_file(),
            engine="openpyxl",
        )

    @check_file_leaks
    def test_read_excel_index_col(self, make_excel_file):
        """Read with ``index_col=0``."""
        eval_io(
            fn_name="read_excel",
            modin_warning=(UserWarning if StorageFormat.get() == "Pandas" else None),
            # read_excel kwargs
            io=make_excel_file(),
            index_col=0,
        )

    @check_file_leaks
    def test_read_excel_all_sheets(self, make_excel_file):
        """Read every sheet (``sheet_name=None``) and compare the resulting dicts."""
        unique_filename = make_excel_file()

        pandas_df = pandas.read_excel(unique_filename, sheet_name=None)
        modin_df = pd.read_excel(unique_filename, sheet_name=None)

        assert isinstance(pandas_df, dict)
        assert isinstance(modin_df, type(pandas_df))
        assert pandas_df.keys() == modin_df.keys()

        for key in pandas_df.keys():
            df_equals(modin_df.get(key), pandas_df.get(key))

    # TODO: Check pandas gh-#39250 as it was fixed
    @pytest.mark.xfail(
        (StorageFormat.get() == "Pandas" and Engine.get() != "Python"),
        reason="pandas throws the exception. See pandas issue #39250 for more info",
    )
    @check_file_leaks
    def test_read_excel_sheetname_title(self):
        """Read the checked-in ``excel_sheetname_title.xlsx`` workbook."""
        eval_io(
            fn_name="read_excel",
            # read_excel kwargs
            io="modin/tests/pandas/data/excel_sheetname_title.xlsx",
            # FIXME: https://github.com/modin-project/modin/issues/7036
            expected_exception=False,
        )

    @check_file_leaks
    def test_excel_empty_line(self):
        """Check that a workbook with an empty line can be read and rendered."""
        path = "modin/tests/pandas/data/test_emptyline.xlsx"
        modin_df = pd.read_excel(path)
        assert str(modin_df)

    @check_file_leaks
    def test_read_excel_empty_rows(self):
        """Read a workbook with empty rows in the middle of the data."""
        # Test parsing empty rows in middle of excel dataframe as NaN values
        eval_io(
            fn_name="read_excel",
            io="modin/tests/pandas/data/test_empty_rows.xlsx",
        )

    @check_file_leaks
    def test_read_excel_border_rows(self):
        """Read a workbook whose border rows should parse as NaN values."""
        # Test parsing border rows as NaN values in excel dataframe
        eval_io(
            fn_name="read_excel",
            io="modin/tests/pandas/data/test_border_rows.xlsx",
        )

    @check_file_leaks
    def test_read_excel_every_other_nan(self):
        """Read a workbook where every other row is a NaN value."""
        # Test for reading excel dataframe with every other row as a NaN value
        eval_io(
            fn_name="read_excel",
            io="modin/tests/pandas/data/every_other_row_nan.xlsx",
        )

    @check_file_leaks
    def test_read_excel_header_none(self):
        """Read ``every_other_row_nan.xlsx`` with ``header=None``."""
        eval_io(
            fn_name="read_excel",
            io="modin/tests/pandas/data/every_other_row_nan.xlsx",
            header=None,
        )

    @pytest.mark.parametrize(
        "sheet_name",
        [
            "Sheet1",
            "AnotherSpecialName",
            "SpecialName",
            "SecondSpecialName",
            0,
            1,
            2,
            3,
        ],
    )
    @check_file_leaks
    def test_read_excel_sheet_name(self, sheet_name):
        """Select a sheet of ``modin_error_book.xlsx`` by name or by position."""
        eval_io(
            fn_name="read_excel",
            # read_excel kwargs
            io="modin/tests/pandas/data/modin_error_book.xlsx",
            sheet_name=sheet_name,
            # https://github.com/modin-project/modin/issues/5965
            comparator_kwargs={"check_dtypes": False},
        )

    def test_ExcelFile(self, make_excel_file):
        """Parse a workbook through ``ExcelFile`` objects built from a path."""
        unique_filename = make_excel_file()

        modin_excel_file = pd.ExcelFile(unique_filename)
        pandas_excel_file = pandas.ExcelFile(unique_filename)

        try:
            df_equals(modin_excel_file.parse(), pandas_excel_file.parse())
            assert modin_excel_file.io == unique_filename
        finally:
            modin_excel_file.close()
            pandas_excel_file.close()

    def test_ExcelFile_bytes(self, make_excel_file):
        """Parse a workbook through ``ExcelFile`` objects built from raw bytes."""
        unique_filename = make_excel_file()
        with open(unique_filename, mode="rb") as f:
            content = f.read()

        modin_excel_file = pd.ExcelFile(content)
        pandas_excel_file = pandas.ExcelFile(content)

        # Release the readers even when the comparison fails, as
        # `test_ExcelFile` does.
        try:
            df_equals(modin_excel_file.parse(), pandas_excel_file.parse())
        finally:
            modin_excel_file.close()
            pandas_excel_file.close()

    def test_read_excel_ExcelFile(self, make_excel_file):
        """Pass ``ExcelFile`` objects directly to ``read_excel``."""
        unique_filename = make_excel_file()
        with open(unique_filename, mode="rb") as f:
            content = f.read()

        modin_excel_file = pd.ExcelFile(content)
        pandas_excel_file = pandas.ExcelFile(content)

        # The test created the ExcelFile objects, so it is responsible for
        # closing them.
        try:
            df_equals(
                pd.read_excel(modin_excel_file), pandas.read_excel(pandas_excel_file)
            )
        finally:
            modin_excel_file.close()
            pandas_excel_file.close()

    @pytest.mark.parametrize("use_bytes_io", [False, True])
    def test_read_excel_bytes(self, use_bytes_io, make_excel_file):
        """Read a workbook given as ``bytes`` or wrapped in ``BytesIO``."""
        unique_filename = make_excel_file()
        with open(unique_filename, mode="rb") as f:
            io_bytes = f.read()

        if use_bytes_io:
            io_bytes = BytesIO(io_bytes)

        eval_io(
            fn_name="read_excel",
            # read_excel kwargs
            io=io_bytes,
        )

    def test_read_excel_file_handle(self, make_excel_file):
        """Read a workbook from an open binary file handle."""
        unique_filename = make_excel_file()
        with open(unique_filename, mode="rb") as f:
            eval_io(
                fn_name="read_excel",
                # read_excel kwargs
                io=f,
            )

    @pytest.mark.xfail(strict=False, reason="Flaky test, defaults to pandas")
    def test_to_excel(self, tmp_path):
        """Write the shared test data with ``to_excel`` and compare the files."""
        modin_df, pandas_df = create_test_dfs(TEST_DATA)

        unique_filename_modin = get_unique_filename(extension="xlsx", data_dir=tmp_path)
        unique_filename_pandas = get_unique_filename(
            extension="xlsx", data_dir=tmp_path
        )

        # `ExcelWriter.save()` no longer exists in pandas 2.x; leaving the
        # `with` block closes the writer, which saves the workbook.
        with pandas.ExcelWriter(unique_filename_modin) as modin_writer:
            modin_df.to_excel(modin_writer)
        with pandas.ExcelWriter(unique_filename_pandas) as pandas_writer:
            pandas_df.to_excel(pandas_writer)

        assert assert_files_eq(unique_filename_modin, unique_filename_pandas)

    @check_file_leaks
    def test_read_excel_empty_frame(self, make_excel_file):
        """Read with ``usecols=[0]`` and ``index_col=0`` together."""
        eval_io(
            fn_name="read_excel",
            modin_warning=(UserWarning if StorageFormat.get() == "Pandas" else None),
            # read_excel kwargs
            io=make_excel_file(),
            usecols=[0],
            index_col=0,
        )


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestHdf:
    """Tests comparing Modin's HDF readers and ``HDFStore`` against pandas."""

    @pytest.mark.parametrize("format", [None, "table"])
    def test_read_hdf(self, make_hdf_file, format):
        """Read key ``"df"`` from a generated HDF file for each ``format``."""
        eval_io(
            fn_name="read_hdf",
            # read_hdf kwargs
            path_or_buf=make_hdf_file(format=format),
            key="df",
        )

    def test_HDFStore(self, tmp_path):
        """Round-trip frames through ``HDFStore`` objects and ``read_hdf``."""
        unique_filename_modin = get_unique_filename(extension="hdf", data_dir=tmp_path)
        unique_filename_pandas = get_unique_filename(extension="hdf", data_dir=tmp_path)

        modin_df, pandas_df = create_test_dfs(TEST_DATA)

        # Context managers close both stores even if a comparison fails,
        # so the files are not left open for the reads below.
        with pd.HDFStore(unique_filename_modin) as modin_store, pandas.HDFStore(
            unique_filename_pandas
        ) as pandas_store:
            modin_store["foo"] = modin_df
            pandas_store["foo"] = pandas_df

            modin_df = modin_store.get("foo")
            pandas_df = pandas_store.get("foo")
            df_equals(modin_df, pandas_df)

        # Both files are read back with pandas once the stores are closed.
        modin_df = pandas.read_hdf(unique_filename_modin, key="foo", mode="r")
        pandas_df = pandas.read_hdf(unique_filename_pandas, key="foo", mode="r")
        df_equals(modin_df, pandas_df)
        assert isinstance(modin_store, pd.HDFStore)

        with ensure_clean(".hdf5") as hdf_file:
            with pd.HDFStore(hdf_file, mode="w") as store:
                store.append("data/df1", pd.DataFrame(np.random.randn(5, 5)))
                store.append("data/df2", pd.DataFrame(np.random.randn(4, 4)))

            modin_df = pd.read_hdf(hdf_file, key="data/df1", mode="r")
            pandas_df = pandas.read_hdf(hdf_file, key="data/df1", mode="r")
        df_equals(modin_df, pandas_df)

    def test_HDFStore_in_read_hdf(self):
        """Pass an open ``HDFStore`` as the first argument of ``read_hdf``."""
        with ensure_clean(".hdf") as filename:
            dfin = pd.DataFrame(np.random.rand(8, 8))
            # `key` is passed by keyword: pandas deprecated positional
            # arguments to `to_hdf` other than `path_or_buf`.
            dfin.to_hdf(filename, key="/key")

            with pd.HDFStore(filename) as h:
                modin_df = pd.read_hdf(h, "/key")
            with pandas.HDFStore(filename) as h:
                pandas_df = pandas.read_hdf(h, "/key")
        df_equals(modin_df, pandas_df)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestSql:
    """Tests comparing Modin's SQL readers and ``to_sql`` against pandas."""

    @pytest.mark.parametrize("read_sql_engine", ["Pandas", "Connectorx"])
    def test_read_sql(self, tmp_path, make_sql_connection, read_sql_engine):
        """Read a table through connection strings, engines and connections."""
        # Pass the extension by keyword, as the sibling tests do.
        filename = get_unique_filename(extension="db")
        table = "test_read_sql"
        conn = make_sql_connection(tmp_path / filename, table)
        query = f"select * from {table}"

        eval_io(
            fn_name="read_sql",
            # read_sql kwargs
            sql=query,
            con=conn,
        )

        eval_io(
            fn_name="read_sql",
            # read_sql kwargs
            sql=query,
            con=conn,
            index_col="index",
        )

        with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):
            pd.read_sql_query(query, conn)

        with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):
            pd.read_sql_table(table, conn)

        # Test SQLAlchemy engine
        sqlalchemy_engine = sa.create_engine(conn)
        try:
            eval_io(
                fn_name="read_sql",
                # read_sql kwargs
                sql=query,
                con=sqlalchemy_engine,
            )

            # Test SQLAlchemy Connection
            with sqlalchemy_engine.connect() as sqlalchemy_connection:
                eval_io(
                    fn_name="read_sql",
                    # read_sql kwargs
                    sql=query,
                    con=sqlalchemy_connection,
                )

                old_sql_engine = ReadSqlEngine.get()
                ReadSqlEngine.put(read_sql_engine)
                # Restore the config even when the read raises, so the
                # setting does not leak into other tests.
                try:
                    if ReadSqlEngine.get() == "Connectorx":
                        modin_df = pd.read_sql(sql=query, con=conn)
                    else:
                        modin_df = pd.read_sql(
                            sql=query, con=ModinDatabaseConnection("sqlalchemy", conn)
                        )
                finally:
                    ReadSqlEngine.put(old_sql_engine)
                pandas_df = pandas.read_sql(sql=query, con=sqlalchemy_connection)
                df_equals(modin_df, pandas_df)
        finally:
            # Release pooled connections so the database file is not kept open.
            sqlalchemy_engine.dispose()

    @pytest.mark.parametrize(
        "dtype_backend", [lib.no_default, "numpy_nullable", "pyarrow"]
    )
    def test_read_sql_dtype_backend(self, tmp_path, make_sql_connection, dtype_backend):
        """Read with each ``dtype_backend`` and compare both values and dtypes."""
        filename = get_unique_filename(extension="db")

        table = "test_read_sql_dtype_backend"
        conn = make_sql_connection(tmp_path / filename, table)
        query = f"select * from {table}"

        def comparator(df1, df2):
            df_equals(df1, df2)
            df_equals(df1.dtypes, df2.dtypes)

        eval_io(
            fn_name="read_sql",
            # read_sql kwargs
            sql=query,
            con=conn,
            dtype_backend=dtype_backend,
            comparator=comparator,
        )

    @pytest.mark.skipif(
        not TestReadFromSqlServer.get(),
        reason="Skip the test when the test SQL server is not set up.",
    )
    def test_read_sql_from_sql_server(self):
        """Write a 1000x256 table to the test SQL server and read it back."""
        table_name = "test_1000x256"
        query = f"SELECT * FROM {table_name}"
        sqlalchemy_connection_string = (
            "mssql+pymssql://sa:Strong.Pwd-123@0.0.0.0:1433/master"
        )
        pandas_df_to_read = pandas.DataFrame(
            np.arange(
                1000 * 256,
            ).reshape(1000, 256)
        ).add_prefix("col")
        pandas_df_to_read.to_sql(
            table_name, sqlalchemy_connection_string, if_exists="replace"
        )
        modin_df = pd.read_sql(
            query,
            ModinDatabaseConnection("sqlalchemy", sqlalchemy_connection_string),
        )
        pandas_df = pandas.read_sql(query, sqlalchemy_connection_string)
        df_equals(modin_df, pandas_df)

    @pytest.mark.skipif(
        not TestReadFromPostgres.get(),
        reason="Skip the test when the postgres server is not set up.",
    )
    def test_read_sql_from_postgres(self):
        """Write a 1000x256 table to the test postgres server and read it back."""
        table_name = "test_1000x256"
        query = f"SELECT * FROM {table_name}"
        connection = "postgresql://sa:Strong.Pwd-123@localhost:2345/postgres"
        pandas_df_to_read = pandas.DataFrame(
            np.arange(
                1000 * 256,
            ).reshape(1000, 256)
        ).add_prefix("col")
        pandas_df_to_read.to_sql(table_name, connection, if_exists="replace")
        modin_df = pd.read_sql(
            query,
            ModinDatabaseConnection("psycopg2", connection),
        )
        pandas_df = pandas.read_sql(query, connection)
        df_equals(modin_df, pandas_df)

    def test_invalid_modin_database_connections(self):
        """An unknown library name must raise ``UnsupportedDatabaseException``."""
        with pytest.raises(UnsupportedDatabaseException):
            ModinDatabaseConnection("unsupported_database")

    def test_read_sql_with_chunksize(self, tmp_path, make_sql_connection):
        """Iterate ``read_sql(..., chunksize=10)`` and compare chunk by chunk."""
        filename = get_unique_filename(extension="db")
        table = "test_read_sql_with_chunksize"
        # Create the database under `tmp_path`, like the other tests, so no
        # file is left behind in the working directory.
        conn = make_sql_connection(tmp_path / filename, table)
        query = f"select * from {table}"

        pandas_gen = pandas.read_sql(query, conn, chunksize=10)
        modin_gen = pd.read_sql(query, conn, chunksize=10)
        for modin_df, pandas_df in zip(modin_gen, pandas_gen):
            df_equals(modin_df, pandas_df)

    @pytest.mark.parametrize("index", [False, True])
    @pytest.mark.parametrize("conn_type", ["str", "sqlalchemy", "sqlalchemy+connect"])
    def test_to_sql(self, tmp_path, make_sql_connection, index, conn_type):
        """Write with ``to_sql`` from both libraries and compare what pandas reads back."""
        table_name = f"test_to_sql_{str(index)}"
        modin_df, pandas_df = create_test_dfs(TEST_DATA)
        index_col = "index" if index else None

        def write_and_read_back(df, db_tag):
            """Write ``df`` to its own database and return the table read by pandas."""
            # We do not pass the table name so the fixture won't generate a table
            conn = make_sql_connection(tmp_path / f"{table_name}_{db_tag}.db")
            engine = None
            if conn_type.startswith("sqlalchemy"):
                engine = sa.create_engine(conn)
                conn = engine
                if conn_type == "sqlalchemy+connect":
                    conn = engine.connect()
            try:
                df.to_sql(table_name, conn, index=index)
                return pandas.read_sql(table_name, con=conn, index_col=index_col)
            finally:
                # Close whatever SQLAlchemy objects this helper opened.
                if engine is not None:
                    if conn is not engine:
                        conn.close()
                    engine.dispose()

        df_modin_sql = write_and_read_back(modin_df, "modin")
        df_pandas_sql = write_and_read_back(pandas_df, "pandas")

        assert df_modin_sql.sort_index().equals(df_pandas_sql.sort_index())


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestHtml:
    """HTML reader and writer checks against pandas."""

    @pytest.mark.skipif(
        platform.system() == "Windows",
        reason="https://github.com/modin-project/modin/issues/7497",
    )
    def test_read_html(self, make_html_file):
        """Read a generated HTML file with both libraries."""
        html_path = make_html_file()
        eval_io(fn_name="read_html", io=html_path)

    def test_to_html(self, tmp_path):
        """Write the shared test data with ``to_html`` and compare the files."""
        modin_frame, pandas_frame = create_test_dfs(TEST_DATA)
        writer_spec = {"fn": "to_html", "extension": "html"}
        eval_to_file(
            tmp_path,
            modin_obj=modin_frame,
            pandas_obj=pandas_frame,
            **writer_spec,
        )


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestFwf:
    """Tests comparing Modin's fixed-width-file reader against pandas."""

    @pytest.mark.parametrize("pathlike", [False, True])
    def test_fwf_file(self, make_fwf_file, pathlike):
        """Read with explicit ``colspecs`` from a ``str`` or ``Path`` location."""
        fwf_data = (
            "id8141  360.242940  149.910199 11950.7\n"
            + "id1594  444.953632  166.985655 11788.4\n"
            + "id1849  364.136849  183.628767 11806.2\n"
            + "id1230  413.836124  184.375703 11916.8\n"
            + "id1948  502.953953  173.237159 12468.3\n"
        )
        unique_filename = make_fwf_file(fwf_data=fwf_data)

        colspecs = [(0, 6), (8, 20), (21, 33), (34, 43)]
        df = pd.read_fwf(
            Path(unique_filename) if pathlike else unique_filename,
            colspecs=colspecs,
            header=None,
            index_col=0,
        )
        assert isinstance(df, pd.DataFrame)

    @pytest.mark.parametrize(
        "kwargs",
        [
            {
                "colspecs": [
                    (0, 11),
                    (11, 15),
                    (19, 24),
                    (27, 32),
                    (35, 40),
                    (43, 48),
                    (51, 56),
                    (59, 64),
                    (67, 72),
                    (75, 80),
                    (83, 88),
                    (91, 96),
                    (99, 104),
                    (107, 112),
                ],
                "names": ["stationID", "year", 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
                "na_values": ["-9999"],
                "index_col": ["stationID", "year"],
            },
            {
                "widths": [20, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
                "names": ["id", 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
                "index_col": [0],
            },
        ],
    )
    def test_fwf_file_colspecs_widths(self, make_fwf_file, kwargs):
        """Read with ``colspecs`` or ``widths`` and compare against pandas."""
        unique_filename = make_fwf_file()

        modin_df = pd.read_fwf(unique_filename, **kwargs)
        # The reference must come from pandas itself; reading it through `pd`
        # (Modin) would only compare Modin with Modin.
        pandas_df = pandas.read_fwf(unique_filename, **kwargs)

        df_equals(modin_df, pandas_df)

    @pytest.mark.parametrize(
        "usecols",
        [
            ["a"],
            ["a", "b", "d"],
            [0, 1, 3],
        ],
    )
    def test_fwf_file_usecols(self, make_fwf_file, usecols):
        """Read with ``usecols`` given as labels and as positions."""
        fwf_data = (
            "a       b           c          d\n"
            + "id8141  360.242940  149.910199 11950.7\n"
            + "id1594  444.953632  166.985655 11788.4\n"
            + "id1849  364.136849  183.628767 11806.2\n"
            + "id1230  413.836124  184.375703 11916.8\n"
            + "id1948  502.953953  173.237159 12468.3\n"
        )
        eval_io(
            fn_name="read_fwf",
            # read_fwf kwargs
            filepath_or_buffer=make_fwf_file(fwf_data=fwf_data),
            usecols=usecols,
        )

    @pytest.mark.parametrize(
        "dtype_backend", [lib.no_default, "numpy_nullable", "pyarrow"]
    )
    def test_read_fwf_dtype_backend(self, make_fwf_file, dtype_backend):
        """Read with each ``dtype_backend`` and compare both values and dtypes."""
        unique_filename = make_fwf_file()

        def comparator(df1, df2):
            df_equals(df1, df2)
            df_equals(df1.dtypes, df2.dtypes)

        eval_io(
            fn_name="read_fwf",
            # read_fwf kwargs
            filepath_or_buffer=unique_filename,
            dtype_backend=dtype_backend,
            comparator=comparator,
        )

    def test_fwf_file_chunksize(self, make_fwf_file):
        """Exercise iteration, ``get_chunk`` and ``read`` on chunked readers."""
        unique_filename = make_fwf_file()

        # Tests __next__ and correctness of reader as an iterator
        rdf_reader = pd.read_fwf(unique_filename, chunksize=5)
        pd_reader = pandas.read_fwf(unique_filename, chunksize=5)

        for modin_df, pd_df in zip(rdf_reader, pd_reader):
            df_equals(modin_df, pd_df)

        # Tests that get_chunk works correctly
        rdf_reader = pd.read_fwf(unique_filename, chunksize=1)
        pd_reader = pandas.read_fwf(unique_filename, chunksize=1)

        modin_df = rdf_reader.get_chunk(1)
        pd_df = pd_reader.get_chunk(1)

        df_equals(modin_df, pd_df)

        # Tests that read works correctly
        rdf_reader = pd.read_fwf(unique_filename, chunksize=1)
        pd_reader = pandas.read_fwf(unique_filename, chunksize=1)

        modin_df = rdf_reader.read()
        pd_df = pd_reader.read()

        df_equals(modin_df, pd_df)

    @pytest.mark.parametrize("nrows", [13, None])
    def test_fwf_file_skiprows(self, make_fwf_file, nrows):
        """Read with ``skiprows`` as an int and as a list, with and without ``nrows``."""
        unique_filename = make_fwf_file()

        eval_io(
            fn_name="read_fwf",
            # read_fwf kwargs
            filepath_or_buffer=unique_filename,
            skiprows=2,
            nrows=nrows,
        )

        eval_io(
            fn_name="read_fwf",
            # read_fwf kwargs
            filepath_or_buffer=unique_filename,
            usecols=[0, 4, 7],
            skiprows=[2, 5],
            nrows=nrows,
        )

    def test_fwf_file_index_col(self, make_fwf_file):
        """Read with a column label as ``index_col``."""
        fwf_data = (
            "a       b           c          d\n"
            + "id8141  360.242940  149.910199 11950.7\n"
            + "id1594  444.953632  166.985655 11788.4\n"
            + "id1849  364.136849  183.628767 11806.2\n"
            + "id1230  413.836124  184.375703 11916.8\n"
            + "id1948  502.953953  173.237159 12468.3\n"
        )
        eval_io(
            fn_name="read_fwf",
            # read_fwf kwargs
            filepath_or_buffer=make_fwf_file(fwf_data=fwf_data),
            index_col="c",
        )

    def test_fwf_file_skipfooter(self, make_fwf_file):
        """Read with ``skipfooter=2``."""
        eval_io(
            fn_name="read_fwf",
            # read_fwf kwargs
            filepath_or_buffer=make_fwf_file(),
            skipfooter=2,
        )

    def test_fwf_file_parse_dates(self, make_fwf_file):
        """Read with ``parse_dates`` combining a date column and a time column."""
        dates = pandas.date_range("2000", freq="h", periods=10)
        # One header line followed by ten rows; col1/col3 hold 10..19 and
        # col2/col4 hold the date and time parts of each timestamp.
        rows = [
            f"{i}   {stamp.date()}  {i}   {stamp.time()}"
            for i, stamp in enumerate(dates, start=10)
        ]
        fwf_data = "\n".join(["col1 col2        col3 col4", *rows])
        unique_filename = make_fwf_file(fwf_data=fwf_data)

        eval_io(
            fn_name="read_fwf",
            # read_fwf kwargs
            filepath_or_buffer=unique_filename,
            parse_dates=[["col2", "col4"]],
        )

        eval_io(
            fn_name="read_fwf",
            # read_fwf kwargs
            filepath_or_buffer=unique_filename,
            parse_dates={"time": ["col2", "col4"]},
        )

    @pytest.mark.parametrize(
        "read_mode",
        [
            "r",
            "rb",
        ],
    )
    def test_read_fwf_file_handle(self, make_fwf_file, read_mode):
        """Read from an open handle in text and in binary mode."""
        with open(make_fwf_file(), mode=read_mode) as buffer:
            df_pandas = pandas.read_fwf(buffer)
            # The pandas read consumed the handle; rewind it for Modin.
            buffer.seek(0)
            df_modin = pd.read_fwf(buffer)
            df_equals(df_modin, df_pandas)

    def test_read_fwf_empty_frame(self, make_fwf_file):
        """Read with ``usecols=[0]`` and ``index_col=0`` together."""
        kwargs = {
            "usecols": [0],
            "index_col": 0,
        }
        unique_filename = make_fwf_file()

        modin_df = pd.read_fwf(unique_filename, **kwargs)
        pandas_df = pandas.read_fwf(unique_filename, **kwargs)

        df_equals(modin_df, pandas_df)

    @pytest.mark.parametrize(
        "storage_options_extra",
        [{"anon": False}, {"anon": True}, {"key": "123", "secret": "123"}],
    )
    def test_read_fwf_s3(self, s3_resource, s3_storage_options, storage_options_extra):
        """Read from the test S3 bucket with several ``storage_options``."""
        expected_exception = None
        # Both `anon` variants are expected to be rejected.
        if "anon" in storage_options_extra:
            expected_exception = PermissionError("Forbidden")
        eval_io(
            fn_name="read_fwf",
            filepath_or_buffer="s3://modin-test/modin-bugs/test_data.fwf",
            storage_options=s3_storage_options | storage_options_extra,
            expected_exception=expected_exception,
        )


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestGbq:
    """Google BigQuery API checks; the live tests are skipped without GBQ access."""

    @pytest.mark.skip(reason="Can not pass without GBQ access")
    def test_read_gbq(self):
        """``read_gbq`` without a project id is expected to raise ``ValueError``."""
        # Test API, but do not supply credentials until credits can be secured.
        missing_project = "Could not determine project ID and one was not supplied."
        with pytest.raises(ValueError, match=missing_project):
            pd.read_gbq("SELECT 1")

    @pytest.mark.skip(reason="Can not pass without GBQ access")
    def test_to_gbq(self):
        """``to_gbq`` without a project id is expected to raise ``ValueError``."""
        modin_frame, _ = create_test_dfs(TEST_DATA)
        # Test API, but do not supply credentials until credits can be secured.
        missing_project = "Could not determine project ID and one was not supplied."
        with pytest.raises(ValueError, match=missing_project):
            modin_frame.to_gbq("modin.table")

    def test_read_gbq_mock(self):
        """Check that ``pd.read_gbq`` forwards its arguments to ``pandas.read_gbq``."""
        positional = ("fake_query",)
        # Every parameter of the signature except `query`, which is passed
        # positionally instead.
        keyword = {
            name: parameter
            for name, parameter in inspect.signature(pd.read_gbq).parameters.items()
            if name != "query"
        }
        keyword.update(project_id="test_id", dialect="standart")
        patched = mock.patch("pandas.read_gbq", return_value=pandas.DataFrame([]))
        with patched as read_gbq:
            pd.read_gbq(*positional, **keyword)
        read_gbq.assert_called_once_with(*positional, **keyword)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestStata:
    """Stata reader and writer checks against pandas."""

    def test_read_stata(self, make_stata_file):
        """Read a generated Stata file with both libraries."""
        stata_path = make_stata_file()
        # read_stata kwargs
        eval_io(fn_name="read_stata", filepath_or_buffer=stata_path)

    def test_to_stata(self, tmp_path):
        """Write the shared test data with ``to_stata`` and compare the files."""
        modin_frame, pandas_frame = create_test_dfs(TEST_DATA)
        writer_spec = {"fn": "to_stata", "extension": "stata"}
        eval_to_file(
            tmp_path,
            modin_obj=modin_frame,
            pandas_obj=pandas_frame,
            **writer_spec,
        )


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestSas:
    """SAS reader check against pandas."""

    def test_read_sas(self):
        """Read the checked-in ``airline.sas7bdat`` file with both libraries."""
        sas_path = "modin/tests/pandas/data/airline.sas7bdat"
        # read_sas kwargs
        eval_io(fn_name="read_sas", filepath_or_buffer=sas_path)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestFeather:
    """Tests comparing Modin's feather reader and writer against pandas."""

    def test_read_feather(self, make_feather_file):
        """Read a generated feather file with both libraries."""
        eval_io(
            fn_name="read_feather",
            # read_feather kwargs
            path=make_feather_file(),
        )

    @pytest.mark.parametrize(
        "dtype_backend", [lib.no_default, "numpy_nullable", "pyarrow"]
    )
    def test_read_feather_dtype_backend(self, make_feather_file, dtype_backend):
        """Read with each ``dtype_backend`` and compare both values and dtypes."""

        def comparator(df1, df2):
            df_equals(df1, df2)
            df_equals(df1.dtypes, df2.dtypes)

        eval_io(
            fn_name="read_feather",
            # read_feather kwargs
            path=make_feather_file(),
            dtype_backend=dtype_backend,
            comparator=comparator,
        )

    @pytest.mark.parametrize(
        "storage_options_extra",
        [{"anon": False}, {"anon": True}, {"key": "123", "secret": "123"}],
    )
    def test_read_feather_s3(
        self, s3_resource, s3_storage_options, storage_options_extra
    ):
        """Read from the test S3 bucket with several ``storage_options``."""
        expected_exception = None
        # Both `anon` variants are expected to be rejected.
        if "anon" in storage_options_extra:
            expected_exception = PermissionError("Forbidden")
        eval_io(
            fn_name="read_feather",
            path="s3://modin-test/modin-bugs/test_data.feather",
            storage_options=s3_storage_options | storage_options_extra,
            expected_exception=expected_exception,
        )

    def test_read_feather_path_object(self, make_feather_file):
        """Read a feather file whose location is given as a ``Path``."""
        eval_io(
            fn_name="read_feather",
            path=Path(make_feather_file()),
        )

    def test_to_feather(self, tmp_path):
        """Write the shared test data with ``to_feather`` and compare the files."""
        modin_df, pandas_df = create_test_dfs(TEST_DATA)
        eval_to_file(
            tmp_path,
            modin_obj=modin_df,
            pandas_obj=pandas_df,
            fn="to_feather",
            extension="feather",
        )

    def test_read_feather_with_index_metadata(self, tmp_path):
        """Read a feather file written from a frame with a non-``RangeIndex`` index."""
        # see: https://github.com/modin-project/modin/issues/6212
        df = pandas.DataFrame({"a": [1, 2, 3]}, index=[0, 1, 2])
        assert not isinstance(df.index, pandas.RangeIndex)

        # Bare extension, consistent with the other `get_unique_filename`
        # calls in this file ("xlsx", "hdf", "pkl", "db").
        path = get_unique_filename(extension="feather", data_dir=tmp_path)
        df.to_feather(path)
        eval_io(
            fn_name="read_feather",
            path=path,
        )


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestClipboard:
    """Clipboard round-trip checks; both tests are skipped in CI."""

    @pytest.mark.skip(reason="No clipboard in CI")
    def test_read_clipboard(self):
        """Fill the clipboard, then read it with both libraries."""
        setup_clipboard()
        eval_io(fn_name="read_clipboard")

    @pytest.mark.skip(reason="No clipboard in CI")
    def test_to_clipboard(self):
        """Copy each frame to the clipboard and compare what pandas reads back."""
        modin_frame, pandas_frame = create_test_dfs(TEST_DATA)

        modin_frame.to_clipboard()
        from_modin = pandas.read_clipboard()

        pandas_frame.to_clipboard()
        from_pandas = pandas.read_clipboard()

        assert from_modin.equals(from_pandas)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestPickle:
    """Pickle reader and writer checks."""

    def test_read_pickle(self, make_pickle_file):
        """Read a generated pickle file with both libraries."""
        pickle_path = make_pickle_file()
        # read_pickle kwargs
        eval_io(fn_name="read_pickle", filepath_or_buffer=pickle_path)

    def test_to_pickle(self, tmp_path):
        """Pickle a Modin frame and check that reading it back gives an equal frame."""
        original, _ = create_test_dfs(TEST_DATA)
        pickle_path = get_unique_filename(extension="pkl", data_dir=tmp_path)

        original.to_pickle(pickle_path)
        restored = pd.read_pickle(pickle_path)

        df_equals(original, restored)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestXml:
    """XML reader check against pandas."""

    @pytest.mark.skipif(
        platform.system() == "Windows",
        reason="https://github.com/modin-project/modin/issues/7497",
    )
    def test_read_xml(self):
        """Read an in-memory XML document with both libraries."""
        # example from pandas
        xml_document = """<?xml version='1.0' encoding='utf-8'?>
<data xmlns="http://example.com">
 <row>
   <shape>square</shape>
   <degrees>360</degrees>
   <sides>4.0</sides>
 </row>
 <row>
   <shape>circle</shape>
   <degrees>360</degrees>
   <sides/>
 </row>
 <row>
   <shape>triangle</shape>
   <degrees>180</degrees>
   <sides>3.0</sides>
 </row>
</data>
"""
        eval_io(fn_name="read_xml", path_or_buffer=xml_document)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestOrc:
    """Argument-forwarding check for ``read_orc``."""

    # It's not easy to add infrastructure for `orc` format.
    # In case of defaulting to pandas, it's enough
    # to check that the parameters are passed to pandas.
    def test_read_orc(self):
        """Check that ``pd.read_orc`` forwards its arguments to ``pandas.read_orc``."""
        positional = ("fake_path",)
        keyword = {
            "columns": ["A"],
            "dtype_backend": lib.no_default,
            "filesystem": None,
            "fake_kwarg": "some_pyarrow_parameter",
        }
        patched = mock.patch("pandas.read_orc", return_value=pandas.DataFrame([]))
        with patched as read_orc:
            pd.read_orc(*positional, **keyword)
        read_orc.assert_called_once_with(*positional, **keyword)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestSpss:
    # It's not easy to add infrastructure for `spss` format.
    # In case of defaulting to pandas, it's enough
    # to check that the parameters are passed to pandas.
    def test_read_spss(self):
        """Check that ``pd.read_spss`` forwards its arguments to ``pandas.read_spss``."""
        path_args = ("fake_path",)
        reader_kwargs = {
            "usecols": ["A"],
            "convert_categoricals": False,
            "dtype_backend": lib.no_default,
        }
        stub_result = pandas.DataFrame([])
        with mock.patch("pandas.read_spss", return_value=stub_result) as patched_reader:
            pd.read_spss(*path_args, **reader_kwargs)
        patched_reader.assert_called_once_with(*path_args, **reader_kwargs)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
def test_json_normalize():
    """Run ``json_normalize`` through ``eval_io`` on nested records."""
    # example from pandas
    records = [
        {"id": 1, "name": {"first": "Coleen", "last": "Volk"}},
        {"name": {"given": "Mark", "family": "Regner"}},
        {"id": 2, "name": "Faye Raker"},
    ]
    eval_io(fn_name="json_normalize", data=records)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
def test_from_arrow():
    """Build a Modin frame from a pyarrow table and compare it to the pandas source."""
    _, pandas_df = create_test_dfs(TEST_DATA)
    arrow_table = pa.Table.from_pandas(pandas_df)
    df_equals(from_arrow(arrow_table), pandas_df)


@pytest.mark.skipif(
    condition=Engine.get() != "Ray",
    reason="Distributed 'from_pandas' is only available for Ray engine",
)
@pytest.mark.parametrize("modify_config", [{AsyncReadMode: True}], indirect=True)
def test_distributed_from_pandas(modify_config):
    """Create a Modin frame from a 200_000 x 64 pandas frame and compare the two."""
    row_count, column_count = 200_000, 64
    pandas_df = pandas.DataFrame(
        {f"col{i}": np.arange(row_count) for i in range(column_count)}
    )
    df_equals(pd.DataFrame(pandas_df), pandas_df)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
def test_from_spmatrix():
    """Check ``DataFrame.sparse.from_spmatrix`` warns about defaulting and matches pandas."""
    identity = sparse.eye(3)
    with pytest.warns(UserWarning, match="defaulting to pandas.*"):
        from_modin = pd.DataFrame.sparse.from_spmatrix(identity)
    from_pandas = pandas.DataFrame.sparse.from_spmatrix(identity)
    df_equals(from_modin, from_pandas)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
def test_to_dense():
    """Compare ``sparse.to_dense`` between Modin and pandas on a sparse column."""
    sparse_columns = {"col1": pandas.arrays.SparseArray([0, 1, 0])}
    modin_df, pandas_df = create_test_dfs(sparse_columns)
    dense_modin = modin_df.sparse.to_dense()
    dense_pandas = pandas_df.sparse.to_dense()
    df_equals(dense_modin, dense_pandas)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
def test_to_dict_dataframe():
    """``DataFrame.to_dict`` must equal ``to_dict`` of the frame converted to pandas."""
    modin_df, _ = create_test_dfs(TEST_DATA)
    from_modin = modin_df.to_dict()
    from_pandas = to_pandas(modin_df).to_dict()
    assert from_modin == from_pandas


@pytest.mark.parametrize(
    "kwargs",
    [
        pytest.param({}, id="no_kwargs"),
        pytest.param({"into": dict}, id="into_dict"),
        pytest.param({"into": defaultdict(list)}, id="into_defaultdict"),
    ],
)
@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
def test_to_dict_series(kwargs):
    """Compare ``Series.to_dict(**kwargs)`` on the first column of the int test data."""
    modin_df, pandas_df = create_test_dfs(utils_test_data["int_data"])

    def series_to_dict(series):
        return series.to_dict(**kwargs)

    eval_general(modin_df.iloc[:, 0], pandas_df.iloc[:, 0], series_to_dict)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
def test_to_latex():
    """``DataFrame.to_latex`` must equal ``to_latex`` of the frame converted to pandas."""
    modin_df, _ = create_test_dfs(TEST_DATA)
    from_modin = modin_df.to_latex()
    from_pandas = to_pandas(modin_df).to_latex()
    assert from_modin == from_pandas


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
@pytest.mark.skipif(
    platform.system() == "Windows",
    reason="https://github.com/modin-project/modin/issues/7497",
)
def test_to_xml():
    """``DataFrame.to_xml`` must equal ``to_xml`` of the frame converted to pandas."""
    # `lxml` is a required dependency for `to_xml`, but optional for Modin.
    # For some engines we do not install it.
    pytest.importorskip("lxml")
    modin_df, _ = create_test_dfs(TEST_DATA)
    from_modin = modin_df.to_xml()
    from_pandas = to_pandas(modin_df).to_xml()
    assert from_modin == from_pandas


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
def test_to_period():
    """Compare ``to_period`` between Modin and pandas on an hourly DatetimeIndex."""
    row_count = len(TEST_DATA["col1"])
    hourly_index = pandas.DatetimeIndex(
        pandas.date_range("2000", freq="h", periods=row_count)
    )
    modin_df, pandas_df = create_test_dfs(TEST_DATA, index=hourly_index)
    df_equals(modin_df.to_period(), pandas_df.to_period())


@pytest.mark.xfail(
    Engine.get() == "Ray" and version.parse(ray.__version__) <= version.parse("2.9.3"),
    reason="Ray-2.9.3 has a problem using pandas 2.2.0. It will be resolved in the next release of Ray.",
)
@pytest.mark.skipif(
    condition=Engine.get() != "Ray",
    reason="Modin Dataframe can only be converted to a Ray Dataset if Modin uses a Ray engine.",
)
@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
def test_df_to_ray():
    """Convert a Modin frame with ``modin.to_ray`` and compare its pandas form."""
    row_count = len(TEST_DATA["col1"])
    hourly_index = pandas.DatetimeIndex(
        pandas.date_range("2000", freq="h", periods=row_count)
    )
    modin_df, pandas_df = create_test_dfs(TEST_DATA, index=hourly_index)
    converted = modin_df.modin.to_ray()
    df_equals(converted.to_pandas(), pandas_df)


@pytest.mark.xfail(
    Engine.get() == "Ray" and version.parse(ray.__version__) <= version.parse("2.9.3"),
    reason="Ray-2.9.3 has a problem using pandas 2.2.0. It will be resolved in the next release of Ray.",
)
@pytest.mark.skipif(
    condition=Engine.get() != "Ray",
    reason="Modin Dataframe can only be converted to a Ray Dataset if Modin uses a Ray engine.",
)
@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
def test_series_to_ray():
    """Convert a Modin Series with ``modin.to_ray`` and compare its squeezed pandas form."""
    timestamps = pandas.DatetimeIndex(
        pandas.date_range("2000", freq="h", periods=len(TEST_DATA["col1"]))
    )
    # A Pandas DataFrame with column names of non-str types is not supported by Ray Dataset.
    string_labels = [str(stamp) for stamp in timestamps]
    # The Series under test is the first row of the frame.
    pandas_s = pandas.DataFrame(TEST_DATA, index=string_labels).iloc[0]
    modin_s = pd.Series(pandas_s)
    converted = modin_s.modin.to_ray()
    df_equals(converted.to_pandas().squeeze(), pandas_s)


@pytest.mark.xfail(
    Engine.get() == "Ray" and version.parse(ray.__version__) <= version.parse("2.9.3"),
    reason="Ray-2.9.3 has a problem using pandas 2.2.0. It will be resolved in the next release of Ray.",
)
@pytest.mark.skipif(
    condition=Engine.get() != "Ray",
    reason="Ray Dataset can only be converted to a Modin Dataframe if Modin uses a Ray engine.",
)
@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
def test_from_ray():
    """Build a Modin frame from a Ray Dataset and compare it to a directly built one."""
    row_count = len(TEST_DATA["col1"])
    hourly_index = pandas.DatetimeIndex(
        pandas.date_range("2000", freq="h", periods=row_count)
    )
    modin_df, pandas_df = create_test_dfs(TEST_DATA, index=hourly_index)
    ray_dataset = ray.data.from_pandas(pandas_df)
    df_equals(from_ray(ray_dataset), modin_df)


@pytest.mark.skipif(
    condition=Engine.get() != "Dask",
    reason="Modin DataFrame can only be converted to a Dask DataFrame if Modin uses a Dask engine.",
)
@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
def test_df_to_dask():
    """Convert a Modin frame with ``modin.to_dask`` and compare the computed result."""
    row_count = len(TEST_DATA["col1"])
    hourly_index = pandas.DatetimeIndex(
        pandas.date_range("2000", freq="h", periods=row_count)
    )
    modin_df, pandas_df = create_test_dfs(TEST_DATA, index=hourly_index)

    converted = modin_df.modin.to_dask()
    df_equals(converted.compute(), pandas_df)


@pytest.mark.skipif(
    condition=Engine.get() != "Dask",
    reason="Modin DataFrame can only be converted to a Dask DataFrame if Modin uses a Dask engine.",
)
@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
def test_series_to_dask():
    """Convert a Modin Series with ``modin.to_dask`` and compare the computed result."""
    modin_s, pandas_s = create_test_series(TEST_DATA["col1"])

    converted = modin_s.modin.to_dask()
    df_equals(converted.compute(), pandas_s)


@pytest.mark.skipif(
    condition=Engine.get() != "Dask",
    reason="Dask DataFrame can only be converted to a Modin DataFrame if Modin uses a Dask engine.",
)
@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
def test_from_dask():
    """Build a Modin frame from a Dask DataFrame and compare it to a directly built one."""
    import dask.dataframe as dd

    row_count = len(TEST_DATA["col1"])
    hourly_index = pandas.DatetimeIndex(
        pandas.date_range("2000", freq="h", periods=row_count)
    )
    modin_df, pandas_df = create_test_dfs(TEST_DATA, index=hourly_index)

    dask_source = dd.from_pandas(pandas_df, npartitions=NPartitions.get())

    df_equals(from_dask(dask_source), modin_df)


@pytest.mark.skipif(
    condition=Engine.get() not in ("Ray", "Dask", "Unidist"),
    reason="Modin DataFrame can only be created from map if Modin uses Ray, Dask or MPI engine.",
)
@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
def test_from_map():
    """Check ``from_map`` against a frame built from the expected repeated values."""
    factor = 3
    inputs = [1, 2, 3]
    # Each input value repeated `factor` times, in input order.
    repeated_values = [value for value in inputs for _ in range(factor)]
    expected_df = pd.DataFrame(repeated_values, index=[0, 1, 2] * factor)

    def map_func(x, factor):
        return [x] * factor

    actual_df = from_map(map_func, inputs, factor)
    df_equals(actual_df, expected_df)


================================================
FILE: modin/tests/pandas/test_repartition.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import re

import numpy as np
import pytest

import modin.pandas as pd
from modin.config import context
from modin.core.storage_formats.pandas.native_query_compiler import (
    _NO_REPARTITION_ON_NATIVE_EXECUTION_EXCEPTION_MESSAGE,
)
from modin.tests.test_utils import current_execution_is_native
from modin.utils import get_current_execution


@pytest.fixture(autouse=True)
def set_npartitions():
    """Run each test of this module inside ``context(NPartitions=4)``."""
    npartitions_override = context(NPartitions=4)
    with npartitions_override:
        yield


@pytest.mark.skipif(
    current_execution_is_native(), reason="Native execution does not have partitions."
)
@pytest.mark.skipif(
    get_current_execution() == "BaseOnPython",
    reason="BaseOnPython chooses partition numbers differently",
)
@pytest.mark.parametrize("axis", [0, 1, None])
@pytest.mark.parametrize("dtype", ["DataFrame", "Series"])
def test_repartition(axis, dtype):
    """
    Check the partition grid shape before and after ``_repartition``.

    Parameters
    ----------
    axis : {0, 1, None}
        Axis passed to ``DataFrame._repartition``; not passed for a Series.
    dtype : {"DataFrame", "Series"}
        Kind of object under test.
    """
    if dtype == "Series" and axis in (1, None):
        # Report these combinations as skipped rather than letting them
        # count as passing tests that asserted nothing.
        pytest.skip("`axis` in (1, None) makes no sense for Series")

    df = pd.DataFrame({"col1": [1, 2], "col2": [5, 6]})
    df2 = pd.DataFrame({"col3": [9, 4]})

    # Concatenate along both axes before checking the partition grid shape.
    df = pd.concat([df, df2], axis=1)
    df = pd.concat([df, df], axis=0)

    obj = df if dtype == "DataFrame" else df["col1"]

    source_shapes = {
        "DataFrame": (2, 2),
        "Series": (2, 1),
    }
    # check that the test makes sense
    assert obj._query_compiler._modin_frame._partitions.shape == source_shapes[dtype]

    kwargs = {"axis": axis} if dtype == "DataFrame" else {}
    obj = obj._repartition(**kwargs)

    if dtype == "DataFrame":
        results = {
            None: (1, 1),
            0: (1, 2),
            1: (2, 1),
        }
    else:
        # Only axis=0 reaches this point for a Series.
        results = {0: (1, 1)}

    assert obj._query_compiler._modin_frame._partitions.shape == results[axis]


@pytest.mark.skipif(
    current_execution_is_native(), reason="Native execution does not have partitions."
)
def test_repartition_7170():
    """Repartition a 10000 x 100 frame along axis 1 and materialize it to NumPy."""
    config_override = context(MinColumnPartitionSize=102, NPartitions=5)
    with config_override:
        wide_df = pd.DataFrame(np.random.rand(10000, 100))
        # Only successful completion matters; the array itself is discarded.
        wide_df._repartition(axis=1).to_numpy()


@pytest.mark.skipif(
    not current_execution_is_native(), reason="This is a native execution test."
)
def test_repartition_not_valid_on_native_execution():
    """``_repartition`` on native execution must raise with the documented message."""
    empty_df = pd.DataFrame()
    # The message is matched literally, hence the escaping.
    expected_message = re.escape(_NO_REPARTITION_ON_NATIVE_EXECUTION_EXCEPTION_MESSAGE)
    with pytest.raises(Exception, match=expected_message):
        empty_df._repartition()


================================================
FILE: modin/tests/pandas/test_reshape.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import contextlib

import numpy as np
import pandas
import pytest

import modin.pandas as pd
from modin.config import StorageFormat
from modin.tests.test_utils import (
    current_execution_is_native,
    df_or_series_using_native_execution,
    warns_that_defaulting_to_pandas_if,
)

from .utils import df_equals, test_data_values


def test_get_dummies():
    """Exercise ``pd.get_dummies`` on Series, list, scalar and DataFrame inputs."""

    def expect_default_to_pandas():
        # Built afresh at every use so the execution check runs each time.
        return warns_that_defaulting_to_pandas_if(not current_execution_is_native())

    def assert_same_result(modin_result, pandas_result):
        df_equals(modin_result, pandas_result)
        assert modin_result._to_pandas().columns.equals(pandas_result.columns)
        assert modin_result.shape == pandas_result.shape

    letters = pd.Series(list("abca"))
    with expect_default_to_pandas():
        pd.get_dummies(letters)

    values_with_nan = ["a", "b", np.nan]
    with expect_default_to_pandas():
        pd.get_dummies(values_with_nan)

    with expect_default_to_pandas():
        pd.get_dummies(values_with_nan, dummy_na=True)

    frame_data = {"A": ["a", "b", "a"], "B": ["b", "a", "c"], "C": [1, 2, 3]}
    modin_df = pd.DataFrame(frame_data)
    pandas_df = pandas.DataFrame(frame_data)

    assert_same_result(
        pd.get_dummies(modin_df, prefix=["col1", "col2"]),
        pandas.get_dummies(pandas_df, prefix=["col1", "col2"]),
    )

    assert_same_result(
        pd.get_dummies(pd.DataFrame(pd.Series(list("abcdeabac")))),
        pandas.get_dummies(pandas.DataFrame(pandas.Series(list("abcdeabac")))),
    )

    with pytest.raises(NotImplementedError):
        pd.get_dummies(modin_df, prefix=["col1", "col2"], sparse=True)

    with expect_default_to_pandas():
        pd.get_dummies(pd.Series(list("abcaa")))

    with expect_default_to_pandas():
        pd.get_dummies(pd.Series(list("abcaa")), drop_first=True)

    with expect_default_to_pandas():
        pd.get_dummies(pd.Series(list("abc")), dtype=float)

    with expect_default_to_pandas():
        pd.get_dummies(1)

    # test from #5184
    pandas_df = pandas.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": ["7", "8", "9"]})
    modin_df = pd.DataFrame(pandas_df)
    expected = pandas.get_dummies(pandas_df, columns=["a", "b"])
    actual = pd.get_dummies(modin_df, columns=["a", "b"])
    df_equals(actual, expected)


def test_melt():
    """``pd.melt`` must emit its mismatch warning on the "Pandas" storage format."""
    melt_input = test_data_values[0]

    if StorageFormat.get() == "Pandas":
        expectation = pytest.warns(
            UserWarning, match=r"`melt` implementation has mismatches with pandas"
        )
    else:
        expectation = contextlib.nullcontext()

    with expectation:
        pd.melt(pd.DataFrame(melt_input))


def test_crosstab():
    """Check that ``pd.crosstab`` returns a Modin DataFrame for array and categorical inputs."""

    def expect_default_to_pandas():
        # Built afresh at every use so the execution check runs each time.
        return warns_that_defaulting_to_pandas_if(not current_execution_is_native())

    row_labels = np.array(
        ["foo", "foo", "foo", "foo", "bar", "bar", "bar", "bar", "foo", "foo", "foo"],
        dtype=object,
    )
    first_col_labels = np.array(
        ["one", "one", "one", "two", "one", "one", "one", "two", "two", "two", "one"],
        dtype=object,
    )
    second_col_labels = np.array(
        ["dull", "dull", "shiny", "dull", "dull", "shiny"]
        + ["shiny", "dull", "shiny", "shiny", "shiny"],
        dtype=object,
    )

    with expect_default_to_pandas():
        result = pd.crosstab(
            row_labels,
            [first_col_labels, second_col_labels],
            rownames=["a"],
            colnames=["b", "c"],
        )
        assert isinstance(result, pd.DataFrame)

    foo = pd.Categorical(["a", "b"], categories=["a", "b", "c"])
    bar = pd.Categorical(["d", "e"], categories=["d", "e", "f"])

    with expect_default_to_pandas():
        result = pd.crosstab(foo, bar)
        assert isinstance(result, pd.DataFrame)

    with expect_default_to_pandas():
        result = pd.crosstab(foo, bar, dropna=False)
        assert isinstance(result, pd.DataFrame)


def test_lreshape():
    """``pd.lreshape`` returns a Modin DataFrame and rejects a NumPy array input."""
    wide_frame = pd.DataFrame(
        {
            "hr1": [514, 573],
            "hr2": [545, 526],
            "team": ["Red Sox", "Yankees"],
            "year1": [2007, 2008],
            "year2": [2008, 2008],
        }
    )

    with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):
        reshaped = pd.lreshape(
            wide_frame, {"year": ["year1", "year2"], "hr": ["hr1", "hr2"]}
        )
        assert isinstance(reshaped, pd.DataFrame)

    with pytest.raises(ValueError):
        pd.lreshape(
            wide_frame.to_numpy(), {"year": ["year1", "year2"], "hr": ["hr1", "hr2"]}
        )


def test_wide_to_long():
    """``pd.wide_to_long`` returns a Modin DataFrame and rejects a NumPy array input."""
    wide_frame = pd.DataFrame(
        {
            "hr1": [514, 573],
            "hr2": [545, 526],
            "team": ["Red Sox", "Yankees"],
            "year1": [2007, 2008],
            "year2": [2008, 2008],
        }
    )

    expectation = warns_that_defaulting_to_pandas_if(
        not df_or_series_using_native_execution(wide_frame)
    )
    with expectation:
        reshaped = pd.wide_to_long(wide_frame, ["hr", "year"], "team", "index")
        assert isinstance(reshaped, pd.DataFrame)

    with pytest.raises(ValueError):
        pd.wide_to_long(wide_frame.to_numpy(), ["hr", "year"], "team", "index")


================================================
FILE: modin/tests/pandas/test_rolling.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import numpy as np
import pandas
import pandas._libs.lib as lib
import pytest

import modin.pandas as pd
from modin.config import NPartitions

from .utils import (
    create_test_dfs,
    create_test_series,
    default_to_pandas_ignore_string,
    df_equals,
    eval_general,
    test_data_keys,
    test_data_values,
)

# Set Modin's `NPartitions` config value to 4 when this module is imported.
NPartitions.put(4)

# Our configuration in pytest.ini requires that we explicitly catch all
# instances of defaulting to pandas, but some test modules, like this one,
# have too many such instances.
# TODO(https://github.com/modin-project/modin/issues/3655): catch all instances
# of defaulting to pandas.
# Module-level marks: pytest applies every entry to each test in this module.
pytestmark = [
    pytest.mark.filterwarnings(default_to_pandas_ignore_string),
    # Turn every FutureWarning into an error so that none goes unnoticed ...
    pytest.mark.filterwarnings("error::FutureWarning"),
    # ... except for this expected Ray warning due to https://github.com/ray-project/ray/issues/54868
    pytest.mark.filterwarnings(
        "ignore:.*In future versions of Ray, Ray will no longer override accelerator visible devices env var if num_gpus=0 or num_gpus=None:FutureWarning"
    ),
    # Ignore these FutureWarnings to keep the test output clean
    pytest.mark.filterwarnings(
        "ignore:Support for axis=1 in DataFrame.rolling is deprecated:FutureWarning"
    ),
    # FIXME: these cases are inconsistent between modin and pandas
    pytest.mark.filterwarnings(
        "ignore:.*In a future version of pandas, the provided callable will be used directly.*:FutureWarning"
    ),
]


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("window", [5, 100])
@pytest.mark.parametrize("min_periods", [None, 5])
@pytest.mark.parametrize("axis", [lib.no_default, 1])
@pytest.mark.parametrize(
    "method, kwargs",
    [
        ("count", {}),
        ("sum", {}),
        ("mean", {}),
        ("var", {"ddof": 0}),
        ("std", {"ddof": 0}),
        ("min", {}),
        ("max", {}),
        ("skew", {}),
        ("kurt", {}),
        ("apply", {"func": np.sum}),
        ("rank", {}),
        ("sem", {"ddof": 0}),
        ("quantile", {"q": 0.1}),
        ("median", {}),
    ],
)
def test_dataframe_rolling(data, window, min_periods, axis, method, kwargs):
    """Compare one method of a centered ``DataFrame.rolling`` between Modin and pandas."""
    # Testing of Rolling class
    modin_df, pandas_df = create_test_dfs(data)
    # The window never exceeds the number of rows.
    window = min(window, len(pandas_df))

    def run_rolling_method(df):
        rolled = df.rolling(
            window=window,
            min_periods=min_periods,
            win_type=None,
            center=True,
            axis=axis,
        )
        return getattr(rolled, method)(**kwargs)

    eval_general(modin_df, pandas_df, run_rolling_method)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("window", [5, 100])
@pytest.mark.parametrize("min_periods", [None, 5])
@pytest.mark.parametrize("axis", [lib.no_default, 1])
def test_dataframe_agg(data, window, min_periods, axis):
    """Compare ``Rolling.aggregate`` with one function and with a list of functions."""
    modin_df, pandas_df = create_test_dfs(data)
    # The window never exceeds the number of rows.
    rolling_kwargs = {
        "window": min(window, len(pandas_df)),
        "min_periods": min_periods,
        "win_type": None,
        "center": True,
        "axis": axis,
    }
    modin_rolled = modin_df.rolling(**rolling_kwargs)
    pandas_rolled = pandas_df.rolling(**rolling_kwargs)

    df_equals(pandas_rolled.aggregate(np.sum), modin_rolled.aggregate(np.sum))
    # TODO(https://github.com/modin-project/modin/issues/4260): Once pandas
    # allows us to rolling aggregate a list of functions over axis 1, test
    # that, too.
    if axis == 1:
        return
    df_equals(
        pandas_rolled.aggregate([np.sum, np.mean]),
        modin_rolled.aggregate([np.sum, np.mean]),
    )


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("window", [5, 100])
@pytest.mark.parametrize("min_periods", [None, 5])
@pytest.mark.parametrize("axis", [lib.no_default, 1])
@pytest.mark.parametrize(
    "method, kwargs",
    [
        ("sum", {}),
        ("mean", {}),
        ("var", {"ddof": 0}),
        ("std", {"ddof": 0}),
    ],
)
def test_dataframe_window(data, window, min_periods, axis, method, kwargs):
    """Compare one method of a ``win_type="triang"`` DataFrame rolling window."""
    # Testing of Window class
    modin_df, pandas_df = create_test_dfs(data)
    # The window never exceeds the number of rows.
    window = min(window, len(pandas_df))

    def run_window_method(df):
        windowed = df.rolling(
            window=window,
            min_periods=min_periods,
            win_type="triang",
            center=True,
            axis=axis,
        )
        return getattr(windowed, method)(**kwargs)

    eval_general(modin_df, pandas_df, run_window_method)


@pytest.mark.parametrize("axis", [lib.no_default, "columns"])
@pytest.mark.parametrize("on", [None, "DateCol"])
@pytest.mark.parametrize("closed", ["both", "right"])
@pytest.mark.parametrize("window", [3, "3s"])
def test_dataframe_dt_index(axis, on, closed, window):
    """Compare rolling results on a small frame with a minute-frequency datetime index."""
    index = pandas.date_range("31/12/2000", periods=12, freq="min")
    data = {"A": range(12), "B": range(12)}
    pandas_df = pandas.DataFrame(data, index=index)
    modin_df = pd.DataFrame(data, index=index)

    # The `on` column is only added for the default axis with an offset
    # window; in every other case `on` is reset to None.
    use_on_column = (
        on is not None and axis == lib.no_default and isinstance(window, str)
    )
    if use_on_column:
        pandas_df[on] = pandas.date_range("22/06/1941", periods=12, freq="min")
        modin_df[on] = pd.date_range("22/06/1941", periods=12, freq="min")
    else:
        on = None

    if axis == "columns":
        pandas_df, modin_df = pandas_df.T, modin_df.T

    rolling_kwargs = {"window": window, "on": on, "axis": axis, "closed": closed}
    pandas_rolled = pandas_df.rolling(**rolling_kwargs)
    modin_rolled = modin_df.rolling(**rolling_kwargs)

    if not isinstance(window, int):
        df_equals(modin_rolled.count(), pandas_rolled.count())
        df_equals(modin_rolled.skew(), pandas_rolled.skew())
        df_equals(
            modin_rolled.apply(np.sum, raw=True),
            pandas_rolled.apply(np.sum, raw=True),
        )
        df_equals(modin_rolled.aggregate(np.sum), pandas_rolled.aggregate(np.sum))
        df_equals(modin_rolled.quantile(0.1), pandas_rolled.quantile(0.1))
        return

    # These functions are very slow on the data from test_rolling
    for method_name in ("corr", "cov"):
        for pairwise in (True, False):
            df_equals(
                getattr(modin_rolled, method_name)(modin_df, pairwise),
                getattr(pandas_rolled, method_name)(pandas_df, pairwise),
            )
    if axis == lib.no_default:
        modin_first_column = modin_df.columns[0]
        pandas_first_column = pandas_df.columns[0]
        df_equals(
            modin_rolled.cov(modin_df[modin_first_column], True),
            pandas_rolled.cov(pandas_df[pandas_first_column], True),
        )
        df_equals(
            modin_rolled.corr(modin_df[modin_first_column], True),
            pandas_rolled.corr(pandas_df[pandas_first_column], True),
        )


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("window", [5, 100])
@pytest.mark.parametrize("min_periods", [None, 5])
@pytest.mark.parametrize(
    "method, kwargs",
    [
        ("count", {}),
        ("sum", {}),
        ("mean", {}),
        ("var", {"ddof": 0}),
        ("std", {"ddof": 0}),
        ("min", {}),
        ("max", {}),
        ("skew", {}),
        ("kurt", {}),
        ("apply", {"func": np.sum}),
        ("rank", {}),
        ("sem", {"ddof": 0}),
        ("aggregate", {"func": np.sum}),
        ("agg", {"func": [np.sum, np.mean]}),
        ("quantile", {"q": 0.1}),
        ("median", {}),
    ],
)
def test_series_rolling(data, window, min_periods, method, kwargs):
    """Compare one method of a centered ``Series.rolling`` between Modin and pandas."""
    # Test of Rolling class
    modin_series, pandas_series = create_test_series(data)
    # The window never exceeds the Series length.
    window = min(window, len(pandas_series))

    def run_rolling_method(series):
        rolled = series.rolling(
            window=window,
            min_periods=min_periods,
            win_type=None,
            center=True,
        )
        return getattr(rolled, method)(**kwargs)

    eval_general(modin_series, pandas_series, run_rolling_method)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("window", [5, 100])
@pytest.mark.parametrize("min_periods", [None, 5])
def test_series_corr_cov(data, window, min_periods):
    """Compare rolling ``corr`` and ``cov`` of a Series against itself."""
    modin_series, pandas_series = create_test_series(data)
    # The window never exceeds the Series length.
    rolling_kwargs = {
        "window": min(window, len(pandas_series)),
        "min_periods": min_periods,
        "win_type": None,
        "center": True,
    }
    modin_rolled = modin_series.rolling(**rolling_kwargs)
    pandas_rolled = pandas_series.rolling(**rolling_kwargs)

    df_equals(modin_rolled.corr(modin_series), pandas_rolled.corr(pandas_series))
    for pairwise in (True, False):
        df_equals(
            modin_rolled.cov(modin_series, pairwise),
            pandas_rolled.cov(pandas_series, pairwise),
        )


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("window", [5, 100])
@pytest.mark.parametrize("min_periods", [None, 5])
@pytest.mark.parametrize(
    "method, kwargs",
    [
        ("sum", {}),
        ("mean", {}),
        ("var", {"ddof": 0}),
        ("std", {"ddof": 0}),
    ],
)
def test_series_window(data, window, min_periods, method, kwargs):
    """Compare one method of a ``win_type="triang"`` Series rolling window."""
    # Test of Window class
    modin_series, pandas_series = create_test_series(data)
    # The window never exceeds the Series length.
    window = min(window, len(pandas_series))

    def run_window_method(series):
        windowed = series.rolling(
            window=window,
            min_periods=min_periods,
            win_type="triang",
            center=True,
        )
        return getattr(windowed, method)(**kwargs)

    eval_general(modin_series, pandas_series, run_window_method)


@pytest.mark.parametrize("closed", ["both", "right"])
def test_series_dt_index(closed):
    """Compare "3s" offset-window rolling results on a minute-frequency Series."""
    minute_index = pandas.date_range("1/1/2000", periods=12, freq="min")
    pandas_series = pandas.Series(range(12), index=minute_index)
    modin_series = pd.Series(range(12), index=minute_index)

    pandas_rolled = pandas_series.rolling("3s", closed=closed)
    modin_rolled = modin_series.rolling("3s", closed=closed)

    df_equals(modin_rolled.count(), pandas_rolled.count())
    df_equals(modin_rolled.skew(), pandas_rolled.skew())
    modin_applied = modin_rolled.apply(np.sum, raw=True)
    pandas_applied = pandas_rolled.apply(np.sum, raw=True)
    df_equals(modin_applied, pandas_applied)
    df_equals(modin_rolled.aggregate(np.sum), pandas_rolled.aggregate(np.sum))
    df_equals(modin_rolled.quantile(0.1), pandas_rolled.quantile(0.1))


def test_api_indexer():
    """Compare rolling ``sum`` when the window is a ``FixedForwardWindowIndexer``."""
    modin_df, pandas_df = create_test_dfs(test_data_values[0])
    forward_indexer = pd.api.indexers.FixedForwardWindowIndexer(window_size=3)
    pandas_sum = pandas_df.rolling(window=forward_indexer).sum()
    modin_sum = modin_df.rolling(window=forward_indexer).sum()
    df_equals(modin_sum, pandas_sum)


def test_issue_3512():
    """Compare the rolling mean (window=21) over the first 33 rows of a 129-row frame."""
    values = np.random.rand(129)
    modin_df = pd.DataFrame(values)
    pandas_df = pandas.DataFrame(values)

    modin_slice = modin_df[0:33]
    pandas_slice = pandas_df[0:33]

    df_equals(
        modin_slice.rolling(window=21).mean(),
        pandas_slice.rolling(window=21).mean(),
    )


### TEST ROLLING WARNINGS ###


def test_rolling_axis_1_depr():
    """``DataFrame.rolling(axis=1)`` must emit the deprecation FutureWarning."""
    minute_index = pandas.date_range("31/12/2000", periods=12, freq="min")
    modin_df = pd.DataFrame({"A": range(12), "B": range(12)}, index=minute_index)
    expected_message = "Support for axis=1 in DataFrame.rolling is deprecated"
    with pytest.warns(FutureWarning, match=expected_message):
        modin_df.rolling(window=3, axis=1)


================================================
FILE: modin/tests/pandas/test_series.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

from __future__ import annotations

import datetime
import itertools
import json
import sys
import unittest.mock as mock
import warnings

import matplotlib
import numpy as np
import pandas
import pandas._libs.lib as lib
import pytest
from numpy.testing import assert_array_equal
from packaging.version import Version
from pandas.core.indexing import IndexingError
from pandas.errors import PerformanceWarning, SpecificationError

import modin.pandas as pd
from modin.config import Engine, NPartitions, StorageFormat
from modin.core.storage_formats.pandas.query_compiler_caster import (
    _assert_casting_functions_wrap_same_implementation,
)
from modin.pandas.io import to_pandas
from modin.tests.test_utils import (
    current_execution_is_native,
    df_or_series_using_native_execution,
    warns_that_defaulting_to_pandas_if,
)
from modin.utils import get_current_execution, try_cast_to_pandas

from .utils import (
    RAND_HIGH,
    RAND_LOW,
    UNIVERSAL_UNARY_NUMPY_FUNCTIONS_FOR_FLOATS,
    CustomIntegerForAddition,
    NonCommutativeMultiplyInteger,
    agg_func_except_keys,
    agg_func_except_values,
    agg_func_keys,
    agg_func_values,
    arg_keys,
    bool_arg_keys,
    bool_arg_values,
    categories_equals,
    create_test_dfs,
    create_test_series,
    default_to_pandas_ignore_string,
    df_equals,
    df_equals_with_non_stable_indices,
    encoding_types,
    eval_general,
    generate_multiindex,
    int_arg_keys,
    int_arg_values,
    name_contains,
    no_numeric_dfs,
    numeric_dfs,
    quantiles_keys,
    quantiles_values,
    random_state,
    sort_if_range_partitioning,
    string_na_rep_keys,
    string_na_rep_values,
    string_sep_keys,
    string_sep_values,
    test_data,
    test_data_categorical_keys,
    test_data_categorical_values,
    test_data_diff_dtype,
    test_data_keys,
    test_data_large_categorical_series_keys,
    test_data_large_categorical_series_values,
    test_data_small_keys,
    test_data_small_values,
    test_data_values,
    test_data_with_duplicates_keys,
    test_data_with_duplicates_values,
    test_string_data_keys,
    test_string_data_values,
    test_string_list_data_keys,
    test_string_list_data_values,
)

# Our configuration in pytest.ini requires that we explicitly catch all
# instances of defaulting to pandas, but some test modules, like this one,
# have too many such instances.
# TODO(https://github.com/modin-project/modin/issues/3655): catch all instances
# of defaulting to pandas.
# Module-level pytest marks: every entry below is a warning filter.
pytestmark = [
    pytest.mark.filterwarnings(default_to_pandas_ignore_string),
    # IGNORE FUTUREWARNINGS MARKS TO CLEANUP OUTPUT
    # Each pattern below ignores one specific FutureWarning message
    # ("bool", "first" and "last" deprecations respectively).
    pytest.mark.filterwarnings(
        "ignore:.*bool is now deprecated and will be removed:FutureWarning"
    ),
    pytest.mark.filterwarnings(
        "ignore:first is deprecated and will be removed:FutureWarning"
    ),
    pytest.mark.filterwarnings(
        "ignore:last is deprecated and will be removed:FutureWarning"
    ),
]

# Set the NPartitions config value to 4; this runs once, when the module is imported.
NPartitions.put(4)

# Force matplotlib to not use any Xwindows backend.
matplotlib.use("Agg")

# Initialize the environment
# NOTE(review): presumably creating an empty frame triggers Modin's engine
# start-up before any test runs - confirm against modin.pandas internals.
pd.DataFrame()


def get_rop(op):
    """
    Build the reflected variant of a dunder operator name.

    Parameters
    ----------
    op : str
        Operator name, e.g. ``"__add__"`` or ``"add"``.

    Returns
    -------
    str or None
        ``"__r"`` followed by everything after the first two characters of
        `op` when `op` both starts and ends with a double underscore
        (``"__add__"`` -> ``"__radd__"``); ``None`` otherwise.
    """
    is_dunder = op.startswith("__") and op.endswith("__")
    return ("__r" + op[2:]) if is_dunder else None


def inter_df_math_helper(
    modin_series, pandas_series, op, comparator_kwargs=None, expected_exception=None
):
    """
    Run the one-sided math checks for `op` and, if it has one, its reflected form.

    The reflected name comes from ``get_rop``; when that returns a falsy value
    only `op` itself is checked. All other arguments are forwarded unchanged
    to ``inter_df_math_helper_one_side``.
    """
    op_names = [op]
    reflected = get_rop(op)
    if reflected:
        op_names.append(reflected)

    for op_name in op_names:
        inter_df_math_helper_one_side(
            modin_series, pandas_series, op_name, comparator_kwargs, expected_exception
        )


def inter_df_math_helper_one_side(
    modin_series,
    pandas_series,
    op,
    comparator_kwargs=None,
    expected_exception=None,
):
    """
    Compare the method named `op` on a Modin and a pandas series for several operands.

    The method is looked up on both objects with ``getattr`` and then called
    with, in order: the int ``4``, the float ``4.0``, the series itself, a
    random integer array, a series built from that array, and finally the
    series itself with ``level=1`` on a MultiIndex copy.

    Parameters
    ----------
    modin_series : object
        Modin object under test.
    pandas_series : object
        pandas counterpart used as the reference.
    op : str
        Name of the attribute to fetch from both objects and call.
    comparator_kwargs : dict, optional
        Extra keyword arguments passed to ``df_equals`` and ``eval_general``.
    expected_exception : optional
        Forwarded as-is to ``eval_general``.
    """
    if comparator_kwargs is None:
        comparator_kwargs = {}

    # If pandas cannot even resolve the attribute, Modin must fail the same way.
    try:
        pandas_attr = getattr(pandas_series, op)
    except Exception as err:
        with pytest.raises(type(err)):
            _ = getattr(modin_series, op)
        return
    modin_attr = getattr(modin_series, op)

    # Integer scalar operand.
    try:
        pandas_result = pandas_attr(4)
    except Exception as err:
        with pytest.raises(type(err)):
            try_cast_to_pandas(modin_attr(4))  # force materialization
    else:
        modin_result = modin_attr(4)
        df_equals(modin_result, pandas_result, **comparator_kwargs)

    # Float scalar operand.
    try:
        pandas_result = pandas_attr(4.0)
    except Exception as err:
        with pytest.raises(type(err)):
            try_cast_to_pandas(modin_attr(4.0))  # force materialization
    else:
        modin_result = modin_attr(4.0)
        df_equals(modin_result, pandas_result, **comparator_kwargs)

    # These operations don't support non-scalar `other` or have a strange behavior in
    # the testing environment
    if op in [
        "__divmod__",
        "divmod",
        "rdivmod",
        "floordiv",
        "__floordiv__",
        "rfloordiv",
        "__rfloordiv__",
        "mod",
        "__mod__",
        "rmod",
        "__rmod__",
    ]:
        return

    # Series operand: each object is combined with itself; the lambda picks the
    # bound method that belongs to the library of the object it receives.
    eval_general(
        modin_series,
        pandas_series,
        lambda df: (pandas_attr if isinstance(df, pandas.Series) else modin_attr)(df),
        comparator_kwargs=comparator_kwargs,
        expected_exception=expected_exception,
    )

    # Array operand: random integers, one per row of the series.
    list_test = random_state.randint(RAND_LOW, RAND_HIGH, size=(modin_series.shape[0]))
    try:
        pandas_result = pandas_attr(list_test)
    except Exception as err:
        with pytest.raises(type(err)):
            try_cast_to_pandas(modin_attr(list_test))  # force materialization
    else:
        modin_result = modin_attr(list_test)
        df_equals(modin_result, pandas_result, **comparator_kwargs)

    # Same random values again, this time wrapped in series sharing the original index.
    series_test_modin = pd.Series(list_test, index=modin_series.index)
    series_test_pandas = pandas.Series(list_test, index=pandas_series.index)

    eval_general(
        series_test_modin,
        series_test_pandas,
        lambda df: (pandas_attr if isinstance(df, pandas.Series) else modin_attr)(df),
        comparator_kwargs=comparator_kwargs,
        expected_exception=expected_exception,
    )

    # Level test
    # Build a three-level index derived from the existing labels
    # (assumes the labels support integer floor division).
    new_idx = pandas.MultiIndex.from_tuples(
        [(i // 4, i // 2, i) for i in modin_series.index]
    )
    modin_df_multi_level = modin_series.copy()
    modin_df_multi_level.index = new_idx
    # When 'level' parameter is passed, modin's implementation must raise a default-to-pandas warning,
    # here we first detect whether 'op' takes 'level' parameter at all and only then perform the warning check
    # reasoning: https://github.com/modin-project/modin/issues/6893
    try:
        getattr(modin_df_multi_level, op)(modin_df_multi_level, level=1)
    except TypeError:
        # Operation doesn't support 'level' parameter
        pass
    else:
        # Operation supports 'level' parameter, so it makes sense to check for a warning
        with warns_that_defaulting_to_pandas_if(
            not df_or_series_using_native_execution(modin_df_multi_level)
        ):
            getattr(modin_df_multi_level, op)(modin_df_multi_level, level=1)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_to_frame(data):
    """``to_frame`` with an explicit name must give the same frame as pandas."""
    md_ser, pd_ser = create_test_series(data)
    frame_name = "miao"
    md_frame = md_ser.to_frame(name=frame_name)
    pd_frame = pd_ser.to_frame(name=frame_name)
    df_equals(md_frame, pd_frame)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_to_list(data):
    """``to_list`` must return the same container type and values as pandas."""
    md_ser, pd_ser = create_test_series(data)
    expected = pd_ser.to_list()
    actual = md_ser.to_list()
    assert type(expected) is type(actual)
    # NaN positions are treated as equal to each other.
    assert np.array_equal(expected, actual, equal_nan=True)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_to_json(data):
    """``to_json`` must return the same type and value as pandas."""
    md_ser, pd_ser = create_test_series(data)
    expected = pd_ser.to_json()
    actual = md_ser.to_json()
    assert type(expected) is type(actual)
    assert expected == actual


def test_accessing_index_element_as_property():
    """Index labels are readable as attributes; unknown names raise AttributeError."""
    s = pd.Series([10, 20, 30], index=["a", "b", "c"])
    assert s.b == 20
    # "d" is neither a Series attribute nor an index label. Expect the specific
    # AttributeError rather than any Exception, so that an unrelated failure
    # inside attribute lookup cannot make this test pass by accident.
    with pytest.raises(AttributeError):
        _ = s.d


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_callable_key_in_getitem(data):
    """Indexing with a callable key must behave like pandas."""
    modin_series, pandas_series = create_test_series(data)

    def even_labels(s):
        # Boolean mask selecting the entries whose index label is even.
        return s.index % 2 == 0

    df_equals(modin_series[even_labels], pandas_series[even_labels])


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_T(data):
    """The ``T`` attribute must match pandas."""
    md_ser, pd_ser = create_test_series(data)
    md_transposed = md_ser.T
    pd_transposed = pd_ser.T
    df_equals(md_transposed, pd_transposed)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test___abs__(data):
    """``__abs__`` must match pandas."""
    md_ser, pd_ser = create_test_series(data)
    md_abs = md_ser.__abs__()
    pd_abs = pd_ser.__abs__()
    df_equals(md_abs, pd_abs)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test___add__(data):
    """Run the shared binary-operation checks for ``__add__``."""
    inter_df_math_helper(*create_test_series(data), "__add__")


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test___and__(data, request):
    """Run the shared binary-operation checks for ``__and__``."""
    md_ser, pd_ser = create_test_series(data)
    # FIXME: https://github.com/modin-project/modin/issues/7037
    # For the float/NaN dataset the exception message check is switched off.
    skip_message_check = "float_nan_data" in request.node.callspec.id
    inter_df_math_helper(
        md_ser,
        pd_ser,
        "__and__",
        # https://github.com/modin-project/modin/issues/5966
        comparator_kwargs={"check_dtypes": False},
        expected_exception=False if skip_message_check else None,
    )


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("copy_kwargs", ({"copy": True}, {"copy": None}, {}))
@pytest.mark.parametrize(
    "get_array, get_array_name",
    (
        (lambda df, copy_kwargs: df.__array__(**copy_kwargs), "__array__"),
        (lambda df, copy_kwargs: np.array(df, **copy_kwargs), "np.array"),
    ),
)
def test___array__(data, copy_kwargs, get_array, get_array_name):
    """Array conversion must yield the same values for Modin and pandas."""
    numpy_is_pre_2 = Version(np.__version__) < Version("2")
    wants_copy_none = "copy" in copy_kwargs and copy_kwargs["copy"] is None
    if get_array_name == "np.array" and numpy_is_pre_2 and wants_copy_none:
        pytest.skip(reason="np.array does not support copy=None before numpy 2.0")

    md_ser, pd_ser = create_test_series(data)
    md_array = get_array(md_ser, copy_kwargs)
    pd_array = get_array(pd_ser, copy_kwargs)
    assert_array_equal(md_array, pd_array)


@pytest.mark.xfail(
    raises=AssertionError, reason="https://github.com/modin-project/modin/issues/4650"
)
def test___array__copy_false_creates_view():
    """Writing through ``np.array(series, copy=False)`` is compared in place."""

    def bump_first_element(series):
        # Mutate the array obtained without copying.
        as_array = np.array(series, copy=False)
        as_array[0] += 1

    md_ser, pd_ser = create_test_series([11])
    eval_general(md_ser, pd_ser, bump_first_element, __inplace__=True)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test___bool__(data):
    """``__bool__`` must return, or raise, the same thing as pandas."""
    md_ser, pd_ser = create_test_series(data)
    try:
        expected = pd_ser.__bool__()
    except Exception as pandas_error:
        # pandas failed, so Modin has to fail with the same exception type.
        with pytest.raises(type(pandas_error)):
            md_ser.__bool__()
        return

    df_equals(md_ser.__bool__(), expected)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test___contains__(request, data):
    """Membership checks: a missing key is absent, the first key is present."""
    modin_series, pandas_series = create_test_series(data)

    missing_key, expected = "Not Exist", False
    assert expected == modin_series.__contains__(missing_key)
    assert expected == (missing_key in modin_series)

    # There is no first key to look up when the test id says "empty_data".
    if "empty_data" in request.node.name:
        return

    first_key, expected = pandas_series.keys()[0], True
    assert expected == modin_series.__contains__(first_key)
    assert expected == (first_key in modin_series)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test___copy__(data):
    """Both ``copy()`` and the ``__copy__`` dunder must reproduce the series."""
    modin_series, pandas_series = create_test_series(data)
    df_equals(modin_series.copy(), modin_series)
    df_equals(modin_series.copy(), pandas_series.copy())
    df_equals(modin_series.copy(), pandas_series)

    # The test is named after ``__copy__``, so call the dunder as well; the
    # checks above only go through the public ``copy()`` method.
    df_equals(modin_series.__copy__(), modin_series)
    df_equals(modin_series.__copy__(), pandas_series.__copy__())
    df_equals(modin_series.__copy__(), pandas_series)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test___deepcopy__(data):
    """A deep copy must equal the original, the pandas deep copy and the pandas series."""
    md_ser, pd_ser = create_test_series(data)
    references = (md_ser, pd_ser.__deepcopy__(), pd_ser)
    for reference in references:
        df_equals(md_ser.__deepcopy__(), reference)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test___delitem__(data):
    """Deleting the first, the last and again the first label must match pandas."""
    md_ser, pd_ser = create_test_series(data)
    for position in (0, -1, 0):
        del md_ser[md_ser.index[position]]
        del pd_ser[pd_ser.index[position]]
        df_equals(md_ser, pd_ser)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_divmod(data):
    """Run the shared binary-operation checks for ``divmod``."""
    inter_df_math_helper(*create_test_series(data), "divmod")


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_rdivmod(data):
    """Run the shared binary-operation checks for ``rdivmod``."""
    inter_df_math_helper(*create_test_series(data), "rdivmod")


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test___eq__(data):
    """Run the shared binary-operation checks for ``__eq__``."""
    inter_df_math_helper(*create_test_series(data), "__eq__")


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test___floordiv__(data):
    """Run the shared binary-operation checks for ``__floordiv__``."""
    inter_df_math_helper(*create_test_series(data), "__floordiv__")


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test___ge__(data):
    """Run the shared binary-operation checks for ``__ge__``."""
    inter_df_math_helper(*create_test_series(data), "__ge__")


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test___getitem__(data):
    """``__getitem__`` with scalars, slices, masks and invalid keys must match pandas."""
    modin_series, pandas_series = create_test_series(data)
    df_equals(modin_series[0], pandas_series[0])
    df_equals(
        modin_series[modin_series.index[-1]], pandas_series[pandas_series.index[-1]]
    )

    # The remaining checks run on a fixed 0..999 integer series.
    values = list(range(1000))
    modin_series = pd.Series(values)
    pandas_series = pandas.Series(values)

    head = slice(None, 30)
    every_other = slice(None, None, 2)
    df_equals(modin_series[head], pandas_series[head])
    df_equals(modin_series[modin_series > 500], pandas_series[pandas_series > 500])
    df_equals(modin_series[every_other], pandas_series[every_other])

    # Test getting an invalid string key
    # FIXME: https://github.com/modin-project/modin/issues/7038
    for invalid_key in ("a", ["a"]):
        eval_general(
            modin_series,
            pandas_series,
            lambda s, key=invalid_key: s[key],
            expected_exception=False,
        )

    # Test empty series
    df_equals(pd.Series([])[head], pandas.Series([])[head])


def test___getitem__1383():
    """Slicing past the end of a string series must match pandas (see #1383)."""
    # see #1383 for more details
    strings = ["", "a", "b", "c", "a"]
    md_ser, pd_ser = pd.Series(strings), pandas.Series(strings)
    window = slice(3, 7)
    df_equals(md_ser[window], pd_ser[window])


@pytest.mark.parametrize("start", [-7, -5, -3, 0, None, 3, 5, 7])
@pytest.mark.parametrize("stop", [-7, -5, -3, 0, None, 3, 5, 7])
def test___getitem_edge_cases(start, stop):
    """Slices with negative, missing and out-of-range bounds must match pandas."""
    strings = ["", "a", "b", "c", "a"]
    md_ser, pd_ser = pd.Series(strings), pandas.Series(strings)
    window = slice(start, stop)
    df_equals(md_ser[window], pd_ser[window])


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test___gt__(data):
    """Run the shared binary-operation checks for ``__gt__``."""
    inter_df_math_helper(*create_test_series(data), "__gt__")


@pytest.mark.parametrize("count_elements", [0, 1, 10])
def test___int__(count_elements):
    """``int(series)`` is expected to raise TypeError unless there is exactly one element."""
    if count_elements == 1:
        expected_exception = None
    else:
        expected_exception = TypeError("cannot convert the series to <class 'int'>")
    md_ser, pd_ser = create_test_series([1.5] * count_elements)
    eval_general(md_ser, pd_ser, int, expected_exception=expected_exception)


@pytest.mark.parametrize("count_elements", [0, 1, 10])
def test___float__(count_elements):
    """``float(series)`` is expected to raise TypeError unless there is exactly one element."""
    if count_elements == 1:
        expected_exception = None
    else:
        expected_exception = TypeError("cannot convert the series to <class 'float'>")
    md_ser, pd_ser = create_test_series([1] * count_elements)
    eval_general(md_ser, pd_ser, float, expected_exception=expected_exception)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test___invert__(data, request):
    """``__invert__`` must match pandas."""
    md_ser, pd_ser = create_test_series(data)
    # FIXME: https://github.com/modin-project/modin/issues/7081
    # For the float/NaN dataset the exception message check is switched off.
    skip_message_check = "float_nan_data" in request.node.callspec.id

    def invert(ser):
        return ser.__invert__()

    eval_general(
        md_ser,
        pd_ser,
        invert,
        expected_exception=False if skip_message_check else None,
    )


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test___iter__(data):
    """Iterating a Modin series must yield the same items, and as many, as pandas."""
    modin_series, pandas_series = create_test_series(data)
    modin_items = list(modin_series.__iter__())
    pandas_items = list(pandas_series.__iter__())
    # zip() stops at the shorter input, so without this check an iterator that
    # yields too few or too many items would go unnoticed.
    assert len(modin_items) == len(pandas_items)
    for m, p in zip(modin_items, pandas_items):
        np.testing.assert_equal(m, p)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test___le__(data):
    """Run the shared binary-operation checks for ``__le__``."""
    inter_df_math_helper(*create_test_series(data), "__le__")


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test___len__(data):
    """``len`` must agree between Modin and pandas."""
    md_ser, pd_ser = create_test_series(data)
    md_length = len(md_ser)
    pd_length = len(pd_ser)
    assert md_length == pd_length


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test___long__(data):
    """``__long__`` on the element at key 0 must return, or raise, the same as pandas."""
    md_ser, pd_ser = create_test_series(data)
    try:
        expected = pd_ser[0].__long__()
    except Exception as pandas_error:
        # pandas failed, so Modin has to fail with the same exception type.
        with pytest.raises(type(pandas_error)):
            md_ser[0].__long__()
        return

    assert md_ser[0].__long__() == expected


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test___lt__(data):
    """Run the shared binary-operation checks for ``__lt__``."""
    inter_df_math_helper(*create_test_series(data), "__lt__")


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test___mod__(data):
    """Run the shared binary-operation checks for ``__mod__``."""
    inter_df_math_helper(*create_test_series(data), "__mod__")


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test___mul__(data):
    """Run the shared binary-operation checks for ``__mul__``."""
    inter_df_math_helper(*create_test_series(data), "__mul__")


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test___ne__(data):
    """Run the shared binary-operation checks for ``__ne__``."""
    inter_df_math_helper(*create_test_series(data), "__ne__")


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test___neg__(data):
    """``__neg__`` must match pandas."""
    md_ser, pd_ser = create_test_series(data)

    def negate(ser):
        return ser.__neg__()

    eval_general(md_ser, pd_ser, negate)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test___or__(data, request):
    """Run the shared binary-operation checks for ``__or__``."""
    md_ser, pd_ser = create_test_series(data)
    # FIXME: https://github.com/modin-project/modin/issues/7081
    # For the float/NaN dataset the exception message check is switched off.
    skip_message_check = "float_nan_data" in request.node.callspec.id
    inter_df_math_helper(
        md_ser,
        pd_ser,
        "__or__",
        # https://github.com/modin-project/modin/issues/5966
        comparator_kwargs={"check_dtypes": False},
        expected_exception=False if skip_message_check else None,
    )


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test___pow__(data):
    """Run the shared binary-operation checks for ``__pow__``."""
    inter_df_math_helper(*create_test_series(data), "__pow__")


@pytest.mark.parametrize("name", ["Dates", None])
@pytest.mark.parametrize(
    "dt_index", [True, False], ids=["dt_index_true", "dt_index_false"]
)
@pytest.mark.parametrize(
    "data",
    [*test_data_values, "empty"],
    ids=[*test_data_keys, "empty"],
)
def test___repr__(name, dt_index, data):
    """``repr`` must match pandas for named/unnamed series, with or without a datetime index."""
    if data == "empty":
        series_pair = pd.Series(), pandas.Series()
    else:
        series_pair = create_test_series(data)
    modin_series, pandas_series = series_pair

    pandas_series.name = name
    modin_series.name = name

    if dt_index:
        # Swap the index for a minute-frequency datetime index of the same length.
        minute_index = pandas.date_range(
            "1/1/2000", periods=len(pandas_series.index), freq="min"
        )
        pandas_series.index = minute_index
        modin_series.index = minute_index

    assert repr(modin_series) == repr(pandas_series)


def test___repr__4186():
    """``repr`` of a categorical series must match pandas."""
    labels = ["a", "b", "c", "a"]
    md_ser, pd_ser = create_test_series(labels, dtype="category")
    md_text = repr(md_ser)
    pd_text = repr(pd_ser)
    assert md_text == pd_text


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.exclude_in_sanity
def test___round__(data):
    """The built-in ``round`` must match pandas."""
    md_ser, pd_ser = create_test_series(data)
    md_rounded = round(md_ser)
    pd_rounded = round(pd_ser)
    df_equals(md_rounded, pd_rounded)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.exclude_in_sanity
def test___setitem__(data):
    """Assigning 0 label by label must keep Modin and pandas in sync after each step."""
    md_ser, pd_ser = create_test_series(data)
    new_value = 0
    for label in md_ser.keys():
        md_ser[label] = new_value
        pd_ser[label] = new_value
        df_equals(md_ser, pd_ser)


@pytest.mark.parametrize(
    "key",
    [
        pytest.param(lambda idx: slice(1, 3), id="location_based_slice"),
        pytest.param(lambda idx: slice(idx[1], idx[-1]), id="index_based_slice"),
        pytest.param(lambda idx: [idx[0], idx[2], idx[-1]], id="list_of_labels"),
        pytest.param(
            lambda idx: [True if i % 2 else False for i in range(len(idx))],
            id="boolean_mask",
        ),
    ],
)
@pytest.mark.parametrize(
    "index",
    [
        pytest.param(
            lambda idx_len: [chr(x) for x in range(ord("a"), ord("a") + idx_len)],
            id="str_index",
        ),
        pytest.param(lambda idx_len: list(range(1, idx_len + 1)), id="int_index"),
    ],
)
def test___setitem___non_hashable(key, index):
    """Assignment through slice, label-list and boolean-mask keys must match pandas."""
    values = np.arange(5)
    # Both parameters are factories: build the index first, then the key from it.
    labels = index(len(values))
    target = key(labels)
    md_sr, pd_sr = create_test_series(values, index=labels)

    new_value = 10
    md_sr[target] = new_value
    pd_sr[target] = new_value
    df_equals(md_sr, pd_sr)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test___sizeof__(data):
    """``__sizeof__`` must warn about defaulting to pandas unless execution is native."""
    md_ser, _ = create_test_series(data)
    expect_warning = not df_or_series_using_native_execution(md_ser)
    with warns_that_defaulting_to_pandas_if(expect_warning):
        md_ser.__sizeof__()


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test___str__(data):
    """``str`` must match pandas."""
    md_ser, pd_ser = create_test_series(data)
    md_text = str(md_ser)
    pd_text = str(pd_ser)
    assert md_text == pd_text


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test___sub__(data):
    """Run the shared binary-operation checks for ``__sub__``."""
    inter_df_math_helper(*create_test_series(data), "__sub__")


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test___truediv__(data):
    """Run the shared binary-operation checks for ``__truediv__``."""
    inter_df_math_helper(*create_test_series(data), "__truediv__")


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test___xor__(data, request):
    """Run the shared binary-operation checks for ``__xor__``."""
    md_ser, pd_ser = create_test_series(data)
    # FIXME: https://github.com/modin-project/modin/issues/7081
    # For the float/NaN dataset the exception message check is switched off.
    skip_message_check = "float_nan_data" in request.node.callspec.id
    inter_df_math_helper(
        md_ser,
        pd_ser,
        "__xor__",
        # https://github.com/modin-project/modin/issues/5966
        comparator_kwargs={"check_dtypes": False},
        expected_exception=False if skip_message_check else None,
    )


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_abs(data):
    """``abs()`` must match pandas."""
    md_ser, pd_ser = create_test_series(data)
    md_abs = md_ser.abs()
    pd_abs = pd_ser.abs()
    df_equals(md_abs, pd_abs)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_add(data):
    """Run the shared binary-operation checks for ``add``."""
    inter_df_math_helper(*create_test_series(data), "add")


def test_add_does_not_change_original_series_name():
    """Adding two differently named series must leave both operands unchanged."""
    # See https://github.com/modin-project/modin/issues/5232
    left = pd.Series(1, name=1)
    right = pd.Series(2, name=2)
    left_snapshot = left.copy(deep=True)
    right_snapshot = right.copy(deep=True)

    _ = left + right

    for operand, snapshot in ((left, left_snapshot), (right, right_snapshot)):
        df_equals(operand, snapshot)


@pytest.mark.parametrize("axis", [None, 0, 1])
@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_add_prefix(data, axis):
    """``add_prefix`` must match pandas; a truthy ``axis`` is expected to raise ValueError."""
    expected_exception = (
        ValueError("No axis named 1 for object type Series") if axis else None
    )
    md_ser, pd_ser = create_test_series(data)

    def add_prefix(df):
        return df.add_prefix("PREFIX_ADD_", axis=axis)

    eval_general(md_ser, pd_ser, add_prefix, expected_exception=expected_exception)


@pytest.mark.parametrize("axis", [None, 0, 1])
@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_add_suffix(data, axis):
    """``add_suffix`` must match pandas; a truthy ``axis`` is expected to raise ValueError."""
    expected_exception = (
        ValueError("No axis named 1 for object type Series") if axis else None
    )
    md_ser, pd_ser = create_test_series(data)

    def add_suffix(df):
        return df.add_suffix("SUFFIX_ADD_", axis=axis)

    eval_general(md_ser, pd_ser, add_suffix, expected_exception=expected_exception)


def test_add_custom_class():
    """A series must accept ``+`` with a custom addable object, like pandas does."""
    # see https://github.com/modin-project/modin/issues/5236
    # Test that we can add any object that is addable to pandas object data
    # via "+".
    md_ser, pd_ser = create_test_series(test_data["int_data"])

    def add_custom_integer(df):
        # A fresh operand is created for every call.
        return df + CustomIntegerForAddition(4)

    eval_general(md_ser, pd_ser, add_custom_integer)


def test_aggregate_alias():
    """``Series.aggregate`` and ``Series.agg`` must wrap the same implementation."""
    # This check is an optimization: if it ever fails, Series.agg has to be
    # tested explicitly instead of relying on the Series.aggregate tests.
    aggregate_impl = pd.Series.aggregate
    agg_impl = pd.Series.agg
    _assert_casting_functions_wrap_same_implementation(aggregate_impl, agg_impl)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("func", agg_func_values, ids=agg_func_keys)
def test_aggregate(data, func, request):
    """``aggregate`` must match pandas for every aggregation function."""
    # FIXME: https://github.com/modin-project/modin/issues/7031
    # For this case id the exception message check is switched off.
    skip_message_check = "should raise AssertionError" in request.node.callspec.id
    md_ser, pd_ser = create_test_series(data)

    def aggregate(df):
        return df.aggregate(func)

    eval_general(
        md_ser,
        pd_ser,
        aggregate,
        expected_exception=False if skip_message_check else None,
    )


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("func", agg_func_except_values, ids=agg_func_except_keys)
def test_aggregate_except(data, func):
    """``aggregate`` with an invalid specification must raise SpecificationError."""
    md_ser, pd_ser = create_test_series(data)

    def aggregate(df):
        return df.aggregate(func)

    # SpecificationError is raised because we treat a Series as a DataFrame.
    # See details in pandas issues 36036.
    with pytest.raises(SpecificationError):
        eval_general(md_ser, pd_ser, aggregate)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_aggregate_error_checking(data):
    """``aggregate`` with string names: an attribute, a method and an unknown name."""
    modin_series, pandas_series = create_test_series(data)

    # A string naming a plain attribute returns that attribute's value.
    for series in (pandas_series, modin_series):
        assert series.aggregate("ndim") == 1

    def aggregate_cumprod(series):
        return series.aggregate("cumprod")

    def aggregate_unknown(series):
        return series.aggregate("NOT_EXISTS")

    eval_general(modin_series, pandas_series, aggregate_cumprod)
    eval_general(
        modin_series,
        pandas_series,
        aggregate_unknown,
        expected_exception=AttributeError(
            "'NOT_EXISTS' is not a valid function for 'Series' object"
        ),
    )


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_align(data):
    """``align`` must warn about defaulting to pandas unless execution is native."""
    md_ser, _ = create_test_series(data)
    expect_warning = not df_or_series_using_native_execution(md_ser)
    with warns_that_defaulting_to_pandas_if(expect_warning):
        md_ser.align(md_ser)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("skipna", [False, True])
def test_all(data, skipna):
    """``all`` must match pandas for both ``skipna`` settings."""
    md_ser, pd_ser = create_test_series(data)

    def reduce_all(df):
        return df.all(skipna=skipna)

    eval_general(md_ser, pd_ser, reduce_all)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("skipna", [False, True])
def test_any(data, skipna):
    """``any`` must match pandas for both ``skipna`` settings."""
    md_ser, pd_ser = create_test_series(data)

    def reduce_any(df):
        return df.any(skipna=skipna)

    eval_general(md_ser, pd_ser, reduce_any)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_append(data):
    """``append`` must return, or raise, the same as pandas for several argument shapes."""
    modin_series, pandas_series = create_test_series(data)

    def check_append(do_append):
        # Run `do_append` on pandas first; Modin must then either raise the
        # same exception type or produce an equal result.
        try:
            expected = do_append(pandas_series)
        except Exception as pandas_error:
            with pytest.raises(type(pandas_error)):
                do_append(modin_series)
        else:
            df_equals(do_append(modin_series), expected)

    data_to_append = {"append_a": 2, "append_b": 1000}
    for ignore in (True, False):
        check_append(
            lambda s, ignore=ignore: s.append(data_to_append, ignore_index=ignore)
        )

    # Appending the last element, bare and wrapped in a list.
    check_append(lambda s: s.append(s.iloc[-1]))
    check_append(lambda s: s.append([s.iloc[-1]]))

    for verify_integrity in (True, False):
        check_append(
            lambda s, verify=verify_integrity: s.append(
                [s, s], verify_integrity=verify
            )
        )
        check_append(
            lambda s, verify=verify_integrity: s.append(s, verify_integrity=verify)
        )


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("func", agg_func_values, ids=agg_func_keys)
def test_apply(data, func, request):
    """``apply`` must match pandas for every aggregation function."""
    case_id = request.node.callspec.id
    if "should raise AssertionError" in case_id:
        # FIXME: https://github.com/modin-project/modin/issues/7031
        expected_exception = False
    elif "df sum" in case_id:
        # The expected message names the scalar type the function is applied to.
        _type = "int" if "int_data" in case_id else "float"
        expected_exception = AttributeError(f"'{_type}' object has no attribute 'sum'")
    else:
        expected_exception = None

    md_ser, pd_ser = create_test_series(data)

    def apply_func(df):
        return df.apply(func)

    eval_general(md_ser, pd_ser, apply_func, expected_exception=expected_exception)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("func", agg_func_except_values, ids=agg_func_except_keys)
def test_apply_except(data, func):
    """``apply`` with an invalid specification must raise the pandas SpecificationError."""
    md_ser, pd_ser = create_test_series(data)
    expected_exception = pandas.errors.SpecificationError(
        "Function names must be unique if there is no new column names assigned"
    )

    def apply_func(df):
        return df.apply(func)

    eval_general(md_ser, pd_ser, apply_func, expected_exception=expected_exception)


def test_apply_external_lib():
    """Apply ``json.loads`` to a one-row column of JSON text in modin and pandas."""
    json_string = """
    {
        "researcher": {
            "name": "Ford Prefect",
            "species": "Betelgeusian",
            "relatives": [
                {
                    "name": "Zaphod Beeblebrox",
                    "species": "Betelgeusian"
                }
            ]
        }
    }
    """
    frame_data = {"a": [json_string]}
    modin_column = pd.DataFrame.from_dict(frame_data).a
    modin_result = modin_column.apply(json.loads)
    pandas_column = pandas.DataFrame.from_dict(frame_data).a
    pandas_result = pandas_column.apply(json.loads)
    df_equals(modin_result, pandas_result)


@pytest.mark.parametrize("axis", [None, 0, 1])
@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("func", ["count", "all", "kurt", "array", "searchsorted"])
def test_apply_text_func(data, func, axis):
    """Run ``Series.apply`` with a string function name on a MultiIndex-ed series.

    ``axis`` is only forwarded for functions other than "count" and
    "searchsorted"; for those two the test returns early when ``axis`` is falsy.
    """
    func_kwargs = {}
    if func in ("count", "searchsorted"):
        if not axis:
            # FIXME: https://github.com/modin-project/modin/issues/7000
            return
    else:
        func_kwargs["axis"] = axis

    # length of the first data column
    first_column = next(iter(data.values()))
    rows_number = len(first_column)
    level_0 = np.random.choice([0, 1, 2], rows_number)
    level_1 = np.random.choice([3, 4, 5], rows_number)
    index = pd.MultiIndex.from_arrays([level_0, level_1])

    modin_series, pandas_series = create_test_series(data)
    # Both series share the very same index object.
    modin_series.index = index
    pandas_series.index = index

    if func == "searchsorted":
        # required parameter
        func_kwargs["value"] = pandas_series[1]

    eval_general(
        modin_series,
        pandas_series,
        lambda df: df.apply(func, **func_kwargs),
    )


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("skipna", [True, False])
def test_argmax(data, skipna):
    """Compare ``Series.argmax`` between modin and pandas for both ``skipna`` values."""
    modin_series, pandas_series = create_test_series(data)
    modin_position = modin_series.argmax(skipna=skipna)
    pandas_position = pandas_series.argmax(skipna=skipna)
    df_equals(modin_position, pandas_position)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("skipna", [True, False])
def test_argmin(data, skipna):
    """Compare ``Series.argmin`` between modin and pandas for both ``skipna`` values."""
    modin_series, pandas_series = create_test_series(data)
    modin_position = modin_series.argmin(skipna=skipna)
    pandas_position = pandas_series.argmin(skipna=skipna)
    df_equals(modin_position, pandas_position)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_argsort(data):
    """Compare ``Series.argsort`` with pandas.

    The modin call runs inside ``warns_that_defaulting_to_pandas_if``, enabled
    when the series is not using native execution.
    """
    modin_series, pandas_series = create_test_series(data)
    expect_default_warning = not df_or_series_using_native_execution(modin_series)
    with warns_that_defaulting_to_pandas_if(expect_default_warning):
        modin_result = modin_series.argsort()
    pandas_result = pandas_series.argsort()
    df_equals(modin_result, pandas_result)


def test_asfreq():
    """Call ``Series.asfreq`` on a minute-frequency series and check the default-to-pandas warning.

    No result comparison is made; only the warning context is exercised.
    """
    index = pd.date_range("1/1/2000", periods=4, freq="min")
    series = pd.Series([0.0, None, 2.0, 3.0], index=index)
    with warns_that_defaulting_to_pandas_if(
        not df_or_series_using_native_execution(series)
    ):
        # We are only testing that this defaults to pandas, so we will just check for
        # the warning.
        # Use the lower-case "s" alias: the upper-case "S" spelling is deprecated
        # in pandas 2.2 and would add an unrelated FutureWarning to this call.
        series.asfreq(freq="30s")


@pytest.mark.parametrize(
    "where",
    [
        20,
        30,
        [10, 40],
        [20, 30],
        [20],
        25,
        [25, 45],
        [25, 30],
        pandas.Index([20, 30]),
        pandas.Index([10]),
    ],
)
def test_asof(where):
    """Compare ``Series.asof(where)`` with pandas, first with a NaN value, then without."""
    index = [10, 20, 30, 40]
    value_sets = (
        [1, 2, np.nan, 4],  # With NaN
        [1, 2, 7, 4],  # No NaN
    )
    for values in value_sets:
        modin_series = pd.Series(values, index=index)
        pandas_series = pandas.Series(values, index=index)
        df_equals(modin_series.asof(where), pandas_series.asof(where))


@pytest.mark.parametrize(
    "where",
    [20, 30, [10.5, 40.5], [10], pandas.Index([20, 30]), pandas.Index([10.5])],
)
def test_asof_large(where):
    """Compare ``Series.asof(where)`` with pandas on the ``float_nan_data`` ``col1`` values."""
    values = test_data["float_nan_data"]["col1"]
    # Positional integer labels: 0 .. len(values) - 1
    index = list(range(len(values)))
    modin_series = pd.Series(values, index=index)
    pandas_series = pandas.Series(values, index=index)
    modin_result = modin_series.asof(where)
    pandas_result = pandas_series.asof(where)
    df_equals(modin_result, pandas_result)


@pytest.mark.parametrize(
    "data",
    [
        test_data["int_data"],
        test_data["float_nan_data"],
    ],
    ids=test_data_keys,
)
def test_astype(data, request):
    """Run ``Series.astype`` with several targets on a named modin/pandas series pair.

    Targets: ``str``, ``np.int64``, ``np.float64`` and two dict specifications
    (one keyed by the series name, one keyed by a name the series does not have).
    """
    series_name = "test_series"
    modin_series, pandas_series = create_test_series(data)
    modin_series.name = pandas_series.name = series_name

    eval_general(modin_series, pandas_series, lambda df: df.astype(str))

    # The integer cast is given an expected error for the "float_nan_data" case.
    int_cast_error = (
        pd.errors.IntCastingNaNError(
            "Cannot convert non-finite values (NA or inf) to integer"
        )
        if "float_nan_data" in request.node.callspec.id
        else None
    )
    eval_general(
        modin_series,
        pandas_series,
        lambda ser: ser.astype(np.int64),
        expected_exception=int_cast_error,
    )

    eval_general(modin_series, pandas_series, lambda ser: ser.astype(np.float64))

    eval_general(
        modin_series,
        pandas_series,
        lambda ser: ser.astype({series_name: str}),
    )

    # FIXME: https://github.com/modin-project/modin/issues/7039
    eval_general(
        modin_series,
        pandas_series,
        lambda ser: ser.astype({"wrong_name": str}),
        expected_exception=False,
    )

    # TODO(https://github.com/modin-project/modin/issues/4317): Test passing a
    # dict to astype() for a series with no name.


@pytest.mark.parametrize("dtype", ["int32", "float32"])
def test_astype_32_types(dtype):
    """Check that casting to a 32-bit dtype yields a series of exactly that dtype."""
    # https://github.com/modin-project/modin/issues/6881
    converted = pd.Series([1, 2, 6]).astype(dtype)
    assert converted.dtype == dtype


@pytest.mark.parametrize(
    "data", [["A", "A", "B", "B", "A"], [1, 1, 2, 1, 2, 2, 3, 1, 2, 1, 2]]
)
def test_astype_categorical(data):
    """Cast to categorical via the ``"category"`` alias and via an explicit ``CategoricalDtype``.

    Both the resulting values and the resulting dtype are compared with pandas.
    """
    modin_series, pandas_series = create_test_series(data)
    explicit_dtype = pd.CategoricalDtype(categories=sorted(set(data)))

    for target in ("category", explicit_dtype):
        modin_result = modin_series.astype(target)
        pandas_result = pandas_series.astype(target)
        df_equals(modin_result, pandas_result)
        assert modin_result.dtype == pandas_result.dtype


@pytest.mark.parametrize("data", [["a", "a", "b", "c", "c", "d", "b", "d"]])
@pytest.mark.parametrize(
    "set_min_row_partition_size",
    [2, 4],
    ids=["four_row_partitions", "two_row_partitions"],
    indirect=True,
)
def test_astype_categorical_issue5722(data, set_min_row_partition_size):
    """Check categories and codes of both halves of a categorical cast result.

    The cast result is split at row 4 and each half is compared between modin
    and pandas, as well as against the other half of the same library.
    """
    modin_series, pandas_series = create_test_series(data)

    modin_result = modin_series.astype("category")
    pandas_result = pandas_series.astype("category")
    df_equals(modin_result, pandas_result)
    assert modin_result.dtype == pandas_result.dtype

    split_at = 4
    pandas_head = pandas_result.iloc[:split_at]
    pandas_tail = pandas_result.iloc[split_at:]
    modin_head = modin_result.iloc[:split_at]
    modin_tail = modin_result.iloc[split_at:]

    # check categories
    pandas_head_categories = pandas_head.cat.categories
    pandas_tail_categories = pandas_tail.cat.categories
    modin_head_categories = modin_head.cat.categories
    modin_tail_categories = modin_tail.cat.categories
    assert pandas_head_categories.equals(pandas_tail_categories)
    assert modin_head_categories.equals(modin_tail_categories)
    assert pandas_head_categories.equals(modin_head_categories)
    assert pandas_tail_categories.equals(modin_tail_categories)

    # check codes
    for pandas_part, modin_part in (
        (pandas_head, modin_head),
        (pandas_tail, modin_tail),
    ):
        assert_array_equal(pandas_part.cat.codes.values, modin_part.cat.codes.values)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_at(data):
    """Compare ``Series.at`` lookups of the first and last index labels with pandas."""
    modin_series, pandas_series = create_test_series(data)
    df_equals(
        modin_series.at[modin_series.index[0]], pandas_series.at[pandas_series.index[0]]
    )
    # The pandas reference goes through `.at` as well, so both sides of the
    # comparison exercise the same accessor.
    df_equals(
        modin_series.at[modin_series.index[-1]],
        pandas_series.at[pandas_series.index[-1]],
    )


def test_at_time():
    """Compare ``Series.at_time`` with pandas on a 12-hourly datetime index."""
    # Lower-case "h" is the non-deprecated hourly alias (the upper-case "H"
    # spelling is deprecated in pandas 2.2).
    i = pd.date_range("2008-01-01", periods=1000, freq="12h")
    modin_series = pd.Series(list(range(1000)), index=i)
    pandas_series = pandas.Series(list(range(1000)), index=i)
    df_equals(modin_series.at_time("12:00"), pandas_series.at_time("12:00"))
    df_equals(modin_series.at_time("3:00"), pandas_series.at_time("3:00"))


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("lag", [1, 2, 3])
def test_autocorr(data, lag):
    """Compare ``Series.autocorr`` with pandas for the given ``lag``."""
    modin_series, pandas_series = create_test_series(data)
    df_equals(
        modin_series.autocorr(lag=lag),
        pandas_series.autocorr(lag=lag),
    )


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_axes(data):
    """Check that ``Series.axes`` has the same first axis and length as in pandas."""
    modin_series, pandas_series = create_test_series(data)
    modin_axes = modin_series.axes
    pandas_axes = pandas_series.axes
    assert modin_axes[0].equals(pandas_axes[0])
    assert len(modin_series.axes) == len(pandas_series.axes)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_attrs(data):
    """Read the ``attrs`` property on both series through ``eval_general``."""
    modin_series, pandas_series = create_test_series(data)
    eval_general(
        modin_series,
        pandas_series,
        lambda df: df.attrs,
    )


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_array(data):
    """Read the ``array`` property on both series through ``eval_general``."""
    modin_series, pandas_series = create_test_series(data)
    eval_general(
        modin_series,
        pandas_series,
        lambda df: df.array,
    )


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_between(data):
    """Compare ``Series.between(1, 4)`` with pandas."""
    modin_series, pandas_series = create_test_series(data)
    modin_result = modin_series.between(1, 4)
    pandas_result = pandas_series.between(1, 4)
    df_equals(modin_result, pandas_result)


def test_between_time():
    """Compare ``Series.between_time`` with pandas on a 12-hourly datetime index.

    Three windows are checked: 12:00-17:00, 3:00-8:00, and 3:00-8:00 with
    ``inclusive="right"``.
    """
    # Lower-case "h" is the non-deprecated hourly alias (the upper-case "H"
    # spelling is deprecated in pandas 2.2).
    i = pd.date_range("2008-01-01", periods=1000, freq="12h")
    modin_series = pd.Series(list(range(1000)), index=i)
    pandas_series = pandas.Series(list(range(1000)), index=i)
    df_equals(
        modin_series.between_time("12:00", "17:00"),
        pandas_series.between_time("12:00", "17:00"),
    )
    df_equals(
        modin_series.between_time("3:00", "8:00"),
        pandas_series.between_time("3:00", "8:00"),
    )
    df_equals(
        modin_series.between_time("3:00", "8:00", inclusive="right"),
        pandas_series.between_time("3:00", "8:00", inclusive="right"),
    )


def test_add_series_to_timedeltaindex():
    """Add and subtract a one-hour ``TimedeltaIndex`` to/from a datetime series."""
    # Make a pandas.core.indexes.timedeltas.TimedeltaIndex
    deltas = pd.to_timedelta([1], unit="h")
    modin_series, pandas_series = create_test_series(np.datetime64("2000-12-12"))
    eval_general(modin_series, pandas_series, lambda s: s + deltas)
    eval_general(modin_series, pandas_series, lambda s: s - deltas)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_bfill(data):
    """Compare ``Series.bfill`` with pandas, both returning a new series and in place."""
    modin_series, pandas_series = create_test_series(data)
    modin_filled = modin_series.bfill()
    pandas_filled = pandas_series.bfill()
    df_equals(modin_filled, pandas_filled)

    # inplace (on copies, so the originals stay untouched)
    modin_copy, pandas_copy = modin_series.copy(), pandas_series.copy()
    modin_copy.bfill(inplace=True)
    pandas_copy.bfill(inplace=True)
    df_equals(modin_copy, pandas_copy)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_bool(data):
    """Check that ``Series.bool()`` warns and raises, and that ``__bool__`` raises.

    ``bool()`` is expected to emit a ``FutureWarning`` and raise ``ValueError``;
    ``__bool__()`` is expected to raise ``ValueError``.
    """
    modin_series, _ = create_test_series(data)

    deprecation = pytest.warns(
        FutureWarning, match="bool is now deprecated and will be removed"
    )
    with deprecation, pytest.raises(ValueError):
        modin_series.bool()

    with pytest.raises(ValueError):
        modin_series.__bool__()


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("bound_type", ["list", "series"], ids=["list", "series"])
def test_clip_scalar(request, data, bound_type):
    """Compare ``Series.clip`` with scalar bounds against pandas.

    Nothing is checked unless ``name_contains`` matches the test node name
    against ``numeric_dfs``.
    """
    # NOTE(review): `bound_type` is parametrized but not read in this test.
    modin_series, pandas_series = create_test_series(data)

    if not name_contains(request.node.name, numeric_dfs):
        return

    # set bounds
    lower, upper = np.sort(random_state.randint(RAND_LOW, RAND_HIGH, 2))

    bound_pairs = (
        (None, upper),  # test only upper scalar bound
        (lower, upper),  # test lower and upper scalar bound
    )
    for low, high in bound_pairs:
        modin_result = modin_series.clip(low, high)
        pandas_result = pandas_series.clip(low, high)
        df_equals(modin_result, pandas_result)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("bound_type", ["list", "series"], ids=["list", "series"])
def test_clip_sequence(request, data, bound_type):
    """Compare ``Series.clip`` with per-element bounds against pandas.

    Bounds are random integer arrays, optionally wrapped in a ``Series`` of the
    matching library when ``bound_type == "series"``. Nothing is checked unless
    ``name_contains`` matches the test node name against ``numeric_dfs``.
    """
    modin_series, pandas_series = create_test_series(data)

    if not name_contains(request.node.name, numeric_dfs):
        return

    size = len(pandas_series)
    lower = random_state.randint(RAND_LOW, RAND_HIGH, size)
    upper = random_state.randint(RAND_LOW, RAND_HIGH, size)

    as_series = bound_type == "series"
    modin_lower = pd.Series(lower) if as_series else lower
    pandas_lower = pandas.Series(lower) if as_series else lower
    modin_upper = pd.Series(upper) if as_series else upper
    pandas_upper = pandas.Series(upper) if as_series else upper

    # test lower and upper list bound
    df_equals(
        modin_series.clip(modin_lower, modin_upper, axis=0),
        pandas_series.clip(pandas_lower, pandas_upper),
    )

    # test only upper list bound
    df_equals(
        modin_series.clip(np.nan, modin_upper, axis=0),
        pandas_series.clip(np.nan, pandas_upper),
    )


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_combine(data):
    """Call ``Series.combine`` on a modin series; the result is not compared to pandas."""
    modin_series, _ = create_test_series(data)
    half_of_max = max(modin_series) // 2
    other = modin_series % half_of_max
    modin_series.combine(other, lambda left, right: left if left < right else right)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_combine_first(data):
    """Compare ``Series.combine_first`` with pandas.

    The other operand is the series taken modulo half of its maximum value.
    """
    modin_series, pandas_series = create_test_series(data)
    modin_other = modin_series % (max(modin_series) // 2)
    pandas_other = pandas_series % (max(pandas_series) // 2)
    df_equals(
        modin_series.combine_first(modin_other),
        pandas_series.combine_first(pandas_other),
    )


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_compress(data):
    """Call ``Series.compress`` and require modin to raise the same error type as pandas.

    If pandas does not raise, the modin call is only required not to raise.
    """
    modin_series, pandas_series = create_test_series(data)
    threshold = 30
    try:
        pandas_series.compress(pandas_series > threshold)
    except Exception as err:
        with pytest.raises(type(err)):
            modin_series.compress(modin_series > threshold)
    else:
        modin_series.compress(modin_series > threshold)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_constructor(data):
    """Compare a freshly built series, and a series built from a series, with pandas."""
    modin_series, pandas_series = create_test_series(data)
    df_equals(modin_series, pandas_series)

    modin_rebuilt = pd.Series(modin_series)
    pandas_rebuilt = pandas.Series(pandas_series)
    df_equals(modin_rebuilt, pandas_rebuilt)


def test_constructor_columns_and_index():
    """Build a series from another series with extra ``name``/``index`` arguments.

    Re-wrapping with no argument, a new name, or a subset index is compared
    with pandas; an index containing a label absent from the source is
    expected to raise ``NotImplementedError`` in modin.
    """
    modin_series = pd.Series([1, 1, 10], index=[1, 2, 3], name="health")
    pandas_series = pandas.Series([1, 1, 10], index=[1, 2, 3], name="health")
    df_equals(modin_series, pandas_series)

    for extra_kwargs in ({}, {"name": "max_speed"}, {"index": [1, 2]}):
        df_equals(
            pd.Series(modin_series, **extra_kwargs),
            pandas.Series(pandas_series, **extra_kwargs),
        )

    with pytest.raises(NotImplementedError):
        pd.Series(modin_series, index=[1, 2, 99999])


def test_constructor_arrow_extension_array():
    """Build a series from an ``ArrowExtensionArray`` of string-to-string maps.

    Skipped when ``pyarrow`` cannot be imported.
    """
    # example from pandas docs
    pa = pytest.importorskip("pyarrow")
    map_type = pa.map_(pa.string(), pa.string())
    arrow_values = pa.array([{"1": "2"}, {"10": "20"}, None], type=map_type)
    array = pd.arrays.ArrowExtensionArray(arrow_values)

    md_ser, pd_ser = create_test_series(array)
    df_equals(md_ser, pd_ser)
    df_equals(md_ser.dtypes, pd_ser.dtypes)


def test_pyarrow_backed_constructor():
    """Build series with pyarrow-backed dtypes and compare them with pandas.

    Skipped when ``pyarrow`` cannot be imported.
    """
    pa = pytest.importorskip("pyarrow")

    letters = list("abcd")
    for string_dtype in ("string[pyarrow]", pd.ArrowDtype(pa.string())):
        modin_series, pandas_series = create_test_series(letters, dtype=string_dtype)
        df_equals(modin_series, pandas_series)

    nested = [["hello"], ["there"]]
    list_str_type = pa.list_(pa.string())
    modin_series, pandas_series = create_test_series(
        nested, dtype=pd.ArrowDtype(list_str_type)
    )
    df_equals(modin_series, pandas_series)


def test_pyarrow_backed_functions():
    """Run several operations on a ``float32[pyarrow]`` series and compare with pandas.

    ``mean`` is compared directly; the remaining operations go through
    ``eval_general`` with a comparator that also compares ``dtypes``.
    Skipped when ``pyarrow`` cannot be imported.
    """
    pytest.importorskip("pyarrow")
    modin_series, pandas_series = create_test_series(
        [-1.545, 0.211, None], dtype="float32[pyarrow]"
    )
    df_equals(modin_series.mean(), pandas_series.mean())

    def comparator(df1, df2):
        # Values first, then dtypes.
        df_equals(df1, df2)
        df_equals(df1.dtypes, df2.dtypes)

    operations = [
        # self-addition, picking the operand that matches the library of `ser`
        lambda ser: ser
        + (modin_series if isinstance(ser, pd.Series) else pandas_series),
        lambda ser: ser > (ser + 1),
        lambda ser: ser.dropna(),
        lambda ser: ser.isna(),
        lambda ser: ser.fillna(0),
    ]
    for operation in operations:
        eval_general(
            modin_series,
            pandas_series,
            operation,
            comparator=comparator,
        )


def test_pyarrow_array_retrieve():
    """Convert a ``uint8[pyarrow]`` series with ``pa.array`` in modin and pandas.

    Skipped when ``pyarrow`` cannot be imported.
    """
    pa = pytest.importorskip("pyarrow")
    series_pair = create_test_series([1, 2, None], dtype="uint8[pyarrow]")
    modin_series, pandas_series = series_pair
    eval_general(modin_series, pandas_series, lambda ser: pa.array(ser))


def test___arrow_array__():
    """Check ``pa.Table.from_pandas`` gives equal tables for a modin frame and its pandas form.

    Skipped when ``pyarrow`` cannot be imported.
    """
    # https://github.com/modin-project/modin/issues/6808
    pa = pytest.importorskip("pyarrow")
    first_part = pd.DataFrame({"a": ["1", "2", "3"], "b": ["4", "5", "6"]})
    second_part = pd.DataFrame({"a": ["7", "8", "9"], "b": ["10", "11", "12"]})
    test_df = pd.concat([first_part, second_part])

    table_from_modin = pa.Table.from_pandas(df=test_df)
    table_from_pandas = pa.Table.from_pandas(df=test_df._to_pandas())
    assert table_from_modin.equals(table_from_pandas)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_copy(data):
    """Compare ``Series.copy`` with the source series and with pandas."""
    modin_series, pandas_series = create_test_series(data)
    comparisons = (
        lambda: (modin_series, modin_series.copy()),
        lambda: (modin_series.copy(), pandas_series),
        lambda: (modin_series.copy(), pandas_series.copy()),
    )
    for make_pair in comparisons:
        left, right = make_pair()
        df_equals(left, right)


def test_copy_empty_series():
    """Check that copying an empty slice of a series keeps the source dtype."""
    ser = pd.Series(range(3))
    empty_slice = ser[:0]
    res = empty_slice.copy()
    assert res.dtype == ser.dtype


@pytest.mark.parametrize("method", ["pearson", "kendall"])
@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_corr(data, method):
    """Compare ``Series.corr`` of a series with itself against pandas."""
    modin_series, pandas_series = create_test_series(data)
    df_equals(
        modin_series.corr(modin_series, method=method),
        pandas_series.corr(pandas_series, method=method),
    )


@pytest.mark.parametrize(
    "data",
    test_data_values + test_data_large_categorical_series_values,
    ids=test_data_keys + test_data_large_categorical_series_keys,
)
def test_count(data):
    """Compare ``Series.count`` with pandas."""
    modin_series, pandas_series = create_test_series(data)
    modin_count = modin_series.count()
    pandas_count = pandas_series.count()
    df_equals(modin_count, pandas_count)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_cov(data):
    """Compare ``Series.cov`` of a series with itself against pandas."""
    modin_series, pandas_series = create_test_series(data)
    df_equals(
        modin_series.cov(modin_series),
        pandas_series.cov(pandas_series),
    )


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("skipna", [False, True])
def test_cummax(data, skipna):
    """Compare ``Series.cummax`` with pandas, or require the same error type if pandas raises."""
    modin_series, pandas_series = create_test_series(data)
    try:
        pandas_result = pandas_series.cummax(skipna=skipna)
    except Exception as err:
        with pytest.raises(type(err)):
            modin_series.cummax(skipna=skipna)
    else:
        modin_result = modin_series.cummax(skipna=skipna)
        df_equals(modin_result, pandas_result)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("skipna", [False, True])
def test_cummin(data, skipna):
    """Compare ``Series.cummin`` with pandas, or require the same error type if pandas raises."""
    modin_series, pandas_series = create_test_series(data)
    try:
        pandas_result = pandas_series.cummin(skipna=skipna)
    except Exception as err:
        with pytest.raises(type(err)):
            modin_series.cummin(skipna=skipna)
    else:
        modin_result = modin_series.cummin(skipna=skipna)
        df_equals(modin_result, pandas_result)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("skipna", [False, True])
def test_cumprod(data, skipna):
    """Compare ``Series.cumprod`` with pandas, or require the same error type if pandas raises."""
    modin_series, pandas_series = create_test_series(data)
    try:
        pandas_result = pandas_series.cumprod(skipna=skipna)
    except Exception as err:
        with pytest.raises(type(err)):
            modin_series.cumprod(skipna=skipna)
    else:
        modin_result = modin_series.cumprod(skipna=skipna)
        df_equals(modin_result, pandas_result)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("skipna", [False, True])
def test_cumsum(data, skipna):
    """Compare ``Series.cumsum`` with pandas, or require the same error type if pandas raises."""
    modin_series, pandas_series = create_test_series(data)
    try:
        pandas_result = pandas_series.cumsum(skipna=skipna)
    except Exception as err:
        with pytest.raises(type(err)):
            modin_series.cumsum(skipna=skipna)
    else:
        modin_result = modin_series.cumsum(skipna=skipna)
        df_equals(modin_result, pandas_result)


def test_cumsum_6771():
    """Convert the ``cumsum`` of an ``Int64`` series to pandas; only absence of errors is checked."""
    nullable_ints = pd.Series([1, 2, 3], dtype="Int64")
    _ = to_pandas(nullable_ints.cumsum())


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_describe(data):
    """Compare ``Series.describe`` with pandas for several argument combinations.

    For the ``exclude`` variants and the multi-type ``include`` list, modin is
    required to raise the same error type whenever pandas raises.
    """
    modin_series, pandas_series = create_test_series(data)
    df_equals(modin_series.describe(), pandas_series.describe())

    percentiles = [0.10, 0.11, 0.44, 0.78, 0.99]
    df_equals(
        modin_series.describe(percentiles=percentiles),
        pandas_series.describe(percentiles=percentiles),
    )

    # Argument sets for which pandas is allowed to raise.
    may_raise_kwargs = (
        {"exclude": [np.float64]},
        {"exclude": np.float64},
        {"include": [np.timedelta64, np.datetime64, np.object_, np.bool_]},
    )
    for describe_kwargs in may_raise_kwargs:
        try:
            pandas_result = pandas_series.describe(**describe_kwargs)
        except Exception as err:
            with pytest.raises(type(err)):
                modin_series.describe(**describe_kwargs)
        else:
            modin_result = modin_series.describe(**describe_kwargs)
            df_equals(modin_result, pandas_result)

    # Include the series' own dtype, spelled as a string.
    df_equals(
        modin_series.describe(include=str(modin_series.dtypes)),
        pandas_series.describe(include=str(pandas_series.dtypes)),
    )

    df_equals(
        modin_series.describe(include=[np.number]),
        pandas_series.describe(include=[np.number]),
    )

    df_equals(
        modin_series.describe(include="all"),
        pandas_series.describe(include="all"),
    )


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize(
    "periods", int_arg_values, ids=arg_keys("periods", int_arg_keys)
)
def test_diff(data, periods):
    """Compare ``Series.diff`` with pandas, on the series itself and on its ``.T``.

    When pandas raises, modin is required to raise the same error type.
    """
    modin_series, pandas_series = create_test_series(data)

    views = (
        lambda ser: ser,  # the series as is
        lambda ser: ser.T,  # the transposed series
    )
    for view in views:
        try:
            pandas_result = view(pandas_series).diff(periods=periods)
        except Exception as err:
            with pytest.raises(type(err)):
                view(modin_series).diff(periods=periods)
        else:
            modin_result = view(modin_series).diff(periods=periods)
            df_equals(modin_result, pandas_result)


def test_diff_with_dates():
    """Compare ``Series.diff`` with pandas on datetime values, then on the resulting timedeltas."""
    hourly_values = pandas.date_range("2018-01-01", periods=15, freq="h").values
    pandas_series = pandas.Series(hourly_values)
    modin_series = pd.Series(pandas_series)

    # Check that `diff` with datetime types works correctly.
    pandas_first_diff = pandas_series.diff()
    modin_first_diff = modin_series.diff()
    df_equals(modin_first_diff, pandas_first_diff)

    # Check that `diff` with timedelta types works correctly.
    pandas_second_diff = pandas_first_diff.diff()
    modin_second_diff = modin_first_diff.diff()
    df_equals(modin_second_diff, pandas_second_diff)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_div(data):
    """Run ``inter_df_math_helper`` for the ``div`` operation."""
    series_pair = create_test_series(data)
    inter_df_math_helper(*series_pair, "div")


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_divide(data):
    """Run ``inter_df_math_helper`` for the ``divide`` operation."""
    series_pair = create_test_series(data)
    inter_df_math_helper(*series_pair, "divide")


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_dot(data):
    """Compare ``Series.dot`` with pandas for array, frame and series operands.

    Also checks that a wrongly sized array and a series with a non-matching
    index raise ``ValueError`` in modin.
    """
    modin_series, pandas_series = create_test_series(data)
    size = len(modin_series)

    # Test 1D array input
    vector = np.arange(size)
    df_equals(modin_series.dot(vector), pandas_series.dot(vector))

    # Test 2D array input
    matrix = np.arange(size * 2).reshape(size, 2)
    assert_array_equal(modin_series.dot(matrix), pandas_series.dot(matrix))

    # Test bad dimensions
    with pytest.raises(ValueError):
        modin_series.dot(np.arange(size + 10))

    # Test dataframe input
    modin_frame = pd.DataFrame(data)
    pandas_frame = pandas.DataFrame(data)
    df_equals(modin_series.dot(modin_frame), pandas_series.dot(pandas_frame))

    # Test series input
    modin_other = pd.Series(np.arange(size), index=modin_series.index)
    pandas_other = pandas.Series(np.arange(size), index=pandas_series.index)
    df_equals(modin_series.dot(modin_other), pandas_series.dot(pandas_other))

    # Test when input series index doesn't line up with columns
    mismatched_index = ["a" for _ in range(len(modin_series.index))]
    with pytest.raises(ValueError):
        modin_series.dot(pd.Series(np.arange(size), index=mismatched_index))

    # Test case when left series has size (1 x 1)
    # and right dataframe has size (1 x n)
    modin_row = pd.DataFrame(modin_series).T
    modin_result = pd.Series([1]).dot(modin_row)
    pandas_row = pandas.DataFrame(pandas_series).T
    pandas_result = pandas.Series([1]).dot(pandas_row)
    df_equals(modin_result, pandas_result)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_matmul(data):
    """Compare the ``@`` operator on a series with pandas for array, frame and series operands.

    Also checks that a wrongly sized array and a series with a non-matching
    index raise ``ValueError`` in modin.
    """
    modin_series, pandas_series = create_test_series(data)
    size = len(modin_series)

    # Test 1D array input
    vector = np.arange(size)
    df_equals(modin_series @ vector, pandas_series @ vector)

    # Test 2D array input
    matrix = np.arange(size * 2).reshape(size, 2)
    assert_array_equal(modin_series @ matrix, pandas_series @ matrix)

    # Test bad dimensions
    with pytest.raises(ValueError):
        modin_series @ np.arange(size + 10)

    # Test dataframe input
    modin_frame = pd.DataFrame(data)
    pandas_frame = pandas.DataFrame(data)
    df_equals(modin_series @ modin_frame, pandas_series @ pandas_frame)

    # Test series input
    modin_other = pd.Series(np.arange(size), index=modin_series.index)
    pandas_other = pandas.Series(np.arange(size), index=pandas_series.index)
    df_equals(modin_series @ modin_other, pandas_series @ pandas_other)

    # Test when input series index doesn't line up with columns
    mismatched_index = ["a" for _ in range(len(modin_series.index))]
    with pytest.raises(ValueError):
        modin_series @ pd.Series(np.arange(size), index=mismatched_index)


@pytest.mark.xfail(reason="Using pandas Series.")
@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_drop(data):
    """Call ``Series.drop`` with four positional ``None`` arguments (expected to fail).

    The test is marked ``xfail``; it asserts ``NotImplementedError``.
    """
    # `create_test_series` returns a (modin, pandas) pair; unpack it so that
    # `.drop` is looked up on the modin series rather than on the tuple.
    modin_series, _ = create_test_series(data)

    with pytest.raises(NotImplementedError):
        modin_series.drop(None, None, None, None)


@pytest.mark.parametrize(
    "data", test_data_with_duplicates_values, ids=test_data_with_duplicates_keys
)
@pytest.mark.parametrize(
    "keep", ["last", "first", False], ids=["last", "first", "False"]
)
@pytest.mark.parametrize("inplace", [True, False], ids=["True", "False"])
def test_drop_duplicates(data, keep, inplace):
    """Run ``Series.drop_duplicates`` in modin and pandas.

    The pair handed to ``sort_if_range_partitioning`` is the mutated input
    series when ``inplace`` is true, otherwise the returned results.
    """
    modin_series, pandas_series = create_test_series(data)
    call_kwargs = {"keep": keep, "inplace": inplace}
    modin_res = modin_series.drop_duplicates(**call_kwargs)
    pandas_res = pandas_series.drop_duplicates(**call_kwargs)

    checked_pair = (
        (modin_series, pandas_series) if inplace else (modin_res, pandas_res)
    )
    sort_if_range_partitioning(*checked_pair)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("how", ["any", "all"], ids=["any", "all"])
def test_dropna(data, how):
    """Compare ``Series.dropna(how=...)`` with pandas."""
    modin_series, pandas_series = create_test_series(data)
    df_equals(
        modin_series.dropna(how=how),
        pandas_series.dropna(how=how),
    )


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_dropna_inplace(data):
    """Compare in-place ``Series.dropna`` with pandas.

    First against the non-in-place pandas result, then with both libraries
    dropping in place using ``how="any"``.
    """
    # modin in place vs. pandas returning a new series
    modin_series, pandas_series = create_test_series(data)
    expected = pandas_series.dropna()
    modin_series.dropna(inplace=True)
    df_equals(modin_series, expected)

    # both in place, on a fresh pair
    modin_series, pandas_series = create_test_series(data)
    inplace_kwargs = {"how": "any", "inplace": True}
    pandas_series.dropna(**inplace_kwargs)
    modin_series.dropna(**inplace_kwargs)
    df_equals(modin_series, pandas_series)


def test_dtype_empty():
    """Check that an empty modin series has the same dtype as an empty pandas series."""
    modin_series = pd.Series()
    pandas_series = pandas.Series()
    modin_dtype = modin_series.dtype
    assert modin_dtype == pandas_series.dtype


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_dtype(data):
    """Compare ``Series.dtype`` with modin's own ``dtypes`` and with pandas' ``dtype``/``dtypes``."""
    modin_series, pandas_series = create_test_series(data)
    reference_getters = (
        lambda: modin_series.dtypes,
        lambda: pandas_series.dtype,
        lambda: pandas_series.dtypes,
    )
    for get_reference in reference_getters:
        df_equals(modin_series.dtype, get_reference())


# Bug https://github.com/modin-project/modin/issues/4436 in
# Series.dt.to_pydatetime is only reproducible when the date range out of which
# the frame is created has timezone None, so that its dtype is datetime64[ns]
# as opposed to, e.g. datetime64[ns, Europe/Berlin]. To reproduce that bug, we
# use timezones None and Europe/Berlin.
@pytest.mark.parametrize(
    "timezone",
    [
        pytest.param(None),
        pytest.param("Europe/Berlin"),
    ],
)
def test_dt(timezone):
    """
    Compare the ``Series.dt`` accessor between Modin and pandas.

    Covers datetime values built with the given ``timezone``, timedelta values,
    period values, and the expression taken from issue #5112.

    Parameters
    ----------
    timezone : str or None
        Time zone passed to ``date_range`` when building the datetime data.
    """
    data = pd.date_range("2016-12-31", periods=128, freq="D", tz=timezone)
    modin_series = pd.Series(data)
    pandas_series = pandas.Series(data)

    # Datetime properties and methods.
    df_equals(modin_series.dt.date, pandas_series.dt.date)
    df_equals(modin_series.dt.time, pandas_series.dt.time)
    df_equals(modin_series.dt.timetz, pandas_series.dt.timetz)
    df_equals(modin_series.dt.year, pandas_series.dt.year)
    df_equals(modin_series.dt.month, pandas_series.dt.month)
    df_equals(modin_series.dt.day, pandas_series.dt.day)
    df_equals(modin_series.dt.hour, pandas_series.dt.hour)
    df_equals(modin_series.dt.minute, pandas_series.dt.minute)
    df_equals(modin_series.dt.second, pandas_series.dt.second)
    df_equals(modin_series.dt.microsecond, pandas_series.dt.microsecond)
    df_equals(modin_series.dt.nanosecond, pandas_series.dt.nanosecond)
    df_equals(modin_series.dt.dayofweek, pandas_series.dt.dayofweek)
    df_equals(modin_series.dt.day_of_week, pandas_series.dt.day_of_week)
    df_equals(modin_series.dt.weekday, pandas_series.dt.weekday)
    df_equals(modin_series.dt.dayofyear, pandas_series.dt.dayofyear)
    df_equals(modin_series.dt.day_of_year, pandas_series.dt.day_of_year)
    df_equals(modin_series.dt.unit, pandas_series.dt.unit)
    df_equals(modin_series.dt.as_unit("s"), pandas_series.dt.as_unit("s"))
    df_equals(modin_series.dt.isocalendar(), pandas_series.dt.isocalendar())
    df_equals(modin_series.dt.quarter, pandas_series.dt.quarter)
    df_equals(modin_series.dt.is_month_start, pandas_series.dt.is_month_start)
    df_equals(modin_series.dt.is_month_end, pandas_series.dt.is_month_end)
    df_equals(modin_series.dt.is_quarter_start, pandas_series.dt.is_quarter_start)
    df_equals(modin_series.dt.is_quarter_end, pandas_series.dt.is_quarter_end)
    df_equals(modin_series.dt.is_year_start, pandas_series.dt.is_year_start)
    df_equals(modin_series.dt.is_year_end, pandas_series.dt.is_year_end)
    df_equals(modin_series.dt.is_leap_year, pandas_series.dt.is_leap_year)
    df_equals(modin_series.dt.daysinmonth, pandas_series.dt.daysinmonth)
    df_equals(modin_series.dt.days_in_month, pandas_series.dt.days_in_month)
    assert modin_series.dt.tz == pandas_series.dt.tz
    assert modin_series.dt.freq == pandas_series.dt.freq
    df_equals(modin_series.dt.to_period("W"), pandas_series.dt.to_period("W"))
    assert_array_equal(
        modin_series.dt.to_pydatetime(), pandas_series.dt.to_pydatetime()
    )
    df_equals(
        modin_series.dt.tz_localize(None),
        pandas_series.dt.tz_localize(None),
    )
    # `tz_convert` is only exercised when the data carries a time zone.
    if timezone:
        df_equals(
            modin_series.dt.tz_convert(tz="Europe/Berlin"),
            pandas_series.dt.tz_convert(tz="Europe/Berlin"),
        )

    df_equals(modin_series.dt.normalize(), pandas_series.dt.normalize())
    df_equals(
        modin_series.dt.strftime("%B %d, %Y, %r"),
        pandas_series.dt.strftime("%B %d, %Y, %r"),
    )
    df_equals(modin_series.dt.round("h"), pandas_series.dt.round("h"))
    df_equals(modin_series.dt.floor("h"), pandas_series.dt.floor("h"))
    df_equals(modin_series.dt.ceil("h"), pandas_series.dt.ceil("h"))
    df_equals(modin_series.dt.month_name(), pandas_series.dt.month_name())
    df_equals(modin_series.dt.day_name(), pandas_series.dt.day_name())

    # Timedelta properties and methods.
    modin_series = pd.Series(pd.to_timedelta(np.arange(128), unit="d"))
    pandas_series = pandas.Series(pandas.to_timedelta(np.arange(128), unit="d"))

    assert_array_equal(
        modin_series.dt.to_pytimedelta(), pandas_series.dt.to_pytimedelta()
    )
    df_equals(modin_series.dt.total_seconds(), pandas_series.dt.total_seconds())
    df_equals(modin_series.dt.days, pandas_series.dt.days)
    df_equals(modin_series.dt.seconds, pandas_series.dt.seconds)
    df_equals(modin_series.dt.microseconds, pandas_series.dt.microseconds)
    df_equals(modin_series.dt.nanoseconds, pandas_series.dt.nanoseconds)
    df_equals(modin_series.dt.components, pandas_series.dt.components)

    # Period properties and methods.
    data_per = pd.date_range("1/1/2012", periods=128, freq="M")
    pandas_series = pandas.Series(data_per, index=data_per).dt.to_period()
    modin_series = pd.Series(data_per, index=data_per).dt.to_period()

    df_equals(modin_series.dt.qyear, pandas_series.dt.qyear)
    df_equals(modin_series.dt.start_time, pandas_series.dt.start_time)
    df_equals(modin_series.dt.end_time, pandas_series.dt.end_time)
    df_equals(modin_series.dt.to_timestamp(), pandas_series.dt.to_timestamp())

    def dt_with_empty_partition(lib):
        # For context, see https://github.com/modin-project/modin/issues/5112
        # Build the input with `lib` (not the module-level Modin `pd`), so that
        # `eval_general` gets one result per library instead of computing the
        # Modin result twice.
        df = (
            lib.concat(
                [lib.DataFrame([None]), lib.DataFrame([lib.to_timedelta(1)])], axis=1
            )
            .dropna(axis=1)
            .squeeze(1)
        )
        # BaseOnPython had a single partition after the concat, and it
        # maintains that partition after dropna and squeeze. In other execution modes,
        # the series should have two column partitions, one of which is empty.
        # NOTE(review): `df` looks like a Series after `squeeze(1)`, so this
        # DataFrame check may never be true -- confirm the intended type.
        if isinstance(df, pd.DataFrame) and get_current_execution() != "BaseOnPython":
            assert df._query_compiler._modin_frame._partitions.shape == (1, 2)
        return df.dt.days

    eval_general(pd, pandas, dt_with_empty_partition)

    if timezone is None:
        data = pd.period_range("2016-12-31", periods=128, freq="D")
        modin_series = pd.Series(data)
        pandas_series = pandas.Series(data)
        df_equals(modin_series.dt.asfreq("min"), pandas_series.dt.asfreq("min"))


@pytest.mark.parametrize(
    "data", test_data_with_duplicates_values, ids=test_data_with_duplicates_keys
)
@pytest.mark.parametrize(
    "keep", ["last", "first", False], ids=["last", "first", "False"]
)
def test_duplicated(data, keep):
    """Compare ``Series.duplicated(keep=...)`` between Modin and pandas."""
    md_ser, pd_ser = create_test_series(data)
    df_equals(md_ser.duplicated(keep=keep), pd_ser.duplicated(keep=keep))


def test_duplicated_keeps_name_issue_7375():
    """``duplicated`` on a named series must match pandas (name included)."""
    # Ensure that the name property of a series is preserved across duplicated
    md_ser, pd_ser = create_test_series([1, 2, 3, 1], name="a")
    md_result = md_ser.duplicated()
    pd_result = pd_ser.duplicated()
    df_equals(md_result, pd_result)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_empty(data):
    """``Series.empty`` must agree between Modin and pandas."""
    md_ser, pd_ser = create_test_series(data)
    md_empty = md_ser.empty
    assert md_empty == pd_ser.empty


def test_empty_series():
    """A Modin series constructed without arguments reports ``empty``."""
    assert pd.Series().empty


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_eq(data):
    """Run the shared ``inter_df_math_helper`` checks for the ``eq`` operation."""
    inter_df_math_helper(*create_test_series(data), "eq")


@pytest.mark.parametrize(
    "series1_data,series2_data,expected_pandas_equals",
    [
        pytest.param([1], [0], False, id="single_unequal_values"),
        pytest.param([None], [None], True, id="single_none_values"),
        pytest.param(
            pandas.Series(1, name="series1"),
            pandas.Series(1, name="series2"),
            True,
            id="different_names",
        ),
        pytest.param(
            pandas.Series([1], index=[1]),
            pandas.Series([1], index=[1.0]),
            True,
            id="different_index_types",
        ),
        pytest.param(
            pandas.Series([1], index=[1]),
            pandas.Series([1], index=[2]),
            False,
            id="different_index_values",
        ),
        pytest.param([1], [1.0], False, id="different_value_types"),
        pytest.param(
            [1, 2],
            [1, 2],
            True,
            id="equal_series_of_length_two",
        ),
        pytest.param(
            [1, 2],
            [1, 3],
            False,
            id="unequal_series_of_length_two",
        ),
        pytest.param(
            [[1, 2]],
            [[1]],
            False,
            id="different_lengths",
        ),
    ],
)
def test_equals(series1_data, series2_data, expected_pandas_equals):
    """
    Check that ``Series.equals`` in Modin gives the same answer as pandas.

    The pandas answer is first validated against ``expected_pandas_equals`` so
    that a wrong expectation in the parametrization is reported as such.
    """
    md_left, pd_left = create_test_series(series1_data)
    md_right, pd_right = create_test_series(series2_data)

    pandas_equals = pd_left.equals(pd_right)
    assert pandas_equals == expected_pandas_equals, (
        "Test expected pandas to say the series were"
        + f"{'' if expected_pandas_equals else ' not'} equal, but they were"
        + f"{' not' if expected_pandas_equals else ''} equal."
    )
    # Modin must agree whether the other operand is a Modin or a pandas series.
    for other in (md_right, pd_right):
        assert md_left.equals(other) == pandas_equals


def test_equals_several_partitions():
    """Concatenated series with ``None`` at different positions are not equal."""

    def concat_with_tail(tail):
        return pd.concat([pd.Series([0, 1]), pd.Series(tail)])

    left = concat_with_tail([None, 1])
    right = concat_with_tail([1, None])
    assert not left.equals(right)


def test_equals_with_nans():
    """A partially-null series must not equal an all-null series of the same dtype."""
    dtype = "uint8[pyarrow]"
    partially_null = pd.Series([0, 1, None], dtype=dtype)
    all_null = pd.Series([None, None, None], dtype=dtype)
    assert not partially_null.equals(all_null)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_ewm(data):
    """Call ``Series.ewm`` under the default-to-pandas warning check."""
    modin_series = create_test_series(data)[0]
    expect_default = not df_or_series_using_native_execution(modin_series)
    with warns_that_defaulting_to_pandas_if(expect_default):
        modin_series.ewm(halflife=6)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_expanding(data):
    """Compare ``Series.expanding().sum()`` between Modin and pandas."""
    md_ser, pd_ser = create_test_series(data)
    md_result = md_ser.expanding().sum()
    df_equals(md_result, pd_ser.expanding().sum())


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_factorize(data):
    """Call ``Series.factorize`` under the default-to-pandas warning check."""
    modin_series = create_test_series(data)[0]
    expect_default = not df_or_series_using_native_execution(modin_series)
    with warns_that_defaulting_to_pandas_if(expect_default):
        modin_series.factorize()


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_ffill(data):
    """Compare ``Series.ffill`` between Modin and pandas, both copying and in place."""
    md_ser, pd_ser = create_test_series(data)
    df_equals(md_ser.ffill(), pd_ser.ffill())

    # The in-place variant is applied to copies of the input series.
    md_copy, pd_copy = md_ser.copy(), pd_ser.copy()
    md_copy.ffill(inplace=True)
    pd_copy.ffill(inplace=True)
    df_equals(md_copy, pd_copy)


@pytest.mark.parametrize("limit_area", [None, "inside", "outside"])
@pytest.mark.parametrize("method", ["ffill", "bfill"])
def test_ffill_bfill_limit_area(method, limit_area):
    """Compare ``ffill``/``bfill`` with a ``limit_area`` between Modin and pandas."""

    def fill(ser):
        return getattr(ser, method)(limit_area=limit_area)

    modin_ser, pandas_ser = create_test_series([1, None, 2, None])
    eval_general(modin_ser, pandas_ser, fill)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("reindex", [None, 2, -2])
@pytest.mark.parametrize("limit", [None, 1, 2, 0.5, -1, -2, 1.5])
@pytest.mark.exclude_in_sanity
def test_fillna(data, reindex, limit):
    """
    Compare ``Series.fillna`` between Modin and pandas.

    Fill values used are a scalar, the ``bfill``/``ffill`` methods, a series
    and a dict. A float ``limit`` is interpreted as a fraction of the series
    length; a negative ``limit`` is counted back from the series length.
    """
    modin_series, pandas_series = create_test_series(data)
    index = pandas_series.index
    # Shuffled copy of the index, reused as the series-like and dict-like value.
    pandas_replace_series = index.to_series().sample(frac=1)
    modin_replace_series = pd.Series(pandas_replace_series)
    replace_dict = pandas_replace_series.to_dict()

    if reindex is not None:
        # Keep a slice from the front (positive) or the back (negative) and
        # reindex it back onto the original index.
        kept = pandas_series[:reindex] if reindex > 0 else pandas_series[reindex:]
        pandas_series = kept.reindex(index)
        # Because of bug #3178 modin Series has to be created from pandas
        # Series instead of performing the same slice and reindex operations.
        modin_series = pd.Series(pandas_series)

    if isinstance(limit, float):
        limit = int(len(modin_series) * limit)
    if limit is not None and limit < 0:
        limit = len(modin_series) + limit

    df_equals(modin_series.fillna(0, limit=limit), pandas_series.fillna(0, limit=limit))
    for method in ("bfill", "ffill"):
        df_equals(
            modin_series.fillna(method=method, limit=limit),
            pandas_series.fillna(method=method, limit=limit),
        )
    value_pairs = (
        (modin_replace_series, pandas_replace_series),
        (replace_dict, replace_dict),
    )
    for modin_value, pandas_value in value_pairs:
        df_equals(
            modin_series.fillna(modin_value, limit=limit),
            pandas_series.fillna(pandas_value, limit=limit),
        )


@pytest.mark.xfail(reason="Using pandas Series.")
@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_filter(data):
    """Expected-failure test: ``filter`` is expected to raise ``NotImplementedError``."""
    # NOTE(review): unlike the other tests in this file, the return value of
    # `create_test_series` is not unpacked here -- confirm whether that is
    # intended before removing the xfail marker.
    created = create_test_series(data)

    with pytest.raises(NotImplementedError):
        created.filter(None, None, None)


def test_first():
    """Compare ``Series.first`` with pandas and check Modin's deprecation warning."""
    index = pd.date_range("2010-04-09", periods=400, freq="2D")
    values = list(range(400))
    modin_series = pd.Series(values, index=index)
    pandas_series = pandas.Series(values, index=index)
    # Only the first Modin call is checked for the FutureWarning.
    with pytest.warns(FutureWarning, match="first is deprecated and will be removed"):
        modin_result = modin_series.first("3D")
    df_equals(modin_result, pandas_series.first("3D"))
    df_equals(modin_series.first("20D"), pandas_series.first("20D"))


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_first_valid_index(data):
    """Compare ``Series.first_valid_index`` between Modin and pandas."""
    md_ser, pd_ser = create_test_series(data)
    md_label = md_ser.first_valid_index()
    df_equals(md_label, pd_ser.first_valid_index())


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_floordiv(data):
    """Run the shared ``inter_df_math_helper`` checks for ``floordiv``."""
    inter_df_math_helper(*create_test_series(data), "floordiv")


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_ge(data):
    """Run the shared ``inter_df_math_helper`` checks for ``ge``."""
    inter_df_math_helper(*create_test_series(data), "ge")


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_get(data):
    """Compare ``Series.get`` for every existing key and for a missing key."""
    md_ser, pd_ser = create_test_series(data)
    for label in md_ser.keys():
        df_equals(md_ser.get(label), pd_ser.get(label))

    # A missing key must fall back to the supplied default in both libraries.
    missing_key, fallback = "NO_EXIST", "DEFAULT"
    df_equals(md_ser.get(missing_key, fallback), pd_ser.get(missing_key, fallback))


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_gt(data):
    """Run the shared ``inter_df_math_helper`` checks for ``gt``."""
    inter_df_math_helper(*create_test_series(data), "gt")


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_hasnans(data):
    """``Series.hasnans`` must agree between Modin and pandas."""
    md_ser, pd_ser = create_test_series(data)
    md_hasnans = md_ser.hasnans
    assert md_hasnans == pd_ser.hasnans


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("n", int_arg_values, ids=arg_keys("n", int_arg_keys))
def test_head(data, n):
    """Compare ``Series.head`` for ``n`` and for the full series length."""
    md_ser, pd_ser = create_test_series(data)

    df_equals(md_ser.head(n), pd_ser.head(n))
    md_full = md_ser.head(len(md_ser))
    df_equals(md_full, pd_ser.head(len(pd_ser)))


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_hist(data):
    """Call ``Series.hist`` under the default-to-pandas warning check."""
    modin_series = create_test_series(data)[0]
    expect_default = not df_or_series_using_native_execution(modin_series)
    with warns_that_defaulting_to_pandas_if(expect_default):
        modin_series.hist(None)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_iat(data):
    """Compare the first element read through ``Series.iat``."""
    md_ser, pd_ser = create_test_series(data)
    md_first = md_ser.iat[0]
    df_equals(md_first, pd_ser.iat[0])


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("skipna", [False, True])
def test_idxmax(data, skipna):
    """Compare ``Series.idxmax`` directly and through the ``.T`` accessor."""
    md_ser, pd_ser = create_test_series(data)
    expected = pd_ser.idxmax(skipna=skipna)
    df_equals(md_ser.idxmax(skipna=skipna), expected)

    # Same check on the transposed objects.
    expected = pd_ser.T.idxmax(skipna=skipna)
    df_equals(md_ser.T.idxmax(skipna=skipna), expected)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("skipna", [False, True])
def test_idxmin(data, skipna):
    """Compare ``Series.idxmin`` directly and through the ``.T`` accessor."""
    md_ser, pd_ser = create_test_series(data)
    expected = pd_ser.idxmin(skipna=skipna)
    df_equals(md_ser.idxmin(skipna=skipna), expected)

    # Same check on the transposed objects.
    expected = pd_ser.T.idxmin(skipna=skipna)
    df_equals(md_ser.T.idxmin(skipna=skipna), expected)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_iloc(request, data):
    """
    Compare ``Series.iloc`` reads and writes between Modin and pandas.

    For the ``empty_data`` case only the ``IndexError`` on ``iloc[0]`` is checked.
    """
    md_ser, pd_ser = create_test_series(data)

    if name_contains(request.node.name, ["empty_data"]):
        with pytest.raises(IndexError):
            md_ser.iloc[0]
        return

    # Scalar
    np.testing.assert_equal(md_ser.iloc[0], pd_ser.iloc[0])

    # Series
    for indexer in (slice(1, None), slice(1, 2), [1, 2]):
        df_equals(md_ser.iloc[indexer], pd_ser.iloc[indexer])

    # Write Item
    md_ser.iloc[[1, 2]] = 42
    pd_ser.iloc[[1, 2]] = 42
    df_equals(md_ser, pd_ser)

    # A second indexer on a series must be rejected.
    with pytest.raises(IndexingError):
        md_ser.iloc[1:, 1]


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_index(data):
    """Compare ``Series.index`` reads and writes between Modin and pandas."""
    md_ser, pd_ser = create_test_series(data)
    df_equals(md_ser.index, pd_ser.index)

    # An index with one extra label must be rejected.
    too_long = list(md_ser.index) + [999]
    with pytest.raises(ValueError):
        md_ser.index = too_long

    md_ser.index = md_ser.index.map(str)
    pd_ser.index = pd_ser.index.map(str)
    df_equals(md_ser.index, pd_ser.index)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_interpolate(data):
    """Call ``Series.interpolate`` under the default-to-pandas warning check."""
    modin_series = create_test_series(data)[0]
    expect_default = not df_or_series_using_native_execution(modin_series)
    with warns_that_defaulting_to_pandas_if(expect_default):
        modin_series.interpolate()


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_is_monotonic_decreasing(data):
    """``Series.is_monotonic_decreasing`` must agree between Modin and pandas."""
    md_ser, pd_ser = create_test_series(data)
    md_flag = md_ser.is_monotonic_decreasing
    assert md_flag == pd_ser.is_monotonic_decreasing


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_is_monotonic_increasing(data):
    """``Series.is_monotonic_increasing`` must agree between Modin and pandas."""
    md_ser, pd_ser = create_test_series(data)
    md_flag = md_ser.is_monotonic_increasing
    assert md_flag == pd_ser.is_monotonic_increasing


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_is_unique(data):
    """``Series.is_unique`` must agree between Modin and pandas."""
    md_ser, pd_ser = create_test_series(data)
    md_flag = md_ser.is_unique
    assert md_flag == pd_ser.is_unique


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_isin(data):
    """Compare ``Series.isin`` with a list of values between Modin and pandas."""
    md_ser, pd_ser = create_test_series(data)
    candidates = [1, 2, 3, 4]
    expected = pd_ser.isin(candidates)
    df_equals(md_ser.isin(candidates), expected)


def test_isin_with_series():
    """Compare ``Series.isin`` when the values are given as another series."""

    def isin_second(srs):
        return srs[0].isin(srs[1])

    modin_series1, pandas_series1 = create_test_series([1, 2, 3])
    modin_series2, pandas_series2 = create_test_series([1, 2, 3, 4, 5])

    eval_general(
        (modin_series1, modin_series2),
        (pandas_series1, pandas_series2),
        isin_second,
    )

    # Verify that Series actually behaves like Series and ignores unmatched
    # indices on '.isin'
    modin_series1, pandas_series1 = create_test_series([1, 2, 3], index=[10, 11, 12])

    eval_general(
        (modin_series1, modin_series2),
        (pandas_series1, pandas_series2),
        isin_second,
    )


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_isnull(data):
    """Compare ``Series.isnull`` between Modin and pandas."""
    md_ser, pd_ser = create_test_series(data)
    md_mask = md_ser.isnull()
    df_equals(md_mask, pd_ser.isnull())


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_items(data):
    """Compare the ``(index, value)`` pairs yielded by ``Series.items``."""
    md_ser, pd_ser = create_test_series(data)

    paired_items = zip(md_ser.items(), pd_ser.items())
    for (md_label, md_value), (pd_label, pd_value) in paired_items:
        df_equals(md_value, pd_value)
        assert pd_label == md_label


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_keys(data):
    """Compare ``Series.keys`` between Modin and pandas."""
    md_ser, pd_ser = create_test_series(data)
    md_keys = md_ser.keys()
    df_equals(md_keys, pd_ser.keys())


def test_kurtosis_alias():
    """Check ``Series.kurt`` and ``Series.kurtosis`` with the shared alias helper."""
    # It's optimization. If failed, Series.kurt should be tested explicitly
    # in tests: `test_kurt_kurtosis`, `test_kurt_kurtosis_level`.
    kurt, kurtosis = pd.Series.kurt, pd.Series.kurtosis
    _assert_casting_functions_wrap_same_implementation(kurt, kurtosis)


@pytest.mark.parametrize("axis", [0, 1])
@pytest.mark.parametrize("skipna", [False, True])
def test_kurtosis(axis, skipna):
    """Compare ``Series.kurtosis``; ``axis=1`` is expected to raise ``ValueError``."""
    expected_exception = (
        ValueError("No axis named 1 for object type Series") if axis else None
    )

    def kurtosis(df):
        return df.kurtosis(axis=axis, skipna=skipna)

    eval_general(
        *create_test_series(test_data["float_nan_data"]),
        kurtosis,
        expected_exception=expected_exception,
    )


@pytest.mark.parametrize("axis", ["rows", "columns"])
@pytest.mark.parametrize("numeric_only", [False, True])
def test_kurtosis_numeric_only(axis, numeric_only):
    """
    Compare ``Series.kurtosis`` with ``numeric_only`` between Modin and pandas.

    Parameters
    ----------
    axis : {"rows", "columns"}
        Axis name passed to ``kurtosis``; ``"columns"`` is expected to raise.
    numeric_only : bool
        Value forwarded to ``kurtosis``.
    """
    expected_exception = None
    # Both axis values are non-empty strings, so a plain truthiness check would
    # attach the "columns" error to the "rows" case as well.
    if axis == "columns":
        expected_exception = ValueError("No axis named columns for object type Series")
    eval_general(
        *create_test_series(test_data_diff_dtype),
        lambda df: df.kurtosis(axis=axis, numeric_only=numeric_only),
        expected_exception=expected_exception,
    )


def test_last():
    """Compare ``Series.last`` with pandas and check Modin's deprecation warning."""
    range_kwargs = {"periods": 400, "freq": "2D"}
    modin_index = pd.date_range("2010-04-09", **range_kwargs)
    pandas_index = pandas.date_range("2010-04-09", **range_kwargs)
    modin_series = pd.Series(list(range(400)), index=modin_index)
    pandas_series = pandas.Series(list(range(400)), index=pandas_index)
    # Only the first Modin call is checked for the FutureWarning.
    with pytest.warns(FutureWarning, match="last is deprecated and will be removed"):
        modin_result = modin_series.last("3D")
    df_equals(modin_result, pandas_series.last("3D"))
    df_equals(modin_series.last("20D"), pandas_series.last("20D"))


@pytest.mark.parametrize("func", ["all", "any", "count"])
def test_index_order(func):
    """Compare ``all``/``any``/``count`` on series with a random two-level index."""
    # see #1708 and #1869 for details
    s_modin, s_pandas = create_test_series(test_data["float_nan_data"])
    rows_number = len(s_modin.index)
    candidates = list(range(10))
    levels = [np.random.choice(candidates, rows_number) for _ in range(2)]
    index = pandas.MultiIndex.from_arrays(levels)

    # Both series share the very same MultiIndex object.
    s_modin.index = index
    s_pandas.index = index

    # The result of the operation is not a Series, `.index` is missed
    modin_result = getattr(s_modin, func)()
    df_equals(modin_result, getattr(s_pandas, func)())


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_last_valid_index(data):
    """``Series.last_valid_index`` must agree between Modin and pandas."""
    md_ser, pd_ser = create_test_series(data)
    md_label = md_ser.last_valid_index()
    assert md_label == pd_ser.last_valid_index()


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_le(data):
    """Run the shared ``inter_df_math_helper`` checks for ``le``."""
    inter_df_math_helper(*create_test_series(data), "le")


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_loc(data):
    """
    Compare ``Series.loc`` between Modin and pandas.

    Covers label lookups, label-based open-ended slices, a boolean list mask,
    and a partial MultiIndex selection (issue #1988).
    """
    md_ser, pd_ser = create_test_series(data)
    for label in md_ser.index:
        df_equals(md_ser.loc[label], pd_ser.loc[label])
        df_equals(md_ser.loc[label:], pd_ser.loc[label:])

    # Boolean mask selecting every third position.
    every_third = [pos % 3 == 0 for pos in range(len(md_ser.index))]
    md_selected = md_ser.loc[every_third]
    df_equals(md_selected, pd_ser.loc[every_third])

    # From issue #1988
    index = pd.MultiIndex.from_product([np.arange(10), np.arange(10)], names=["f", "s"])
    values = np.arange(100)
    md_ser = pd.Series(values, index=index).sort_index()
    pd_ser = pandas.Series(values, index=index).sort_index()
    # One-element tuple wrapping the (all of "f", 1 of "s") selector.
    selector = ((slice(None), 1),)
    md_selected = md_ser.loc[selector]
    df_equals(md_selected, pd_ser.loc[selector])


def test_loc_with_boolean_series():
    """Compare ``Series.loc`` indexed by a boolean series of the same library."""
    md_ser, pd_ser = create_test_series([1, 2, 3])
    md_mask, pd_mask = create_test_series([True, False, False])
    md_selected = md_ser.loc[md_mask]
    df_equals(md_selected, pd_ser.loc[pd_mask])


# This tests the bug from https://github.com/modin-project/modin/issues/3736
def test_loc_setting_categorical_series():
    """Assign an existing category through ``loc`` on a categorical series."""
    letters = ["a", "b", "c"]
    md_ser = pd.Series(letters, dtype="category")
    pd_ser = pandas.Series(letters, dtype="category")
    md_ser.loc[1:3] = "a"
    pd_ser.loc[1:3] = "a"
    df_equals(md_ser, pd_ser)


# This tests the bug from https://github.com/modin-project/modin/issues/3736
def test_iloc_assigning_scalar_none_to_string_series():
    """Assign ``None`` through ``iloc`` to a series with the ``string`` dtype."""
    md_ser, pd_ser = create_test_series(["A"], dtype="string")
    md_ser.iloc[0] = None
    pd_ser.iloc[0] = None
    df_equals(md_ser, pd_ser)


def test_set_ordered_categorical_column():
    """Replace a frame column with an ordered ``Categorical`` and compare dtypes."""
    data = {"a": [1, 2, 3], "b": [4, 5, 6]}
    mdf, pdf = pd.DataFrame(data), pandas.DataFrame(data)
    mdf["a"] = pd.Categorical(mdf["a"], ordered=True)
    pdf["a"] = pandas.Categorical(pdf["a"], ordered=True)
    df_equals(mdf, pdf)

    # The dtype of the replaced column must match as well.
    modin_dtype = mdf["a"].dtype
    pandas_dtype = pdf["a"].dtype
    assert modin_dtype == pandas_dtype


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_lt(data):
    """Run the shared ``inter_df_math_helper`` checks for ``lt``."""
    inter_df_math_helper(*create_test_series(data), "lt")


@pytest.mark.parametrize("na_values", ["ignore", None], ids=["na_ignore", "na_none"])
@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_map(data, na_values):
    """
    Compare ``Series.map`` between Modin and pandas.

    Mappers used are a callable, a dict, a function returning lists, and a
    function indexing into those lists.
    """

    def triple(s):
        return [s, s, s]

    def first(lst):
        return lst[0]

    md_ser, pd_ser = create_test_series(data)
    df_equals(
        md_ser.map(str, na_action=na_values),
        pd_ser.map(str, na_action=na_values),
    )

    int_to_str = {i: str(i) for i in range(100)}
    df_equals(
        md_ser.map(int_to_str, na_action=na_values),
        pd_ser.map(int_to_str, na_action=na_values),
        # https://github.com/modin-project/modin/issues/5967
        check_dtypes=False,
    )

    # Return list objects
    md_lists = md_ser.map(triple)
    pd_lists = pd_ser.map(triple)
    df_equals(md_lists, pd_lists)

    # Index into list objects
    df_equals(md_lists.map(first), pd_lists.map(first))


def test_mask():
    """Smoke-test ``Series.mask``; a ``ValueError`` raised by the call is tolerated."""
    modin_series = pd.Series(np.arange(10))
    divisible_by_three = modin_series % 3 == 0
    expect_default = not df_or_series_using_native_execution(divisible_by_three)
    with warns_that_defaulting_to_pandas_if(expect_default):
        try:
            modin_series.mask(~divisible_by_three, -modin_series)
        except ValueError:
            pass


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("skipna", [False, True])
def test_max(data, skipna):
    """Compare ``Series.max(skipna=...)`` between Modin and pandas."""

    def maximum(df):
        return df.max(skipna=skipna)

    md_ser, pd_ser = create_test_series(data)
    eval_general(md_ser, pd_ser, maximum)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("skipna", [False, True])
def test_mean(data, skipna):
    """Compare ``Series.mean(skipna=...)`` between Modin and pandas."""

    def mean(df):
        return df.mean(skipna=skipna)

    md_ser, pd_ser = create_test_series(data)
    eval_general(md_ser, pd_ser, mean)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("skipna", [False, True])
def test_median(data, skipna):
    """Compare ``Series.median(skipna=...)`` between Modin and pandas."""

    def median(df):
        return df.median(skipna=skipna)

    md_ser, pd_ser = create_test_series(data)
    eval_general(md_ser, pd_ser, median)


@pytest.mark.parametrize(
    "method", ["median", "skew", "std", "sum", "var", "prod", "sem"]
)
def test_median_skew_std_sum_var_prod_sem_1953(method):
    """Compare a reduction on a constant series indexed by two label arrays."""
    # See #1953 for details
    index_arrays = [
        ["1", "1", "1", "2", "2", "2", "3", "3", "3"],
        ["1", "2", "3", "4", "5", "6", "7", "8", "9"],
    ]
    values = [3] * 9

    def reduce(s):
        return getattr(s, method)()

    modin_s = pd.Series(values, index=index_arrays)
    pandas_s = pandas.Series(values, index=index_arrays)
    eval_general(modin_s, pandas_s, reduce)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("index", [True, False], ids=["True", "False"])
def test_memory_usage(data, index):
    """Compare ``Series.memory_usage(index=...)`` between Modin and pandas."""
    md_ser, pd_ser = create_test_series(data)
    md_usage = md_ser.memory_usage(index=index)
    df_equals(md_usage, pd_ser.memory_usage(index=index))


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("skipna", [False, True])
def test_min(data, skipna):
    """Compare ``Series.min(skipna=...)`` between Modin and pandas."""

    def minimum(df):
        return df.min(skipna=skipna)

    md_ser, pd_ser = create_test_series(data)
    eval_general(md_ser, pd_ser, minimum)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_mod(data):
    """Run the shared ``inter_df_math_helper`` checks for ``mod``."""
    inter_df_math_helper(*create_test_series(data), "mod")


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_mode(data):
    """Compare ``Series.mode`` between Modin and pandas."""
    md_ser, pd_ser = create_test_series(data)
    md_mode = md_ser.mode()
    df_equals(md_mode, pd_ser.mode())


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_mul(data):
    """Run the shared ``inter_df_math_helper`` checks for ``mul``."""
    inter_df_math_helper(*create_test_series(data), "mul")


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_multiply(data):
    """Run the shared ``inter_df_math_helper`` checks for ``multiply``."""
    inter_df_math_helper(*create_test_series(data), "multiply")


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_name(data):
    """Compare ``Series.name`` before and after renaming both series."""
    md_ser, pd_ser = create_test_series(data)
    assert md_ser.name == pd_ser.name

    new_name = "New_name"
    md_ser.name = new_name
    pd_ser.name = new_name
    assert md_ser.name == pd_ser.name
    # The new name must also be visible as the query compiler's column label.
    assert md_ser._query_compiler.columns == [new_name]


def test_tuple_name():
    """A tuple ``name`` stays a tuple, and can later be replaced by other names."""
    two_level, three_level, flat = ("a", 1), ("a", "b", "c"), "flat"
    s = pd.Series(name=two_level)
    # The internal representation of the Series stores the name as a column label.
    # When it is a tuple, this label is a MultiIndex object, and this test ensures that
    # the Series's name property remains a tuple.
    assert s.name == two_level
    assert isinstance(s.name, tuple)

    # Setting the name to a tuple of a different level or a non-tuple should not error.
    s.name = three_level
    assert s.name == three_level
    assert isinstance(s.name, tuple)

    s.name = flat
    assert s.name == flat
    assert isinstance(s.name, str)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_nbytes(data):
    """``Series.nbytes`` must agree between Modin and pandas."""
    md_ser, pd_ser = create_test_series(data)
    md_nbytes = md_ser.nbytes
    assert md_nbytes == pd_ser.nbytes


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_ndim(data):
    """A Modin series always reports ``ndim == 1``."""
    modin_series = create_test_series(data)[0]
    assert modin_series.ndim == 1


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_ne(data):
    """Run the shared ``inter_df_math_helper`` checks for ``ne``."""
    inter_df_math_helper(*create_test_series(data), "ne")


@pytest.mark.xfail(reason="Using pandas Series.")
@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_nlargest(data):
    """Expected-failure test: ``nlargest`` is expected to raise ``NotImplementedError``."""
    # NOTE(review): unlike the other tests in this file, the return value of
    # `create_test_series` is not unpacked here -- confirm whether that is
    # intended before removing the xfail marker.
    created = create_test_series(data)

    with pytest.raises(NotImplementedError):
        created.nlargest(None)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_notnull(data):
    """Compare ``notnull`` against pandas."""
    md_ser, pd_ser = create_test_series(data)
    md_mask = md_ser.notnull()
    pd_mask = pd_ser.notnull()
    df_equals(md_mask, pd_mask)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_nsmallest(data):
    """Compare ``nsmallest`` against pandas for several ``n``/``keep`` combinations."""
    md_ser, pd_ser = create_test_series(data)
    argument_sets = (
        {"n": 5, "keep": "first"},
        {"n": 10, "keep": "first"},
        {"n": 10, "keep": "last"},
        # `n` is deliberately omitted here so its default value is exercised.
        {"keep": "all"},
    )
    for kwargs in argument_sets:
        df_equals(md_ser.nsmallest(**kwargs), pd_ser.nsmallest(**kwargs))


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("dropna", [True, False], ids=["True", "False"])
def test_nunique(data, dropna):
    """Compare ``nunique`` against pandas for both ``dropna`` settings."""
    md_ser, pd_ser = create_test_series(data)
    md_count = md_ser.nunique(dropna=dropna)
    pd_count = pd_ser.nunique(dropna=dropna)
    df_equals(md_count, pd_count)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_pct_change(data):
    """Call ``pct_change`` under the conditional default-to-pandas warning check."""
    md_ser, _pd_ser = create_test_series(data)
    # The warning check is only armed when the series is not on native execution.
    warning_expected = not df_or_series_using_native_execution(md_ser)
    with warns_that_defaulting_to_pandas_if(warning_expected):
        md_ser.pct_change()


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_pipe(data):
    """Check that chained ``pipe`` calls match direct calls and match pandas."""
    md_ser, pd_ser = create_test_series(data)
    length = len(md_ser.index)
    a, b, c = 2 % length, 0, 3 % length

    def drop_missing(x):
        return x.dropna()

    def double_up(x, arg1=0):
        # Concatenate with whichever library the object belongs to.
        for _ in range(arg1):
            library = pd if isinstance(x, pd.Series) else pandas
            x = library.concat((x, x))
        return x

    def drop_two(x, arg2=0, arg3=0):
        return x.drop(x.index[[arg2, arg3]])

    def piped(series):
        return (
            series.pipe(drop_missing)
            .pipe(double_up, arg1=a)
            .pipe(drop_two, arg2=b, arg3=c)
        )

    # Piping must be equivalent to nesting the calls by hand ...
    nested = drop_two(double_up(drop_missing(md_ser), arg1=a), arg2=b, arg3=c)
    df_equals(nested, piped(md_ser))
    # ... and modin must agree with pandas.
    df_equals(piped(md_ser), piped(pd_ser))


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_plot(request, data):
    """Check that plotting a modin Series draws the same line data as pandas.

    The comparison only runs for parametrizations whose node name matches
    ``numeric_dfs``. Both the x and the y coordinates of every plotted line
    are compared.
    """
    modin_series, pandas_series = create_test_series(data)

    def assert_same_coordinates(left, right):
        # Masked arrays are compared through the underlying data of the
        # element-wise comparison; everything else through `np.array_equal`.
        if isinstance(left, np.ma.core.MaskedArray) and isinstance(
            right, np.ma.core.MaskedArray
        ):
            assert all((left == right).data)
        else:
            assert np.array_equal(left, right)

    if name_contains(request.node.name, numeric_dfs):
        # We have to test this way because equality in plots means same object.
        zipped_plot_lines = zip(modin_series.plot().lines, pandas_series.plot().lines)
        for left, right in zipped_plot_lines:
            assert_same_coordinates(left.get_xdata(), right.get_xdata())
            # Previously the non-masked branch re-checked the x data here, so
            # plain y arrays were never actually compared.
            assert_same_coordinates(left.get_ydata(), right.get_ydata())


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_pop(data):
    """Pop every key and compare both the popped value and the remainder."""
    md_ser, pd_ser = create_test_series(data)

    for label in md_ser.keys():
        md_popped = md_ser.pop(label)
        pd_popped = pd_ser.pop(label)
        df_equals(md_popped, pd_popped)
        # After each pop the two series must still agree.
        df_equals(md_ser, pd_ser)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_pow(data):
    """Run the shared binary-operation helper for ``pow``."""
    inter_df_math_helper(*create_test_series(data), "pow")


def test_product_alias():
    """Pass ``Series.prod`` and ``Series.product`` to the same-implementation check."""
    series_cls = pd.Series
    _assert_casting_functions_wrap_same_implementation(
        series_cls.prod, series_cls.product
    )


@pytest.mark.parametrize("axis", [0, 1])
@pytest.mark.parametrize("skipna", [False, True])
def test_prod(axis, skipna):
    """Compare ``prod`` on float data with NaNs; ``axis=1`` must raise."""
    expected_exception = (
        ValueError("No axis named 1 for object type Series") if axis else None
    )
    md_ser, pd_ser = create_test_series(test_data["float_nan_data"])
    eval_general(
        md_ser,
        pd_ser,
        lambda s: s.prod(axis=axis, skipna=skipna),
        expected_exception=expected_exception,
    )


@pytest.mark.parametrize("numeric_only", [False, True])
@pytest.mark.parametrize(
    "min_count", int_arg_values, ids=arg_keys("min_count", int_arg_keys)
)
def test_prod_specific(min_count, numeric_only):
    """Compare ``prod`` with ``min_count``/``numeric_only`` on ``test_data_diff_dtype``."""

    def product(df):
        return df.prod(min_count=min_count, numeric_only=numeric_only)

    md_ser, pd_ser = create_test_series(test_data_diff_dtype)
    eval_general(md_ser, pd_ser, product)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("q", quantiles_values, ids=quantiles_keys)
def test_quantile(request, data, q):
    """Compare ``quantile`` against pandas, skipping ``no_numeric_dfs`` cases."""
    md_ser, pd_ser = create_test_series(data)
    if name_contains(request.node.name, no_numeric_dfs):
        return
    df_equals(md_ser.quantile(q), pd_ser.quantile(q))


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_radd(data):
    """Run the shared binary-operation helper for ``radd``."""
    md_ser, pd_ser = create_test_series(data)
    operation = "radd"
    inter_df_math_helper(md_ser, pd_ser, operation)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize(
    "na_option", ["keep", "top", "bottom"], ids=["keep", "top", "bottom"]
)
def test_rank(data, na_option):
    """Compare ``rank`` against pandas, or the exception type if pandas raises."""
    md_ser, pd_ser = create_test_series(data)
    rank_kwargs = {"na_option": na_option}
    try:
        expected = pd_ser.rank(**rank_kwargs)
    except Exception as err:
        # pandas failed, so modin has to fail with the same exception type.
        with pytest.raises(type(err)):
            md_ser.rank(**rank_kwargs)
        return
    actual = md_ser.rank(**rank_kwargs)
    df_equals(actual, expected)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("order", [None, "C", "F", "A", "K"])
def test_ravel(data, order):
    """Compare ``ravel`` against pandas for every ``order`` value."""
    md_ser, pd_ser = create_test_series(data)
    actual = md_ser.ravel(order=order)
    expected = pd_ser.ravel(order=order)
    np.testing.assert_equal(actual, expected)


@pytest.mark.parametrize(
    "data",
    [
        pandas.Categorical(np.arange(1000), ordered=True),
        pandas.Categorical(np.arange(1000), ordered=False),
        pandas.Categorical(np.arange(1000), categories=np.arange(500), ordered=True),
        pandas.Categorical(np.arange(1000), categories=np.arange(500), ordered=False),
    ],
)
@pytest.mark.parametrize("order", [None, "C", "F", "A", "K"])
def test_ravel_category(data, order):
    """Compare ``ravel`` on 1000-element categorical data against pandas."""
    md_ser, pd_ser = create_test_series(data)
    actual = md_ser.ravel(order=order)
    expected = pd_ser.ravel(order=order)
    categories_equals(actual, expected)


@pytest.mark.parametrize(
    "data",
    [
        pandas.Categorical(np.arange(10), ordered=True),
        pandas.Categorical(np.arange(10), ordered=False),
        pandas.Categorical(np.arange(10), categories=np.arange(5), ordered=True),
        pandas.Categorical(np.arange(10), categories=np.arange(5), ordered=False),
    ],
)
@pytest.mark.parametrize("order", [None, "C", "F", "A", "K"])
def test_ravel_simple_category(data, order):
    """Compare ``ravel`` on 10-element categorical data against pandas."""
    md_ser, pd_ser = create_test_series(data)
    actual = md_ser.ravel(order=order)
    expected = pd_ser.ravel(order=order)
    categories_equals(actual, expected)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_rdiv(data):
    """Run the shared binary-operation helper for ``rdiv``."""
    inter_df_math_helper(*create_test_series(data), "rdiv")


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_reindex(data):
    """Compare ``reindex`` against pandas for added, reordered and MultiIndex labels."""
    md_ser, pd_ser = create_test_series(data)
    extra_label = "_A_NEW_ROW"
    pandas_result = pd_ser.reindex(list(pd_ser.index) + [extra_label], fill_value=0)
    modin_result = md_ser.reindex(list(md_ser.index) + [extra_label], fill_value=0)
    df_equals(pandas_result, modin_result)

    frame_data = {
        "col1": [0, 1, 2, 3],
        "col2": [4, 5, 6, 7],
        "col3": [8, 9, 10, 11],
        "col4": [12, 13, 14, 15],
        "col5": [0, 0, 0, 0],
    }
    pandas_df = pandas.DataFrame(frame_data)
    modin_df = pd.DataFrame(frame_data)

    for col in pandas_df.columns:
        md_column = modin_df[col]
        pd_column = pandas_df[col]
        # Positional labels: a pure reordering, then one with a missing label.
        for labels in ((0, 3, 2, 1), (0, 6, 2)):
            df_equals(
                md_column.reindex(list(labels)), pd_column.reindex(list(labels))
            )
        df_equals(
            md_column.reindex(index=[0, 1, 5]),
            pd_column.reindex(index=[0, 1, 5]),
        )

    # MultiIndex
    md_ser, pd_ser = create_test_series(data)
    shared_index = generate_multiindex(len(pd_ser))
    md_ser.index = shared_index
    pd_ser.index = shared_index
    pandas_result = pd_ser.reindex(list(reversed(pd_ser.index)))
    modin_result = md_ser.reindex(list(reversed(md_ser.index)))
    df_equals(pandas_result, modin_result)


def test_reindex_like():
    """Compare ``reindex_like`` between two ``windspeed`` columns against pandas."""
    o_data = [
        [24.3, 75.7, "high"],
        [31, 87.8, "high"],
        [22, 71.6, "medium"],
        [35, 95, "medium"],
    ]
    o_columns = ["temp_celsius", "temp_fahrenheit", "windspeed"]
    o_index = pd.date_range(start="2014-02-12", end="2014-02-15", freq="D")
    new_data = [[28, "low"], [30, "low"], [35.1, "medium"]]
    new_columns = ["temp_celsius", "windspeed"]
    new_index = pd.DatetimeIndex(["2014-02-12", "2014-02-13", "2014-02-15"])

    def reindexed_windspeed(library):
        # Build both frames with the given library, then align the shorter
        # column to the longer one.
        original = library.DataFrame(o_data, columns=o_columns, index=o_index)
        shorter = library.DataFrame(new_data, columns=new_columns, index=new_index)
        return shorter["windspeed"].reindex_like(original["windspeed"])

    modin_result = reindexed_windspeed(pd)
    pandas_result = reindexed_windspeed(pandas)
    df_equals(modin_result, pandas_result)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_rename(data):
    """Compare ``rename`` with a scalar, in place, and with a callable."""
    md_ser, pd_ser = create_test_series(data)
    new_name = "NEW_NAME"

    # Scalar name, returning a new object.
    df_equals(md_ser.rename(new_name), pd_ser.rename(new_name))

    # Scalar name, applied in place on copies.
    md_copy = md_ser.copy()
    pd_copy = pd_ser.copy()
    md_copy.rename(new_name, inplace=True)
    pd_copy.rename(new_name, inplace=True)
    df_equals(md_copy, pd_copy)

    # A callable argument.
    df_equals(md_ser.rename("{}__".format), pd_ser.rename("{}__".format))


def test_reorder_levels():
    """Compare ``reorder_levels`` on a three-level MultiIndex against pandas."""
    data = np.random.randint(1, 100, 12)
    level_names = ["Number", "Letter", "Color"]

    def build_series(library):
        # Same values for both libraries, each indexed by its own MultiIndex.
        index = library.MultiIndex.from_tuples(
            [
                (num, letter, color)
                for num in range(1, 3)
                for letter in ["a", "b", "c"]
                for color in ["Red", "Green"]
            ],
            names=list(level_names),
        )
        return library.Series(data, index=index)

    modin_series = build_series(pd)
    pandas_series = build_series(pandas)
    modin_result = modin_series.reorder_levels(["Letter", "Color", "Number"])
    pandas_result = pandas_series.reorder_levels(["Letter", "Color", "Number"])
    df_equals(modin_result, pandas_result)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize(
    "repeats", [0, 2, 3, 4], ids=["repeats_{}".format(i) for i in [0, 2, 3, 4]]
)
def test_repeat(data, repeats):
    """Compare ``repeat`` with scalar repeat counts against pandas."""
    md_ser = pd.Series(data)
    pd_ser = pandas.Series(data)
    eval_general(md_ser, pd_ser, lambda df: df.repeat(repeats))


@pytest.mark.parametrize("data", [np.arange(256)])
@pytest.mark.parametrize(
    "repeats",
    [
        0,
        2,
        [2],
        np.arange(256),
        [0] * 64 + [2] * 64 + [3] * 32 + [4] * 32 + [5] * 64,
        [2] * 257,
    ],
    ids=["0_case", "scalar", "one-elem-list", "array", "list", "wrong_list"],
)
def test_repeat_lists(data, repeats, request):
    """Compare ``repeat`` with scalar and list-like counts; a wrong length must raise."""
    # Only the `wrong_list` case (257 counts for 256 values) expects an error.
    is_wrong_length = "wrong_list" in request.node.callspec.id
    expected_exception = (
        ValueError("operands could not be broadcast together with shape (256,) (257,)")
        if is_wrong_length
        else None
    )
    md_ser, pd_ser = create_test_series(data)
    eval_general(
        md_ser,
        pd_ser,
        lambda df: df.repeat(repeats),
        expected_exception=expected_exception,
    )


def test_clip_4485():
    """Compare ``clip`` with a list-like bound on a one-element Series."""
    md_ser = pd.Series([1])
    modin_result = md_ser.clip([3])
    pd_ser = pandas.Series([1])
    pandas_result = pd_ser.clip([3])
    df_equals(modin_result, pandas_result)


def test_replace():
    """Compare ``replace`` with a scalar value and with ``method="bfill"``."""
    md_ser = pd.Series([0, 1, 2, 3, 4])
    pd_ser = pandas.Series([0, 1, 2, 3, 4])

    # Scalar-to-scalar replacement.
    md_scalar = md_ser.replace(0, 5)
    pd_scalar = pd_ser.replace(0, 5)
    df_equals(md_scalar, pd_scalar)

    # List of values replaced through a fill method.
    md_filled = md_ser.replace([1, 2], method="bfill")
    pd_filled = pd_ser.replace([1, 2], method="bfill")
    df_equals(md_filled, pd_filled)


@pytest.mark.parametrize("closed", ["left", "right"])
@pytest.mark.parametrize("label", ["right", "left"])
@pytest.mark.parametrize("level", [None, 1])
@pytest.mark.exclude_in_sanity
def test_resample(closed, label, level):
    """Compare a broad set of resampler operations between modin and pandas."""
    rule = "5min"
    freq = "h"

    index = pandas.date_range("1/1/2000", periods=12, freq=freq)
    pandas_series = pandas.Series(range(12), index=index)
    modin_series = pd.Series(range(12), index=index)

    if level is not None:
        index = pandas.MultiIndex.from_product(
            [["a", "b", "c"], pandas.date_range("31/12/2000", periods=4, freq=freq)]
        )
        pandas_series.index = index
        modin_series.index = index

    resample_kwargs = {"closed": closed, "label": label, "level": level}
    pandas_resampler = pandas_series.resample(rule, **resample_kwargs)
    modin_resampler = modin_series.resample(rule, **resample_kwargs)

    def check(operation):
        # Apply to the modin resampler first, then to the pandas one.
        df_equals(operation(modin_resampler), operation(pandas_resampler))

    def check_no_arg_methods(*method_names):
        for method_name in method_names:
            check(lambda resampler, name=method_name: getattr(resampler, name)())

    check_no_arg_methods("count")
    check(lambda resampler: resampler.var(0))
    check_no_arg_methods(
        "sum",
        "std",
        "sem",
        "size",
        "prod",
        "ohlc",
        "min",
        "median",
        "mean",
        "max",
        "last",
        "first",
        "nunique",
    )
    check(lambda resampler: resampler.pipe(lambda x: x.max() - x.min()))
    check(lambda resampler: resampler.transform(lambda x: (x - x.mean()) / x.std()))
    check(lambda resampler: resampler.aggregate("max"))
    check(lambda resampler: resampler.apply("sum"))
    check(lambda resampler: resampler.get_group(name=list(resampler.groups)[0]))
    assert pandas_resampler.indices == modin_resampler.indices
    assert pandas_resampler.groups == modin_resampler.groups
    check_no_arg_methods("quantile")
    # Upsampling from level= or on= selection is not supported
    if level is None:
        check_no_arg_methods("interpolate", "asfreq")
        check(lambda resampler: resampler.fillna(method="nearest"))
        check_no_arg_methods("nearest", "bfill", "ffill")
    check(lambda resampler: resampler.apply(["sum", "mean", "max"]))
    check(lambda resampler: resampler.aggregate(["sum", "mean", "max"]))


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("drop", [True, False], ids=["True", "False"])
@pytest.mark.parametrize("name", [lib.no_default, "Custom name"])
@pytest.mark.parametrize("inplace", [True, False])
def test_reset_index(data, drop, name, inplace):
    """Compare ``reset_index``; in-place without ``drop`` must raise ``TypeError``."""
    if drop or not inplace:
        expected_exception = None
    else:
        expected_exception = TypeError(
            "Cannot reset_index inplace on a Series to create a DataFrame"
        )

    def reset(df, *args, **kwargs):
        return df.reset_index(*args, **kwargs)

    md_ser, pd_ser = create_test_series(data)
    eval_general(
        md_ser,
        pd_ser,
        reset,
        drop=drop,
        name=name,
        inplace=inplace,
        __inplace__=inplace,
        expected_exception=expected_exception,
    )


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_rfloordiv(data):
    """Run the shared binary-operation helper for ``rfloordiv``."""
    md_ser, pd_ser = create_test_series(data)
    operation = "rfloordiv"
    inter_df_math_helper(md_ser, pd_ser, operation)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_rmod(data):
    """Run the shared binary-operation helper for ``rmod``."""
    inter_df_math_helper(*create_test_series(data), "rmod")


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_rmul(data):
    """Run the shared binary-operation helper for ``rmul``."""
    md_ser, pd_ser = create_test_series(data)
    operation = "rmul"
    inter_df_math_helper(md_ser, pd_ser, operation)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_round(data):
    """Compare ``round`` with default arguments against pandas."""
    md_ser, pd_ser = create_test_series(data)
    md_rounded = md_ser.round()
    pd_rounded = pd_ser.round()
    df_equals(md_rounded, pd_rounded)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_rpow(data):
    """Run the shared binary-operation helper for ``rpow``."""
    inter_df_math_helper(*create_test_series(data), "rpow")


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_rsub(data):
    """Run the shared binary-operation helper for ``rsub``."""
    md_ser, pd_ser = create_test_series(data)
    operation = "rsub"
    inter_df_math_helper(md_ser, pd_ser, operation)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_rtruediv(data):
    """Run the shared binary-operation helper for ``rtruediv``."""
    inter_df_math_helper(*create_test_series(data), "rtruediv")


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_sample(data):
    """Compare seeded ``sample`` calls against pandas, including ``n=0`` and ``n<0``."""
    md_ser, pd_ser = create_test_series(data)
    seed = 21019

    # Fractional and fixed-size sampling: same result, or same exception type.
    for sample_kwargs in (
        {"frac": 0.5, "random_state": seed},
        {"n": 12, "random_state": seed},
    ):
        try:
            pandas_result = pd_ser.sample(**sample_kwargs)
        except Exception as err:
            with pytest.raises(type(err)):
                md_ser.sample(**sample_kwargs)
        else:
            modin_result = md_ser.sample(**sample_kwargs)
            df_equals(pandas_result, modin_result)

    warning_expected = not df_or_series_using_native_execution(md_ser)
    with warns_that_defaulting_to_pandas_if(warning_expected):
        df_equals(
            md_ser.sample(n=0, random_state=seed),
            pd_ser.sample(n=0, random_state=seed),
        )
    with pytest.raises(ValueError):
        md_ser.sample(n=-3)


@pytest.mark.parametrize("single_value_data", [True, False])
@pytest.mark.parametrize("use_multiindex", [True, False])
@pytest.mark.parametrize("sorter", [True, None])
@pytest.mark.parametrize("values_number", [1, 2, 5])
@pytest.mark.parametrize("side", ["left", "right"])
@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.exclude_in_sanity
def test_searchsorted(
    data, side, values_number, sorter, use_multiindex, single_value_data
):
    """Check that ``searchsorted`` returns the same positions as pandas.

    The series is either created with ``sort=True`` or left as created and
    searched through an explicit ``sorter``. Search values are drawn from the
    module-level ``random_state``.
    """
    # For the single-value case, use only the first element of the first column.
    data = data if not single_value_data else data[next(iter(data.keys()))][0]
    if not sorter:
        modin_series, pandas_series = create_test_series(vals=data, sort=True)
    else:
        modin_series, pandas_series = create_test_series(vals=data)
        # Replace the boolean flag with the actual argsort of the values.
        sorter = np.argsort(list(modin_series))

    if use_multiindex:
        # Give both series the same randomly generated two-level index.
        rows_number = len(modin_series.index)
        level_0_series = random_state.choice([0, 1], rows_number)
        level_1_series = random_state.choice([2, 3], rows_number)
        index_series = pd.MultiIndex.from_arrays(
            [level_0_series, level_1_series], names=["first", "second"]
        )
        modin_series.index = index_series
        pandas_series.index = index_series

    min_sample = modin_series.min(skipna=True)
    max_sample = modin_series.max(skipna=True)

    if single_value_data:
        values = [data]
    else:
        values = []
        # Values taken from the series itself.
        values.append(pandas_series.sample(n=values_number, random_state=random_state))
        # Values drawn between the series minimum and maximum.
        values.append(
            random_state.uniform(low=min_sample, high=max_sample, size=values_number)
        )
        # Values drawn from max_sample upwards.
        values.append(
            random_state.uniform(
                low=max_sample, high=2 * max_sample, size=values_number
            )
        )
        # Values drawn from min_sample downwards.
        values.append(
            random_state.uniform(
                low=min_sample - max_sample, high=min_sample, size=values_number
            )
        )
        # Plain Python scalars: one float and its truncated integer.
        pure_float = random_state.uniform(float(min_sample), float(max_sample))
        pure_int = int(pure_float)
        values.append(pure_float)
        values.append(pure_int)

    test_cases = [
        modin_series.searchsorted(value=value, side=side, sorter=sorter)
        == pandas_series.searchsorted(value=value, side=side, sorter=sorter)
        for value in values
    ]
    # Reduce every non-bool comparison result to a single truth value.
    test_cases = [
        case.all() if not isinstance(case, bool) else case for case in test_cases
    ]

    for case in test_cases:
        assert case


@pytest.mark.parametrize("skipna", [False, True])
@pytest.mark.parametrize("ddof", int_arg_values, ids=arg_keys("ddof", int_arg_keys))
def test_sem_float_nan_only(skipna, ddof):
    """Compare ``sem`` on the ``float_nan_data`` dataset against pandas."""
    md_ser, pd_ser = create_test_series(test_data["float_nan_data"])

    def standard_error(df):
        return df.sem(skipna=skipna, ddof=ddof)

    eval_general(md_ser, pd_ser, standard_error)


@pytest.mark.parametrize("ddof", int_arg_values, ids=arg_keys("ddof", int_arg_keys))
def test_sem_int_only(ddof):
    """Compare ``sem`` on the ``int_data`` dataset against pandas."""
    md_ser, pd_ser = create_test_series(test_data["int_data"])

    def standard_error(df):
        return df.sem(ddof=ddof)

    eval_general(md_ser, pd_ser, standard_error)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_set_axis(data):
    """Call ``set_axis`` with labels derived from the current index."""
    md_ser, _unused_pandas_series = create_test_series(data)
    # Each new label is "<i>_<i + 1>" for the existing index value i.
    new_labels = ["{}_{}".format(label, label + 1) for label in md_ser.index]
    md_ser.set_axis(labels=new_labels)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_shape(data):
    """Check that ``shape`` agrees with pandas."""
    md_ser, pd_ser = create_test_series(data)
    actual = md_ser.shape
    expected = pd_ser.shape
    assert actual == expected


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_size(data):
    """Check that ``size`` agrees with pandas."""
    md_ser, pd_ser = create_test_series(data)
    actual = md_ser.size
    expected = pd_ser.size
    assert actual == expected


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("skipna", [False, True])
def test_skew(data, skipna):
    """Compare ``skew`` against pandas for both ``skipna`` settings."""
    md_ser, pd_ser = create_test_series(data)
    eval_general(md_ser, pd_ser, lambda df: df.skew(skipna=skipna))


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("index", ["default", "ndarray", "has_duplicates"])
@pytest.mark.parametrize("periods", [0, 1, -1, 10, -10, 1000000000, -1000000000])
@pytest.mark.parametrize("name", [None, "foo"])
def test_shift(data, index, periods, name):
    """Compare ``shift`` against pandas for several index kinds and periods."""
    md_ser, pd_ser = create_test_series(data, name=name)
    if index == "ndarray":
        data_column_length = len(data[next(iter(data))])
        offset_index = np.arange(2, data_column_length + 2)
        md_ser.index = offset_index
        pd_ser.index = offset_index
    elif index == "has_duplicates":
        # Replace the last three labels with 0, 1, 2.
        duplicated_index = list(md_ser.index[:-3]) + [0, 1, 2]
        md_ser.index = duplicated_index
        pd_ser.index = duplicated_index

    # First with the default fill value, then with an explicit one.
    for extra_kwargs in ({}, {"fill_value": 777}):
        df_equals(
            md_ser.shift(periods=periods, **extra_kwargs),
            pd_ser.shift(periods=periods, **extra_kwargs),
        )


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("ascending", [False, True])
@pytest.mark.parametrize(
    "sort_remaining", bool_arg_values, ids=arg_keys("sort_remaining", bool_arg_keys)
)
@pytest.mark.parametrize("na_position", ["first", "last"], ids=["first", "last"])
def test_sort_index(data, ascending, sort_remaining, na_position):
    """Compare ``sort_index`` against pandas, out of place and in place."""
    md_ser, pd_ser = create_test_series(data)
    sort_kwargs = {
        "ascending": ascending,
        "sort_remaining": sort_remaining,
        "na_position": na_position,
    }

    def sort_out_of_place(df):
        return df.sort_index(**sort_kwargs)

    def sort_in_place(df):
        return df.sort_index(inplace=True, **sort_kwargs)

    eval_general(md_ser, pd_ser, sort_out_of_place)

    # The in-place variant runs on copies.
    eval_general(md_ser.copy(), pd_ser.copy(), sort_in_place, __inplace__=True)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("ascending", [True, False])
@pytest.mark.parametrize("na_position", ["first", "last"], ids=["first", "last"])
def test_sort_values(data, ascending, na_position):
    """Compare ``sort_values`` against pandas, both out of place and in place."""
    modin_series, pandas_series = create_test_series(data)

    def assert_sorted_equal(modin_sorted, pandas_sorted):
        # Note: For `ascending=False` only
        # For some reason, the indexing of Series and DataFrame differ in the
        # underlying algorithm. The order of values is the same, but the index
        # values are shuffled. Since we use `DataFrame.sort_values` even for
        # Series, the index can be different between `pandas.Series.sort_values`.
        # For this reason, we check that the values are identical instead of the
        # index as well.
        if ascending:
            df_equals_with_non_stable_indices(modin_sorted, pandas_sorted)
        else:
            np.testing.assert_equal(modin_sorted.values, pandas_sorted.values)

    modin_result = modin_series.sort_values(
        ascending=ascending, na_position=na_position
    )
    pandas_result = pandas_series.sort_values(
        ascending=ascending, na_position=na_position
    )
    assert_sorted_equal(modin_result, pandas_result)

    modin_series_cp = modin_series.copy()
    pandas_series_cp = pandas_series.copy()
    modin_series_cp.sort_values(
        ascending=ascending, na_position=na_position, inplace=True
    )
    pandas_series_cp.sort_values(
        ascending=ascending, na_position=na_position, inplace=True
    )
    # Compare the copies that were sorted in place. The ascending branch used
    # to re-check the out-of-place results, leaving the in-place sort untested.
    assert_sorted_equal(modin_series_cp, pandas_series_cp)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_squeeze(data):
    """Compare ``squeeze`` for ``None`` and ``0``; axis ``1`` must raise."""
    md_ser, pd_ser = create_test_series(data)
    for axis in (None, 0):
        df_equals(md_ser.squeeze(axis), pd_ser.squeeze(axis))
    with pytest.raises(ValueError):
        md_ser.squeeze(1)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("skipna", [False, True])
@pytest.mark.parametrize("ddof", int_arg_values, ids=arg_keys("ddof", int_arg_keys))
def test_std(request, data, skipna, ddof):
    """Compare ``std`` against pandas, or the exception type if pandas raises."""
    md_ser, pd_ser = create_test_series(data)
    std_kwargs = {"skipna": skipna, "ddof": ddof}
    try:
        expected = pd_ser.std(**std_kwargs)
    except Exception as err:
        # pandas failed, so modin has to fail with the same exception type.
        with pytest.raises(type(err)):
            md_ser.std(**std_kwargs)
        return
    actual = md_ser.std(**std_kwargs)
    df_equals(actual, expected)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_sub(data):
    """Run the shared binary-operation helper for ``sub``."""
    inter_df_math_helper(*create_test_series(data), "sub")


def test_6782():
    """Subtract a datetime scalar while the object-dtype PerformanceWarning is an error."""
    epoch = datetime.datetime(1970, 1, 1, 0, 0)
    warning_pattern = (
        "Adding/subtracting object-dtype array to DatetimeArray not vectorized"
    )
    with warnings.catch_warnings():
        # Turn the matching PerformanceWarning into an exception.
        warnings.filterwarnings(
            "error", message=warning_pattern, category=PerformanceWarning
        )
        series = pd.Series([datetime.datetime(2000, 1, 1)])
        series - epoch


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_subtract(data):
    """Run the shared binary-operation helper for ``subtract``."""
    md_ser, pd_ser = create_test_series(data)
    operation = "subtract"
    inter_df_math_helper(md_ser, pd_ser, operation)


@pytest.mark.parametrize(
    "data",
    test_data_values + test_data_small_values,
    ids=test_data_keys + test_data_small_keys,
)
@pytest.mark.parametrize("skipna", [False, True])
@pytest.mark.parametrize("numeric_only", [False, True])
@pytest.mark.parametrize(
    "min_count", int_arg_values, ids=arg_keys("min_count", int_arg_keys)
)
@pytest.mark.exclude_in_sanity
def test_sum(data, skipna, numeric_only, min_count):
    """Compare ``sum`` against pandas across ``skipna``/``numeric_only``/``min_count``."""

    def total(df, *args, **kwargs):
        return df.sum(*args, **kwargs)

    md_ser, pd_ser = create_test_series(data)
    eval_general(
        md_ser,
        pd_ser,
        total,
        skipna=skipna,
        numeric_only=numeric_only,
        min_count=min_count,
    )


@pytest.mark.parametrize("operation", ["sum", "shift"])
def test_sum_axis_1_except(operation):
    """Check that ``sum`` and ``shift`` reject ``axis=1`` with pandas' ``ValueError``."""

    def call_operation(df, *args, **kwargs):
        method = getattr(df, operation)
        return method(*args, **kwargs)

    md_ser, pd_ser = create_test_series(test_data["int_data"])
    eval_general(
        md_ser,
        pd_ser,
        call_operation,
        axis=1,
        expected_exception=ValueError("No axis named 1 for object type Series"),
    )


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("axis1", [0, 1, "columns", "index"])
@pytest.mark.parametrize("axis2", [0, 1, "columns", "index"])
def test_swapaxes(data, axis1, axis2):
    """Compare ``swapaxes`` against pandas, or the exception type if pandas raises."""
    md_ser, pd_ser = create_test_series(data)
    try:
        expected = pd_ser.swapaxes(axis1, axis2)
    except Exception as err:
        # pandas failed, so modin has to fail with the same exception type.
        with pytest.raises(type(err)):
            md_ser.swapaxes(axis1, axis2)
        return
    actual = md_ser.swapaxes(axis1, axis2)
    df_equals(actual, expected)


def test_swaplevel():
    """Compare ``swaplevel`` by name, by default and by position against pandas."""
    data = np.random.randint(1, 100, 12)
    level_names = ["Number", "Letter", "Color"]

    def build_series(library):
        # Same values for both libraries, each indexed by its own MultiIndex.
        index = library.MultiIndex.from_tuples(
            [
                (num, letter, color)
                for num in range(1, 3)
                for letter in ["a", "b", "c"]
                for color in ["Red", "Green"]
            ],
            names=list(level_names),
        )
        return library.Series(data, index=index)

    modin_s = build_series(pd)
    pandas_s = build_series(pandas)
    for level_args in (("Number", "Color"), (), (1, 0)):
        df_equals(modin_s.swaplevel(*level_args), pandas_s.swaplevel(*level_args))


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("n", int_arg_values, ids=arg_keys("n", int_arg_keys))
def test_tail(data, n):
    """Compare ``tail`` for the parametrized ``n`` and for the full series length."""
    md_ser, pd_ser = create_test_series(data)

    modin_tail = md_ser.tail(n)
    pandas_tail = pd_ser.tail(n)
    df_equals(modin_tail, pandas_tail)

    # Asking for as many rows as the series holds.
    modin_full = md_ser.tail(len(md_ser))
    pandas_full = pd_ser.tail(len(pd_ser))
    df_equals(modin_full, pandas_full)


def test_take():
    """Compare ``take`` on a shuffled integer index, including an invalid ``axis``."""
    animals = ["falcon", "parrot", "lion", "cat"]
    labels = [0, 2, 3, 1]
    modin_s = pd.Series(animals, index=labels)
    pandas_s = pandas.Series(animals, index=labels)

    positions = [0, 3]
    modin_taken = modin_s.take(positions)
    df_equals(modin_taken, pandas_s.take(positions))

    # If pandas rejects ``axis=1``, Modin must raise the same exception type.
    try:
        pandas_s.take([2], axis=1)
    except Exception as pandas_error:
        with pytest.raises(type(pandas_error)):
            modin_s.take([2], axis=1)


@pytest.mark.parametrize(
    "ignore_index", bool_arg_values, ids=arg_keys("ignore_index", bool_arg_keys)
)
def test_explode(ignore_index):
    """Compare ``explode`` on a series mixing lists, an empty list and a scalar."""
    # Some items in this test data are lists that explode() should expand.
    mixed_items = [[1, 2, 3], "foo", [], [3, 4]]
    md_ser, pd_ser = create_test_series(mixed_items)
    modin_exploded = md_ser.explode(ignore_index=ignore_index)
    pandas_exploded = pd_ser.explode(ignore_index=ignore_index)
    df_equals(modin_exploded, pandas_exploded)


def test_to_period():
    """Smoke-test ``to_period`` on a month-frequency DatetimeIndex series."""
    month_index = pd.date_range("1/1/2012", periods=5, freq="M")
    values = np.random.randint(0, 100, size=(len(month_index)))
    series = pd.Series(values, index=month_index)
    # The warning is only expected when the series is not natively executed.
    expect_warning = not df_or_series_using_native_execution(series)
    with warns_that_defaulting_to_pandas_if(expect_warning):
        series.to_period()


@pytest.mark.parametrize(
    "data",
    test_data_values + test_data_large_categorical_series_values,
    ids=test_data_keys + test_data_large_categorical_series_keys,
)
def test_to_numpy(data):
    """``to_numpy`` must produce the same array in Modin and pandas."""
    md_ser, pd_ser = create_test_series(data)
    modin_array = md_ser.to_numpy()
    pandas_array = pd_ser.to_numpy()
    assert_array_equal(modin_array, pandas_array)


def test_to_numpy_dtype():
    """``to_numpy(dtype=...)`` must match pandas, including the resulting dtype."""
    md_ser, pd_ser = create_test_series(test_data["float_nan_data"])
    modin_array = md_ser.to_numpy(dtype="int64")
    pandas_array = pd_ser.to_numpy(dtype="int64")
    # ``strict=True`` is forwarded to the comparison helper unchanged.
    assert_array_equal(modin_array, pandas_array, strict=True)


@pytest.mark.parametrize(
    "data",
    test_data_values + test_data_large_categorical_series_values,
    ids=test_data_keys + test_data_large_categorical_series_keys,
)
def test_series_values(data):
    """The ``values`` attribute must match between Modin and pandas."""
    md_ser, pd_ser = create_test_series(data)
    modin_values = md_ser.values
    pandas_values = pd_ser.values
    assert_array_equal(modin_values, pandas_values)


def test_series_empty_values():
    """``values`` of a default-constructed series must match pandas."""
    empty_modin = pd.Series()
    empty_pandas = pandas.Series()
    assert_array_equal(empty_modin.values, empty_pandas.values)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_to_string(request, data):
    """``to_string`` output must be identical between Modin and pandas."""
    md_ser, pd_ser = create_test_series(data)

    def render(ser):
        return ser.to_string()

    eval_general(md_ser, pd_ser, render)


def test_to_timestamp():
    """Smoke-test a ``to_period().to_timestamp()`` round trip."""
    month_index = pd.date_range("1/1/2012", periods=5, freq="M")
    values = np.random.randint(0, 100, size=(len(month_index)))
    series = pd.Series(values, index=month_index)
    # The warning is only expected when the series is not natively executed.
    expect_warning = not df_or_series_using_native_execution(series)
    with warns_that_defaulting_to_pandas_if(expect_warning):
        series.to_period().to_timestamp()


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_to_xarray(data):
    """Smoke-test ``to_xarray``; only the defaulting warning is checked."""
    # Only the Modin half of the pair is needed here.
    modin_series = create_test_series(data)[0]
    expect_warning = not df_or_series_using_native_execution(modin_series)
    with warns_that_defaulting_to_pandas_if(expect_warning):
        modin_series.to_xarray()


def test_to_xarray_mock():
    """``to_xarray`` must call ``pandas.Series.to_xarray`` once with the series data."""
    modin_series = pd.Series([])

    with mock.patch("pandas.Series.to_xarray") as patched:
        modin_series.to_xarray()

    patched.assert_called_once()
    positional_args = patched.call_args[0]
    # The only positional argument is the object the patched method received.
    assert len(positional_args) == 1
    df_equals(modin_series, positional_args[0])


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_tolist(data):
    """Smoke-test ``tolist``; only the defaulting warning is checked."""
    # Only the Modin half of the pair is needed here.
    modin_series = create_test_series(data)[0]
    expect_warning = not df_or_series_using_native_execution(modin_series)
    with warns_that_defaulting_to_pandas_if(expect_warning):
        modin_series.tolist()


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize(
    "func", [lambda x: x + 1, [np.sqrt, np.exp]], ids=["lambda", "list_udfs"]
)
def test_transform(data, func, request):
    """Compare ``transform`` for a single callable and for a list of ufuncs."""
    is_list_case = "list_udfs" in request.node.callspec.id
    if is_list_case:
        pytest.xfail(reason="https://github.com/modin-project/modin/issues/6998")

    md_ser, pd_ser = create_test_series(data)

    def apply_transform(ser):
        return ser.transform(func)

    eval_general(md_ser, pd_ser, apply_transform)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("func", agg_func_except_values, ids=agg_func_except_keys)
def test_transform_except(data, func):
    """``transform`` with these functions must raise ``ValueError`` in both libraries."""
    md_ser, pd_ser = create_test_series(data)

    def apply_transform(ser):
        return ser.transform(func)

    eval_general(
        md_ser,
        pd_ser,
        apply_transform,
        expected_exception=ValueError("Function did not transform"),
    )


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_transpose(data):
    """The transposed Modin series must equal itself and both pandas forms."""
    md_ser, pd_ser = create_test_series(data)

    # Against the untransposed Modin series.
    transposed = md_ser.transpose()
    df_equals(transposed, md_ser)

    # Against the transposed pandas series.
    transposed = md_ser.transpose()
    df_equals(transposed, pd_ser.transpose())

    # Against the untransposed pandas series.
    transposed = md_ser.transpose()
    df_equals(transposed, pd_ser)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_truediv(data):
    """Run the shared binary-operation helper for ``truediv``."""
    inter_df_math_helper(*create_test_series(data), "truediv")


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_truncate(data):
    """Compare ``truncate`` against pandas for several ``before``/``after`` bounds.

    Three cases are exercised: dropping rows at both ends, a narrow window
    near the start, and no bounds at all.
    """
    modin_series, pandas_series = create_test_series(data)

    bounds = [
        # The original computed ``len(modin_series - 3)``, which is just the
        # full length, so the tail was never truncated in this case.
        (1, len(modin_series) - 3),
        (1, 3),
        (None, None),
    ]
    for before, after in bounds:
        df_equals(
            modin_series.truncate(before, after),
            pandas_series.truncate(before, after),
        )


def test_tz_convert():
    """Compare ``tz_convert`` on a flat tz-aware index and on a MultiIndex level."""
    range_kwargs = {"periods": 400, "freq": "2D", "tz": "America/Los_Angeles"}
    modin_idx = pd.date_range("1/1/2012", **range_kwargs)
    pandas_idx = pandas.date_range("1/1/2012", **range_kwargs)
    values = np.random.randint(0, 100, size=len(modin_idx))

    # Flat DatetimeIndex.
    modin_series = pd.Series(values, index=modin_idx)
    pandas_series = pandas.Series(values, index=pandas_idx)
    modin_converted = modin_series.tz_convert("UTC", axis=0)
    pandas_converted = pandas_series.tz_convert("UTC", axis=0)
    df_equals(modin_converted, pandas_converted)

    # MultiIndex whose first level carries the timezone.
    positions = range(len(modin_idx))
    modin_multi = pd.MultiIndex.from_arrays([modin_idx, positions])
    pandas_multi = pandas.MultiIndex.from_arrays([pandas_idx, range(len(modin_idx))])
    modin_series = pd.Series(values, index=modin_multi)
    pandas_series = pandas.Series(values, index=pandas_multi)
    modin_converted = modin_series.tz_convert("UTC", axis=0, level=0)
    pandas_converted = pandas_series.tz_convert("UTC", axis=0, level=0)
    df_equals(modin_converted, pandas_converted)


def test_tz_localize():
    """Compare ``tz_localize`` for two target timezones."""
    naive_index = pd.date_range("1/1/2012", periods=400, freq="2D")
    values = np.random.randint(0, 100, size=len(naive_index))
    modin_series = pd.Series(values, index=naive_index)
    pandas_series = pandas.Series(values, index=naive_index)

    for timezone in ("America/Los_Angeles", "UTC"):
        modin_localized = modin_series.tz_localize(timezone)
        pandas_localized = pandas_series.tz_localize(timezone)
        df_equals(modin_localized, pandas_localized)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_unique(data):
    """Compare ``Series.unique`` between Modin and pandas.

    Besides the parametrized data, this covers a small named integer series,
    naive and tz-aware timestamps, and unordered and ordered categoricals.
    For every case the values, the shape and the concrete result type must
    match.
    """

    def check_unique(modin_result, pandas_result):
        # Values are compared through ``sort_if_range_partitioning`` with an
        # array comparator; shape and result type are asserted explicitly.
        sort_if_range_partitioning(
            modin_result, pandas_result, comparator=assert_array_equal
        )
        assert modin_result.shape == pandas_result.shape
        assert type(modin_result) is type(pandas_result)

    modin_series, pandas_series = create_test_series(data)
    check_unique(modin_series.unique(), pandas_series.unique())

    modin_result = pd.Series([2, 1, 3, 3], name="A").unique()
    pandas_result = pandas.Series([2, 1, 3, 3], name="A").unique()
    check_unique(modin_result, pandas_result)

    modin_result = pd.Series([pd.Timestamp("2016-01-01") for _ in range(3)]).unique()
    pandas_result = pandas.Series(
        [pd.Timestamp("2016-01-01") for _ in range(3)]
    ).unique()
    check_unique(modin_result, pandas_result)

    modin_result = pd.Series(
        [pd.Timestamp("2016-01-01", tz="US/Eastern") for _ in range(3)]
    ).unique()
    pandas_result = pandas.Series(
        [pd.Timestamp("2016-01-01", tz="US/Eastern") for _ in range(3)]
    ).unique()
    check_unique(modin_result, pandas_result)

    # The original built ``modin_result`` from pandas and ``pandas_result``
    # from Modin here; each name now holds the result of the matching library.
    modin_result = pd.Series(pd.Categorical(list("baabc"))).unique()
    pandas_result = pandas.Series(pd.Categorical(list("baabc"))).unique()
    check_unique(modin_result, pandas_result)

    modin_result = pd.Series(
        pd.Categorical(list("baabc"), categories=list("abc"), ordered=True)
    ).unique()
    pandas_result = pandas.Series(
        pd.Categorical(list("baabc"), categories=list("abc"), ordered=True)
    ).unique()
    check_unique(modin_result, pandas_result)


def test_unique_pyarrow_dtype():
    """``unique`` on a pyarrow-backed series must keep the pandas result type."""
    # See #6227 for details
    md_ser, pd_ser = create_test_series([1, 0, pd.NA], dtype="uint8[pyarrow]")

    def same_values_and_type(left, right):
        # Perform our own non-strict version of dtypes equality check
        df_equals(left, right)
        # to be sure `unique` return `ArrowExtensionArray`
        assert type(left) is type(right)

    def unique_of(ser):
        return ser.unique()

    eval_general(md_ser, pd_ser, unique_of, comparator=same_values_and_type)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_unstack(data):
    """Compare ``unstack`` on a four-level MultiIndex for several ``level`` values."""
    modin_series, pandas_series = create_test_series(data)
    index = generate_multiindex(len(pandas_series), nlevels=4, is_tree_like=True)

    # Rebuild both series from the first column of ``data`` on the new index.
    first_key = next(iter(data.keys()))
    modin_series = pd.Series(data[first_key], index=index)
    pandas_series = pandas.Series(data[first_key], index=index)

    # Default level first, then explicit levels.
    df_equals(modin_series.unstack(), pandas_series.unstack())
    for level in (0, [0, 1], [0, 1, 2]):
        modin_unstacked = modin_series.unstack(level=level)
        pandas_unstacked = pandas_series.unstack(level=level)
        df_equals(modin_unstacked, pandas_unstacked)


def test_unstack_error_no_multiindex():
    """``unstack`` on a series without a MultiIndex must raise ``ValueError``."""
    flat_series = pd.Series([0, 1, 2])
    expected_message = "index must be a MultiIndex to unstack"
    with pytest.raises(ValueError, match=expected_message):
        flat_series.unstack()


@pytest.mark.parametrize(
    "data, other_data",
    [([1, 2, 3], [4, 5, 6]), ([1, 2, 3], [4, 5, 6, 7, 8]), ([1, 2, 3], [4, np.nan, 6])],
)
def test_update(data, other_data):
    """In-place ``update`` must leave Modin and pandas series equal."""
    modin_series = pd.Series(data)
    pandas_series = pandas.Series(data)

    modin_other = pd.Series(other_data)
    modin_series.update(modin_other)
    pandas_other = pandas.Series(other_data)
    pandas_series.update(pandas_other)

    df_equals(modin_series, pandas_series)


@pytest.mark.parametrize("sort", bool_arg_values, ids=bool_arg_keys)
@pytest.mark.parametrize("normalize", bool_arg_values, ids=bool_arg_keys)
@pytest.mark.parametrize("bins", [3, None])
@pytest.mark.parametrize(
    "dropna",
    [
        pytest.param(None),
        pytest.param(False),
        pytest.param(True),
    ],
)
@pytest.mark.parametrize("ascending", [True, False])
@pytest.mark.exclude_in_sanity
def test_value_counts(sort, normalize, bins, dropna, ascending):
    """Compare ``value_counts`` over the full keyword-argument grid."""

    def count_values(ser):
        return ser.value_counts(
            sort=sort,
            bins=bins,
            normalize=normalize,
            dropna=dropna,
            ascending=ascending,
        )

    def sort_sensitive_comparator(df1, df2):
        # We sort indices for Modin and pandas result because of issue #1650
        if sort:
            return df_equals_with_non_stable_indices(df1, df2)
        return df_equals(df1.sort_index(), df2.sort_index())

    md_ser, pd_ser = create_test_series(test_data_values[0])
    eval_general(md_ser, pd_ser, count_values, comparator=sort_sensitive_comparator)

    # from issue #2365
    nan_sprinkled = np.random.rand(2**6)
    nan_sprinkled[::10] = np.nan
    md_ser, pd_ser = create_test_series(nan_sprinkled)
    eval_general(md_ser, pd_ser, count_values, comparator=sort_sensitive_comparator)


def test_value_counts_categorical():
    """Compare ``value_counts`` on a shuffled, imbalanced categorical series."""
    # from issue #3571
    labels = np.array(["a"] * 50000 + ["b"] * 10000 + ["c"] * 1000)
    # Fixed seed so the shuffle is reproducible.
    shuffler = np.random.RandomState(seed=42)
    shuffler.shuffle(labels)

    md_ser, pd_ser = create_test_series(labels, dtype="category")

    def count_values(ser):
        return ser.value_counts()

    eval_general(md_ser, pd_ser, count_values, comparator=df_equals)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_values(data):
    """``values`` must compare equal under ``np.testing.assert_equal``."""
    md_ser, pd_ser = create_test_series(data)
    modin_values = md_ser.values
    pandas_values = pd_ser.values
    np.testing.assert_equal(modin_values, pandas_values)


def test_values_non_numeric():
    """``values`` of a categorical string series must match pandas."""
    labels = ["str{0}".format(i) for i in range(0, 10**3)]
    md_ser, pd_ser = create_test_series(labels)

    md_categorical = md_ser.astype("category")
    pd_categorical = pd_ser.astype("category")

    df_equals(md_categorical.values, pd_categorical.values)


def test_values_ea():
    """``values`` of a series built from a ``SparseArray`` must match in dtype and content."""
    sparse_data = pandas.arrays.SparseArray(np.arange(10, dtype="int64"))
    md_ser, pd_ser = create_test_series(sparse_data)
    md_values = md_ser.values
    pd_values = pd_ser.values

    assert md_values.dtype == pd_values.dtype
    df_equals(md_values, pd_values)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("skipna", [False, True])
@pytest.mark.parametrize("ddof", int_arg_values, ids=arg_keys("ddof", int_arg_keys))
def test_var(data, skipna, ddof):
    """Modin ``var`` must return the pandas result or raise the same error type."""
    md_ser, pd_ser = create_test_series(data)

    try:
        expected = pd_ser.var(skipna=skipna, ddof=ddof)
    except Exception as err:
        # pandas rejected these arguments: Modin has to reject them too.
        with pytest.raises(type(err)):
            md_ser.var(skipna=skipna, ddof=ddof)
        return
    actual = md_ser.var(skipna=skipna, ddof=ddof)
    df_equals(actual, expected)


def test_view():
    """Compare ``view`` reinterpretation for several same-width dtype pairs."""
    # (values, dtype of the source series, dtype to view it as)
    cases = (
        ([-2, -1, 0, 1, 2], "int8", "uint8"),
        ([-20, -10, 0, 10, 20], "int32", "float32"),
        ([-200, -100, 0, 100, 200], "int64", "float64"),
    )
    for values, source_dtype, target_dtype in cases:
        modin_series = pd.Series(values, dtype=source_dtype)
        pandas_series = pandas.Series(values, dtype=source_dtype)
        modin_result = modin_series.view(dtype=target_dtype)
        pandas_result = pandas_series.view(dtype=target_dtype)
        df_equals(modin_result, pandas_result)


def test_where():
    """Compare ``where`` with a negated-self, a series and a scalar replacement."""
    values = random_state.randn(100)
    pandas_series = pandas.Series(values)
    modin_series = pd.Series(values)
    pandas_mask = pandas_series % 5 < 2
    modin_mask = modin_series % 5 < 2

    # Replacement is the negated series itself.
    pandas_result = pandas_series.where(pandas_mask, -pandas_series)
    modin_result = modin_series.where(modin_mask, -modin_series)
    matches = to_pandas(modin_result) == pandas_result
    assert all(matches)

    # Replacement is an independent series, aligned along ``axis=0``.
    replacement = random_state.randn(100)
    modin_other = pd.Series(replacement)
    pandas_other = pandas.Series(replacement)
    pandas_result = pandas_series.where(pandas_mask, pandas_other, axis=0)
    modin_result = modin_series.where(modin_mask, modin_other, axis=0)
    matches = to_pandas(modin_result) == pandas_result
    assert all(matches)

    # Replacement is a scalar.
    pandas_result = pandas_series.where(pandas_series < 2, True)
    modin_result = modin_series.where(modin_series < 2, True)
    matches = to_pandas(modin_result) == pandas_result
    assert all(matches)


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
@pytest.mark.parametrize(
    "key",
    # Floor division keeps the slice stop an integer; the original used ``/``,
    # which yields a float and is not a valid slice bound for strings.
    [0, slice(0, len(test_string_data_values) // 2)],
    ids=["single_key", "slice_key"],
)
def test_str___getitem__(data, key):
    """Compare ``Series.str[key]`` for an integer key and for a slice key.

    Dtypes are deliberately not compared; see the linked issue below.
    """
    modin_series, pandas_series = create_test_series(data)
    modin_result = modin_series.str[key]
    pandas_result = pandas_series.str[key]
    df_equals(
        modin_result,
        pandas_result,
        # https://github.com/modin-project/modin/issues/5968
        check_dtypes=False,
    )


# Test str operations
@pytest.mark.parametrize(
    "others",
    [["abC|DeF,Hik", "gSaf,qWer|Gre", "asd3,4sad|", np.nan], None],
    ids=["list", "None"],
)
def test_str_cat(others):
    """Compare ``str.cat`` with a list of ``others`` and with ``None``."""
    strings = ["abC|DeF,Hik", "gSaf,qWer|Gre", "asd3,4sad|", np.nan]
    md_ser, pd_ser = create_test_series(strings)

    def concatenate(ser):
        return ser.str.cat(others=others)

    eval_general(md_ser, pd_ser, concatenate)


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
@pytest.mark.parametrize("pat", string_sep_values, ids=string_sep_keys)
@pytest.mark.parametrize("n", int_arg_values, ids=int_arg_keys)
@pytest.mark.parametrize("expand", [False, True])
def test_str_split(data, pat, n, expand):
    """Compare ``str.split`` across separators, split counts and ``expand``."""
    md_ser, pd_ser = create_test_series(data)

    def split(ser):
        return ser.str.split(pat, n=n, expand=expand)

    eval_general(md_ser, pd_ser, split)


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
@pytest.mark.parametrize("pat", string_sep_values, ids=string_sep_keys)
@pytest.mark.parametrize("n", int_arg_values, ids=int_arg_keys)
@pytest.mark.parametrize("expand", [False, True])
def test_str_rsplit(data, pat, n, expand):
    """Compare ``str.rsplit`` across separators, split counts and ``expand``."""
    md_ser, pd_ser = create_test_series(data)

    def rsplit(ser):
        return ser.str.rsplit(pat, n=n, expand=expand)

    eval_general(md_ser, pd_ser, rsplit)


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
@pytest.mark.parametrize("i", int_arg_values, ids=int_arg_keys)
def test_str_get(data, i):
    """Compare ``str.get`` for each parametrized position ``i``."""
    eval_general(*create_test_series(data), lambda ser: ser.str.get(i))


@pytest.mark.parametrize(
    "data", test_string_list_data_values, ids=test_string_list_data_keys
)
@pytest.mark.parametrize("sep", string_sep_values, ids=string_sep_keys)
def test_str_join(data, sep):
    """Compare ``str.join`` on list-valued elements for each separator."""
    eval_general(*create_test_series(data), lambda ser: ser.str.join(sep))


@pytest.mark.parametrize(
    "data", test_string_list_data_values, ids=test_string_list_data_keys
)
@pytest.mark.parametrize("sep", string_sep_values, ids=string_sep_keys)
def test_str_get_dummies(data, sep):
    """Smoke-test ``str.get_dummies``; nothing is checked for a falsy ``sep``."""
    modin_series, _ = create_test_series(data)

    if not sep:
        return

    # We are only testing that this defaults to pandas, so we will just check for
    # the warning
    expect_warning = not df_or_series_using_native_execution(modin_series)
    with warns_that_defaulting_to_pandas_if(expect_warning):
        modin_series.str.get_dummies(sep)


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
@pytest.mark.parametrize("pat", string_sep_values, ids=string_sep_keys)
@pytest.mark.parametrize("case", bool_arg_values, ids=bool_arg_keys)
@pytest.mark.parametrize("na", string_na_rep_values, ids=string_na_rep_keys)
def test_str_contains(data, pat, case, na):
    """Compare ``str.contains`` for a literal pattern and for a fixed regex."""
    series_pair = create_test_series(data)
    # https://github.com/modin-project/modin/issues/5969
    no_dtype_check = {"check_dtypes": False}

    def contains_literal(ser):
        return ser.str.contains(pat, case=case, na=na, regex=False)

    eval_general(*series_pair, contains_literal, comparator_kwargs=no_dtype_check)

    # Test regex
    regex_pat = ",|b"

    def contains_regex(ser):
        return ser.str.contains(regex_pat, case=case, na=na, regex=True)

    eval_general(*series_pair, contains_regex, comparator_kwargs=no_dtype_check)


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
@pytest.mark.parametrize("pat", string_sep_values, ids=string_sep_keys)
@pytest.mark.parametrize("repl", string_sep_values, ids=string_sep_keys)
@pytest.mark.parametrize("n", int_arg_values, ids=int_arg_keys)
@pytest.mark.parametrize("case", bool_arg_values, ids=bool_arg_keys)
def test_str_replace(data, pat, repl, n, case):
    """Compare ``str.replace`` for a literal pattern and for a fixed regex."""
    # https://github.com/modin-project/modin/issues/5970
    check_dtypes = pat is not None

    def replace_literal(ser):
        return ser.str.replace(pat, repl, n=n, case=case, regex=False)

    md_ser, pd_ser = create_test_series(data)
    eval_general(
        md_ser,
        pd_ser,
        replace_literal,
        comparator_kwargs={"check_dtypes": check_dtypes},
    )

    # Test regex
    def replace_regex(ser):
        return ser.str.replace(pat=",|b", repl=repl, n=n, case=case, regex=True)

    md_ser, pd_ser = create_test_series(data)
    eval_general(
        md_ser,
        pd_ser,
        replace_regex,
        comparator_kwargs={"check_dtypes": check_dtypes},
    )


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
@pytest.mark.parametrize("repeats", int_arg_values, ids=int_arg_keys)
def test_str_repeat(data, repeats):
    """Compare ``str.repeat`` for each parametrized repeat count."""
    eval_general(*create_test_series(data), lambda ser: ser.str.repeat(repeats))


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
def test_str_removeprefix(data):
    """Prepend a known prefix, then compare ``str.removeprefix`` results."""
    prefix = "test_prefix"

    def add_then_remove(ser):
        prefixed = prefix + ser
        return prefixed.str.removeprefix(prefix)

    eval_general(*create_test_series(data), add_then_remove)


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
def test_str_removesuffix(data):
    """Append a known suffix, then compare ``str.removesuffix`` results."""
    suffix = "test_suffix"

    def add_then_remove(ser):
        suffixed = ser + suffix
        return suffixed.str.removesuffix(suffix)

    eval_general(*create_test_series(data), add_then_remove)


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
@pytest.mark.parametrize("width", [-1, 0, 5])
@pytest.mark.parametrize(
    "side", ["left", "right", "both"], ids=["left", "right", "both"]
)
@pytest.mark.parametrize("fillchar", string_sep_values, ids=string_sep_keys)
def test_str_pad(data, width, side, fillchar):
    """Compare ``str.pad`` across widths, sides and fill characters."""

    def pad(ser):
        return ser.str.pad(width, side=side, fillchar=fillchar)

    eval_general(*create_test_series(data), pad)


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
@pytest.mark.parametrize("width", [-1, 0, 5])
@pytest.mark.parametrize("fillchar", string_sep_values, ids=string_sep_keys)
def test_str_center(data, width, fillchar):
    """Compare ``str.center`` across widths and fill characters."""

    def center(ser):
        return ser.str.center(width, fillchar=fillchar)

    eval_general(*create_test_series(data), center)


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
@pytest.mark.parametrize("width", [-1, 0, 5])
@pytest.mark.parametrize("fillchar", string_sep_values, ids=string_sep_keys)
def test_str_ljust(data, width, fillchar):
    """Compare ``str.ljust`` across widths and fill characters."""

    def ljust(ser):
        return ser.str.ljust(width, fillchar=fillchar)

    eval_general(*create_test_series(data), ljust)


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
@pytest.mark.parametrize("width", [-1, 0, 5])
@pytest.mark.parametrize("fillchar", string_sep_values, ids=string_sep_keys)
def test_str_rjust(data, width, fillchar):
    """Compare ``str.rjust`` across widths and fill characters."""

    def rjust(ser):
        return ser.str.rjust(width, fillchar=fillchar)

    eval_general(*create_test_series(data), rjust)


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
@pytest.mark.parametrize("width", [-1, 0, 5])
def test_str_zfill(data, width):
    """Compare ``str.zfill`` for each parametrized width."""
    eval_general(*create_test_series(data), lambda ser: ser.str.zfill(width))


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
@pytest.mark.parametrize("width", [-1, 0, 5])
def test_str_wrap(data, width):
    """Compare ``str.wrap``; every width other than 5 is expected to raise."""
    expected_exception = (
        None if width == 5 else ValueError(f"invalid width {width} (must be > 0)")
    )
    md_ser, pd_ser = create_test_series(data)

    def wrap(ser):
        return ser.str.wrap(width)

    eval_general(md_ser, pd_ser, wrap, expected_exception=expected_exception)


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
@pytest.mark.parametrize("start", int_arg_values, ids=int_arg_keys)
@pytest.mark.parametrize("stop", int_arg_values, ids=int_arg_keys)
@pytest.mark.parametrize("step", [-2, 1, 3])
def test_str_slice(data, start, stop, step):
    """Compare ``str.slice`` across start, stop and step combinations."""

    def slice_strings(ser):
        return ser.str.slice(start=start, stop=stop, step=step)

    eval_general(*create_test_series(data), slice_strings)


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
@pytest.mark.parametrize("start", int_arg_values, ids=int_arg_keys)
@pytest.mark.parametrize("stop", int_arg_values, ids=int_arg_keys)
@pytest.mark.parametrize("repl", string_sep_values, ids=string_sep_keys)
def test_str_slice_replace(data, start, stop, repl):
    """Compare ``str.slice_replace`` across bounds and replacement strings."""

    def slice_replace(ser):
        return ser.str.slice_replace(start=start, stop=stop, repl=repl)

    eval_general(*create_test_series(data), slice_replace)


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
@pytest.mark.parametrize("pat", string_sep_values, ids=string_sep_keys)
def test_str_count(data, pat):
    """Compare ``str.count`` for each parametrized pattern."""
    eval_general(*create_test_series(data), lambda ser: ser.str.count(pat))


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
@pytest.mark.parametrize("pat", string_sep_values, ids=string_sep_keys)
@pytest.mark.parametrize("na", string_na_rep_values, ids=string_na_rep_keys)
def test_str_startswith(data, pat, na):
    """Compare ``str.startswith`` results without checking dtypes."""

    def startswith(ser):
        return ser.str.startswith(pat, na=na)

    eval_general(
        *create_test_series(data),
        startswith,
        # https://github.com/modin-project/modin/issues/5969
        comparator_kwargs={"check_dtypes": False},
    )


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
@pytest.mark.parametrize("pat", string_sep_values, ids=string_sep_keys)
@pytest.mark.parametrize("na", string_na_rep_values, ids=string_na_rep_keys)
def test_str_endswith(data, pat, na):
    """Compare ``str.endswith`` results without checking dtypes."""

    def endswith(ser):
        return ser.str.endswith(pat, na=na)

    eval_general(
        *create_test_series(data),
        endswith,
        # https://github.com/modin-project/modin/issues/5969
        comparator_kwargs={"check_dtypes": False},
    )


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
@pytest.mark.parametrize("pat", string_sep_values, ids=string_sep_keys)
def test_str_findall(data, pat):
    """Compare ``str.findall`` for each parametrized pattern."""
    eval_general(*create_test_series(data), lambda ser: ser.str.findall(pat))


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
@pytest.mark.parametrize("pat", string_sep_values, ids=string_sep_keys)
def test_str_fullmatch(data, pat):
    """Compare ``str.fullmatch`` for each parametrized pattern."""
    eval_general(*create_test_series(data), lambda ser: ser.str.fullmatch(pat))


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
@pytest.mark.parametrize("pat", string_sep_values, ids=string_sep_keys)
@pytest.mark.parametrize("case", bool_arg_values, ids=bool_arg_keys)
@pytest.mark.parametrize("na", string_na_rep_values, ids=string_na_rep_keys)
def test_str_match(data, pat, case, na):
    """Compare ``str.match`` across patterns, case sensitivity and ``na``."""

    def match(ser):
        return ser.str.match(pat, case=case, na=na)

    eval_general(*create_test_series(data), match)


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
@pytest.mark.parametrize("expand", [False, True])
@pytest.mark.parametrize("pat", [r"([ab])", r"([ab])(\d)"])
def test_str_extract(data, expand, pat):
    """Compare ``str.extract`` for one- and two-group patterns."""

    def extract(ser):
        return ser.str.extract(pat, expand=expand)

    eval_general(*create_test_series(data), extract)


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
def test_str_extractall(data):
    """Smoke-test ``str.extractall``; only the defaulting warning is checked."""
    modin_series, _ = create_test_series(data)

    # We are only testing that this defaults to pandas, so we will just check for
    # the warning
    expect_warning = not df_or_series_using_native_execution(modin_series)
    with warns_that_defaulting_to_pandas_if(expect_warning):
        modin_series.str.extractall(r"([ab])(\d)")


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
def test_str_len(data):
    """Compare ``str.len`` between Modin and pandas."""
    eval_general(*create_test_series(data), lambda ser: ser.str.len())


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
@pytest.mark.parametrize("to_strip", string_sep_values, ids=string_sep_keys)
def test_str_strip(data, to_strip):
    """Compare ``str.strip`` for each set of characters to strip."""

    def strip(ser):
        return ser.str.strip(to_strip=to_strip)

    eval_general(*create_test_series(data), strip)


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
@pytest.mark.parametrize("to_strip", string_sep_values, ids=string_sep_keys)
def test_str_rstrip(data, to_strip):
    """Compare ``str.rstrip`` for each set of characters to strip."""

    def rstrip(ser):
        return ser.str.rstrip(to_strip=to_strip)

    eval_general(*create_test_series(data), rstrip)


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
@pytest.mark.parametrize("to_strip", string_sep_values, ids=string_sep_keys)
def test_str_lstrip(data, to_strip):
    """Compare ``str.lstrip`` for each set of characters to strip."""

    def lstrip(ser):
        return ser.str.lstrip(to_strip=to_strip)

    eval_general(*create_test_series(data), lstrip)


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
@pytest.mark.parametrize("sep", string_sep_values, ids=string_sep_keys)
@pytest.mark.parametrize("expand", [False, True])
def test_str_partition(data, sep, expand):
    """Compare ``str.partition``; dtypes are checked only when ``sep`` is not None."""
    # https://github.com/modin-project/modin/issues/5971
    check_dtypes = sep is not None

    def partition(ser):
        return ser.str.partition(sep, expand=expand)

    eval_general(
        *create_test_series(data),
        partition,
        comparator_kwargs={"check_dtypes": check_dtypes},
    )


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
@pytest.mark.parametrize("sep", string_sep_values, ids=string_sep_keys)
@pytest.mark.parametrize("expand", [False, True])
def test_str_rpartition(data, sep, expand):
    """Compare ``str.rpartition``; dtypes are checked only when ``sep`` is not None."""
    # https://github.com/modin-project/modin/issues/5971
    check_dtypes = sep is not None

    def rpartition(ser):
        return ser.str.rpartition(sep, expand=expand)

    eval_general(
        *create_test_series(data),
        rpartition,
        comparator_kwargs={"check_dtypes": check_dtypes},
    )


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
def test_str_lower(data):
    """Compare ``Series.str.lower`` between Modin and pandas."""
    eval_general(*create_test_series(data), lambda ser: ser.str.lower())


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
def test_str_upper(data):
    """Compare ``Series.str.upper`` between Modin and pandas."""
    eval_general(*create_test_series(data), lambda ser: ser.str.upper())


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
def test_str_title(data):
    """Compare ``Series.str.title`` between Modin and pandas."""
    eval_general(*create_test_series(data), lambda ser: ser.str.title())


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
@pytest.mark.parametrize("sub", string_sep_values, ids=string_sep_keys)
@pytest.mark.parametrize("start", int_arg_values, ids=int_arg_keys)
@pytest.mark.parametrize("end", int_arg_values, ids=int_arg_keys)
def test_str_find(data, sub, start, end):
    """Compare ``Series.str.find`` between Modin and pandas over ``start``/``end``."""

    def find_substring(ser):
        return ser.str.find(sub, start=start, end=end)

    eval_general(*create_test_series(data), find_substring)


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
@pytest.mark.parametrize("sub", string_sep_values, ids=string_sep_keys)
@pytest.mark.parametrize("start", int_arg_values, ids=int_arg_keys)
@pytest.mark.parametrize("end", int_arg_values, ids=int_arg_keys)
def test_str_rfind(data, sub, start, end):
    """Compare ``Series.str.rfind`` between Modin and pandas over ``start``/``end``."""

    def find_substring_from_right(ser):
        return ser.str.rfind(sub, start=start, end=end)

    eval_general(*create_test_series(data), find_substring_from_right)


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
@pytest.mark.parametrize("sub", string_sep_values, ids=string_sep_keys)
@pytest.mark.parametrize(
    "start, end",
    [(0, None), (1, -1), (1, 3)],
    ids=["default", "non_default_working", "exception"],
)
def test_str_index(data, sub, start, end, request):
    """Compare ``Series.str.index`` between Modin and pandas.

    Parametrizations whose id contains ``"exception-comma sep"`` are expected
    to raise ``ValueError("substring not found")``.
    """
    md_ser, pd_ser = create_test_series(data)
    should_raise = "exception-comma sep" in request.node.callspec.id
    expected_exception = ValueError("substring not found") if should_raise else None
    eval_general(
        md_ser,
        pd_ser,
        lambda ser: ser.str.index(sub, start=start, end=end),
        expected_exception=expected_exception,
    )


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
@pytest.mark.parametrize("sub", string_sep_values, ids=string_sep_keys)
@pytest.mark.parametrize(
    "start, end",
    [(0, None), (1, -1), (1, 3)],
    ids=["default", "non_default_working", "exception"],
)
def test_str_rindex(data, sub, start, end, request):
    """Compare ``Series.str.rindex`` between Modin and pandas.

    Parametrizations whose id contains ``"exception-comma sep"`` are expected
    to raise ``ValueError("substring not found")``.
    """
    md_ser, pd_ser = create_test_series(data)
    should_raise = "exception-comma sep" in request.node.callspec.id
    expected_exception = ValueError("substring not found") if should_raise else None
    eval_general(
        md_ser,
        pd_ser,
        lambda ser: ser.str.rindex(sub, start=start, end=end),
        expected_exception=expected_exception,
    )


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
def test_str_capitalize(data):
    """Compare ``Series.str.capitalize`` between Modin and pandas."""
    eval_general(*create_test_series(data), lambda ser: ser.str.capitalize())


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
def test_str_swapcase(data):
    """Compare ``Series.str.swapcase`` between Modin and pandas."""
    eval_general(*create_test_series(data), lambda ser: ser.str.swapcase())


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
@pytest.mark.parametrize(
    "form", ["NFC", "NFKC", "NFD", "NFKD"], ids=["NFC", "NFKC", "NFD", "NFKD"]
)
def test_str_normalize(data, form):
    """Compare ``Series.str.normalize`` between Modin and pandas for each ``form``."""

    def normalize(ser):
        return ser.str.normalize(form)

    eval_general(*create_test_series(data), normalize)


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
@pytest.mark.parametrize("pat", string_sep_values, ids=string_sep_keys)
def test_str_translate(data, pat):
    """Compare ``Series.str.translate`` between Modin and pandas.

    Three kinds of table are tried: ``None``, a plain dictionary and, when
    ``pat`` is not ``None``, a table built with ``str.maketrans``.
    """
    md_ser, pd_ser = create_test_series(data)

    # No translation table at all; dtype comparison is disabled because of
    # https://github.com/modin-project/modin/issues/5970
    eval_general(
        md_ser,
        pd_ser,
        lambda ser: ser.str.translate(None),
        comparator_kwargs={"check_dtypes": False},
    )

    # Plain dictionary used as the translation table
    dict_table = {pat: "DDD"}
    eval_general(md_ser, pd_ser, lambda ser: ser.str.translate(dict_table))

    if pat is None:
        return

    # Translation table produced by maketrans (python3 only)
    maketrans_table = str.maketrans(pat, "d" * len(pat))
    eval_general(md_ser, pd_ser, lambda ser: ser.str.translate(maketrans_table))


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
def test_str_isalnum(data):
    """Compare ``Series.str.isalnum`` between Modin and pandas (dtypes unchecked)."""
    # Dtype comparison is disabled because of
    # https://github.com/modin-project/modin/issues/5969
    skip_dtype_check = {"check_dtypes": False}
    eval_general(
        *create_test_series(data),
        lambda ser: ser.str.isalnum(),
        comparator_kwargs=skip_dtype_check,
    )


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
def test_str_isalpha(data):
    """Compare ``Series.str.isalpha`` between Modin and pandas (dtypes unchecked)."""
    # Dtype comparison is disabled because of
    # https://github.com/modin-project/modin/issues/5969
    skip_dtype_check = {"check_dtypes": False}
    eval_general(
        *create_test_series(data),
        lambda ser: ser.str.isalpha(),
        comparator_kwargs=skip_dtype_check,
    )


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
def test_str_isdigit(data):
    """Compare ``Series.str.isdigit`` between Modin and pandas (dtypes unchecked)."""
    # Dtype comparison is disabled because of
    # https://github.com/modin-project/modin/issues/5969
    skip_dtype_check = {"check_dtypes": False}
    eval_general(
        *create_test_series(data),
        lambda ser: ser.str.isdigit(),
        comparator_kwargs=skip_dtype_check,
    )


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
def test_str_isspace(data):
    """Compare ``Series.str.isspace`` between Modin and pandas (dtypes unchecked)."""
    # Dtype comparison is disabled because of
    # https://github.com/modin-project/modin/issues/5969
    skip_dtype_check = {"check_dtypes": False}
    eval_general(
        *create_test_series(data),
        lambda ser: ser.str.isspace(),
        comparator_kwargs=skip_dtype_check,
    )


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
def test_str_islower(data):
    """Compare ``Series.str.islower`` between Modin and pandas (dtypes unchecked)."""
    # Dtype comparison is disabled because of
    # https://github.com/modin-project/modin/issues/5969
    skip_dtype_check = {"check_dtypes": False}
    eval_general(
        *create_test_series(data),
        lambda ser: ser.str.islower(),
        comparator_kwargs=skip_dtype_check,
    )


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
def test_str_isupper(data):
    """Compare ``Series.str.isupper`` between Modin and pandas (dtypes unchecked)."""
    # Dtype comparison is disabled because of
    # https://github.com/modin-project/modin/issues/5969
    skip_dtype_check = {"check_dtypes": False}
    eval_general(
        *create_test_series(data),
        lambda ser: ser.str.isupper(),
        comparator_kwargs=skip_dtype_check,
    )


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
def test_str_istitle(data):
    """Compare ``Series.str.istitle`` between Modin and pandas (dtypes unchecked)."""
    # Dtype comparison is disabled because of
    # https://github.com/modin-project/modin/issues/5969
    skip_dtype_check = {"check_dtypes": False}
    eval_general(
        *create_test_series(data),
        lambda ser: ser.str.istitle(),
        comparator_kwargs=skip_dtype_check,
    )


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
def test_str_isnumeric(data):
    """Compare ``Series.str.isnumeric`` between Modin and pandas (dtypes unchecked)."""
    # Dtype comparison is disabled because of
    # https://github.com/modin-project/modin/issues/5969
    skip_dtype_check = {"check_dtypes": False}
    eval_general(
        *create_test_series(data),
        lambda ser: ser.str.isnumeric(),
        comparator_kwargs=skip_dtype_check,
    )


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
def test_str_isdecimal(data):
    """Compare ``Series.str.isdecimal`` between Modin and pandas (dtypes unchecked)."""
    # Dtype comparison is disabled because of
    # https://github.com/modin-project/modin/issues/5969
    skip_dtype_check = {"check_dtypes": False}
    eval_general(
        *create_test_series(data),
        lambda ser: ser.str.isdecimal(),
        comparator_kwargs=skip_dtype_check,
    )


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
def test_casefold(data):
    """Compare ``Series.str.casefold`` between Modin and pandas."""
    eval_general(*create_test_series(data), lambda ser: ser.str.casefold())


@pytest.fixture
def str_encode_decode_test_data() -> list[str]:
    """Return the sample values shared by the ``str.encode``/``str.decode`` tests.

    Besides plain strings, the returned list also contains ``np.nan`` and
    ``None`` entries.
    """
    ascii_samples = [
        "abC|DeF,Hik",
        "234,3245.67",
        "gSaf,qWer|Gre",
        "asd3,4sad|",
    ]
    missing_samples = [np.nan, None]
    # A string that we can't encode in ascii, and whose utf-8 encoding
    # we cannot decode in ascii.
    non_ascii_samples = ["ക"]
    return ascii_samples + missing_samples + non_ascii_samples


@pytest.mark.parametrize("encoding", encoding_types)
@pytest.mark.parametrize("errors", ["strict", "ignore", "replace"])
def test_str_encode(encoding, errors, str_encode_decode_test_data):
    """Compare ``Series.str.encode`` between Modin and pandas."""
    # For strict ascii encoding it is quite safe to check only the exception
    # types, hence ``False`` instead of a concrete exception instance.
    strict_ascii = errors == "strict" and encoding == "ascii"
    expected_exception = False if strict_ascii else None
    md_ser, pd_ser = create_test_series(str_encode_decode_test_data)
    eval_general(
        md_ser,
        pd_ser,
        lambda ser: ser.str.encode(encoding, errors=errors),
        expected_exception=expected_exception,
    )


@pytest.mark.parametrize(
    "encoding",
    encoding_types,
)
@pytest.mark.parametrize("errors", ["strict", "ignore", "replace"])
def test_str_decode(encoding, errors, str_encode_decode_test_data):
    """Compare ``Series.str.decode`` between Modin and pandas.

    String entries of the fixture are utf-8 encoded first; other entries are
    passed through unchanged.
    """
    # With strict error handling it's quite safe to check only the types of
    # exceptions, hence ``False`` instead of a concrete exception instance.
    expected_exception = False if errors == "strict" else None
    encoded_values = [
        value.encode("utf-8") if isinstance(value, str) else value
        for value in str_encode_decode_test_data
    ]
    md_ser, pd_ser = create_test_series(encoded_values)
    eval_general(
        md_ser,
        pd_ser,
        lambda ser: ser.str.decode(encoding, errors=errors),
        expected_exception=expected_exception,
    )


def test_list_general():
    """Compare the ``Series.list`` accessor between Modin and pandas.

    The test is skipped when ``pyarrow`` cannot be imported.
    """
    pa = pytest.importorskip("pyarrow")

    # Copied from pandas examples
    values = [
        [1, 2, 3],
        [3],
    ]
    md_ser, pd_ser = create_test_series(
        values, dtype=pd.ArrowDtype(pa.list_(pa.int64()))
    )
    accessor_calls = (
        lambda ser: ser.list.flatten(),
        lambda ser: ser.list.len(),
        lambda ser: ser.list[0],
    )
    for call in accessor_calls:
        eval_general(md_ser, pd_ser, call)


def test_struct_general():
    """Compare the ``Series.struct`` accessor between Modin and pandas.

    Covers a flat struct dtype and a nested one; skipped when ``pyarrow``
    cannot be imported.
    """
    pa = pytest.importorskip("pyarrow")

    # Copied from pandas examples
    flat_records = [
        {"version": 1, "project": "pandas"},
        {"version": 2, "project": "pandas"},
        {"version": 1, "project": "numpy"},
    ]
    md_ser, pd_ser = create_test_series(
        flat_records,
        dtype=pd.ArrowDtype(
            pa.struct([("version", pa.int64()), ("project", pa.string())])
        ),
    )
    flat_calls = (
        lambda ser: ser.struct.dtypes,
        lambda ser: ser.struct.field("project"),
        lambda ser: ser.struct.explode(),
    )
    for call in flat_calls:
        eval_general(md_ser, pd_ser, call)

    # nested struct types
    version_type = pa.struct(
        [
            ("major", pa.int64()),
            ("minor", pa.int64()),
        ]
    )
    nested_records = [
        {"version": {"major": 1, "minor": 5}, "project": "pandas"},
        {"version": {"major": 2, "minor": 1}, "project": "pandas"},
        {"version": {"major": 1, "minor": 26}, "project": "numpy"},
    ]
    md_ser, pd_ser = create_test_series(
        nested_records,
        dtype=pd.ArrowDtype(
            pa.struct([("version", version_type), ("project", pa.string())])
        ),
    )
    eval_general(
        md_ser,
        pd_ser,
        lambda ser: ser.struct.field(["version", "minor"]),
    )


def _case_when_caselists():
    def permutations(values):
        return [
            p
            for r in range(1, len(values) + 1)
            for p in itertools.permutations(values, r)
        ]

    conditions = permutations(
        [
            [True, False, False, False] * 10,
            pandas.Series([True, False, False, False] * 10),
            pandas.Series([True, False, False, False] * 10, index=range(78, -2, -2)),
            lambda df: df.gt(0),
        ]
    )
    replacements = permutations([[0, 3, 4, 5] * 10, 0, lambda df: 1])
    caselists = []
    for c in conditions:
        for r in replacements:
            if len(c) == len(r):
                caselists.append(list(zip(c, r)))
    return caselists


@pytest.mark.parametrize(
    "base",
    [
        pandas.Series(range(40)),
        pandas.Series([0, 7, 8, 9] * 10, name="c", index=range(0, 80, 2)),
    ],
)
@pytest.mark.parametrize(
    "caselist",
    _case_when_caselists(),
)
@pytest.mark.skipif(
    Engine.get() == "Dask",
    reason="https://github.com/modin-project/modin/issues/7148",
)
def test_case_when(base, caselist):
    """Check that Modin's ``Series.case_when`` matches the pandas result.

    The pandas result is computed once from ``base``. It is then compared with
    the result for a Modin series built directly from ``base`` and, for
    partitioned executions, for a second Modin series built with an explicit
    row partitioning.
    """
    pandas_result = base.case_when(caselist)
    modin_bases = [pd.Series(base)]

    # 'base' and the series from 'caselist' must have equal lengths, however in this test
    # we want to verify that 'case_when' works correctly even if the partitioning of 'base'
    # and 'caselist' isn't equal. BaseOnPython always uses a single partition, thus
    # skipping this part for it (and for native execution).
    if not (
        f"{StorageFormat.get()}On{Engine.get()}" == "BaseOnPython"
        or current_execution_is_native()
    ):
        # we can only import this function for partitioned execution modes.
        from modin.tests.core.storage_formats.pandas.test_internals import (
            construct_modin_df_by_scheme,
        )

        modin_base_repart = construct_modin_df_by_scheme(
            base.to_frame(),
            partitioning_scheme={"row_lengths": [14, 14, 12], "column_widths": [1]},
        ).squeeze(axis=1)
        # Sanity check: the two Modin bases really are partitioned differently.
        assert (
            modin_bases[0]._query_compiler._modin_frame._partitions.shape
            != modin_base_repart._query_compiler._modin_frame._partitions.shape
        )
        modin_base_repart.name = base.name
        modin_bases.append(modin_base_repart)

    for modin_base in modin_bases:
        df_equals(pandas_result, modin_base.case_when(caselist))
        if any(
            isinstance(data, pandas.Series)
            for case_tuple in caselist
            for data in case_tuple
        ):
            # Repeat the check with every pandas Series in the caselist wrapped
            # into a Modin Series.
            # NOTE(review): 'caselist' is rebound here, so any later iteration of
            # the outer loop starts with the already converted caselist.
            caselist = [
                tuple(
                    pd.Series(data) if isinstance(data, pandas.Series) else data
                    for data in case_tuple
                )
                for case_tuple in caselist
            ]
            df_equals(pandas_result, modin_base.case_when(caselist))


@pytest.mark.parametrize("data", test_string_data_values, ids=test_string_data_keys)
def test_non_commutative_add_string_to_series(data):
    """Check both operand orders when adding a string to a string series.

    ``add`` and ``radd`` must do different things when addition is not
    commutative, as it is for string concatenation. For context see
    https://github.com/modin-project/modin/issues/4908
    """
    operations = (
        lambda ser: "string" + ser,
        lambda ser: ser + "string",
    )
    for operation in operations:
        eval_general(*create_test_series(data), operation)


def test_non_commutative_multiply_pandas():
    """Sanity-check ``NonCommutativeMultiplyInteger`` against plain pandas.

    The non commutative integer class implementation is tricky, so verify that
    multiplying such an integer with a pandas series is really not commutative.
    """
    factor = NonCommutativeMultiplyInteger(2)
    ser = pandas.Series(1, dtype=int)
    left_product = factor * ser
    right_product = ser * factor
    assert not left_product.equals(right_product)


def test_non_commutative_multiply():
    """Check both operand orders of a non-commutative multiplication.

    ``mul`` and ``rmul`` must do different things when multiplication is not
    commutative, which is the case for ``NonCommutativeMultiplyInteger``.
    For context see https://github.com/modin-project/modin/issues/5238
    """
    factor = NonCommutativeMultiplyInteger(2)
    md_ser, pd_ser = create_test_series(1, dtype=int)
    operations = (
        lambda ser: factor * ser,
        lambda ser: ser * factor,
    )
    for operation in operations:
        eval_general(md_ser, pd_ser, operation)


@pytest.mark.parametrize(
    "is_sparse_data", [True, False], ids=["is_sparse", "is_not_sparse"]
)
def test_hasattr_sparse(is_sparse_data):
    """Compare ``hasattr(series, "sparse")`` between Modin and pandas."""
    float_nan_data = test_data["float_nan_data"]
    if is_sparse_data:
        source = pandas.arrays.SparseArray(float_nan_data.values())
    else:
        source = float_nan_data
    md_ser, pd_ser = create_test_series(source)
    eval_general(md_ser, pd_ser, lambda obj: hasattr(obj, "sparse"))


@pytest.mark.parametrize(
    "data", test_data_categorical_values, ids=test_data_categorical_keys
)
def test_cat_categories(data):
    """Compare ``Series.cat.categories`` reads and direct assignment attempts."""
    md_ser, pd_ser = create_test_series(data.copy())
    df_equals(md_ser.cat.categories, pd_ser.cat.categories)

    def assign_categories(obj):
        obj.cat.categories = list("qwert")
        return obj

    # pandas 2.0.0: Removed setting Categorical.categories directly (GH47834)
    # Just check the exception. Its message varies across different versions
    # of Python, so from 3.10 on no concrete exception instance is given.
    if sys.version_info >= (3, 10):
        expected_exception = False
    else:
        expected_exception = AttributeError("can't set attribute")
    eval_general(
        md_ser,
        pd_ser,
        assign_categories,
        expected_exception=expected_exception,
    )


@pytest.mark.parametrize(
    "data", test_data_categorical_values, ids=test_data_categorical_keys
)
def test_cat_ordered(data):
    """Check that ``Series.cat.ordered`` agrees between Modin and pandas."""
    md_ser, pd_ser = create_test_series(data.copy())
    modin_ordered = md_ser.cat.ordered
    pandas_ordered = pd_ser.cat.ordered
    assert modin_ordered == pandas_ordered


@pytest.mark.parametrize(
    "data", test_data_categorical_values, ids=test_data_categorical_keys
)
def test_cat_codes(data):
    """Check that ``Series.cat.codes`` agrees between Modin and pandas."""
    md_ser, pd_ser = create_test_series(data.copy())
    expected = pd_ser.cat.codes
    actual = md_ser.cat.codes
    df_equals(actual, expected)


@pytest.mark.parametrize(
    "set_min_row_partition_size",
    [1, 2],
    ids=["four_row_partitions", "two_row_partitions"],
    indirect=True,
)
def test_cat_codes_issue5650(set_min_row_partition_size):
    """Compare ``cat.codes`` of a categorical column, including dtypes."""
    raw = {"name": ["abc", "def", "ghi", "jkl"]}
    pandas_df = pandas.DataFrame(raw).astype("category")
    modin_df = pd.DataFrame(raw).astype("category")

    def name_codes(df):
        return df["name"].cat.codes

    eval_general(
        modin_df,
        pandas_df,
        name_codes,
        comparator_kwargs={"check_dtypes": True},
    )


@pytest.mark.parametrize(
    "data", test_data_categorical_values, ids=test_data_categorical_keys
)
def test_cat_rename_categories(data):
    """Compare ``Series.cat.rename_categories`` between Modin and pandas.

    Both the source series and the returned series are compared.
    """
    new_names = "qwert"
    md_ser, pd_ser = create_test_series(data.copy())
    expected = pd_ser.cat.rename_categories(list(new_names))
    actual = md_ser.cat.rename_categories(list(new_names))
    df_equals(md_ser, pd_ser)
    df_equals(actual, expected)


@pytest.mark.parametrize(
    "data", test_data_categorical_values, ids=test_data_categorical_keys
)
@pytest.mark.parametrize("ordered", bool_arg_values, ids=bool_arg_keys)
def test_cat_reorder_categories(data, ordered):
    """Compare ``Series.cat.reorder_categories`` between Modin and pandas.

    Both the source series and the returned series are compared.
    """
    new_order = "tades"
    md_ser, pd_ser = create_test_series(data.copy())
    expected = pd_ser.cat.reorder_categories(list(new_order), ordered=ordered)
    actual = md_ser.cat.reorder_categories(list(new_order), ordered=ordered)
    df_equals(md_ser, pd_ser)
    df_equals(actual, expected)


@pytest.mark.parametrize(
    "data", test_data_categorical_values, ids=test_data_categorical_keys
)
def test_cat_add_categories(data):
    """Compare ``Series.cat.add_categories`` between Modin and pandas.

    Both the source series and the returned series are compared.
    """
    extra = "qw"
    md_ser, pd_ser = create_test_series(data.copy())
    expected = pd_ser.cat.add_categories(list(extra))
    actual = md_ser.cat.add_categories(list(extra))
    df_equals(md_ser, pd_ser)
    df_equals(actual, expected)


@pytest.mark.parametrize(
    "data", test_data_categorical_values, ids=test_data_categorical_keys
)
def test_cat_remove_categories(data):
    """Compare ``Series.cat.remove_categories`` between Modin and pandas.

    Both the source series and the returned series are compared.
    """
    removals = "at"
    md_ser, pd_ser = create_test_series(data.copy())
    expected = pd_ser.cat.remove_categories(list(removals))
    actual = md_ser.cat.remove_categories(list(removals))
    df_equals(md_ser, pd_ser)
    df_equals(actual, expected)


@pytest.mark.parametrize(
    "data", test_data_categorical_values, ids=test_data_categorical_keys
)
def test_cat_remove_unused_categories(data):
    """Compare ``Series.cat.remove_unused_categories`` between Modin and pandas.

    The element at label ``1`` is set to NaN in both series before the call.
    """
    md_ser, pd_ser = create_test_series(data.copy())

    pd_ser[1] = np.nan
    expected = pd_ser.cat.remove_unused_categories()

    md_ser[1] = np.nan
    actual = md_ser.cat.remove_unused_categories()

    df_equals(md_ser, pd_ser)
    df_equals(actual, expected)


@pytest.mark.parametrize(
    "data", test_data_categorical_values, ids=test_data_categorical_keys
)
@pytest.mark.parametrize("ordered", bool_arg_values, ids=bool_arg_keys)
@pytest.mark.parametrize("rename", [True, False])
def test_cat_set_categories(data, ordered, rename):
    """Compare ``Series.cat.set_categories`` between Modin and pandas.

    Both the source series and the returned series are compared.
    """
    new_categories = "qwert"
    md_ser, pd_ser = create_test_series(data.copy())
    expected = pd_ser.cat.set_categories(
        list(new_categories), ordered=ordered, rename=rename
    )
    actual = md_ser.cat.set_categories(
        list(new_categories), ordered=ordered, rename=rename
    )
    df_equals(md_ser, pd_ser)
    df_equals(actual, expected)


@pytest.mark.parametrize(
    "data", test_data_categorical_values, ids=test_data_categorical_keys
)
def test_cat_as_ordered(data):
    """Compare ``Series.cat.as_ordered`` between Modin and pandas.

    Both the source series and the returned series are compared.
    """
    md_ser, pd_ser = create_test_series(data.copy())
    expected = pd_ser.cat.as_ordered()
    actual = md_ser.cat.as_ordered()
    df_equals(md_ser, pd_ser)
    df_equals(actual, expected)


@pytest.mark.parametrize(
    "data", test_data_categorical_values, ids=test_data_categorical_keys
)
def test_cat_as_unordered(data):
    """Compare ``Series.cat.as_unordered`` between Modin and pandas.

    Both the source series and the returned series are compared.
    """
    md_ser, pd_ser = create_test_series(data.copy())
    expected = pd_ser.cat.as_unordered()
    actual = md_ser.cat.as_unordered()
    df_equals(md_ser, pd_ser)
    df_equals(actual, expected)


def test_peculiar_callback():
    """Apply a callback that only accepts tuples to a column holding one tuple.

    The callback raises ``BaseException`` for anything that is not a tuple,
    so the test passes only if it is always handed the tuple element itself.
    """

    def callback(value):
        if isinstance(value, tuple):
            return value
        raise BaseException("Urgh...")

    pandas_frame = pandas.DataFrame({"col": [(0, 1)]})
    expected = pandas_frame["col"].apply(callback)

    modin_frame = pd.DataFrame({"col": [(0, 1)]})
    actual = modin_frame["col"].apply(callback)

    df_equals(actual, expected)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_apply_return_df(data):
    """Compare ``Series.apply`` with a callback that returns a pandas Series."""

    def expand(value):
        # One 100-element pandas Series per input element.
        return pandas.Series([value + offset for offset in range(100)])

    eval_general(*create_test_series(data), lambda ser: ser.apply(expand))


@pytest.mark.parametrize(
    "apply_function",
    (
        lambda series, function: function(series),
        lambda series, function: series.apply(function),
        lambda series, function: series.map(function),
    ),
)
@pytest.mark.parametrize("function", UNIVERSAL_UNARY_NUMPY_FUNCTIONS_FOR_FLOATS)
def test_unary_numpy_universal_function_issue_6483_and_7645(function, apply_function):
    """Compare unary NumPy functions applied directly, via ``apply`` and via ``map``."""
    md_ser, pd_ser = create_test_series(test_data["float_nan_data"])

    def run(ser):
        return apply_function(ser, function)

    eval_general(md_ser, pd_ser, run)


def test_binary_numpy_universal_function_issue_6483():
    """Compare ``np.arctan2(series, np.sin(series))`` between Modin and pandas."""
    md_ser, pd_ser = create_test_series(test_data["float_nan_data"])

    def arctan2_with_sine(ser):
        return np.arctan2(ser, np.sin(ser))

    eval_general(md_ser, pd_ser, arctan2_with_sine)


def test__reduce__():
    """Use a series inside a lambda that is applied to another series.

    `Series.__reduce__` will be called implicitly when lambda expressions are
    pre-processed for the distributed engine.
    """
    league_names = ["Major League Baseball", "National Basketball Association"]
    abbr_md, abbr_pd = create_test_series(league_names, index=["MLB", "NBA"])

    teams_md, teams_pd = create_test_dfs(
        {
            "name": ["Mariners", "Lakers"] * 500,
            "league_abbreviation": ["MLB", "NBA"] * 500,
        }
    )

    abbreviations_md = teams_md.set_index("name").league_abbreviation
    result_md = abbreviations_md.apply(lambda key: abbr_md.loc[key]).rename("league")

    abbreviations_pd = teams_pd.set_index("name").league_abbreviation
    result_pd = abbreviations_pd.apply(lambda key: abbr_pd.loc[key]).rename("league")

    df_equals(result_md, result_pd)


@pytest.mark.parametrize(
    "op",
    [
        "add",
        "radd",
        "divmod",
        "eq",
        "floordiv",
        "ge",
        "gt",
        "le",
        "lt",
        "mod",
        "mul",
        "rmul",
        "ne",
        "pow",
        "rdivmod",
        "rfloordiv",
        "rmod",
        "rpow",
        "rsub",
        "rtruediv",
        "sub",
        "truediv",
    ],
)
def test_binary_with_fill_value_issue_7381(op):
    """Ensure that series binary operations respect the ``fill_value`` flag.

    The right-hand operand is shorter than the left-hand one.
    """
    lhs_md, lhs_pd = create_test_series([0, 1, 2, 3])
    rhs_md, rhs_pd = create_test_series([0])
    modin_method = getattr(lhs_md, op)
    actual = modin_method(rhs_md, fill_value=2)
    pandas_method = getattr(lhs_pd, op)
    expected = pandas_method(rhs_pd, fill_value=2)
    df_equals(actual, expected)


@pytest.mark.parametrize("op", ["eq", "ge", "gt", "le", "lt", "ne"])
def test_logical_binary_with_list(op):
    """Compare comparison methods that receive a plain list as the other operand."""
    md_ser, pd_ser = create_test_series([0, 1, 2])
    other = [2, 1, 0]
    modin_method = getattr(md_ser, op)
    actual = modin_method(other)
    pandas_method = getattr(pd_ser, op)
    expected = pandas_method(other)
    df_equals(actual, expected)


@pytest.mark.parametrize("op", ["argmax", "argmin"])
def test_argmax_argmin_7413(op):
    """Ensure argmin/argmax use the positional index, not the actual index value."""
    md_ser, pd_ser = create_test_series([1, 2, 3], index=["b", "a", "c"])
    modin_position = getattr(md_ser, op)()
    pandas_position = getattr(pd_ser, op)()
    assert modin_position == pandas_position


def test_rename_axis():
    """Compare ``Series.rename_axis`` for regular, in-place and invalid-axis calls."""
    md_ser, pd_ser = create_test_series([0, 1, 2])

    # Regular call that returns a new object
    eval_general(md_ser, pd_ser, lambda obj: obj.rename_axis("name"))

    # In-place call
    def rename_in_place(obj):
        return obj.rename_axis("new_name", inplace=True)

    eval_general(md_ser, pd_ser, rename_in_place, __inplace__=True)

    # axis=1 is invalid for series
    invalid_axis_error = ValueError("No axis named 1 for object type Series")
    eval_general(
        md_ser,
        pd_ser,
        lambda obj: obj.rename_axis("newer_name", axis=1),
        expected_exception=invalid_axis_error,
    )


================================================
FILE: modin/tests/pandas/utils.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

from __future__ import annotations

import csv
import functools
import itertools
import math
import os
import re
from contextlib import contextmanager
from io import BytesIO
from pathlib import Path
from string import ascii_letters
from typing import Union

import numpy as np
import pandas
import psutil
import pytest
from pandas.core.dtypes.common import (
    is_bool_dtype,
    is_datetime64_any_dtype,
    is_list_like,
    is_numeric_dtype,
    is_object_dtype,
    is_string_dtype,
    is_timedelta64_dtype,
)

import modin.pandas as pd
from modin import set_execution
from modin.config import (
    Backend,
    Engine,
    MinColumnPartitionSize,
    MinRowPartitionSize,
    NativePandasDeepCopy,
    NPartitions,
    RangePartitioning,
    StorageFormat,
    TestDatasetSize,
    TrackFileLeaks,
)
from modin.pandas.io import to_pandas
from modin.pandas.testing import (
    assert_extension_array_equal,
    assert_frame_equal,
    assert_index_equal,
    assert_series_equal,
)
from modin.utils import try_cast_to_pandas

# Shared, fixed-seed generator. Several datasets below draw from it at import
# time, so the generated values depend on the order of those definitions.
random_state = np.random.RandomState(seed=42)

# Maps a ``TestDatasetSize`` config value to a ``(NCOLS, NROWS)`` pair.
DATASET_SIZE_DICT = {
    "Small": (2**6, 2**6),
    "Normal": (2**6, 2**8),
    "Big": (2**7, 2**12),
}

# Size of test dataframes; falls back to "Normal" when the configured value
# is not a key of ``DATASET_SIZE_DICT``.
NCOLS, NROWS = DATASET_SIZE_DICT.get(TestDatasetSize.get(), DATASET_SIZE_DICT["Normal"])
NGROUPS = 10

# Range for values for test data
RAND_LOW = 0
RAND_HIGH = 100

# Input data and functions for the tests
# The test data that we will test our code against.
# Column labels are rotated by ``NCOLS / 2`` (the first generated column is
# not "col1"), so the dict insertion order differs from the sorted label order.
test_data = {
    # "empty_data": {},
    # "columns_only": {"col1": [], "col2": [], "col3": [], "col4": [], "col5": []},
    # Random integers in [RAND_LOW, RAND_HIGH), NROWS values per column.
    "int_data": {
        "col{}".format(int((i - NCOLS / 2) % NCOLS + 1)): random_state.randint(
            RAND_LOW, RAND_HIGH, size=(NROWS)
        )
        for i in range(NCOLS)
    },
    # Random floats with NaNs: for ``i <= NCOLS // 2`` only row ``j == i`` is NaN;
    # for the remaining columns only every fourth row keeps its value.
    "float_nan_data": {
        "col{}".format(int((i - NCOLS / 2) % NCOLS + 1)): [
            (
                x
                if (j % 4 == 0 and i > NCOLS // 2) or (j != i and i <= NCOLS // 2)
                else np.nan
            )
            for j, x in enumerate(
                random_state.uniform(RAND_LOW, RAND_HIGH, size=(NROWS))
            )
        ]
        for i in range(NCOLS)
    },
    # "int_float_object_data": {
    #     "col3": [1, 2, 3, 4],
    #     "col4": [4, 5, 6, 7],
    #     "col1": [8.0, 9.4, 10.1, 11.3],
    #     "col2": ["a", "b", "c", "d"],
    # },
    # "datetime_timedelta_data": {
    #     "col3": [
    #         np.datetime64("2010"),
    #         np.datetime64("2011"),
    #         np.datetime64("2011-06-15T00:00"),
    #         np.datetime64("2009-01-01"),
    #     ],
    #     "col4": [
    #         np.datetime64("2010"),
    #         np.datetime64("2011"),
    #         np.datetime64("2011-06-15T00:00"),
    #         np.datetime64("2009-01-01"),
    #     ],
    #     "col1": [
    #         np.timedelta64(1, "M"),
    #         np.timedelta64(2, "D"),
    #         np.timedelta64(3, "Y"),
    #         np.timedelta64(20, "D"),
    #     ],
    #     "col2": [
    #         np.timedelta64(1, "M"),
    #         np.timedelta64(2, "D"),
    #         np.timedelta64(3, "Y"),
    #         np.timedelta64(20, "D"),
    #     ],
    # },
    # "all_data": {
    #     "col3": 1.0,
    #     "col4": np.datetime64("2011-06-15T00:00"),
    #     "col5": np.array([3] * 4, dtype="int32"),
    #     "col1": "foo",
    #     "col2": True,
    # },
}
# The parse_dates param can take several different types and combinations of
# types. Use the following values to test date parsing on a CSV created for
# that purpose at `time_parsing_csv_path`.
# Keys are human-readable labels for each value (presumably used as test ids;
# verify against the callers). The last two entries refer to columns that are
# named as nonexistent on purpose.
parse_dates_values_by_id = {
    "bool": False,
    "list_of_single_int": [0],
    "list_of_single_string": ["timestamp"],
    "list_of_list_of_strings": [["year", "month", "date"]],
    "list_of_string_and_list_of_strings": ["timestamp", ["year", "month", "date"]],
    "list_of_list_of_ints": [[1, 2, 3]],
    "list_of_list_of_strings_and_ints": [["year", 2, "date"]],
    "empty_list": [],
    "dict": {"year_and_month": [1, 2], "day": ["date"]},
    "nonexistent_string_column": ["z"],
    "nonexistent_int_column": [99],
}

# See details in #1403
# Re-label one "int_data" column as "index"; popping and re-inserting also
# moves it to the end of the dict.
test_data["int_data"]["index"] = test_data["int_data"].pop(
    "col{}".format(int(NCOLS / 2))
)

# Additionally blank out every 16th row within the first half of the rows of
# every "float_nan_data" column.
for col in test_data["float_nan_data"]:
    for row in range(NROWS // 2):
        if row % 16 == 0:
            test_data["float_nan_data"][col][row] = np.nan

# Parallel lists of the datasets and their names, in dict order.
test_data_values = list(test_data.values())
test_data_keys = list(test_data.keys())

# Random booleans, NROWS values per column, with the same rotated column
# labels as ``test_data``.
test_bool_data = {
    "col{}".format(int((i - NCOLS / 2) % NCOLS + 1)): random_state.choice(
        [True, False], size=(NROWS)
    )
    for i in range(NCOLS)
}

# Builds NROWS columns, each holding NCOLS values cycling through
# 0..NGROUPS-1.
# NOTE(review): NROWS/NCOLS are used the other way round compared to the other
# datasets in this module - confirm this is intentional.
test_groupby_data = {f"col{i}": np.arange(NCOLS) % NGROUPS for i in range(NROWS)}

# Ten random integer columns plus an hourly datetime index of NROWS periods.
test_data_resample = {
    "data": {
        f"col{i}": random_state.randint(RAND_LOW, RAND_HIGH, size=NROWS)
        for i in range(10)
    },
    "index": pandas.date_range("31/12/2000", periods=NROWS, freq="h"),
}

# Datasets with different amounts of duplicated rows/values.
test_data_with_duplicates = {
    # Every column is ``range(NROWS)``.
    "no_duplicates": {
        "col{}".format(int((i - NCOLS / 2) % NCOLS + 1)): range(NROWS)
        for i in range(NCOLS)
    },
    # Every column is constant (its own generation index ``i`` as a float).
    "all_duplicates": {
        "col{}".format(int((i - NCOLS / 2) % NCOLS + 1)): [
            float(i) for _ in range(NROWS)
        ]
        for i in range(NCOLS)
    },
    # Every seventh row holds the column's generation index ``i``; the other
    # rows hold their own row number.
    "some_duplicates": {
        "col{}".format(int((i - NCOLS / 2) % NCOLS + 1)): [
            i if j % 7 == 0 else x for j, x in enumerate(range(NROWS))
        ]
        for i in range(NCOLS)
    },
    # Small frame that contains a column literally called "name".
    "has_name_column": {
        "name": ["one", "two", "two", "three"],
        "col1": [1, 2, 2, 3],
        "col3": [10, 20, 20, 3],
        "col7": [100, 201, 200, 300],
    },
    # String columns cycling through "s0".."s4".
    "str_columns": {
        "col_str{}".format(int((i - NCOLS / 2) % NCOLS + 1)): [
            "s" + str(x % 5) for x in range(NROWS)
        ]
        for i in range(NCOLS)
    },
}

# Shares the very same dict object as ``test_data["float_nan_data"]`` (no copy).
test_data_with_duplicates["float_nan"] = test_data["float_nan_data"]

# A single tiny dataset mixing integer and float columns.
test_data_small = {
    "small": {
        "col0": [1, 2, 3, 4],
        "col1": [8.0, 9.4, 10.1, 11.3],
        "col2": [4, 5, 6, 7],
    }
}

# One column per basic dtype; the float and string columns contain NaN.
test_data_diff_dtype = {
    "int_col": [-5, 2, 7, 16],
    "float_col": [np.nan, -9.4, 10.1, np.nan],
    "str_col": ["a", np.nan, "c", "d"],
    "bool_col": [False, True, True, False],
}

# Parallel value/key lists, in dict order.
test_data_small_values = list(test_data_small.values())
test_data_small_keys = list(test_data_small.keys())

test_data_with_duplicates_values = list(test_data_with_duplicates.values())
test_data_with_duplicates_keys = list(test_data_with_duplicates.keys())

# The same letters as an ordered and as an unordered categorical.
test_data_categorical = {
    "ordered": pandas.Categorical(list("testdata"), ordered=True),
    "unordered": pandas.Categorical(list("testdata"), ordered=False),
}

test_data_categorical_values = list(test_data_categorical.values())
test_data_categorical_keys = list(test_data_categorical.keys())

# Fully fill all of the partitions used in tests.
# Sized from the partitioning config: NPartitions * MinColumnPartitionSize
# integer-labelled columns, each with NPartitions * MinRowPartitionSize
# distinct categorical values.
test_data_large_categorical_dataframe = {
    i: pandas.Categorical(np.arange(NPartitions.get() * MinRowPartitionSize.get()))
    for i in range(NPartitions.get() * MinColumnPartitionSize.get())
}
# Single-column counterpart of the frame above.
test_data_large_categorical_series_values = [
    pandas.Categorical(np.arange(NPartitions.get() * MinRowPartitionSize.get()))
]
test_data_large_categorical_series_keys = ["categorical_series"]

# Names of datasets treated as numeric.
# NOTE(review): only "int_data" and "float_nan_data" are keys of ``test_data``
# as currently defined; the other names presumably refer to datasets that are
# commented out or defined elsewhere - confirm before relying on them.
numeric_dfs = [
    "empty_data",
    "columns_only",
    "int_data",
    "float_nan_data",
    "with_index_column",
]

# Names of datasets treated as non-numeric.
no_numeric_dfs = ["datetime_timedelta_data"]

# String test data: strings mixing "," and "|" separators, plus a NaN entry.
test_string_data = {
    "separator data": [
        "abC|DeF,Hik",
        "234,3245.67",
        "gSaf,qWer|Gre",
        "asd3,4sad|",
        np.nan,
    ]
}

test_string_data_values = list(test_string_data.values())
test_string_data_keys = list(test_string_data.keys())

# List of strings test data: every element is a one-item list of strings.
test_string_list_data = {"simple string": [["a"], ["CdE"], ["jDf"], ["werB"]]}

test_string_list_data_values = list(test_string_list_data.values())
test_string_list_data_keys = list(test_string_list_data.keys())

# Separator values, keyed by a readable label.
string_seperators = {"comma sep": ","}

string_sep_values = list(string_seperators.values())
string_sep_keys = list(string_seperators.keys())

# Candidate ``na_rep`` values, keyed by a readable label.
string_na_rep = {"None na_rep": None, "- na_rep": "-", "nan na_rep": np.nan}

string_na_rep_values = list(string_na_rep.values())
string_na_rep_keys = list(string_na_rep.keys())

# Join kinds; each key equals its value.
join_type = {"left": "left", "right": "right", "inner": "inner", "outer": "outer"}

join_type_keys = list(join_type.keys())
join_type_values = list(join_type.values())


# Unary NumPy functions to apply to float data.
# NOTE: ``np.sin`` and ``np.fabs`` each appear twice in this tuple, so anything
# iterating over it visits them twice.
UNIVERSAL_UNARY_NUMPY_FUNCTIONS_FOR_FLOATS = (
    np.negative,
    np.abs,
    np.sin,
    np.positive,
    np.absolute,
    np.fabs,
    np.rint,
    np.sign,
    np.conj,
    np.conjugate,
    np.exp,
    np.exp2,
    np.log,
    np.log2,
    np.log10,
    np.expm1,
    np.log1p,
    np.sqrt,
    np.square,
    np.cbrt,
    np.reciprocal,
    np.sin,
    np.cos,
    np.tan,
    np.arcsin,
    np.arccos,
    np.arctan,
    np.sinh,
    np.cosh,
    np.tanh,
    np.arcsinh,
    np.arccosh,
    np.arctanh,
    np.degrees,
    np.radians,
    np.deg2rad,
    np.rad2deg,
    np.logical_not,
    np.isfinite,
    np.isinf,
    np.isnan,
    np.fabs,
    np.signbit,
    np.spacing,
    np.floor,
    np.ceil,
    np.trunc,
)

# Test functions for applymap
# The NumPy functions are keyed by their ``__name__``, so tuple entries that
# share a ``__name__`` (e.g. the repeated ``np.sin``) collapse into one key.
test_func = {
    "plus one": lambda x: x + 1,
    "convert to string": str,
    "square": lambda x: x * x,
    "identity": lambda x: x,
    "return false": lambda x: False,
    **{func.__name__: func for func in UNIVERSAL_UNARY_NUMPY_FUNCTIONS_FOR_FLOATS},
}
test_func_keys = list(test_func.keys())
test_func_values = list(test_func.values())

# Subset of ``test_func`` keys naming purely arithmetic functions.
numeric_test_funcs = ["plus one", "square"]

# Test functions for query
# Each key is identical to the query expression it maps to.
query_func = {
    "col1 < col2": "col1 < col2",
    "col3 > col4": "col3 > col4",
    "col1 == col2": "col1 == col2",
    "(col2 > col1) and (col1 < col3)": "(col2 > col1) and (col1 < col3)",
    # this is how to query for values of an unnamed index per
    # https://pandas.pydata.org/docs/user_guide/indexing.html#multiindex-query-syntax
    "ilevel_0 % 2 == 1": "ilevel_0 % 2 == 1",
}
query_func_keys = list(query_func.keys())
query_func_values = list(query_func.values())

# Test agg functions for apply, agg, and aggregate
# Covers string names, callables, lists mixing both, and a non-callable (1).
agg_func = {
    "sum": "sum",
    "df sum": lambda df: df.sum(),
    "str": str,
    "sum mean": ["sum", "mean"],
    "sum df sum": ["sum", lambda df: df.sum()],
    # The case verifies that returning a scalar that is based on a frame's data doesn't cause a problem
    "sum of certain elements": lambda axis: (
        axis.iloc[0] + axis.iloc[-1] if isinstance(axis, pandas.Series) else axis + axis
    ),
    "should raise AssertionError": 1,
}
agg_func_keys = list(agg_func.keys())
agg_func_values = list(agg_func.values())

# For this sort of parameters pandas throws an exception.
# See details in pandas issue 36036.
agg_func_except = {
    "sum sum": ["sum", "sum"],
}
agg_func_except_keys = list(agg_func_except.keys())
agg_func_except_values = list(agg_func_except.values())

# Labels (from the two dicts above) of the list-valued aggregations.
numeric_agg_funcs = ["sum mean", "sum sum", "sum df sum"]

# User-defined functions with different kinds of return values. All accept
# extra positional/keyword arguments; "access self" additionally requires a
# second positional argument exposing ``.index``.
udf_func = {
    "return self": lambda x, *args, **kwargs: type(x)(x.values),
    "change index": lambda x, *args, **kwargs: pandas.Series(
        x.values, index=np.arange(-1, len(x.index) - 1)
    ),
    "return none": lambda x, *args, **kwargs: None,
    "return empty": lambda x, *args, **kwargs: pandas.Series(),
    "access self": lambda x, other, *args, **kwargs: pandas.Series(
        x.values, index=other.index
    ),
}
udf_func_keys = list(udf_func.keys())
udf_func_values = list(udf_func.values())

# Test q values for quantiles: individual scalars plus one list of all of them.
quantiles = {
    "0.25": 0.25,
    "0.5": 0.5,
    "0.75": 0.75,
    "0.66": 0.66,
    "0.01": 0.01,
    "list": [0.25, 0.5, 0.75, 0.66, 0.01],
}
quantiles_keys = list(quantiles.keys())
quantiles_values = list(quantiles.values())

# Test indices for get, set_index, __contains__, insert
# Each key equals its value; "does not exist" is named as a missing label.
indices = {
    "col1": "col1",
    "col2": "col2",
    "A": "A",
    "B": "B",
    "does not exist": "does not exist",
}
indices_keys = list(indices.keys())
indices_values = list(indices.values())

# Test functions for groupby apply: one reducing and one element-wise function.
groupby_apply_func = {"sum": lambda df: df.sum(), "negate": lambda df: -df}
groupby_apply_func_keys = list(groupby_apply_func.keys())
groupby_apply_func_values = list(groupby_apply_func.values())

# Test functions for groupby agg (aggregation names passed as strings).
groupby_agg_func = {"min": "min", "max": "max"}
groupby_agg_func_keys = list(groupby_agg_func.keys())
groupby_agg_func_values = list(groupby_agg_func.values())

# Test functions for groupby transform (both are element-wise).
groupby_transform_func = {
    "add 4": lambda df: df + 4,
    "negatie and minus 10": lambda df: -df - 10,
}
groupby_transform_func_keys = list(groupby_transform_func.keys())
groupby_transform_func_values = list(groupby_transform_func.values())

# Test functions for groupby pipe
groupby_pipe_func = {"sum": lambda df: df.sum()}
groupby_pipe_func_keys = list(groupby_pipe_func.keys())
groupby_pipe_func_values = list(groupby_pipe_func.values())

# END Test input data and functions

# Parametrizations of common kwargs
# Each dict maps a readable label to the value to pass; the ``*_keys`` and
# ``*_values`` lists are parallel and follow dict order.
axis = {
    "over_rows_int": 0,
    "over_rows_str": "rows",
    "over_columns_int": 1,
    "over_columns_str": "columns",
}
axis_keys = list(axis.keys())
axis_values = list(axis.values())

# Tri-state boolean arguments (True / False / None).
bool_arg = {"True": True, "False": False, "None": None}
bool_arg_keys = list(bool_arg.keys())
bool_arg_values = list(bool_arg.values())

# Negative, zero and positive integer arguments.
int_arg = {"-5": -5, "-1": -1, "0": 0, "1": 1, "5": 5}
int_arg_keys = list(int_arg.keys())
int_arg_values = list(int_arg.values())

# END parametrizations of common kwargs

# JSON fixtures: a one-record array and a larger nested document, each also
# provided as a UTF-8 encoded in-memory byte stream.
json_short_string = """[{"project": "modin"}]"""
json_long_string = """{
        "quiz": {
            "sport": {
                "q1": {
                    "question": "Which one is correct team name in NBA?",
                    "options": [
                        "New York Bulls",
                        "Los Angeles Kings",
                        "Golden State Warriros",
                        "Huston Rocket"
                    ],
                    "answer": "Huston Rocket"
                }
            },
            "maths": {
                "q1": {
                    "question": "5 + 7 = ?",
                    "options": [
                        "10",
                        "11",
                        "12",
                        "13"
                    ],
                    "answer": "12"
                },
                "q2": {
                    "question": "12 - 8 = ?",
                    "options": [
                        "1",
                        "2",
                        "3",
                        "4"
                    ],
                    "answer": "4"
                }
            }
        }
    }"""
# NOTE: these are module-level stream objects, so their read position is
# shared by every user; a consumer that has read one must rewind it
# (``seek(0)``) before it can be read again.
json_long_bytes = BytesIO(json_long_string.encode(encoding="UTF-8"))
json_short_bytes = BytesIO(json_short_string.encode(encoding="UTF-8"))


# Text encoding types (Python codec names)
encoding_types = [
    "ascii",
    "utf_32",
    "utf_32_be",
    "utf_32_le",
    "utf_16",
    "utf_16_be",
    "utf_16_le",
    "utf_7",
    "utf_8",
    "utf_8_sig",
]

# Warning filter specification ("action:message regex:category") matching the
# "defaulting to pandas" UserWarning.
default_to_pandas_ignore_string = "default:.*defaulting to pandas.*:UserWarning"

# Files compression to extension mapping
COMP_TO_EXT = {"gzip": "gz", "bz2": "bz2", "xz": "xz", "zip": "zip"}


# Relative path to the CSV used for date-parsing tests.
# NOTE(review): being relative, it presumably resolves against the repository
# root as the working directory - confirm.
time_parsing_csv_path = "modin/tests/pandas/data/test_time_parsing.csv"


class CustomIntegerForAddition:
    """Thin wrapper around a value that supports addition from either side.

    ``obj + x`` evaluates ``obj.value + x`` and ``x + obj`` evaluates
    ``x + obj.value``; in both cases the raw result of the underlying
    addition is returned (not a new wrapper).
    """

    def __init__(self, value: int):
        self.value = value

    def __add__(self, other):
        wrapped = self.value
        return wrapped + other

    def __radd__(self, other):
        wrapped = self.value
        return other + wrapped


class NonCommutativeMultiplyInteger:
    """Integer-like object whose multiplication depends on operand order.

    Almost every real multiplication is commutative, which makes it hard to
    check that ``mul`` and ``rmul`` are dispatched correctly. This class makes
    the two distinguishable when combined with a pandas.Series (which does not
    know how to ``__mul__`` with this class):

    NonCommutativeMultiplyInteger(2) * pd.Series(1, dtype=int) == pd.Series(2, dtype=int)
    pd.Series(1, dtype=int) * NonCommutativeMultiplyInteger(2) == pd.Series(3, dtype=int)
    """

    def __init__(self, value: int):
        if isinstance(value, int):
            self.value = value
            return
        raise TypeError(
            f"must initialize with integer, but got {value} of type {type(value)}"
        )

    def __mul__(self, other):
        # Only plain ints are multiplied here. For anything else (notably a
        # series) we defer with NotImplemented so that the other operand's
        # reflected multiplication runs instead of us multiplying ``self.value``
        # by the series directly.
        if isinstance(other, int):
            return self.value * other
        return NotImplemented

    def __rmul__(self, other):
        # The "+ 1" is what makes the reflected product differ from ``__mul__``.
        product = self.value * other
        return product + 1


def categories_equals(left, right):
    """
    Assert that two categorical objects agree in orderedness and in content.

    Parameters
    ----------
    left : object
        Must expose an ``ordered`` attribute.
    right : object
        Must expose an ``ordered`` attribute.

    Raises
    ------
    AssertionError
        If only one of the objects is ordered, or if
        ``assert_extension_array_equal`` rejects the pair.
    """
    # Only the truthiness of ``ordered`` is compared.
    same_orderedness = bool(left.ordered) == bool(right.ordered)
    assert same_orderedness
    assert_extension_array_equal(left, right)


def df_categories_equals(df1, df2):
    """
    Assert that the categorical parts of two objects are equal.

    For objects exposing ``select_dtypes`` the categorical columns are compared
    position by position. Other objects are compared through
    ``categories_equals`` when they are (or both carry) a categorical dtype.

    Parameters
    ----------
    df1 : object
    df2 : object

    Returns
    -------
    True or None
        ``True`` when `df1` has no ``select_dtypes`` attribute, ``None`` otherwise.

    Raises
    ------
    AssertionError
        If any of the checks fails.
    """
    if hasattr(df1, "select_dtypes"):
        left_cats = df1.select_dtypes(include="category")
        right_cats = df2.select_dtypes(include="category")
        assert left_cats.columns.equals(right_cats.columns)
        # Walk the columns by position rather than by label: labels may be
        # duplicated, and selecting a duplicated label (e.g. two columns named
        # col1) yields a two-column frame instead of a series.
        for position, _ in enumerate(left_cats.columns):
            assert_extension_array_equal(
                left_cats.iloc[:, position].values,
                right_cats.iloc[:, position].values,
                check_dtype=False,
            )
        return None

    if isinstance(df1, pandas.CategoricalDtype):
        categories_equals(df1, df2)
    elif isinstance(df1.dtype, pandas.CategoricalDtype) and isinstance(
        df2.dtype, pandas.CategoricalDtype
    ):
        categories_equals(df1.dtype, df2.dtype)
    return True


def assert_empty_frame_equal(df1, df2):
    """
    Check that df1 and df2 agree on emptiness.

    Both objects must be empty or both non-empty; when both are empty they
    must additionally be of exactly the same type.

    Parameters
    ----------
    df1 : pandas.DataFrame or pandas.Series
    df2 : pandas.DataFrame or pandas.Series

    Raises
    ------
    AssertionError
        If check fails.
    """
    first_empty = bool(df1.empty)
    second_empty = bool(df2.empty)

    assert (
        first_empty == second_empty
    ), "One of the passed frames is empty, when other isn't"

    if first_empty and second_empty:
        assert type(df1) is type(
            df2
        ), f"Empty frames have different types: {type(df1)} != {type(df2)}"


def assert_all_act_same(condition, *objs):
    """
    Evaluate the condition on every object and assert that all outcomes are equal.

    Parameters
    ----------
    condition : callable(obj) -> bool
        Condition to run on the passed objects.
    *objs :
        Objects to pass to the condition.

    Returns
    -------
    bool or None
        The (shared) outcome of the condition, or ``None`` when no objects
        were passed.

    Raises
    ------
    AssertionError
        If the outcome for any object differs from the outcome for the first one.
    """
    # Evaluate everything up front, then compare against the first outcome.
    outcomes = list(map(condition, objs))
    if not outcomes:
        return None

    reference, *others = outcomes
    for outcome in others:
        assert reference == outcome
    return reference


def assert_dtypes_equal(df1, df2):
    """
    Assert that two DataFrame/Series objects have dtypes of the same class.

    The comparison is deliberately loose:
        1. The dtypes are not required to be in the same order
           (e.g. {"col1": int, "col2": float} == {"col2": float, "col1": int})
        2. The dtypes are only required to be in the same class
           (e.g. both numerical, both categorical, etc...)

    Nothing is checked unless both arguments are pandas or Modin
    DataFrame/Series objects.

    Parameters
    ----------
    df1 : DataFrame or Series
    df2 : DataFrame or Series
    """
    frame_like = (pandas.Series, pd.Series, pandas.DataFrame, pd.DataFrame)
    if not (isinstance(df1, frame_like) and isinstance(df2, frame_like)):
        return

    if isinstance(df1.dtypes, (pandas.Series, pd.Series)):
        dtypes1, dtypes2 = df1.dtypes, df2.dtypes
    else:
        # ``dtypes`` is a single dtype here: wrap each into a one-element series
        dtypes1, dtypes2 = (
            pandas.Series({"col": obj.dtypes}) for obj in (df1, df2)
        )

    # Labels only have to match as a set, not in order
    assert len(dtypes1.index.difference(dtypes2.index)) == 0
    assert len(dtypes1) == len(dtypes2)

    def _is_object_or_string(obj):
        return is_object_dtype(obj) or is_string_dtype(obj)

    def _is_categorical(obj):
        return isinstance(obj, pandas.CategoricalDtype)

    def _is_period(obj):
        return isinstance(obj, pandas.PeriodDtype)

    # Tried in this order for every pair of dtypes.
    dtype_classes = (
        is_numeric_dtype,
        _is_object_or_string,
        is_bool_dtype,
        _is_categorical,
        is_datetime64_any_dtype,
        is_timedelta64_dtype,
        _is_period,
    )

    for position in range(len(dtypes1)):
        left_dtype = dtypes1.iloc[position]
        right_dtype = dtypes2.iloc[position]
        for belongs_to_class in dtype_classes:
            # Both dtypes must give the same answer for every class tried; once
            # a class accepts both, move on to the next pair of dtypes.
            if assert_all_act_same(belongs_to_class, left_dtype, right_dtype):
                break


def assert_set_of_rows_identical(df1, df2):
    """
    Assert that both objects contain the same set of (index label, row values) tuples.

    Row order is ignored. Works much slower than ``df1.equals(df2)``, so it's
    recommended to use this function only in exceptional cases.
    """

    def _row_set(obj):
        frame = obj.to_frame() if obj.ndim == 1 else obj
        # NaN never compares equal to itself while None does, so swap NaN for
        # None to make rows with missing values comparable.
        frame = frame.replace({np.nan: None})
        return {(label, *values.tolist()) for label, values in frame.iterrows()}

    assert _row_set(df1) == _row_set(df2)


def sort_data(data):
    """
    Return a sorted version of the passed data.

    DataFrames (pandas or Modin) are sorted by all of their columns and get a
    fresh index, Series are sorted by value, anything else goes through
    ``np.sort``.
    """
    frame_types = (pandas.DataFrame, pd.DataFrame)
    if isinstance(data, frame_types):
        sort_keys = data.columns.to_list()
        return data.sort_values(sort_keys, ignore_index=True)

    series_types = (pandas.Series, pd.Series)
    if isinstance(data, series_types):
        return data.sort_values()

    return np.sort(data)


def sort_if_range_partitioning(df1, df2, comparator=None, force=False):
    """
    Compare two objects, sorting both first when 'RangePartitioning' is enabled.

    Parameters
    ----------
    df1 : object
    df2 : object
    comparator : callable(df1, df2), optional
        Comparison function; ``df_equals`` is used when not specified.
    force : bool, default: False
        Sort the objects regardless of the 'RangePartitioning' setting.
    """
    compare = df_equals if comparator is None else comparator

    if force or RangePartitioning.get():
        df1 = sort_data(df1)
        df2 = sort_data(df2)

    compare(df1, df2)


def df_equals(df1, df2, check_dtypes=True):
    """Assert that df1 and df2 are equal.

    Parameters
    ----------
    df1 : object
        pandas or Modin DataFrame/Series, a list/tuple of those, an index,
        an extension array, a groupby object, a numpy array or a scalar.
    df2 : object
        Object to compare `df1` with.
    check_dtypes : bool, default: True
        Whether to also verify (via ``assert_dtypes_equal``) that the dtypes of
        the two objects belong to the same class.

    Returns
    -------
    None

    Raises
    ------
    AssertionError
        If the objects are not considered equal.
    """
    # Imported lazily: per the original note, importing Modin's groupby object
    # any other way results in an AttributeError.
    from modin.pandas.groupby import DataFrameGroupBy

    groupby_types = (pandas.core.groupby.DataFrameGroupBy, DataFrameGroupBy)
    frame_like = (pd.DataFrame, pd.Series, pandas.DataFrame, pandas.Series)

    # Collections of frames are compared element by element. This is handled
    # before the "empty object" check below because lists and tuples have an
    # ``index`` method and would otherwise be mistaken for empty frames.
    if isinstance(df1, (list, tuple)) and all(isinstance(d, frame_like) for d in df1):
        assert isinstance(df2, type(df1)), "Different type of collection"
        assert len(df1) == len(df2), "Different length result"
        # Compare eagerly: a lazily evaluated generator would never run the checks.
        for d1, d2 in zip(df1, df2):
            df_equals(d1, d2, check_dtypes=check_dtypes)
        return

    # The typing behavior of how pandas treats its index is not consistent when the
    # length of the DataFrame or Series is 0, so we just verify that the contents are
    # the same.
    if (
        hasattr(df1, "index")
        and hasattr(df2, "index")
        and len(df1) == 0
        and len(df2) == 0
    ):
        if type(df1).__name__ == type(df2).__name__:
            if hasattr(df1, "name") and hasattr(df2, "name") and df1.name == df2.name:
                return
            if (
                hasattr(df1, "columns")
                and hasattr(df2, "columns")
                and df1.columns.equals(df2.columns)
            ):
                return
        assert False, (
            "Empty objects differ: "
            + f"{type(df1).__name__} is not equivalent to {type(df2).__name__}"
        )

    if check_dtypes:
        assert_dtypes_equal(df1, df2)

    # Convert to pandas
    if isinstance(df1, (pd.DataFrame, pd.Series)):
        df1 = to_pandas(df1)
    if isinstance(df2, (pd.DataFrame, pd.Series)):
        df2 = to_pandas(df2)

    if isinstance(df1, pandas.DataFrame) and isinstance(df2, pandas.DataFrame):
        assert_empty_frame_equal(df1, df2)
        assert_frame_equal(
            df1,
            df2,
            check_dtype=False,
            check_datetimelike_compat=True,
            check_index_type=False,
            check_column_type=False,
            check_categorical=False,
        )
        # Categoricals were excluded above, so compare them separately.
        df_categories_equals(df1, df2)
    elif isinstance(df1, pandas.Index) and isinstance(df2, pandas.Index):
        assert_index_equal(df1, df2)
    elif isinstance(df1, pandas.Series) and isinstance(df2, pandas.Series):
        assert_series_equal(df1, df2, check_dtype=False, check_series_type=False)
    elif (
        hasattr(df1, "dtype")
        and hasattr(df2, "dtype")
        and isinstance(df1.dtype, pandas.core.dtypes.dtypes.ExtensionDtype)
        and isinstance(df2.dtype, pandas.core.dtypes.dtypes.ExtensionDtype)
    ):
        assert_extension_array_equal(df1, df2)
    elif isinstance(df1, groupby_types) and isinstance(df2, groupby_types):
        # Groups are compared pairwise: first the group key, then the group data.
        for g1, g2 in zip(df1, df2):
            assert g1[0] == g2[0]
            df_equals(g1[1], g2[1])
    elif isinstance(df1, pandas.core.arrays.NumpyExtensionArray):
        assert isinstance(df2, pandas.core.arrays.NumpyExtensionArray)
        assert df1 == df2
    elif isinstance(df1, np.recarray) and isinstance(df2, np.recarray):
        np.testing.assert_array_equal(df1, df2)
    else:
        # Fall back to an "almost equal" comparison only if exact equality fails.
        res = df1 != df2
        if res.any() if isinstance(res, np.ndarray) else res:
            np.testing.assert_almost_equal(df1, df2)


def modin_df_almost_equals_pandas(modin_df, pandas_df, max_diff=0.0001):
    """
    Assert that a Modin object is equal to a pandas one up to a relative tolerance.

    Parameters
    ----------
    modin_df : modin.pandas.DataFrame or modin.pandas.Series
    pandas_df : pandas.DataFrame or pandas.Series
    max_diff : float, default: 0.0001
        Strict upper bound for the largest relative difference
        ``|modin - pandas| / |pandas|``.

    Raises
    ------
    AssertionError
        If the categorical parts differ, if only one of the objects consists
        entirely of missing values, or if the tolerance is exceeded.
    """
    df_categories_equals(modin_df._to_pandas(), pandas_df)

    modin_df = to_pandas(modin_df)

    # Categorical data was already compared above; exclude it from the numeric check.
    non_categorical = {"exclude": ["category"]}
    if hasattr(modin_df, "select_dtypes"):
        modin_df = modin_df.select_dtypes(**non_categorical)
    if hasattr(pandas_df, "select_dtypes"):
        pandas_df = pandas_df.select_dtypes(**non_categorical)

    # Exactly equal objects need no tolerance check.
    if modin_df.equals(pandas_df):
        return

    # If the Modin side holds nothing but missing values, so must the pandas side.
    all_missing = modin_df.isna().all()
    if isinstance(all_missing, bool):
        if all_missing:
            assert pandas_df.isna().all()
            return
    elif all_missing.all():
        assert pandas_df.isna().all().all()
        return

    relative_error = (modin_df - pandas_df).abs()
    relative_error /= pandas_df.abs()
    if isinstance(relative_error, pandas.Series):
        diff_max = relative_error.max()
    else:
        diff_max = relative_error.max().max()
    assert diff_max < max_diff, f"{diff_max} >= {max_diff}"


def try_modin_df_almost_equals_compare(df1, df2):
    """Compare two objects approximately when both are fully numeric, exactly otherwise."""

    def _dtype_list(obj):
        # ``dtypes`` is a single dtype for one-dimensional objects
        dtypes = obj.dtypes
        return dtypes if is_list_like(dtypes) else [dtypes]

    dtypes1 = _dtype_list(df1)
    dtypes2 = _dtype_list(df2)

    # `modin_df_almost_equals_pandas` is numeric-only comparator
    first_numeric = all(is_numeric_dtype(dtype) for dtype in dtypes1)
    if first_numeric and all(is_numeric_dtype(dtype) for dtype in dtypes2):
        modin_df_almost_equals_pandas(df1, df2)
    else:
        df_equals(df1, df2)


def df_is_empty(df):
    """Assert that df is empty.

    Args:
        df: (pandas or modin DataFrame) dataframe to test if empty.

    Raises:
        AssertionError: if df has a non-zero size, is not reported as empty,
            or neither of its first two dimensions has zero length.
    """
    assert df.size == 0
    assert df.empty
    shape = df.shape
    # The second dimension is only looked at when the first one is non-zero.
    assert shape[0] == 0 or shape[1] == 0


def arg_keys(arg_name, keys):
    """Prefix every key with the argument name and an underscore.

    Args:
        arg_name: (string) String containing argument name.
        keys: (list of strings) Possible inputs of argument.

    Returns:
        List of strings of the form "<arg_name>_<key>", in the order of keys.
    """
    template = "{0}_{1}"
    prefixed = []
    for key in keys:
        prefixed.append(template.format(arg_name, key))
    return prefixed


def name_contains(test_name, vals):
    """Tell whether test_name contains at least one of the given substrings.

    Args:
        test_name: (string) String to determine if contains substrings.
        vals: (list of strings) List of substrings to test for.

    Returns:
        True if a substring in vals is in test_name, else False.
    """
    for candidate in vals:
        if candidate in test_name:
            return True
    return False


def check_df_columns_have_nans(df, cols):
    """Checks if there are NaN values in specified columns of a dataframe.

    For a list-like ``cols`` each element is either a column label (string)
    present in the dataframe, or a Modin Series whose ``_parent`` is this very
    dataframe; other elements are ignored.

    :param df: Dataframe to check.
    :param cols: One column name or list of column names.
    :return:
        True if specified columns of dataframe contains NaNs.
    """
    if pandas.api.types.is_list_like(cols):
        # Column labels are examined first, series taken from `df` second.
        label_has_nans = any(
            isinstance(x, str) and x in df.columns and df[x].hasnans for x in cols
        )
        return label_has_nans or any(
            isinstance(x, pd.Series) and x._parent is df and x.hasnans for x in cols
        )

    # A single label: it has to be a column of the dataframe.
    return cols in df.columns and df[cols].hasnans


class NoModinException(Exception):
    """Raised when pandas throws an exception for an operation but Modin does not."""


def eval_general(
    modin_df,
    pandas_df,
    operation,
    comparator=df_equals,
    __inplace__=False,
    expected_exception=None,
    check_kwargs_callable=True,
    md_extra_kwargs=None,
    comparator_kwargs=None,
    check_for_execution_propagation=True,
    no_check_for_execution_propagation_reason=None,
    **kwargs,
):
    """
    Run the same operation on a Modin and a pandas object and compare the outcomes.

    Parameters
    ----------
    modin_df : object
        Passed as the first argument to `operation` on the Modin side.
    pandas_df : object
        Passed as the first argument to `operation` on the pandas side.
    operation : callable(df, **kwargs)
        Operation to run on both objects.
    comparator : callable(modin_result, pandas_result, **comparator_kwargs), default: df_equals
        Function used to compare the two results.
    __inplace__ : bool, default: False
        If True, the return values of `operation` are ignored and `modin_df` and
        `pandas_df` themselves are compared afterwards.
    expected_exception : Exception, False or None, default: None
        Used only when pandas raises. An exception instance means that Modin's
        exception must have exactly that type and the same ``args``, and the
        pandas exception must have the same ``args``. ``False`` disables the
        check of ``args``. With ``None`` the pandas exception is re-raised even
        when Modin raised an exception of a matching type.
    check_kwargs_callable : bool, default: True
        If True, every callable value in `kwargs` is first called with the
        Modin and the pandas object, and the respective results are passed to
        `operation` in its place.
    md_extra_kwargs : dict, optional
        Keyword arguments passed to `operation` on the Modin side only.
    comparator_kwargs : dict, optional
        Extra keyword arguments for `comparator`.
    check_for_execution_propagation : bool, default: True
        If True and `modin_df` and the Modin result are Modin DataFrame/Series
        objects, assert that the result has the same engine and storage format
        as `modin_df`.
    no_check_for_execution_propagation_reason : str, optional
        Non-empty explanation that is required in the situation above when
        `check_for_execution_propagation` is False.
    **kwargs : dict
        Keyword arguments for `operation`.

    Raises
    ------
    NoModinException
        If pandas raises an exception but Modin does not.
    AssertionError
        If any of the checks fails.
    """
    md_kwargs, pd_kwargs = {}, {}

    if isinstance(modin_df, (pd.DataFrame, pd.Series)):
        original_engine = modin_df._query_compiler.engine
        original_storage_format = modin_df._query_compiler.storage_format
    else:
        original_engine = None
        original_storage_format = None

    def execute_callable(fn, inplace=False, md_kwargs=None, pd_kwargs=None):
        # Returns ``(modin result, pandas result)`` on success and ``None`` when
        # both libraries raised an acceptable exception.
        md_kwargs = {} if md_kwargs is None else md_kwargs
        pd_kwargs = {} if pd_kwargs is None else pd_kwargs
        try:
            pd_result = fn(pandas_df, **pd_kwargs)
        except Exception as pd_e:
            try:
                if inplace:
                    _ = fn(modin_df, **md_kwargs)
                    try_cast_to_pandas(modin_df)  # force materialization
                else:
                    try_cast_to_pandas(
                        fn(modin_df, **md_kwargs)
                    )  # force materialization
            except Exception as md_e:
                assert isinstance(
                    md_e, type(pd_e)
                ), "Got Modin Exception type {}, but pandas Exception type {} was expected".format(
                    type(md_e), type(pd_e)
                )
                if expected_exception:
                    if Engine.get() == "Ray":
                        from ray.exceptions import RayTaskError

                        # unwrap ray exceptions from remote worker
                        if isinstance(md_e, RayTaskError):
                            md_e = md_e.args[0]
                    assert (
                        type(md_e) is type(expected_exception)
                        and md_e.args == expected_exception.args
                    ), f"not acceptable Modin's exception: [{repr(md_e)}]"
                    assert (
                        pd_e.args == expected_exception.args
                    ), f"not acceptable Pandas' exception: [{repr(pd_e)}]"
                elif expected_exception is False:
                    # The only way to disable exception message checking.
                    pass
                else:
                    # It's not enough that Modin and pandas have the same types of exceptions;
                    # we need to explicitly specify the instance of an exception
                    # (using `expected_exception`) in tests so that we can check exception messages.
                    # This allows us to eliminate situations where exceptions are thrown
                    # that we don't expect, which could hide different bugs.
                    raise pd_e
            else:
                raise NoModinException(
                    f"Modin doesn't throw an exception, while pandas does: [{repr(pd_e)}]"
                )
        else:
            md_result = fn(modin_df, **md_kwargs)
            return (md_result, pd_result) if not inplace else (modin_df, pandas_df)

    for key, value in kwargs.items():
        if check_kwargs_callable and callable(value):
            values = execute_callable(value)
            # that means, that callable raised an exception
            if values is None:
                return
            else:
                md_value, pd_value = values
        else:
            md_value, pd_value = value, value

        md_kwargs[key] = md_value
        pd_kwargs[key] = pd_value

    # Applied after (and outside of) the loop so that the Modin-only arguments
    # are honoured even when no common keyword arguments were passed; they still
    # take precedence over common arguments with the same name.
    if md_extra_kwargs:
        assert isinstance(md_extra_kwargs, dict)
        md_kwargs.update(md_extra_kwargs)

    values = execute_callable(
        operation, md_kwargs=md_kwargs, pd_kwargs=pd_kwargs, inplace=__inplace__
    )
    if values is not None:
        assert isinstance(values, tuple) and len(values) == 2
        modin_result, pandas_result = values
        if (
            isinstance(modin_result, (pd.DataFrame, pd.Series))
            and original_engine is not None
            and original_storage_format is not None
        ):
            if check_for_execution_propagation:
                assert modin_result._query_compiler.engine == original_engine, (
                    f"Result engine {modin_result._query_compiler.engine} does "
                    + f"not match expected engine {original_engine}"
                )
                assert (
                    modin_result._query_compiler.storage_format
                    == original_storage_format
                ), (
                    "Result storage format "
                    + f"{modin_result._query_compiler.storage_format} does "
                    + f"not match expected storage format {original_storage_format}"
                )
            else:
                assert (
                    isinstance(no_check_for_execution_propagation_reason, str)
                    and len(no_check_for_execution_propagation_reason) > 0
                ), (
                    "Must provide a reason for not expecting the operation to "
                    + "propagate dataframe/series engine."
                )
        comparator(modin_result, pandas_result, **(comparator_kwargs or {}))


def eval_io(
    fn_name,
    comparator=df_equals,
    cast_to_str=False,
    expected_exception=None,
    check_kwargs_callable=True,
    modin_warning=None,
    modin_warning_str_match=None,
    md_extra_kwargs=None,
    *args,
    **kwargs,
):
    """Run an I/O function through Modin and pandas and compare the outputs.

    Parameters
    ----------
    fn_name : str
        Name of the I/O function looked up on both modules
        ("read_csv" for example).
    comparator : callable
        Function used to compare the two results.
    cast_to_str : bool
        Cast the result to `str` before comparison to hide dtype
        mismatches. See issue #1931 for details.
    expected_exception : Exception
        Exception that should be raised even if it is raised
        both by Pandas and Modin.
    check_kwargs_callable : bool
        Forwarded unchanged to `eval_general`.
    modin_warning : obj
        Warning that should be raised by Modin.
    modin_warning_str_match : str
        If `modin_warning` is set, checks that the raised warning matches this string.
    md_extra_kwargs : dict
        Modin operation specific kwargs.
    *args, **kwargs
        Forwarded to `eval_general`.
    """

    def applyier(module, *args, **kwargs):
        reader = getattr(module, fn_name)
        frame = reader(*args, **kwargs)
        if cast_to_str:
            frame = frame.astype(str)
        if not isinstance(frame, (pd.DataFrame, pd.Series)):
            return frame
        # Input methods that return a Modin object, e.g. read_csv, must
        # produce it with the default Engine and StorageFormat.
        assert frame._query_compiler.engine == Engine.get()
        assert frame._query_compiler.storage_format == StorageFormat.get()
        return frame

    def call_eval_general():
        eval_general(
            pd,
            pandas,
            applyier,
            *args,
            comparator=comparator,
            expected_exception=expected_exception,
            check_kwargs_callable=check_kwargs_callable,
            md_extra_kwargs=md_extra_kwargs,
            **kwargs,
        )

    if not modin_warning:
        call_eval_general()
        return

    # A truthy `modin_warning` is never None, so the match string applies as-is.
    with pytest.warns(modin_warning, match=modin_warning_str_match):
        call_eval_general()


def eval_io_from_str(csv_str: str, unique_filename: str, **kwargs):
    """Write `csv_str` to a file and check `read_csv` equality on that file.

    Parameters
    ----------
    csv_str : str
        Test data to store in the csv file.
    unique_filename : str
        Name of the csv file to create.
    **kwargs
        Forwarded to `eval_io`.
    """
    with open(unique_filename, "w") as csv_file:
        csv_file.write(csv_str)

    eval_io(fn_name="read_csv", filepath_or_buffer=unique_filename, **kwargs)


def create_test_dfs(
    *args, post_fn=None, backend=None, **kwargs
) -> tuple[pd.DataFrame, pandas.DataFrame]:
    """Build a Modin frame and a pandas frame from the same constructor arguments.

    Parameters
    ----------
    *args, **kwargs
        Passed to both ``pd.DataFrame`` and ``pandas.DataFrame``.
    post_fn : callable, optional
        Applied to each freshly built frame; its return value replaces the frame.
    backend : str, optional
        When given, each frame (after `post_fn`, if any) is converted with
        ``convert_dtypes(dtype_backend=backend)``.

    Returns
    -------
    tuple
        ``(modin_frame, pandas_frame)``.
    """

    def finalize(df):
        # The user callback is read from the enclosing scope and never rebound.
        # Rebinding `post_fn` to a lambda that calls `post_fn` makes the lambda
        # call itself, which recurses without end when both options are given.
        if post_fn is not None:
            df = post_fn(df)
        if backend is not None:
            df = df.convert_dtypes(dtype_backend=backend)
        return df

    return tuple(
        map(finalize, [pd.DataFrame(*args, **kwargs), pandas.DataFrame(*args, **kwargs)])
    )


def create_test_series(
    vals, sort=False, backend=None, **kwargs
) -> tuple[pd.Series, pandas.Series]:
    """Build a Modin series and a pandas series from the same data.

    Parameters
    ----------
    vals : object
        Series data. For a dict, only the value stored under its first key is used.
    sort : bool, default: False
        Sort the values and reset the index of both series.
    backend : str, optional
        When given, both series go through ``convert_dtypes(dtype_backend=backend)``.
    **kwargs
        Passed to both series constructors.

    Returns
    -------
    tuple
        ``(modin_series, pandas_series)``.
    """
    data = vals[next(iter(vals.keys()))] if isinstance(vals, dict) else vals
    pair = [pd.Series(data, **kwargs), pandas.Series(data, **kwargs)]

    if sort:
        pair = [series.sort_values().reset_index(drop=True) for series in pair]
    if backend is not None:
        pair = [series.convert_dtypes(dtype_backend=backend) for series in pair]

    modin_series, pandas_series = pair
    return modin_series, pandas_series


def generate_dfs():
    """Return two small pandas frames that share their first three columns.

    Both frames hold the same "col1".."col3"; the first adds "col4" and
    "col5", the second adds "col6" and "col7" with the same values.
    """
    shared = {
        "col1": [0, 1, 2, 3],
        "col2": [4, 5, 6, 7],
        "col3": [8, 9, 10, 11],
    }
    tail_values = ([12, 13, 14, 15], [0, 0, 0, 0])

    frames = []
    for tail_names in (("col4", "col5"), ("col6", "col7")):
        columns = dict(shared)
        columns.update(zip(tail_names, tail_values))
        frames.append(pandas.DataFrame(columns))

    df, df2 = frames
    return df, df2


def generate_multiindex_dfs(axis=1):
    """Return the frames of `generate_dfs` with a two-level MultiIndex on `axis`.

    Parameters
    ----------
    axis : int, default: 1
        Axis whose labels are replaced: 0 for the index, 1 for the columns.

    Returns
    -------
    tuple
        Two pandas frames; every original label ``x`` on `axis` becomes
        ``("a", x)`` and the levels are named "name1" and "name2".
    """

    def generate_multiindex(index):
        return pandas.MultiIndex.from_tuples(
            [("a", x) for x in index.values], names=["name1", "name2"]
        )

    # `DataFrame.axes` builds a new list on every access, so assigning into
    # that list never reaches the frame. The labels are installed with
    # `set_axis` instead.
    df1, df2 = (
        df.set_axis(generate_multiindex(df.axes[axis]), axis=axis)
        for df in generate_dfs()
    )
    return df1, df2


def generate_multiindex(elements_number, nlevels=2, is_tree_like=False):
    """Generate a sorted MultiIndex of `elements_number` entries.

    Parameters
    ----------
    elements_number : int
        Number of entries in the resulting index.
    nlevels : int, default: 2
        Number of levels in the resulting index.
    is_tree_like : bool, default: False
        If True, build the index as a product of equally sized levels;
        this requires `elements_number` to be an exact power of the level
        size. Otherwise the first level is split into "first" and
        "second" halves and the remaining levels are generated per entry.

    Returns
    -------
    MultiIndex
        The generated index, sorted.

    Raises
    ------
    RuntimeError
        If `is_tree_like` is set and no integer level size >= 2 fits.
    """

    def generate_level(length, nlevel):
        src = ["bar", "baz", "foo", "qux"]
        return [f"{src[pos % len(src)]}-{nlevel}-{pos}" for pos in range(length)]

    if not is_tree_like:
        n_second = elements_number // 2
        n_first = n_second + elements_number % 2
        columns = [["first"] * n_first + ["second"] * n_second]
        columns.extend(
            generate_level(elements_number, lvl) for lvl in range(1, nlevels)
        )
        flat = pd.MultiIndex.from_tuples(
            list(zip(*columns)), names=[f"level-{lvl}" for lvl in range(nlevels)]
        )
        return flat.sort_values()

    # Try the full number of levels first, then one level fewer (the missing
    # level is filled in below with a constant "base_level" label).
    for penalty_level in (0, 1):
        depth = nlevels - penalty_level
        fraction, whole = math.modf(round(elements_number ** (1 / depth), 12))
        if whole >= 2 and fraction == 0:
            break
    else:
        raise RuntimeError(
            f"Can't generate Tree-like MultiIndex with lenght: {elements_number} and number of levels: {nlevels}"
        )

    side = int(whole)
    tree = pd.MultiIndex.from_product(
        [generate_level(side, lvl) for lvl in range(depth)],
        names=[f"level-{lvl}" for lvl in range(depth)],
    )
    if penalty_level:
        tree = pd.MultiIndex.from_tuples(
            [("base_level", *branch) for branch in tree],
            names=[f"level-{lvl}" for lvl in range(nlevels)],
        )
    return tree.sort_values()


def generate_none_dfs():
    """Return two small pandas frames, the first of which contains missing values.

    The first frame has a ``None`` in "col2" and an all-``None`` "col5";
    the second frame has no missing values.
    """
    with_missing = {
        "col1": [0, 1, 2, 3],
        "col2": [4, 5, None, 7],
        "col3": [8, 9, 10, 11],
        "col4": [12, 13, 14, 15],
        "col5": [None, None, None, None],
    }
    complete = {
        "col1": [0, 1, 2, 3],
        "col2": [4, 5, 6, 7],
        "col3": [8, 9, 10, 11],
        "col6": [12, 13, 14, 15],
        "col7": [0, 0, 0, 0],
    }
    df, df2 = (pandas.DataFrame(columns) for columns in (with_missing, complete))
    return df, df2


def get_unique_filename(
    test_name: str = "test",
    kwargs: dict = None,
    extension: str = "csv",
    data_dir: Union[str, Path] = "",
    suffix: str = "",
    debug_mode=False,
):
    """Return a unique file name built from the specified parameters.

    Parameters
    ----------
    test_name : str, default: "test"
        Name of the test for which the unique file name is needed.
        Only used when `debug_mode` is set.
    kwargs : dict, optional
        Unique combination of test parameters used to build the name.
        Only used when `debug_mode` is set. ``None`` means no parameters.
    extension : str, default: "csv"
        Extension of the file, without the leading ".".
    data_dir : Union[str, Path]
        Data directory where test files will be created.
    suffix : str
        String to append to the resulting name.
    debug_mode : bool, default: False
        Build a readable name that contains the `kwargs` values.
        Otherwise the name is a random UUID.

    Returns
    -------
    str
        Unique file name joined onto `data_dir`.
    """
    # A `None` default avoids sharing one dict object between all calls.
    if kwargs is None:
        kwargs = {}
    suffix_part = f"_{suffix}" if suffix else ""
    extension_part = f".{extension}" if extension else ""

    if not debug_mode:
        import uuid

        return os.path.join(data_dir, uuid.uuid1().hex + suffix_part + extension_part)

    # shortcut if kwargs parameters are not provided
    if not kwargs and extension == "csv" and suffix == "":
        return os.path.join(data_dir, test_name + suffix_part + extension_part)

    assert "." not in extension, "please provide pure extension name without '.'"
    prohibited_chars = ['"', "\n"]
    non_prohibited_char = "np_char"
    char_counter = 0
    kwargs_name = dict(kwargs)
    for key, value in kwargs_name.items():
        # Values that cannot appear in a file name (strings holding a
        # prohibited character, callables) are replaced by a numbered
        # placeholder; the check runs once per prohibited character.
        for char in prohibited_chars:
            if isinstance(value, str) and char in value or callable(value):
                kwargs_name[key] = non_prohibited_char + str(char_counter)
                char_counter += 1
    parameters_values = "_".join(
        (
            str(value)
            if not isinstance(value, (list, tuple))
            else "_".join(str(x) for x in value)
        )
        for value in kwargs_name.values()
    )
    return os.path.join(
        data_dir, test_name + parameters_values + suffix_part + extension_part
    )


def get_random_string():
    """Return a 10-character string of ASCII letters drawn from `random_state`."""
    letters = random_state.choice(list(ascii_letters), size=10)
    return "".join(letters.tolist())


def insert_lines_to_csv(
    csv_name: str,
    lines_positions: list,
    lines_type: str = "blank",
    encoding: str = None,
    **csv_reader_writer_params,
):
    """Replace selected lines of a ".csv" file in place.

    Parameters
    ----------
    csv_name : str
        ".csv" file that should be modified.
    lines_positions : list of ints
        Positions of the lines that should be replaced (serial number
        of the line - begins from 0, ends in <rows_number> - 1).
    lines_type : str, default: "blank"
        Kind of line written at each position. Possible types:
        "blank" - empty line without any delimiters/separators,
        "bad" - line with one more field than the file has columns.
    encoding : str, optional
        Encoding used for both reading and writing the file.
    **csv_reader_writer_params : dict
        Passed to both ``csv.reader`` and ``csv.writer``.

    Raises
    ------
    ValueError
        If `lines_type` is neither "blank" nor "bad".
    """
    if lines_type == "blank":
        lines_data = []
    elif lines_type == "bad":
        cols_number = len(pandas.read_csv(csv_name, nrows=1).columns)
        lines_data = list(range(cols_number + 1))
    else:
        raise ValueError(
            f"acceptable values for `lines_type` parameter are ['blank', 'bad'], actually passed {lines_type}"
        )

    with open(csv_name, "r", encoding=encoding, newline="") as read_file:
        try:
            dialect = csv.Sniffer().sniff(read_file.read())
        except Exception:
            # Dialect detection is best-effort; fall back to the default dialect.
            dialect = "excel"
        # Rewind whether or not sniffing succeeded. Otherwise a failed sniff
        # leaves the cursor at EOF, the reader yields nothing and the file is
        # rewritten empty.
        read_file.seek(0)

        reader = csv.reader(read_file, dialect=dialect, **csv_reader_writer_params)
        lines = [
            lines_data if position in lines_positions else row
            for position, row in enumerate(reader)
        ]

    with open(csv_name, "w", encoding=encoding, newline="") as write_file:
        writer = csv.writer(write_file, dialect=dialect, **csv_reader_writer_params)
        writer.writerows(lines)


def _get_open_files():
    """
    Return the sorted ``(path, fd)`` pairs of files opened by the current process.

    psutil's open_files() reports more fields (file position, for instance) that
    are allowed to differ; only path and fd are kept for simplicity.
    """
    handles = [(entry.path, entry.fd) for entry in psutil.Process().open_files()]
    handles.sort()
    return handles


def check_file_leaks(func):
    """
    Decorate `func` so that it fails when it leaves *newly* opened file handles.

    When `TrackFileLeaks` is disabled, `func` is returned unchanged.
    """
    if not TrackFileLeaks.get():
        return func

    def _is_expected(path):
        # Files in /proc/ have nothing to do with modin reading any data
        # (and this is what we care about).
        if path.startswith("/proc/"):
            return True
        # Ray intends to keep its session logs in /tmp/ray/session_*/logs
        # open even after work has been done.
        return bool(re.search(r"/tmp/ray/session_.*/logs", path))

    @functools.wraps(func)
    def check(*a, **kw):
        baseline = _get_open_files()
        try:
            return func(*a, **kw)
        finally:
            leaks = []
            for handle in _get_open_files():
                if handle in baseline:
                    # Already open before the call; consume one matching entry.
                    baseline.remove(handle)
                elif not _is_expected(handle[0]):
                    leaks.append(handle)

            assert (
                not leaks
            ), f"Unexpected open handles left for: {', '.join(item[0] for item in leaks)}"

    return check


def dummy_decorator():
    """Return a deliberately problematic decorator that does not use `functools.wraps`.

    This introduces unwanted local variables for inspect.currentframe.
    The decorator is used in test_io to test `read_csv` and `read_table`.

    Returns
    -------
    callable
        A decorator that wraps a method taking ``self`` as its first argument.
    """

    def wrapper(method):
        # `functools.wraps` is omitted on purpose, and the intermediate
        # `result` local is part of what the wrapper adds to the call frame.
        def wrapped_function(self, *args, **kwargs):
            result = method(self, *args, **kwargs)
            return result

        return wrapped_function

    return wrapper


def generate_dataframe(row_size=NROWS, additional_col_values=None, idx_name=None):
    """Generate a pandas frame with numeric, date/time-string and random columns.

    Parameters
    ----------
    row_size : int, default: NROWS
        Number of rows.
    additional_col_values : list or tuple, optional
        When given, a "col7" column is added with values sampled from it.
    idx_name : object, optional
        When given, the frame gets a ``RangeIndex`` with this name.

    Returns
    -------
    pandas.DataFrame
    """
    dates = pandas.date_range("2000", freq="h", periods=row_size)

    # Columns are filled in a fixed order so that `random_state` is consumed
    # in the same sequence on every call.
    data = {}
    data["col1"] = np.arange(row_size) * 10
    data["col2"] = [str(stamp.date()) for stamp in dates]
    data["col3"] = np.arange(row_size) * 10
    data["col4"] = [str(stamp.time()) for stamp in dates]
    data["col5"] = [get_random_string() for _ in range(row_size)]
    data["col6"] = random_state.uniform(low=0.0, high=10000.0, size=row_size)

    if idx_name is None:
        index = None
    else:
        index = pd.RangeIndex(0, row_size, name=idx_name)

    if additional_col_values is not None:
        assert isinstance(additional_col_values, (list, tuple))
        data["col7"] = random_state.choice(additional_col_values, size=row_size)
    return pandas.DataFrame(data, index=index)


def _make_csv_file(data_dir):
    """Return a csv-file factory whose default file names live in `data_dir`.

    Parameters
    ----------
    data_dir : str or Path
        Directory passed to `get_unique_filename` when the factory is
        called without an explicit `filename`.

    Returns
    -------
    callable
        `_csv_file_maker`, which writes a generated frame to a csv file
        and returns the file name (or None when the file already exists
        and `force` is False).
    """

    def _csv_file_maker(
        filename=None,
        row_size=NROWS,
        force=True,
        delimiter=",",
        encoding=None,
        compression="infer",
        additional_col_values=None,
        remove_randomness=False,
        add_blank_lines=False,
        add_bad_lines=False,
        add_nan_lines=False,
        thousands_separator=None,
        decimal_separator=None,
        comment_col_char=None,
        quoting=csv.QUOTE_MINIMAL,
        quotechar='"',
        doublequote=True,
        escapechar=None,
        lineterminator=None,
    ):
        # Without an explicit name, a unique one is generated inside `data_dir`.
        if filename is None:
            filename = get_unique_filename(data_dir=data_dir)
        # An existing file is only overwritten when `force` is set.
        if os.path.exists(filename) and not force:
            return None
        else:
            df = generate_dataframe(row_size, additional_col_values)
            if remove_randomness:
                # Keep only the columns that `generate_dataframe` builds
                # without `random_state`.
                df = df[["col1", "col2", "col3", "col4"]]
            if add_nan_lines:
                # NOTE(review): the step `row_size // (row_size // 10)` divides
                # by zero when row_size < 10 - confirm callers never do that.
                for i in range(0, row_size, row_size // (row_size // 10)):
                    df.loc[i] = pandas.Series()
            if comment_col_char:
                # Any truthy non-string value falls back to "#".
                char = comment_col_char if isinstance(comment_col_char, str) else "#"
                # NOTE(review): `(x + 2) == 0` is never true for x in
                # range(row_size), so `char` is never inserted and the column
                # just holds 0..row_size-1 - confirm the intended condition.
                df.insert(
                    loc=0,
                    column="col_with_comments",
                    value=[char if (x + 2) == 0 else x for x in range(row_size)],
                )

            if thousands_separator is not None:
                # Numeric columns are turned into strings that use the
                # requested thousands separator.
                for col_id in ["col1", "col3"]:
                    df[col_id] = df[col_id].apply(
                        lambda x: f"{x:,d}".replace(",", thousands_separator)
                    )
                df["col6"] = df["col6"].apply(
                    lambda x: f"{x:,f}".replace(",", thousands_separator)
                )
            # With an explicit compression, the matching extension from
            # `COMP_TO_EXT` is appended to the file name.
            filename = (
                f"{filename}.{COMP_TO_EXT[compression]}"
                if compression != "infer"
                else filename
            )
            df.to_csv(
                filename,
                sep=delimiter,
                encoding=encoding,
                compression=compression,
                index=False,
                decimal=decimal_separator if decimal_separator else ".",
                lineterminator=lineterminator,
                quoting=quoting,
                quotechar=quotechar,
                doublequote=doublequote,
                escapechar=escapechar,
            )
            # The same formatting options are reused when the written file is
            # post-processed by `insert_lines_to_csv` below.
            csv_reader_writer_params = {
                "delimiter": delimiter,
                "doublequote": doublequote,
                "escapechar": escapechar,
                "lineterminator": lineterminator if lineterminator else os.linesep,
                "quotechar": quotechar,
                "quoting": quoting,
            }
            if add_blank_lines:
                # Blank lines start at position 5; same step as for NaN lines
                # (and the same row_size < 10 caveat).
                insert_lines_to_csv(
                    csv_name=filename,
                    lines_positions=[
                        x for x in range(5, row_size, row_size // (row_size // 10))
                    ],
                    lines_type="blank",
                    encoding=encoding,
                    **csv_reader_writer_params,
                )
            if add_bad_lines:
                # Bad lines start at position 6, so they never land on the
                # blank-line positions above.
                insert_lines_to_csv(
                    csv_name=filename,
                    lines_positions=[
                        x for x in range(6, row_size, row_size // (row_size // 10))
                    ],
                    lines_type="bad",
                    encoding=encoding,
                    **csv_reader_writer_params,
                )
            return filename

    return _csv_file_maker


def sort_index_for_equal_values(df, ascending=True):
    """Sort the index labels of `df` within each group of equal rows.

    Note that a float64 index of `df` is converted to strings in place.
    """
    if df.index.dtype == np.float64:
        # HACK: workaround for pandas bug:
        # https://github.com/pandas-dev/pandas/issues/34455
        df.index = df.index.astype("str")

    group_keys = df if df.ndim == 1 else df.columns
    grouped = df.groupby(by=group_keys, sort=False)
    res = grouped.apply(lambda group: group.sort_index(ascending=ascending))

    # GroupBy sometimes prepends a level holding the 'by' keys to the result
    # index, and is inconsistent about when it does so; hence the explicit
    # comparison of the level counts.
    gained_level = res.index.nlevels > df.index.nlevels
    if gained_level:
        res.index = res.index.droplevel(0)
    # GroupBy replaces the original index names with 'by'; restore them.
    res.index.names = df.index.names
    return res


def df_equals_with_non_stable_indices(df1, df2):
    """Assert that two frames are equal, ignoring index order among equal values."""
    left = try_cast_to_pandas(df1)
    right = try_cast_to_pandas(df2)
    # The values must match positionally before the index is normalized.
    np.testing.assert_array_equal(left.values, right.values)
    left_sorted = sort_index_for_equal_values(left)
    right_sorted = sort_index_for_equal_values(right)
    df_equals(left_sorted, right_sorted)


def rotate_decimal_digits_or_symbols(value):
    """Move the last digit (or last character) of every element to the front.

    For object dtype the elements are assumed to be strings (MultiIndex
    level names) and a list is returned. Otherwise the result is
    ``value // 10 + (value % 10) * 10``.
    """
    if value.dtype != object:
        quotient, remainder = value // 10, value % 10
        return quotient + remainder * 10
    # When dtype is object, we assume that it is actually strings from MultiIndex level names
    return [text[-1] + text[:-1] for text in value]


def make_default_file(file_type: str, data_dir: str):
    """Return a factory that creates test files of `file_type` inside `data_dir`.

    Helper function for pytest fixtures.

    Parameters
    ----------
    file_type : str
        One of "json", "html", "excel", "feather", "stata", "pickle",
        "hdf" or "fwf"; the returned factory raises ``ValueError`` for
        anything else.
    data_dir : str
        Directory in which the unique file names are generated.

    Returns
    -------
    callable
        ``_make_default_file(nrows, ncols, force, **kwargs)`` returning
        the name of the file.
    """
    extension = {"excel": "xlsx", "fwf": "txt", "pickle": "pkl"}.get(
        file_type, file_type
    )

    def _create_file(filename, force, nrows, ncols, func: str, func_kw=None):
        """Write an ``nrows`` x ``ncols`` frame to `filename` via ``DataFrame.<func>``.

        Nothing is written when the file already exists and `force` is False.
        """
        if not force and os.path.exists(filename):
            return
        columns = {f"col{x + 1}": np.arange(nrows) for x in range(ncols)}
        write = getattr(pandas.DataFrame(columns), func)
        write(filename, **(func_kw or {}))

    def _make_default_file(nrows=NROWS, ncols=2, force=True, **kwargs):
        filename = get_unique_filename(extension=extension, data_dir=data_dir)

        if file_type == "fwf":
            # Fixed-width files are written verbatim instead of through pandas.
            if force or not os.path.exists(filename):
                fwf_data = kwargs.get("fwf_data")
                if fwf_data is None:
                    with open("modin/tests/pandas/data/test_data.fwf", "r") as fwf_file:
                        fwf_data = fwf_file.read()
                with open(filename, "w") as f:
                    f.write(fwf_data)
            return filename

        if file_type == "json":
            lines = kwargs.get("lines")
            func = "to_json"
            func_kw = {"lines": lines, "orient": "records"} if lines else {}
        elif file_type == "hdf":
            func = "to_hdf"
            func_kw = {"key": "df", "format": kwargs.get("format")}
        elif file_type in ("html", "excel", "feather", "stata", "pickle"):
            func = f"to_{file_type}"
            func_kw = None
        else:
            raise ValueError(f"Unsupported file type: {file_type}")

        _create_file(filename, force, nrows, ncols, func, func_kw)
        return filename

    return _make_default_file


def value_equals(obj1, obj2):
    """Check whether two scalar or list-like values are equal and raise an ``AssertionError`` if they aren't.

    Two scalar NaN values are considered equal.
    """
    if not is_list_like(obj1):
        same = obj1 == obj2
        assert same or (np.isnan(obj1) and np.isnan(obj2))
        return
    np.testing.assert_array_equal(obj1, obj2)


def dict_equals(dict1, dict2):
    """Check whether two dictionaries are equal and raise an ``AssertionError`` if they aren't.

    Keys are compared pairwise in sorted order, then the values stored
    under each pair of keys.
    """
    paired_keys = itertools.zip_longest(sorted(dict1), sorted(dict2))
    for left_key, right_key in paired_keys:
        value_equals(left_key, right_key)
        value_equals(dict1[left_key], dict2[right_key])


@contextmanager
def switch_execution(engine: str, storage_format: str):
    """Temporarily switch to the given engine and storage format.

    The engine and storage format that were active on entry are restored
    on exit, even if the body raises.
    """
    previous = (Engine.get(), StorageFormat.get())
    try:
        set_execution(engine, storage_format)
        yield
    finally:
        set_execution(*previous)


def is_native_shallow_copy() -> bool:
    """Return if the current configuration uses native pandas execution and performs shallow copies."""
    if Backend.get() != "Pandas":
        return False
    if NativePandasDeepCopy.get():
        return False
    return not pandas.get_option("mode.copy_on_write")


================================================
FILE: modin/tests/polars/test_dataframe.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import polars
import polars.testing

import modin.polars as pl


def test_init_roundtrip():
    """A modin.polars frame converts back to the same polars frame it was built from."""
    data = {"a": [1, 2, 3], "b": [4, 5, 6]}
    modin_frame = pl.DataFrame(data)
    expected = polars.DataFrame(data)
    as_pandas = modin_frame._query_compiler.to_pandas()
    polars.testing.assert_frame_equal(expected, polars.from_pandas(as_pandas))


================================================
FILE: modin/tests/test_dataframe_api_standard.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import pytest

import modin.pandas


def test_dataframe_api_standard() -> None:
    """
    Smoke-test the dataframe consortium standard entry points.

    Full testing is done at https://github.com/data-apis/dataframe-api-compat,
    this is just to check that the entry point works as expected.
    """
    pytest.importorskip("dataframe_api_compat")

    modin_frame = modin.pandas.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    standard_frame = modin_frame.__dataframe_consortium_standard__()
    assert standard_frame.get_column_names() == ["a", "b"]

    modin_series = modin.pandas.Series([1, 2, 3])
    standard_column = modin_series.__column_consortium_standard__()
    assert standard_column.get_value(1) == 2


================================================
FILE: modin/tests/test_docstring_urls.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import importlib
import pkgutil
from concurrent.futures import ThreadPoolExecutor
from urllib.error import HTTPError
from urllib.request import urlopen

import pytest

import modin.pandas
from modin.utils import PANDAS_API_URL_TEMPLATE


@pytest.fixture
def doc_urls(get_generated_doc_urls):
    """Return the sorted docstring URLs after importing everything under 'modin.pandas'."""
    # ensure all docstrings are generated - import _everything_ under 'modin.pandas'
    modules = pkgutil.walk_packages(modin.pandas.__path__, "modin.pandas.")
    for module_name in (info.name for info in modules):
        try:
            importlib.import_module(module_name)
        except ModuleNotFoundError:
            # some optional 3rd-party dep missing, ignore
            continue
    return sorted(get_generated_doc_urls())


def test_all_urls_exist(doc_urls):
    """Check that every generated pandas documentation URL can be opened.

    URLs of a few methods known to be broken upstream are removed from
    `doc_urls` first; every remaining URL is requested and those that
    answer with an HTTP error are reported in the assertion message.
    """
    broken = []
    # TODO: remove the hack after pandas fixes it
    methods_with_broken_urls = (
        "pandas.DataFrame.flags",
        "pandas.Series.info",
        "pandas.DataFrame.isetitem",
        "pandas.Series.swapaxes",
        "pandas.DataFrame.to_numpy",
        "pandas.Series.axes",
        "pandas.Series.divmod",
        "pandas.Series.rdivmod",
    )
    for broken_method in methods_with_broken_urls:
        doc_urls.remove(PANDAS_API_URL_TEMPLATE.format(broken_method))

    def _test_url(url):
        try:
            with urlopen(url):
                pass
        except HTTPError:
            broken.append(url)

    # Leaving the `with` block waits for all requests to finish.
    # NOTE: the iterator returned by `map` is not consumed, so exceptions
    # other than HTTPError raised inside workers are not re-raised here.
    with ThreadPoolExecutor(32) as pool:
        pool.map(_test_url, doc_urls)

    # List the offending URLs so a failure can be acted on without a re-run.
    assert not broken, f"Invalid URLs detected: {sorted(broken)}"


================================================
FILE: modin/tests/test_envvar_catcher.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import os

import pytest


@pytest.fixture
def nameset():
    """Set a throwaway environment variable for the test and yield its name."""
    variable = "hey_i_am_an_env_var"
    os.environ[variable] = "i am a value"
    yield variable
    # Teardown: drop the variable again once the test is done.
    del os.environ[variable]


def test_envvar_catcher(nameset):
    """Check which ``os.environ`` accesses raise ``AssertionError``.

    Looking up names starting with "Modin_"/"modin_" is expected to raise,
    while unrelated names behave normally.
    NOTE(review): the raising behavior is presumably installed by a test
    hook outside this module (e.g. conftest) - confirm.
    """
    # `.get()` with a modin-prefixed name must raise.
    with pytest.raises(AssertionError):
        os.environ.get("Modin_FOO", "bar")
    # The bare membership expression is intentional: evaluating it must
    # raise, its value is irrelevant.
    with pytest.raises(AssertionError):
        "modin_qux" not in os.environ
    # Names without the prefix are not affected.
    assert "yay_random_name" not in os.environ
    assert os.environ[nameset]


================================================
FILE: modin/tests/test_envvar_npartitions.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import numpy as np
import pytest

import modin.pandas as pd
from modin.config import NPartitions


@pytest.mark.parametrize("num_partitions", [2, 4, 6, 8, 10])
def test_set_npartitions(num_partitions):
    """A frame created after `NPartitions.put` is split into the requested grid."""
    NPartitions.put(num_partitions)
    frame = pd.DataFrame(np.random.randint(0, 100, size=(2**16, 2**8)))
    grid = frame._query_compiler._modin_frame._partitions.shape
    n_row_parts, n_col_parts = grid[0], grid[1]
    assert n_row_parts == num_partitions and n_col_parts == min(num_partitions, 8)


@pytest.mark.parametrize("left_num_partitions", [2, 4, 6, 8, 10])
@pytest.mark.parametrize("right_num_partitions", [2, 4, 6, 8, 10])
def test_runtime_change_npartitions(left_num_partitions, right_num_partitions):
    """Changing `NPartitions` between frame creations affects only the later frame."""

    def assert_grid(expected):
        # Build a frame from the shared data under the current setting and
        # check the shape of its partition grid.
        frame = pd.DataFrame(data)
        grid = frame._query_compiler._modin_frame._partitions.shape
        assert grid[0] == expected and grid[1] == min(expected, 8)

    NPartitions.put(left_num_partitions)
    data = np.random.randint(0, 100, size=(2**16, 2**8))
    assert_grid(left_num_partitions)

    NPartitions.put(right_num_partitions)
    assert_grid(right_num_partitions)


================================================
FILE: modin/tests/test_executions_api.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import pytest

from modin.core.storage_formats import BaseQueryCompiler, PandasQueryCompiler

# Reference query compiler: its abstract methods and attribute names are what
# the tests in this module compare against.
BASE_EXECUTION = BaseQueryCompiler
# Concrete query compilers whose public attributes are checked against
# ``BASE_EXECUTION``.
EXECUTIONS = [PandasQueryCompiler]


def test_base_abstract_methods():
    """Check that only an allow-listed set of methods is abstract on the base compiler."""
    allowed_abstract_methods = (
        "__init__",
        "free",
        "finalize",
        "execute",
        "to_pandas",
        "from_pandas",
        "from_arrow",
        "default_to_pandas",
        "from_interchange_dataframe",
        "to_interchange_dataframe",
        "engine",
        "storage_format",
    )

    # Sorted so that the failure message is stable and easy to read.
    not_implemented_methods = sorted(
        BASE_EXECUTION.__abstractmethods__.difference(allowed_abstract_methods)
    )

    assert (
        not not_implemented_methods
    ), f"{BASE_EXECUTION} has not implemented abstract methods: {not_implemented_methods}"


@pytest.mark.parametrize("execution", EXECUTIONS)
def test_api_consistent(execution):
    """Check that ``execution`` defines no public attribute missing from the base class."""
    public_names = {name for name in vars(execution) if not name.startswith("_")}
    base_names = set(vars(BASE_EXECUTION))

    # A custom execution must not expose API that the base does not declare.
    extra_methods = public_names - base_names
    assert (
        not extra_methods
    ), f"{execution} implement these extra methods: {extra_methods}"


================================================
FILE: modin/tests/test_headers.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import os
from os.path import abspath, dirname

# Directory two levels above this test file; with this file located at
# ``modin/tests/test_headers.py`` that is the ``modin`` package directory.
rootdir = dirname(dirname(abspath(__file__)))
# File names that are exempt from the license-header check.
exclude_files = ["_version.py"]


def test_headers():
    """Check that every ``.py`` file under ``rootdir`` starts with the license header."""
    header_path = f"{dirname(rootdir)}/LICENSE_HEADER"
    with open(header_path, "r") as header_file:
        # Kept as a list so each header line is compared individually.
        header_lines = header_file.readlines()
    header_len = len(header_lines)

    for subdir, _dirs, files in os.walk(rootdir):
        for file in files:
            if not file.endswith(".py") or file in exclude_files:
                continue
            filepath = os.path.join(subdir, file)
            with open(filepath, "r", encoding="utf8") as py_file:
                py_file_lines = py_file.readlines()
            # ``zip`` stops at the shorter sequence, so a file shorter than
            # the header is only compared over the lines it has.
            for left, right in zip(header_lines, py_file_lines[:header_len]):
                assert left == right


def test_line_endings():
    """
    Check that no file under the package directory contains CRLF line endings.

    Directories whose path contains ``.git``, ``.idea`` or ``__pycache__`` are
    skipped, as are ``.parquet`` files.
    """
    # Two levels above this file, i.e. the same directory the module-level
    # ``rootdir`` points to (not the repository root).
    rootdir = dirname(dirname(abspath(__file__)))
    for subdir, dirs, files in os.walk(rootdir):
        if any(i in subdir for i in [".git", ".idea", "__pycache__"]):
            continue
        for file in files:
            if file.endswith(".parquet"):
                continue
            filepath = os.path.join(subdir, file)
            # The check only reads the file, so open it read-only: "rb+" would
            # require write permission and fail on read-only checkouts.
            with open(filepath, "rb") as f:
                file_contents = f.read()
            assert b"\r\n" not in file_contents, "File has CRLF: {}".format(
                filepath
            )


================================================
FILE: modin/tests/test_logging.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import collections
import logging

import pytest

import modin.logging
from modin.config import LogMode


class _FakeLogger:
    """In-memory logger substitute that records formatted messages per namespace."""

    # Registry of fake loggers keyed by namespace, stored on the class.
    _loggers = {}

    def __init__(self, namespace):
        self.namespace = namespace
        # Maps a log level (or the string "exception") to the list of messages.
        self.messages = collections.defaultdict(list)

    def log(self, log_level, message, *args, **kw):
        """Format ``message`` with ``args``/``kw`` and store it under ``log_level``."""
        bucket = self.messages[log_level]
        bucket.append(message.format(*args, **kw))

    def exception(self, message, *args, **kw):
        """Format ``message`` and store it under the ``"exception"`` key."""
        bucket = self.messages["exception"]
        bucket.append(message.format(*args, **kw))

    @classmethod
    def make(cls, namespace):
        """Return the logger registered for ``namespace``, registering a new one if absent."""
        candidate = cls(namespace)
        return cls._loggers.setdefault(namespace, candidate)

    @classmethod
    def get(cls, namespace="modin.logger.default"):
        """Return the recorded messages of the logger registered for ``namespace``."""
        registered = cls._loggers[namespace]
        return registered.messages

    @classmethod
    def clear(cls):
        """Replace the registry with an empty one."""
        cls._loggers = dict()


def _get_logger(namespace="modin.logger.default"):
    """Return the ``_FakeLogger`` for ``namespace``; used in place of ``logging.getLogger``."""
    fake_logger = _FakeLogger.make(namespace)
    return fake_logger


def mock_get_logger(ctx):
    """Patch ``logging.getLogger`` with ``_get_logger`` through ``ctx.setattr``."""
    replacement = _get_logger
    ctx.setattr(logging, "getLogger", replacement)


@pytest.fixture
def get_log_messages():
    """Enable logging for a test and yield ``_FakeLogger.get``; restore state afterwards."""
    previous_mode = LogMode.get()
    LogMode.enable()
    # Initialize the logging prior to mocking getLogger().
    modin.logging.get_logger()

    yield _FakeLogger.get

    # Teardown: drop the recorded fake loggers and restore the saved log mode.
    _FakeLogger.clear()
    LogMode.put(previous_mode)


def test_function_decorator(monkeypatch, get_log_messages):
    """Check the messages recorded for a function wrapped with ``enable_logging``."""

    @modin.logging.enable_logging
    def func(do_raise):
        if do_raise:
            raise ValueError()

    with monkeypatch.context() as ctx:
        # NOTE: patching cannot be done in the fixture, since mocking
        # logging.getLogger() without monkeypatch.context() breaks pytest.
        mock_get_logger(ctx)

        func(do_raise=False)
        with pytest.raises(ValueError):
            func(do_raise=True)

    first_info = get_log_messages()[logging.INFO][0]
    assert "func" in first_info
    assert "START" in first_info

    recorded_errors = get_log_messages("modin.logger.errors")
    assert recorded_errors["exception"] == ["STOP::PANDAS-API::func"]


def test_function_decorator_on_outer_function_6237(monkeypatch, get_log_messages):
    """Check that a nested failure is recorded once, for the inner function only."""

    @modin.logging.enable_logging
    def inner_func():
        raise ValueError()

    @modin.logging.enable_logging
    def outer_func():
        inner_func()

    with monkeypatch.context() as ctx:
        # NOTE: patching cannot be done in the fixture, since mocking
        # logging.getLogger() without monkeypatch.context() breaks pytest.
        mock_get_logger(ctx)

        with pytest.raises(ValueError):
            outer_func()

    recorded_errors = get_log_messages("modin.logger.errors")
    assert recorded_errors["exception"] == ["STOP::PANDAS-API::inner_func"]


def test_class_decorator(monkeypatch, get_log_messages):
    """Check the messages recorded for methods of a class wrapped with ``enable_logging``."""

    @modin.logging.enable_logging("CUSTOM")
    class Foo:
        def method1(self):
            pass

        @classmethod
        def method2(cls):
            pass

        @staticmethod
        def method3():
            pass

    class Bar(Foo):
        def method4(self):
            pass

    with monkeypatch.context() as ctx:
        mock_get_logger(ctx)
        foo = Foo()
        foo.method1()
        Foo.method2()
        Foo.method3()

        first_bar = Bar()
        first_bar.method1()
        second_bar = Bar()
        second_bar.method4()

    # ``Bar.method4`` is absent from the expected messages below, while the
    # inherited ``method1`` is reported under ``Foo``.
    expected_messages = [
        "START::CUSTOM::Foo.method1",
        "STOP::CUSTOM::Foo.method1",
        "START::CUSTOM::Foo.method2",
        "STOP::CUSTOM::Foo.method2",
        "START::CUSTOM::Foo.method3",
        "STOP::CUSTOM::Foo.method3",
        "START::CUSTOM::Foo.method1",
        "STOP::CUSTOM::Foo.method1",
    ]
    assert get_log_messages()[logging.INFO] == expected_messages


def test_class_inheritance(monkeypatch, get_log_messages):
    """Check the messages recorded for a ``ClassLogger`` subclass and its child."""

    class Foo(modin.logging.ClassLogger, modin_layer="CUSTOM"):
        def method1(self):
            pass

    class Bar(Foo):
        def method2(self):
            pass

    with monkeypatch.context() as ctx:
        mock_get_logger(ctx)
        foo = Foo()
        foo.method1()
        first_bar = Bar()
        first_bar.method1()
        second_bar = Bar()
        second_bar.method2()

    # The child's own method is reported under ``Bar``; the inherited one
    # keeps the ``Foo`` prefix.
    expected_messages = [
        "START::CUSTOM::Foo.method1",
        "STOP::CUSTOM::Foo.method1",
        "START::CUSTOM::Foo.method1",
        "STOP::CUSTOM::Foo.method1",
        "START::CUSTOM::Bar.method2",
        "STOP::CUSTOM::Bar.method2",
    ]
    assert get_log_messages()[logging.INFO] == expected_messages


================================================
FILE: modin/tests/test_metrics.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

from typing import Union

import pytest

import modin.logging
import modin.pandas as pd
from modin.config import MetricsMode
from modin.logging.metrics import (
    _metric_handlers,
    add_metric_handler,
    clear_metric_handler,
    emit_metric,
)


class FakeTelemetryClient:
    """Test double that offers one recording and one failing metric handler."""

    def __init__(self):
        # Handler currently chosen by a test; ``None`` until a test sets it.
        self._metric_handler = None
        # Last value received per metric name.
        self._metrics = dict()

    def metric_handler_fail(self, name: str, value: Union[int, float]):
        """Always raise ``KeyError`` to imitate a broken handler."""
        raise KeyError("Poorly implemented metric handler")

    def metric_handler_pass(self, name: str, value: Union[int, float]):
        """Store ``value`` under ``name``, overwriting any earlier value."""
        self._metrics.update({name: value})


@modin.logging.enable_logging
def func(do_raise):
    """Raise ``ValueError`` when ``do_raise`` is truthy; otherwise return ``None``."""
    if not do_raise:
        return
    raise ValueError()


@pytest.fixture()
def metric_client():
    """Yield a ``FakeTelemetryClient`` with metrics enabled; clean up afterwards."""
    MetricsMode.enable()
    telemetry_client = FakeTelemetryClient()
    yield telemetry_client
    # Teardown: remove whichever handler the test stored on the client, then
    # switch metrics off.
    registered_handler = telemetry_client._metric_handler
    clear_metric_handler(registered_handler)
    MetricsMode.disable()


def test_metrics_api_timings(metric_client):
    """Check that a registered handler receives a positive timing for ``func``."""
    assert len(_metric_handlers) == 0
    metric_client._metric_handler = metric_client.metric_handler_pass
    add_metric_handler(metric_client._metric_handler)
    assert len(_metric_handlers) == 1
    assert _metric_handlers[0] == metric_client._metric_handler

    func(do_raise=False)

    recorded = metric_client._metrics
    assert len(recorded) == 1
    elapsed = recorded["modin.pandas-api.func"]
    assert elapsed is not None
    assert elapsed > 0.0


def test_df_metrics(metric_client):
    """Check the metrics emitted while building a DataFrame and calling ``sum``."""
    metric_client._metric_handler = metric_client.metric_handler_pass
    add_metric_handler(metric_client._metric_handler)

    frame = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
    frame.sum()

    recorded = metric_client._metrics
    # Number of distinct metric names expected from the two calls above.
    assert len(recorded) == 54
    sum_timing = recorded["modin.pandas-api.dataframe.sum"]
    assert sum_timing is not None
    assert sum_timing > 0.0


def test_metrics_handler_fails(metric_client):
    """Check that a handler which raises is no longer registered after one call."""
    assert len(metric_client._metrics) == 0
    failing_handler = metric_client.metric_handler_fail
    metric_client._metric_handler = failing_handler
    add_metric_handler(metric_client._metric_handler)
    assert len(_metric_handlers) == 1

    func(do_raise=False)

    # The handler list is empty again and nothing was recorded.
    assert len(_metric_handlers) == 0
    assert len(metric_client._metrics) == 0


def test_emit_name_enforced():
    """Check that ``emit_metric`` raises ``KeyError`` for this metric name."""
    MetricsMode.enable()
    invalid_name = "Not::A::Valid::Metric::Name"
    with pytest.raises(KeyError):
        emit_metric(invalid_name, 1.0)


def test_metrics_can_be_opt_out(metric_client):
    """Check that no metric reaches a registered handler once metrics are disabled."""
    MetricsMode.enable()
    recorded = metric_client._metrics
    assert len(recorded) == 0
    metric_client._metric_handler = metric_client.metric_handler_pass
    add_metric_handler(metric_client._metric_handler)

    # Disabling metrics after the handler was added: the handler stays
    # registered, but nothing is emitted to it.
    MetricsMode.disable()
    assert len(_metric_handlers) == 1
    func(do_raise=False)
    assert len(metric_client._metrics) == 0


================================================
FILE: modin/tests/test_partition_api.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import numpy as np
import pandas
import pytest

import modin.pandas as pd
from modin.config import Engine, NPartitions
from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher
from modin.distributed.dataframe.pandas import from_partitions, unwrap_partitions
from modin.pandas.indexing import compute_sliced_len
from modin.tests.pandas.utils import df_equals, test_data

# Partition class of the currently selected factory; the tests below
# instantiate it directly.
PartitionClass = (
    FactoryDispatcher.get_factory().io_cls.frame_cls._partition_mgr_cls._partition_class
)

# Select engine-specific helpers used by the tests below:
#   put_func  -- hands an object to the engine and returns what the engine gives back
#   get_func  -- materializes a value previously returned by ``put_func``
#   is_future -- tells whether an object should be passed through ``get_func``
if Engine.get() == "Ray":
    from modin.core.execution.ray.common import RayWrapper
    from modin.core.execution.ray.common.utils import ObjectIDType

    put_func = RayWrapper.put
    get_func = RayWrapper.materialize
    is_future = lambda obj: isinstance(obj, ObjectIDType)  # noqa: E731
elif Engine.get() == "Dask":
    from distributed import Future

    from modin.core.execution.dask.common import DaskWrapper

    # Looks like there is a key collision;
    # https://github.com/dask/distributed/issues/3703#issuecomment-619446739
    # recommends to use `hash=False`. Perhaps this should be the default value of `put`.
    put_func = lambda obj: DaskWrapper.put(obj, hash=False)  # noqa: E731
    get_func = DaskWrapper.materialize
    is_future = lambda obj: isinstance(obj, Future)  # noqa: E731
elif Engine.get() == "Unidist":
    from unidist import is_object_ref

    from modin.core.execution.unidist.common import UnidistWrapper

    put_func = UnidistWrapper.put
    get_func = UnidistWrapper.materialize
    is_future = is_object_ref
elif Engine.get() == "Python":
    # Identity helpers: objects are used as they are.
    put_func = lambda x: x  # noqa: E731
    get_func = lambda x: x  # noqa: E731
    # Always True (everything is an ``object``); harmless because ``get_func``
    # is the identity for this engine.
    is_future = lambda obj: isinstance(obj, object)  # noqa: E731
else:
    raise NotImplementedError(
        f"'{Engine.get()}' engine is not supported by these test suites"
    )

# Module-level side effect: fixes the partition count at import time.
NPartitions.put(4)
# HACK: implicit engine initialization (Modin issue #2989)
pd.DataFrame([])


@pytest.mark.parametrize("axis", [None, 0, 1])
@pytest.mark.parametrize("reverse_index", [True, False])
@pytest.mark.parametrize("reverse_columns", [True, False])
def test_unwrap_partitions(axis, reverse_index, reverse_columns):
    """Compare ``unwrap_partitions`` output with the partitions of an equivalent frame."""
    data = test_data["int_data"]

    def get_df(lib, data):
        # Build a frame with ``lib`` and optionally reverse its labels.
        frame = lib.DataFrame(data)
        if reverse_index:
            frame.index = frame.index[::-1]
        if reverse_columns:
            frame.columns = frame.columns[::-1]
        return frame

    df = get_df(pd, data)
    # `df` should not have propagated the index and column updates to its
    # partitions yet. `expected_df` is built directly from an already updated
    # pandas dataframe, so its partitions should carry the updated metadata.
    expected_df = pd.DataFrame(get_df(pandas, data))
    expected_frame = expected_df._query_compiler._modin_frame
    expected_partitions = expected_frame._partitions

    if axis is not None:
        expected_axis_partitions = [
            axis_partition.force_materialization().unwrap(squeeze=True)
            for axis_partition in expected_frame._partition_mgr_cls.axis_partition(
                expected_partitions, axis ^ 1
            )
        ]
        actual_axis_partitions = unwrap_partitions(df, axis=axis)
        assert len(expected_axis_partitions) == len(actual_axis_partitions)
        for expected_item, actual_item in zip(
            expected_axis_partitions, actual_axis_partitions
        ):
            # Contents are compared only for these engines.
            if Engine.get() in ["Ray", "Dask", "Unidist"]:
                df_equals(get_func(expected_item), get_func(actual_item))
        return

    actual_partitions = np.array(unwrap_partitions(df, axis=axis))
    assert expected_partitions.shape == actual_partitions.shape
    grid = (expected_partitions.shape[0], expected_partitions.shape[1])
    for row_idx, col_idx in np.ndindex(*grid):
        expected_block = expected_partitions[row_idx][col_idx].list_of_blocks[0]
        df_equals(
            get_func(expected_block),
            get_func(actual_partitions[row_idx][col_idx]),
        )


def test_unwrap_virtual_partitions():
    """Check ``unwrap_partitions`` on a frame produced by concatenation (see #5164)."""
    data = test_data["int_data"]
    df = pd.DataFrame(data)
    virtual_partitioned_df = pd.concat([df] * 10)
    actual_partitions = np.array(unwrap_partitions(virtual_partitioned_df, axis=None))

    expected_df = pd.concat([pd.DataFrame(data)] * 10)
    expected_partitions = expected_df._query_compiler._modin_frame._partitions
    assert expected_partitions.shape == actual_partitions.shape

    grid = (expected_partitions.shape[0], expected_partitions.shape[1])
    for row_idx, col_idx in np.ndindex(*grid):
        materialized = expected_partitions[row_idx][col_idx].force_materialization()
        df_equals(
            get_func(materialized.list_of_blocks[0]),
            get_func(actual_partitions[row_idx][col_idx]),
        )


@pytest.mark.parametrize("column_widths", [None, "column_widths"])
@pytest.mark.parametrize("row_lengths", [None, "row_lengths"])
@pytest.mark.parametrize("columns", [None, "columns"])
@pytest.mark.parametrize("index", [None, "index"])
@pytest.mark.parametrize("axis", [None, 0, 1])
def test_from_partitions(axis, index, columns, row_lengths, column_widths):
    """Build a frame with ``from_partitions`` and compare it to a pandas concat."""
    data = test_data["int_data"]
    df1, df2 = pandas.DataFrame(data), pandas.DataFrame(data)
    num_rows, num_cols = df1.shape
    # ``axis=None`` is compared against a column-wise concatenation.
    concat_axis = 1 if axis is None else axis
    expected_df = pandas.concat([df1, df2], axis=concat_axis)

    # Each string marker selects the real metadata; ``None`` leaves it unset.
    if index == "index":
        index = expected_df.index
    else:
        index = None
    if columns == "columns":
        columns = expected_df.columns
    else:
        columns = None
    if row_lengths is not None:
        row_lengths = [num_rows, num_rows] if axis == 0 else [num_rows]
    if column_widths is not None:
        column_widths = [num_cols] if axis == 0 else [num_cols, num_cols]

    futures = [put_func(df1), put_func(df2)]
    if axis is None:
        # Without an axis the input is a 2D list: one row of two partitions.
        futures = [futures]

    actual_df = from_partitions(
        futures,
        axis,
        index=index,
        columns=columns,
        row_lengths=row_lengths,
        column_widths=column_widths,
    )
    df_equals(expected_df, actual_df)


@pytest.mark.parametrize("columns", ["original_col", "new_col"])
@pytest.mark.parametrize("index", ["original_idx", "new_idx"])
@pytest.mark.parametrize("axis", [None, 0, 1])
def test_from_partitions_mismatched_labels(axis, index, columns):
    """Check ``from_partitions`` when the passed labels differ from the partitions' own."""
    expected_df = pd.DataFrame(test_data["int_data"])
    partitions = unwrap_partitions(expected_df, axis=axis)

    if index == "original_idx":
        index = expected_df.index
    else:
        index = [f"row{i}" for i in expected_df.index]

    if columns == "original_col":
        columns = expected_df.columns
    else:
        columns = [f"col{i}" for i in expected_df.columns]

    # Relabel the reference frame, then rebuild from the partitions that were
    # unwrapped before relabeling, using the same labels.
    expected_df.index = index
    expected_df.columns = columns
    actual_df = from_partitions(partitions, axis=axis, index=index, columns=columns)
    df_equals(expected_df, actual_df)


@pytest.mark.parametrize("row_labels", [[0, 2], slice(None)])
@pytest.mark.parametrize("col_labels", [[0, 2], slice(None)])
@pytest.mark.parametrize("is_length_future", [False, True])
@pytest.mark.parametrize("is_width_future", [False, True])
def test_mask_preserve_cache(row_labels, col_labels, is_length_future, is_width_future):
    """Check that ``mask`` keeps a correct length/width cache on the new partition."""

    def deserialize(obj):
        # Materialize futures; pass plain values through unchanged.
        return get_func(obj) if is_future(obj) else obj

    def compute_length(indices, length):
        # Number of positions selected by ``indices`` out of ``length``.
        if isinstance(indices, slice):
            return compute_sliced_len(indices, length)
        return len(indices)

    df = pandas.DataFrame({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8], "c": [9, 10, 11, 12]})
    obj_id = put_func(df)

    n_rows = len(df)
    n_cols = len(df.columns)
    # The cached shape is supplied either as plain ints or as engine futures.
    length_arg = put_func(n_rows) if is_length_future else n_rows
    width_arg = put_func(n_cols) if is_width_future else n_cols

    source_partition = PartitionClass(obj_id, length_arg, width_arg)
    masked_partition = source_partition.mask(
        row_labels=row_labels, col_labels=col_labels
    )

    expected_length = compute_length(row_labels, n_rows)
    expected_width = compute_length(col_labels, n_cols)

    # Check that the cache is preserved
    assert expected_length == deserialize(masked_partition._length_cache)
    assert expected_width == deserialize(masked_partition._width_cache)
    # Check that the cache is interpreted properly
    assert expected_length == masked_partition.length()
    assert expected_width == masked_partition.width()
    # Recompute shape explicitly to check that the cached data was correct
    expected_length = masked_partition._length_cache
    expected_width = masked_partition._width_cache
    masked_partition._length_cache = None
    masked_partition._width_cache = None
    assert expected_length == masked_partition.length()
    assert expected_width == masked_partition.width()


================================================
FILE: modin/tests/test_utils.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import contextlib
import json
from textwrap import dedent, indent
from typing import Optional, Union
from unittest.mock import Mock, patch

import numpy as np
import pandas
import pytest

import modin.pandas as pd
import modin.utils
from modin.config import Engine, StorageFormat
from modin.error_message import ErrorMessage
from modin.tests.pandas.utils import create_test_dfs


# Note: the classes below exist purely for testing purposes - they
# simulate real-world use cases for _inherit_docstrings.
# The docstrings themselves are the test data and must not be edited casually.
class BaseParent:
    # Overridden in ``BaseChild``.
    def method(self):
        """ordinary method (base)"""

    # Defined only here; not overridden in ``BaseChild``.
    def base_method(self):
        """ordinary method in base only"""

    @property
    def prop(self):
        """property"""

    @staticmethod
    def static():
        """static method"""

    @classmethod
    def clsmtd(cls):
        """class method"""


class BaseChild(BaseParent):
    """this is class docstring"""

    # Overrides ``BaseParent.method`` with its own docstring.
    def method(self):
        """ordinary method (child)"""

    def own_method(self):
        """own method"""

    # The wrapped class in the tests defines its own docstring for this name.
    def no_overwrite(self):
        """another own method"""

    # Property built from an existing function rather than with a decorator.
    F = property(method)


@pytest.fixture(scope="module")
def wrapped_cls():
    """Return a class decorated with ``_inherit_docstrings(BaseChild)``."""

    # NOTE: members below deliberately have no docstrings (except
    # ``no_overwrite``); the tests compare what the decorator fills in.
    @modin.utils._inherit_docstrings(BaseChild)
    class Wrapped:
        def method(self):
            pass

        def base_method(self):
            pass

        def own_method(self):
            pass

        # Has its own docstring, which the tests expect to stay different
        # from ``BaseChild.no_overwrite``'s.
        def no_overwrite(self):
            """not overwritten doc"""

        @property
        def prop(self):
            return None

        @staticmethod
        def static():
            pass

        @classmethod
        def clsmtd(cls):
            pass

        # Property built from an existing function, mirroring ``BaseChild.F``.
        F = property(method)

    return Wrapped


def _check_doc(wrapped, orig):
    """Assert that ``wrapped`` has ``orig``'s docstring and is flagged as inherited."""
    assert wrapped.__doc__ == orig.__doc__
    # For a property the flag is read from its getter function.
    flag_holder = wrapped.fget if isinstance(wrapped, property) else wrapped
    assert flag_holder.__doc_inherited__


def test_doc_inherit_clslevel(wrapped_cls):
    """Check that the class-level docstring comes from ``BaseChild``."""
    _check_doc(wrapped=wrapped_cls, orig=BaseChild)


def test_doc_inherit_methods(wrapped_cls):
    """Check docstring inheritance for ordinary methods."""
    _check_doc(wrapped_cls.method, BaseChild.method)
    _check_doc(wrapped_cls.base_method, BaseParent.base_method)
    _check_doc(wrapped_cls.own_method, BaseChild.own_method)

    # A method that already has a docstring keeps it and is not flagged.
    kept = wrapped_cls.no_overwrite
    assert kept.__doc__ != BaseChild.no_overwrite.__doc__
    assert not getattr(kept, "__doc_inherited__", False)


def test_doc_inherit_special(wrapped_cls):
    """Check docstring inheritance for the static method and the class method."""
    _check_doc(wrapped=wrapped_cls.static, orig=BaseChild.static)
    _check_doc(wrapped=wrapped_cls.clsmtd, orig=BaseChild.clsmtd)


def test_doc_inherit_props(wrapped_cls):
    """Check docstring inheritance for properties."""
    # The decorated method must still be the same kind of object as the original.
    wrapped_kind = type(wrapped_cls.method)
    original_kind = type(BaseChild.method)
    assert wrapped_kind == original_kind
    _check_doc(wrapped_cls.prop, BaseChild.prop)
    _check_doc(wrapped_cls.F, BaseChild.F)


def test_doc_inherit_prop_builder():
    """Check that factory-built properties keep their own getters after decoration."""

    def builder(name):
        # Each call returns a distinct property that yields ``name``.
        return property(lambda self: name)

    class Parent:
        prop = builder("Parent")

    @modin.utils._inherit_docstrings(Parent)
    class Child(Parent):
        prop = builder("Child")

    parent_value = Parent().prop
    assert parent_value == "Parent"
    child_value = Child().prop
    assert child_value == "Child"


@pytest.mark.parametrize(
    "source_doc,to_append,expected",
    [
        (
            "One-line doc.",
            "One-line message.",
            "One-line doc.One-line message.",
        ),
        (
            """
            Regular doc-string
                With the setted indent style.
            """,
            """
                    Doc-string having different indents
                        in comparison with the regular one.
            """,
            """
            Regular doc-string
                With the setted indent style.

            Doc-string having different indents
                in comparison with the regular one.
            """,
        ),
    ],
)
def test_append_to_docstring(source_doc, to_append, expected):
    """Check the docstring produced by ``append_to_docstring``, ignoring common indent."""

    def source_fn():
        pass

    source_fn.__doc__ = source_doc
    decorate = modin.utils.append_to_docstring(to_append)
    result_fn = decorate(source_fn)

    # Both sides are dedented, so only relative indentation is compared.
    assert dedent(result_fn.__doc__) == dedent(expected)


def test_align_indents():
    """Check that ``align_indents`` returns the source text for a re-indented copy."""
    source = """
    Source string that sets
        the indent pattern."""
    # Same text, shifted right by five extra spaces.
    shifted = indent(source, " " * 5)
    aligned = modin.utils.align_indents(source, shifted)
    assert source == aligned


def test_format_string():
    """Check ``modin.utils.format_string`` with single-line and multi-line values."""
    # The literals below are whitespace-sensitive: indentation and blank lines
    # are part of what is compared.
    template = """
            Source template string that has some {inline_placeholder}s.
            Placeholder1:
            {new_line_placeholder1}
            Placeholder2:
            {new_line_placeholder2}
            Placeholder3:
            {new_line_placeholder3}
            Placeholder4:
            {new_line_placeholder4}Text text:
                Placeholder5:
                {new_line_placeholder5}
    """

    singleline_value = "Single-line value"
    multiline_value = """
        Some string
            Having different indentation
        From the source one."""
    # Variants of the multi-line value with an extra newline on either side.
    multiline_value_new_line_at_the_end = multiline_value + "\n"
    multiline_value_new_line_at_the_begin = "\n" + multiline_value

    # The line after "Placeholder3:" intentionally contains only spaces,
    # hence the W293 suppression at the end of the literal.
    expected = """
            Source template string that has some Single-line values.
            Placeholder1:
            Some string
                Having different indentation
            From the source one.
            Placeholder2:
            Single-line value
            Placeholder3:
            
            Some string
                Having different indentation
            From the source one.
            Placeholder4:
            Some string
                Having different indentation
            From the source one.
            Text text:
                Placeholder5:
                Some string
                    Having different indentation
                From the source one.
    """  # noqa: W293
    answer = modin.utils.format_string(
        template,
        inline_placeholder=singleline_value,
        new_line_placeholder1=multiline_value,
        new_line_placeholder2=singleline_value,
        new_line_placeholder3=multiline_value_new_line_at_the_begin,
        new_line_placeholder4=multiline_value_new_line_at_the_end,
        new_line_placeholder5=multiline_value,
    )
    assert answer == expected


def warns_that_defaulting_to_pandas_if(
    condition: bool, prefix: Optional[str] = None, suffix: Optional[str] = None
):
    """
    Get a context manager that expects a default-to-pandas warning only if `condition` is True.

    Parameters
    ----------
    condition : bool
        Whether the default to pandas warning should be checked for.
    prefix : Optional[str]
        Forwarded to ``warns_that_defaulting_to_pandas`` when ``condition`` is True.
    suffix : Optional[str]
        Forwarded to ``warns_that_defaulting_to_pandas`` when ``condition`` is True.

    Returns
    -------
    pytest.recwarn.WarningsChecker or contextlib.nullcontext
        The result of ``warns_that_defaulting_to_pandas`` if ``condition`` is True,
        otherwise a ``nullcontext`` that performs no warning check.

    Raises
    ------
    AssertionError
        If ``condition`` is not a ``bool``.
    """
    assert isinstance(condition, bool)
    if not condition:
        return contextlib.nullcontext()
    return warns_that_defaulting_to_pandas(prefix=prefix, suffix=suffix)


def warns_that_defaulting_to_pandas(prefix=None, suffix=None):
    """
    Assert that code warns that it's defaulting to pandas.

    Parameters
    ----------
    prefix : Optional[str]
        If specified, checks that the start of the warning message matches this argument
        before "[Dd]efaulting to pandas". Interpreted as a regular expression fragment.
    suffix : Optional[str]
        If specified, checks that the end of the warning message matches this argument
        after "[Dd]efaulting to pandas". Interpreted as a regular expression fragment.

    Returns
    -------
    pytest.recwarn.WarningsChecker
        Context manager that expects a ``UserWarning`` matching the built pattern.
    """
    # Filler between the pieces of the pattern; the message may span several lines.
    filler = "(.|\\n)+"
    match = "[Dd]efaulting to pandas"
    if prefix:
        # The prefix has to be part of the pattern and come before the core
        # phrase; previously it was dropped, so it was never actually checked.
        match = prefix + filler + match
    if suffix:
        match += filler + suffix
    return pytest.warns(UserWarning, match=match)


@pytest.mark.parametrize("as_json", [True, False])
def test_show_versions(as_json, capsys):
    """Check that ``show_versions`` prints the modin version, also in JSON mode."""
    modin.utils.show_versions(as_json=as_json)
    printed = capsys.readouterr().out
    assert modin.__version__ in printed

    if not as_json:
        return
    # In JSON mode the output must parse and carry the version at this key.
    parsed = json.loads(printed)
    assert parsed["modin dependencies"]["modin"] == modin.__version__


def test_warns_that_defaulting_to_pandas():
    """Check that the warning checker accepts the warning with and without a message."""
    # First without any arguments, then with an explicit message.
    for call_kwargs in ({}, {"message": "Function name"}):
        with warns_that_defaulting_to_pandas():
            ErrorMessage.default_to_pandas(**call_kwargs)


def test_warns_that_defaulting_to_pandas_if_false():
    """Check that a ``False`` condition does not capture the default-to-pandas warning."""
    # NOTE(review): the warning surfacing as an exception presumably relies on the
    # test suite escalating warnings to errors -- confirm against the pytest config.
    with pytest.raises(UserWarning), warns_that_defaulting_to_pandas_if(False):
        ErrorMessage.default_to_pandas()


def test_warns_that_defaulting_to_pandas_if_true():
    """Check that a ``True`` condition expects and accepts the default-to-pandas warning."""
    warning_checker = warns_that_defaulting_to_pandas_if(True)
    with warning_checker:
        ErrorMessage.default_to_pandas()


def test_warns_that_defaulting_to_pandas_if_non_bool():
    """Check that a non-boolean condition is rejected with an ``AssertionError``."""
    not_a_bool = 3
    with pytest.raises(AssertionError):
        warns_that_defaulting_to_pandas_if(not_a_bool)


def test_assert_dtypes_equal():
    """Verify that `assert_dtypes_equal` from test utils works correctly (raises an error when it has to)."""
    from modin.tests.pandas.utils import assert_dtypes_equal

    # Serieses with equal dtypes
    sr1, sr2 = pd.Series([1.0]), pandas.Series([1.0])
    assert sr1.dtype == sr2.dtype == "float"
    assert_dtypes_equal(sr1, sr2)  # shouldn't raise an error since dtypes are equal

    # Serieses with different dtypes belonging to the same class
    sr1 = sr1.astype("int")
    assert sr1.dtype != sr2.dtype and sr1.dtype == "int"
    assert_dtypes_equal(sr1, sr2)  # shouldn't raise an error since both are numeric

    # Serieses with different dtypes not belonging to the same class
    sr2 = sr2.astype("str")
    assert sr1.dtype != sr2.dtype and sr2.dtype == "object"
    with pytest.raises(AssertionError):
        assert_dtypes_equal(sr1, sr2)

    # Dfs with equal dtypes
    df1, df2 = create_test_dfs({"a": [1], "b": [1.0]})
    assert_dtypes_equal(df1, df2)  # shouldn't raise an error since dtypes are equal

    # Dfs with different dtypes belonging to the same class
    df1 = df1.astype({"a": "float"})
    assert df1.dtypes["a"] != df2.dtypes["a"]
    assert_dtypes_equal(df1, df2)  # shouldn't raise an error since both are numeric

    # Dfs with different dtypes
    df2 = df2.astype("str")
    with pytest.raises(AssertionError):
        # Compare the frames themselves: checking `sr1`/`sr2` here would only
        # repeat the series check above and leave this case untested.
        assert_dtypes_equal(df1, df2)

    # Dfs with categorical dtypes
    df1 = df1.astype("category")
    df2 = df2.astype("category")
    assert_dtypes_equal(df1, df2)  # shouldn't raise an error since both are categorical

    # Dfs with different dtypes (categorical and str)
    df1 = df1.astype({"a": "str"})
    with pytest.raises(AssertionError):
        assert_dtypes_equal(df1, df2)


def test_execute():
    """Check that ``modin.utils.execute`` waits on partitions for Modin objects only."""
    data = np.random.rand(100, 64)
    modin_df, pandas_df = create_test_dfs(data)
    partitions = modin_df._query_compiler._modin_frame._partitions.flatten()
    mgr_cls = modin_df._query_compiler._modin_frame._partition_mgr_cls

    # check modin case
    with patch.object(mgr_cls, "wait_partitions", new=Mock()):
        modin.utils.execute(modin_df)
        mgr_cls.wait_partitions.assert_called_once()
        assert (mgr_cls.wait_partitions.call_args[0] == partitions).all()

    # check pandas case without error
    with patch.object(mgr_cls, "wait_partitions", new=Mock()):
        modin.utils.execute(pandas_df)
        mgr_cls.wait_partitions.assert_not_called()

    with patch.object(mgr_cls, "wait_partitions", new=Mock()):
        modin.utils.execute(modin_df)
        mgr_cls.wait_partitions.assert_called_once()

    # check several modin dataframes
    with patch.object(mgr_cls, "wait_partitions", new=Mock()):
        modin.utils.execute(modin_df, modin_df[modin_df.columns[:4]])
        # `assert_called` must be invoked; referencing the attribute alone
        # performs no check at all.
        mgr_cls.wait_partitions.assert_called()
        assert mgr_cls.wait_partitions.call_count == 2


def current_execution_is_native() -> bool:
    """Tell whether both the global storage format and the global engine are "Native"."""
    if StorageFormat.get() != "Native":
        return False
    return Engine.get() == "Native"


def df_or_series_using_native_execution(df: Union[pd.DataFrame, pd.Series]) -> bool:
    """Tell whether the query compiler of this Modin DataFrame or Series is fully "Native"."""
    if df._query_compiler.engine != "Native":
        return False
    return df._query_compiler.storage_format == "Native"


================================================
FILE: modin/utils.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Collection of general utility functions, mostly for internal use."""

import codecs
import functools
import importlib
import inspect
import json
import os
import re
import sys
import types
import warnings
from pathlib import Path
from textwrap import dedent, indent
from typing import (
    Any,
    Callable,
    Iterable,
    List,
    Mapping,
    Optional,
    Protocol,
    TypeVar,
    Union,
    runtime_checkable,
)

import numpy as np
import pandas
from packaging import version
from pandas._typing import JSONSerializable
from pandas.util._decorators import Appender  # type: ignore
from pandas.util._print_versions import (  # type: ignore[attr-defined]
    _get_dependency_info,
    _get_sys_info,
)

from modin._version import get_versions
from modin.config import DocModule, Engine, StorageFormat

# Similar to pandas, sentinel value to use as kwarg in place of None when None has
# special meaning and needs to be distinguished from a user explicitly passing None.
# A bare ``object()`` is used so that the value is distinct from anything a caller
# could construct.
sentinel = object()

# Unconstrained type variable; used in this module to tie a factory's result type
# to the returned instance type (see ``instancer``).
T = TypeVar("T")
"""Generic type parameter"""

# Type variable restricted to callables; the decorator factories in this module
# annotate themselves as ``Callable[[Fn], Fn]`` with it.
Fn = TypeVar("Fn", bound=Callable)
"""Function type parameter (used in decorators that don't change a function's signature)"""


@runtime_checkable
class SupportsPublicToPandas(Protocol):  # noqa: PR01
    """Runtime-checkable protocol satisfied by objects exposing a public ``to_pandas`` method."""

    def to_pandas(self) -> Any:  # noqa: GL08
        ...


@runtime_checkable
class SupportsPublicToNumPy(Protocol):  # noqa: PR01
    """Runtime-checkable protocol satisfied by objects exposing a public ``to_numpy`` method."""

    def to_numpy(self) -> Any:  # noqa: GL08
        ...


@runtime_checkable
class SupportsPrivateToNumPy(Protocol):  # noqa: PR01
    """Runtime-checkable protocol satisfied by objects exposing a ``_to_numpy`` method (leading underscore)."""

    def _to_numpy(self) -> Any:  # noqa: GL08
        ...


# Minimum required versions of the optional engine packages, parsed so they can be
# compared with installed versions. ``_get_modin_deps_info`` marks installed
# ray/dask/distributed versions below these as outdated.
MIN_RAY_VERSION = version.parse("2.10.0")
MIN_DASK_VERSION = version.parse("2.22.0")
MIN_UNIDIST_VERSION = version.parse("0.2.1")

# URL template of the pandas API reference for the pandas version in use. The pandas
# version is filled in here; the remaining ``{}`` placeholder receives the API token
# in ``_make_api_url``.
PANDAS_API_URL_TEMPLATE = f"https://pandas.pydata.org/pandas-docs/version/{pandas.__version__}/reference/api/{{}}.html"

# The '__reduced__' name is used internally by the query compiler as a column name to
# represent pandas Series objects that are not explicitly assigned a name, so as to
# distinguish between an N-element series and 1xN dataframe.
MODIN_UNNAMED_SERIES_LABEL = "__reduced__"


def _make_api_url(token: str) -> str:
    """
    Build the pandas documentation URL for an API token.

    Parameters
    ----------
    token : str
        Dotted API name to substitute into the URL template.

    Returns
    -------
    str
        URL of the pandas documentation page for `token`.

    Notes
    -----
    Kept as a separate function so that it is easy to test.
    """
    api_url = PANDAS_API_URL_TEMPLATE.format(token)
    return api_url


def _get_indent(doc: str) -> int:
    """
    Find the smallest indentation used in a docstring.

    Parameters
    ----------
    doc : str
        Docstring to inspect.

    Returns
    -------
    int
        Smallest indent among the non-empty lines, or 0 when there are none.
    """
    return min(_get_indents(doc), default=0)


def _get_indents(source: Union[list, str]) -> list:
    """
    Compute indentation for each line of the source string.

    Parameters
    ----------
    source : str or list of str
        String to compute indents for. Passed list considered
        as a list of lines of the source string.

    Returns
    -------
    list of ints
        List containing computed indents for each line.
    """
    indents = []

    if not isinstance(source, list):
        source = source.splitlines()

    for line in source:
        if not line.strip():
            continue
        for pos, ch in enumerate(line):
            if ch != " ":
                break
        indents.append(pos)
    return indents


def format_string(template: str, **kwargs: str) -> str:
    """
    Insert passed values at the corresponding placeholders of the specified template.

    In contrast with the regular ``str.format()`` this function computes proper
    indents for the placeholder values.

    Parameters
    ----------
    template : str
        Template to substitute values in.
    **kwargs : dict
        Dictionary that maps placeholder names with values.

    Returns
    -------
    str
        Formatted string.
    """
    # We want to change indentation only for those values which placeholders are located
    # at the start of the line, in that case the placeholder sets an indentation
    # that the filling value has to obey.
    # RegExp determining placeholders located at the beginning of the line.
    line_start_placeholder = re.compile(r"^( *)\{(\w+)\}")
    for line in template.splitlines():
        match = line_start_placeholder.match(line)
        if match is None:
            continue
        nspaces = len(match.group(1))
        key = match.group(2)

        value = kwargs.get(key)
        if not value:
            continue
        value = dedent(value)

        # Since placeholder is located at the beginning of a new line,
        # it already has '\n' before it, so to avoid double new lines
        # we want to discard the first leading '\n' at the value line,
        # the others leading '\n' are considered as being put on purpose
        if value.startswith("\n"):
            value = value[1:]
        if not value:
            # The value was only a newline or only whitespace, so nothing is left
            # to indent; substitute the empty string rather than indexing into it.
            kwargs[key] = value
            continue

        # We're not indenting the first line of the value, since it's already indented
        # properly because of the placeholder indentation.
        first_line, *other_lines = value.splitlines()
        padding = " " * nspaces
        indented_lines = [indent(other, padding) for other in other_lines]
        # `.splitlines()` doesn't preserve last empty line, restore it if necessary
        if value.endswith("\n"):
            indented_lines.append(padding)

        kwargs[key] = "\n".join([first_line, *indented_lines])

    return template.format(**kwargs)


def align_indents(source: str, target: str) -> str:
    """
    Re-indent `target` so that it uses the indentation of `source`.

    Parameters
    ----------
    source : str
        String whose minimal indentation is taken as the reference.
    target : str
        String to re-indent.

    Returns
    -------
    str
        `target` with its common indentation removed and the indentation
        of `source` applied instead.
    """
    padding = " " * _get_indent(source)
    return indent(dedent(target), padding)


def append_to_docstring(message: str) -> Callable[[Fn], Fn]:
    """
    Build a decorator that adds `message` to the end of a function's docstring.

    Parameters
    ----------
    message : str
        Text to append.

    Returns
    -------
    callable
    """

    def decorator(func: Fn) -> Fn:
        # Align the message with the existing docstring (an absent docstring is
        # treated as empty) before handing it to pandas' ``Appender``.
        existing_doc = func.__doc__ or ""
        appender = Appender(align_indents(existing_doc, message))
        return appender(func)

    return decorator


def _replace_doc(
    source_obj: object,
    target_obj: object,
    overwrite: bool,
    apilink: Optional[Union[str, List[str]]],
    parent_cls: Optional[Fn] = None,
    attr_name: Optional[str] = None,
) -> None:
    """
    Replace docstring in `target_obj`, possibly taking from `source_obj` and augmenting.

    Can append the link to pandas API online documentation.

    Parameters
    ----------
    source_obj : object
        Any object from which to take docstring from.
    target_obj : object
        The object which docstring to replace.
    overwrite : bool
        Forces replacing the docstring with the one from `source_obj` even
        if `target_obj` has its own non-empty docstring.
    apilink : str | List[str], optional
        If non-empty, insert the link(s) to pandas API documentation.
        Should be the prefix part in the URL template, e.g. "pandas.DataFrame".
    parent_cls : class, optional
        If `target_obj` is an attribute of a class, `parent_cls` should be that class.
        This is used for generating the API URL as well as for handling special cases
        like `target_obj` being a property or a cached_property.
    attr_name : str, optional
        Gives the name to `target_obj` if it's an attribute of `parent_cls`.
        Needed to handle some special cases and in most cases could be determined automatically.

    Notes
    -----
    Nothing is changed when the selected docstring is empty. For properties and
    cached properties that belong to `parent_cls`, a new descriptor carrying the
    docstring is created and assigned on `parent_cls` instead of mutating the
    existing one. Whenever the source docstring is used, the underlying function
    (or the object itself) gets a ``__doc_inherited__ = True`` marker.
    """
    if isinstance(target_obj, (staticmethod, classmethod)):
        # we cannot replace docs on decorated objects, we must replace them
        # on original functions instead
        target_obj = target_obj.__func__

    source_doc = source_obj.__doc__ or ""
    target_doc = target_obj.__doc__ or ""
    # A target without its own docstring always takes the source docstring.
    overwrite = overwrite or not target_doc
    doc = source_doc if overwrite else target_doc
    if doc == "":
        # Empty docstrings do not need to be inherited
        return

    # Derive the attribute name from the wrapped function when it wasn't given.
    if parent_cls and not attr_name:
        if isinstance(target_obj, property):
            attr_name = target_obj.fget.__name__  # type: ignore[union-attr]
        elif isinstance(target_obj, functools.cached_property):
            attr_name = target_obj.func.__name__
        # NOTE(review): this branch looks unreachable, since staticmethod/classmethod
        # objects were already unwrapped to ``__func__`` above -- confirm.
        elif isinstance(target_obj, (staticmethod, classmethod)):
            attr_name = target_obj.__func__.__name__
        else:
            attr_name = target_obj.__name__  # type: ignore[attr-defined]

    # Add the pandas API link only when there is a source docstring, a link prefix
    # was given, the target doesn't mention the link already, and the attribute is
    # not underscore-prefixed.
    if (
        source_doc.strip()
        and apilink
        and "pandas API documentation for " not in target_doc
        and (not (attr_name or "").startswith("_"))
    ):
        # Normalize a single link prefix into a one-element list.
        apilink_l = [apilink] if not isinstance(apilink, list) and apilink else apilink
        links = []
        for link in apilink_l:
            if attr_name:
                token = f"{link}.{attr_name}"
            else:
                token = link
            url = _make_api_url(token)
            links.append(f"`{token} <{url}>`_")

        indent_line = " " * _get_indent(doc)
        notes_section = f"\n{indent_line}Notes\n{indent_line}-----\n"

        url_line = f"{indent_line}See pandas API documentation for {', '.join(links)} for more.\n"
        notes_section_with_url = notes_section + url_line

        # Put the link right under an existing "Notes" header, otherwise append a
        # new "Notes" section holding just the link.
        if notes_section in doc:
            doc = doc.replace(notes_section, notes_section_with_url)
        else:
            doc += notes_section_with_url

    if parent_cls and isinstance(target_obj, property):
        if overwrite:
            target_obj.fget.__doc_inherited__ = True  # type: ignore[union-attr]
        assert attr_name is not None
        # Install a fresh property with the same accessors and the new docstring.
        setattr(
            parent_cls,
            attr_name,
            property(target_obj.fget, target_obj.fset, target_obj.fdel, doc),
        )
    elif parent_cls and isinstance(target_obj, functools.cached_property):
        if overwrite:
            target_obj.func.__doc_inherited__ = True  # type: ignore[attr-defined]
        assert attr_name is not None
        # The docstring is set on the wrapped function, then a new cached_property
        # is built around it and assigned on the class.
        target_obj.func.__doc__ = doc
        setattr(
            parent_cls,
            attr_name,
            functools.cached_property(target_obj.func),
        )
        # otherwise: `TypeError: Cannot use cached_property instance without calling __set_name__ on it.`
        getattr(parent_cls, attr_name).__set_name__(parent_cls, attr_name)
    else:
        if overwrite:
            target_obj.__doc_inherited__ = True  # type: ignore[attr-defined]
        target_obj.__doc__ = doc


# This is a list of callables, one per docstring-inheritance decoration. Each
# takes a DocModule string (as the ``doc_module`` keyword) and re-applies that
# inheritance according to the DocModule. When we update DocModule, we can run
# every callable in this list to update all inherited docstrings.
_docstring_inheritance_calls: list[Callable[[str], None]] = []

# This is a set of (class, attribute_name) pairs whose docstrings we have
# already replaced since we last updated DocModule. Note that we don't store
# the attributes themselves since we replace property attributes instead of
# modifying them in place:
# https://github.com/modin-project/modin/blob/e9dbcc127913db77473a83936e8b6bb94ef84f0d/modin/utils.py#L353
_attributes_with_docstrings_replaced: set[tuple[type, str]] = set()


def _documentable_obj(obj: object) -> bool:
    """
    Check whether we can replace the docstring of `obj`.

    Parameters
    ----------
    obj : object
        Object whose docstring we want to replace.

    Returns
    -------
    bool
        Whether we can replace the docstring.
    """
    return bool(
        callable(obj)
        and not inspect.isclass(obj)
        or (isinstance(obj, property) and obj.fget)
        or (isinstance(obj, functools.cached_property))
        or (isinstance(obj, (staticmethod, classmethod)) and obj.__func__)
    )


def _update_inherited_docstrings(doc_module: DocModule) -> None:
    """
    Re-apply every recorded docstring inheritance for the current DocModule.

    Parameters
    ----------
    doc_module : DocModule
        The current DocModule.
    """
    # Forget what was replaced before, so that every attribute is processed again.
    _attributes_with_docstrings_replaced.clear()
    current_module_name = doc_module.get()
    for reapply in _docstring_inheritance_calls:
        reapply(doc_module=current_module_name)  # type: ignore[call-arg]


def _inherit_docstrings_in_place(
    cls_or_func: Fn,
    doc_module: str,
    parent: object,
    excluded: List[object],
    overwrite_existing: bool = False,
    apilink: Optional[Union[str, List[str]]] = None,
) -> None:
    """
    Replace `cls_or_func` docstrings with `parent` docstrings in place.

    Parameters
    ----------
    cls_or_func : Fn
        The class or function whose docstrings we need to update.
    doc_module : str
        The docs module.
    parent : object
        Parent object from which the decorated object inherits __doc__.
    excluded : list
        List of parent objects from which the class does not
        inherit docstrings.
    overwrite_existing : bool, default: False
        Allow overwriting docstrings that already exist in
        the decorated class.
    apilink : str | List[str], optional
        If non-empty, insert the link(s) to pandas API documentation.
        Should be the prefix part in the URL template, e.g. "pandas.DataFrame".

    Notes
    -----
    For classes, the attributes of every class in the MRO (except ``object``) are
    processed, so docstrings of attributes defined on ancestor classes may change too.
    Processed ``(class, attribute)`` pairs are recorded in
    ``_attributes_with_docstrings_replaced`` and skipped on later calls.
    """
    # Import the docs module and get the class (e.g. `DataFrame`).
    imported_doc_module = importlib.import_module(doc_module)
    # Set the default parent so we can use it in case some docs are missing from
    # parent module.
    default_parent = parent
    # Try to get the parent object from the doc module, and if it isn't there,
    # get it from parent instead. We only do this if we are overriding pandas
    # documentation. We don't touch other docs.
    if doc_module != DocModule.default and "pandas" in str(
        getattr(parent, "__module__", "")
    ):
        parent_name = (
            # DocModule should use the class BasePandasDataset to override the
            # docstrings of BasePandasDataset, even if BasePandasDataset
            # normally inherits docstrings from a different `parent`.
            "BasePandasDataset"
            if getattr(cls_or_func, "__name__", "") == "BasePandasDataset"
            # For other classes, override docstrings with the class that has the
            # same name as the `parent` class, e.g. DataFrame inherits
            # docstrings from doc_module.DataFrame.
            else getattr(parent, "__name__", "")
        )
        parent = getattr(imported_doc_module, parent_name, parent)
    if parent != default_parent:
        # Reset API link in case the docs are overridden.
        apilink = None
        overwrite_existing = True

    # Docstring of the class/function itself.
    if parent not in excluded:
        _replace_doc(parent, cls_or_func, overwrite_existing, apilink)

    # Anything that is not a plain function is treated as a class: walk its
    # attributes along the MRO.
    if not isinstance(cls_or_func, types.FunctionType):
        # Attribute names already visited in this call; the first class in the MRO
        # that defines a name is the only one processed for it.
        seen = set()
        for base in cls_or_func.__mro__:  # type: ignore[attr-defined]
            if base is object:
                continue
            for attr, obj in base.__dict__.items():
                # only replace docstrings once to prevent https://github.com/modin-project/modin/issues/7113
                if attr in seen or (base, attr) in _attributes_with_docstrings_replaced:
                    continue
                seen.add(attr)
                if hasattr(obj, "_wrapped_superclass_method"):
                    # If this method originally comes from a superclass, we get
                    # docstrings directly from the wrapped superclass method
                    # rather than inheriting docstrings from the usual parent.
                    # For example, for BasePandasDataset and Series, the behavior is:
                    # - If Series inherits a method from BasePandasDataset, then
                    #   it gets the docstring from that method in BasePandasDataset.
                    # - If Series overrides a method or defines its own method
                    #   that's not present in BasePandasDataset, it follows the usual
                    #   inheritance hierarchy of `parent` and `default_parent`.
                    parent_obj = obj._wrapped_superclass_method
                else:
                    # Try to get the attribute from the docs class first, then
                    # from the default parent (pandas), and if it's not in either,
                    # set `parent_obj` to `None`.
                    parent_obj = getattr(
                        parent, attr, getattr(default_parent, attr, None)
                    )
                    # Skip excluded parents and anything whose docstring cannot be
                    # replaced on either side.
                    if (
                        parent_obj in excluded
                        or not _documentable_obj(parent_obj)
                        or not _documentable_obj(obj)
                    ):
                        continue

                _replace_doc(
                    parent_obj,
                    obj,
                    overwrite_existing,
                    apilink,
                    parent_cls=base,
                    attr_name=attr,
                )

                _attributes_with_docstrings_replaced.add((base, attr))


def _inherit_docstrings(
    parent: object,
    excluded: List[object] = [],
    overwrite_existing: bool = False,
    apilink: Optional[Union[str, List[str]]] = None,
) -> Callable[[Fn], Fn]:
    """
    Build a decorator that fills the decorated object's docstring(s) from `parent`.

    The decorated object takes the `parent` __doc__ attribute. If it is a class,
    the methods and properties defined in it or in its ancestors also take the
    __doc__ of the matching methods and properties of `parent`.

    Parameters
    ----------
    parent : object
        Parent object from which the decorated object inherits __doc__.
    excluded : list, default: []
        List of parent objects from which the class does not
        inherit docstrings.
    overwrite_existing : bool, default: False
        Allow overwriting docstrings that already exist in
        the decorated class.
    apilink : str | List[str], optional
        If non-empty, insert the link(s) to pandas API documentation.
        Should be the prefix part in the URL template, e.g. "pandas.DataFrame".

    Returns
    -------
    callable
        Decorator which replaces the decorated object's documentation with `parent` documentation.

    Notes
    -----
    Docstrings are replaced even for attributes that the target class does not define
    itself but gets from an ancestor class, so the docstrings of ancestor class
    attributes may change as well.
    """

    def decorator(cls_or_func: Fn) -> Fn:
        # Bind everything except the doc module, so that the same inheritance
        # can be run again later for a different doc module.
        apply_for_doc_module = functools.partial(
            _inherit_docstrings_in_place,
            cls_or_func=cls_or_func,
            parent=parent,
            excluded=excluded,
            overwrite_existing=overwrite_existing,
            apilink=apilink,
        )
        current_doc_module = DocModule.get()
        apply_for_doc_module(doc_module=current_doc_module)
        # Recorded only after the first application has gone through.
        _docstring_inheritance_calls.append(apply_for_doc_module)
        return cls_or_func

    return decorator


# Register the updater with DocModule so that inherited docstrings get refreshed.
# NOTE(review): presumably the callback runs whenever DocModule is updated --
# confirm against ``modin.config``.
DocModule.subscribe(_update_inherited_docstrings)


def expanduser_path_arg(argname: str) -> Callable[[Fn], Fn]:
    """
    Build a decorator that "user-expands" one path argument of a function.

    Parameters
    ----------
    argname : str
        Name of the argument that holds the path to expand.

    Returns
    -------
    callable
        Decorator which performs the replacement.
    """

    def decorator(func: Fn) -> Fn:
        signature = inspect.signature(func)
        parameter = signature.parameters.get(argname)
        assert (
            getattr(parameter, "name", None) == argname
        ), f"Function {func} does not take '{argname}' as argument"

        @functools.wraps(func)
        def wrapped(*args: tuple, **kw: dict) -> Any:
            bound = signature.bind(*args, **kw)
            patharg = bound.arguments.get(argname, None)
            if not patharg:
                # Nothing to expand: forward the call exactly as received.
                return func(*args, **kw)
            if isinstance(patharg, str) and patharg.startswith("~"):
                bound.arguments[argname] = os.path.expanduser(patharg)
            elif isinstance(patharg, Path):
                bound.arguments[argname] = patharg.expanduser()
            return func(*bound.args, **bound.kwargs)

        return wrapped  # type: ignore[return-value]

    return decorator


def func_from_deprecated_location(
    func_name: str, module: str, deprecation_message: str
) -> Callable:
    """
    Create a function that decorates a function ``module.func_name`` with a ``FutureWarning``.

    Parameters
    ----------
    func_name : str
        Function name to decorate.
    module : str
        Module where the function is located.
    deprecation_message : str
        Message to print in a future warning.

    Returns
    -------
    callable
        Wrapper that looks up ``module.func_name`` on every call, emits the
        warning and returns the result of the looked-up function.
    """

    def deprecated_func(*args: tuple[Any], **kwargs: dict[Any, Any]) -> Any:
        """Call deprecated function."""
        # The target is resolved at call time, so `module` is imported only
        # when the deprecated entry point is actually used.
        func = getattr(importlib.import_module(module), func_name)
        # using 'FutureWarning' as 'DeprecationWarnings' are filtered out by default;
        # `stacklevel=2` attributes the warning to the caller of the deprecated
        # function rather than to this wrapper.
        warnings.warn(deprecation_message, FutureWarning, stacklevel=2)
        return func(*args, **kwargs)

    return deprecated_func


def hashable(obj: Any) -> bool:
    """
    Return whether the `obj` is hashable.

    Parameters
    ----------
    obj : object
        The object to check.

    Returns
    -------
    bool
        True if ``hash(obj)`` succeeds, False otherwise.
    """
    # Quick exit for objects that expose no `__hash__` attribute at all. Unhashable
    # built-ins such as lists still have the attribute (set to None), so they are
    # caught by the `hash()` call below instead.
    if not hasattr(obj, "__hash__"):
        return False
    # Hashing is the only reliable check, as in the case of `hash(([],))`
    # (e.g. an unhashable object inside a tuple).
    try:
        hash(obj)
    except TypeError:
        return False
    return True


def try_cast_to_pandas(obj: Any, squeeze: bool = False) -> Any:
    """
    Convert `obj` and all nested objects from Modin to pandas if it is possible.

    If no conversion is possible return `obj`.

    Parameters
    ----------
    obj : object
        Object to convert from Modin to pandas.
    squeeze : bool, default: False
        Squeeze the converted object(s) before returning them.

    Returns
    -------
    object
        Converted object.

    Notes
    -----
    Lists, tuples (including namedtuples) and dict values are converted
    recursively. A callable that comes from a ``modin`` module is replaced by the
    attribute of the same name on ``pandas.DataFrame`` or ``pandas.Series`` when
    such an attribute exists.
    """
    if isinstance(obj, SupportsPublicToPandas) or hasattr(obj, "modin"):
        # Objects exposing a `modin` accessor are converted through it; the rest
        # go through their own public `to_pandas`.
        result = obj.modin.to_pandas() if hasattr(obj, "modin") else obj.to_pandas()
        if squeeze:
            result = result.squeeze(axis=1)

        # QueryCompiler/low-level ModinFrame case, it doesn't have logic about conversion to Series
        if (
            isinstance(getattr(result, "name", None), str)
            and result.name == MODIN_UNNAMED_SERIES_LABEL
        ):
            result.name = None
        return result
    if isinstance(obj, (list, tuple)):
        converted = [try_cast_to_pandas(o, squeeze=squeeze) for o in obj]
        if isinstance(obj, tuple) and hasattr(obj, "_fields"):
            # namedtuple classes take their fields as separate positional
            # arguments, so passing a single list would raise a TypeError.
            return type(obj)(*converted)
        return type(obj)(converted)
    if isinstance(obj, dict):
        return {k: try_cast_to_pandas(v, squeeze=squeeze) for k, v in obj.items()}
    if callable(obj):
        # `__module__` may be present but set to None (e.g. on bound built-in
        # methods), so fall back to an empty string before splitting.
        module_hierarchy = (getattr(obj, "__module__", None) or "").split(".")
        fn_name = getattr(obj, "__name__", None)
        if fn_name and module_hierarchy[0] == "modin":
            return (
                getattr(pandas.DataFrame, fn_name, obj)
                if module_hierarchy[-1] == "dataframe"
                else getattr(pandas.Series, fn_name, obj)
            )
    return obj


def execute(*objs: Iterable[Any]) -> None:
    """
    Run the pending lazy computations of every object in `objs` and wait for them.

    Parameters
    ----------
    *objs : Iterable[Any]
        Objects whose lazy computations should be triggered. Objects without
        a ``_query_compiler`` attribute are ignored.
    """
    query_compilers = (
        obj._query_compiler for obj in objs if hasattr(obj, "_query_compiler")
    )
    for query_compiler in query_compilers:
        query_compiler.execute()


def wrap_into_list(*args: Any, skipna: bool = True) -> List[Any]:
    """
    Collect the passed values into a single flat list.

    A value that is itself a list contributes its elements one by one
    instead of being inserted as a whole list object.

    Parameters
    ----------
    *args : tuple
        Objects to wrap into a list.
    skipna : bool, default: True
        Whether or not to skip nan or None values.

    Returns
    -------
    list
        Passed values wrapped in a list.
    """

    def is_missing(value: Any) -> bool:
        if value is None:
            return True
        return isinstance(value, float) and np.isnan(value)

    flattened: List[Any] = []
    for item in args:
        # Only top-level arguments are checked; elements of a passed list are
        # taken over as they are.
        if skipna and is_missing(item):
            continue
        flattened += item if isinstance(item, list) else [item]
    return flattened


def wrap_udf_function(func: Callable) -> Callable:
    """
    Create a decorator that makes `func` return pandas objects instead of Modin.

    Parameters
    ----------
    func : callable
        Function to wrap.

    Returns
    -------
    callable
        Wrapper around `func`. It carries the ``__name__`` of `func` when `func`
        has one.
    """

    def wrapper(*args: Any, **kwargs: Any) -> Any:
        result = func(*args, **kwargs)
        # if user accidentally returns modin DataFrame or Series
        # casting it back to pandas to properly process
        return try_cast_to_pandas(result)

    # Not every callable has a `__name__` (e.g. `functools.partial` objects or
    # instances defining `__call__`); keep the wrapper's own name in that case
    # instead of failing with an AttributeError.
    func_name = getattr(func, "__name__", None)
    if func_name is not None:
        wrapper.__name__ = func_name
    return wrapper


def get_current_execution() -> str:
    """
    Build the name of the current execution.

    Returns
    -------
    str
        String of the form <StorageFormat>On<Engine>.
    """
    storage_format = StorageFormat.get()
    engine = Engine.get()
    return f"{storage_format}On{engine}"


def instancer(_class: Callable[[], T]) -> T:
    """
    Instantiate `_class` without arguments and return the instance.

    This serves the purpose of allowing us to use all of pandas plotting methods
    without aliasing and writing each of them ourselves.

    Parameters
    ----------
    _class : object

    Returns
    -------
    object
        Instance of `_class`.
    """
    instance = _class()
    return instance


def import_optional_dependency(name: str, message: str) -> types.ModuleType:
    """
    Import an optional dependency.

    Parameters
    ----------
    name : str
        The module name.
    message : str
        Additional text to include in the ImportError message.

    Returns
    -------
    module : ModuleType
        The imported module.

    Raises
    ------
    ImportError
        If the module cannot be imported; the original exception is
        suppressed and replaced with a message containing install advice.
    """
    try:
        module = importlib.import_module(name)
    except ImportError:
        install_hint = (
            f"Missing optional dependency '{name}'. {message} "
            + f"Use pip or conda to install {name}."
        )
        # `from None` hides the low-level import traceback from the user
        raise ImportError(install_hint) from None
    return module


def _get_modin_deps_info() -> Mapping[str, Optional[JSONSerializable]]:
    """
    Return Modin-specific dependencies information as a JSON serializable dictionary.

    Returns
    -------
    Mapping[str, Optional[pandas.JSONSerializable]]
        The dictionary of Modin dependencies and their versions. A package
        that cannot be imported is mapped to None; a package older than the
        required minimum gets an "outdated" note appended to its version.
    """
    import modin  # delayed import so modin.__init__ is fully initialized

    deps_info = {"modin": modin.__version__}

    minimal_versions = {
        "ray": MIN_RAY_VERSION,
        "dask": MIN_DASK_VERSION,
        "distributed": MIN_DASK_VERSION,
    }
    for package, minimal in minimal_versions.items():
        try:
            module = importlib.import_module(package)
        except ImportError:
            deps_info[package] = None
            continue
        installed = module.__version__
        note = ""
        if version.parse(module.__version__) < minimal:
            note = f" (outdated; >={minimal} required)"
        deps_info[package] = installed + note
    return deps_info


def show_versions(as_json: Union[str, bool] = False) -> None:
    """
    Provide useful information, important for bug reports.

    It comprises info about the host operating system, pandas version,
    and versions of other installed relative packages.

    Parameters
    ----------
    as_json : str or bool, default: False
        * If False, outputs info in a human readable form to the console.
        * If str, it will be considered as a path to a file.
          Info will be written to that file in JSON format.
        * If True, outputs info in JSON format to the console.

    Notes
    -----
    This is mostly a copy of pandas.show_versions() but adds separate listing
    of Modin-specific dependencies.
    """
    system_info = _get_sys_info()
    system_info["commit"] = get_versions()["full-revisionid"]
    modin_deps = _get_modin_deps_info()
    pandas_deps = _get_dependency_info()

    if as_json:
        payload = {
            "system": system_info,
            "modin dependencies": modin_deps,
            "dependencies": pandas_deps,
        }
        if as_json is True:
            sys.stdout.writelines(json.dumps(payload, indent=2))
            return
        # any other truthy value is treated as a path of the output file
        assert isinstance(as_json, str)  # needed for mypy
        with codecs.open(as_json, "wb", encoding="utf8") as f:
            json.dump(payload, f, indent=2)
        return

    # human readable report: collapse the locale mapping into "<language>.<encoding>"
    assert isinstance(system_info["LOCALE"], dict)  # needed for mypy
    locale_info = system_info["LOCALE"]
    language_code, encoding = locale_info["language-code"], locale_info["encoding"]
    system_info["LOCALE"] = f"{language_code}.{encoding}"

    # every "key: value" row is padded to the longest dependency name
    key_width = max(max(map(len, mapping)) for mapping in (pandas_deps, modin_deps))
    print("\nINSTALLED VERSIONS\n------------------")  # noqa: T201
    for key, value in system_info.items():
        print(f"{key:<{key_width}}: {value}")  # noqa: T201
    for title, mapping in (("Modin", modin_deps), ("pandas", pandas_deps)):
        underline = "-" * (len(title) + 13)
        print(f"\n{title} dependencies\n{underline}")  # noqa: T201
        for key, value in mapping.items():
            print(f"{key:<{key_width}}: {value}")  # noqa: T201


class ModinAssumptionError(Exception):
    """Exception signalling a failed assumption, which lets us default to pandas."""


def _maybe_warn_on_default(message: str = "", *, reason: str = "") -> None:
    """
    Raise a warning on an operation that defaults to pandas if necessary.

    The decision is delegated to the query compiler class used by the
    currently configured active backend, which prints a warning message
    about defaulting to pandas if needed.

    Parameters
    ----------
    message : str, default: ""
        The message to show.
    reason : str, default: ""
        The reason for defaulting.
    """
    # Avoids a module-level circular import
    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher

    active_factory = FactoryDispatcher.get_factory()
    query_compiler_cls = active_factory.io_cls.query_compiler_cls
    query_compiler_cls._maybe_warn_on_default(message=message, reason=reason)


class classproperty:
    """
    Decorator that allows creating read-only class properties.

    The wrapped function receives the owning class and is re-evaluated on
    every attribute access, both through the class and through its instances.

    Parameters
    ----------
    func : method
        Function that takes the class as its only argument.

    Examples
    --------
    >>> class A:
    ...     field = 10
    ...     @classproperty
    ...     def field_x2(cls):
    ...             return cls.field * 2
    ...
    >>> print(A.field_x2)
    20
    """

    def __init__(self, func: Any):
        # the getter is kept under the same attribute name `property` uses
        self.fget = func

    def __get__(self, instance: Any, owner: Any) -> Any:  # noqa: GL08
        # `instance` is ignored on purpose: the getter always receives the class
        getter = self.fget
        return getter(owner)


def reload_modin() -> None:
    """
    Reload all previously imported Modin modules.

    The call to this function is required
    if an execution engine has been shut down and
    is going to be started up once again.

    Notes
    -----
    Only the ``modin`` package and its submodules are reloaded. Unrelated
    packages whose names merely start with "modin" (for example
    ``modin_spreadsheet``) are left untouched.
    """
    # Iterate over a snapshot: reloading may import new modules and thus
    # mutate `sys.modules` while we are looping.
    for name, module in sys.modules.copy().items():
        is_modin_module = name == "modin" or name.startswith("modin.")
        # `sys.modules` may hold `None` placeholders, which cannot be reloaded
        if is_modin_module and module is not None:
            importlib.reload(module)


================================================
FILE: modin-autoimport-pandas.pth
================================================
# Pre-import pandas when the `__MODIN_AUTOIMPORT_PANDAS__` environment variable is set to a non-empty value. Must stay on a single line starting with `import`: Python's `site` module executes such lines of a .pth file at interpreter startup.
import os; os.environ.get("__MODIN_AUTOIMPORT_PANDAS__", None) and __import__("pandas")


================================================
FILE: mypy.ini
================================================
[mypy]
# Ignoring missing imports can be dangerous, should do this at module-by-module level
ignore_missing_imports = True
show_error_codes = True
show_column_numbers = True
check_untyped_defs = True
follow_imports = silent

# be strict
disallow_untyped_calls=True
disallow_untyped_defs=True
strict_optional=True
warn_no_return=True
warn_redundant_casts=True
warn_unused_ignores=True
disallow_any_generics=False
warn_unreachable=True

# We will add more files over time to increase coverage
files =
        modin/config/,
        modin/core/dataframe/base/,
        modin/logging/,
        modin/distributed/,
        modin/*.py

exclude = .*/tests/.*


================================================
FILE: requirements/env_unidist_linux.yml
================================================
name: modin_on_unidist
channels:
  - conda-forge
dependencies:
  - pip

  # required dependencies
  - pandas>=2.2,<2.4
  - numpy>=1.22.4
  - unidist-mpi>=0.2.1
  - mpich
  - fsspec>=2022.11.0
  - packaging>=21.0
  - psutil>=5.8.0

  # optional dependencies
  # NOTE Keep the ray and dask dependencies in sync with the Windows Unidist
  # environment and the general environment-dev.yml.
  # We include the ray and dask dependencies here because we want to test
  # switching dataframe backends to ray or dask.
  - ray-core>=2.10.0,<3
  # workaround for https://github.com/conda/conda/issues/11744
  - grpcio!=1.45.*
  - grpcio!=1.46.*
  - dask>=2.22.0
  - pyarrow>=10.0.1
  - xarray>=2022.12.0
  - jinja2>=3.1.2
  - scipy>=1.10.0
  - s3fs>=2022.11.0
  - lxml>=4.9.2
  - openpyxl>=3.1.0
  - xlrd>=2.0.1
  - matplotlib>=3.6.3
  - sqlalchemy>=2.0.0
  - pandas-gbq>=0.19.0
  - pytables>=3.8.0
  # pymssql==2.2.8 broken: https://github.com/modin-project/modin/issues/6429
  - pymssql>=2.1.5,!=2.2.8
  - psycopg2>=2.9.6
  - fastparquet>=2022.12.0
  - tqdm>=4.60.0
  - numexpr>=2.8.4

  # dependencies for making release
  - pygithub>=v1.58.0
  - pygit2>=1.9.2

  # test dependencies
  - coverage>=7.1.0
  - moto>=4.1.0
  - pytest>=7.3.2
  - pytest-cov>=4.0.0
  - pytest-xdist>=3.2.0
  - typing_extensions

  # code linters
  - black>=24.1.0
  - flake8>=6.0.0
  - flake8-no-implicit-concat>=0.3.4
  - flake8-print>=5.0.0
  - mypy>=1.0.0
  - pandas-stubs>=2.0.0

  - pip:
      # Pinned to a commit that fixes breaking ipywidgets changes; the fix has not been released yet.
      - git+https://github.com/modin-project/modin-spreadsheet.git@49ffd89f683f54c311867d602c55443fb11bf2a5
      - connectorx>=0.2.6a4
      # The `numpydoc` version should match the version installed in the `lint-pydocstyle` job of the CI.
      - numpydoc==1.6.0


================================================
FILE: requirements/env_unidist_win.yml
================================================
name: modin_on_unidist
channels:
  - conda-forge
dependencies:
  - pip

  # required dependencies
  - pandas>=2.2,<2.4
  - numpy>=1.22.4
  - unidist-mpi>=0.2.1
  - msmpi
  - fsspec>=2022.11.0
  - packaging>=21.0
  - psutil>=5.8.0

  # optional dependencies
  # NOTE Keep the ray and dask dependencies in sync with the Linux Unidist
  # environment and the general environment-dev.yml.
  # We include the ray and dask dependencies here because we want to test
  # switching dataframe backends to ray or dask.
  - ray-core>=2.10.0,<3
  # workaround for https://github.com/conda/conda/issues/11744
  - grpcio!=1.45.*
  - grpcio!=1.46.*
  - dask>=2.22.0
  - pyarrow>=10.0.1
  - xarray>=2022.12.0
  - jinja2>=3.1.2
  - scipy>=1.10.0
  - s3fs>=2022.11.0
  - lxml>=4.9.2
  - openpyxl>=3.1.0
  - xlrd>=2.0.1
  - matplotlib>=3.6.3
  - sqlalchemy>=2.0.0
  - pandas-gbq>=0.19.0
  - pytables>=3.8.0
  # pymssql==2.2.8 broken: https://github.com/modin-project/modin/issues/6429
  - pymssql>=2.1.5,!=2.2.8
  - psycopg2>=2.9.6
  - fastparquet>=2022.12.0
  - tqdm>=4.60.0
  - numexpr>=2.8.4

  # dependencies for making release
  - pygithub>=v1.58.0
  - pygit2>=1.9.2

  # test dependencies
  - coverage>=7.1.0
  - moto>=4.1.0
  - pytest>=7.3.2
  - pytest-cov>=4.0.0
  - pytest-xdist>=3.2.0
  - typing_extensions

  # code linters
  - black>=24.1.0
  - flake8>=6.0.0
  - flake8-no-implicit-concat>=0.3.4
  - flake8-print>=5.0.0
  - mypy>=1.0.0
  - pandas-stubs>=2.0.0

  - pip:
      - dataframe-api-compat>=0.2.7
      # Pinned to a commit that fixes breaking ipywidgets changes; the fix has not been released yet.
      - git+https://github.com/modin-project/modin-spreadsheet.git@49ffd89f683f54c311867d602c55443fb11bf2a5
      - connectorx>=0.2.6a4
      # The `numpydoc` version should match the version installed in the `lint-pydocstyle` job of the CI.
      - numpydoc==1.6.0


================================================
FILE: requirements/requirements-no-engine.yml
================================================
channels:
  - conda-forge
dependencies:
  - pip

  # required dependencies
  - pandas>=2.2,<2.4
  - numpy>=1.22.4
  - fsspec>=2022.11.0
  - packaging>=21.0
  - psutil>=5.8.0

  # optional dependencies
  - pyarrow>=10.0.1
  - xarray>=2022.12.0
  - jinja2>=3.1.2
  - scipy>=1.10.0
  - s3fs>=2022.11.0
  - lxml>=4.9.2
  - openpyxl>=3.1.0
  - xlrd>=2.0.1
  - matplotlib>=3.6.3
  - sqlalchemy>=2.0.0
  - pandas-gbq>=0.19.0
  - pytables>=3.8.0
  - tqdm>=4.60.0
  - numexpr>=2.8.4

  # dependencies for making release
  - pygithub>=v1.58.0
  - pygit2>=1.9.2

  # test dependencies
  - coverage>=7.1.0
  - moto>=4.1.0
  - pytest>=7.3.2
  - pytest-cov>=4.0.0
  - pytest-xdist>=3.2.0
  - typing_extensions

  # code linters
  - black>=24.1.0
  - flake8>=6.0.0
  - flake8-no-implicit-concat>=0.3.4
  - flake8-print>=5.0.0

  - pip:
      - dataframe-api-compat>=0.2.7
      - asv==0.5.1
      # no conda package for windows
      - connectorx>=0.2.6a4
      # Pinned to a commit that fixes breaking ipywidgets changes; the fix has not been released yet.
      - git+https://github.com/modin-project/modin-spreadsheet.git@49ffd89f683f54c311867d602c55443fb11bf2a5
      # The `numpydoc` version should match the version installed in the `lint-pydocstyle` job of the CI.
      - numpydoc==1.6.0


================================================
FILE: requirements-dev.txt
================================================
## required dependencies
pandas>=2.2,<2.4
numpy>=1.22.4
fsspec>=2022.11.0
packaging>=21.0
psutil>=5.8.0

## optional dependencies
ray>=2.10.0,<3
pyarrow>=10.0.1
dask[complete]>=2.22.0
distributed>=2.22.0
xarray>=2022.12.0
Jinja2>=3.1.2
scipy>=1.10.0
s3fs>=2022.11.0
lxml>=4.9.2
openpyxl>=3.1.0
xlrd>=2.0.1
matplotlib>=3.6.3
sqlalchemy>=2.0.0
pandas-gbq>=0.19.0
tables>=3.7.0
# pymssql==2.2.8 broken: https://github.com/modin-project/modin/issues/6429
pymssql>=2.1.5,!=2.2.8
# psycopg devs recommend the other way of installation for production
# but this is ok for testing and development
psycopg2-binary>=2.9.3
connectorx>=0.2.6a4
fastparquet>=2022.12.0
flask-cors
tqdm>=4.60.0
numexpr>=2.8.4
# Latest modin-spreadsheet with widget fix
git+https://github.com/modin-project/modin-spreadsheet.git@49ffd89f683f54c311867d602c55443fb11bf2a5
dataframe-api-compat>=0.2.7

## dependencies for making release
PyGithub>=1.58.0
pygit2>=1.9.2

## test dependencies
asv==0.5.1
coverage>=7.1.0
fuzzydata>=0.0.11
# The `numpydoc` version should match the version installed in the `lint-pydocstyle` job of the CI.
numpydoc==1.1.0
moto>=4.1.0
pytest>=7.3.2
pytest-benchmark>=4.0.0
pytest-cov>=4.0.0
pytest-xdist>=3.2.0
typing_extensions

## code linters
black>=24.1.0
flake8>=6.0.0
flake8-no-implicit-concat>=0.3.4
flake8-print>=5.0.0
mypy>=1.0.0
pandas-stubs>=2.0.0
isort>=5.12


================================================
FILE: scripts/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.


================================================
FILE: scripts/doc_checker.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
Validate docstrings using pydocstyle and numpydoc.

Example usage:
python scripts/doc_checker.py asv_bench/benchmarks/utils.py modin/pandas
"""

import argparse
import ast
import functools
import inspect
import logging
import os
import pathlib
import re
import shutil
import subprocess
import sys
from typing import List

from numpydoc.docscrape import NumpyDocString, get_doc_object
from numpydoc.validate import Validator

# Let the other modules know that the doc checker is running.
# NOTE: this is set at import time, before any Modin module is imported below.
os.environ["_MODIN_DOC_CHECKER_"] = "1"

# All checker output (both info and error records) goes to stdout
# in the "<LEVEL>:<message>" form.
logging.basicConfig(
    stream=sys.stdout, format="%(levelname)s:%(message)s", level=logging.INFO
)

# Absolute path of the repository root (the parent directory of `scripts/`).
# It is put first on `sys.path` so that `modin` is imported from this checkout.
MODIN_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
sys.path.insert(0, MODIN_PATH)

# error codes that pandas test in CI
# https://numpydoc.readthedocs.io/en/latest/validation.html#built-in-validation-checks
# numpydoc errors with codes outside of this set are not reported by `validate_object`.
NUMPYDOC_BASE_ERROR_CODES = {
    *("GL01", "GL02", "GL03", "GL05", "GL06", "GL07", "GL08", "GL09", "GL10"),
    *("SS02", "SS03", "SS04", "SS05", "PR01", "PR02", "PR03", "PR04", "PR05"),
    *("PR08", "PR09", "PR10", "RT01", "RT04", "RT05", "SA02", "SA03"),
}

# Modin-specific error codes mapped to their message templates;
# the templates are filled in with `str.format` by the `check_*` functions below.
MODIN_ERROR_CODES = {
    "MD01": "'{parameter}' description should be '[type], default: [value]', found: '{found}'",
    "MD02": "Spelling error in line: {line}, found: '{word}', reference: '{reference}'",
    "MD03": "Section contents is over-indented (in section '{section}')",
}


def get_optional_args(doc: Validator) -> dict:
    """
    Collect the parameters that have default values in the checked object.

    Parameters
    ----------
    doc : numpydoc.validate.Validator
        Validator handler.

    Returns
    -------
    dict
        Dict with default argument names and its values. Empty for classes
        and for non-callable objects.
    """
    target = doc.obj
    if inspect.isclass(target) or not callable(target):
        return {}

    defaults = {}
    for name, parameter in inspect.signature(target).parameters.items():
        # `Parameter.empty` marks a parameter without a default value
        if parameter.default is not inspect.Parameter.empty:
            defaults[name] = parameter.default
    return defaults


def check_optional_args(doc: Validator) -> list:
    """
    Check type description of optional arguments.

    The type line of every documented parameter that has a default value
    must contain exactly one of "default: " and "optional".

    Parameters
    ----------
    doc : numpydoc.validate.Validator
        Validator handler.

    Returns
    -------
    list
        List of tuples with Modin error code and its description.
    """
    if not doc.doc_parameters:
        return []
    # `not doc.raw_doc and doc.clean_doc` - means that docstring was
    # automatically generated by numpydoc with help of `pydoc.getdoc`.
    if not doc.raw_doc and doc.clean_doc:
        return []
    optional_args = get_optional_args(doc)
    if not optional_args:
        return []

    found_errors = []
    for name in optional_args:
        # case when not all parameters are listed in "Parameters" section;
        # it's handled by numpydoc itself
        if name not in doc.doc_parameters:
            continue
        type_line = doc.doc_parameters[name][0]
        mentions_default = "default: " in type_line
        mentions_optional = "optional" in type_line
        # both markers or none of them is an error
        if mentions_default == mentions_optional:
            description = MODIN_ERROR_CODES["MD01"].format(
                parameter=name,
                found=type_line,
            )
            found_errors.append(("MD01", description))
    return found_errors


def check_spelling_words(doc: Validator) -> list:
    """
    Check spelling of chosen words in doc.

    Parameters
    ----------
    doc : numpydoc.validate.Validator
        Validator handler.

    Returns
    -------
    list
        List of tuples with Modin error code and its description.

    Notes
    -----
    Any special words enclosed in apostrophes(") are treated as python string
    constants and are not checked for spelling.
    """
    if not doc.raw_doc:
        return []
    components = {
        "Modin",
        "pandas",
        "NumPy",
        "Ray",
        "Dask",
        "PyArrow",
        "XGBoost",
        "Plasma",
    }
    # lower-cased word -> its reference spelling
    reference_spelling = {name.lower(): name for name in components}
    check_words = "|".join(x.lower() for x in components)

    # comments work only with re.VERBOSE
    pattern = r"""
    (?:                     # non-capturing group
        [^-\\\w\/]          # any symbol except: '-', '\', '/' and any from [a-zA-Z0-9_]
        | ^                 # or line start
    )
    ({check_words})         # words to check, example - "modin|pandas|numpy"
    (?:                     # non-capturing group
        [^-"\.\/\w\\]       # any symbol except: '-', '"', '.', '\', '/' and any from [a-zA-Z0-9_]
        | \.\s              # or '.' and any whitespace
        | \.$               # or '.' and line end
        | $                 # or line end
    )
    """.format(
        check_words=check_words
    )
    # per docstring line: matched words whose spelling differs from the reference
    misspelled_per_line = [
        set(re.findall(pattern, line, re.I | re.VERBOSE)) - components
        for line in doc.raw_doc.splitlines()
    ]

    # first source line containing a triple quote is taken as the docstring start
    docstring_start_line = next(
        (
            doc.source_file_def_line + idx
            for idx, line in enumerate(inspect.getsourcelines(doc.code_obj)[0])
            if '"""' in line or "'''" in line
        ),
        None,
    )

    errors = []
    for line_idx, words_in_line in enumerate(misspelled_per_line):
        for word in words_in_line:
            description = MODIN_ERROR_CODES["MD02"].format(
                line=docstring_start_line + line_idx,
                word=word,
                reference=reference_spelling[word.lower()],
            )
            errors.append(("MD02", description))
    return errors


def check_docstring_indention(doc: Validator) -> list:
    """
    Check indention of docstring since numpydoc reports weird results.

    Parameters
    ----------
    doc : numpydoc.validate.Validator
        Validator handler.

    Returns
    -------
    list
        List of tuples with Modin error code and its description.
    """
    from modin.utils import _get_indent

    # re-read the cleaned docstring section by section
    parsed = NumpyDocString(doc.clean_doc)
    parsed._doc.reset()
    parsed._parse_summary()
    sections = list(parsed._read_sections())

    # each section is used as (name, content lines); content must start at column 0
    return [
        ("MD03", MODIN_ERROR_CODES["MD03"].format(section=section[0]))
        for section in sections
        if _get_indent("\n".join(section[1])) != 0
    ]


def validate_modin_error(doc: Validator, results: dict) -> dict:
    """
    Validate custom Modin errors.

    Parameters
    ----------
    doc : numpydoc.validate.Validator
        Validator handler.
    results : dict
        Dictionary that numpydoc.validate.validate return.

    Returns
    -------
    dict
        Updated dict with Modin custom errors; it is the very same `results`
        object, whose "errors" list is extended in place.
    """
    # the order of the checks defines the order of the reported Modin errors
    modin_errors = [
        *check_optional_args(doc),
        *check_spelling_words(doc),
        *check_docstring_indention(doc),
    ]
    results["errors"].extend(modin_errors)
    return results


def skip_check_if_noqa(doc: Validator, err_code: str, noqa_checks: list) -> bool:
    """
    Tell whether the error `err_code` has to be ignored for the checked object.

    Parameters
    ----------
    doc : numpydoc.validate.Validator
        Validator handler.
    err_code : str
        Error code found by numpydoc.
    noqa_checks : list
        Found noqa checks.

    Returns
    -------
    bool
        Return True if 'noqa' found.
    """
    # a bare `# noqa` silences every check
    if noqa_checks == ["all"]:
        return True

    # GL08 - missing docstring in an arbitrary object; numpydoc code.
    # Numpydoc recommends to add docstrings of __init__ method in class docstring.
    # So there is no error if docstring is missing in __init__
    if err_code == "GL08" and doc.name.rsplit(".", 1)[-1] == "__init__":
        return True
    return err_code in noqa_checks


def get_noqa_checks(doc: Validator) -> list:
    """
    Get codes after `# noqa`.

    For a module the noqa comment is looked up on the line right before the
    docstring; for any other object - on the first line that appears to close
    the definition (contains ")" followed by ":").

    Parameters
    ----------
    doc : numpydoc.validate.Validator
        Validator handler.

    Returns
    -------
    list
        List with codes.

    Notes
    -----
    If noqa doesn't have any codes - returns ["all"].
    """
    source = doc.method_source
    if not source:
        return []

    noqa_str = ""
    if not inspect.ismodule(doc.obj):
        # find last line of obj definition
        for line in source.split("\n"):
            if ")" in line and ":" in line.split(")", 1)[1]:
                noqa_str = line
                break
    else:
        # noqa string is defined as the first line before the docstring
        if not doc.raw_doc:
            # noqa string is meaningless if there is no docstring in module
            return []
        lines = source.split("\n")
        for idx, line in enumerate(lines):
            if '"""' in line or "'''" in line:
                # A docstring starting on the very first line has no preceding
                # line; without this guard `lines[idx - 1]` would wrap around
                # and pick the LAST line of the module instead.
                if idx > 0:
                    noqa_str = lines[idx - 1]
                break

    if "# noqa:" in noqa_str:
        noqa_checks = noqa_str.split("# noqa:", 1)[1].split(",")
    elif "# noqa" in noqa_str:
        noqa_checks = ["all"]
    else:
        noqa_checks = []
    return [check.strip() for check in noqa_checks]


def construct_validator(import_path: str) -> Validator:  # noqa: GL08
    # helper function: load the object by its import path and wrap it
    # into a numpydoc `Validator`
    loaded_obj = Validator._load_obj(import_path)
    doc_object = get_doc_object(loaded_obj)
    return Validator(doc_object)


# code snippet from numpydoc
def validate_object(import_path: str) -> list:
    """
    Check docstrings of an entity that can be imported.

    Parameters
    ----------
    import_path : str
        Python-like import path.

    Returns
    -------
    errors : list
        List with string representations of errors, each one in the
        "<import path>:<file line>:<error code>:<description>" form.
    """
    from numpydoc.validate import validate as run_numpydoc

    doc = construct_validator(import_path)

    # for (cached) properties the marker is looked up on the wrapped function as well
    target = doc.obj
    if isinstance(target, property):
        accessor = target.fget
    elif isinstance(target, functools.cached_property):
        accessor = target.func
    else:
        accessor = None
    if getattr(target, "__doc_inherited__", False) or getattr(
        accessor, "__doc_inherited__", False
    ):
        # do not check inherited docstrings
        return []

    report = validate_modin_error(doc, run_numpydoc(import_path))
    noqa_checks = get_noqa_checks(doc)
    # report only the tracked error codes that are not silenced with `# noqa`
    return [
        ":".join([import_path, str(report["file_line"]), code, description])
        for code, description in report["errors"]
        if (code in NUMPYDOC_BASE_ERROR_CODES or code in MODIN_ERROR_CODES)
        and not skip_check_if_noqa(doc, code, noqa_checks)
    ]


def numpydoc_validate(path: pathlib.Path) -> bool:
    """
    Perform numpydoc checks.

    Every ``*.py`` file is parsed statically to collect the names of the
    module, its top-level functions and classes, and the methods of those
    classes; the docstrings themselves are then validated dynamically by
    import path.

    Parameters
    ----------
    path : pathlib.Path
        Filename or directory path for check.

    Returns
    -------
    is_successfull : bool
        Return True if all checks are successful.
    """

    def is_public_func(node: ast.AST) -> bool:
        # plain functions and dunder methods; name-mangled `__private` ones are skipped
        return isinstance(node, ast.FunctionDef) and (
            not node.name.startswith("__") or node.name.endswith("__")
        )

    is_successfull = True

    if path.is_file():
        # mimic a single `os.walk` entry for a lone file
        walker = ((str(path.parent), [], [path.name]),)
    else:
        walker = os.walk(path)

    for root, _, files in walker:
        if "__pycache__" in root:
            continue
        for _file in files:
            if not _file.endswith(".py"):
                continue

            current_path = os.path.join(root, _file)
            # get importable name
            module_name = current_path.replace("/", ".").replace("\\", ".")
            # remove ".py"
            module_name = os.path.splitext(module_name)[0]

            # Python sources are UTF-8 by default; do not depend on the locale
            # encoding (which is not UTF-8 on every platform).
            with open(current_path, encoding="utf-8") as fd:
                file_contents = fd.read()

            # using static parsing for collecting module, functions, classes and their methods
            module = ast.parse(file_contents)

            functions = [node for node in module.body if is_public_func(node)]
            classes = [node for node in module.body if isinstance(node, ast.ClassDef)]
            methods = [
                f"{module_name}.{_class.name}.{node.name}"
                for _class in classes
                for node in _class.body
                if is_public_func(node)
            ]

            # numpydoc docstrings validation
            # docstrings are taken dynamically
            to_validate = (
                [module_name]
                + [f"{module_name}.{x.name}" for x in (functions + classes)]
                + methods
            )
            results = list(map(validate_object, to_validate))
            is_successfull_file = not any(results)
            if not is_successfull_file:
                logging.info(f"NUMPYDOC OUTPUT FOR {current_path}")
            for errors in results:
                for error in errors:
                    logging.error(error)
            is_successfull &= is_successfull_file
    return is_successfull


def pydocstyle_validate(
    path: pathlib.Path, add_ignore: List[str], use_numpydoc: bool
) -> bool:
    """
    Perform pydocstyle checks.

    Parameters
    ----------
    path : pathlib.Path
        Filename or directory path for check.
    add_ignore : List[str]
        `pydocstyle` error codes which are not verified.
        The passed list is not modified.
    use_numpydoc : bool
        Disable duplicate `pydocstyle` checks if `numpydoc` is in use.

    Returns
    -------
    bool
        Return True if all pydocstyle checks are successful.

    Raises
    ------
    ValueError
        If the `pydocstyle` executable is not found in PATH.
    """
    pydocstyle = "pydocstyle"
    if not shutil.which(pydocstyle):
        raise ValueError(f"{pydocstyle} not found in PATH")
    # Work on a copy: the caller passes the same list for every checked path,
    # so extending it in place would pile up duplicated codes.
    ignored_codes = list(add_ignore)
    # These check can be done with numpydoc tool, so disable them for pydocstyle.
    if use_numpydoc:
        ignored_codes.extend(["D100", "D101", "D102", "D103", "D104", "D105"])
    result = subprocess.run(
        [
            pydocstyle,
            "--convention",
            "numpy",
            "--add-ignore",
            ",".join(ignored_codes),
            str(path),
        ],
        text=True,
        capture_output=True,
    )
    if result.returncode:
        logging.info(f"PYDOCSTYLE OUTPUT FOR {path}")
        logging.error(result.stdout)
        logging.error(result.stderr)
    return result.returncode == 0


def monkeypatching():
    """Monkeypatch not installed modules and decorators which change __doc__ attribute."""
    from unittest.mock import Mock

    import ray

    import modin.utils

    # Replacement for `ray.remote` that returns the decorated object itself,
    # for both the `@ray.remote` and the `@ray.remote(...)` forms.
    def monkeypatch(*args, **kwargs):
        if len(args) == 1 and len(kwargs) == 0 and callable(args[0]):
            # This is the case where the decorator is just @ray.remote without parameters.
            return args[0]
        return lambda cls_or_func: cls_or_func

    ray.remote = monkeypatch

    # We are mocking packages we don't need for docs checking in order to avoid import errors
    sys.modules["sqlalchemy"] = Mock()

    # Make `instancer` return the class itself instead of its instance;
    # `functools.wraps` copies the metadata of the original function to the replacement.
    modin.utils.instancer = functools.wraps(modin.utils.instancer)(lambda cls: cls)

    # monkey-patch numpydoc for working correctly with properties
    # until https://github.com/numpy/numpydoc/issues/551 is fixed
    # (the original loader is captured as a default argument at definition time,
    # i.e. before `Validator._load_obj` is overwritten below)
    def load_obj(name, old_load_obj=Validator._load_obj):
        obj = old_load_obj(name)
        # validate the function wrapped by a (cached) property, not the descriptor
        if isinstance(obj, property):
            obj = obj.fget
        elif isinstance(obj, functools.cached_property):
            obj = obj.func
        return obj

    Validator._load_obj = staticmethod(load_obj)

    # enable docs testing on windows
    sys.getdlopenflags = Mock()
    sys.setdlopenflags = Mock()
    xgboost_mock = Mock()

    # A real (empty) class is exposed as `xgboost.Booster` instead of a Mock attribute
    # NOTE(review): presumably because `Booster` is used as a base class or in
    # type checks somewhere in Modin - confirm.
    class Booster:
        pass

    xgboost_mock.Booster = Booster
    sys.modules["xgboost"] = xgboost_mock


def validate(
    paths: List[pathlib.Path], add_ignore: List[str], use_numpydoc: bool
) -> bool:
    """
    Perform pydocstyle and numpydoc checks.

    Every path is always checked, even if an earlier path has already
    failed, so that the whole output is produced in a single run.

    Parameters
    ----------
    paths : List[pathlib.Path]
        Filenames of directories for check.
    add_ignore : List[str]
        `pydocstyle` error codes which are not verified.
    use_numpydoc : bool
        Determine if numpydoc checks are needed.

    Returns
    -------
    is_successfull : bool
        Return True if all checks are successful.
    """
    all_passed = True
    for path in paths:
        # both checkers are run unconditionally - no short-circuiting here
        pydocstyle_ok = pydocstyle_validate(path, add_ignore, use_numpydoc)
        numpydoc_ok = numpydoc_validate(path) if use_numpydoc else True
        if not pydocstyle_ok or not numpydoc_ok:
            all_passed = False
    return all_passed


def check_args(args: argparse.Namespace):
    """
    Check the obtained values for correctness.

    Parameters
    ----------
    args : argparse.Namespace
        Parser arguments.

    Raises
    ------
    ValueError
        Occurs in case of non-existent files or directories.
    """
    for path in args.paths:
        if not path.exists():
            raise ValueError(f"{path} does not exist")
        abs_path = os.path.abspath(path)
        if not abs_path.startswith(MODIN_PATH):
            raise ValueError(
                "it is unsupported to use this script on files from another "
                + f"repository; script' repo '{MODIN_PATH}', "
                + f"input path '{abs_path}'"
            )


def get_args() -> argparse.Namespace:
    """
    Parse the command line and validate the parsed values.

    Returns
    -------
    argparse.Namespace
        Parsed arguments that already went through `check_args`.
    """
    cli = argparse.ArgumentParser(
        description="Check docstrings by using pydocstyle and numpydoc"
    )
    # (flags, keyword options) of every supported argument, in declaration order
    argument_specs = (
        (
            ("paths",),
            {
                "nargs": "+",
                "type": pathlib.Path,
                "help": "Filenames or directories; in case of direstories perform recursive check",
            },
        ),
        (
            ("--add-ignore",),
            {
                "nargs": "*",
                "default": [],
                "help": "Pydocstyle error codes; for example: D100,D100,D102",
            },
        ),
        (
            ("--disable-numpydoc",),
            {
                "default": False,
                "action": "store_true",
                "help": "Determine if numpydoc checks are not needed",
            },
        ),
    )
    for flags, options in argument_specs:
        cli.add_argument(*flags, **options)
    parsed = cli.parse_args()
    check_args(parsed)
    return parsed


# Script entry point: parse the CLI, apply the monkeypatches and run the checks.
if __name__ == "__main__":
    args = get_args()
    monkeypatching()
    if not validate(args.paths, args.add_ignore, not args.disable_numpydoc):
        logging.error("INVALID DOCUMENTATION FOUND")
        # the bare `exit` helper is injected by the `site` module and is not
        # guaranteed to exist (e.g. under `python -S`); raising SystemExit
        # terminates with the same status code without relying on it
        raise SystemExit(1)
    logging.info("SUCCESSFUL CHECK")


================================================
FILE: scripts/release.py
================================================
import argparse
import atexit
import collections
import json
import re
import sys
from pathlib import Path

import github
import pygit2
from packaging import version


class GithubUserResolver:
    """
    Resolve ``(name, email)`` pairs of contributors to GitHub logins.

    Resolved (and unresolved) pairs are kept in a JSON cache file located
    next to this script; the cache is written back at interpreter exit.

    Parameters
    ----------
    email2commit : dict
        Mapping of an author e-mail to the id of a commit authored with it.
    token : str
        GitHub token; an empty value means anonymous access.
    """

    def __init__(self, email2commit, token):
        self.__cache_file = Path(__file__).parent / "gh-users-cache.json"
        self.__cache = (
            json.loads(self.__cache_file.read_text())
            if self.__cache_file.exists()
            else {}
        )
        # filter unknown users hoping we'd be able to find them this time
        self.__cache = {key: value for key, value in self.__cache.items() if value}
        # using anonymous access if token not specified
        self.__github = github.Github(token or None)
        self.__modin_repo = self.__github.get_repo("modin-project/modin")
        self.__email2commit = email2commit
        atexit.register(self.__save)

    def __search_commits(self, term):
        """
        Look up the login of the author of the commit recorded for `term`.

        Returns None when no commit is recorded for `term` or when the commit
        has no author object attached.
        """
        if commit := self.__email2commit.get(term):
            gh_commit = self.__modin_repo.get_commit(str(commit))
            # the author is missing when the commit e-mail is not linked to any
            # GitHub account; treat that as "not found" instead of crashing
            author = gh_commit.author
            return author.login if author is not None else None
        return None

    @staticmethod
    def __is_email(term):
        """Return a match object if `term` looks like an e-mail, else None."""
        return re.match(r".*@.*\..*", term)

    def __search_github(self, term):
        """
        Search GitHub users by e-mail or by full name.

        A login is returned only when the search yields exactly one user.
        """
        search = f"in:email {term}" if self.__is_email(term) else f"fullname:{term}"
        match = [user.login for user in self.__github.search_users(search)]
        return match[0] if len(match) == 1 else None

    def __try_user(self, term):
        """
        Try to interpret `term` itself as a GitHub login.

        E-mails are never tried; a 404 response means "no such user", any
        other GitHub error is re-raised.
        """
        if self.__is_email(term):
            return None
        try:
            return self.__github.get_user(term).login
        except github.GithubException as ex:
            if ex.status != 404:
                raise
            return None

    def __resolve_single(self, term):
        """Resolve `term` with every strategy in turn, first hit wins."""
        return (
            self.__search_commits(term)
            or self.__search_github(term)
            or self.__try_user(term)
        )

    def __resolve_cache(self, name, email):
        """Return the cached login for the pair or None."""
        return self.__cache.get(f"{name} <{email}>", None)

    def __register(self, name, email, match):
        """Remember `match` (possibly None) for the pair in the cache."""
        self.__cache[f"{name} <{email}>"] = match

    def resolve(self, people):
        """
        Resolve contributors to GitHub logins.

        Parameters
        ----------
        people : iterable of (str, str)
            ``(name, email)`` pairs to resolve.

        Returns
        -------
        tuple of (set, set)
            Found logins and the ``(name, email)`` pairs left unresolved.
        """
        logins, unknowns = set(), set()

        for name, email in people:
            if match := self.__resolve_cache(name, email):
                logins.add(match)
            elif match := self.__resolve_single(email):
                self.__register(name, email, match)
                logins.add(match)
            else:
                # the e-mail gave nothing, fall back to the name
                if match := self.__resolve_single(name):
                    logins.add(match)
                else:
                    unknowns.add((name, email))
                # failures are registered too (as None); such entries are
                # dropped when the cache is loaded next time
                self.__register(name, email, match)

        return logins, unknowns

    def resolve_by_reviews(self, unknowns, email2pr):
        """
        Resolve contributors by looking at reviewers of the related pulls.

        Parameters
        ----------
        unknowns : iterable of (str, str)
            ``(name, email)`` pairs which are not resolved yet.
        email2pr : dict
            Mapping of a contributor e-mail to the id of a commit mentioning it.

        Returns
        -------
        tuple of (set, set)
            Found logins and the ``(name, email)`` pairs still unresolved.
        """
        logins, new_unknowns = set(), set()
        for name, email in unknowns:
            commit = self.__modin_repo.get_commit(str(email2pr[email]))
            found = set()
            for pull in commit.get_pulls():
                for review in pull.get_reviews():
                    user = review.user
                    if user.name == name and (not user.email or user.email == email):
                        found.add(user.login)

            # only an unambiguous match is accepted
            if len(found) == 1:
                self.__register(name, email, next(iter(found)))
                logins |= found
            else:
                new_unknowns.add((name, email))

        return logins, new_unknowns

    def __save(self):
        """Write the cache to the cache file as sorted, indented JSON."""
        self.__cache_file.write_text(json.dumps(self.__cache, indent=4, sort_keys=True))


class GitWrapper:
    """Helper around the git repository this script is located in."""

    def __init__(self):
        self.repo = pygit2.Repository(Path(__file__).parent)

    def is_on_main(self):
        """Tell whether the ``refs/heads/main`` reference equals the repo HEAD."""
        return self.repo.references["refs/heads/main"] == self.repo.head

    @staticmethod
    def __get_tag_version(entry):
        """
        Parse the version encoded in a tag reference name.

        Returns the parsed version, or a descriptive string when the tag name
        is not a valid version.
        """
        # `str.lstrip` treats its argument as a set of characters, so it could
        # also eat leading characters of the tag name itself (e.g. "a1.0"
        # would become "1.0"); cut exactly the prefix instead
        prefix = "refs/tags/"
        tag_name = entry[len(prefix) :] if entry.startswith(prefix) else entry
        try:
            return version.parse(tag_name)
        except version.InvalidVersion as ex:
            return f'<bad version "{entry}": {ex}>'

    def get_previous_release(self, rel_type):
        """
        Find the newest released tag for the given release type.

        Parameters
        ----------
        rel_type : {"minor", "patch"}
            For "minor" only tags with a zero micro version are considered.

        Returns
        -------
        tuple
            Reference name, the object the reference peels to and the version.

        Raises
        ------
        ValueError
            If no suitable tag is present in the repository.
        """
        tags = [
            (entry, self.__get_tag_version(entry))
            for entry in self.repo.references
            if entry.startswith("refs/tags/")
        ]
        # filter away legacy versions (which aren't following the proper naming schema);
        # also skip pre-releases
        tags = [
            (entry, ver)
            for entry, ver in tags
            if isinstance(ver, version.Version) and not ver.pre
        ]
        if rel_type == "minor":
            # leave only minor releases
            tags = [(entry, ver) for entry, ver in tags if ver.micro == 0]
        else:
            assert rel_type == "patch"
        if not tags:
            # `max` would raise the same exception type with an unhelpful message
            raise ValueError(f"No suitable {rel_type} release tags found")
        prev_ref, prev_ver = max(tags, key=lambda pair: pair[1])
        return prev_ref, self.repo.references[prev_ref].peel(), prev_ver

    def get_commits_upto(self, stop_commit):
        """
        Collect the commits reachable from HEAD until `stop_commit` is met.

        Raises
        ------
        ValueError
            If the walk ends without meeting `stop_commit`.
        """
        history = []
        for obj in self.repo.walk(self.repo.head.target):
            if obj.id == stop_commit.id:
                break
            history.append(obj)
        else:
            raise ValueError("Current HEAD is not derived from previous release")
        return history

    def ensure_title_link(self, obj: pygit2.Commit):
        """
        Return the first line of the commit message with a reference attached.

        Titles already ending with ``(#<number>)`` are returned as is, others
        get the short commit id appended.
        """
        title = obj.message.splitlines()[0]
        if not re.match(r".*\(#(\d+)\)$", title):
            title += f" ({obj.short_id})"
        return title


def make_notes(args):
    """
    Generate release notes and write them to stdout.

    The release type is "minor" when HEAD is on main and "patch" otherwise;
    progress and diagnostics go to stderr.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command line arguments; ``args.token`` is passed to the
        GitHub user resolver.

    Returns
    -------
    int or None
        1 if the notes could not be generated, None on success.
    """
    wrapper = GitWrapper()
    release_type = "minor" if wrapper.is_on_main() else "patch"
    sys.stderr.write(f"Detected release type: {release_type}\n")

    prev_ref, prev_commit, prev_ver = wrapper.get_previous_release(release_type)
    sys.stderr.write(f"Previous {release_type} release: {prev_ref}\n")

    next_major, next_minor, next_patch = prev_ver.release
    if release_type == "minor":
        next_minor += 1
    elif release_type == "patch":
        next_patch += 1
    else:
        raise ValueError(f"Unexpected release type: {release_type}")
    next_ver = version.Version(f"{next_major}.{next_minor}.{next_patch}")

    sys.stderr.write(f"Computing release notes for {prev_ver} -> {next_ver}...\n")
    try:
        history = wrapper.get_commits_upto(prev_commit)
    except ValueError as ex:
        # terminated with a newline like every other message written to stderr
        sys.stderr.write(
            f"{ex}: did you forget to checkout correct branch or pull tags?\n"
        )
        return 1
    if not history:
        sys.stderr.write(f"No commits since {prev_ver} found, nothing to generate!\n")
        return 1

    # commits grouped by the part of the title preceding the first "-"
    titles = collections.defaultdict(list)
    people = set()
    email2commit, email2pr = {}, {}
    for obj in history:
        title = obj.message.splitlines()[0]
        titles[title.split("-")[0]].append(obj)
        # (name, email) pairs taken from the sign-off / co-author trailers
        new_people = set(
            re.findall(
                r"(?:(?:Signed-off-by|Co-authored-by):\s*)([\w\s,]+?)\s*<([^>]+)>",
                obj.message,
            )
        )
        for _, email in new_people:
            email2pr[email] = obj.id
        people |= new_people
        email2commit[obj.author.email] = obj.id
    sys.stderr.write(f"Found {len(history)} commit(s) since {prev_ref}\n")

    sys.stderr.write("Resolving contributors...\n")
    user_resolver = GithubUserResolver(email2commit, args.token)
    logins, unknowns = user_resolver.resolve(people)
    new_logins, unknowns = user_resolver.resolve_by_reviews(unknowns, email2pr)
    logins |= new_logins
    sys.stderr.write(f"Found {len(logins)} GitHub usernames.\n")
    if unknowns:
        sys.stderr.write(
            f"Warning! Failed to resolve {len(unknowns)} usernames, please resolve them manually!\n"
        )

    # (section heading, title prefix) in the order they appear in the notes
    sections = [
        ("Stability and Bugfixes", "FIX"),
        ("Performance enhancements", "PERF"),
        ("Refactor Codebase", "REFACTOR"),
        ("Update testing suite", "TEST"),
        ("Documentation improvements", "DOCS"),
        ("New Features", "FEAT"),
    ]

    notes = rf"""Modin {next_ver}

<Please fill in short release summary>

Key Features and Updates Since {prev_ver}
-------------------------------{'-' * len(str(prev_ver))}
"""

    def _add_section(section, prs):
        # append a bullet list of the given commits, sorted by message
        nonlocal notes
        if prs:
            notes += f"* {section}\n"
            notes += "\n".join(
                [
                    f"  * {wrapper.ensure_title_link(obj)}"
                    for obj in sorted(prs, key=lambda obj: obj.message)
                ]
            )
            notes += "\n"

    for section, key in sections:
        _add_section(section, titles.pop(key, None))

    # whatever was not popped above has an unrecognized prefix; flattened with
    # a comprehension instead of the quadratic `sum(lists, [])`
    uncategorized = [obj for group in titles.values() for obj in group]
    _add_section("Uncategorized improvements", uncategorized)

    notes += r"""
Contributors
------------
"""
    notes += "\n".join(f"@{login}" for login in sorted(logins)) + "\n"
    notes += (
        "\n".join(
            f"<unknown-login> {name} <{email}>" for name, email in sorted(unknowns)
        )
        + "\n"
    )

    sys.stdout.write(notes)


def main():
    """Parse the command line, run the chosen sub-command and exit with its result."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--token",
        type=str,
        default="",
        help="GitHub token for queries (optional, bumps up rate limit)",
    )
    # when no sub-command is given only the usage line is printed
    parser.set_defaults(func=lambda _: parser.print_usage())
    commands = parser.add_subparsers()

    notes_command = commands.add_parser("notes", help="Generate release notes")
    notes_command.set_defaults(func=make_notes)

    parsed = parser.parse_args()
    exit_status = parsed.func(parsed)
    sys.exit(exit_status)


# run the CLI only when the file is executed as a script, not when imported
if __name__ == "__main__":
    main()


================================================
FILE: scripts/test/__init__.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.


================================================
FILE: scripts/test/examples.py
================================================
# noqa: MD01
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

# noqa: MD02
"""Function examples for docstring testing."""


class weakdict(dict):  # noqa: GL08
    __slots__ = ("__weakref__",)  # the "__weakref__" slot keeps instances weak-referenceable


def optional_square(number: int = 5) -> int:  # noqa
    """
    Square `number`.

    The function from Modin.

    Parameters
    ----------
    number : int
        Some number.

    Notes
    -----
    The `optional_square` Modin function from modin/scripts/examples.py.
    """
    return pow(number, 2)


def optional_square_empty_parameters(number: int = 5) -> int:
    """
    Parameters
    ----------
    """
    return pow(number, 2)


def square_summary(number: int) -> int:  # noqa: PR01, GL08
    """
    Square `number`.

    See https://github.com/ray-project/ray.

    Examples
    --------
    The function that will never be used in modin.pandas.DataFrame same as in
    pandas or NumPy.
    """
    return pow(number, 2)


================================================
FILE: scripts/test/test_doc_checker.py
================================================
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import pytest

from scripts.doc_checker import (
    MODIN_ERROR_CODES,
    check_optional_args,
    check_spelling_words,
    construct_validator,
    get_noqa_checks,
    get_optional_args,
)


@pytest.mark.parametrize(
    "import_path, result",
    [
        ("scripts.test.examples.optional_square", {"number": 5}),
        ("scripts.test.examples.optional_square_empty_parameters", {"number": 5}),
        ("scripts.test.examples.square_summary", {}),
        ("scripts.test.examples.weakdict", {}),
        ("scripts.test.examples", {}),
    ],
)
def test_get_optional_args(import_path, result):
    """Check the optional arguments detected for every example object."""
    validator = construct_validator(import_path)
    assert get_optional_args(validator) == result


@pytest.mark.parametrize(
    "import_path, result",
    [
        (
            "scripts.test.examples.optional_square",
            [
                (
                    "MD01",
                    MODIN_ERROR_CODES["MD01"].format(parameter="number", found="int"),
                )
            ],
        ),
        ("scripts.test.examples.optional_square_empty_parameters", []),
        ("scripts.test.examples.square_summary", []),
        ("scripts.test.examples.weakdict", []),
        ("scripts.test.examples", []),
    ],
)
def test_check_optional_args(import_path, result):
    """Check the MD01 errors reported for every example object."""
    validator = construct_validator(import_path)
    assert check_optional_args(validator) == result


@pytest.mark.parametrize(
    "import_path, result",
    [
        ("scripts.test.examples.optional_square", []),
        (
            "scripts.test.examples.square_summary",
            [
                ("MD02", 57, "Pandas", "pandas"),
                ("MD02", 57, "Numpy", "NumPy"),
            ],
        ),
        ("scripts.test.examples.optional_square_empty_parameters", []),
        ("scripts.test.examples.weakdict", []),
        ("scripts.test.examples", []),
    ],
)
def test_check_spelling_words(import_path, result):
    """Check that every reported spelling error is one of the expected ones."""
    expected_errors = [
        (
            code,
            MODIN_ERROR_CODES[code].format(line=line, word=word, reference=reference),
        )
        for code, line, word, reference in result
    ]
    found_errors = check_spelling_words(construct_validator(import_path))
    # the order of incorrect words found on the same line is not guaranteed,
    # so each reported error is looked up individually
    # NOTE(review): when nothing is reported the loop body never runs, i.e.
    # missing expected errors are not detected by this test
    for found in found_errors:
        assert found in expected_errors


@pytest.mark.parametrize(
    "import_path, result",
    [
        ("scripts.test.examples.optional_square", ["all"]),
        ("scripts.test.examples.optional_square_empty_parameters", []),
        ("scripts.test.examples.square_summary", ["PR01", "GL08"]),
        ("scripts.test.examples.weakdict", ["GL08"]),
        ("scripts.test.examples", ["MD02"]),
    ],
)
def test_get_noqa_checks(import_path, result):
    """Check the `# noqa` codes extracted for every example object."""
    validator = construct_validator(import_path)
    assert get_noqa_checks(validator) == result


================================================
FILE: setup.cfg
================================================

# See the docstring in versioneer.py for instructions. Note that you must
# re-run 'versioneer.py setup' after changing this section, and commit the
# resulting files.

[versioneer]
VCS = git
style = pep440
versionfile_source = modin/_version.py
versionfile_build = modin/_version.py
tag_prefix =
parentdir_prefix = modin-

[tool:pytest]
addopts = --cov-config=setup.cfg --cov=modin --cov-append --cov-report= -m "not exclude_by_default"
xfail_strict=true
markers =
    exclude_in_sanity
    exclude_by_default
filterwarnings =
    error:.*defaulting to pandas.*:UserWarning

[isort]
profile = black

[flake8]
max-line-length = 88
ignore = E203, E266, E501, W503
select = B,C,E,F,W,T,B9,NIC
per-file-ignores =
    modin/pandas/__init__.py:E402,F401
    stress_tests/kaggle/*:E402
    modin/experimental/pandas/__init__.py:E402
    modin/_version.py:T201
    modin/tests/*:E402

[coverage:run]
source =
    # modin sources
    modin/*
omit =
    # These are not covered by any test because they are experimental APIs
    modin/sql/*
    modin/experimental/sql*
    # This is not used yet
    modin/pandas/index/*
    # Skip tests
    modin/tests/*
    # Plotting is not tested
    modin/pandas/plotting.py
    # Skip CLI part
    modin/__main__.py
    # Skip third-party stuff
    modin/_version.py
parallel = True
# The use of this feature is one of the recommendations of codecov if the
# tests are run in different environments (for example, on different operating
# systems): https://coverage.readthedocs.io/en/stable/config.html#run-relative-files
relative_files = true

[coverage:report]
exclude_lines =
    # Have to re-enable the standard pragma
    pragma: no cover
    # Don't complain if tests don't hit defensive assertion code:
    raise AssertionError
    raise NotImplementedError
    raise ImportError
    assert
    pass


================================================
FILE: setup.py
================================================
from setuptools import find_packages, setup

import versioneer

# The README content is passed to `setup()` as the long description.
with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

# Optional dependency groups; each one is exposed through `extras_require`.
dask_deps = ["dask>=2.22.0", "distributed>=2.22.0"]
ray_deps = ["ray>=2.10.0,<3", "pyarrow>=10.0.1"]
mpi_deps = ["unidist[mpi]>=0.2.1"]
consortium_standard_deps = ["dataframe-api-compat>=0.2.7"]
spreadsheet_deps = ["modin-spreadsheet>=0.1.0"]
# Currently, Modin does not include `mpi` option in `all`.
# Otherwise, installation of modin[all] would fail because
# users need to have a working MPI implementation and
# certain software installed beforehand.
all_deps = dask_deps + ray_deps + spreadsheet_deps + consortium_standard_deps

# Distribute 'modin-autoimport-pandas.pth' along with binary and source distributions.
# This file provides the "import pandas before Ray init" feature if specific
# environment variable is set (see https://github.com/modin-project/modin/issues/4564).
cmdclass = versioneer.get_cmdclass()
extra_files = ["modin-autoimport-pandas.pth"]


class AddPthFileBuild(cmdclass["build_py"]):
    """The versioneer-provided ``build_py`` command that also ships `extra_files`."""

    def _get_data_files(self):
        """Return the parent's data files extended with an entry for `extra_files`."""
        # NOTE(review): the tuple presumably follows the parent's
        # (package, source dir, build dir, file names) layout - confirm
        # against the setuptools version in use
        return (super()._get_data_files() or []) + [
            (".", ".", self.build_lib, extra_files)
        ]


class AddPthFileSDist(cmdclass["sdist"]):
    """The versioneer-provided ``sdist`` command that also ships `extra_files`."""

    def make_distribution(self):
        """Add `extra_files` to the file list, then build the distribution."""
        self.filelist.extend(extra_files)
        return super().make_distribution()


# Replace the versioneer commands with the subclasses that ship `extra_files`.
cmdclass["build_py"] = AddPthFileBuild
cmdclass["sdist"] = AddPthFileSDist

setup(
    name="modin",
    version=versioneer.get_version(),
    cmdclass=cmdclass,
    description="Modin: Make your pandas code run faster by changing one line of code.",
    # the `scripts` package is not part of the distribution
    packages=find_packages(exclude=["scripts", "scripts.*"]),
    include_package_data=True,
    license="Apache 2",
    url="https://github.com/modin-project/modin",
    long_description=long_description,
    long_description_content_type="text/markdown",
    install_requires=[
        "pandas>=2.2,<2.4",
        "packaging>=21.0",
        "numpy>=1.22.4",
        "fsspec>=2022.11.0",
        "psutil>=5.8.0",
        "typing-extensions",
    ],
    extras_require={
        # can be installed by pip install modin[dask]
        "dask": dask_deps,
        "ray": ray_deps,
        "mpi": mpi_deps,
        "consortium-standard": consortium_standard_deps,
        "spreadsheet": spreadsheet_deps,
        "all": all_deps,
    },
    python_requires=">=3.9",
)


================================================
FILE: stress_tests/kaggle/kaggle10.py
================================================
import matplotlib

matplotlib.use("PS")
import warnings

import matplotlib.pyplot as plt
import numpy as np  # linear algebra
import seaborn as sns

import modin.pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)

# --- Load the data set and take a first look at it ---
warnings.filterwarnings("ignore")
data = pd.read_csv("column_2C_weka.csv")
print(plt.style.available)  # look at available plot styles
plt.style.use("ggplot")
data.head()
data.info()
data.describe()
# one colour per row: red for the "Abnormal" class, green for everything else
color_list = ["red" if i == "Abnormal" else "green" for i in data.loc[:, "class"]]
# pairwise scatter plots of all columns except "class"
pd.plotting.scatter_matrix(
    data.loc[:, data.columns != "class"],
    c=color_list,
    figsize=[15, 15],
    diagonal="hist",
    alpha=0.5,
    s=200,
    marker="*",
    edgecolor="black",
)
plt.show()
# number of rows per class
sns.countplot(x="class", data=data)
data.loc[:, "class"].value_counts()
from sklearn.neighbors import KNeighborsClassifier

# --- k-nearest neighbours ---
# k=3, fitted and used for prediction on the whole data set
knn = KNeighborsClassifier(n_neighbors=3)
x, y = data.loc[:, data.columns != "class"], data.loc[:, "class"]
knn.fit(x, y)
prediction = knn.predict(x)
print("Prediction: {}".format(prediction))
from sklearn.model_selection import train_test_split

# same model, fitted on 70% of the rows and scored on the remaining 30%
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=1)
knn = KNeighborsClassifier(n_neighbors=3)
x, y = data.loc[:, data.columns != "class"], data.loc[:, "class"]
knn.fit(x_train, y_train)
prediction = knn.predict(x_test)
print("With KNN (K=3) accuracy is: ", knn.score(x_test, y_test))  # accuracy
# train/test accuracy for every k in 1..24
neig = np.arange(1, 25)
train_accuracy = []
test_accuracy = []
for i, k in enumerate(neig):
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(x_train, y_train)
    train_accuracy.append(knn.score(x_train, y_train))
    test_accuracy.append(knn.score(x_test, y_test))
plt.figure(figsize=[13, 8])
plt.plot(neig, test_accuracy, label="Testing Accuracy")
plt.plot(neig, train_accuracy, label="Training Accuracy")
plt.legend()
plt.title("-value VS Accuracy")
plt.xlabel("Number of Neighbors")
plt.ylabel("Accuracy")
plt.xticks(neig)
plt.savefig("graph.png")
plt.show()
# `neig` starts at 1, hence the "1 +" when turning the list index into k
print(
    "Best accuracy is {} with K = {}".format(
        np.max(test_accuracy), 1 + test_accuracy.index(np.max(test_accuracy))
    )
)
# --- Regression on the rows of a single class ---
# NOTE(review): elsewhere in this script the class label is compared with
# "Abnormal"; confirm that "A" is a label present in the data
data1 = data[data["class"] == "A"]
# single-column feature and target, reshaped to (n, 1)
x = np.array(data1.loc[:, "pelvic_incidence"]).reshape(-1, 1)
y = np.array(data1.loc[:, "sacral_slope"]).reshape(-1, 1)
plt.figure(figsize=[10, 10])
plt.scatter(x=x, y=y)
plt.xlabel("pelvic_incidence")
plt.ylabel("sacral_slope")
plt.show()
from sklearn.linear_model import LinearRegression

# linear fit, drawn over evenly spaced points between min(x) and max(x)
reg = LinearRegression()
predict_space = np.linspace(min(x), max(x)).reshape(-1, 1)
reg.fit(x, y)
predicted = reg.predict(predict_space)
print("R^2 score: ", reg.score(x, y))
plt.plot(predict_space, predicted, color="black", linewidth=3)
plt.scatter(x=x, y=y)
plt.xlabel("pelvic_incidence")
plt.ylabel("sacral_slope")
plt.show()
from sklearn.model_selection import cross_val_score

# 5-fold cross validation of the linear model
reg = LinearRegression()
k = 5
cv_result = cross_val_score(reg, x, y, cv=k)  # uses R^2 as score
print("CV Scores: ", cv_result)
print("CV scores average: ", np.sum(cv_result) / k)
from sklearn.linear_model import Ridge

# ridge regression on a 70/30 split
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=2, test_size=0.3)
ridge = Ridge(alpha=0.1, normalize=True)
ridge.fit(x_train, y_train)
ridge_predict = ridge.predict(x_test)
print("Ridge score: ", ridge.score(x_test, y_test))
from sklearn.linear_model import Lasso

# lasso regression on four feature columns; `y` is still "sacral_slope"
x = np.array(
    data1.loc[
        :,
        [
            "pelvic_incidence",
            "pelvic_tilt numeric",
            "lumbar_lordosis_angle",
            "pelvic_radius",
        ],
    ]
)
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=3, test_size=0.3)
lasso = Lasso(alpha=0.1, normalize=True)
lasso.fit(x_train, y_train)
ridge_predict = lasso.predict(x_test)
print("Lasso score: ", lasso.score(x_test, y_test))
print("Lasso coefficients: ", lasso.coef_)
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

# --- Random forest: confusion matrix and classification report ---
x, y = data.loc[:, data.columns != "class"], data.loc[:, "class"]
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=1)
rf = RandomForestClassifier(random_state=4)
rf.fit(x_train, y_train)
y_pred = rf.predict(x_test)
cm = confusion_matrix(y_test, y_pred)
print("Confusion matrix: \n", cm)
print("Classification report: \n", classification_report(y_test, y_pred))
sns.heatmap(cm, annot=True, fmt="d")
plt.show()
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, roc_curve

# --- Logistic regression: ROC curve ---
# binary target: 1 for "Abnormal", 0 otherwise
data["class_binary"] = [1 if i == "Abnormal" else 0 for i in data.loc[:, "class"]]
# features are all columns except the two class columns
x, y = (
    data.loc[:, (data.columns != "class") & (data.columns != "class_binary")],
    data.loc[:, "class_binary"],
)
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=42
)
logreg = LogisticRegression()
logreg.fit(x_train, y_train)
# second column of `predict_proba`, fed to `roc_curve` as the score
y_pred_prob = logreg.predict_proba(x_test)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob)
plt.plot([0, 1], [0, 1], "k--")
plt.plot(fpr, tpr)
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC")
plt.show()
from sklearn.model_selection import GridSearchCV

# --- Hyperparameter tuning with 3-fold grid search ---
# k-nearest neighbours: n_neighbors in 1..49
grid = {"n_neighbors": np.arange(1, 50)}
knn = KNeighborsClassifier()
knn_cv = GridSearchCV(knn, grid, cv=3)  # GridSearchCV
knn_cv.fit(x, y)  # Fit
print("Tuned hyperparameter k: {}".format(knn_cv.best_params_))
print("Best score: {}".format(knn_cv.best_score_))
# logistic regression: regularization strength and penalty type
param_grid = {"C": np.logspace(-3, 3, 7), "penalty": ["l1", "l2"]}
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=12
)
logreg = LogisticRegression()
logreg_cv = GridSearchCV(logreg, param_grid, cv=3)
logreg_cv.fit(x_train, y_train)
print("Tuned hyperparameters : {}".format(logreg_cv.best_params_))
print("Best Accuracy: {}".format(logreg_cv.best_score_))
# reload the data and build dummy columns, dropping the "class_Normal" one
data = pd.read_csv("column_2C_weka.csv")
df = pd.get_dummies(data)
df.head(10)
df.drop("class_Normal", axis=1, inplace=True)
df.head(10)
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# scaler + SVM pipeline tuned over C and gamma;
# `x` and `y` are still the ones built for the logistic regression above
steps = [("scalar", StandardScaler()), ("SVM", SVC())]
pipeline = Pipeline(steps)
parameters = {"SVM__C": [1, 10, 100], "SVM__gamma": [0.1, 0.01]}
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=1)
cv = GridSearchCV(pipeline, param_grid=parameters, cv=3)
cv.fit(x_train, y_train)
y_pred = cv.predict(x_test)
print("Accuracy: {}".format(cv.score(x_test, y_test)))
print("Tuned Model Parameters: {}".format(cv.best_params_))
# --- k-means clustering on two columns ---
data = pd.read_csv("column_2C_weka.csv")
plt.scatter(data["pelvic_radius"], data["degree_spondylolisthesis"])
plt.xlabel("pelvic_radius")
plt.ylabel("degree_spondylolisthesis")
plt.show()
data2 = data.loc[:, ["degree_spondylolisthesis", "pelvic_radius"]]
from sklearn.cluster import KMeans

kmeans = KMeans(n_clusters=2)
kmeans.fit(data2)
labels = kmeans.predict(data2)
# same scatter as above, coloured by the assigned cluster
plt.scatter(data["pelvic_radius"], data["degree_spondylolisthesis"], c=labels)
plt.xlabel("pelvic_radius")
# label the y axis: a second `xlabel` call would just overwrite the first one
# and leave the y axis unlabelled
plt.ylabel("degree_spondylolisthesis")
plt.show()
# cluster labels against the real classes
df = pd.DataFrame({"labels": labels, "class": data["class"]})
ct = pd.crosstab(df["labels"], df["class"])
print(ct)
# --- Inertia of k-means for 1..7 clusters ---
# The values are collected in a list that grows with the loop: the former
# `np.empty(8)` buffer was written only at indices 1..7, so index 0 kept
# arbitrary uninitialized memory and was plotted as a bogus point at x=0.
cluster_counts = range(1, 8)
inertia_list = []
for i in cluster_counts:
    kmeans = KMeans(n_clusters=i)
    kmeans.fit(data2)
    inertia_list.append(kmeans.inertia_)
plt.plot(cluster_counts, inertia_list, "-o")
plt.xlabel("Number of cluster")
plt.ylabel("Inertia")
plt.show()
# --- k-means on standardized features, then hierarchical clustering ---
data = pd.read_csv("column_2C_weka.csv")
data3 = data.drop("class", axis=1)
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# scale the features before clustering them into two groups
scalar = StandardScaler()
kmeans = KMeans(n_clusters=2)
pipe = make_pipeline(scalar, kmeans)
pipe.fit(data3)
labels = pipe.predict(data3)
# cluster labels against the real classes
df = pd.DataFrame({"labels": labels, "class": data["class"]})
ct = pd.crosstab(df["labels"], df["class"])
print(ct)
from scipy.cluster.hierarchy import dendrogram, linkage

# single-linkage dendrogram of rows 200..219
merg = linkage(data3.iloc[200:220, :], method="single")
dendrogram(merg, leaf_rotation=90, leaf_font_size=6)
plt.show()
from sklearn.manifold import TSNE

# --- t-SNE embedding of the two selected columns ---
model = TSNE(learning_rate=100)
transformed = model.fit_transform(data2)
x = transformed[:, 0]
y = transformed[:, 1]
plt.scatter(x, y, c=color_list)
plt.xlabel("pelvic_radius")
# label the y axis: a second `xlabel` call would just overwrite the first one
# and leave the y axis unlabelled
plt.ylabel("degree_spondylolisthesis")
plt.show()
from sklearn.decomposition import PCA

# --- Principal component analysis ---
# PCA on the raw features
model = PCA()
model.fit(data3)
transformed = model.transform(data3)
print("Principle components: ", model.components_)
# PCA on standardized features: explained variance per component
scaler = StandardScaler()
pca = PCA()
pipeline = make_pipeline(scaler, pca)
pipeline.fit(data3)
plt.bar(range(pca.n_components_), pca.explained_variance_)
plt.xlabel("PCA feature")
plt.ylabel("variance")
plt.show()
# projection onto the first two components, coloured by class
pca = PCA(n_components=2)
pca.fit(data3)
transformed = pca.transform(data3)
x = transformed[:, 0]
y = transformed[:, 1]
plt.scatter(x, y, c=color_list)
plt.show()


================================================
FILE: stress_tests/kaggle/kaggle12.py
================================================
import matplotlib

matplotlib.use("PS")
from collections import Counter

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.ensemble import (
    AdaBoostClassifier,
    ExtraTreesClassifier,
    GradientBoostingClassifier,
    RandomForestClassifier,
    VotingClassifier,
)
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import (
    GridSearchCV,
    StratifiedKFold,
    cross_val_score,
    learning_curve,
)
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

import modin.pandas as pd

# plot styling, then load the train/test sets;
# the test passenger ids are kept separately in `IDtest`
sns.set(style="white", context="notebook", palette="deep")
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")
IDtest = test["PassengerId"]


def detect_outliers(df, n, features):
    """Return the index labels of rows that are outliers in more than ``n`` features.

    A value counts as an outlier for a column when it lies more than
    1.5 * IQR below the first quartile or above the third quartile.
    Missing values are ignored when the quartiles are computed; with plain
    ``np.percentile`` a single NaN makes both quartiles NaN, every comparison
    evaluates to False, and the column silently contributes no outliers.

    Parameters
    ----------
    df : DataFrame
        Data to inspect.
    n : int
        A row is reported only if it is an outlier in strictly more than
        ``n`` of the given columns.
    features : iterable of column labels
        Numeric columns to check.

    Returns
    -------
    list
        Index labels of the selected rows, in order of first detection.
    """
    outlier_counts = Counter()
    for col in features:
        values = df[col]
        # NaN-aware quartiles; NaN entries themselves never compare as outliers.
        q1, q3 = np.nanpercentile(values, [25, 75])
        outlier_step = 1.5 * (q3 - q1)
        is_outlier = (values < q1 - outlier_step) | (values > q3 + outlier_step)
        # Count, per row label, in how many columns the row is an outlier.
        outlier_counts.update(df[is_outlier].index)
    return [label for label, count in outlier_counts.items() if count > n]


# Drop training rows that are outliers in more than two of the numeric columns.
Outliers_to_drop = detect_outliers(train, 2, ["Age", "SibSp", "Parch", "Fare"])
train.loc[Outliers_to_drop]  # Show the outliers rows
train = train.drop(Outliers_to_drop, axis=0).reset_index(drop=True)
# Remember where the training rows end so the combined frame can be split again.
train_len = len(train)
# Stack train and test so feature engineering is applied to both at once.
# The frames are passed positionally: concat's first parameter is named
# ``objs``, so the former ``list_of_objs=`` keyword was not a valid argument.
dataset = pd.concat([train, test], axis=0).reset_index(drop=True)
dataset = dataset.fillna(np.nan)
# Quick look at missing values, dtypes and summary statistics.
dataset.isnull().sum()
train.info()
train.isnull().sum()
train.head()
train.dtypes
train.describe()
# Exploratory plots for the numeric columns.
# Correlation of the numeric features with each other and with Survived.
g = sns.heatmap(
    train[["Survived", "SibSp", "Parch", "Age", "Fare"]].corr(),
    annot=True,
    fmt=".2f",
    cmap="coolwarm",
)
# Mean of Survived per SibSp value.
g = sns.factorplot(
    x="SibSp", y="Survived", data=train, kind="bar", size=6, palette="muted"
)
g.despine(left=True)
g = g.set_ylabels("survival probability")
# Mean of Survived per Parch value.
g = sns.factorplot(
    x="Parch", y="Survived", data=train, kind="bar", size=6, palette="muted"
)
g.despine(left=True)
g = g.set_ylabels("survival probability")
# Fare: fill missing values with the median, then plot the distribution before
# and after a log transform (non-positive fares map to 0).
dataset["Fare"].isnull().sum()
dataset["Fare"] = dataset["Fare"].fillna(dataset["Fare"].median())
g = sns.distplot(
    dataset["Fare"], color="m", label="Skewness : %.2f" % (dataset["Fare"].skew())
)
g = g.legend(loc="best")
dataset["Fare"] = dataset["Fare"].map(lambda i: np.log(i) if i > 0 else 0)
g = sns.distplot(
    dataset["Fare"], color="b", label="Skewness : %.2f" % (dataset["Fare"].skew())
)
g = g.legend(loc="best")
# Exploratory plots for the categorical columns.
# Survival by sex.
g = sns.barplot(x="Sex", y="Survived", data=train)
g = g.set_ylabel("Survival Probability")
train[["Sex", "Survived"]].groupby("Sex").mean()
# Survival by passenger class, overall and split by sex.
g = sns.factorplot(
    x="Pclass", y="Survived", data=train, kind="bar", size=6, palette="muted"
)
g.despine(left=True)
g = g.set_ylabels("survival probability")
g = sns.factorplot(
    x="Pclass", y="Survived", hue="Sex", data=train, size=6, kind="bar", palette="muted"
)
g.despine(left=True)
g = g.set_ylabels("survival probability")
# Embarked: fill the missing values with "S", then plot survival per port and
# the class counts per port.
dataset["Embarked"].isnull().sum()
dataset["Embarked"] = dataset["Embarked"].fillna("S")
g = sns.factorplot(
    x="Embarked", y="Survived", data=train, size=6, kind="bar", palette="muted"
)
g.despine(left=True)
g = g.set_ylabels("survival probability")
g = sns.factorplot(
    "Pclass", col="Embarked", data=train, size=6, kind="count", palette="muted"
)
g.despine(left=True)
g = g.set_ylabels("Count")
# Age distribution against Sex, Pclass, Parch and SibSp.
g = sns.factorplot(y="Age", x="Sex", data=dataset, kind="box")
g = sns.factorplot(y="Age", x="Sex", hue="Pclass", data=dataset, kind="box")
g = sns.factorplot(y="Age", x="Parch", data=dataset, kind="box")
g = sns.factorplot(y="Age", x="SibSp", data=dataset, kind="box")
# Encode Sex numerically so it can take part in the correlation matrix.
dataset["Sex"] = dataset["Sex"].map({"male": 0, "female": 1})
g = sns.heatmap(
    dataset[["Age", "Sex", "SibSp", "Parch", "Pclass"]].corr(), cmap="BrBG", annot=True
)
# Impute missing ages with the median age of passengers that share the same
# SibSp, Parch and Pclass; fall back to the overall median when no such
# passenger has a known age.
# dataset carries a fresh 0..n-1 index (reset_index above), so the labels
# collected here are also valid positions for .iloc.
index_NaN_age = list(dataset["Age"][dataset["Age"].isnull()].index)
for i in index_NaN_age:
    # Recomputed on every pass on purpose: both medians see the ages that
    # earlier iterations have already filled in.
    age_med = dataset["Age"].median()
    age_pred = dataset["Age"][
        (
            (dataset["SibSp"] == dataset.iloc[i]["SibSp"])
            & (dataset["Parch"] == dataset.iloc[i]["Parch"])
            & (dataset["Pclass"] == dataset.iloc[i]["Pclass"])
        )
    ].median()
    # Write through the frame itself. The chained form
    # dataset["Age"].iloc[i] = ... assigns into an intermediate Series and is
    # not guaranteed to update dataset.
    if not np.isnan(age_pred):
        dataset.loc[i, "Age"] = age_pred
    else:
        dataset.loc[i, "Age"] = age_med
# Age distribution by survival outcome.
g = sns.factorplot(x="Survived", y="Age", data=train, kind="box")
g = sns.factorplot(x="Survived", y="Age", data=train, kind="violin")
# Title feature: take the text between the first "," and the following "."
# of every name.
dataset["Name"].head()
dataset_title = [i.split(",")[1].split(".")[0].strip() for i in dataset["Name"]]
dataset["Title"] = pd.Series(dataset_title)
dataset["Title"].head()
g = sns.countplot(x="Title", data=dataset)
g = plt.setp(g.get_xticklabels(), rotation=45)
# Collapse the listed titles into a single "Rare" bucket.
dataset["Title"] = dataset["Title"].replace(
    [
        "Lady",
        "the Countess",
        "Countess",
        "Capt",
        "Col",
        "Don",
        "Dr",
        "Major",
        "Rev",
        "Sir",
        "Jonkheer",
        "Dona",
    ],
    "Rare",
)
# Integer codes: 0 = Master, 1 = Miss/Ms/Mme/Mlle/Mrs, 2 = Mr, 3 = Rare.
dataset["Title"] = dataset["Title"].map(
    {"Master": 0, "Miss": 1, "Ms": 1, "Mme": 1, "Mlle": 1, "Mrs": 1, "Mr": 2, "Rare": 3}
)
dataset["Title"] = dataset["Title"].astype(int)
g = sns.countplot(dataset["Title"])
g = g.set_xticklabels(["Master", "Miss/Ms/Mme/Mlle/Mrs", "Mr", "Rare"])
g = sns.factorplot(x="Title", y="Survived", data=dataset, kind="bar")
g = g.set_xticklabels(["Master", "Miss-Mrs", "Mr", "Rare"])
g = g.set_ylabels("survival probability")
# Name has been reduced to Title, so the raw column is dropped.
dataset.drop(labels=["Name"], axis=1, inplace=True)
# Family size = siblings/spouses + parents/children + the passenger.
dataset["Fsize"] = dataset["SibSp"] + dataset["Parch"] + 1
g = sns.factorplot(x="Fsize", y="Survived", data=dataset)
g = g.set_ylabels("Survival Probability")
# Indicator columns for four family-size bands: 1, 2, 3-4 and 5 or more.
dataset["Single"] = dataset["Fsize"].map(lambda s: 1 if s == 1 else 0)
dataset["SmallF"] = dataset["Fsize"].map(lambda s: 1 if s == 2 else 0)
dataset["MedF"] = dataset["Fsize"].map(lambda s: 1 if 3 <= s <= 4 else 0)
dataset["LargeF"] = dataset["Fsize"].map(lambda s: 1 if s >= 5 else 0)
g = sns.factorplot(x="Single", y="Survived", data=dataset, kind="bar")
g = g.set_ylabels("Survival Probability")
g = sns.factorplot(x="SmallF", y="Survived", data=dataset, kind="bar")
g = g.set_ylabels("Survival Probability")
g = sns.factorplot(x="MedF", y="Survived", data=dataset, kind="bar")
g = g.set_ylabels("Survival Probability")
g = sns.factorplot(x="LargeF", y="Survived", data=dataset, kind="bar")
g = g.set_ylabels("Survival Probability")
# One-hot encode Title and Embarked (Embarked columns get the "Em" prefix).
dataset = pd.get_dummies(dataset, columns=["Title"])
dataset = pd.get_dummies(dataset, columns=["Embarked"], prefix="Em")
dataset.head()
# Cabin feature: keep only the first character of the cabin string and use
# "X" where the cabin is missing.
dataset["Cabin"].head()
dataset["Cabin"].describe()
dataset["Cabin"].isnull().sum()
dataset["Cabin"][dataset["Cabin"].notnull()].head()
dataset["Cabin"] = pd.Series(
    [i[0] if not pd.isnull(i) else "X" for i in dataset["Cabin"]]
)
# Counts and survival rate per cabin letter, in a fixed letter order.
g = sns.countplot(dataset["Cabin"], order=["A", "B", "C", "D", "E", "F", "G", "T", "X"])
g = sns.factorplot(
    y="Survived",
    x="Cabin",
    data=dataset,
    kind="bar",
    order=["A", "B", "C", "D", "E", "F", "G", "T", "X"],
)
g = g.set_ylabels("Survival Probability")
dataset = pd.get_dummies(dataset, columns=["Cabin"], prefix="Cabin")
# Ticket feature: purely numeric tickets become "X"; any other ticket is
# reduced to its prefix (dots and slashes removed, text before the first
# space kept).
dataset["Ticket"].head()
Ticket = [
    "X"
    if raw_ticket.isdigit()
    else raw_ticket.replace(".", "").replace("/", "").strip().split(" ")[0]
    for raw_ticket in list(dataset.Ticket)
]
dataset["Ticket"] = Ticket
dataset["Ticket"].head()
# One-hot encode the ticket prefixes.
dataset = pd.get_dummies(dataset, columns=["Ticket"], prefix="T")
# One-hot encode the passenger class and drop the identifier column.
dataset["Pclass"] = dataset["Pclass"].astype("category")
dataset = pd.get_dummies(dataset, columns=["Pclass"], prefix="Pc")
dataset.drop(labels=["PassengerId"], axis=1, inplace=True)
dataset.head()
# Split the combined frame back into its train and test parts using the row
# count recorded before concatenation.
train = dataset[:train_len]
test = dataset[train_len:]
# Survived is removed from the test part, which is only used for prediction.
test.drop(labels=["Survived"], axis=1, inplace=True)
train["Survived"] = train["Survived"].astype(int)
Y_train = train["Survived"]
X_train = train.drop(labels=["Survived"], axis=1)
# Baseline comparison: evaluate ten classifiers with stratified 10-fold
# cross-validation on the training data.
kfold = StratifiedKFold(n_splits=10)
random_state = 2
classifiers = []
classifiers.append(SVC(random_state=random_state))
classifiers.append(DecisionTreeClassifier(random_state=random_state))
classifiers.append(
    AdaBoostClassifier(
        DecisionTreeClassifier(random_state=random_state),
        random_state=random_state,
        learning_rate=0.1,
    )
)
classifiers.append(RandomForestClassifier(random_state=random_state))
classifiers.append(ExtraTreesClassifier(random_state=random_state))
classifiers.append(GradientBoostingClassifier(random_state=random_state))
classifiers.append(MLPClassifier(random_state=random_state))
classifiers.append(KNeighborsClassifier())
classifiers.append(LogisticRegression(random_state=random_state))
classifiers.append(LinearDiscriminantAnalysis())
# One array of per-fold accuracies per classifier, in list order.
cv_results = []
for classifier in classifiers:
    cv_results.append(
        cross_val_score(
            classifier, X_train, y=Y_train, scoring="accuracy", cv=kfold, n_jobs=4
        )
    )
# Mean and standard deviation of the fold accuracies for each classifier.
cv_means = []
cv_std = []
for cv_result in cv_results:
    cv_means.append(cv_result.mean())
    cv_std.append(cv_result.std())
# Collect the cross-validation summary into a frame. The algorithm names are
# listed in the same order in which the classifiers were appended.
cv_res = pd.DataFrame(
    {
        "CrossValMeans": cv_means,
        "CrossValerrors": cv_std,
        "Algorithm": [
            "SVC",
            "DecisionTree",
            "AdaBoost",
            "RandomForest",
            "ExtraTrees",
            "GradientBoosting",
            "MultipleLayerPerceptron",
            "KNeighboors",
            "LogisticRegression",
            "LinearDiscriminantAnalysis",
        ],
    }
)
# Horizontal bar chart of the mean accuracies; the standard deviations are
# forwarded as the xerr keyword.
g = sns.barplot(
    "CrossValMeans",
    "Algorithm",
    data=cv_res,
    palette="Set3",
    orient="h",
    **{"xerr": cv_std}
)
g.set_xlabel("Mean Accuracy")
g = g.set_title("Cross validation scores")
# Grid search for AdaBoost over a decision-tree base estimator.
# Keys prefixed with "base_estimator__" address parameters of the wrapped tree.
DTC = DecisionTreeClassifier()
adaDTC = AdaBoostClassifier(DTC, random_state=7)
ada_param_grid = {
    "base_estimator__criterion": ["gini", "entropy"],
    "base_estimator__splitter": ["best", "random"],
    "algorithm": ["SAMME", "SAMME.R"],
    "n_estimators": [1, 2],
    "learning_rate": [0.0001, 0.001, 0.01, 0.1, 0.2, 0.3, 1.5],
}
gsadaDTC = GridSearchCV(
    adaDTC, param_grid=ada_param_grid, cv=kfold, scoring="accuracy", n_jobs=4, verbose=1
)
gsadaDTC.fit(X_train, Y_train)
# Keep the best estimator for the learning curves and the ensemble below.
ada_best = gsadaDTC.best_estimator_
gsadaDTC.best_score_
# Grid search for ExtraTrees, scored by accuracy on the shared kfold splitter.
ExtC = ExtraTreesClassifier()
ex_param_grid = {
    "max_depth": [None],
    "max_features": [1, 3, 10],
    "min_samples_split": [2, 3, 10],
    "min_samples_leaf": [1, 3, 10],
    "bootstrap": [False],
    "n_estimators": [100, 300],
    "criterion": ["gini"],
}
gsExtC = GridSearchCV(
    ExtC, param_grid=ex_param_grid, cv=kfold, scoring="accuracy", n_jobs=4, verbose=1
)
gsExtC.fit(X_train, Y_train)
# Keep the best estimator for the learning curves and the ensemble below.
ExtC_best = gsExtC.best_estimator_
gsExtC.best_score_
# Grid search for RandomForest; the grid has the same values as the
# ExtraTrees grid.
RFC = RandomForestClassifier()
rf_param_grid = {
    "max_depth": [None],
    "max_features": [1, 3, 10],
    "min_samples_split": [2, 3, 10],
    "min_samples_leaf": [1, 3, 10],
    "bootstrap": [False],
    "n_estimators": [100, 300],
    "criterion": ["gini"],
}
gsRFC = GridSearchCV(
    RFC, param_grid=rf_param_grid, cv=kfold, scoring="accuracy", n_jobs=4, verbose=1
)
gsRFC.fit(X_train, Y_train)
# Keep the best estimator for the learning curves and the ensemble below.
RFC_best = gsRFC.best_estimator_
gsRFC.best_score_
# Grid search for GradientBoosting, scored by accuracy on the shared kfold
# splitter.
GBC = GradientBoostingClassifier()
gb_param_grid = {
    "loss": ["deviance"],
    "n_estimators": [100, 200, 300],
    "learning_rate": [0.1, 0.05, 0.01],
    "max_depth": [4, 8],
    "min_samples_leaf": [100, 150],
    "max_features": [0.3, 0.1],
}
gsGBC = GridSearchCV(
    GBC, param_grid=gb_param_grid, cv=kfold, scoring="accuracy", n_jobs=4, verbose=1
)
gsGBC.fit(X_train, Y_train)
# Keep the best estimator for the learning curves and the ensemble below.
GBC_best = gsGBC.best_estimator_
gsGBC.best_score_
# Grid search for an RBF-kernel SVC. probability=True is set here; the
# soft-voting ensemble at the end of the script uses this estimator.
SVMC = SVC(probability=True)
svc_param_grid = {
    "kernel": ["rbf"],
    "gamma": [0.001, 0.01, 0.1, 1],
    "C": [1, 10, 50, 100, 200, 300, 1000],
}
gsSVMC = GridSearchCV(
    SVMC, param_grid=svc_param_grid, cv=kfold, scoring="accuracy", n_jobs=4, verbose=1
)
gsSVMC.fit(X_train, Y_train)
# Keep the best estimator for the learning curves and the ensemble below.
SVMC_best = gsSVMC.best_estimator_
gsSVMC.best_score_


def plot_learning_curve(
    estimator,
    title,
    X,
    y,
    ylim=None,
    cv=None,
    n_jobs=-1,
    train_sizes=np.linspace(0.1, 1.0, 5),
):
    """Draw training and cross-validation score curves for ``estimator``.

    A new figure is opened, the scores returned by ``learning_curve`` are
    averaged over the folds, and each curve is drawn as a line with a shaded
    band of one standard deviation on either side. When ``ylim`` is given it
    is unpacked into ``plt.ylim``. Returns the ``plt`` module.
    """
    plt.figure()
    plt.title(title)
    if ylim is not None:
        plt.ylim(*ylim)
    plt.xlabel("Training examples")
    plt.ylabel("Score")

    sizes, fit_scores, cv_scores = learning_curve(
        estimator, X, y, cv=cv, n_jobs=n_jobs, train_sizes=train_sizes
    )
    # (mean, std, color, legend label) for the two curves; training goes first
    # so the artists are added in the same order as before.
    curves = [
        (np.mean(fit_scores, axis=1), np.std(fit_scores, axis=1), "r", "Training score"),
        (
            np.mean(cv_scores, axis=1),
            np.std(cv_scores, axis=1),
            "g",
            "Cross-validation score",
        ),
    ]

    plt.grid()
    # Shaded bands first, then the lines on top of them.
    for center, spread, colour, _ in curves:
        plt.fill_between(
            sizes, center - spread, center + spread, alpha=0.1, color=colour
        )
    for center, _, colour, legend_label in curves:
        plt.plot(sizes, center, "o-", color=colour, label=legend_label)
    plt.legend(loc="best")
    return plt


# Learning curves for the best estimator found by each grid search.
# The first title used to read "RF mearning curves"; the typo is fixed so
# every plot is titled "<model> learning curves".
g = plot_learning_curve(
    gsRFC.best_estimator_, "RF learning curves", X_train, Y_train, cv=kfold
)
g = plot_learning_curve(
    gsExtC.best_estimator_, "ExtraTrees learning curves", X_train, Y_train, cv=kfold
)
g = plot_learning_curve(
    gsSVMC.best_estimator_, "SVC learning curves", X_train, Y_train, cv=kfold
)
g = plot_learning_curve(
    gsadaDTC.best_estimator_, "AdaBoost learning curves", X_train, Y_train, cv=kfold
)
g = plot_learning_curve(
    gsGBC.best_estimator_,
    "GradientBoosting learning curves",
    X_train,
    Y_train,
    cv=kfold,
)
# Feature importances of the four tree-based models, one panel each on a
# 2 x 2 grid that is filled row by row.
nrows = ncols = 2
fig, axes = plt.subplots(nrows=nrows, ncols=ncols, sharex="all", figsize=(15, 15))
names_classifiers = [
    ("AdaBoosting", ada_best),
    ("ExtraTrees", ExtC_best),
    ("RandomForest", RFC_best),
    ("GradientBoosting", GBC_best),
]
for nclassifier, (name, classifier) in enumerate(names_classifiers):
    # Position of this model's panel in the grid.
    row, col = divmod(nclassifier, ncols)
    importances = classifier.feature_importances_
    # At most the 40 most important features, most important first.
    indices = np.argsort(importances)[::-1][:40]
    g = sns.barplot(
        y=X_train.columns[indices][:40],
        x=importances[indices][:40],
        orient="h",
        ax=axes[row][col],
    )
    g.set_xlabel("Relative importance", fontsize=12)
    g.set_ylabel("Features", fontsize=12)
    g.tick_params(labelsize=9)
    g.set_title(name + " feature importance")
# Predictions of each tuned model on the test set, used only to compare how
# similar the models' outputs are.
test_Survived_RFC = pd.Series(RFC_best.predict(test), name="RFC")
test_Survived_ExtC = pd.Series(ExtC_best.predict(test), name="ExtC")
test_Survived_SVMC = pd.Series(SVMC_best.predict(test), name="SVC")
test_Survived_AdaC = pd.Series(ada_best.predict(test), name="Ada")
test_Survived_GBC = pd.Series(GBC_best.predict(test), name="GBC")
ensemble_results = pd.concat(
    [
        test_Survived_RFC,
        test_Survived_ExtC,
        test_Survived_AdaC,
        test_Survived_GBC,
        test_Survived_SVMC,
    ],
    axis=1,
)
# Pairwise correlation between the models' predictions.
g = sns.heatmap(ensemble_results.corr(), annot=True)
# Soft-voting ensemble over the five tuned models.
votingC = VotingClassifier(
    estimators=[
        ("rfc", RFC_best),
        ("extc", ExtC_best),
        ("svc", SVMC_best),
        ("adac", ada_best),
        ("gbc", GBC_best),
    ],
    voting="soft",
    n_jobs=4,
)
votingC = votingC.fit(X_train, Y_train)
# Put the saved passenger ids next to the ensemble predictions and write the
# result to CSV.
test_Survived = pd.Series(votingC.predict(test), name="Survived")
results = pd.concat([IDtest, test_Survived], axis=1)
results.to_csv("ensemble_python_voting.csv", index=False)


================================================
FILE: stress_tests/kaggle/kaggle13.py
================================================
#!/usr/bin/env python
import matplotlib

matplotlib.use("PS")
import warnings  # current version of seaborn generates a bunch of warnings that we'll ignore

import modin.pandas as pd

warnings.filterwarnings("ignore")
import matplotlib.pyplot as plt
import seaborn as sns

sns.set(style="white", color_codes=True)
iris = pd.read_csv("Iris.csv")  # the iris dataset, loaded through modin.pandas
iris.head()
iris["Species"].value_counts()
# Sepal length against sepal width: DataFrame.plot, seaborn jointplot and a
# FacetGrid colored by species.
iris.plot(kind="scatter", x="SepalLengthCm", y="SepalWidthCm")
sns.jointplot(x="SepalLengthCm", y="SepalWidthCm", data=iris, size=5)
sns.FacetGrid(iris, hue="Species", size=5).map(
    plt.scatter, "SepalLengthCm", "SepalWidthCm"
).add_legend()
# Petal length per species: box plot, box plot with the points drawn on top,
# violin plot and a kernel density estimate.
sns.boxplot(x="Species", y="PetalLengthCm", data=iris)
ax = sns.boxplot(x="Species", y="PetalLengthCm", data=iris)
ax = sns.stripplot(
    x="Species", y="PetalLengthCm", data=iris, jitter=True, edgecolor="gray"
)
sns.violinplot(x="Species", y="PetalLengthCm", data=iris, size=6)
sns.FacetGrid(iris, hue="Species", size=6).map(
    sns.kdeplot, "PetalLengthCm"
).add_legend()
# Box plot of every measurement column grouped by species.
iris.drop("Id", axis=1).boxplot(by="Species", figsize=(12, 6))
# Multivariate plots. These helpers live in pandas.plotting; the former
# pandas.tools.plotting module no longer exists, so importing from it fails.
from pandas.plotting import andrews_curves, parallel_coordinates, radviz

andrews_curves(iris.drop("Id", axis=1), "Species")
parallel_coordinates(iris.drop("Id", axis=1), "Species")
radviz(iris.drop("Id", axis=1), "Species")


================================================
FILE: stress_tests/kaggle/kaggle14.py
================================================
import matplotlib

matplotlib.use("PS")
import matplotlib.pyplot as plt
import seaborn as sns

import modin.pandas as pd

plt.style.use("fivethirtyeight")
import warnings

warnings.filterwarnings("ignore")
# Load the training data and look at survival by sex, class and age.
data = pd.read_csv("train.csv")
data.head()
data.isnull().sum()  # checking for total null values
# Survival by sex: mean survival as bars (left) and raw counts (right).
data.groupby(["Sex", "Survived"])["Survived"].count()
f, ax = plt.subplots(1, 2, figsize=(18, 8))
data[["Sex", "Survived"]].groupby(["Sex"]).mean().plot.bar(ax=ax[0])
ax[0].set_title("Survived vs Sex")
sns.countplot("Sex", hue="Survived", data=data, ax=ax[1])
ax[1].set_title("Sex:Survived vs Dead")
plt.show()
# Survival by passenger class.
pd.crosstab(data.Pclass, data.Survived, margins=True).style.background_gradient(
    cmap="summer_r"
)
f, ax = plt.subplots(1, 2, figsize=(18, 8))
data["Pclass"].value_counts().plot.bar(
    color=["#CD7F32", "#FFDF00", "#D3D3D3"], ax=ax[0]
)
ax[0].set_title("Number Of Passengers By Pclass")
ax[0].set_ylabel("Count")
sns.countplot("Pclass", hue="Survived", data=data, ax=ax[1])
ax[1].set_title("Pclass:Survived vs Dead")
plt.show()
# Sex and class together.
pd.crosstab(
    [data.Sex, data.Survived], data.Pclass, margins=True
).style.background_gradient(cmap="summer_r")
sns.factorplot("Pclass", "Survived", hue="Sex", data=data)
plt.show()
# Age: range and mean, then split violin plots by class and by sex.
print("Oldest Passenger was of:", data["Age"].max(), "Years")
print("Youngest Passenger was of:", data["Age"].min(), "Years")
print("Average Age on the ship:", data["Age"].mean(), "Years")
f, ax = plt.subplots(1, 2, figsize=(18, 8))
sns.violinplot("Pclass", "Age", hue="Survived", data=data, split=True, ax=ax[0])
ax[0].set_title("Pclass and Age vs Survived")
ax[0].set_yticks(range(0, 110, 10))
sns.violinplot("Sex", "Age", hue="Survived", data=data, split=True, ax=ax[1])
ax[1].set_title("Sex and Age vs Survived")
ax[1].set_yticks(range(0, 110, 10))
plt.show()
# Extract the salutation (the letters directly before a ".") from Name.
# This used to sit inside ``for i in data:``, which repeated the identical
# assignment once per column; a single assignment gives the same result.
data["Initial"] = 0
data["Initial"] = data.Name.str.extract(
    r"([A-Za-z]+)\."  # noqa: W605
)  # lets extract the Salutations
pd.crosstab(data.Initial, data.Sex).T.style.background_gradient(
    cmap="summer_r"
)  # Checking the Initials with the Sex
# Map the less common salutations onto Miss / Mr / Mrs / Other.
# The result is assigned back to the column: calling replace(inplace=True) on
# data["Initial"] modifies an intermediate Series and is not guaranteed to
# update data.
data["Initial"] = data["Initial"].replace(
    [
        "Mlle",
        "Mme",
        "Ms",
        "Dr",
        "Major",
        "Lady",
        "Countess",
        "Jonkheer",
        "Col",
        "Rev",
        "Capt",
        "Sir",
        "Don",
    ],
    [
        "Miss",
        "Miss",
        "Miss",
        "Mr",
        "Mr",
        "Mrs",
        "Mrs",
        "Other",
        "Other",
        "Other",
        "Mr",
        "Mr",
        "Mr",
    ],
)
data.groupby("Initial")["Age"].mean()  # lets check the average age by Initials
# Fill missing ages with a fixed value per salutation group.
data.loc[(data.Age.isnull()) & (data.Initial == "Mr"), "Age"] = 33
data.loc[(data.Age.isnull()) & (data.Initial == "Mrs"), "Age"] = 36
data.loc[(data.Age.isnull()) & (data.Initial == "Master"), "Age"] = 5
data.loc[(data.Age.isnull()) & (data.Initial == "Miss"), "Age"] = 22
data.loc[(data.Age.isnull()) & (data.Initial == "Other"), "Age"] = 46
data.Age.isnull().any()  # So no null values left finally
# Age histograms for non-survivors (left) and survivors (right).
f, ax = plt.subplots(1, 2, figsize=(20, 10))
data[data["Survived"] == 0].Age.plot.hist(
    ax=ax[0], bins=20, edgecolor="black", color="red"
)
ax[0].set_title("Survived= 0")
x1 = list(range(0, 85, 5))
ax[0].set_xticks(x1)
data[data["Survived"] == 1].Age.plot.hist(
    ax=ax[1], color="green", bins=20, edgecolor="black"
)
ax[1].set_title("Survived= 1")
x2 = list(range(0, 85, 5))
ax[1].set_xticks(x2)
plt.show()
# Survival by class, one panel per salutation group.
sns.factorplot("Pclass", "Survived", col="Initial", data=data)
plt.show()
# Embarked: cross-tabulation against class, sex and survival, then plots.
pd.crosstab(
    [data.Embarked, data.Pclass], [data.Sex, data.Survived], margins=True
).style.background_gradient(cmap="summer_r")
sns.factorplot("Embarked", "Survived", data=data)
fig = plt.gcf()
fig.set_size_inches(5, 3)
plt.show()
# 2 x 2 grid of count plots for Embarked: overall, by sex, by survival and
# by class.
f, ax = plt.subplots(2, 2, figsize=(20, 15))
sns.countplot("Embarked", data=data, ax=ax[0, 0])
ax[0, 0].set_title("No. Of Passengers Boarded")
sns.countplot("Embarked", hue="Sex", data=data, ax=ax[0, 1])
ax[0, 1].set_title("Male-Female Split for Embarked")
sns.countplot("Embarked", hue="Survived", data=data, ax=ax[1, 0])
ax[1, 0].set_title("Embarked vs Survived")
sns.countplot("Embarked", hue="Pclass", data=data, ax=ax[1, 1])
ax[1, 1].set_title("Embarked vs Pclass")
plt.subplots_adjust(wspace=0.2, hspace=0.5)
plt.show()
sns.factorplot("Pclass", "Survived", hue="Sex", col="Embarked", data=data)
plt.show()
# Fill the missing ports with "S". The result is assigned back to the column:
# fillna(inplace=True) on data["Embarked"] modifies an intermediate Series and
# is not guaranteed to update data.
data["Embarked"] = data["Embarked"].fillna("S")
data.Embarked.isnull().any()  # Finally No NaN values
# SibSp against survival.
pd.crosstab([data.SibSp], data.Survived).style.background_gradient(cmap="summer_r")
f, ax = plt.subplots(1, 2, figsize=(20, 8))
sns.barplot("SibSp", "Survived", data=data, ax=ax[0])
ax[0].set_title("SibSp vs Survived")
sns.factorplot("SibSp", "Survived", data=data, ax=ax[1])
ax[1].set_title("SibSp vs Survived")
# NOTE(review): presumably closes the extra figure that factorplot opens on
# its own - confirm.
plt.close(2)
plt.show()
pd.crosstab(data.SibSp, data.Pclass).style.background_gradient(cmap="summer_r")
# Parch against class and survival.
pd.crosstab(data.Parch, data.Pclass).style.background_gradient(cmap="summer_r")
f, ax = plt.subplots(1, 2, figsize=(20, 8))
sns.barplot("Parch", "Survived", data=data, ax=ax[0])
ax[0].set_title("Parch vs Survived")
sns.factorplot("Parch", "Survived", data=data, ax=ax[1])
ax[1].set_title("Parch vs Survived")
plt.close(2)
plt.show()
# Fare: range and mean, then the distribution within each passenger class.
print("Highest Fare was:", data["Fare"].max())
print("Lowest Fare was:", data["Fare"].min())
print("Average Fare was:", data["Fare"].mean())
f, ax = plt.subplots(1, 3, figsize=(20, 8))
sns.distplot(data[data["Pclass"] == 1].Fare, ax=ax[0])
ax[0].set_title("Fares in Pclass 1")
sns.distplot(data[data["Pclass"] == 2].Fare, ax=ax[1])
ax[1].set_title("Fares in Pclass 2")
sns.distplot(data[data["Pclass"] == 3].Fare, ax=ax[2])
ax[2].set_title("Fares in Pclass 3")
plt.show()
# Heat map of the correlation matrix returned by data.corr().
sns.heatmap(
    data.corr(), annot=True, cmap="RdYlGn", linewidths=0.2
)  # data.corr()-->correlation matrix
fig = plt.gcf()
fig.set_size_inches(10, 8)
plt.show()
# Age_band: five bands with upper bounds 16, 32, 48 and 64 (codes 0-4).
data["Age_band"] = 0
data.loc[data["Age"] <= 16, "Age_band"] = 0
data.loc[(data["Age"] > 16) & (data["Age"] <= 32), "Age_band"] = 1
data.loc[(data["Age"] > 32) & (data["Age"] <= 48), "Age_band"] = 2
data.loc[(data["Age"] > 48) & (data["Age"] <= 64), "Age_band"] = 3
data.loc[data["Age"] > 64, "Age_band"] = 4
data.head(2)
data["Age_band"].value_counts().to_frame().style.background_gradient(
    cmap="summer"
)  # checking the number of passengers in each band
sns.factorplot("Age_band", "Survived", data=data, col="Pclass")
plt.show()
# Family_Size = Parch + SibSp; Alone is 1 when Family_Size is 0.
data["Family_Size"] = 0
data["Family_Size"] = data["Parch"] + data["SibSp"]  # family size
data["Alone"] = 0
data.loc[data.Family_Size == 0, "Alone"] = 1  # Alone
f, ax = plt.subplots(1, 2, figsize=(18, 6))
sns.factorplot("Family_Size", "Survived", data=data, ax=ax[0])
ax[0].set_title("Family_Size vs Survived")
sns.factorplot("Alone", "Survived", data=data, ax=ax[1])
ax[1].set_title("Alone vs Survived")
# NOTE(review): presumably closes the extra figures that the two factorplot
# calls open on their own - confirm.
plt.close(2)
plt.close(3)
plt.show()
sns.factorplot("Alone", "Survived", data=data, hue="Sex", col="Pclass")
plt.show()
# Fare_Range: quartile bins from qcut, used to inspect survival per bin.
data["Fare_Range"] = pd.qcut(data["Fare"], 4)
data.groupby(["Fare_Range"])["Survived"].mean().to_frame().style.background_gradient(
    cmap="summer_r"
)
# Fare_cat: integer code 0-3 using fixed cut points 7.91, 14.454 and 31.
data["Fare_cat"] = 0
data.loc[data["Fare"] <= 7.91, "Fare_cat"] = 0
data.loc[(data["Fare"] > 7.91) & (data["Fare"] <= 14.454), "Fare_cat"] = 1
data.loc[(data["Fare"] > 14.454) & (data["Fare"] <= 31), "Fare_cat"] = 2
data.loc[(data["Fare"] > 31) & (data["Fare"] <= 513), "Fare_cat"] = 3
sns.factorplot("Fare_cat", "Survived", data=data, hue="Sex")
plt.show()
# Encode the string columns as integers. Each result is assigned back to its
# column: replace(inplace=True) on a column selection modifies an intermediate
# Series and is not guaranteed to update data.
data["Sex"] = data["Sex"].replace(["male", "female"], [0, 1])
data["Embarked"] = data["Embarked"].replace(["S", "C", "Q"], [0, 1, 2])
data["Initial"] = data["Initial"].replace(
    ["Mr", "Mrs", "Miss", "Master", "Other"], [0, 1, 2, 3, 4]
)
# Drop the raw columns that have been replaced by engineered features, plus
# the identifier.
data.drop(
    ["Name", "Age", "Ticket", "Fare", "Cabin", "Fare_Range", "PassengerId"],
    axis=1,
    inplace=True,
)
# Correlation heat map of the remaining columns.
sns.heatmap(
    data.corr(), annot=True, cmap="RdYlGn", linewidths=0.2, annot_kws={"size": 20}
)
fig = plt.gcf()
fig.set_size_inches(18, 15)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.show()
from sklearn import metrics  # accuracy measure
from sklearn import svm  # support vector Machine
from sklearn.ensemble import RandomForestClassifier  # Random Forest
from sklearn.linear_model import LogisticRegression  # logistic regression
from sklearn.metrics import confusion_matrix  # for confusion matrix
from sklearn.model_selection import train_test_split  # training and testing data split
from sklearn.naive_bayes import GaussianNB  # Naive bayes
from sklearn.neighbors import KNeighborsClassifier  # KNN
from sklearn.tree import DecisionTreeClassifier  # Decision Tree

# 70/30 split, stratified on Survived.
train, test = train_test_split(
    data, test_size=0.3, random_state=0, stratify=data["Survived"]
)
# The first column is used as the target and all remaining columns as
# features. The targets are selected with a one-element column list, so
# train_Y and test_Y are one-column frames.
# NOTE(review): presumably the first column is Survived - confirm.
train_X = train[train.columns[1:]]
train_Y = train[train.columns[:1]]
test_X = test[test.columns[1:]]
test_Y = test[test.columns[:1]]
# Full feature matrix and target, used by the cross-validation further down.
X = data[data.columns[1:]]
Y = data["Survived"]
# Fit several classifiers on the training part and print their accuracy on
# the held-out part.
model = svm.SVC(kernel="rbf", C=1, gamma=0.1)
model.fit(train_X, train_Y)
prediction1 = model.predict(test_X)
print("Accuracy for rbf SVM is ", metrics.accuracy_score(prediction1, test_Y))
model = svm.SVC(kernel="linear", C=0.1, gamma=0.1)
model.fit(train_X, train_Y)
prediction2 = model.predict(test_X)
print("Accuracy for linear SVM is", metrics.accuracy_score(prediction2, test_Y))
model = LogisticRegression()
model.fit(train_X, train_Y)
prediction3 = model.predict(test_X)
print(
    "The accuracy of the Logistic Regression is",
    metrics.accuracy_score(prediction3, test_Y),
)
model = DecisionTreeClassifier()
model.fit(train_X, train_Y)
prediction4 = model.predict(test_X)
print(
    "The accuracy of the Decision Tree is", metrics.accuracy_score(prediction4, test_Y)
)
model = KNeighborsClassifier()
model.fit(train_X, train_Y)
prediction5 = model.predict(test_X)
print("The accuracy of the KNN is", metrics.accuracy_score(prediction5, test_Y))
# Sweep n_neighbors from 1 to 10 and record the held-out accuracy of each fit.
a_index = list(range(1, 11))
x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
# Accuracies are collected in a list and wrapped in a Series once at the end.
# Series.append was removed from pandas and copied the whole series on every
# iteration. The values and their order are unchanged; only the index of
# ``a`` is now 0..9 instead of all zeros.
accuracies = []
for i in a_index:
    model = KNeighborsClassifier(n_neighbors=i)
    model.fit(train_X, train_Y)
    prediction = model.predict(test_X)
    accuracies.append(metrics.accuracy_score(prediction, test_Y))
a = pd.Series(accuracies)
plt.plot(a_index, a)
plt.xticks(x)
fig = plt.gcf()
fig.set_size_inches(12, 6)
plt.show()
print(
    "Accuracies for different values of n are:",
    a.values,
    "with the max value as ",
    a.values.max(),
)
# Gaussian naive Bayes on the same train/held-out split.
model = GaussianNB()
model.fit(train_X, train_Y)
prediction6 = model.predict(test_X)
print("The accuracy of the NaiveBayes is", metrics.accuracy_score(prediction6, test_Y))
# Random forest with 100 trees on the same split.
model = RandomForestClassifier(n_estimators=100)
model.fit(train_X, train_Y)
prediction7 = model.predict(test_X)
print(
    "The accuracy of the Random Forests is", metrics.accuracy_score(prediction7, test_Y)
)
from sklearn.model_selection import KFold  # for K-fold cross validation
from sklearn.model_selection import cross_val_predict  # prediction
from sklearn.model_selection import cross_val_score  # score evaluation

# 10-fold cross-validation of seven classifiers on the full data.
# random_state is deliberately not passed: without shuffle=True it has no
# effect on the folds, and scikit-learn rejects the combination with a
# ValueError. The folds are the same consecutive, unshuffled splits as before.
kfold = KFold(n_splits=10)  # k=10, split the data into 10 equal parts
xyz = []  # mean accuracy per model
accuracy = []  # per-fold accuracies per model
std = []  # standard deviation of the fold accuracies per model
# Display names, in the same order as ``models`` below.
classifiers = [
    "Linear Svm",
    "Radial Svm",
    "Logistic Regression",
    "KNN",
    "Decision Tree",
    "Naive Bayes",
    "Random Forest",
]
models = [
    svm.SVC(kernel="linear"),
    svm.SVC(kernel="rbf"),
    LogisticRegression(),
    KNeighborsClassifier(n_neighbors=9),
    DecisionTreeClassifier(),
    GaussianNB(),
    RandomForestClassifier(n_estimators=100),
]
for i in models:
    model = i
    cv_result = cross_val_score(model, X, Y, cv=kfold, scoring="accuracy")
    xyz.append(cv_result.mean())
    std.append(cv_result.std())
    accuracy.append(cv_result)
new_models_dataframe2 = pd.DataFrame({"CV Mean": xyz, "Std": std}, index=classifiers)
new_models_dataframe2
# Box plot of the per-fold accuracies (one box per model) followed by a
# horizontal bar chart of the mean accuracies.
plt.subplots(figsize=(12, 6))
box = pd.DataFrame(accuracy, index=[classifiers])
box.T.boxplot()
new_models_dataframe2["CV Mean"].plot.barh(width=0.8)
plt.title("Average CV Mean Accuracy")
fig = plt.gcf()
fig.set_size_inches(8, 5)
plt.show()
# Confusion matrices from 10-fold cross-validated predictions, one heat map
# per model on a 3 x 3 grid (seven of the nine axes are used).
f, ax = plt.subplots(3, 3, figsize=(12, 10))
y_pred = cross_val_predict(svm.SVC(kernel="rbf"), X, Y, cv=10)
sns.heatmap(confusion_matrix(Y, y_pred), ax=ax[0, 0], annot=True, fmt="2.0f")
ax[0, 0].set_title("Matrix for rbf-SVM")
y_pred = cross_val_predict(svm.SVC(kernel="linear"), X, Y, cv=10)
sns.heatmap(confusion_matrix(Y, y_pred), ax=ax[0, 1], annot=True, fmt="2.0f")
ax[0, 1].set_title("Matrix for Linear-SVM")
y_pred = cross_val_predict(KNeighborsClassifier(n_neighbors=9), X, Y, cv=10)
sns.heatmap(confusion_matrix(Y, y_pred), ax=ax[0, 2], annot=True, fmt="2.0f")
ax[0, 2].set_title("Matrix for KNN")
y_pred = cross_val_predict(RandomForestClassifier(n_estimators=100), X, Y, cv=10)
sns.heatmap(confusion_matrix(Y, y_pred), ax=ax[1, 0], annot=True, fmt="2.0f")
ax[1, 0].set_title("Matrix for Random-Forests")
y_pred = cross_val_predict(LogisticRegression(), X, Y, cv=10)
sns.heatmap(confusion_matrix(Y, y_pred), ax=ax[1, 1], annot=True, fmt="2.0f")
ax[1, 1].set_title("Matrix for Logistic Regression")
y_pred = cross_val_predict(DecisionTreeClassifier(), X, Y, cv=10)
sns.heatmap(confusion_matrix(Y, y_pred), ax=ax[1, 2], annot=True, fmt="2.0f")
ax[1, 2].set_title("Matrix for Decision Tree")
y_pred = cross_val_predict(GaussianNB(), X, Y, cv=10)
sns.heatmap(confusion_matrix(Y, y_pred), ax=ax[2, 0], annot=True, fmt="2.0f")
ax[2, 0].set_title("Matrix for Naive Bayes")
plt.subplots_adjust(hspace=0.2, wspace=0.2)
plt.show()
from sklearn.model_selection import GridSearchCV

# Grid search for the SVM over C, gamma and the kernel, fitted on the full
# data; prints the best score and estimator.
C = [0.05, 0.1, 0.2, 0.3, 0.25, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
gamma = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
kernel = ["rbf", "linear"]
hyper = {"kernel": kernel, "C": C, "gamma": gamma}
gd = GridSearchCV(estimator=svm.SVC(), param_grid=hyper, verbose=True)
gd.fit(X, Y)
print(gd.best_score_)
print(gd.best_estimator_)
# Grid search for the random forest over n_estimators = 100, 200, ..., 900.
n_estimators = range(100, 1000, 100)
hyper = {"n_estimators": n_estimators}
gd = GridSearchCV(
    estimator=RandomForestClassifier(random_state=0), param_grid=hyper, verbose=True
)
gd.fit(X, Y)
print(gd.best_score_)
print(gd.best_estimator_)
from sklearn.ensemble import VotingClassifier

# Soft-voting ensemble of seven classifiers. Both SVCs are created with
# probability=True.
ensemble_lin_rbf = VotingClassifier(
    estimators=[
        ("KNN", KNeighborsClassifier(n_neighbors=10)),
        ("RBF", svm.SVC(probability=True, kernel="rbf", C=0.5, gamma=0.1)),
        ("RFor", RandomForestClassifier(n_estimators=500, random_state=0)),
        ("LR", LogisticRegression(C=0.05)),
        ("DT", DecisionTreeClassifier(random_state=0)),
        ("NB", GaussianNB()),
        ("svm", svm.SVC(kernel="linear", probability=True)),
    ],
    voting="soft",
).fit(train_X, train_Y)
# Held-out accuracy, then the mean 10-fold cross-validated accuracy on the
# full data.
print("The accuracy for ensembled model is:", ensemble_lin_rbf.score(test_X, test_Y))
cross = cross_val_score(ensemble_lin_rbf, X, Y, cv=10, scoring="accuracy")
print("The cross validated score is", cross.mean())
from sklearn.ensemble import BaggingClassifier

model = BaggingClassifier(
    base_estimator=KNeighborsClassifier(n_neighbors=3), random_state=0, n_estimators=700
)
model.fit(train_X, train_Y)
prediction = model.predict(test_X)
print("The accuracy for bagged KNN is:", metrics.accuracy_score(prediction, test_Y))
result = cross_val_score(model, X, Y, cv=10, scoring="accuracy")
print("The cross validated score for bagged KNN is:", result.mean())
model = BaggingClassifier(
    base_estimator=DecisionTreeClassifier(), random_state=0, n_estimators=100
)
model.fit(train_X, train_Y)
prediction = model.predict(test_X)
print(
    "The accuracy for bagged Decision Tree is:",
    metrics.accuracy_score(prediction, test_Y),
)
result = cross_val_score(model, X, Y, cv=10, scoring="accuracy")
print("The cross validated score for bagged Decision Tree is:", result.mean())
from sklearn.ensemble import AdaBoostClassifier

ada = AdaBoostClassifier(n_estimators=200, random_state=0, learning_rate=0.1)
result = cross_val_score(ada, X, Y, cv=10, scoring="accuracy")
print("The cross validated score for AdaBoost is:", result.mean())
from sklearn.ensemble import GradientBoostingClassifier

grad = GradientBoostingClassifier(n_estimators=500, random_state=0, learning_rate=0.1)
result = cross_val_score(grad, X, Y, cv=10, scoring="accuracy")
print("The cross validated score for Gradient Boosting is:", result.mean())
import xgboost as xg

xgboost = xg.XGBClassifier(n_estimators=900, learning_rate=0.1)
result = cross_val_score(xgboost, X, Y, cv=10, scoring="accuracy")
print("The cross validated score for XGBoost is:", result.mean())
n_estimators = list(range(100, 1100, 100))
learn_rate = [0.05, 0.1, 0.2, 0.3, 0.25, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
hyper = {"n_estimators": n_estimators, "learning_rate": learn_rate}
gd = GridSearchCV(estimator=AdaBoostClassifier(), param_grid=hyper, verbose=True)
gd.fit(X, Y)
print(gd.best_score_)
print(gd.best_estimator_)
ada = AdaBoostClassifier(n_estimators=200, random_state=0, learning_rate=0.05)
result = cross_val_predict(ada, X, Y, cv=10)
sns.heatmap(confusion_matrix(Y, result), cmap="winter", annot=True, fmt="2.0f")
plt.show()
f, ax = plt.subplots(2, 2, figsize=(15, 12))
model = RandomForestClassifier(n_estimators=500, random_state=0)
model.fit(X, Y)
pd.Series(model.feature_importances_, X.columns).sort_values(ascending=True).plot.barh(
    width=0.8, ax=ax[0, 0]
)
ax[0, 0].set_title("Feature Importance in Random Forests")
model = AdaBoostClassifier(n_estimators=200, learning_rate=0.05, random_state=0)
model.fit(X, Y)
pd.Series(model.feature_importances_, X.columns).sort_values(ascending=True).plot.barh(
    width=0.8, ax=ax[0, 1], color="#ddff11"
)
ax[0, 1].set_title("Feature Importance in AdaBoost")
model = GradientBoostingClassifier(n_estimators=500, learning_rate=0.1, random_state=0)
model.fit(X, Y)
pd.Series(model.feature_importances_, X.columns).sort_values(ascending=True).plot.barh(
    width=0.8, ax=ax[1, 0], cmap="RdYlGn_r"
)
ax[1, 0].set_title("Feature Importance in Gradient Boosting")
model = xg.XGBClassifier(n_estimators=900, learning_rate=0.1)
model.fit(X, Y)
pd.Series(model.feature_importances_, X.columns).sort_values(ascending=True).plot.barh(
    width=0.8, ax=ax[1, 1], color="#FD0F00"
)
ax[1, 1].set_title("Feature Importance in XgBoost")
plt.show()


================================================
FILE: stress_tests/kaggle/kaggle17.py
================================================
import modin.pandas as pd

# The CSV path is relative, so the file is looked up in the working directory.
melbourne_file_path = "melb_data.csv"
melbourne_data = pd.read_csv(melbourne_file_path)
print(melbourne_data.columns)
# Attribute-style access to the "Price" column.
melbourne_price_data = melbourne_data.Price
print(melbourne_price_data.head())
# Selecting with a list of labels yields a two-column frame.
columns_of_interest = ["Landsize", "BuildingArea"]
two_columns_of_data = melbourne_data[columns_of_interest]
# The summary is computed but its result is discarded (nothing is printed).
two_columns_of_data.describe()


================================================
FILE: stress_tests/kaggle/kaggle18.py
================================================
#!/usr/bin/env python  # noqa: E902
import matplotlib

matplotlib.use("PS")
import re
import string

import matplotlib.pyplot as plt
import nltk
import numpy as np
import pandas as pd
import seaborn as sns

sns.set(style="white")
import warnings
from collections import Counter

import bokeh.plotting as bp
import plotly.graph_objs as go
import plotly.offline as py
from bokeh.models import HoverTool  # BoxSelectTool
from bokeh.models import ColumnDataSource
from bokeh.plotting import output_notebook, show  # figure
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize, word_tokenize
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.feature_extraction import stop_words
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from wordcloud import WordCloud

# Silence every warning for the remainder of the script.
warnings.filterwarnings("ignore")
import logging

# Let only WARNING and above through on the "lda" logger.
logging.getLogger("lda").setLevel(logging.WARNING)
# Fetch the nltk resources named here (presumably what sent_tokenize /
# word_tokenize and stopwords.words rely on further down).
nltk.download("punkt")
nltk.download("stopwords")
# Both CSVs are read from the current working directory.
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")
print(train.shape)
print(test.shape)
# The next three expressions are evaluated and their results discarded.
train.dtypes
train.head()
train.price.describe()
# Left panel: histogram of the raw price, restricted to the 0-250 range.
plt.subplot(1, 2, 1)
(train["price"]).plot.hist(bins=50, figsize=(20, 10), edgecolor="white", range=[0, 250])
plt.xlabel("price+", fontsize=17)
plt.ylabel("frequency", fontsize=17)
plt.tick_params(labelsize=15)
plt.title("Price Distribution - Training Set", fontsize=17)
# Right panel: histogram of log(price + 1); the +1 keeps the log defined at 0.
plt.subplot(1, 2, 2)
np.log(train["price"] + 1).plot.hist(bins=50, figsize=(20, 10), edgecolor="white")
plt.xlabel("log(price+1)", fontsize=17)
plt.ylabel("frequency", fontsize=17)
plt.tick_params(labelsize=15)
plt.title("Log(Price) Distribution - Training Set", fontsize=17)
plt.show()
# Fraction of rows per shipping flag value; the result is discarded.
train.shipping.value_counts() / len(train)
# Split prices by the shipping flag (1 vs 0).
prc_shipBySeller = train.loc[train.shipping == 1, "price"]
prc_shipByBuyer = train.loc[train.shipping == 0, "price"]
fig, ax = plt.subplots(figsize=(20, 10))
# Overlay the two log(price + 1) histograms on the same axes.
ax.hist(
    np.log(prc_shipBySeller + 1),
    color="#8CB4E1",
    alpha=1.0,
    bins=50,
    label="Price when Seller pays Shipping",
)
ax.hist(
    np.log(prc_shipByBuyer + 1),
    color="#007D00",
    alpha=0.7,
    bins=50,
    label="Price when Buyer pays Shipping",
)
# NOTE(review): the title and y-label set here are presumably replaced by the
# plt.ylabel / plt.title calls below, which target the current axes -- confirm.
ax.set(title="Histogram Comparison", ylabel="% of Dataset in Bin")
plt.xlabel("log(price+1)", fontsize=17)
plt.ylabel("frequency", fontsize=17)
plt.title("Price Distribution by Shipping Type", fontsize=17)
plt.tick_params(labelsize=15)
plt.show()
print(
    "There are %d unique values in the category column."
    % train["category_name"].nunique()
)
# Five most frequent category paths; the result is discarded.
train["category_name"].value_counts()[:5]
print(
    "There are %d items that do not have a label."
    % train["category_name"].isnull().sum()
)


def split_cat(text):
    """
    Split a "/"-separated category path into its levels.

    Parameters
    ----------
    text : str or missing value
        Category path such as ``"Men/Tops/T-shirts"``. Non-string values
        (for example ``NaN`` or ``None``) are treated as unlabeled.

    Returns
    -------
    list of str or tuple of str
        ``text.split("/")`` for strings, otherwise the 3-tuple
        ``("No Label", "No Label", "No Label")``.
    """
    try:
        return text.split("/")
    except (AttributeError, TypeError):
        # Only the failures a non-string value produces are treated as
        # "no label"; anything else is a real error and should surface.
        return ("No Label", "No Label", "No Label")


# Transpose the per-row level lists into columns. zip() stops at the shortest
# row, so unpacking into three targets requires the shortest row to have
# exactly three levels; deeper levels of longer rows are dropped.
train["general_cat"], train["subcat_1"], train["subcat_2"] = zip(
    *train["category_name"].apply(lambda x: split_cat(x))
)
train.head()
test["general_cat"], test["subcat_1"], test["subcat_2"] = zip(
    *test["category_name"].apply(lambda x: split_cat(x))
)
print("There are %d unique first sub-categories." % train["subcat_1"].nunique())
print("There are %d unique second sub-categories." % train["subcat_2"].nunique())
# Bar chart of row counts per main category; bar text is the share of rows.
x = train["general_cat"].value_counts().index.values.astype("str")
y = train["general_cat"].value_counts().values
pct = [("%.2f" % (v * 100)) + "%" for v in (y / len(train))]
trace1 = go.Bar(x=x, y=y, text=pct)
layout = {
    "title": "Number of Items by Main Category",
    "yaxis": {"title": "Count"},
    "xaxis": {"title": "Category"},
}
fig = {"data": [trace1], "layout": layout}
py.iplot(fig)
# Same chart for the first 15 entries of the subcat_1 value counts, with the
# bars colored by their count.
x = train["subcat_1"].value_counts().index.values.astype("str")[:15]
y = train["subcat_1"].value_counts().values[:15]
pct = [("%.2f" % (v * 100)) + "%" for v in (y / len(train))][:15]
trace1 = go.Bar(
    x=x,
    y=y,
    text=pct,
    marker={
        "color": y,
        "colorscale": "Portland",
        "showscale": True,
        "reversescale": False,
    },
)
layout = {
    "title": "Number of Items by Sub Category (Top 15)",
    "yaxis": {"title": "Count"},
    "xaxis": {"title": "SubCategory"},
}
fig = {"data": [trace1], "layout": layout}
py.iplot(fig)
general_cats = train["general_cat"].unique()
# One price Series per general category, in the same order as general_cats.
x = [train.loc[train["general_cat"] == cat, "price"] for cat in general_cats]
# One horizontal box trace of log(price + 1) per category.
data = [
    go.Box(x=np.log(x[i] + 1), name=general_cats[i]) for i in range(len(general_cats))
]
layout = {
    "title": "Price Distribution by General Category",
    "yaxis": {"title": "Frequency"},
    "xaxis": {"title": "Category"},
}
fig = {"data": data, "layout": layout}
py.iplot(fig)
print(
    "There are %d unique brand names in the training dataset."
    % train["brand_name"].nunique()
)
# First 10 entries of the brand value counts. NOTE: nothing in this script
# reads these two values before x and y are reassigned for the word chart.
x = train["brand_name"].value_counts().index.values.astype("str")[:10]
y = train["brand_name"].value_counts().values[:10]


def wordCount(text):
    """
    Count the informative words in a piece of text.

    The text is lower-cased; punctuation, digits, carriage returns, tabs and
    newlines are replaced by spaces; the result is split on single spaces.
    A word is counted when it is longer than three characters and is not in
    ``stop_words.ENGLISH_STOP_WORDS``.

    Parameters
    ----------
    text : str or missing value
        Description to analyse. Non-string values (for example ``NaN``)
        count as zero words.

    Returns
    -------
    int
        Number of counted words.
    """
    regex = re.compile("[" + re.escape(string.punctuation) + "0-9\\r\\t\\n]")
    try:
        txt = regex.sub(" ", text.lower())
    except (AttributeError, TypeError):
        # Missing descriptions are not strings; everything else that goes
        # wrong below is a genuine error and is no longer hidden as a 0.
        return 0
    # The cheap length test runs first so the stop-word lookup is skipped for
    # short words.
    return sum(
        1
        for w in txt.split(" ")
        if len(w) > 3 and w not in stop_words.ENGLISH_STOP_WORDS
    )


# Word count of every description, for both data sets.
train["desc_len"] = train["item_description"].apply(lambda x: wordCount(x))
test["desc_len"] = test["item_description"].apply(lambda x: wordCount(x))
train.head()
# Mean price for each distinct description length.
df = train.groupby("desc_len")["price"].mean().reset_index()
trace1 = go.Scatter(
    x=df["desc_len"],
    y=np.log(df["price"] + 1),
    mode="lines+markers",
    name="lines+markers",
)
layout = {
    "title": "Average Log(Price) by Description Length",
    "yaxis": {"title": "Average Log(Price)"},
    "xaxis": {"title": "Description Length"},
}
fig = {"data": [trace1], "layout": layout}
py.iplot(fig)
# Number of missing descriptions; the result is discarded.
train.item_description.isnull().sum()
# Keep only training rows that have a description; `test` is not filtered.
train = train[pd.notnull(train["item_description"])]
stop = set(stopwords.words("english"))


def tokenize(text):
    """
    Turn raw text into a list of lower-cased word tokens.

    sent_tokenize(): segment text into sentences
    word_tokenize(): break sentences into words

    Punctuation, digits, carriage returns, tabs and newlines are replaced by
    spaces first. A token is kept when it is not in the module-level ``stop``
    set (compared in lower case), contains at least one ASCII letter and is
    at least three characters long.

    Parameters
    ----------
    text : str or missing value
        Text to tokenize.

    Returns
    -------
    list of str
        The kept tokens, lower-cased. When a ``TypeError`` is raised (for
        example because ``text`` is ``NaN``), the offending value and error
        are printed and an empty list is returned.
    """
    try:
        regex = re.compile("[" + re.escape(string.punctuation) + "0-9\\r\\t\\n]")
        text = regex.sub(" ", text)  # remove punctuation
        words = [
            word for sentence in sent_tokenize(text) for word in word_tokenize(sentence)
        ]
        return [
            word.lower()
            for word in words
            if word.lower() not in stop
            and re.search("[a-zA-Z]", word)
            and len(word) >= 3
        ]
    except TypeError as err:
        print(text, err)
        # Return an empty token list instead of an implicit None so callers
        # that iterate over the result or take its length keep working.
        return []


# Tokenize all descriptions of one general category as a single document.
cat_desc = {}
for cat in general_cats:
    text = " ".join(train.loc[train["general_cat"] == cat, "item_description"].values)
    cat_desc[cat] = tokenize(text)
flat_lst = [item for sublist in cat_desc.values() for item in sublist]
allWordsCount = Counter(flat_lst)
# Despite its name, this holds the 20 most common words.
all_top10 = allWordsCount.most_common(20)
x = [w[0] for w in all_top10]
y = [w[1] for w in all_top10]
# Bar text is each word's share of all tokens. The chart previously reused
# `pct`, the 15-entry sub-category percentage list computed for an earlier
# chart, which neither matched these 20 bars nor described them.
total_words = sum(allWordsCount.values())
word_pct = [("%.2f" % (count * 100 / total_words)) + "%" for count in y]
trace1 = go.Bar(x=x, y=y, text=word_pct)
layout = {
    "title": "Word Frequency",
    "yaxis": {"title": "Count"},
    "xaxis": {"title": "Word"},
}
fig = {"data": [trace1], "layout": layout}
py.iplot(fig)
stop = set(stopwords.words("english"))


def tokenize(text):
    """
    Turn raw text into a list of lower-cased word tokens.

    This re-declares the ``tokenize`` defined earlier in this script with the
    same filtering rules: punctuation, digits, carriage returns, tabs and
    newlines become spaces; the text is split into sentences and then words;
    a token is kept when it is not in the module-level ``stop`` set (compared
    in lower case), contains at least one ASCII letter and is at least three
    characters long.

    Parameters
    ----------
    text : str or missing value
        Text to tokenize.

    Returns
    -------
    list of str
        The kept tokens, lower-cased. When a ``TypeError`` is raised (for
        example because ``text`` is ``NaN``), the offending value and error
        are printed and an empty list is returned.
    """
    try:
        regex = re.compile("[" + re.escape(string.punctuation) + "0-9\\r\\t\\n]")
        text = regex.sub(" ", text)  # remove punctuation
        words = [
            word for sentence in sent_tokenize(text) for word in word_tokenize(sentence)
        ]
        return [
            word.lower()
            for word in words
            if word.lower() not in stop
            and re.search("[a-zA-Z]", word)
            and len(word) >= 3
        ]
    except TypeError as err:
        print(text, err)
        # Return an empty token list instead of an implicit None: the result
        # is stored in a "tokens" column whose lengths are taken later on.
        return []


# Token list per description, for both data sets.
train["tokens"] = train["item_description"].map(tokenize)
test["tokens"] = test["item_description"].map(tokenize)
train.reset_index(drop=True, inplace=True)
test.reset_index(drop=True, inplace=True)
# Show the first few descriptions next to their tokens.
for description, tokens in zip(
    train["item_description"].head(), train["tokens"].head()
):
    print("description:", description)
    print("tokens:", tokens)
    print()
# Rebuild the per-category token lists (same computation as earlier in the
# script, now on the filtered and re-indexed `train`).
cat_desc = {}
for cat in general_cats:
    text = " ".join(train.loc[train["general_cat"] == cat, "item_description"].values)
    cat_desc[cat] = tokenize(text)
import sys

# The script terminates here: no statement below this call is executed.
sys.exit()
women100 = Counter(cat_desc["Women"]).most_common(100)
beauty100 = Counter(cat_desc["Beauty"]).most_common(100)
kids100 = Counter(cat_desc["Kids"]).most_common(100)
electronics100 = Counter(cat_desc["Electronics"]).most_common(100)


def generate_wordcloud(tup):
    """
    Build a word cloud from the string form of `tup`.

    Parameters
    ----------
    tup : object
        Anything convertible with ``str``; the callers in this script pass
        lists of ``(word, count)`` pairs.

    Returns
    -------
    The object returned by ``WordCloud(...).generate``.
    """
    cloud_settings = {
        "background_color": "white",
        "max_words": 50,
        "max_font_size": 40,
        "random_state": 42,
    }
    return WordCloud(**cloud_settings).generate(str(tup))


# 2x2 grid with one word cloud per category; axes are hidden on every panel.
fig, axes = plt.subplots(2, 2, figsize=(30, 15))
ax = axes[0, 0]
# Only this first panel requests bilinear interpolation.
ax.imshow(generate_wordcloud(women100), interpolation="bilinear")
ax.axis("off")
ax.set_title("Women Top 100", fontsize=30)
ax = axes[0, 1]
ax.imshow(generate_wordcloud(beauty100))
ax.axis("off")
ax.set_title("Beauty Top 100", fontsize=30)
ax = axes[1, 0]
ax.imshow(generate_wordcloud(kids100))
ax.axis("off")
ax.set_title("Kids Top 100", fontsize=30)
ax = axes[1, 1]
ax.imshow(generate_wordcloud(electronics100))
ax.axis("off")
ax.set_title("Electronic Top 100", fontsize=30)
# tf-idf over unigrams and bigrams, using this script's tokenize().
vectorizer = TfidfVectorizer(
    min_df=10, max_features=180000, tokenizer=tokenize, ngram_range=(1, 2)
)
# Fit on the descriptions of train and test together.
all_desc = np.append(train["item_description"].values, test["item_description"].values)
vz = vectorizer.fit_transform(list(all_desc))
tfidf = dict(zip(vectorizer.get_feature_names(), vectorizer.idf_))
# NOTE(review): from_dict is called on a throwaway instance; presumably the
# columns=["tfidf"] passed to the constructor has no effect on the result,
# which is why the column is named explicitly on the next line -- confirm.
tfidf = pd.DataFrame(columns=["tfidf"]).from_dict(dict(tfidf), orient="index")
tfidf.columns = ["tfidf"]
# Lowest and highest scores; both results are discarded.
tfidf.sort_values(by=["tfidf"], ascending=True).head(10)
tfidf.sort_values(by=["tfidf"], ascending=False).head(10)
trn = train.copy()
tst = test.copy()
trn["is_train"] = 1
tst["is_train"] = 0
sample_sz = 15000
combined_df = pd.concat([trn, tst])
# No random_state is passed, so the sample differs between runs.
combined_sample = combined_df.sample(n=sample_sz)
# This re-fits `vectorizer` on the sample, replacing the fit done on all_desc.
vz_sample = vectorizer.fit_transform(list(combined_sample["item_description"]))
from sklearn.decomposition import TruncatedSVD

# Reduce the sample's tf-idf matrix to 30 components before running t-SNE.
n_comp = 30
svd = TruncatedSVD(n_components=n_comp, random_state=42)
svd_tfidf = svd.fit_transform(vz_sample)
from sklearn.manifold import TSNE

tsne_model = TSNE(n_components=2, verbose=1, random_state=42, n_iter=500)
tsne_tfidf = tsne_model.fit_transform(svd_tfidf)
output_notebook()
plot_tfidf = bp.figure(
    plot_width=700,
    plot_height=600,
    title="tf-idf clustering of the item description",
    tools="pan,wheel_zoom,box_zoom,reset,hover,previewsave",
    x_axis_type=None,
    y_axis_type=None,
    min_border=1,
)
# Reset to a 0..n-1 index so the column assignments below line up with the
# freshly built tfidf_df.
combined_sample.reset_index(inplace=True, drop=True)
tfidf_df = pd.DataFrame(tsne_tfidf, columns=["x", "y"])
tfidf_df["description"] = combined_sample["item_description"]
tfidf_df["tokens"] = combined_sample["tokens"]
tfidf_df["category"] = combined_sample["general_cat"]
plot_tfidf.scatter(x="x", y="y", source=tfidf_df, alpha=0.7)
# Attach the extra columns to the figure's hover tool.
hover = plot_tfidf.select({"type": HoverTool})
hover.tooltips = {
    "description": "@description",
    "tokens": "@tokens",
    "category": "@category",
}
show(plot_tfidf)
from sklearn.cluster import MiniBatchKMeans

num_clusters = 30  # need to be selected wisely
kmeans_model = MiniBatchKMeans(
    n_clusters=num_clusters,
    init="k-means++",
    n_init=1,
    init_size=1000,
    batch_size=1000,
    verbose=0,
    max_iter=1000,
)
# First pass: cluster the tf-idf matrix built from all descriptions.
kmeans = kmeans_model.fit(vz)
kmeans_clusters = kmeans.predict(vz)
kmeans_distances = kmeans.transform(vz)
# Feature indices of each centroid, ordered by descending weight.
sorted_centroids = kmeans.cluster_centers_.argsort()[:, ::-1]
# NOTE(review): `vectorizer` was re-fitted on the 15000-row sample after `vz`
# was built, so these terms may not line up with the columns of `vz` -- confirm.
terms = vectorizer.get_feature_names()
for i in range(num_clusters):
    print("Cluster %d:" % i)
    aux = ""
    for j in sorted_centroids[i, :10]:
        aux += terms[j] + " | "
    print(aux)
    print()
# Second pass: the same model object is re-fitted on the sample, and the
# cluster labels and distances from the first pass are overwritten.
kmeans = kmeans_model.fit(vz_sample)
kmeans_clusters = kmeans.predict(vz_sample)
kmeans_distances = kmeans.transform(vz_sample)
# 2-D embedding of each sampled row's distances to the cluster centers.
tsne_kmeans = tsne_model.fit_transform(kmeans_distances)
# 30 colors, indexed below by cluster id (and later by LDA topic id).
colormap = np.array(
    [
        "#6d8dca",
        "#69de53",
        "#723bca",
        "#c3e14c",
        "#c84dc9",
        "#68af4e",
        "#6e6cd5",
        "#e3be38",
        "#4e2d7c",
        "#5fdfa8",
        "#d34690",
        "#3f6d31",
        "#d44427",
        "#7fcdd8",
        "#cb4053",
        "#5e9981",
        "#803a62",
        "#9b9e39",
        "#c88cca",
        "#e1c37b",
        "#34223b",
        "#bdd8a3",
        "#6e3326",
        "#cfbdce",
        "#d07d3c",
        "#52697d",
        "#194196",
        "#d27c88",
        "#36422b",
        "#b68f79",
    ]
)
# One row per sampled item: 2-D position, cluster id and hover metadata.
kmeans_df = pd.DataFrame(tsne_kmeans, columns=["x", "y"])
kmeans_df["cluster"] = kmeans_clusters
kmeans_df["description"] = combined_sample["item_description"]
kmeans_df["category"] = combined_sample["general_cat"]
plot_kmeans = bp.figure(
    plot_width=700,
    plot_height=600,
    title="KMeans clustering of the description",
    tools="pan,wheel_zoom,box_zoom,reset,hover,previewsave",
    x_axis_type=None,
    y_axis_type=None,
    min_border=1,
)
source = ColumnDataSource(
    data={
        "x": kmeans_df["x"],
        "y": kmeans_df["y"],
        # Fancy indexing maps every cluster id to its color string.
        "color": colormap[kmeans_clusters],
        "description": kmeans_df["description"],
        "category": kmeans_df["category"],
        "cluster": kmeans_df["cluster"],
    }
)
plot_kmeans.scatter(x="x", y="y", color="color", source=source)
hover = plot_kmeans.select({"type": HoverTool})
hover.tooltips = {
    "description": "@description",
    "category": "@category",
    "cluster": "@cluster",
}
show(plot_kmeans)
# Raw term counts (unigrams and bigrams) of the sampled descriptions.
cvectorizer = CountVectorizer(
    min_df=4, max_features=180000, tokenizer=tokenize, ngram_range=(1, 2)
)
cvz = cvectorizer.fit_transform(combined_sample["item_description"])
lda_model = LatentDirichletAllocation(
    n_components=20, learning_method="online", max_iter=20, random_state=42
)
X_topics = lda_model.fit_transform(cvz)
n_top_words = 10
topic_summaries = []
topic_word = lda_model.components_  # get the topic words
vocab = cvectorizer.get_feature_names()
for i, topic_dist in enumerate(topic_word):
    # argsort is ascending, so the reversed tail slice yields the n_top_words
    # highest-weighted terms, largest first.
    topic_words = np.array(vocab)[np.argsort(topic_dist)][: -(n_top_words + 1) : -1]
    topic_summaries.append(" ".join(topic_words))
    print("Topic {}: {}".format(i, " | ".join(topic_words)))
tsne_lda = tsne_model.fit_transform(X_topics)
# Divide every row by its sum so each document's topic weights add up to 1.
unnormalized = np.matrix(X_topics)
doc_topic = unnormalized / unnormalized.sum(axis=1)
# Index of the highest-weighted topic for each sampled document.
lda_keys = []
for i, tweet in enumerate(combined_sample["item_description"]):
    lda_keys += [doc_topic[i].argmax()]
lda_df = pd.DataFrame(tsne_lda, columns=["x", "y"])
lda_df["description"] = combined_sample["item_description"]
lda_df["category"] = combined_sample["general_cat"]
lda_df["topic"] = lda_keys
lda_df["topic"] = lda_df["topic"].map(int)
plot_lda = bp.figure(
    plot_width=700,
    plot_height=600,
    title="LDA topic visualization",
    tools="pan,wheel_zoom,box_zoom,reset,hover,previewsave",
    x_axis_type=None,
    y_axis_type=None,
    min_border=1,
)
source = ColumnDataSource(
    data={
        "x": lda_df["x"],
        "y": lda_df["y"],
        # Each document is colored by its highest-weighted topic.
        "color": colormap[lda_keys],
        "description": lda_df["description"],
        "topic": lda_df["topic"],
        "category": lda_df["category"],
    }
)
plot_lda.scatter(source=source, x="x", y="y", color="color")
# Configure the hover tool of *this* figure. A preceding lookup of the k-means
# figure's hover tool was dropped: its result was overwritten right away.
hover = plot_lda.select({"type": HoverTool})
hover.tooltips = {
    "description": "@description",
    "topic": "@topic",
    "category": "@category",
}
show(plot_lda)


def prepareLDAData():
    """
    Collect the keyword arguments later unpacked into ``pyLDAvis.prepare``.

    Reads the module-level ``vocab``, ``doc_topic``, ``lda_df``,
    ``cvectorizer`` and ``lda_model``; ``lda_df`` must already have a
    ``"len_docs"`` column.

    Returns
    -------
    dict
        Keys ``vocab``, ``doc_topic_dists``, ``doc_lengths``,
        ``term_frequency`` and ``topic_term_dists``.
    """
    # NOTE(review): ``vocabulary_`` is passed as "term_frequency" and the raw
    # ``components_`` as "topic_term_dists"; verify that these are the inputs
    # pyLDAvis expects under those names.
    return dict(
        vocab=vocab,
        doc_topic_dists=doc_topic,
        doc_lengths=list(lda_df["len_docs"]),
        term_frequency=cvectorizer.vocabulary_,
        topic_term_dists=lda_model.components_,
    )


import pyLDAvis

# Token count per sampled document; prepareLDAData() reads this column.
lda_df["len_docs"] = combined_sample["tokens"].map(len)
ldadata = prepareLDAData()
pyLDAvis.enable_notebook()
prepared_data = pyLDAvis.prepare(**ldadata)


================================================
FILE: stress_tests/kaggle/kaggle19.py
================================================
#!/usr/bin/env python
# coding: utf-8
import matplotlib

matplotlib.use("PS")
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Silence every warning for the remainder of the script.
warnings.filterwarnings("ignore")
train = pd.read_csv("train.csv")
train.info()
train.head()
print(
    "The average person kills {:.4f} players, 99% of people have {} kills or less, while the most kills ever recorded is {}.".format(
        train["kills"].mean(), train["kills"].quantile(0.99), train["kills"].max()
    )
)
data = train.copy()
# Lump everything above the 99th percentile into one "8+" bucket. The mask is
# computed on the numeric column first; only the "kills" column is relabelled
# (a bare .loc[mask] assignment would overwrite every column of those rows).
top_killers = data["kills"] > data["kills"].quantile(0.99)
data["kills"] = data["kills"].astype("str")
data.loc[top_killers, "kills"] = "8+"
plt.figure(figsize=(15, 10))
# Pass the Series as `x` explicitly rather than relying on argument position.
sns.countplot(x=data["kills"].sort_values())
plt.title("Kill Count", fontsize=15)
plt.show()
# Players who finished with zero kills.
data = train.copy()
data = data[data["kills"] == 0]
# NOTE: no plotting call sits between figure() and show(), so this figure
# carries only its title.
plt.figure(figsize=(15, 10))
plt.title("Damage Dealt by 0 killers", fontsize=15)
plt.show()
print(
    "{} players ({:.4f}%) have won without a single kill!".format(
        len(data[data["winPlacePerc"] == 1]),
        100 * len(data[data["winPlacePerc"] == 1]) / len(train),
    )
)
# Players who dealt no damage at all.
data1 = train[train["damageDealt"] == 0].copy()
print(
    "{} players ({:.4f}%) have won without dealing damage!".format(
        len(data1[data1["winPlacePerc"] == 1]),
        100 * len(data1[data1["winPlacePerc"] == 1]) / len(train),
    )
)
kills = train.copy()
# Bin kill counts into the half-open intervals (-1, 0], (0, 2], (2, 5],
# (5, 10] and (10, 60], one per label.
kills["killsCategories"] = pd.cut(
    kills["kills"],
    [-1, 0, 2, 5, 10, 60],
    labels=["0_kills", "1-2_kills", "3-5_kills", "6-10_kills", "10+_kills"],
)
plt.figure(figsize=(15, 8))
sns.boxplot(x="killsCategories", y="winPlacePerc", data=kills)
plt.show()
print(
    "The average person walks for {:.1f}m, 99% of people have walked {}m or less, while the marathoner champion walked for {}m.".format(
        train["walkDistance"].mean(),
        train["walkDistance"].quantile(0.99),
        train["walkDistance"].max(),
    )
)
# Drop the top 1% of walkers so the distribution plot is not stretched.
data = train.copy()
data = data[data["walkDistance"] < train["walkDistance"].quantile(0.99)]
plt.figure(figsize=(15, 10))
plt.title("Walking Distance Distribution", fontsize=15)
sns.distplot(data["walkDistance"])
plt.show()
# Count and percentage now describe the same rows; the percentage used to be
# derived from `data1`, the unrelated zero-damage subset.
zero_walkers = len(data[data["walkDistance"] == 0])
print(
    "{} players ({:.4f}%) walked 0 meters. This means that they die before even taking a step or they are afk (more possible).".format(
        zero_walkers,
        100 * zero_walkers / len(train),
    )
)
print(
    "The average person drives for {:.1f}m, 99% of people have drived {}m or less, while the formula 1 champion drived for {}m.".format(
        train["rideDistance"].mean(),
        train["rideDistance"].quantile(0.99),
        train["rideDistance"].max(),
    )
)
# Keep rides below the 90th percentile for the distribution plot.
data = train.copy()
data = data[data["rideDistance"] < train["rideDistance"].quantile(0.9)]
plt.figure(figsize=(15, 10))
plt.title("Ride Distance Distribution", fontsize=15)
sns.distplot(data["rideDistance"])
plt.show()
# Same correction as for walking: one subset feeds both numbers.
zero_riders = len(data[data["rideDistance"] == 0])
print(
    "{} players ({:.4f}%) drived for 0 meters. This means that they don't have a driving licence yet.".format(
        zero_riders,
        100 * zero_riders / len(train),
    )
)
f, ax1 = plt.subplots(figsize=(20, 10))
# NOTE: `data` is still the frame filtered on rideDistance above.
sns.pointplot(
    x="vehicleDestroys", y="winPlacePerc", data=data, color="#606060", alpha=0.8
)
plt.xlabel("Number of Vehicle Destroys", fontsize=15, color="blue")
plt.ylabel("Win Percentage", fontsize=15, color="blue")
plt.title("Vehicle Destroys/ Win Ratio", fontsize=20, color="blue")
plt.grid()
plt.show()
print(
    "The average person swims for {:.1f}m, 99% of people have swimemd {}m or less, while the olympic champion swimmed for {}m.".format(
        train["swimDistance"].mean(),
        train["swimDistance"].quantile(0.99),
        train["swimDistance"].max(),
    )
)
# Keep swims below the 95th percentile for the distribution plot.
data = train.copy()
data = data[data["swimDistance"] < train["swimDistance"].quantile(0.95)]
plt.figure(figsize=(15, 10))
plt.title("Swim Distance Distribution", fontsize=15)
sns.distplot(data["swimDistance"])
plt.show()
swim = train.copy()
# Replace the numeric distance with four labelled bins; 5286 is the hard-coded
# upper edge of the last bin.
swim["swimDistance"] = pd.cut(
    swim["swimDistance"], [-1, 0, 5, 20, 5286], labels=["0m", "1-5m", "6-20m", "20m+"]
)
plt.figure(figsize=(15, 8))
sns.boxplot(x="swimDistance", y="winPlacePerc", data=swim)
plt.show()
print(
    "The average person uses {:.1f} heal items, 99% of people use {} or less, while the doctor used {}.".format(
        train["heals"].mean(), train["heals"].quantile(0.99), train["heals"].max()
    )
)
print(
    "The average person uses {:.1f} boost items, 99% of people use {} or less, while the doctor used {}.".format(
        train["boosts"].mean(), train["boosts"].quantile(0.99), train["boosts"].max()
    )
)
data = train.copy()
# The boosts quantile is taken after the heals filter has been applied.
data = data[data["heals"] < data["heals"].quantile(0.99)]
data = data[data["boosts"] < data["boosts"].quantile(0.99)]
f, ax1 = plt.subplots(figsize=(20, 10))
# Both series share one axes; the text labels below act as the legend.
sns.pointplot(x="heals", y="winPlacePerc", data=data, color="lime", alpha=0.8)
sns.pointplot(x="boosts", y="winPlacePerc", data=data, color="blue", alpha=0.8)
plt.text(4, 0.6, "Heals", color="lime", fontsize=17, style="italic")
plt.text(4, 0.55, "Boosts", color="blue", fontsize=17, style="italic")
plt.xlabel("Number of heal/boost items", fontsize=15, color="blue")
plt.ylabel("Win Percentage", fontsize=15, color="blue")
plt.title("Heals vs Boosts", fontsize=20, color="blue")
plt.grid()
plt.show()
# Bucket rows by numGroups: above 50, 26 to 50, and 25 or fewer. The message
# below labels these buckets solo, duo and squad games.
solos = train[train["numGroups"] > 50]
duos = train[(train["numGroups"] > 25) & (train["numGroups"] <= 50)]
squads = train[train["numGroups"] <= 25]
print(
    "There are {} ({:.2f}%) solo games, {} ({:.2f}%) duo games and {} ({:.2f}%) squad games.".format(
        len(solos),
        100 * len(solos) / len(train),
        len(duos),
        100 * len(duos) / len(train),
        len(squads),
        100 * len(squads) / len(train),
    )
)
f, ax1 = plt.subplots(figsize=(20, 10))
# One series per bucket on a shared axes; the text labels act as the legend.
sns.pointplot(x="kills", y="winPlacePerc", data=solos, color="black", alpha=0.8)
sns.pointplot(x="kills", y="winPlacePerc", data=duos, color="#CC0000", alpha=0.8)
sns.pointplot(x="kills", y="winPlacePerc", data=squads, color="#3399FF", alpha=0.8)
plt.text(37, 0.6, "Solos", color="black", fontsize=17, style="italic")
plt.text(37, 0.55, "Duos", color="#CC0000", fontsize=17, style="italic")
plt.text(37, 0.5, "Squads", color="#3399FF", fontsize=17, style="italic")
plt.xlabel("Number of kills", fontsize=15, color="blue")
plt.ylabel("Win Percentage", fontsize=15, color="blue")
plt.title("Solo vs Duo vs Squad Kills", fontsize=20, color="blue")
plt.grid()
plt.show()
f, ax1 = plt.subplots(figsize=(20, 10))
# DBNOs, assists and revives for the duo and squad buckets only (solos are
# not plotted here); each series' color matches its text label below.
sns.pointplot(x="DBNOs", y="winPlacePerc", data=duos, color="#CC0000", alpha=0.8)
sns.pointplot(x="DBNOs", y="winPlacePerc", data=squads, color="#3399FF", alpha=0.8)
sns.pointplot(x="assists", y="winPlacePerc", data=duos, color="#FF6666", alpha=0.8)
sns.pointplot(x="assists", y="winPlacePerc", data=squads, color="#CCE5FF", alpha=0.8)
sns.pointplot(x="revives", y="winPlacePerc", data=duos, color="#660000", alpha=0.8)
sns.pointplot(x="revives", y="winPlacePerc", data=squads, color="#000066", alpha=0.8)
plt.text(14, 0.5, "Duos - Assists", color="#FF6666", fontsize=17, style="italic")
plt.text(14, 0.45, "Duos - DBNOs", color="#CC0000", fontsize=17, style="italic")
plt.text(14, 0.4, "Duos - Revives", color="#660000", fontsize=17, style="italic")
plt.text(14, 0.35, "Squads - Assists", color="#CCE5FF", fontsize=17, style="italic")
plt.text(14, 0.3, "Squads - DBNOs", color="#3399FF", fontsize=17, style="italic")
plt.text(14, 0.25, "Squads - Revives", color="#000066", fontsize=17, style="italic")
plt.xlabel("Number of DBNOs/Assits/Revives", fontsize=15, color="blue")
plt.ylabel("Win Percentage", fontsize=15, color="blue")
plt.title("Duo vs Squad DBNOs, Assists, and Revives", fontsize=20, color="blue")
plt.grid()
plt.show()
# Heatmap of the correlation matrix returned by train.corr().
f, ax = plt.subplots(figsize=(15, 15))
sns.heatmap(train.corr(), annot=True, linewidths=0.5, fmt=".1f", ax=ax)
plt.show()
k = 5  # number of variables for heatmap
f, ax = plt.subplots(figsize=(11, 11))
# Labels of the k rows with the largest correlation to winPlacePerc.
cols = train.corr().nlargest(k, "winPlacePerc")["winPlacePerc"].index
# corrcoef treats rows as variables, hence the transpose.
cm = np.corrcoef(train[cols].values.T)
sns.set(font_scale=1.25)
hm = sns.heatmap(
    cm,
    cbar=True,
    annot=True,
    square=True,
    fmt=".2f",
    annot_kws={"size": 10},
    yticklabels=cols.values,
    xticklabels=cols.values,
)
plt.show()
# Number of rows sharing each matchId, broadcast back onto every row.
train["playersJoined"] = train.groupby("matchId")["matchId"].transform("count")
# NOTE: this filtered copy is not referenced again in this script.
data = train.copy()
data = data[data["playersJoined"] > 49]
# Scale by (100 - playersJoined) / 100 + 1, i.e. a factor of 1 at 100 players
# that grows as fewer players joined.
train["killsNorm"] = train["kills"] * ((100 - train["playersJoined"]) / 100 + 1)
train["damageDealtNorm"] = train["damageDealt"] * (
    (100 - train["playersJoined"]) / 100 + 1
)
# Rows 5 to 7 of the raw and normalised columns; the result is discarded.
train[["playersJoined", "kills", "killsNorm", "damageDealt", "damageDealtNorm"]][5:8]
# Engineered features. fillna results are assigned back to the column instead
# of calling fillna(..., inplace=True) on `train[col]`: that chained form acts
# on an intermediate object and does not update `train` when pandas
# copy-on-write is active.
train["healsAndBoosts"] = train["heals"] + train["boosts"]
train["totalDistance"] = (
    train["walkDistance"] + train["rideDistance"] + train["swimDistance"]
)
train["boostsPerWalkDistance"] = train["boosts"] / (
    train["walkDistance"] + 1
)  # The +1 is to avoid infinity, because there are entries where boosts>0 and walkDistance=0. Strange.
train["boostsPerWalkDistance"] = train["boostsPerWalkDistance"].fillna(0)
train["healsPerWalkDistance"] = train["heals"] / (
    train["walkDistance"] + 1
)  # The +1 is to avoid infinity, because there are entries where heals>0 and walkDistance=0. Strange.
train["healsPerWalkDistance"] = train["healsPerWalkDistance"].fillna(0)
train["healsAndBoostsPerWalkDistance"] = train["healsAndBoosts"] / (
    train["walkDistance"] + 1
)  # The +1 is to avoid infinity.
train["healsAndBoostsPerWalkDistance"] = train["healsAndBoostsPerWalkDistance"].fillna(
    0
)
# Rows 40 to 44 of the new columns; the result is discarded.
train[
    [
        "walkDistance",
        "boosts",
        "boostsPerWalkDistance",
        "heals",
        "healsPerWalkDistance",
        "healsAndBoosts",
        "healsAndBoostsPerWalkDistance",
    ]
][40:45]
train["killsPerWalkDistance"] = train["kills"] / (
    train["walkDistance"] + 1
)  # The +1 is to avoid infinity, because there are entries where kills>0 and walkDistance=0. Strange.
train["killsPerWalkDistance"] = train["killsPerWalkDistance"].fillna(0)
# The ten rows with the highest kills per walked distance; result discarded.
train[
    ["kills", "walkDistance", "rideDistance", "killsPerWalkDistance", "winPlacePerc"]
].sort_values(by="killsPerWalkDistance").tail(10)
# Team size from the number of groups, using the same thresholds as the
# solos / duos / squads split earlier in this script: more than 50 groups -> 1,
# 26 to 50 -> 2, otherwise 4.
# A chained comparison is required here: `&` binds tighter than the comparison
# operators, so `i > 25 & i <= 50` evaluates as `i > (25 & i) <= 50` and
# mislabels many small group counts (for example 10) as 2.
train["team"] = [
    1 if i > 50 else 2 if 25 < i <= 50 else 4 for i in train["numGroups"]
]
train.head()


================================================
FILE: stress_tests/kaggle/kaggle20.py
================================================
import matplotlib

matplotlib.use("PS")
import time

import matplotlib.pyplot as plt
import numpy as np  # linear algebra
import seaborn as sns  # data visualization library

import modin.pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)

# Load the dataset and split it into the target (`y`) and the features (`x`).
data = pd.read_csv("data.csv")
data.head()  # head method show only first 5 rows
col = data.columns
print(col)
y = data.diagnosis  # M or B
# Columns removed from the feature frame.  The name deliberately avoids
# `list` so the builtin is not shadowed for the rest of the script.
drop_cols = ["Unnamed: 32", "id", "diagnosis"]
x = data.drop(drop_cols, axis=1)
x.head()
# Pass the series by keyword: recent seaborn releases accept only `data`
# positionally, so a positional series would no longer be read as `x`.
ax = sns.countplot(x=y, label="Count")  # M = 212, B = 357
x.describe()
# Standardise every feature (subtract the column mean, divide by the column
# std), then draw violin plots of the features in three column slices.
data_dia = y
data = x
data_n_2 = (data - data.mean()) / (data.std())  # standardization
# Columns 0-9: reshape to long form (diagnosis, features, value) for seaborn.
data = pd.concat([y, data_n_2.iloc[:, 0:10]], axis=1)
data = pd.melt(data, id_vars="diagnosis", var_name="features", value_name="value")
plt.figure(figsize=(10, 10))
sns.violinplot(
    x="features", y="value", hue="diagnosis", data=data, split=True, inner="quart"
)
plt.xticks(rotation=90)
# Columns 10-19.
data = pd.concat([y, data_n_2.iloc[:, 10:20]], axis=1)
data = pd.melt(data, id_vars="diagnosis", var_name="features", value_name="value")
plt.figure(figsize=(10, 10))
sns.violinplot(
    x="features", y="value", hue="diagnosis", data=data, split=True, inner="quart"
)
plt.xticks(rotation=90)
# Columns 20 onwards (the slice end of 31 is clipped to the frame width).
data = pd.concat([y, data_n_2.iloc[:, 20:31]], axis=1)
data = pd.melt(data, id_vars="diagnosis", var_name="features", value_name="value")
plt.figure(figsize=(10, 10))
sns.violinplot(
    x="features", y="value", hue="diagnosis", data=data, split=True, inner="quart"
)
plt.xticks(rotation=90)
# Box plot of the same (last) slice of melted data.
plt.figure(figsize=(10, 10))
sns.boxplot(x="features", y="value", hue="diagnosis", data=data)
plt.xticks(rotation=90)
# Joint distribution of two features with a regression fit.
# The vectors are passed as x=/y= keywords (recent seaborn accepts only
# `data` positionally) and the kind is spelled "reg"; the previous "regg"
# is not one of the kinds seaborn accepts.
sns.jointplot(
    x=x.loc[:, "concavity_worst"],
    y=x.loc[:, "concave points_worst"],
    kind="reg",
    color="#ce1414",
)
# Pair grid over three features: KDE below and on the diagonal, scatter above.
sns.set(style="white")
df = x.loc[:, ["radius_worst", "perimeter_worst", "area_worst"]]
g = sns.PairGrid(df, diag_sharey=False)
g.map_lower(sns.kdeplot, cmap="Blues_d")
g.map_upper(plt.scatter)
g.map_diag(sns.kdeplot, lw=3)
# Swarm plots of the standardised features, again in three column slices.
# The wall-clock time for the three plots is measured with tic/toc.
sns.set(style="whitegrid", palette="muted")
data_dia = y
data = x
data_n_2 = (data - data.mean()) / (data.std())  # standardization
data = pd.concat([y, data_n_2.iloc[:, 0:10]], axis=1)
data = pd.melt(data, id_vars="diagnosis", var_name="features", value_name="value")
plt.figure(figsize=(10, 10))
tic = time.time()
sns.swarmplot(x="features", y="value", hue="diagnosis", data=data)
plt.xticks(rotation=90)
data = pd.concat([y, data_n_2.iloc[:, 10:20]], axis=1)
data = pd.melt(data, id_vars="diagnosis", var_name="features", value_name="value")
plt.figure(figsize=(10, 10))
sns.swarmplot(x="features", y="value", hue="diagnosis", data=data)
plt.xticks(rotation=90)
data = pd.concat([y, data_n_2.iloc[:, 20:31]], axis=1)
data = pd.melt(data, id_vars="diagnosis", var_name="features", value_name="value")
plt.figure(figsize=(10, 10))
sns.swarmplot(x="features", y="value", hue="diagnosis", data=data)
toc = time.time()
plt.xticks(rotation=90)
print("swarm plot time: ", toc - tic, " s")
# Annotated correlation heatmap of all features.
f, ax = plt.subplots(figsize=(18, 18))
sns.heatmap(x.corr(), annot=True, linewidths=0.5, fmt=".1f", ax=ax)
# Features removed before modelling.
# NOTE(review): presumably chosen from the correlation heatmap — confirm.
drop_list1 = [
    "perimeter_mean",
    "radius_mean",
    "compactness_mean",
    "concave points_mean",
    "radius_se",
    "perimeter_se",
    "radius_worst",
    "perimeter_worst",
    "compactness_worst",
    "concave points_worst",
    "compactness_se",
    "concave points_se",
    "texture_worst",
    "area_worst",
]
x_1 = x.drop(drop_list1, axis=1)  # do not modify x, we will use it later
x_1.head()
# Correlation heatmap of the reduced feature set.
f, ax = plt.subplots(figsize=(14, 14))
sns.heatmap(x_1.corr(), annot=True, linewidths=0.5, fmt=".1f", ax=ax)
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix  # f1_score
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Baseline: 70/30 split of the reduced features, random forest, accuracy and
# confusion matrix on the held-out part.
x_train, x_test, y_train, y_test = train_test_split(
    x_1, y, test_size=0.3, random_state=42
)
clf_rf = RandomForestClassifier(random_state=43)
clr_rf = clf_rf.fit(x_train, y_train)
ac = accuracy_score(y_test, clf_rf.predict(x_test))
print("Accuracy is: ", ac)
cm = confusion_matrix(y_test, clf_rf.predict(x_test))
sns.heatmap(cm, annot=True, fmt="d")
from sklearn.feature_selection import SelectKBest, chi2

# Univariate selection: keep the 5 features with the best chi2 score, then
# refit a forest on the reduced matrices and evaluate it the same way.
select_feature = SelectKBest(chi2, k=5).fit(x_train, y_train)
print("Score list:", select_feature.scores_)
print("Feature list:", x_train.columns)
x_train_2 = select_feature.transform(x_train)
x_test_2 = select_feature.transform(x_test)
clf_rf_2 = RandomForestClassifier()
clr_rf_2 = clf_rf_2.fit(x_train_2, y_train)
ac_2 = accuracy_score(y_test, clf_rf_2.predict(x_test_2))
print("Accuracy is: ", ac_2)
cm_2 = confusion_matrix(y_test, clf_rf_2.predict(x_test_2))
sns.heatmap(cm_2, annot=True, fmt="d")
from sklearn.feature_selection import RFE

# Recursive feature elimination down to 5 features, one feature per step.
clf_rf_3 = RandomForestClassifier()
rfe = RFE(estimator=clf_rf_3, n_features_to_select=5, step=1)
rfe = rfe.fit(x_train, y_train)
print("Chosen best 5 feature by rfe:", x_train.columns[rfe.support_])
from sklearn.feature_selection import RFECV

# Recursive feature elimination with the feature count picked by 5-fold CV.
clf_rf_4 = RandomForestClassifier()
rfecv = RFECV(
    estimator=clf_rf_4, step=1, cv=5, scoring="accuracy"
)  # 5-fold cross-validation
rfecv = rfecv.fit(x_train, y_train)
print("Optimal number of features :", rfecv.n_features_)
print("Best features :", x_train.columns[rfecv.support_])
import matplotlib.pyplot as plt

# Plot the cross-validated score for each candidate number of features.
# `grid_scores_` was removed from recent scikit-learn releases in favour of
# `cv_results_`; use the new attribute when present and fall back to the
# legacy one so older installations keep working.
if hasattr(rfecv, "cv_results_"):
    cv_scores = rfecv.cv_results_["mean_test_score"]
else:
    cv_scores = rfecv.grid_scores_
plt.figure()
plt.xlabel("Number of features selected")
plt.ylabel("Cross validation score of number of selected features")
plt.plot(range(1, len(cv_scores) + 1), cv_scores)
plt.show()
# Fit a fresh forest and rank the features by its importances.
clf_rf_5 = RandomForestClassifier()
clr_rf_5 = clf_rf_5.fit(x_train, y_train)
importances = clr_rf_5.feature_importances_
# Per-feature spread of the importances across the trees of *this* forest.
# It is drawn as the error bars below, so it has to come from the same model
# as `importances`; it was previously taken from the unrelated `clf_rf`.
std = np.std([tree.feature_importances_ for tree in clr_rf_5.estimators_], axis=0)
# Feature positions ordered from most to least important.
indices = np.argsort(importances)[::-1]
print("Feature ranking:")
for rank in range(x_train.shape[1]):
    print(
        "%d. feature %d (%f)"
        % (rank + 1, indices[rank], importances[indices[rank]])
    )
plt.figure(1, figsize=(14, 13))
plt.title("Feature importances")
plt.bar(
    range(x_train.shape[1]),
    importances[indices],
    color="g",
    yerr=std[indices],
    align="center",
)
plt.xticks(range(x_train.shape[1]), x_train.columns[indices], rotation=90)
plt.xlim([-1, x_train.shape[1]])
plt.show()
# Re-split using the full feature frame `x` (not the reduced `x_1`).
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=42
)
# Mean-centre and scale each column by its range.
# NOTE(review): x_test_N is scaled with the test split's own statistics and
# is not used again in this script — confirm whether the training
# statistics were intended.
x_train_N = (x_train - x_train.mean()) / (x_train.max() - x_train.min())
x_test_N = (x_test - x_test.mean()) / (x_test.max() - x_test.min())
from sklearn.decomposition import PCA

# Fit PCA on the normalised training data and plot the explained-variance
# ratio of each component.
pca = PCA()
pca.fit(x_train_N)
plt.figure(1, figsize=(14, 13))
plt.clf()
plt.axes([0.2, 0.2, 0.7, 0.7])
plt.plot(pca.explained_variance_ratio_, linewidth=2)
plt.axis("tight")
plt.xlabel("n_components")
plt.ylabel("explained_variance_ratio_")


================================================
FILE: stress_tests/kaggle/kaggle22.py
================================================
import matplotlib

matplotlib.use("PS")
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer  # CountVectorizer
from sklearn.linear_model import LogisticRegression

import modin.pandas as pd

# Load the train/test sets and the sample submission file.
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")
subm = pd.read_csv("sample_submission.csv")
train.head()
# Look at two example comments.
train["comment_text"][0]
train["comment_text"][2]
# Distribution of comment lengths (in characters).
lens = train.comment_text.str.len()
lens.mean(), lens.std(), lens.max()
lens.hist()
label_cols = ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]
# "none" is 1 for rows where the maximum over the label columns is 0.
train["none"] = 1 - train[label_cols].max(axis=1)
train.describe()
len(train), len(test)
COMMENT = "comment_text"
# Replace missing comments with a placeholder string.
train[COMMENT].fillna("unknown", inplace=True)
test[COMMENT].fillna("unknown", inplace=True)
import re
import string

# Matches one ASCII punctuation character or one of the listed typographic
# symbols; the capture group lets the substitution re-emit the match.
re_tok = re.compile(f"([{string.punctuation}“”¨«»®´·º½¾¿¡§£₤‘’])")


def tokenize(s):
    """Split *s* into tokens, treating each punctuation mark as its own token.

    Every character matched by ``re_tok`` is padded with a space on both
    sides, then the text is split on runs of whitespace.
    """
    padded = re_tok.sub(r" \1 ", s)
    return padded.split()


n = train.shape[0]
# TF-IDF over unigrams and bigrams using the custom tokenizer.  The on/off
# switches are given as real booleans: scikit-learn's parameter validation
# rejects the integer 1 for boolean parameters.
vec = TfidfVectorizer(
    ngram_range=(1, 2),
    tokenizer=tokenize,
    min_df=3,
    max_df=0.9,
    strip_accents="unicode",
    use_idf=True,
    smooth_idf=True,
    sublinear_tf=True,
)
# Learn the vocabulary on the training comments only, then reuse it for test.
trn_term_doc = vec.fit_transform(train[COMMENT])
test_term_doc = vec.transform(test[COMMENT])
trn_term_doc, test_term_doc


def pr(y_i, y):
    """Return the add-one-smoothed column sums of ``x`` for label *y_i*.

    Rows of the module-level matrix ``x`` whose entry in *y* equals *y_i* are
    summed column-wise; one is added to both the sums and the row count
    before dividing.
    """
    selected_sum = x[y == y_i].sum(0)
    n_selected = (y == y_i).sum()
    return (selected_sum + 1) / (n_selected + 1)


# Short aliases for the term-document matrices; `x` is read as a global by
# pr() and get_mdl().
x = trn_term_doc
test_x = test_term_doc


def get_mdl(y):
    """Fit a logistic regression on log-count-ratio-scaled features.

    Parameters
    ----------
    y : column of 0/1 labels; only its ``.values`` array is used.

    Returns
    -------
    tuple
        ``(fitted_model, r)`` where ``r`` is the log ratio ``pr(1, y)`` over
        ``pr(0, y)`` that the caller must also apply to the test features.
    """
    y = y.values
    r = np.log(pr(1, y) / pr(0, y))
    # The dual formulation is only implemented by the liblinear solver, so
    # name it explicitly instead of relying on the library default solver,
    # which rejects dual=True.
    m = LogisticRegression(C=4, dual=True, solver="liblinear")
    x_nb = x.multiply(r)
    return m.fit(x_nb, y), r


# One model per label: column i of `preds` receives the positive-class
# probability predicted for label_cols[i].
preds = np.zeros((len(test), len(label_cols)))
for i, j in enumerate(label_cols):
    print("fit", j)
    m, r = get_mdl(train[j])
    # Scale the test features with the same ratio `r` used for training.
    preds[:, i] = m.predict_proba(test_x.multiply(r))[:, 1]
# Assemble the submission: the id column followed by one column per label.
submid = pd.DataFrame({"id": subm["id"]})
submission = pd.concat([submid, pd.DataFrame(preds, columns=label_cols)], axis=1)
submission.to_csv("submission.csv", index=False)


================================================
FILE: stress_tests/kaggle/kaggle3.py
================================================
#!/usr/bin/env python
import matplotlib

matplotlib.use("PS")
import matplotlib.pyplot as plt
import numpy as np  # linear algebra
import seaborn as sns  # visualization tool

import modin.pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)

# Load the dataset and look at its structure and pairwise correlations.
# NOTE(review): on pandas >= 2.0 corr() no longer drops non-numeric columns
# by default — confirm this call still succeeds on this CSV.
data = pd.read_csv("pokemon.csv")
data.info()
data.corr()
f, ax = plt.subplots(figsize=(18, 18))
sns.heatmap(data.corr(), annot=True, linewidths=0.5, fmt=".1f", ax=ax)
data.head(10)
data.columns
# Line plot: Speed and Defense drawn on the same axes.
data.Speed.plot(
    kind="line",
    color="g",
    label="Speed",
    linewidth=1,
    alpha=0.5,
    grid=True,
    linestyle=":",
)
data.Defense.plot(
    color="r", label="Defense", linewidth=1, alpha=0.5, grid=True, linestyle="-."
)
plt.legend(loc="upper right")  # legend = puts label into plot
plt.xlabel("x axis")  # label = name of label
plt.ylabel("y axis")
plt.title("Line Plot")  # title = title of plot
# Scatter plot of Attack against Defense.
data.plot(kind="scatter", x="Attack", y="Defense", alpha=0.5, color="red")
plt.xlabel("Attack")  # label = name of label
plt.ylabel("Defence")
plt.title("Attack Defense Scatter Plot")  # title = title of plot
# Histogram of Speed, drawn twice (with and without an explicit figsize).
data.Speed.plot(kind="hist", bins=50, figsize=(12, 12))
data.Speed.plot(kind="hist", bins=50)
# Basic dictionary operations.
dictionary = {"spain": "madrid", "usa": "vegas"}
print(dictionary.keys())
print(dictionary.values())
dictionary["spain"] = "barcelona"  # update existing entry
print(dictionary)
dictionary["france"] = "paris"  # Add new entry
print(dictionary)
del dictionary["spain"]  # remove entry with key 'spain'
print(dictionary)
print("france" in dictionary)  # check include or not
dictionary.clear()  # remove all entries in dict
print(dictionary)
print(dictionary)  # prints {} again: clear() emptied the dict, the name is still bound
# Series vs. DataFrame: single brackets give a Series, double a DataFrame.
data = pd.read_csv("pokemon.csv")
series = data["Defense"]  # data['Defense'] = series
print(type(series))
data_frame = data[["Defense"]]  # data[['Defense']] = data frame
print(type(data_frame))
# Comparison and boolean operators.
print(3 > 2)
print(3 != 2)
print(True and False)
print(True or False)
# Boolean-mask filtering, with one condition and then two combined ones.
x = (
    data["Defense"] > 200
)  # There are only 3 pokemons who have higher defense value than 200
data[x]
data[np.logical_and(data["Defense"] > 200, data["Attack"] > 100)]
data[(data["Defense"] > 200) & (data["Attack"] > 100)]
# while loop: runs for i = 0..4 and stops once i reaches 5.
i = 0
while i != 5:
    print("i is: ", i)
    i += 1
print(i, " is equal to 5")
# for loops over a list, over (index, value) pairs, over dict items and over
# the rows of a one-row DataFrame slice.
lis = [1, 2, 3, 4, 5]
for i in lis:
    print("i is: ", i)
print("")
for index, value in enumerate(lis):
    print(index, " : ", value)
print("")
dictionary = {"spain": "madrid", "france": "paris"}
for key, value in dictionary.items():
    print(key, " : ", value)
print("")
for index, value in data[["Attack"]][0:1].iterrows():
    print(index, " : ", value)


def tuble_ex():
    """Return the fixed three-element tuple ``(1, 2, 3)``."""
    return 1, 2, 3


# Tuple unpacking: each element of the returned tuple gets its own name.
a, b, c = tuble_ex()
print(a, b, c)
x = 2  # module-level binding


def f():
    """Return 3 from a local ``x`` that shadows the module-level one."""
    x = 3  # local name; the global x is left untouched
    return x


# First line printed: the global x (2).  Second line: f()'s local x (3).
print(x, f(), sep="\n")
x = 5


def f():
    """Return twice the module-level ``x`` (no local ``x`` is defined)."""
    return 2 * x


print(f())  # the lookup of x falls through to the global scope
import builtins

dir(builtins)


def square():
    """Return the square of the value produced by the nested helper."""

    def add():
        """Return the sum of two local values."""
        first, second = 2, 3
        return first + second

    total = add()
    return total**2


print(square())


def f(a, b=1, c=2):
    """Return ``a + b + c``; *b* defaults to 1 and *c* to 2."""
    return a + b + c


print(f(5))  # defaults used for b and c
print(f(5, 4, 3))  # defaults overridden


def f(*args):
    """Print every positional argument on its own line."""
    if args:
        print(*args, sep="\n")


f(1)
print("")
f(1, 2, 3, 4)


def f(**kwargs):
    """Print each keyword argument's name and value on its own line."""
    for name in kwargs:
        print(name, " ", kwargs[name])


f(country="spain", capital="madrid", population=123456)
# map with a lambda: square every element.
number_list = [1, 2, 3]
y = map(lambda x: x**2, number_list)
print(list(y))
# Iterators: next() consumes one item, *it unpacks whatever is left.
name = "ronaldo"
it = iter(name)
print(next(it))  # print next iteration
print(*it)  # print remaining iteration
# zip pairs the lists element-wise; zip(*pairs) splits them apart again.
list1 = [1, 2, 3, 4]
list2 = [5, 6, 7, 8]
z = zip(list1, list2)
print(z)
z_list = list(z)
print(z_list)
un_zip = zip(*z_list)
un_list1, un_list2 = list(un_zip)  # each unzipped sequence is a tuple
print(un_list1)
print(un_list2)
print(type(un_list2))
# List comprehensions, plain and with a chained conditional expression.
num1 = [1, 2, 3]
num2 = [i + 1 for i in num1]
print(num2)
num1 = [5, 10, 15]
num2 = [i**2 if i == 10 else i - 5 if i < 7 else i + 5 for i in num1]
print(num2)
# Label each row "high" or "low" relative to the mean Speed.
threshold = sum(data.Speed) / len(data.Speed)
data["speed_level"] = ["high" if i > threshold else "low" for i in data.Speed]
data.loc[:10, ["speed_level", "Speed"]]  # we will learn loc more detailed later
# Reload the data and inspect it.
data = pd.read_csv("pokemon.csv")
data.head()  # head shows first 5 rows
data.tail()
data.columns
data.shape
data.info()
print(
    data["Type 1"].value_counts(dropna=False)
)  # if there are nan values that also be counted
data.describe()  # ignore null entries
data.boxplot(column="Attack", by="Legendary")
# melt to long form, then pivot back to wide form.
data_new = data.head()  # I only take 5 rows into new data
data_new
melted = pd.melt(frame=data_new, id_vars="Name", value_vars=["Attack", "Defense"])
melted
melted.pivot(index="Name", columns="variable", values="value")
# Concatenate row-wise (axis=0) and column-wise (axis=1).
data1 = data.head()
data2 = data.tail()
conc_data_row = pd.concat(
    [data1, data2], axis=0, ignore_index=True
)  # axis = 0 : adds dataframes in row
conc_data_row
data1 = data["Attack"].head()
data2 = data["Defense"].head()
conc_data_col = pd.concat([data1, data2], axis=1)  # axis = 1 : places the series side by side as columns
conc_data_col
# Convert column dtypes.
data.dtypes
data["Type 1"] = data["Type 1"].astype("category")
data["Speed"] = data["Speed"].astype("float")
data.dtypes
data.info()
data["Type 2"].value_counts(dropna=False)
# data1 is another name for the same object as `data`, not a copy.
data1 = (
    data  # also we will use data to fill missing value so I assign it to data1 variable
)
# NOTE(review): whether dropping NaNs in place on the selected column is
# reflected in `data` depends on the pandas/modin copy semantics — confirm;
# the first assert below relies on it.
data1["Type 2"].dropna(
    inplace=True
)  # inplace = True means we do not assign it to new variable. Changes automatically assigned to data
assert 1 == 1  # return nothing because it is true
assert data["Type 2"].notnull().all()  # returns nothing because we drop nan values
data["Type 2"].fillna("empty", inplace=True)
assert (
    data["Type 2"].notnull().all()
)  # returns nothing because we do not have nan values
# Build a DataFrame from a dict created by zipping labels with columns.
country = ["Spain", "France"]
population = ["11", "12"]
list_label = ["country", "population"]
list_col = [country, population]
zipped = list(zip(list_label, list_col))
data_dict = dict(zipped)
df = pd.DataFrame(data_dict)
df
# Add a column from a list, then one from a scalar.
df["capital"] = ["madrid", "paris"]
df
df["income"] = 0  # Broadcasting entire column
df
# Plot three numeric columns: together, as subplots, and as a scatter plot.
data1 = data.loc[:, ["Attack", "Defense", "Speed"]]
data1.plot()
data1.plot(subplots=True)
plt.show()
data1.plot(kind="scatter", x="Attack", y="Defense")
plt.show()
# Normalised histograms of Defense.  The keyword is `density`: the extra
# keywords are forwarded to matplotlib's hist, which no longer accepts the
# old `normed` spelling.
data1.plot(kind="hist", y="Defense", bins=50, range=(0, 250), density=True)
fig, axes = plt.subplots(nrows=2, ncols=1)
data1.plot(kind="hist", y="Defense", bins=50, range=(0, 250), density=True, ax=axes[0])
# Second axes: the cumulative version of the same histogram.
data1.plot(
    kind="hist",
    y="Defense",
    bins=50,
    range=(0, 250),
    density=True,
    ax=axes[1],
    cumulative=True,
)
plt.savefig("graph.png")
plt
data.describe()
# Strings become a datetime index via pd.to_datetime.
time_list = ["1992-03-08", "1992-04-12"]
print(type(time_list[1]))  # As you can see date is string
datetime_object = pd.to_datetime(time_list)
print(type(datetime_object))
import warnings

# Silence all warnings for the rest of the script.
warnings.filterwarnings("ignore")
# Attach dates to the first five rows and use them as the index.
data2 = data.head()
date_list = ["1992-01-10", "1992-02-10", "1992-03-10", "1993-03-15", "1993-03-16"]
datetime_object = pd.to_datetime(date_list)
data2["date"] = datetime_object
data2 = data2.set_index("date")
data2
# Label-based selection on the datetime index: one date, then a date range.
print(data2.loc["1993-03-16"])
print(data2.loc["1992-03-10":"1993-03-16"])
# Resample with the "A" and "M" frequency aliases, then interpolate the gaps.
data2.resample("A").mean()
data2.resample("M").mean()
data2.resample("M").first().interpolate("linear")
data2.resample("M").mean().interpolate("linear")
# Reload and use the "#" column as the index.
data = pd.read_csv("pokemon.csv")
data = data.set_index("#")
data.head()
# Several equivalent ways to select the HP value for index label 1.
data["HP"][1]
data.HP[1]
data.loc[1, ["HP"]]
data[["HP", "Attack"]]
print(type(data["HP"]))  # series
print(type(data[["HP"]]))  # data frames
# Label-based slicing with .loc: forwards, reversed, and open-ended.
data.loc[1:10, "HP":"Defense"]  # 10 and "Defense" are inclusive
data.loc[10:1:-1, "HP":"Defense"]
data.loc[1:10, "Speed":]
# Boolean filtering: one mask, two masks combined, and a mask built from a
# different column than the one selected.
boolean = data.HP > 200
data[boolean]
first_filter = data.HP > 150
second_filter = data.Speed > 35
data[first_filter & second_filter]
data.HP[data.Speed < 15]


def div(n):
    """Return half of *n* (true division by two)."""
    half = n / 2
    return half


# Element-wise transformation with a named function and with a lambda.
data.HP.apply(div)
data.HP.apply(lambda n: n / 2)
# New column computed from two existing ones.
data["total_power"] = data.Attack + data.Defense
data.head()
# Rename the index, then give a copy a fresh integer index starting at 100.
print(data.index.name)
data.index.name = "index_name"
data.head()
data.head()
data3 = data.copy()
data3.index = range(100, 100 + len(data3.index), 1)
data3.head()
# Two-level index built from two columns.
data = pd.read_csv("pokemon.csv")
data.head()
data1 = data.set_index(["Type 1", "Type 2"])
data1.head(100)
# Small example frame for pivot / stack / melt / groupby.
dic = {
    "treatment": ["A", "A", "B", "B"],
    "gender": ["F", "M", "F", "M"],
    "response": [10, 45, 5, 9],
    "age": [15, 4, 72, 65],
}
df = pd.DataFrame(dic)
df
df.pivot(index="treatment", columns="gender", values="response")
df1 = df.set_index(["treatment", "gender"])
df1
# unstack moves the chosen index level into the columns.
df1.unstack(level=0)
df1.unstack(level=1)
# Swap the order of the two index levels.
df2 = df1.swaplevel(0, 1)
df2
df
pd.melt(df, id_vars="treatment", value_vars=["age", "response"])
df
# NOTE(review): `df` still contains the string column "gender"; confirm that
# mean() over all columns is accepted by the pandas version in use.
df.groupby("treatment").mean()  # mean is aggregation / reduce method
df.groupby("treatment").age.max()
df.groupby("treatment")[["age", "response"]].min()
df.info()


================================================
FILE: stress_tests/kaggle/kaggle4.py
================================================
import matplotlib

matplotlib.use("PS")
import matplotlib.pyplot as plt  # Matlab-style plotting
import numpy as np  # linear algebra
import seaborn as sns

import modin.pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)

# Keep the current seaborn colour palette and switch to the dark-grid style.
color = sns.color_palette()
sns.set_style("darkgrid")
import warnings


def ignore_warn(*args, **kwargs):
    """Accept any arguments and do nothing; always returns ``None``."""
    return None


# Replace warnings.warn with the no-op above so warn() calls emit nothing.
warnings.warn = ignore_warn  # ignore annoying warning (from sklearn and seaborn)
from scipy import stats
from scipy.stats import norm, skew  # for some statistics

pd.set_option(
    "display.float_format", lambda x: "{:.3f}".format(x)
)  # Limiting floats output to 3 decimal points
# Load the data; keep the Id columns aside and drop them from both frames.
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")
train.head(5)
test.head(5)
print("The train data size before dropping Id feature is : {} ".format(train.shape))
print("The test data size before dropping Id feature is : {} ".format(test.shape))
train_ID = train["Id"]
test_ID = test["Id"]
train.drop("Id", axis=1, inplace=True)
test.drop("Id", axis=1, inplace=True)
print("\nThe train data size after dropping Id feature is : {} ".format(train.shape))
print("The test data size after dropping Id feature is : {} ".format(test.shape))
# Scatter of living area against sale price, before removing outliers.
fig, ax = plt.subplots()
ax.scatter(x=train["GrLivArea"], y=train["SalePrice"])
plt.ylabel("SalePrice", fontsize=13)
plt.xlabel("GrLivArea", fontsize=13)
plt.show()
# Drop the rows with GrLivArea > 4000 and SalePrice < 300000.
train = train.drop(
    train[(train["GrLivArea"] > 4000) & (train["SalePrice"] < 300000)].index
)
# Same scatter after the drop.
fig, ax = plt.subplots()
ax.scatter(train["GrLivArea"], train["SalePrice"])
plt.ylabel("SalePrice", fontsize=13)
plt.xlabel("GrLivArea", fontsize=13)
plt.show()
# Distribution of the raw target with a fitted normal curve and a
# probability plot.
sns.distplot(train["SalePrice"], fit=norm)
(mu, sigma) = norm.fit(train["SalePrice"])
print("\n mu = {:.2f} and sigma = {:.2f}\n".format(mu, sigma))
plt.legend(
    [r"Normal dist. ($\mu=$ {:.2f} and $\sigma=$ {:.2f} )".format(mu, sigma)],
    loc="best",  # noqa: W605
)
plt.ylabel("Frequency")
plt.title("SalePrice distribution")
fig = plt.figure()
res = stats.probplot(train["SalePrice"], plot=plt)
plt.show()
# Replace the target with log(1 + SalePrice) and repeat the same plots.
train["SalePrice"] = np.log1p(train["SalePrice"])
sns.distplot(train["SalePrice"], fit=norm)
(mu, sigma) = norm.fit(train["SalePrice"])
print("\n mu = {:.2f} and sigma = {:.2f}\n".format(mu, sigma))
plt.legend(
    [r"Normal dist. ($\mu=$ {:.2f} and $\sigma=$ {:.2f} )".format(mu, sigma)],
    loc="best",  # noqa: W605
)
plt.ylabel("Frequency")
plt.title("SalePrice distribution")
fig = plt.figure()
res = stats.probplot(train["SalePrice"], plot=plt)
plt.show()
# Remember the split sizes and the (log-transformed) target, then stack
# train and test so the preprocessing below is applied to both at once.
ntrain = train.shape[0]
ntest = test.shape[0]
y_train = train.SalePrice.values
all_data = pd.concat((train, test)).reset_index(drop=True)
all_data.drop(["SalePrice"], axis=1, inplace=True)
print("all_data size is : {}".format(all_data.shape))
# Percentage of missing values per column: columns without missing values
# are dropped and the 30 largest ratios are kept.
all_data_na = (all_data.isnull().sum() / len(all_data)) * 100
all_data_na = all_data_na.drop(all_data_na[all_data_na == 0].index).sort_values(
    ascending=False
)[:30]
missing_data = pd.DataFrame({"Missing Ratio": all_data_na})
missing_data.head(20)
# Correlation heatmap of the training frame.
# NOTE(review): `train` still holds object columns here; on pandas >= 2.0
# corr() no longer drops them by default — confirm this call still succeeds.
corrmat = train.corr()
plt.subplots(figsize=(12, 9))
sns.heatmap(corrmat, vmax=0.9, square=True)
# Categorical columns: missing values are filled with the string "None".
all_data["PoolQC"] = all_data["PoolQC"].fillna("None")
all_data["MiscFeature"] = all_data["MiscFeature"].fillna("None")
all_data["Alley"] = all_data["Alley"].fillna("None")
all_data["Fence"] = all_data["Fence"].fillna("None")
all_data["FireplaceQu"] = all_data["FireplaceQu"].fillna("None")
# LotFrontage: fill with the median LotFrontage of the same Neighborhood.
all_data["LotFrontage"] = all_data.groupby("Neighborhood")["LotFrontage"].transform(
    lambda x: x.fillna(x.median())
)
# Garage columns: "None" for the categorical ones, 0 for the numeric ones.
for col in ("GarageType", "GarageFinish", "GarageQual", "GarageCond"):
    all_data[col] = all_data[col].fillna("None")
for col in ("GarageYrBlt", "GarageArea", "GarageCars"):
    all_data[col] = all_data[col].fillna(0)
# Basement columns: 0 for the numeric ones, "None" for the categorical ones.
for col in (
    "BsmtFinSF1",
    "BsmtFinSF2",
    "BsmtUnfSF",
    "TotalBsmtSF",
    "BsmtFullBath",
    "BsmtHalfBath",
):
    all_data[col] = all_data[col].fillna(0)
for col in ("BsmtQual", "BsmtCond", "BsmtExposure", "BsmtFinType1", "BsmtFinType2"):
    all_data[col] = all_data[col].fillna("None")
all_data["MasVnrType"] = all_data["MasVnrType"].fillna("None")
all_data["MasVnrArea"] = all_data["MasVnrArea"].fillna(0)
# Remaining columns: fill with the most frequent value (mode) or a fixed
# default; the Utilities column is dropped entirely.
all_data["MSZoning"] = all_data["MSZoning"].fillna(all_data["MSZoning"].mode()[0])
all_data = all_data.drop(["Utilities"], axis=1)
all_data["Functional"] = all_data["Functional"].fillna("Typ")
all_data["Electrical"] = all_data["Electrical"].fillna(all_data["Electrical"].mode()[0])
all_data["KitchenQual"] = all_data["KitchenQual"].fillna(
    all_data["KitchenQual"].mode()[0]
)
all_data["Exterior1st"] = all_data["Exterior1st"].fillna(
    all_data["Exterior1st"].mode()[0]
)
all_data["Exterior2nd"] = all_data["Exterior2nd"].fillna(
    all_data["Exterior2nd"].mode()[0]
)
all_data["SaleType"] = all_data["SaleType"].fillna(all_data["SaleType"].mode()[0])
all_data["MSSubClass"] = all_data["MSSubClass"].fillna("None")
# Recompute the missing-value ratios to check what is left.
all_data_na = (all_data.isnull().sum() / len(all_data)) * 100
all_data_na = all_data_na.drop(all_data_na[all_data_na == 0].index).sort_values(
    ascending=False
)
missing_data = pd.DataFrame({"Missing Ratio": all_data_na})
missing_data.head()
# Convert four numeric-looking columns to strings so the encoder below
# treats their values as category labels.
all_data["MSSubClass"] = all_data["MSSubClass"].apply(str)
all_data["OverallCond"] = all_data["OverallCond"].astype(str)
all_data["YrSold"] = all_data["YrSold"].astype(str)
all_data["MoSold"] = all_data["MoSold"].astype(str)
from sklearn.preprocessing import LabelEncoder

# Columns replaced by integer codes.
cols = (
    "FireplaceQu",
    "BsmtQual",
    "BsmtCond",
    "GarageQual",
    "GarageCond",
    "ExterQual",
    "ExterCond",
    "HeatingQC",
    "PoolQC",
    "KitchenQual",
    "BsmtFinType1",
    "BsmtFinType2",
    "Functional",
    "Fence",
    "BsmtExposure",
    "GarageFinish",
    "LandSlope",
    "LotShape",
    "PavedDrive",
    "Street",
    "Alley",
    "CentralAir",
    "MSSubClass",
    "OverallCond",
    "YrSold",
    "MoSold",
)
# A fresh encoder per column, fitted on that column's own values.
for c in cols:
    lbl = LabelEncoder()
    lbl.fit(list(all_data[c].values))
    all_data[c] = lbl.transform(list(all_data[c].values))
print("Shape all_data: {}".format(all_data.shape))
# Combined area feature: basement + first floor + second floor.
all_data["TotalSF"] = (
    all_data["TotalBsmtSF"] + all_data["1stFlrSF"] + all_data["2ndFlrSF"]
)
# Skewness of every non-object column, largest first.
numeric_feats = all_data.dtypes[all_data.dtypes != "object"].index
skewed_feats = (
    all_data[numeric_feats]
    .apply(lambda x: skew(x.dropna()))
    .sort_values(ascending=False)
)
print("\nSkew in numerical features: \n")
skewness = pd.DataFrame({"Skew": skewed_feats})
skewness.head(10)
# Keep only the features whose |skew| exceeds 0.75.  The mask must be a
# boolean *Series* to drop rows; the previous DataFrame-shaped mask
# (`abs(skewness) > 0.75`) only blanked values to NaN and kept every row, so
# all numeric features were transformed and the count below was wrong.
skewness = skewness[skewness["Skew"].abs() > 0.75]
print(
    "There are {} skewed numerical features to Box Cox transform".format(
        skewness.shape[0]
    )
)
from scipy.special import boxcox1p

# Apply the Box-Cox transform of 1 + x with a fixed lambda to each of them.
skewed_features = skewness.index
lam = 0.15
for feat in skewed_features:
    all_data[feat] = boxcox1p(all_data[feat], lam)
# One-hot encode the remaining categorical columns, then split the stacked
# frame back into its train and test parts.
all_data = pd.get_dummies(all_data)
print(all_data.shape)
train = all_data[:ntrain]
test = all_data[ntrain:]
import lightgbm as lgb
import xgboost as xgb
from sklearn.base import BaseEstimator, RegressorMixin, TransformerMixin, clone
from sklearn.ensemble import GradientBoostingRegressor  # RandomForestRegressor
from sklearn.kernel_ridge import KernelRidge
from sklearn.linear_model import ElasticNet  # BayesianRidge, LassoLarsIC
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold, cross_val_score  # train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import RobustScaler

n_folds = 5


def rmsle_cv(model):
    """Return the per-fold cross-validated RMSE of *model* on the training set.

    The score is the square root of the negated ``neg_mean_squared_error``
    for each of ``n_folds`` folds, computed on the module-level
    ``train``/``y_train``.

    The ``KFold`` splitter itself is handed to ``cross_val_score``.  Earlier
    code passed ``KFold(...).get_n_splits(...)``, which is only the integer
    number of folds, so the requested shuffling and seed were discarded.
    """
    kf = KFold(n_folds, shuffle=True, random_state=42)
    rmse = np.sqrt(
        -cross_val_score(
            model, train.values, y_train, scoring="neg_mean_squared_error", cv=kf
        )
    )
    return rmse


# Base models.  The two linear models are wrapped in a pipeline that applies
# RobustScaler first.
lasso = make_pipeline(RobustScaler(), Lasso(alpha=0.0005, random_state=1))
ENet = make_pipeline(
    RobustScaler(), ElasticNet(alpha=0.0005, l1_ratio=0.9, random_state=3)
)
KRR = KernelRidge(alpha=0.6, kernel="polynomial", degree=2, coef0=2.5)
# The three boosted models below are configured with n_estimators=1.
# NOTE(review): presumably to keep the stress test fast — confirm.
GBoost = GradientBoostingRegressor(
    n_estimators=1,
    learning_rate=0.05,
    max_depth=4,
    max_features="sqrt",
    min_samples_leaf=15,
    min_samples_split=10,
    loss="huber",
    random_state=5,
)
model_xgb = xgb.XGBRegressor(
    colsample_bytree=0.4603,
    gamma=0.0468,
    learning_rate=0.05,
    max_depth=3,
    min_child_weight=1.7817,
    n_estimators=1,
    reg_alpha=0.4640,
    reg_lambda=0.8571,
    subsample=0.5213,
    silent=1,
    random_state=7,
    nthread=-1,
)
model_lgb = lgb.LGBMRegressor(
    objective="regression",
    num_leaves=5,
    learning_rate=0.05,
    n_estimators=1,
    max_bin=55,
    bagging_fraction=0.8,
    bagging_freq=5,
    feature_fraction=0.2319,
    feature_fraction_seed=9,
    bagging_seed=9,
    min_data_in_leaf=6,
    min_sum_hessian_in_leaf=11,
)
# Cross-validated score (mean and std over the folds) of every base model.
score = rmsle_cv(lasso)
print("\nLasso score: {:.4f} ({:.4f})\n".format(score.mean(), score.std()))
score = rmsle_cv(ENet)
print("ElasticNet score: {:.4f} ({:.4f})\n".format(score.mean(), score.std()))
score = rmsle_cv(KRR)
print("Kernel Ridge score: {:.4f} ({:.4f})\n".format(score.mean(), score.std()))
score = rmsle_cv(GBoost)
print("Gradient Boosting score: {:.4f} ({:.4f})\n".format(score.mean(), score.std()))
score = rmsle_cv(model_xgb)
print("Xgboost score: {:.4f} ({:.4f})\n".format(score.mean(), score.std()))
score = rmsle_cv(model_lgb)
print("LGBM score: {:.4f} ({:.4f})\n".format(score.mean(), score.std()))


class AveragingModels(BaseEstimator, RegressorMixin, TransformerMixin):
    """Ensemble regressor that predicts the mean of its base models' outputs."""

    def __init__(self, models):
        # Unfitted templates; fit() works on clones and leaves these untouched.
        self.models = models

    def fit(self, X, y):
        """Clone every base model, fit each clone on (X, y) and return self."""
        clones = []
        for template in self.models:
            clones.append(clone(template))
        self.models_ = clones
        for fitted in self.models_:
            fitted.fit(X, y)
        return self

    def predict(self, X):
        """Return the row-wise mean of the fitted models' predictions for X."""
        per_model = [fitted.predict(X) for fitted in self.models_]
        stacked = np.column_stack(per_model)
        return np.mean(stacked, axis=1)


# Cross-validated score of the plain average of four base models.
averaged_models = AveragingModels(models=(ENet, GBoost, KRR, lasso))
score = rmsle_cv(averaged_models)
print(
    " Averaged base models score: {:.4f} ({:.4f})\n".format(score.mean(), score.std())
)


class StackingAveragedModels(BaseEstimator, RegressorMixin, TransformerMixin):
    """Stacked ensemble: base models feed out-of-fold predictions to a meta-model.

    Parameters
    ----------
    base_models : sequence of unfitted estimators used at the first level.
    meta_model : unfitted estimator trained on the base models' out-of-fold
        predictions.
    n_folds : number of folds used to build the out-of-fold predictions.
    """

    def __init__(self, base_models, meta_model, n_folds=5):
        self.base_models = base_models
        self.meta_model = meta_model
        self.n_folds = n_folds

    def fit(self, X, y):
        """Fit clones of the base models per fold, then fit the meta-model.

        X and y are indexed with integer index arrays, so they must support
        that kind of indexing (e.g. NumPy arrays).  Returns self.
        """
        # One list of fitted clones (one clone per fold) for each base model.
        self.base_models_ = [[] for _ in self.base_models]
        self.meta_model_ = clone(self.meta_model)
        kfold = KFold(n_splits=self.n_folds, shuffle=True, random_state=156)
        # Column i holds base model i's predictions for rows it did not see
        # during training.
        out_of_fold_predictions = np.zeros((X.shape[0], len(self.base_models)))
        for i, model in enumerate(self.base_models):
            for train_index, holdout_index in kfold.split(X, y):
                instance = clone(model)
                self.base_models_[i].append(instance)
                instance.fit(X[train_index], y[train_index])
                y_pred = instance.predict(X[holdout_index])
                out_of_fold_predictions[holdout_index, i] = y_pred
        self.meta_model_.fit(out_of_fold_predictions, y)
        return self

    # This method used to be dedented to module level, which left the class
    # without a `predict` and made every call on an instance fail.
    def predict(self, X):
        """Predict with the meta-model on the averaged base-model outputs.

        For each base model the predictions of its per-fold clones are
        averaged; those averages form the meta-model's input features.
        """
        meta_features = np.column_stack(
            [
                np.column_stack([model.predict(X) for model in base_models]).mean(
                    axis=1
                )
                for base_models in self.base_models_
            ]
        )
        return self.meta_model_.predict(meta_features)


# Cross-validated score of the stacked ensemble: three base models with the
# lasso pipeline as the meta-model.
stacked_averaged_models = StackingAveragedModels(
    base_models=(ENet, GBoost, KRR), meta_model=lasso
)
score = rmsle_cv(stacked_averaged_models)
print(
    "Stacking Averaged models score: {:.4f} ({:.4f})".format(score.mean(), score.std())
)


def rmsle(y, y_pred):
    """Return the square root of the mean squared error of *y_pred* vs *y*."""
    mse = mean_squared_error(y, y_pred)
    return np.sqrt(mse)


# Fit each final model on the full training set, print its training error
# and predict the test set.  Test predictions go through expm1, the inverse
# of the log1p applied to SalePrice earlier.
stacked_averaged_models.fit(train.values, y_train)
stacked_train_pred = stacked_averaged_models.predict(train.values)
stacked_pred = np.expm1(stacked_averaged_models.predict(test.values))
print(rmsle(y_train, stacked_train_pred))
model_xgb.fit(train, y_train)
xgb_train_pred = model_xgb.predict(train)
xgb_pred = np.expm1(model_xgb.predict(test))
print(rmsle(y_train, xgb_train_pred))
model_lgb.fit(train, y_train)
lgb_train_pred = model_lgb.predict(train)
lgb_pred = np.expm1(model_lgb.predict(test.values))
print(rmsle(y_train, lgb_train_pred))
# Weighted blend: 70% stacked model, 15% XGBoost, 15% LightGBM.
print("RMSLE score on train data:")
print(
    rmsle(
        y_train,
        stacked_train_pred * 0.70 + xgb_train_pred * 0.15 + lgb_train_pred * 0.15,
    )
)
ensemble = stacked_pred * 0.70 + xgb_pred * 0.15 + lgb_pred * 0.15
# Write the submission file: Id and the blended SalePrice prediction.
sub = pd.DataFrame()
sub["Id"] = test_ID
sub["SalePrice"] = ensemble
sub.to_csv("submission.csv", index=False)


================================================
FILE: stress_tests/kaggle/kaggle5.py
================================================
import matplotlib

matplotlib.use("PS")
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression, Perceptron, SGDClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC, LinearSVC
from sklearn.tree import DecisionTreeClassifier

import modin.pandas as pd

# Load both CSVs and run the exploratory summaries.  Most results are
# discarded: the calls exist to exercise the dataframe API.
train_df = pd.read_csv("train.csv")
test_df = pd.read_csv("test.csv")
combine = [train_df, test_df]
print(train_df.columns.values)
train_df.head()
train_df.tail()
train_df.info()
print("_" * 40)
test_df.info()
train_df.describe()
train_df.describe(include=["O"])
# Mean survival per category for each feature, highest survival first.
for feature in ("Pclass", "Sex", "SibSp", "Parch"):
    train_df[[feature, "Survived"]].groupby([feature], as_index=False).mean().sort_values(
        by="Survived", ascending=False
    )
# Three seaborn facet grids over the training frame; each grid is rebound to
# the same name once its legend has been added.
# NOTE(review): `size=` and `ci=` are keyword names from older seaborn
# releases — confirm they match the pinned seaborn version.
# Age histograms, one panel per (Pclass, Survived) pair.
grid = sns.FacetGrid(train_df, col="Survived", row="Pclass", size=2.2, aspect=1.6)
grid.map(plt.hist, "Age", alpha=0.5, bins=20)
grid.add_legend()
# Survival by Pclass, split by Sex, one row per port of embarkation.
grid = sns.FacetGrid(train_df, row="Embarked", size=2.2, aspect=1.6)
grid.map(sns.pointplot, "Pclass", "Survived", "Sex", palette="deep")
grid.add_legend()
# Fare by Sex, one panel per (Embarked, Survived) pair.
grid = sns.FacetGrid(train_df, row="Embarked", col="Survived", size=2.2, aspect=1.6)
grid.map(sns.barplot, "Sex", "Fare", alpha=0.5, ci=None)
grid.add_legend()
# Drop the Ticket and Cabin columns from both frames, printing the shapes
# before and after so the effect of the drop shows up in the log.
print("Before", train_df.shape, test_df.shape, combine[0].shape, combine[1].shape)
train_df = train_df.drop(["Ticket", "Cabin"], axis=1)
test_df = test_df.drop(["Ticket", "Cabin"], axis=1)
# drop() returned new frames, so `combine` must be rebuilt to point at them.
combine = [train_df, test_df]
# The "After" shapes used to be a bare tuple expression that was evaluated
# and thrown away; print them like the "Before" line.
print("After", train_df.shape, test_df.shape, combine[0].shape, combine[1].shape)
# Pull the honorific (the word immediately before a ".") out of each name.
for dataset in combine:
    dataset["Title"] = dataset.Name.str.extract(
        r" ([A-Za-z]+)\.", expand=False
    )  # noqa: W605
pd.crosstab(train_df["Title"], train_df["Sex"])
# Collapse uncommon titles into "Rare" and fold spelling variants into
# their common form, in the same order as separate replace() calls would.
for dataset in combine:
    dataset["Title"] = (
        dataset["Title"]
        .replace(
            [
                "Lady",
                "Countess",
                "Capt",
                "Col",
                "Don",
                "Dr",
                "Major",
                "Rev",
                "Sir",
                "Jonkheer",
                "Dona",
            ],
            "Rare",
        )
        .replace("Mlle", "Miss")
        .replace("Ms", "Miss")
        .replace("Mme", "Mrs")
    )
train_df[["Title", "Survived"]].groupby(["Title"], as_index=False).mean()


def title_mapping(string):
    """Return a random integer code from 1 to 5 inclusive.

    ``string`` is accepted so the function can be passed to ``Series.map``,
    but its value is ignored: the code is drawn from NumPy's global random
    state on every call.
    """
    low, high = 1, 6  # the upper bound is exclusive
    return np.random.randint(low, high)


# Replace each title with the code produced by title_mapping, using 0 for
# anything that is still missing afterwards.
for dataset in combine:
    dataset["Title"] = dataset["Title"].map(title_mapping)
    dataset["Title"] = dataset["Title"].fillna(0)
train_df.head()
# Name is no longer needed; PassengerId is only kept on the test frame.
train_df = train_df.drop(columns=["Name", "PassengerId"])
test_df = test_df.drop(columns=["Name"])
combine = [train_df, test_df]
train_df.shape, test_df.shape


def gender_mapping(string):
    """Return a random integer code, either 0 or 1.

    ``string`` is accepted so the function can be passed to ``Series.map``,
    but its value is ignored: the code is drawn from NumPy's global random
    state on every call.
    """
    low, high = 0, 2  # the upper bound is exclusive
    return np.random.randint(low, high)


# Encode Sex as an integer.  The deterministic dict-based mapping is kept
# below for reference; the active line maps through gender_mapping instead,
# which returns a random 0/1 regardless of the input value.
for dataset in combine:
    # dataset['Sex'] = dataset['Sex'].map( {'female': 1, 'male': 0} ).astype(int)
    dataset["Sex"] = dataset["Sex"].map(gender_mapping).astype(int)
train_df.head()
# Age histograms, one panel per (Pclass, Sex) pair.
grid = sns.FacetGrid(train_df, row="Pclass", col="Sex", size=2.2, aspect=1.6)
grid.map(plt.hist, "Age", alpha=0.5, bins=20)
grid.add_legend()
# Impute missing ages with the median age of passengers that share the same
# (Sex, Pclass) combination.  guess_ages[i, j] holds the guess for Sex == i
# and Pclass == j + 1.
#
# The median, rounding, fill and integer cast must all happen inside the
# per-dataset loop.  Previously they sat outside the loops, so only the last
# (i, j) cell of the last frame was computed and only that frame was filled,
# leaving NaN ages in the other frame.
guess_ages = np.zeros((2, 3))
for dataset in combine:
    for i in range(0, 2):
        for j in range(0, 3):
            guess_df = dataset[(dataset["Sex"] == i) & (dataset["Pclass"] == j + 1)][
                "Age"
            ].dropna()
            age_guess = guess_df.median()
            # Round the median age to the nearest 0.5.
            guess_ages[i, j] = int(age_guess / 0.5 + 0.5) * 0.5
    for i in range(0, 2):
        for j in range(0, 3):
            dataset.loc[
                (dataset.Age.isnull()) & (dataset.Sex == i) & (dataset.Pclass == j + 1),
                "Age",
            ] = guess_ages[i, j]
    # Every missing age has been filled, so the column can be cast to int.
    dataset["Age"] = dataset["Age"].astype(int)
train_df.head()
# Inspect survival across five equal-width age bands (training frame only).
train_df["AgeBand"] = pd.cut(train_df["Age"], 5)
train_df[["AgeBand", "Survived"]].groupby(
    ["AgeBand"], as_index=False
).mean().sort_values(by="AgeBand", ascending=True)
# Replace Age with an ordinal code per band.  The codes 0-3 are all <= 16,
# so rows that were already recoded never match a later condition.
for dataset in combine:
    dataset.loc[dataset["Age"] <= 16, "Age"] = 0
    dataset.loc[(dataset["Age"] > 16) & (dataset["Age"] <= 32), "Age"] = 1
    dataset.loc[(dataset["Age"] > 32) & (dataset["Age"] <= 48), "Age"] = 2
    dataset.loc[(dataset["Age"] > 48) & (dataset["Age"] <= 64), "Age"] = 3
    # This line used to be a bare selection with no assignment, which left
    # ages above 64 as raw values instead of the top band.
    dataset.loc[dataset["Age"] > 64, "Age"] = 4
train_df.head()
# AgeBand was only needed for the summary above.
train_df = train_df.drop(["AgeBand"], axis=1)
combine = [train_df, test_df]
train_df.head()
# FamilySize counts the passenger plus siblings/spouses and parents/children.
for dataset in combine:
    dataset["FamilySize"] = dataset["SibSp"] + dataset["Parch"] + 1
train_df[["FamilySize", "Survived"]].groupby(
    ["FamilySize"], as_index=False
).mean().sort_values(by="Survived", ascending=False)
# IsAlone flags passengers whose FamilySize is exactly 1.
for dataset in combine:
    dataset["IsAlone"] = 0
    dataset.loc[dataset["FamilySize"] == 1, "IsAlone"] = 1
train_df[["IsAlone", "Survived"]].groupby(["IsAlone"], as_index=False).mean()
# Only IsAlone is kept; the columns it was derived from are dropped.
train_df = train_df.drop(["Parch", "SibSp", "FamilySize"], axis=1)
test_df = test_df.drop(["Parch", "SibSp", "FamilySize"], axis=1)
combine = [train_df, test_df]
train_df.head()
# Interaction feature: product of the Age and Pclass columns.
for dataset in combine:
    dataset["Age*Class"] = dataset.Age * dataset.Pclass
train_df.loc[:, ["Age*Class", "Age", "Pclass"]].head(10)
# Fill missing ports of embarkation with the most frequent port in the
# training frame.
freq_port = train_df.Embarked.dropna().mode()[0]
freq_port
for dataset in combine:
    dataset["Embarked"] = dataset["Embarked"].fillna(freq_port)
train_df[["Embarked", "Survived"]].groupby(
    ["Embarked"], as_index=False
).mean().sort_values(by="Survived", ascending=False)


def embarked_mapping(string):
    """Return a random integer code from 0 to 2 inclusive.

    ``string`` is ignored; the code is drawn from NumPy's global random
    state on every call.  The Embarked encoding that follows this
    definition uses a fixed dict rather than this helper.
    """
    low, high = 0, 3  # the upper bound is exclusive
    return np.random.randint(low, high)


# Encode the port of embarkation as an integer code.
for dataset in combine:
    dataset["Embarked"] = dataset["Embarked"].map({"S": 0, "C": 1, "Q": 2}).astype(int)
train_df.head()
# Fill missing test fares with the median fare.  Assign the result back
# rather than calling fillna(inplace=True) on the selected column: the
# chained in-place form depends on the selection sharing storage with the
# frame, which pandas does not guarantee.
test_df["Fare"] = test_df["Fare"].fillna(test_df["Fare"].dropna().median())
test_df.head()
# Inspect survival across fare quartiles (training frame only).
train_df["FareBand"] = pd.qcut(train_df["Fare"], 4)
train_df[["FareBand", "Survived"]].groupby(
    ["FareBand"], as_index=False
).mean().sort_values(by="FareBand", ascending=True)
# Replace Fare with an ordinal code using fixed cut points.
for dataset in combine:
    dataset.loc[dataset["Fare"] <= 7.91, "Fare"] = 0
    dataset.loc[(dataset["Fare"] > 7.91) & (dataset["Fare"] <= 14.454), "Fare"] = 1
    dataset.loc[(dataset["Fare"] > 14.454) & (dataset["Fare"] <= 31), "Fare"] = 2
    dataset.loc[dataset["Fare"] > 31, "Fare"] = 3
    dataset["Fare"] = dataset["Fare"].astype(int)
# FareBand was only needed for the summary above.
train_df = train_df.drop(["FareBand"], axis=1)
combine = [train_df, test_df]
train_df.head(10)
test_df.head(10)
# Split features from the label.  PassengerId is dropped from the test
# features but stays on test_df.
X_train = train_df.drop("Survived", axis=1)
Y_train = train_df["Survived"]
X_test = test_df.drop("PassengerId", axis=1).copy()
X_train.shape, Y_train.shape, X_test.shape
# Each classifier below follows the same pattern: fit on the training data,
# predict the test set (overwriting Y_pred), and record the accuracy on the
# *training* data as a percentage rounded to two decimals.
logreg = LogisticRegression()
logreg.fit(X_train, Y_train)
Y_pred = logreg.predict(X_test)
acc_log = round(logreg.score(X_train, Y_train) * 100, 2)
acc_log
# Pair each feature name (every training column except the first) with its
# logistic-regression coefficient.
coeff_df = pd.DataFrame(train_df.columns.delete(0))
coeff_df.columns = ["Feature"]
coeff_df["Correlation"] = pd.Series(logreg.coef_[0])
coeff_df.sort_values(by="Correlation", ascending=False)
svc = SVC()
svc.fit(X_train, Y_train)
Y_pred = svc.predict(X_test)
acc_svc = round(svc.score(X_train, Y_train) * 100, 2)
acc_svc
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, Y_train)
Y_pred = knn.predict(X_test)
acc_knn = round(knn.score(X_train, Y_train) * 100, 2)
acc_knn
gaussian = GaussianNB()
gaussian.fit(X_train, Y_train)
Y_pred = gaussian.predict(X_test)
acc_gaussian = round(gaussian.score(X_train, Y_train) * 100, 2)
acc_gaussian
perceptron = Perceptron()
perceptron.fit(X_train, Y_train)
Y_pred = perceptron.predict(X_test)
acc_perceptron = round(perceptron.score(X_train, Y_train) * 100, 2)
acc_perceptron
linear_svc = LinearSVC()
linear_svc.fit(X_train, Y_train)
Y_pred = linear_svc.predict(X_test)
acc_linear_svc = round(linear_svc.score(X_train, Y_train) * 100, 2)
acc_linear_svc
sgd = SGDClassifier()
sgd.fit(X_train, Y_train)
Y_pred = sgd.predict(X_test)
acc_sgd = round(sgd.score(X_train, Y_train) * 100, 2)
acc_sgd
decision_tree = DecisionTreeClassifier()
decision_tree.fit(X_train, Y_train)
Y_pred = decision_tree.predict(X_test)
acc_decision_tree = round(decision_tree.score(X_train, Y_train) * 100, 2)
acc_decision_tree
# The random forest is fitted last, so Y_pred ends up holding its predictions.
random_forest = RandomForestClassifier(n_estimators=1)
random_forest.fit(X_train, Y_train)
Y_pred = random_forest.predict(X_test)
random_forest.score(X_train, Y_train)
acc_random_forest = round(random_forest.score(X_train, Y_train) * 100, 2)
acc_random_forest
# Tabulate the training accuracy of every classifier; the "Model" and
# "Score" lists are aligned by position.
models = pd.DataFrame(
    {
        "Model": [
            "Support Vector Machines",
            "KNN",
            "Logistic Regression",
            "Random Forest",
            "Naive Bayes",
            "Perceptron",
            "Stochastic Gradient Decent",
            "Linear SVC",
            "Decision Tree",
        ],
        "Score": [
            acc_svc,
            acc_knn,
            acc_log,
            acc_random_forest,
            acc_gaussian,
            acc_perceptron,
            acc_sgd,
            acc_linear_svc,
            acc_decision_tree,
        ],
    }
)
models.sort_values(by="Score", ascending=False)
# Build the submission frame from the most recent Y_pred.  It is not written
# to disk in this script.
submission = pd.DataFrame({"PassengerId": test_df["PassengerId"], "Survived": Y_pred})


================================================
FILE: stress_tests/kaggle/kaggle6.py
================================================
import matplotlib

matplotlib.use("PS")
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

np.random.seed(2)
import itertools

from keras.callbacks import ReduceLROnPlateau
from keras.layers import Conv2D, Dense, Dropout, Flatten, MaxPool2D
from keras.models import Sequential
from keras.optimizers import RMSprop
from keras.preprocessing.image import ImageDataGenerator
from keras.utils.np_utils import to_categorical  # convert to one-hot-encoding
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

sns.set(style="white", context="notebook", palette="deep")
# Load the data and separate the "label" column from the pixel columns.
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")
Y_train = train["label"]
X_train = train.drop(labels=["label"], axis=1)
del train
g = sns.countplot(Y_train)
Y_train.value_counts()
# Summaries of which columns contain nulls; results are discarded.
X_train.isnull().any().describe()
test.isnull().any().describe()
# Scale by 255 (presumably 8-bit pixel intensities — confirm against the data).
X_train = X_train / 255.0
test = test / 255.0
# Reshape each row into a 28x28 image with a single channel.
X_train = X_train.values.reshape(-1, 28, 28, 1)
test = test.values.reshape(-1, 28, 28, 1)
# One-hot encode the labels into 10 classes.
Y_train = to_categorical(Y_train, num_classes=10)
random_seed = 2
# Hold out 10% of the training data for validation.
X_train, X_val, Y_train, Y_val = train_test_split(
    X_train, Y_train, test_size=0.1, random_state=random_seed
)
g = plt.imshow(X_train[0][:, :, 0])
# Network layout: two 5x5 conv layers (32 filters) -> max-pool -> dropout,
# two 3x3 conv layers (64 filters) -> max-pool -> dropout, then a 256-unit
# dense layer with dropout and a 10-way softmax output.
model = Sequential()
model.add(
    Conv2D(
        filters=32,
        kernel_size=(5, 5),
        padding="Same",
        activation="relu",
        input_shape=(28, 28, 1),
    )
)
model.add(Conv2D(filters=32, kernel_size=(5, 5), padding="Same", activation="relu"))
model.add(MaxPool2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(filters=64, kernel_size=(3, 3), padding="Same", activation="relu"))
model.add(Conv2D(filters=64, kernel_size=(3, 3), padding="Same", activation="relu"))
model.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(256, activation="relu"))
model.add(Dropout(0.5))
model.add(Dense(10, activation="softmax"))
# NOTE(review): the `lr=` / `decay=` keywords and the "val_acc" metric name
# belong to older Keras releases — confirm against the pinned version.
optimizer = RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0)
model.compile(
    optimizer=optimizer, loss="categorical_crossentropy", metrics=["accuracy"]
)
# Halve the learning rate after 3 epochs without improvement in the
# monitored metric, down to a floor of 1e-5.
learning_rate_reduction = ReduceLROnPlateau(
    monitor="val_acc", patience=3, verbose=1, factor=0.5, min_lr=0.00001
)
epochs = 1  # Turn epochs to 30 to get 0.9967 accuracy
batch_size = 86
# Augmentation: small random rotations, zooms and shifts; no centering,
# normalisation, whitening or flipping.
datagen = ImageDataGenerator(
    featurewise_center=False,  # set input mean to 0 over the dataset
    samplewise_center=False,  # set each sample mean to 0
    featurewise_std_normalization=False,  # divide inputs by std of the dataset
    samplewise_std_normalization=False,  # divide each input by its std
    zca_whitening=False,  # apply ZCA whitening
    rotation_range=10,  # randomly rotate images in the range (degrees, 0 to 180)
    zoom_range=0.1,  # Randomly zoom image
    width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
    height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
    horizontal_flip=False,  # randomly flip images horizontally
    vertical_flip=False,  # randomly flip images vertically
)
datagen.fit(X_train)
# Train on augmented batches and validate on the held-out split.
# NOTE(review): `fit_generator` and the "acc"/"val_acc" history keys come
# from older Keras releases — confirm against the pinned version.
history = model.fit_generator(
    datagen.flow(X_train, Y_train, batch_size=batch_size),
    epochs=epochs,
    validation_data=(X_val, Y_val),
    verbose=2,
    steps_per_epoch=X_train.shape[0] // batch_size,
    callbacks=[learning_rate_reduction],
)
# Loss curves on the top axes, accuracy curves on the bottom axes.
fig, ax = plt.subplots(2, 1)
ax[0].plot(history.history["loss"], color="b", label="Training loss")
ax[0].plot(history.history["val_loss"], color="r", label="validation loss", axes=ax[0])
legend = ax[0].legend(loc="best", shadow=True)
ax[1].plot(history.history["acc"], color="b", label="Training accuracy")
ax[1].plot(history.history["val_acc"], color="r", label="Validation accuracy")
legend = ax[1].legend(loc="best", shadow=True)


def plot_confusion_matrix(
    cm, classes, normalize=False, title="Confusion matrix", cmap=plt.cm.Blues
):
    """Draw a confusion matrix on the current matplotlib figure.

    Parameters
    ----------
    cm : 2-D array
        Confusion matrix; rows are true labels, columns are predictions.
    classes : sequence
        Tick labels, one per class, used on both axes.
    normalize : bool, default False
        If True, the per-cell text (and the text-colour threshold) use the
        matrix divided row-wise by its row sums. The image itself is always
        drawn from ``cm`` as passed in, because it is rendered before the
        normalisation step.
    title : str
        Plot title.
    cmap : matplotlib colormap
        Colormap used for the image.
    """
    plt.imshow(cm, interpolation="nearest", cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)
    if normalize:
        cm = cm.astype("float") / cm.sum(axis=1)[:, np.newaxis]
    # The threshold must be computed on both paths: it used to live inside
    # the `if normalize:` branch, so the default normalize=False raised
    # NameError in the loop below.
    thresh = cm.max() / 2.0
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(
            j,
            i,
            cm[i, j],
            horizontalalignment="center",
            # White text on dark (high-count) cells, black text elsewhere.
            color="white" if cm[i, j] > thresh else "black",
        )
    plt.tight_layout()
    plt.ylabel("True label")
    plt.xlabel("Predicted label")


# Predict the validation split and reduce the one-hot / probability rows to
# class indices.
Y_pred = model.predict(X_val)
Y_true = np.argmax(Y_val, axis=1)
Y_pred_classes = np.argmax(Y_pred, axis=1)
confusion_mtx = confusion_matrix(Y_true, Y_pred_classes)
plot_confusion_matrix(confusion_mtx, classes=range(10))
# Boolean mask of misclassified samples, then the matching slices of every
# per-sample array.
errors = Y_pred_classes != Y_true
X_val_errors = X_val[errors]
Y_true_errors = Y_true[errors]
Y_pred_errors = Y_pred[errors]
Y_pred_classes_errors = Y_pred_classes[errors]


def display_errors(errors_index, img_errors, pred_errors, obs_errors):
    """Show six misclassified images in a 2x3 grid with their labels.

    ``errors_index`` supplies the six positions to show, in row-major
    order; each position indexes ``img_errors`` (the image, reshaped to
    28x28), ``pred_errors`` (predicted label) and ``obs_errors`` (true
    label).
    """
    nrows, ncols = 2, 3
    fig, ax = plt.subplots(nrows, ncols, sharex=True, sharey=True)
    cells = itertools.product(range(nrows), range(ncols))
    for n, (row, col) in enumerate(cells):
        error = errors_index[n]
        panel = ax[row, col]
        panel.imshow((img_errors[error]).reshape((28, 28)))
        caption = "Predicted label :{}\nTrue label :{}".format(
            pred_errors[error], obs_errors[error]
        )
        panel.set_title(caption)


# For each misclassified sample: the probability assigned to the predicted
# class, and the probability assigned to the true class.
Y_pred_errors_prob = np.max(Y_pred_errors, axis=1)
true_prob_errors = np.diagonal(np.take(Y_pred_errors, Y_true_errors, axis=1))
# Show the six errors with the largest gap between the two probabilities.
delta_pred_true_errors = Y_pred_errors_prob - true_prob_errors
sorted_dela_errors = np.argsort(delta_pred_true_errors)
most_important_errors = sorted_dela_errors[-6:]
display_errors(
    most_important_errors, X_val_errors, Y_pred_classes_errors, Y_true_errors
)
# Predict the test set and write one (ImageId, Label) row per image.
results = model.predict(test)
results = np.argmax(results, axis=1)
results = pd.Series(results, name="Label")
# Number the images 1..N from the actual prediction count instead of a
# hard-coded 28000, so the two columns always have the same length.
image_ids = pd.Series(range(1, len(results) + 1), name="ImageId")
submission = pd.concat([image_ids, results], axis=1)
submission.to_csv("cnn_mnist_datagen.csv", index=False)


================================================
FILE: stress_tests/kaggle/kaggle7.py
================================================
import matplotlib

matplotlib.use("PS")
import warnings

import numpy as np
from sklearn.preprocessing import LabelEncoder

import modin.pandas as pd

warnings.filterwarnings("ignore")
import matplotlib.pyplot as plt
import seaborn as sns

# Load the application tables and print their shapes.
app_train = pd.read_csv("application_train.csv")
print("Training data shape: ", app_train.shape)
app_train.head()
app_test = pd.read_csv("application_test.csv")
print("Testing data shape: ", app_test.shape)
app_test.head()
# Distribution of the TARGET column, as counts and as a histogram.
app_train["TARGET"].value_counts()
app_train["TARGET"].astype(int).plot.hist()


def missing_values_table(df):
    """Summarise the missing values in each column of ``df``.

    Prints how many columns ``df`` has and how many of them contain missing
    values, then returns a frame indexed by column name with the columns
    "Missing Values" (count) and "% of Total Values" (percentage, rounded
    to one decimal). Only columns with at least one missing value are kept,
    sorted by percentage in descending order.
    """
    null_counts = df.isnull().sum()
    null_percent = 100 * df.isnull().sum() / len(df)
    summary = pd.concat([null_counts, null_percent], axis=1)
    summary = summary.rename(columns={0: "Missing Values", 1: "% of Total Values"})
    # Keep only the columns whose missing percentage is non-zero.
    has_missing = summary.iloc[:, 1] != 0
    summary = summary[has_missing]
    summary = summary.sort_values("% of Total Values", ascending=False)
    summary = summary.round(1)
    print(
        "Your selected dataframe has " + str(df.shape[1]) + " columns.\n"
        "There are " + str(summary.shape[0]) + " columns that have missing values."
    )
    return summary


app_train.dtypes.value_counts()
# Number of distinct values in each object-typed column.
app_train.select_dtypes("object").apply(pd.Series.nunique, axis=0)
# Label-encode object columns with at most two distinct values; the encoder
# is fitted on the training column and applied to both frames.
le = LabelEncoder()
le_count = 0
for col in app_train:
    if app_train[col].dtype == "object":
        # If 2 or fewer unique categories
        if len(list(app_train[col].unique())) <= 2:
            # Train on the training data
            le.fit(app_train[col])
            # Transform both training and testing data
            app_train[col] = le.transform(app_train[col])
            app_test[col] = le.transform(app_test[col])
            le_count += 1
print("%d columns were label encoded." % le_count)
# One-hot encode whatever categorical columns remain.
app_train = pd.get_dummies(app_train)
app_test = pd.get_dummies(app_test)
print("Training Features shape: ", app_train.shape)
print("Testing Features shape: ", app_test.shape)
# Keep only the columns present in both frames.  TARGET is saved first and
# re-attached to the training frame after the alignment.
train_labels = app_train["TARGET"]
app_train, app_test = app_train.align(app_test, join="inner", axis=1)
app_train["TARGET"] = train_labels
print("Training Features shape: ", app_train.shape)
print("Testing Features shape: ", app_test.shape)
(app_train["DAYS_BIRTH"] / -365).describe()
app_train["DAYS_EMPLOYED"].describe()
app_train["DAYS_EMPLOYED"].plot.hist(title="Days Employment Histogram")
plt.xlabel("Days Employment")
# Rows whose DAYS_EMPLOYED equals the sentinel value 3 are treated as
# anomalous; compare the default rate of the two groups.
anom = app_train[app_train["DAYS_EMPLOYED"] == 3]
non_anom = app_train[app_train["DAYS_EMPLOYED"] != 3]
print(
    "The non-anomalies default on %0.2f%% of loans" % (100 * non_anom["TARGET"].mean())
)
print("The anomalies default on %0.2f%% of loans" % (100 * anom["TARGET"].mean()))
print("There are %d anomalous days of employment" % len(anom))
# Flag the anomalous rows, then blank the sentinel out.  The result is
# assigned back rather than using replace(inplace=True) on the selected
# column: the chained in-place form depends on the selection sharing
# storage with the frame, which pandas does not guarantee.
app_train["DAYS_EMPLOYED_ANOM"] = app_train["DAYS_EMPLOYED"] == 3
app_train["DAYS_EMPLOYED"] = app_train["DAYS_EMPLOYED"].replace({3: np.nan})
app_train["DAYS_EMPLOYED"].plot.hist(title="Days Employment Histogram")
plt.xlabel("Days Employment")
app_test["DAYS_EMPLOYED_ANOM"] = app_test["DAYS_EMPLOYED"] == 3
app_test["DAYS_EMPLOYED"] = app_test["DAYS_EMPLOYED"].replace({3: np.nan})
print(
    "There are %d anomalies in the test data out of %d entries"
    % (app_test["DAYS_EMPLOYED_ANOM"].sum(), len(app_test))
)
# Correlation of every column with TARGET, sorted ascending.
correlations = app_train.corr()["TARGET"].sort_values()
print("Most Positive Correlations:\n", correlations.tail(15))
print("\nMost Negative Correlations:\n", correlations.head(15))
# Store DAYS_BIRTH as a positive number before correlating it with TARGET.
app_train["DAYS_BIRTH"] = abs(app_train["DAYS_BIRTH"])
app_train["DAYS_BIRTH"].corr(app_train["TARGET"])
plt.style.use("fivethirtyeight")
# Histogram of client age in years (DAYS_BIRTH / 365).
plt.hist(app_train["DAYS_BIRTH"] / 365, edgecolor="k", bins=25)
plt.title("Age of Client")
plt.xlabel("Age (years)")
plt.ylabel("Count")
plt.figure(figsize=(10, 8))
# No plotting call is made on this figure; only its labels and title are set.
plt.xlabel("Age (years)")
plt.ylabel("Density")
plt.title("Distribution of Ages")
# Bin ages into ten 5-year bands between 20 and 70 and average per band.
age_data = app_train[["TARGET", "DAYS_BIRTH"]]
age_data["YEARS_BIRTH"] = age_data["DAYS_BIRTH"] / 365
age_data["YEARS_BINNED"] = pd.cut(
    age_data["YEARS_BIRTH"], bins=np.linspace(20, 70, num=11)
)
age_data.head(10)
age_groups = age_data.groupby("YEARS_BINNED").mean()
age_groups
# Correlations among TARGET, the three EXT_SOURCE columns and DAYS_BIRTH.
ext_data = app_train[
    ["TARGET", "EXT_SOURCE_1", "EXT_SOURCE_2", "EXT_SOURCE_3", "DAYS_BIRTH"]
]
ext_data_corrs = ext_data.corr()
ext_data_corrs
plt.figure(figsize=(8, 6))
sns.heatmap(ext_data_corrs, cmap=plt.cm.RdYlBu_r, vmin=-0.25, annot=True, vmax=0.6)
plt.title("Correlation Heatmap")
# Frame for pair plots: swap DAYS_BIRTH for YEARS_BIRTH, drop rows with
# missing values, and keep index labels up to 100000.
plot_data = ext_data.drop(columns=["DAYS_BIRTH"]).copy()
plot_data["YEARS_BIRTH"] = age_data["YEARS_BIRTH"]
plot_data = plot_data.dropna().loc[:100000, :]


def corr_func(x, y, **kwargs):
    """Annotate the current axes with the correlation coefficient of x and y.

    The coefficient comes from ``np.corrcoef`` and is written as
    ``r = <value>`` (two decimals) at axes-fraction position (0.2, 0.8).
    Extra keyword arguments are accepted and ignored.
    """
    coefficient = np.corrcoef(x, y)[0][1]
    label = "r = {:.2f}".format(coefficient)
    axes = plt.gca()
    axes.annotate(label, xy=(0.2, 0.8), xycoords=axes.transAxes, size=20)


# Build degree-3 polynomial features from the three EXT_SOURCE columns and
# DAYS_BIRTH, for both the training and the test frame.
poly_features = app_train[
    ["EXT_SOURCE_1", "EXT_SOURCE_2", "EXT_SOURCE_3", "DAYS_BIRTH", "TARGET"]
]
poly_features_test = app_test[
    ["EXT_SOURCE_1", "EXT_SOURCE_2", "EXT_SOURCE_3", "DAYS_BIRTH"]
]
# NOTE(review): `Imputer` and `get_feature_names` exist only in older
# scikit-learn releases — confirm against the pinned version.
from sklearn.preprocessing import Imputer

# Median-impute missing values; the imputer is fitted on training data only.
imputer = Imputer(strategy="median")
poly_target = poly_features["TARGET"]
poly_features = poly_features.drop(columns=["TARGET"])
poly_features = imputer.fit_transform(poly_features)
poly_features_test = imputer.transform(poly_features_test)
from sklearn.preprocessing import PolynomialFeatures

poly_transformer = PolynomialFeatures(degree=3)
poly_transformer.fit(poly_features)
poly_features = poly_transformer.transform(poly_features)
poly_features_test = poly_transformer.transform(poly_features_test)
print("Polynomial Features shape: ", poly_features.shape)
poly_transformer.get_feature_names(
    input_features=["EXT_SOURCE_1", "EXT_SOURCE_2", "EXT_SOURCE_3", "DAYS_BIRTH"]
)[:15]
# Wrap the transformed arrays back into frames with named columns.
poly_features = pd.DataFrame(
    poly_features,
    columns=poly_transformer.get_feature_names(
        ["EXT_SOURCE_1", "EXT_SOURCE_2", "EXT_SOURCE_3", "DAYS_BIRTH"]
    ),
)
# Correlation of each polynomial feature with TARGET.
poly_features["TARGET"] = poly_target
poly_corrs = poly_features.corr()["TARGET"].sort_values()
print(poly_corrs.head(10))
print(poly_corrs.tail(5))
poly_features_test = pd.DataFrame(
    poly_features_test,
    columns=poly_transformer.get_feature_names(
        ["EXT_SOURCE_1", "EXT_SOURCE_2", "EXT_SOURCE_3", "DAYS_BIRTH"]
    ),
)
# Merge the polynomial features back onto the full frames by SK_ID_CURR,
# then keep only the columns the two merged frames have in common.
poly_features["SK_ID_CURR"] = app_train["SK_ID_CURR"]
app_train_poly = app_train.merge(poly_features, on="SK_ID_CURR", how="left")
poly_features_test["SK_ID_CURR"] = app_test["SK_ID_CURR"]
app_test_poly = app_test.merge(poly_features_test, on="SK_ID_CURR", how="left")
app_train_poly, app_test_poly = app_train_poly.align(
    app_test_poly, join="inner", axis=1
)
print("Training data with polynomial features shape: ", app_train_poly.shape)
print("Testing data with polynomial features shape:  ", app_test_poly.shape)
# Add four ratio features to copies of the training and test frames.  The
# training copy is processed first and then the test copy, with the columns
# added in the same order for each.
app_train_domain = app_train.copy()
app_test_domain = app_test.copy()
for domain_df in (app_train_domain, app_test_domain):
    # Credit amount relative to total income.
    domain_df["CREDIT_INCOME_PERCENT"] = (
        domain_df["AMT_CREDIT"] / domain_df["AMT_INCOME_TOTAL"]
    )
    # Annuity relative to total income.
    domain_df["ANNUITY_INCOME_PERCENT"] = (
        domain_df["AMT_ANNUITY"] / domain_df["AMT_INCOME_TOTAL"]
    )
    # Annuity relative to the credit amount.
    domain_df["CREDIT_TERM"] = domain_df["AMT_ANNUITY"] / domain_df["AMT_CREDIT"]
    # Days employed relative to DAYS_BIRTH.
    domain_df["DAYS_EMPLOYED_PERCENT"] = (
        domain_df["DAYS_EMPLOYED"] / domain_df["DAYS_BIRTH"]
    )
from sklearn.preprocessing import Imputer, MinMaxScaler

# Separate the features from TARGET before imputing and scaling.
if "TARGET" in app_train.columns:
    train = app_train.drop(columns=["TARGET"])
    # TODO (williamma12): Not sure why this line is necessary but it is
    app_test = app_test.drop(columns=["TARGET"])
else:
    train = app_train.copy()
features = list(train.columns)
test = app_test.copy()
# Median imputation followed by min-max scaling to [0, 1]; both
# transformers are fitted on the training data only.
imputer = Imputer(strategy="median")
scaler = MinMaxScaler(feature_range=(0, 1))
imputer.fit(train)
train = imputer.transform(train)
test = imputer.transform(app_test)
scaler.fit(train)
train = scaler.transform(train)
test = scaler.transform(test)
print("Training data shape: ", train.shape)
print("Testing data shape: ", test.shape)
from sklearn.linear_model import LogisticRegression

# Logistic-regression baseline; column 1 of predict_proba is written to the
# submission as TARGET.
log_reg = LogisticRegression(C=0.0001)
log_reg.fit(train, train_labels)
log_reg_pred = log_reg.predict_proba(test)[:, 1]
submit = app_test[["SK_ID_CURR"]]
submit["TARGET"] = log_reg_pred
submit.head()
submit.to_csv("log_reg_baseline.csv", index=False)
from sklearn.ensemble import RandomForestClassifier

# Random forest on the baseline features.
random_forest = RandomForestClassifier(
    n_estimators=100, random_state=50, verbose=1, n_jobs=-1
)
random_forest.fit(train, train_labels)
feature_importance_values = random_forest.feature_importances_
feature_importances = pd.DataFrame(
    {"feature": features, "importance": feature_importance_values}
)
predictions = random_forest.predict_proba(test)[:, 1]
submit = app_test[["SK_ID_CURR"]]
submit["TARGET"] = predictions
submit.to_csv("random_forest_baseline.csv", index=False)
# Random forest on the polynomial-feature frames (median-imputed, then
# min-max scaled, both fitted on the training frame).
poly_features_names = list(app_train_poly.columns)
imputer = Imputer(strategy="median")
poly_features = imputer.fit_transform(app_train_poly)
poly_features_test = imputer.transform(app_test_poly)
scaler = MinMaxScaler(feature_range=(0, 1))
poly_features = scaler.fit_transform(poly_features)
poly_features_test = scaler.transform(poly_features_test)
random_forest_poly = RandomForestClassifier(
    n_estimators=100, random_state=50, verbose=1, n_jobs=-1
)
random_forest_poly.fit(poly_features, train_labels)
predictions = random_forest_poly.predict_proba(poly_features_test)[:, 1]
submit = app_test[["SK_ID_CURR"]]
submit["TARGET"] = predictions
submit.to_csv("random_forest_baseline_engineered.csv", index=False)
# Random forest on the domain-feature frames; TARGET is dropped from both
# frames before fitting.
app_train_domain = app_train_domain.drop(columns="TARGET")
app_test_domain = app_test_domain.drop(columns="TARGET")
domain_features_names = list(app_train_domain.columns)
imputer = Imputer(strategy="median")
domain_features = imputer.fit_transform(app_train_domain)
domain_features_test = imputer.transform(app_test_domain)
scaler = MinMaxScaler(feature_range=(0, 1))
domain_features = scaler.fit_transform(domain_features)
domain_features_test = scaler.transform(domain_features_test)
random_forest_domain = RandomForestClassifier(
    n_estimators=100, random_state=50, verbose=1, n_jobs=-1
)
random_forest_domain.fit(domain_features, train_labels)
feature_importance_values_domain = random_forest_domain.feature_importances_
feature_importances_domain = pd.DataFrame(
    {"feature": domain_features_names, "importance": feature_importance_values_domain}
)
predictions = random_forest_domain.predict_proba(domain_features_test)[:, 1]
submit = app_test[["SK_ID_CURR"]]
submit["TARGET"] = predictions
submit.to_csv("random_forest_baseline_domain.csv", index=False)


def plot_feature_importances(df):
    """Plot the 15 most important features as a horizontal bar chart.

    ``df`` must have "feature" and "importance" columns. It is sorted by
    importance (descending) and re-indexed, and gains an
    "importance_normalized" column holding each importance divided by the
    total. The sorted frame is returned.
    """
    df = df.sort_values("importance", ascending=False).reset_index()
    total_importance = df["importance"].sum()
    df["importance_normalized"] = df["importance"] / total_importance
    plt.figure(figsize=(10, 6))
    ax = plt.subplot()
    # Reverse the bar positions so the most important feature is drawn at
    # the top of the chart.
    bar_positions = list(reversed(list(df.index[:15])))
    top_importances = df["importance_normalized"].head(15)
    ax.barh(bar_positions, top_importances, align="center", edgecolor="k")
    ax.set_yticks(list(reversed(list(df.index[:15]))))
    ax.set_yticklabels(df["feature"].head(15))
    plt.xlabel("Normalized Importance")
    plt.title("Feature Importances")
    return df


# Plot the importances from the baseline and the domain-feature random
# forests; each call returns the importance frame sorted in descending order.
feature_importances_sorted = plot_feature_importances(feature_importances)
feature_importances_domain_sorted = plot_feature_importances(feature_importances_domain)
import gc

import lightgbm as lgb
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import KFold


def model(features, test_features, encoding="ohe", n_folds=5):
    """Train a LightGBM classifier with K-fold cross-validation.

    Parameters
    ----------
    features : DataFrame
        Training data; must contain "SK_ID_CURR" and "TARGET" columns.
    test_features : DataFrame
        Test data; must contain an "SK_ID_CURR" column.
    encoding : {"ohe", "le"}, default "ohe"
        "ohe" one-hot encodes both frames and keeps their common columns;
        "le" label-encodes every object-typed column.
    n_folds : int, default 5
        Number of cross-validation folds.

    Returns
    -------
    submission : DataFrame
        "SK_ID_CURR" and "TARGET" (test predictions averaged over folds).
    feature_importances : DataFrame
        "feature" and "importance" (importances averaged over folds).
    metrics : DataFrame
        Train and validation AUC per fold, plus an "overall" row.

    Raises
    ------
    ValueError
        If ``encoding`` is neither "ohe" nor "le".
    """
    test_ids = test_features["SK_ID_CURR"]
    labels = features["TARGET"]
    features = features.drop(columns=["SK_ID_CURR", "TARGET"])
    test_features = test_features.drop(columns=["SK_ID_CURR"])
    if encoding == "ohe":
        features = pd.get_dummies(features)
        test_features = pd.get_dummies(test_features)
        # Keep only the dummy columns that exist in both frames.
        features, test_features = features.align(test_features, join="inner", axis=1)
        cat_indices = "auto"
    elif encoding == "le":
        label_encoder = LabelEncoder()
        cat_indices = []
        for i, col in enumerate(features):
            if features[col].dtype == "object":
                # Fit on the training column, then reuse the same encoding
                # for the test column.
                features[col] = label_encoder.fit_transform(
                    np.array(features[col].astype(str)).reshape((-1,))
                )
                test_features[col] = label_encoder.transform(
                    np.array(test_features[col].astype(str)).reshape((-1,))
                )
                # Remember the column position for categorical_feature.
                cat_indices.append(i)
    else:
        raise ValueError("Encoding must be either 'ohe' or 'le'")
    print("Training Data Shape: ", features.shape)
    print("Testing Data Shape: ", test_features.shape)
    feature_names = list(features.columns)
    features = np.array(features)
    test_features = np.array(test_features)
    k_fold = KFold(n_splits=n_folds, shuffle=True, random_state=50)
    # Accumulators that are averaged or filled across the folds.
    feature_importance_values = np.zeros(len(feature_names))
    test_predictions = np.zeros(test_features.shape[0])
    out_of_fold = np.zeros(features.shape[0])
    valid_scores = []
    train_scores = []
    for train_indices, valid_indices in k_fold.split(features):
        train_features, train_labels = features[train_indices], labels[train_indices]
        valid_features, valid_labels = features[valid_indices], labels[valid_indices]
        # This local `model` shadows the enclosing function's name inside the
        # body; it is deleted at the end of every fold.
        model = lgb.LGBMClassifier(
            n_estimators=10000,
            objective="binary",
            class_weight="balanced",
            learning_rate=0.05,
            reg_alpha=0.1,
            reg_lambda=0.1,
            subsample=0.8,
            n_jobs=-1,
            random_state=50,
        )
        # NOTE(review): `early_stopping_rounds` and `verbose` as fit()
        # keywords belong to older LightGBM releases — confirm the pinned
        # version.
        model.fit(
            train_features,
            train_labels,
            eval_metric="auc",
            eval_set=[(valid_features, valid_labels), (train_features, train_labels)],
            eval_names=["valid", "train"],
            categorical_feature=cat_indices,
            early_stopping_rounds=100,
            verbose=200,
        )
        best_iteration = model.best_iteration_
        # Each fold contributes 1/n_splits of the final importances and test
        # predictions.
        feature_importance_values += model.feature_importances_ / k_fold.n_splits
        test_predictions += (
            model.predict_proba(test_features, num_iteration=best_iteration)[:, 1]
            / k_fold.n_splits
        )
        # Out-of-fold predictions: each row is predicted by the fold in which
        # it was held out.
        out_of_fold[valid_indices] = model.predict_proba(
            valid_features, num_iteration=best_iteration
        )[:, 1]
        valid_score = model.best_score_["valid"]["auc"]
        train_score = model.best_score_["train"]["auc"]
        valid_scores.append(valid_score)
        train_scores.append(train_score)
        # Release the per-fold objects before the next fold starts.
        gc.enable()
        del model, train_features, valid_features
        gc.collect()
    submission = pd.DataFrame({"SK_ID_CURR": test_ids, "TARGET": test_predictions})
    feature_importances = pd.DataFrame(
        {"feature": feature_names, "importance": feature_importance_values}
    )
    # "overall" row: validation AUC over all out-of-fold predictions, and the
    # mean of the per-fold training AUCs.
    valid_auc = roc_auc_score(labels, out_of_fold)
    valid_scores.append(valid_auc)
    train_scores.append(np.mean(train_scores))
    fold_names = list(range(n_folds))
    fold_names.append("overall")
    metrics = pd.DataFrame(
        {"fold": fold_names, "train": train_scores, "valid": valid_scores}
    )
    return submission, feature_importances, metrics


# Baseline LightGBM run on the aligned application frames.
submission, fi, metrics = model(app_train, app_test)
print("Baseline metrics")
print(metrics)
fi_sorted = plot_feature_importances(fi)
submission.to_csv("baseline_lgb.csv", index=False)
# Same run on the domain-feature frames; model() reads TARGET from its first
# argument, so the label column is re-attached first.
app_train_domain["TARGET"] = train_labels
submission_domain, fi_domain, metrics_domain = model(app_train_domain, app_test_domain)
print("Baseline with domain knowledge features metrics")
print(metrics_domain)
fi_sorted = plot_feature_importances(fi_domain)
submission_domain.to_csv("baseline_lgb_domain_features.csv", index=False)


================================================
FILE: stress_tests/kaggle/kaggle8.py
================================================
from sklearn.ensemble import RandomForestRegressor

import modin.pandas as pd

# Fit a random forest on a handful of numeric columns of train.csv, then
# predict SalePrice for test.csv and write the predictions to submission.csv.
train = pd.read_csv("train.csv")
target = train["SalePrice"]
predictor_cols = ["LotArea", "OverallQual", "YearBuilt", "TotRmsAbvGrd"]
features = train[predictor_cols]

forest = RandomForestRegressor()
forest.fit(features, target)

# Score the held-out file using the same predictor columns.
test = pd.read_csv("test.csv")
predicted_prices = forest.predict(test[predictor_cols])
print(predicted_prices)

# One row per test Id with its predicted price.
pd.DataFrame({"Id": test["Id"], "SalePrice": predicted_prices}).to_csv(
    "submission.csv", index=False
)


================================================
FILE: stress_tests/kaggle/kaggle9.py
================================================
import matplotlib

matplotlib.use("PS")
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import skew

import modin.pandas as pd

# Load both files; the bare `train.head()` is a leftover notebook cell whose
# result is discarded.
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")
train.head()
# Stack the feature columns (MSSubClass through SaleCondition) of train and
# test so that every transformation below is applied to both at once.
all_data = pd.concat(
    (
        train.loc[:, "MSSubClass":"SaleCondition"],
        test.loc[:, "MSSubClass":"SaleCondition"],
    )
)
matplotlib.rcParams["figure.figsize"] = (12.0, 6.0)
# Histogram of the raw price next to its log1p transform.
prices = pd.DataFrame(
    {"price": train["SalePrice"], "log(price + 1)": np.log1p(train["SalePrice"])}
)
prices.hist()
# From here on the target is log1p(SalePrice); predictions are mapped back
# with expm1 later in the script.
train["SalePrice"] = np.log1p(train["SalePrice"])
# Columns whose dtype is not object are treated as numeric.
numeric_feats = all_data.dtypes[all_data.dtypes != "object"].index
skewed_feats = train[numeric_feats].apply(
    lambda x: skew(x.dropna())
)  # compute skewness
# Keep only features with skewness above 0.75 and log1p-transform them in
# the combined frame.
skewed_feats = skewed_feats[skewed_feats > 0.75]
skewed_feats = skewed_feats.index
all_data[skewed_feats] = np.log1p(all_data[skewed_feats])
# Expand the remaining non-numeric columns into indicator columns, then fill
# missing values with the per-column mean.
all_data = pd.get_dummies(all_data)
all_data = all_data.fillna(all_data.mean())
# Split the combined frame back by position: the first train.shape[0] rows
# came from train, the rest from test.
X_train = all_data[: train.shape[0]]
X_test = all_data[train.shape[0] :]
y = train.SalePrice
from sklearn.linear_model import LassoCV  # RidgeCV, ElasticNet, LassoLarsCV
from sklearn.linear_model import Ridge
from sklearn.model_selection import cross_val_score


def rmse_cv(model):
    """Return the per-fold RMSE of ``model`` under 5-fold cross-validation.

    The model is scored on the module-level ``X_train`` / ``y`` with the
    "neg_mean_squared_error" scorer; the scores are negated and square-rooted
    to turn them into root-mean-squared errors.
    """
    neg_mse_per_fold = cross_val_score(
        model, X_train, y, scoring="neg_mean_squared_error", cv=5
    )
    return np.sqrt(-neg_mse_per_fold)


# --- Ridge: cross-validated RMSE for a range of regularisation strengths ---
model_ridge = Ridge()
alphas = [0.05, 0.1, 0.3, 1, 3, 5, 10, 15, 30, 50, 75]
cv_ridge = [rmse_cv(Ridge(alpha=alpha)).mean() for alpha in alphas]
cv_ridge = pd.Series(cv_ridge, index=alphas)
cv_ridge.plot(title="Validation - Just Do It")
plt.xlabel("alpha")
plt.ylabel("rmse")
cv_ridge.min()
# --- Lasso: alpha picked by LassoCV, then inspect the coefficients ---
model_lasso = LassoCV(alphas=[1, 0.1, 0.001, 0.0005]).fit(X_train, y)
rmse_cv(model_lasso).mean()
coef = pd.Series(model_lasso.coef_, index=X_train.columns)
print(
    "Lasso picked "
    + str(sum(coef != 0))
    + " variables and eliminated the other "
    + str(sum(coef == 0))
    + " variables"
)
# Plot the ten most negative and the ten most positive coefficients.
imp_coef = pd.concat([coef.sort_values().head(10), coef.sort_values().tail(10)])
matplotlib.rcParams["figure.figsize"] = (8.0, 10.0)
imp_coef.plot(kind="barh")
plt.title("Coefficients in the Lasso Model")
# Residuals of the lasso fit on the training data.
matplotlib.rcParams["figure.figsize"] = (6.0, 6.0)
preds = pd.DataFrame({"preds": model_lasso.predict(X_train), "true": y})
preds["residuals"] = preds["true"] - preds["preds"]
preds.plot(x="preds", y="residuals", kind="scatter")
import xgboost as xgb

# --- XGBoost: cross-validate, then fit a regressor ---
dtrain = xgb.DMatrix(X_train, label=y)
dtest = xgb.DMatrix(X_test)
params = {"max_depth": 2, "eta": 0.1}
model = xgb.cv(params, dtrain, num_boost_round=500, early_stopping_rounds=100)
model.loc[30:, ["test-rmse-mean", "train-rmse-mean"]].plot()
model_xgb = xgb.XGBRegressor(
    n_estimators=360, max_depth=2, learning_rate=0.1
)  # the params were tuned using xgb.cv
model_xgb.fit(X_train, y)
# The models were trained on log1p(SalePrice), so expm1 maps predictions back
# to the price scale.
xgb_preds = np.expm1(model_xgb.predict(X_test))
lasso_preds = np.expm1(model_lasso.predict(X_test))
predictions = pd.DataFrame({"xgb": xgb_preds, "lasso": lasso_preds})
predictions.plot(x="xgb", y="lasso", kind="scatter")
# Blend the two models (70% lasso, 30% xgboost) and write the solution file.
preds = 0.7 * lasso_preds + 0.3 * xgb_preds
solution = pd.DataFrame({"id": test.Id, "SalePrice": preds})
solution.to_csv("ridge_sol.csv", index=False)
from keras.layers import Dense
from keras.models import Sequential
from keras.regularizers import l1
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# --- Keras: a single L1-regularised linear unit on standardised features ---
X_train = StandardScaler().fit_transform(X_train)
X_tr, X_val, y_tr, y_val = train_test_split(X_train, y, random_state=3)
X_tr.shape
X_tr
model = Sequential()
# `kernel_regularizer` is the keyword current Keras uses for the weight
# penalty; the Keras 1 spelling `W_regularizer` is rejected by newer releases.
model.add(Dense(1, input_dim=X_train.shape[1], kernel_regularizer=l1(0.001)))
model.compile(loss="mse", optimizer="adam")
model.summary()
hist = model.fit(X_tr, y_tr, validation_data=(X_val, y_val))
pd.Series(model.predict(X_val)[:, 0]).hist()


================================================
FILE: stress_tests/run_stress_tests.sh
================================================
#!/usr/bin/env bash

# Show explicitly which commands are currently running.
set -x

# TODO (williamma12): Once we use clusters, make sure to download latest wheels
# from s3 bucket instead of building ray
# Ray directory (optional first argument; empty means "do not install ray
# from source")
RAY_DIR=${1}

# Directory containing this script. `&&` keeps `pwd` from reporting the
# caller's directory if the `cd` fails.
ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE:-$0}")" && pwd)
# Timestamped log that collects the output of every step below.
RESULT_FILE=$ROOT_DIR/results-$(date '+%Y-%m-%d_%H-%M-%S').log
# Quote the path so a checkout directory containing whitespace still works.
echo "Logging to" "$RESULT_FILE"
touch "$RESULT_FILE"

# Create a virtualenv under $ROOT_DIR and install ray (only when RAY_DIR is
# set), modin from the parent directory, and the packages used by the kaggle
# scripts. Standard output of every command is appended to $RESULT_FILE,
# which is quoted so that paths containing whitespace do not produce an
# "ambiguous redirect" error.
setup_environment(){
    pushd "$ROOT_DIR"
    # Create a virtual environment for the stress tests
    python -m virtualenv stress_tests_env >> "$RESULT_FILE"
    source stress_tests_env/bin/activate >> "$RESULT_FILE"

    # Install ray from source if available
    if [[ -n "$RAY_DIR" ]]; then
        pushd "$RAY_DIR"
        pip install -e . --verbose >> "$RESULT_FILE"
        popd
    fi

    # Install modin from source to virtual environment
    pushd "$ROOT_DIR/.."
    pip install -e . >> "$RESULT_FILE"
    popd

    # Install basic data science packages
    pip install matplotlib numpy seaborn scipy >> "$RESULT_FILE"

    # Install machine learning packages
    pip install scikit-learn xgboost lightgbm keras >> "$RESULT_FILE"

    # Install packages for kaggle18
    pip install nltk wordcloud plotly bokeh pyLDAvis >> "$RESULT_FILE"

    popd
}

# Remove the virtualenv created by setup_environment. The log path is quoted
# so that a path containing whitespace does not break the redirect.
teardown_environment(){
    pushd "$ROOT_DIR"
    rm -rf stress_tests_env >> "$RESULT_FILE"
    popd
}

# Run "<test_name>.py" under pytest, appending its output to $RESULT_FILE.
# When pytest exits non-zero, a "FAIL: <test_name>" marker line is appended
# to the log instead of aborting the script.
run_test(){
    local test_name=$1

    echo "Try running $test_name."
    if ! pytest -vls "$test_name.py" >> "$RESULT_FILE"; then
        echo "FAIL: $test_name" >> "$RESULT_FILE"
    fi
}

pushd "$ROOT_DIR"
    setup_environment
    run_test test_kaggle_ipynb
    teardown_environment
popd

cat "$RESULT_FILE"
# The log also receives installer and pytest output, so "is the log empty?"
# cannot tell success from failure. Exit non-zero only when run_test
# recorded a "FAIL: <name>" marker.
if grep -q "^FAIL: " "$RESULT_FILE"; then
    exit 1
fi


================================================
FILE: stress_tests/test_kaggle_ipynb.py
================================================
import logging
import os
import subprocess

import numpy as np
import pytest

import modin.pandas as pd

# import ray
# ray.init(address="localhost:6379")


logger = logging.getLogger(__name__)

# Target in-memory size, in bytes, for each synthetic dataset. As written this
# is 1 MiB; re-enabling the commented-out factor would make it 1 GiB.
DF_SIZE = 1 * 2**10 * 2**10  # * 2**10 # 1 GiB dataframes
# Directory containing this file
DIR_PATH = os.path.dirname(os.path.realpath(__file__))
# Directory where the generated CSV files are written and from which the
# kaggle scripts are run
KAGGLE_DIR_PATH = "{}/kaggle".format(DIR_PATH)


def create_dataframe(columns, dtypes, size):
    """Build a deterministic synthetic dataframe.

    Args:
        columns (list): Column names of the result.
        dtypes (list): Python type for the corresponding column. ``str``
            columns hold spreadsheet-style labels ("A", "B", ..., "Z", "AA",
            ...), ``bool`` columns alternate True/False starting with True,
            and any other dtype is handed to ``np.arange``.
        size: Number of rows.

    Returns:
        Dataframe with one generated column per (name, dtype) pair.
    """

    def _num_to_str(x):
        # Bijective base-26 conversion: 1 -> "A", 26 -> "Z", 27 -> "AA".
        chars = []
        while x:
            x, offset = divmod(x - 1, 26)
            chars.append(chr(offset + 65))
        label = "".join(chars[::-1])
        if "NA" not in label:
            return label
        # Labels containing "NA" are replaced. NOTE: the loop above has
        # already reduced x to 0, so this call always evaluates
        # _num_to_str(1), i.e. the replacement is always "A".
        return _num_to_str(x + 1)

    def _make_column(dtype):
        # Values are derived from the row position only, so the output is
        # fully reproducible.
        if dtype is str:
            return [_num_to_str(i + 1) for i in np.arange(size, dtype=int)]
        if dtype is bool:
            return [i % 2 == 0 for i in np.arange(size, dtype=int)]
        return np.arange(size, dtype=dtype)

    return pd.DataFrame(
        {name: _make_column(dtype) for name, dtype in zip(columns, dtypes)}
    )


@pytest.fixture
def generate_dataset():
    """Provide a factory that writes synthetic CSV datasets for one test.

    The fixture yields a builder function. Every file the builder records
    (the CSV it writes plus any ``files_to_remove``) is deleted, if it
    exists, once the test using the fixture has finished.
    """
    # Record of files generated for a test
    filenames = []

    def _dataset_builder(filename, columns, dtypes, size=DF_SIZE, files_to_remove=()):
        """Generate a synthetic dataset and write it under KAGGLE_DIR_PATH.

        Args:
            filename (str): Name of the CSV file to create.
            columns (list): Column names of the result.
            dtypes (list): List of dtypes for the corresponding column.
            size (int): Target in-memory size of the result, in bytes.
            files_to_remove (iterable): Names of extra files, relative to
                KAGGLE_DIR_PATH, to delete on teardown (for example output
                files written by the script under test).

        Returns:
            Modin dataframe of synthetic data following arguments.

        Raises:
            ValueError: If ``columns`` and ``dtypes`` differ in length.
        """
        # Add the files generated by the script to be removed
        filenames.extend(
            "{}/{}".format(KAGGLE_DIR_PATH, file) for file in files_to_remove
        )

        # Update filename to include path
        filename = "{}/{}".format(KAGGLE_DIR_PATH, filename)

        # Check that the number of column names is the same as the number of dtypes
        if len(columns) != len(dtypes):
            raise ValueError("len(columns) != len(dtypes)")

        # Determine number of rows for synthetic dataset: measure the memory
        # footprint of a single row and scale up to the requested size.
        row_size = (
            create_dataframe(columns, dtypes, 1)
            .memory_usage(index=False, deep=True)
            .sum()
        )
        # np.ceil returns a float; pass an integer row count on.
        n_rows = int(np.ceil(size / row_size))
        result = create_dataframe(columns, dtypes, n_rows)

        result.to_csv(filename)
        filenames.append(filename)
        return result

    # Return dataset builder factory
    yield _dataset_builder

    # Delete files created
    for filename in filenames:
        if os.path.exists(filename):
            os.remove(filename)


def test_kaggle3(generate_dataset):
    """Run kaggle3.py on a synthetic pokemon.csv and require exit status 0."""
    pokemon_columns = [
        "#",
        "Name",
        "Type 1",
        "Type 2",
        "HP",
        "Attack",
        "Defense",
        "Sp. Atk",
        "Sp. Def",
        "Speed",
        "Generation",
        "Legendary",
    ]
    pokemon_dtypes = [int, str, str, str, int, int, int, int, int, int, int, bool]
    generate_dataset(
        "pokemon.csv", pokemon_columns, pokemon_dtypes, files_to_remove=["graph.png"]
    )

    # Run the script in the kaggle directory, capturing both output streams.
    ipynb = subprocess.run(
        ["python", "kaggle3.py"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        cwd=KAGGLE_DIR_PATH,
    )
    errs = ipynb.stderr.decode("utf-8", errors="replace")

    # Report the script's stderr at error level and in the assertion message
    # so that a failing run is diagnosable from the pytest output.
    if ipynb.returncode:
        logger.error("Error message\n-------------\n %s", errs)

    logger.info("Finished kaggle3")
    assert ipynb.returncode == 0, errs


def test_kaggle4(generate_dataset):
    """Run kaggle4.py on synthetic train.csv/test.csv and require exit status 0."""
    columns = [
        "Id",
        "MSSubClass",
        "MSZoning",
        "LotFrontage",
        "LotArea",
        "Street",
        "Alley",
        "LotShape",
        "LandContour",
        "Utilities",
        "LotConfig",
        "LandSlope",
        "Neighborhood",
        "Condition1",
        "Condition2",
        "BldgType",
        "HouseStyle",
        "OverallQual",
        "OverallCond",
        "YearBuilt",
        "YearRemodAdd",
        "RoofStyle",
        "RoofMatl",
        "Exterior1st",
        "Exterior2nd",
        "MasVnrType",
        "MasVnrArea",
        "ExterQual",
        "ExterCond",
        "Foundation",
        "BsmtQual",
        "BsmtCond",
        "BsmtExposure",
        "BsmtFinType1",
        "BsmtFinSF1",
        "BsmtFinType2",
        "BsmtFinSF2",
        "BsmtUnfSF",
        "TotalBsmtSF",
        "Heating",
        "HeatingQC",
        "CentralAir",
        "Electrical",
        "1stFlrSF",
        "2ndFlrSF",
        "LowQualFinSF",
        "GrLivArea",
        "BsmtFullBath",
        "BsmtHalfBath",
        "FullBath",
        "HalfBath",
        "BedroomAbvGr",
        "KitchenAbvGr",
        "KitchenQual",
        "TotRmsAbvGrd",
        "Functional",
        "Fireplaces",
        "FireplaceQu",
        "GarageType",
        "GarageYrBlt",
        "GarageFinish",
        "GarageCars",
        "GarageArea",
        "GarageQual",
        "GarageCond",
        "PavedDrive",
        "WoodDeckSF",
        "OpenPorchSF",
        "EnclosedPorch",
        "3SsnPorch",
        "ScreenPorch",
        "PoolArea",
        "PoolQC",
        "Fence",
        "MiscFeature",
        "MiscVal",
        "MoSold",
        "YrSold",
        "SaleType",
        "SaleCondition",
        "SalePrice",
    ]
    # One dtype per entry of `columns`, in the same order.
    dtypes = [
        int,
        int,
        str,
        float,
        int,
        str,
        float,
        str,
        str,
        str,
        str,
        str,
        str,
        str,
        str,
        str,
        str,
        int,
        int,
        int,
        int,
        str,
        str,
        str,
        str,
        str,
        float,
        str,
        str,
        str,
        str,
        str,
        str,
        str,
        int,
        str,
        int,
        int,
        int,
        str,
        str,
        str,
        str,
        int,
        int,
        int,
        int,
        int,
        int,
        int,
        int,
        int,
        int,
        str,
        int,
        str,
        int,
        float,
        str,
        float,
        str,
        int,
        int,
        str,
        str,
        str,
        int,
        int,
        int,
        int,
        int,
        int,
        float,
        float,
        float,
        int,
        int,
        int,
        str,
        str,
        int,
    ]
    generate_dataset("train.csv", columns, dtypes)
    generate_dataset("test.csv", columns, dtypes)

    # Run the script in the kaggle directory, capturing both output streams.
    ipynb = subprocess.run(
        ["python", "kaggle4.py"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        cwd=KAGGLE_DIR_PATH,
    )
    errs = ipynb.stderr.decode("utf-8", errors="replace")

    # Report the script's stderr at error level and in the assertion message
    # so that a failing run is diagnosable from the pytest output.
    if ipynb.returncode:
        logger.error("Error message\n-------------\n %s", errs)

    logger.info("Finished kaggle4")
    assert ipynb.returncode == 0, errs


def test_kaggle5(generate_dataset):
    """Run kaggle5.py on synthetic train.csv/test.csv and require exit status 0."""
    columns = [
        "PassengerId",
        "Survived",
        "Pclass",
        "Name",
        "Sex",
        "Age",
        "SibSp",
        "Parch",
        "Ticket",
        "Fare",
        "Cabin",
        "Embarked",
    ]
    dtypes = [int, int, int, str, str, float, int, int, str, float, float, str]
    generate_dataset("train.csv", columns, dtypes)
    generate_dataset("test.csv", columns, dtypes)

    # Run the script in the kaggle directory, capturing both output streams.
    ipynb = subprocess.run(
        ["python", "kaggle5.py"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        cwd=KAGGLE_DIR_PATH,
    )
    errs = ipynb.stderr.decode("utf-8", errors="replace")

    # Report the script's stderr at error level and in the assertion message
    # so that a failing run is diagnosable from the pytest output.
    if ipynb.returncode:
        logger.error("Error message\n-------------\n %s", errs)

    logger.info("Finished kaggle5")
    assert ipynb.returncode == 0, errs


@pytest.mark.skip("Missing Original Data Schema")
def test_kaggle6(generate_dataset):
    """Run kaggle6.py and require exit status 0.

    Skipped: the column schema of the original data is not recorded, so the
    column and dtype lists below are empty placeholders.
    """
    columns = []
    dtypes = []
    generate_dataset("test.csv", columns, dtypes)
    generate_dataset("train.csv", columns, dtypes)

    # Run the script in the kaggle directory, capturing both output streams.
    ipynb = subprocess.run(
        ["python", "kaggle6.py"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        cwd=KAGGLE_DIR_PATH,
    )
    errs = ipynb.stderr.decode("utf-8", errors="replace")

    # Report the script's stderr at error level and in the assertion message
    # so that a failing run is diagnosable from the pytest output.
    if ipynb.returncode:
        logger.error("Error message\n-------------\n %s", errs)

    logger.info("Finished kaggle6")
    assert ipynb.returncode == 0, errs


def test_kaggle7(generate_dataset):
    columns = [
        "SK_ID_CURR",
        "TARGET",
        "NAME_CONTRACT_TYPE",
        "CODE_GENDER",
        "FLAG_OWN_CAR",
        "FLAG_OWN_REALTY",
        "CNT_CHILDREN",
        "AMT_INCOME_TOTAL",
        "AMT_CREDIT",
        "AMT_ANNUITY",
        "AMT_GOODS_PRICE",
        "NAME_TYPE_SUITE",
        "NAME_INCOME_TYPE",
        "NAME_EDUCATION_TYPE",
        "NAME_FAMILY_STATUS",
        "NAME_HOUSING_TYPE",
        "REGION_POPULATION_RELATIVE",
        "DAYS_BIRTH",
        "DAYS_EMPLOYED",
        "DAYS_REGISTRATION",
        "DAYS_ID_PUBLISH",
        "OWN_CAR_AGE",
        "FLAG_MOBIL",
        "FLAG_EMP_PHONE",
        "FLAG_WORK_PHONE",
        "FLAG_CONT_MOBILE",
        "FLAG_PHONE",
        "FLAG_EMAIL",
        "OCCUPATION_TYPE",
        "CNT_FAM_MEMBERS",
        "REGION_RATING_CLIENT",
        "REGION_RATING_CLIENT_W_CITY",
        "WEEKDAY_APPR_PROCESS_START",
        "HOUR_APPR_PROCESS_START",
        "REG_REGION_NOT_LIVE_REGION",
        "REG_REGION_NOT_WORK_REGION",
        "LIVE_REGION_NOT_WORK_REGION",
        "REG_CITY_NOT_LIVE_CITY",
        "REG_CITY_NOT_WORK_CITY",
        "LIVE_CITY_NOT_WORK_CITY",
        "ORGANIZATION_TYPE",
        "EXT_SOURCE_1",
        "EXT_SOURCE_2",
        "EXT_SOURCE_3",
        "APARTMENTS_AVG",
        "BASEMENTAREA_AVG",
        "YEARS_BEGINEXPLUATATION_AVG",
        "YEARS_BUILD_AVG",
        "COMMONAREA_AVG",
        "ELEVATORS_AVG",
        "ENTRANCES_AVG",
        "FLOORSMAX_AVG",
        "FLOORSMIN_AVG",
        "LANDAREA_AVG",
        "LIVINGAPARTMENTS_AVG",
        "LIVINGAREA_AVG",
        "NONLIVINGAPARTMENTS_AVG",
        "NONLIVINGAREA_AVG",
        "APARTMENTS_MODE",
        "BASEMENTAREA_MODE",
        "YEARS_BEGINEXPLUATATION_MODE",
        "YEARS_BUILD_MODE",
        "COMMONAREA_MODE",
        "ELEVATORS_MODE",
        "ENTRANCES_MODE",
        "FLOORSMAX_MODE",
        "FLOORSMIN_MODE",
        "LANDAREA_MODE",
        "LIVINGAPARTMENTS_MODE",
        "LIVINGAREA_MODE",
        "NONLIVINGAPARTMENTS_MODE",
        "NONLIVINGAREA_MODE",
        "APARTMENTS_MEDI",
        "BASEMENTAREA_MEDI",
        "YEARS_BEGINEXPLUATATION_MEDI",
        "YEARS_BUILD_MEDI",
        "COMMONAREA_MEDI",
        "ELEVATORS_MEDI",
        "ENTRANCES_MEDI",
        "FLOORSMAX_MEDI",
        "FLOORSMIN_MEDI",
        "LANDAREA_MEDI",
        "LIVINGAPARTMENTS_MEDI",
        "LIVINGAREA_MEDI",
        "NONLIVINGAPARTMENTS_MEDI",
        "NONLIVINGAREA_MEDI",
        "FONDKAPREMONT_MODE",
        "HOUSETYPE_MODE",
        "TOTALAREA_MODE",
        "WALLSMATERIAL_MODE",
        "EMERGENCYSTATE_MODE",
        "OBS_30_CNT_SOCIAL_CIRCLE",
        "DEF_30_CNT_SOCIAL_CIRCLE",
        "OBS_60_CNT_SOCIAL_CIRCLE",
        "DEF_60_CNT_SOCIAL_CIRCLE",
        "DAYS_LAST_PHONE_CHANGE",
        "FLAG_DOCUMENT_2",
        "FLAG_DOCUMENT_3",
        "FLAG_DOCUMENT_4",
        "FLAG_DOCUMENT_5",
        "FLAG_DOCUMENT_6",
        "FLAG_DOCUMENT_7",
        "FLAG_DOCUMENT_8",
        "FLAG_DOCUMENT_9",
        "FLAG_DOCUMENT_10",
        "FLAG_DOCUMENT_11",
        "FLAG_DOCUMENT_12",
        "FLAG_DOCUMENT_13",
        "FLAG_DOCUMENT_14",
        "FLAG_DOCUMENT_15",
        "FLAG_DOCUMENT_16",
        "FLAG_DOCUMENT_17",
        "FLAG_DOCUMENT_18",
        "FLAG_DOCUMENT_19",
        "FLAG_DOCUMENT_20",
        "FLAG_DOCUMENT_21",
        "AMT_REQ_CREDIT_BUREAU_HOUR",
        "AMT_REQ_CREDIT_BUREAU_DAY",
        "AMT_REQ_CREDIT_BUREAU_WEEK",
        "AMT_REQ_CREDIT_BUREAU_MON",
        "AMT_REQ_CREDIT_BUREAU_QRT",
        "AMT_REQ_CREDIT_BUREAU_YEAR",
    ]
    dtypes = [
        int,
        int,
        str,
        str,
        str,
        str,
        int,
        float,
        float,
        float,
        float,
        str,
        str,
        str,
        str,
        str,
        float,
        int,
        int,
        float,
        int,
        float,
        int,
        int,
        int,
        int,
        int,
        int,
        str,
        float,
        int,
        int,
        str,
        int,
        int,
        int,
        int,
        int,
        int,
        int,
        str,
        float,
        float,
        float,
        float,
        float,
        float,
        float,
        float,
        float,
        float,
        float,
        float,
        float,
        float,
        float,
        float,
        float,
        float,
        float,
        float,
        float,
        float,
        float,
        float,
        float,
        float,
        float,
        float,
        float,
        float,
        float,
        float,
        float,
        float,
        float,
        float,
        float,
        float,
        float,
        float,
        float,
        float,
        float,
        float,
        float,
        str,
        str,
        float,
        str,
        str,
        float,
        float,
        float,
        float,
        float,
        int,
        int,
        int,
        int,
        int,
        int,
        int,
        int,
        int,
        int,
        int,
        int,
        int,
        int,
        int,
        int,
        int,
        int,
        int,
        int,
        float,
        float,
        float,
        float,
        float,
        float,
    ]
    generate_dataset(
        "application_train.csv",
        columns,
        dtypes,
        files_to_remove=[
            "log_reg_baseline.csv",
            "random_forest_baseline.csv",
            "random_forest_baseline_engineered.csv",
            "random_forest_baseline_domain.csv",
            "baseline_lgb.csv",
            "baseline_lgb_domain_features.csv",
        ],
    )
    generate_dataset("application_test.csv", columns, dtypes)

    ipynb = subprocess.Popen(
        ["python", "kaggle7.py"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        cwd=KAGGLE_DIR_PATH,
    )
    outs, errs = ipynb.communicate()

    if ipynb.returncode:
        logging.debug("Error message\n-------------\n %s", errs.decode("utf-8"))

    logging.info("Finished kaggle7")
    assert ipynb.returncode == 0


def test_kaggle8(generate_dataset):
    """Run kaggle8.py on synthetic train.csv/test.csv and require exit status 0."""
    columns = [
        "Id",
        "MSSubClass",
        "MSZoning",
        "LotFrontage",
        "LotArea",
        "Street",
        "Alley",
        "LotShape",
        "LandContour",
        "Utilities",
        "LotConfig",
        "LandSlope",
        "Neighborhood",
        "Condition1",
        "Condition2",
        "BldgType",
        "HouseStyle",
        "OverallQual",
        "OverallCond",
        "YearBuilt",
        "YearRemodAdd",
        "RoofStyle",
        "RoofMatl",
        "Exterior1st",
        "Exterior2nd",
        "MasVnrType",
        "MasVnrArea",
        "ExterQual",
        "ExterCond",
        "Foundation",
        "BsmtQual",
        "BsmtCond",
        "BsmtExposure",
        "BsmtFinType1",
        "BsmtFinSF1",
        "BsmtFinType2",
        "BsmtFinSF2",
        "BsmtUnfSF",
        "TotalBsmtSF",
        "Heating",
        "HeatingQC",
        "CentralAir",
        "Electrical",
        "1stFlrSF",
        "2ndFlrSF",
        "LowQualFinSF",
        "GrLivArea",
        "BsmtFullBath",
        "BsmtHalfBath",
        "FullBath",
        "HalfBath",
        "BedroomAbvGr",
        "KitchenAbvGr",
        "KitchenQual",
        "TotRmsAbvGrd",
        "Functional",
        "Fireplaces",
        "FireplaceQu",
        "GarageType",
        "GarageYrBlt",
        "GarageFinish",
        "GarageCars",
        "GarageArea",
        "GarageQual",
        "GarageCond",
        "PavedDrive",
        "WoodDeckSF",
        "OpenPorchSF",
        "EnclosedPorch",
        "3SsnPorch",
        "ScreenPorch",
        "PoolArea",
        "PoolQC",
        "Fence",
        "MiscFeature",
        "MiscVal",
        "MoSold",
        "YrSold",
        "SaleType",
        "SaleCondition",
        "SalePrice",
    ]
    # One dtype per entry of `columns`, in the same order.
    dtypes = [
        int,
        int,
        str,
        float,
        int,
        str,
        float,
        str,
        str,
        str,
        str,
        str,
        str,
        str,
        str,
        str,
        str,
        int,
        int,
        int,
        int,
        str,
        str,
        str,
        str,
        str,
        float,
        str,
        str,
        str,
        str,
        str,
        str,
        str,
        float,
        str,
        float,
        float,
        float,
        str,
        str,
        str,
        str,
        int,
        int,
        int,
        int,
        float,
        float,
        int,
        int,
        int,
        int,
        str,
        int,
        str,
        int,
        float,
        str,
        float,
        str,
        float,
        float,
        str,
        str,
        str,
        int,
        int,
        int,
        int,
        int,
        int,
        float,
        str,
        float,
        int,
        int,
        int,
        str,
        str,
        int,
    ]
    generate_dataset("test.csv", columns, dtypes, files_to_remove=["submission.csv"])
    generate_dataset("train.csv", columns, dtypes)

    # Run the script in the kaggle directory, capturing both output streams.
    ipynb = subprocess.run(
        ["python", "kaggle8.py"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        cwd=KAGGLE_DIR_PATH,
    )
    errs = ipynb.stderr.decode("utf-8", errors="replace")

    # Report the script's stderr at error level and in the assertion message
    # so that a failing run is diagnosable from the pytest output.
    if ipynb.returncode:
        logger.error("Error message\n-------------\n %s", errs)

    logger.info("Finished kaggle8")
    assert ipynb.returncode == 0, errs


def test_kaggle9(generate_dataset):
    """Run kaggle9.py on synthetic train.csv/test.csv and require exit status 0."""
    columns = [
        "Id",
        "MSSubClass",
        "MSZoning",
        "LotFrontage",
        "LotArea",
        "Street",
        "Alley",
        "LotShape",
        "LandContour",
        "Utilities",
        "LotConfig",
        "LandSlope",
        "Neighborhood",
        "Condition1",
        "Condition2",
        "BldgType",
        "HouseStyle",
        "OverallQual",
        "OverallCond",
        "YearBuilt",
        "YearRemodAdd",
        "RoofStyle",
        "RoofMatl",
        "Exterior1st",
        "Exterior2nd",
        "MasVnrType",
        "MasVnrArea",
        "ExterQual",
        "ExterCond",
        "Foundation",
        "BsmtQual",
        "BsmtCond",
        "BsmtExposure",
        "BsmtFinType1",
        "BsmtFinSF1",
        "BsmtFinType2",
        "BsmtFinSF2",
        "BsmtUnfSF",
        "TotalBsmtSF",
        "Heating",
        "HeatingQC",
        "CentralAir",
        "Electrical",
        "1stFlrSF",
        "2ndFlrSF",
        "LowQualFinSF",
        "GrLivArea",
        "BsmtFullBath",
        "BsmtHalfBath",
        "FullBath",
        "HalfBath",
        "BedroomAbvGr",
        "KitchenAbvGr",
        "KitchenQual",
        "TotRmsAbvGrd",
        "Functional",
        "Fireplaces",
        "FireplaceQu",
        "GarageType",
        "GarageYrBlt",
        "GarageFinish",
        "GarageCars",
        "GarageArea",
        "GarageQual",
        "GarageCond",
        "PavedDrive",
        "WoodDeckSF",
        "OpenPorchSF",
        "EnclosedPorch",
        "3SsnPorch",
        "ScreenPorch",
        "PoolArea",
        "PoolQC",
        "Fence",
        "MiscFeature",
        "MiscVal",
        "MoSold",
        "YrSold",
        "SaleType",
        "SaleCondition",
        "SalePrice",
    ]
    # One dtype per entry of `columns`, in the same order.
    dtypes = [
        int,
        int,
        str,
        float,
        int,
        str,
        float,
        str,
        str,
        str,
        str,
        str,
        str,
        str,
        str,
        str,
        str,
        int,
        int,
        int,
        int,
        str,
        str,
        str,
        str,
        str,
        float,
        str,
        str,
        str,
        str,
        str,
        str,
        str,
        int,
        str,
        int,
        int,
        int,
        str,
        str,
        str,
        str,
        int,
        int,
        int,
        int,
        int,
        int,
        int,
        int,
        int,
        int,
        str,
        int,
        str,
        int,
        float,
        str,
        float,
        str,
        int,
        int,
        str,
        str,
        str,
        int,
        int,
        int,
        int,
        int,
        int,
        float,
        float,
        float,
        int,
        int,
        int,
        str,
        str,
        int,
    ]
    generate_dataset("test.csv", columns, dtypes, files_to_remove=["ridge_sol.csv"])
    generate_dataset("train.csv", columns, dtypes)

    # Run the script in the kaggle directory, capturing both output streams.
    ipynb = subprocess.run(
        ["python", "kaggle9.py"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        cwd=KAGGLE_DIR_PATH,
    )
    errs = ipynb.stderr.decode("utf-8", errors="replace")

    # Report the script's stderr at error level and in the assertion message
    # so that a failing run is diagnosable from the pytest output.
    if ipynb.returncode:
        logger.error("Error message\n-------------\n %s", errs)

    logger.info("Finished kaggle9")
    assert ipynb.returncode == 0, errs


def test_kaggle10(generate_dataset):
    """Run kaggle10.py on a synthetic column_2C_weka.csv and require exit status 0."""
    columns = [
        "pelvic_incidence",
        "pelvic_tilt numeric",
        "lumbar_lordosis_angle",
        "sacral_slope",
        "pelvic_radius",
        "degree_spondylolisthesis",
        "class",
    ]
    dtypes = [float, float, float, float, float, float, str]
    generate_dataset(
        "column_2C_weka.csv", columns, dtypes, files_to_remove=["graph.png"]
    )

    # Run the script in the kaggle directory, capturing both output streams.
    ipynb = subprocess.run(
        ["python", "kaggle10.py"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        cwd=KAGGLE_DIR_PATH,
    )
    errs = ipynb.stderr.decode("utf-8", errors="replace")

    # Report the script's stderr at error level and in the assertion message
    # so that a failing run is diagnosable from the pytest output.
    if ipynb.returncode:
        logger.error("Error message\n-------------\n %s", errs)

    logger.info("Finished kaggle10")
    assert ipynb.returncode == 0, errs


def test_kaggle12(generate_dataset):
    """Run kaggle12.py on synthetic train.csv/test.csv and require exit status 0."""
    columns = [
        "PassengerId",
        "Survived",
        "Pclass",
        "Name",
        "Sex",
        "Age",
        "SibSp",
        "Parch",
        "Ticket",
        "Fare",
        "Cabin",
        "Embarked",
    ]
    dtypes = [int, int, int, str, str, float, int, int, str, float, float, str]
    generate_dataset(
        "train.csv", columns, dtypes, files_to_remove=["ensemble_python_voting.csv"]
    )
    generate_dataset("test.csv", columns, dtypes)

    # Run the script in the kaggle directory, capturing both output streams.
    ipynb = subprocess.run(
        ["python", "kaggle12.py"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        cwd=KAGGLE_DIR_PATH,
    )
    errs = ipynb.stderr.decode("utf-8", errors="replace")

    # Report the script's stderr at error level and in the assertion message
    # so that a failing run is diagnosable from the pytest output.
    if ipynb.returncode:
        logger.error("Error message\n-------------\n %s", errs)

    logger.info("Finished kaggle12")
    assert ipynb.returncode == 0, errs


def test_kaggle13(generate_dataset):
    """Run kaggle13.py on a synthetic Iris.csv and require exit status 0."""
    columns = [
        "Id",
        "SepalLengthCm",
        "SepalWidthCm",
        "PetalLengthCm",
        "PetalWidthCm",
        "Species",
    ]
    dtypes = [int, float, float, float, float, str]
    generate_dataset("Iris.csv", columns, dtypes)

    # Run the script in the kaggle directory, capturing both output streams.
    ipynb = subprocess.run(
        ["python", "kaggle13.py"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        cwd=KAGGLE_DIR_PATH,
    )
    errs = ipynb.stderr.decode("utf-8", errors="replace")

    # Report the script's stderr at error level and in the assertion message
    # so that a failing run is diagnosable from the pytest output.
    if ipynb.returncode:
        logger.error("Error message\n-------------\n %s", errs)

    logger.info("Finished kaggle13")
    assert ipynb.returncode == 0, errs


def test_kaggle14(generate_dataset):
    """Run kaggle14.py on synthetic train.csv/test.csv and require exit status 0."""
    columns = [
        "PassengerId",
        "Survived",
        "Pclass",
        "Name",
        "Sex",
        "Age",
        "SibSp",
        "Parch",
        "Ticket",
        "Fare",
        "Cabin",
        "Embarked",
    ]
    dtypes = [int, int, int, str, str, float, int, int, str, float, float, str]
    generate_dataset("train.csv", columns, dtypes)
    generate_dataset("test.csv", columns, dtypes)

    # Run the script in the kaggle directory, capturing both output streams.
    ipynb = subprocess.run(
        ["python", "kaggle14.py"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        cwd=KAGGLE_DIR_PATH,
    )
    errs = ipynb.stderr.decode("utf-8", errors="replace")

    # Report the script's stderr at error level and in the assertion message
    # so that a failing run is diagnosable from the pytest output.
    if ipynb.returncode:
        logger.error("Error message\n-------------\n %s", errs)

    logger.info("Finished kaggle14")
    assert ipynb.returncode == 0, errs


def test_kaggle17(generate_dataset):
    """Run kaggle17.py on a synthetic melb_data.csv and require exit status 0."""
    columns = [
        "Suburb",
        "Address",
        "Rooms",
        "Type",
        "Price",
        "Method",
        "SellerG",
        "Date",
        "Distance",
        "Postcode",
        "Bedroom2",
        "Bathroom",
        "Car",
        "Landsize",
        "BuildingArea",
        "YearBuilt",
        "CouncilArea",
        "Lattitude",
        "Longtitude",
        "Regionname",
        "Propertycount",
    ]
    # One dtype per entry of `columns`, in the same order.
    dtypes = [
        str,
        str,
        int,
        str,
        float,
        str,
        str,
        str,
        float,
        float,
        float,
        float,
        float,
        float,
        float,
        float,
        str,
        float,
        float,
        str,
        float,
    ]
    generate_dataset("melb_data.csv", columns, dtypes)

    # Run the script in the kaggle directory, capturing both output streams.
    ipynb = subprocess.run(
        ["python", "kaggle17.py"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        cwd=KAGGLE_DIR_PATH,
    )
    errs = ipynb.stderr.decode("utf-8", errors="replace")

    # Report the script's stderr at error level and in the assertion message
    # so that a failing run is diagnosable from the pytest output.
    if ipynb.returncode:
        logger.error("Error message\n-------------\n %s", errs)

    logger.info("Finished kaggle17")
    assert ipynb.returncode == 0, errs


def test_kaggle18(generate_dataset):
    """Run ``kaggle18.py`` after requesting generated ``test.csv``/``train.csv``.

    The test passes when the script, executed from ``KAGGLE_DIR_PATH``, exits
    with status 0.
    """
    # TODO (williamma12): "category_name" should be strings, but that is not
    # currently captured by the data generation, so it is requested as int.
    schema = [
        ("train_id", int),
        ("name", str),
        ("item_condition_id", int),
        ("category_name", int),
        ("brand_name", float),
        ("price", float),
        ("shipping", int),
        ("item_description", str),
    ]
    columns = [name for name, _ in schema]
    dtypes = [dtype for _, dtype in schema]
    for filename in ("test.csv", "train.csv"):
        generate_dataset(filename, columns, dtypes)

    script = subprocess.Popen(
        ["python", "kaggle18.py"],
        cwd=KAGGLE_DIR_PATH,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    # Only stderr is inspected; stdout is drained so the child cannot block.
    _, stderr_bytes = script.communicate()
    exit_code = script.returncode

    if exit_code != 0:
        logging.debug(
            "Error message\n-------------\n %s", stderr_bytes.decode("utf-8")
        )

    logging.info("Finished kaggle18")
    assert exit_code == 0


def test_kaggle19(generate_dataset):
    """Run ``kaggle19.py`` after requesting a generated ``train.csv``.

    The test passes when the script, executed from ``KAGGLE_DIR_PATH``, exits
    with status 0.
    """
    # (column name, dtype) pairs, listed in column order.
    schema = [
        ("Id", str),
        ("groupId", str),
        ("matchId", str),
        ("assists", int),
        ("boosts", int),
        ("damageDealt", float),
        ("DBNOs", int),
        ("headshotKills", int),
        ("heals", int),
        ("killPlace", int),
        ("killPoints", int),
        ("kills", int),
        ("killStreaks", int),
        ("longestKill", float),
        ("matchDuration", int),
        ("matchType", str),
        ("maxPlace", int),
        ("numGroups", int),
        ("rankPoints", int),
        ("revives", int),
        ("rideDistance", float),
        ("roadKills", int),
        ("swimDistance", float),
        ("teamKills", int),
        ("vehicleDestroys", int),
        ("walkDistance", float),
        ("weaponsAcquired", int),
        ("winPoints", int),
        ("winPlacePerc", int),
    ]
    columns = [name for name, _ in schema]
    dtypes = [dtype for _, dtype in schema]
    generate_dataset("train.csv", columns, dtypes)

    script = subprocess.Popen(
        ["python", "kaggle19.py"],
        cwd=KAGGLE_DIR_PATH,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    # Only stderr is inspected; stdout is drained so the child cannot block.
    _, stderr_bytes = script.communicate()
    exit_code = script.returncode

    if exit_code != 0:
        logging.debug(
            "Error message\n-------------\n %s", stderr_bytes.decode("utf-8")
        )

    logging.info("Finished kaggle19")
    assert exit_code == 0


def test_kaggle20(generate_dataset):
    """Run ``kaggle20.py`` after requesting a generated ``data.csv``.

    The test passes when the script, executed from ``KAGGLE_DIR_PATH``, exits
    with status 0.
    """
    columns = [
        "id",
        "diagnosis",
        # "*_mean" measurements
        "radius_mean",
        "texture_mean",
        "perimeter_mean",
        "area_mean",
        "smoothness_mean",
        "compactness_mean",
        "concavity_mean",
        "concave points_mean",
        "symmetry_mean",
        "fractal_dimension_mean",
        # "*_se" measurements
        "radius_se",
        "texture_se",
        "perimeter_se",
        "area_se",
        "smoothness_se",
        "compactness_se",
        "concavity_se",
        "concave points_se",
        "symmetry_se",
        "fractal_dimension_se",
        # "*_worst" measurements
        "radius_worst",
        "texture_worst",
        "perimeter_worst",
        "area_worst",
        "smoothness_worst",
        "compactness_worst",
        "concavity_worst",
        "concave points_worst",
        "symmetry_worst",
        "fractal_dimension_worst",
        "Unnamed: 32",
    ]
    # "id" is int, "diagnosis" is str, and each of the remaining 31 columns
    # is float.
    dtypes = [int, str] + [float] * 31
    generate_dataset("data.csv", columns, dtypes)

    script = subprocess.Popen(
        ["python", "kaggle20.py"],
        cwd=KAGGLE_DIR_PATH,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    # Only stderr is inspected; stdout is drained so the child cannot block.
    _, stderr_bytes = script.communicate()
    exit_code = script.returncode

    if exit_code != 0:
        logging.debug(
            "Error message\n-------------\n %s", stderr_bytes.decode("utf-8")
        )

    logging.info("Finished kaggle20")
    assert exit_code == 0


def test_kaggle22(generate_dataset):
    """Run ``kaggle22.py`` after requesting its three generated CSV inputs.

    ``train.csv``, ``test.csv`` and ``sample_submission.csv`` are requested
    from the ``generate_dataset`` fixture (the first call also passes
    ``files_to_remove=["submission.csv"]``). The test passes when the script,
    executed from ``KAGGLE_DIR_PATH``, exits with status 0.
    """
    # The six label columns are shared by the train and submission files.
    label_columns = [
        "toxic",
        "severe_toxic",
        "obscene",
        "threat",
        "insult",
        "identity_hate",
    ]
    label_dtypes = [float] * 6

    train_columns = ["id", "comment_text"] + label_columns
    train_dtypes = [str, str] + label_dtypes
    test_columns = ["id", "comment_text"]
    test_dtypes = [str, str]
    submission_columns = ["id"] + label_columns
    submission_dtypes = [str] + label_dtypes

    generate_dataset(
        "train.csv", train_columns, train_dtypes, files_to_remove=["submission.csv"]
    )
    generate_dataset("test.csv", test_columns, test_dtypes)
    generate_dataset("sample_submission.csv", submission_columns, submission_dtypes)

    script = subprocess.Popen(
        ["python", "kaggle22.py"],
        cwd=KAGGLE_DIR_PATH,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    # Only stderr is inspected; stdout is drained so the child cannot block.
    _, stderr_bytes = script.communicate()
    exit_code = script.returncode

    if exit_code != 0:
        logging.debug(
            "Error message\n-------------\n %s", stderr_bytes.decode("utf-8")
        )

    logging.info("Finished kaggle22")
    assert exit_code == 0


================================================
FILE: versioneer.py
================================================
# Version: 0.29

"""The Versioneer - like a rocketeer, but for versions.

The Versioneer
==============

* like a rocketeer, but for versions!
* https://github.com/python-versioneer/python-versioneer
* Brian Warner
* License: Public Domain (Unlicense)
* Compatible with: Python 3.7, 3.8, 3.9, 3.10, 3.11 and pypy3
* [![Latest Version][pypi-image]][pypi-url]
* [![Build Status][travis-image]][travis-url]

This is a tool for managing a recorded version number in setuptools-based
python projects. The goal is to remove the tedious and error-prone "update
the embedded version string" step from your release process. Making a new
release should be as easy as recording a new tag in your version-control
system, and maybe making new tarballs.


## Quick Install

Versioneer provides two installation modes. The "classic" vendored mode installs
a copy of versioneer into your repository. The experimental build-time dependency mode
is intended to allow you to skip this step and simplify the process of upgrading.

### Vendored mode

* `pip install versioneer` to somewhere in your $PATH
   * A [conda-forge recipe](https://github.com/conda-forge/versioneer-feedstock) is
     available, so you can also use `conda install -c conda-forge versioneer`
* add a `[tool.versioneer]` section to your `pyproject.toml` or a
  `[versioneer]` section to your `setup.cfg` (see [Install](INSTALL.md))
   * Note that you will need to add `tomli; python_version < "3.11"` to your
     build-time dependencies if you use `pyproject.toml`
* run `versioneer install --vendor` in your source tree, commit the results
* verify version information with `python setup.py version`

### Build-time dependency mode

* `pip install versioneer` to somewhere in your $PATH
   * A [conda-forge recipe](https://github.com/conda-forge/versioneer-feedstock) is
     available, so you can also use `conda install -c conda-forge versioneer`
* add a `[tool.versioneer]` section to your `pyproject.toml` or a
  `[versioneer]` section to your `setup.cfg` (see [Install](INSTALL.md))
* add `versioneer` (with `[toml]` extra, if configuring in `pyproject.toml`)
  to the `requires` key of the `build-system` table in `pyproject.toml`:
  ```toml
  [build-system]
  requires = ["setuptools", "versioneer[toml]"]
  build-backend = "setuptools.build_meta"
  ```
* run `versioneer install --no-vendor` in your source tree, commit the results
* verify version information with `python setup.py version`

## Version Identifiers

Source trees come from a variety of places:

* a version-control system checkout (mostly used by developers)
* a nightly tarball, produced by build automation
* a snapshot tarball, produced by a web-based VCS browser, like github's
  "tarball from tag" feature
* a release tarball, produced by "setup.py sdist", distributed through PyPI

Within each source tree, the version identifier (either a string or a number,
this tool is format-agnostic) can come from a variety of places:

* ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows
  about recent "tags" and an absolute revision-id
* the name of the directory into which the tarball was unpacked
* an expanded VCS keyword ($Id$, etc)
* a `_version.py` created by some earlier build step

For released software, the version identifier is closely related to a VCS
tag. Some projects use tag names that include more than just the version
string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool
needs to strip the tag prefix to extract the version identifier. For
unreleased software (between tags), the version identifier should provide
enough information to help developers recreate the same tree, while also
giving them an idea of roughly how old the tree is (after version 1.2, before
version 1.3). Many VCS systems can report a description that captures this,
for example `git describe --tags --dirty --always` reports things like
"0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the
0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has
uncommitted changes).

The version identifier is used for multiple purposes:

* to allow the module to self-identify its version: `myproject.__version__`
* to choose a name and prefix for a 'setup.py sdist' tarball

## Theory of Operation

Versioneer works by adding a special `_version.py` file into your source
tree, where your `__init__.py` can import it. This `_version.py` knows how to
dynamically ask the VCS tool for version information at import time.

`_version.py` also contains `$Revision$` markers, and the installation
process marks `_version.py` to have this marker rewritten with a tag name
during the `git archive` command. As a result, generated tarballs will
contain enough information to get the proper version.

To allow `setup.py` to compute a version too, a `versioneer.py` is added to
the top level of your source tree, next to `setup.py` and the `setup.cfg`
that configures it. This overrides several distutils/setuptools commands to
compute the version when invoked, and changes `setup.py build` and `setup.py
sdist` to replace `_version.py` with a small static file that contains just
the generated version data.

## Installation

See [INSTALL.md](./INSTALL.md) for detailed installation instructions.

## Version-String Flavors

Code which uses Versioneer can learn about its version string at runtime by
importing `_version` from your main `__init__.py` file and running the
`get_versions()` function. From the "outside" (e.g. in `setup.py`), you can
import the top-level `versioneer.py` and run `get_versions()`.

Both functions return a dictionary with different flavors of version
information:

* `['version']`: A condensed version string, rendered using the selected
  style. This is the most commonly used value for the project's version
  string. The default "pep440" style yields strings like `0.11`,
  `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section
  below for alternative styles.

* `['full-revisionid']`: detailed revision identifier. For Git, this is the
  full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac".

* `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the
  commit date in ISO 8601 format. This will be None if the date is not
  available.

* `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that
  this is only accurate if run in a VCS checkout, otherwise it is likely to
  be False or None

* `['error']`: if the version string could not be computed, this will be set
  to a string describing the problem, otherwise it will be None. It may be
  useful to throw an exception in setup.py if this is set, to avoid e.g.
  creating tarballs with a version string of "unknown".

Some variants are more useful than others. Including `full-revisionid` in a
bug report should allow developers to reconstruct the exact code being tested
(or indicate the presence of local changes that should be shared with the
developers). `version` is suitable for display in an "about" box or a CLI
`--version` output: it can be easily compared against release notes and lists
of bugs fixed in various releases.

The installer adds the following text to your `__init__.py` to place a basic
version in `YOURPROJECT.__version__`:

    from ._version import get_versions
    __version__ = get_versions()['version']
    del get_versions

## Styles

The setup.cfg `style=` configuration controls how the VCS information is
rendered into a version string.

The default style, "pep440", produces a PEP440-compliant string, equal to the
un-prefixed tag name for actual releases, and containing an additional "local
version" section with more detail for in-between builds. For Git, this is
TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags
--dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the
tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and
that this commit is two revisions ("+2") beyond the "0.11" tag. For released
software (exactly equal to a known tag), the identifier will only contain the
stripped tag, e.g. "0.11".

Other styles are available. See [details.md](details.md) in the Versioneer
source tree for descriptions.

## Debugging

Versioneer tries to avoid fatal errors: if something goes wrong, it will tend
to return a version of "0+unknown". To investigate the problem, run `setup.py
version`, which will run the version-lookup code in a verbose mode, and will
display the full contents of `get_versions()` (including the `error` string,
which may help identify what went wrong).

## Known Limitations

Some situations are known to cause problems for Versioneer. This details the
most significant ones. More can be found on Github
[issues page](https://github.com/python-versioneer/python-versioneer/issues).

### Subprojects

Versioneer has limited support for source trees in which `setup.py` is not in
the root directory (e.g. `setup.py` and `.git/` are *not* siblings). There are
two common reasons why `setup.py` might not be in the root:

* Source trees which contain multiple subprojects, such as
  [Buildbot](https://github.com/buildbot/buildbot), which contains both
  "master" and "slave" subprojects, each with their own `setup.py`,
  `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI
  distributions (and upload multiple independently-installable tarballs).
* Source trees whose main purpose is to contain a C library, but which also
  provide bindings to Python (and perhaps other languages) in subdirectories.

Versioneer will look for `.git` in parent directories, and most operations
should get the right version string. However `pip` and `setuptools` have bugs
and implementation details which frequently cause `pip install .` from a
subproject directory to fail to find a correct version string (so it usually
defaults to `0+unknown`).

`pip install --editable .` should work correctly. `setup.py install` might
work too.

Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in
some later version.

[Bug #38](https://github.com/python-versioneer/python-versioneer/issues/38) is tracking
this issue. The discussion in
[PR #61](https://github.com/python-versioneer/python-versioneer/pull/61) describes the
issue from the Versioneer side in more detail.
[pip PR#3176](https://github.com/pypa/pip/pull/3176) and
[pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve
pip to let Versioneer work correctly.

Versioneer-0.16 and earlier only looked for a `.git` directory next to the
`setup.cfg`, so subprojects were completely unsupported with those releases.

### Editable installs with setuptools <= 18.5

`setup.py develop` and `pip install --editable .` allow you to install a
project into a virtualenv once, then continue editing the source code (and
test) without re-installing after every change.

"Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a
convenient way to specify executable scripts that should be installed along
with the python package.

These both work as expected when using modern setuptools. When using
setuptools-18.5 or earlier, however, certain operations will cause
`pkg_resources.DistributionNotFound` errors when running the entrypoint
script, which must be resolved by re-installing the package. This happens
when the install happens with one version, then the egg_info data is
regenerated while a different version is checked out. Many setup.py commands
cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into
a different virtualenv), so this can be surprising.

[Bug #83](https://github.com/python-versioneer/python-versioneer/issues/83) describes
this one, but upgrading to a newer version of setuptools should probably
resolve it.


## Updating Versioneer

To upgrade your project to a new release of Versioneer, do the following:

* install the new Versioneer (`pip install -U versioneer` or equivalent)
* edit `setup.cfg` and `pyproject.toml`, if necessary,
  to include any new configuration settings indicated by the release notes.
  See [UPGRADING](./UPGRADING.md) for details.
* re-run `versioneer install --[no-]vendor` in your source tree, to replace
  `SRC/_version.py`
* commit any changed files

## Future Directions

This tool is designed to make it easily extended to other version-control
systems: all VCS-specific components are in separate directories like
src/git/ . The top-level `versioneer.py` script is assembled from these
components by running make-versioneer.py . In the future, make-versioneer.py
will take a VCS name as an argument, and will construct a version of
`versioneer.py` that is specific to the given VCS. It might also take the
configuration arguments that are currently provided manually during
installation by editing setup.py . Alternatively, it might go the other
direction and include code from all supported VCS systems, reducing the
number of intermediate scripts.

## Similar projects

* [setuptools_scm](https://github.com/pypa/setuptools_scm/) - a non-vendored build-time
  dependency
* [miniver](https://github.com/jbweston/miniver) - a lightweight reimplementation of
  versioneer
* [versioningit](https://github.com/jwodder/versioningit) - a PEP 518-based setuptools
  plugin

## License

To make Versioneer easier to embed, all its code is dedicated to the public
domain. The `_version.py` that it creates is also in the public domain.
Specifically, both are released under the "Unlicense", as described in
https://unlicense.org/.

[pypi-image]: https://img.shields.io/pypi/v/versioneer.svg
[pypi-url]: https://pypi.python.org/pypi/versioneer/
[travis-image]:
https://img.shields.io/travis/com/python-versioneer/python-versioneer.svg
[travis-url]: https://travis-ci.com/github/python-versioneer/python-versioneer

"""
# pylint:disable=invalid-name,import-outside-toplevel,missing-function-docstring
# pylint:disable=missing-class-docstring,too-many-branches,too-many-statements
# pylint:disable=raise-missing-from,too-many-lines,too-many-locals,import-error
# pylint:disable=too-few-public-methods,redefined-outer-name,consider-using-with
# pylint:disable=attribute-defined-outside-init,too-many-arguments

import configparser
import errno
import functools
import json
import os
import re
import subprocess
import sys
from pathlib import Path
from typing import Any, Callable, Dict, List, NoReturn, Optional, Tuple, Union, cast

# Locate a TOML parser: the standard-library ``tomllib`` on Python 3.11+,
# otherwise the optional ``tomli`` package under the same name.
# ``have_tomllib`` records whether one could be imported;
# get_config_from_root() only reads pyproject.toml when it is True.
have_tomllib = True
if sys.version_info >= (3, 11):
    import tomllib
else:
    try:
        import tomli as tomllib
    except ImportError:
        # No TOML parser available: configuration must come from setup.cfg.
        have_tomllib = False


class VersioneerConfig:
    """Container for Versioneer configuration parameters.

    The class only declares attribute types; get_config_from_root() creates
    an instance and assigns each attribute from the ``[tool.versioneer]``
    table of pyproject.toml or the ``[versioneer]`` section of setup.cfg.
    """

    # Value of the required "VCS" setting.
    VCS: str
    # Value of the "style" setting; "" when the setting is absent.
    style: str
    # Value of the "tag_prefix" setting; normalised to "" when it is absent
    # or written as a quoted empty string ('' or "").
    tag_prefix: str
    # Value of the "versionfile_source" setting. NOTE(review): annotated as
    # str via cast(), but it is read with .get() and may be None if unset.
    versionfile_source: str
    # Value of the "versionfile_build" setting, or None when absent.
    versionfile_build: Optional[str]
    # Value of the "parentdir_prefix" setting, or None when absent.
    parentdir_prefix: Optional[str]
    # Value of the "verbose" setting (converted to bool when read from
    # setup.cfg), or None when absent.
    verbose: Optional[bool]


def get_root() -> str:
    """Locate the project root directory.

    The current working directory is tried first. If it contains none of
    setup.py, pyproject.toml or versioneer.py, the directory holding
    ``sys.argv[0]`` is tried instead, which allows
    'python path/to/setup.py COMMAND'.

    Returns:
        The resolved path of the directory that was accepted as the root.

    Raises:
        VersioneerBadRootError: if neither candidate directory contains one
            of the marker files.
    """
    marker_files = ("setup.py", "pyproject.toml", "versioneer.py")

    def looks_like_root(directory: str) -> bool:
        """Return True if any marker file exists directly in *directory*."""
        return any(
            os.path.exists(os.path.join(directory, marker))
            for marker in marker_files
        )

    root = os.path.realpath(os.path.abspath(os.getcwd()))
    if not looks_like_root(root):
        # allow 'python path/to/setup.py COMMAND'
        root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0])))
        if not looks_like_root(root):
            err = (
                "Versioneer was unable to run the project root directory. "
                "Versioneer requires setup.py to be executed from "
                "its immediate directory (like 'python setup.py COMMAND'), "
                "or in a way that lets it use sys.argv[0] to find the root "
                "(like 'python path/to/setup.py COMMAND')."
            )
            raise VersioneerBadRootError(err)

    versioneer_py = os.path.join(root, "versioneer.py")
    try:
        # Certain runtime workflows (setup.py install/develop in a setuptools
        # tree) execute all dependencies in a single python process, so
        # "versioneer" may be imported multiple times, and python's shared
        # module-import table will cache the first one. That is why the root
        # is not derived from os.path.dirname(__file__): it would point at
        # whichever versioneer.py was imported first, even in later projects.
        # Here __file__ is only used to warn about such a mismatch.
        this_file = os.path.realpath(os.path.abspath(__file__))
        this_stem = os.path.normcase(os.path.splitext(this_file)[0])
        expected_stem = os.path.normcase(os.path.splitext(versioneer_py)[0])
        is_foreign_copy = this_stem != expected_stem
        if is_foreign_copy and "VERSIONEER_PEP518" not in globals():
            print(
                "Warning: build in %s is using versioneer.py from %s"
                % (os.path.dirname(this_file), versioneer_py)
            )
    except NameError:
        # __file__ is not defined in this execution context; skip the check.
        pass
    return root


def get_config_from_root(root: str) -> VersioneerConfig:
    """Build the Versioneer configuration for the project rooted at *root*.

    The ``[tool.versioneer]`` table of pyproject.toml is used when that file
    exists and a TOML parser was imported. If the table cannot be loaded or
    is empty, the ``[versioneer]`` section of setup.cfg is read instead.
    """
    # Reading setup.cfg might raise OSError (if setup.cfg is missing), or
    # configparser.NoSectionError (if it lacks a [versioneer] section), or
    # configparser.NoOptionError (if it lacks "VCS="). See the docstring at
    # the top of versioneer.py for instructions on writing your setup.cfg .
    project_dir = Path(root)
    pyproject_toml = project_dir / "pyproject.toml"
    section: Union[Dict[str, Any], configparser.SectionProxy, None] = None

    if pyproject_toml.exists() and have_tomllib:
        try:
            with open(pyproject_toml, "rb") as fobj:
                section = tomllib.load(fobj)["tool"]["versioneer"]
        except (tomllib.TOMLDecodeError, KeyError) as e:
            print(f"Failed to load config from {pyproject_toml}: {e}")
            print("Try to load it from setup.cfg")

    if not section:
        parser = configparser.ConfigParser()
        with open(project_dir / "setup.cfg") as cfg_file:
            parser.read_file(cfg_file)
        parser.get("versioneer", "VCS")  # raise error if missing
        section = parser["versioneer"]

    # `cast` really shouldn't be used, but it's the simplest option for the
    # common VersioneerConfig users at the moment. We verify against
    # `None` values elsewhere where it matters.
    cfg = VersioneerConfig()
    cfg.VCS = section["VCS"]
    cfg.style = section.get("style", "")
    cfg.versionfile_source = cast(str, section.get("versionfile_source"))
    cfg.versionfile_build = section.get("versionfile_build")

    # A missing prefix, or one written as a quoted empty string, means "".
    raw_tag_prefix = section.get("tag_prefix")
    if raw_tag_prefix in ("''", '""', None):
        cfg.tag_prefix = ""
    else:
        cfg.tag_prefix = cast(str, raw_tag_prefix)

    cfg.parentdir_prefix = section.get("parentdir_prefix")

    # Make sure configparser translates to bool; a pyproject.toml table
    # yields the value as parsed by the TOML loader.
    cfg.verbose = (
        section.getboolean("verbose")
        if isinstance(section, configparser.SectionProxy)
        else section.get("verbose")
    )

    return cfg


class NotThisMethod(Exception):
    """Exception raised if a method is not valid for the current scenario.

    NOTE(review): presumably raised by a version-lookup method so the caller
    can try a different one -- confirm against the callers.
    """


# these dictionaries contain VCS-specific tools
# LONG_VERSION_PY: VCS name (e.g. "git") -> source template text stored for
#   that VCS.
# HANDLERS: VCS name -> {method name -> handler function}; filled in by the
#   register_vcs_handler() decorator.
LONG_VERSION_PY: Dict[str, str] = {}
HANDLERS: Dict[str, Dict[str, Callable]] = {}


def register_vcs_handler(vcs: str, method: str) -> Callable:  # decorator
    """Return a decorator that registers a function as a VCS handler.

    The decorated function is stored as ``HANDLERS[vcs][method]`` and is
    returned unchanged.
    """

    def decorate(f: Callable) -> Callable:
        """Record f under HANDLERS[vcs][method] and hand it back."""
        if vcs not in HANDLERS:
            HANDLERS[vcs] = {}
        HANDLERS[vcs][method] = f
        return f

    return decorate


def run_command(
    commands: List[str],
    args: List[str],
    cwd: Optional[str] = None,
    verbose: bool = False,
    hide_stderr: bool = False,
    env: Optional[Dict[str, str]] = None,
) -> Tuple[Optional[str], Optional[int]]:
    """Run the first launchable executable from *commands* with *args*.

    Each name in *commands* is tried in order; names that cannot be found
    (ENOENT) are skipped.

    Returns:
        ``(stdout, returncode)`` with stdout stripped and decoded when the
        process exits with status 0; ``(None, returncode)`` when it exits
        with a non-zero status; ``(None, None)`` when no command could be
        started.
    """
    assert isinstance(commands, list)

    extra_kwargs: Dict[str, Any] = {}
    if sys.platform == "win32":
        # This hides the console window if pythonw.exe is used
        startup = subprocess.STARTUPINFO()
        startup.dwFlags |= subprocess.STARTF_USESHOWWINDOW
        extra_kwargs["startupinfo"] = startup

    # stderr is either captured (and then discarded) or inherited.
    stderr_target = subprocess.PIPE if hide_stderr else None

    process = None
    for command in commands:
        argv = [command] + args
        dispcmd = str(argv)
        try:
            # remember shell=False, so use git.cmd on windows, not just git
            process = subprocess.Popen(
                argv,
                cwd=cwd,
                env=env,
                stdout=subprocess.PIPE,
                stderr=stderr_target,
                **extra_kwargs,
            )
        except OSError as e:
            if e.errno == errno.ENOENT:
                # This executable does not exist; try the next candidate.
                continue
            if verbose:
                print("unable to run %s" % dispcmd)
                print(e)
            return None, None
        break

    if process is None:
        # Every candidate was missing (or the list was empty).
        if verbose:
            print("unable to find command, tried %s" % (commands,))
        return None, None

    raw_output = process.communicate()[0]
    stdout = raw_output.strip().decode()
    exit_status = process.returncode
    if exit_status != 0:
        if verbose:
            print("unable to run %s (error)" % dispcmd)
            print("stdout was %s" % stdout)
        return None, exit_status
    return stdout, exit_status


LONG_VERSION_PY[
    "git"
] = r'''
# This file helps to compute a version number in source trees obtained from
# git-archive tarball (such as those provided by githubs download-from-tag
# feature). Distribution tarballs (built by setup.py sdist) and build
# directories (produced by setup.py build) will contain a much shorter file
# that just contains the computed version number.

# This file is released into the public domain.
# Generated by versioneer-0.29
# https://github.com/python-versioneer/python-versioneer

"""Git implementation of _version.py."""

import errno
import os
import re
import subprocess
import sys
from typing import Any, Callable, Dict, List, Optional, Tuple
import functools


def get_keywords() -> Dict[str, str]:
    """Get the keywords needed to look up the version information."""
    # these strings will be replaced by git during git-archive.
    # setup.py/versioneer.py will grep for the variable names, so they must
    # each be defined on a line of their own. _version.py will just call
    # get_keywords().
    git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s"
    git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s"
    git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s"
    keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
    return keywords


class VersioneerConfig:
    """Container for Versioneer configuration parameters."""

    VCS: str
    style: str
    tag_prefix: str
    parentdir_prefix: str
    versionfile_source: str
    verbose: bool


def get_config() -> VersioneerConfig:
    """Create, populate and return the VersioneerConfig() object."""
    # these strings are filled in when 'setup.py versioneer' creates
    # _version.py
    cfg = VersioneerConfig()
    cfg.VCS = "git"
    cfg.style = "%(STYLE)s"
    cfg.tag_prefix = "%(TAG_PREFIX)s"
    cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s"
    cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s"
    cfg.verbose = False
    return cfg


class NotThisMethod(Exception):
    """Exception raised if a method is not valid for the current scenario."""


LONG_VERSION_PY: Dict[str, str] = {}
HANDLERS: Dict[str, Dict[str, Callable]] = {}


def register_vcs_handler(vcs: str, method: str) -> Callable:  # decorator
    """Create decorator to mark a method as the handler of a VCS."""
    def decorate(f: Callable) -> Callable:
        """Store f in HANDLERS[vcs][method]."""
        if vcs not in HANDLERS:
            HANDLERS[vcs] = {}
        HANDLERS[vcs][method] = f
        return f
    return decorate


def run_command(
    commands: List[str],
    args: List[str],
    cwd: Optional[str] = None,
    verbose: bool = False,
    hide_stderr: bool = False,
    env: Optional[Dict[str, str]] = None,
) -> Tuple[Optional[str], Optional[int]]:
    """Call the given command(s)."""
    assert isinstance(commands, list)
    process = None

    popen_kwargs: Dict[str, Any] = {}
    if sys.platform == "win32":
        # This hides the console window if pythonw.exe is used
        startupinfo = subprocess.STARTUPINFO()
        startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
        popen_kwargs["startupinfo"] = startupinfo

    for command in commands:
        try:
            dispcmd = str([command] + args)
            # remember shell=False, so use git.cmd on windows, not just git
            process = subprocess.Popen([command] + args, cwd=cwd, env=env,
                                       stdout=subprocess.PIPE,
                                       stderr=(subprocess.PIPE if hide_stderr
                                               else None), **popen_kwargs)
            break
        except OSError as e:
            if e.errno == errno.ENOENT:
                continue
            if verbose:
                print("unable to run %%s" %% dispcmd)
                print(e)
            return None, None
    else:
        if verbose:
            print("unable to find command, tried %%s" %% (commands,))
        return None, None
    stdout = process.communicate()[0].strip().decode()
    if process.returncode != 0:
        if verbose:
            print("unable to run %%s (error)" %% dispcmd)
            print("stdout was %%s" %% stdout)
        return None, process.returncode
    return stdout, process.returncode


def versions_from_parentdir(
    parentdir_prefix: str,
    root: str,
    verbose: bool,
) -> Dict[str, Any]:
    """Try to determine the version from the parent directory name.

    Source tarballs conventionally unpack into a directory that includes both
    the project name and a version string. We will also support searching up
    two directory levels for an appropriately named parent directory
    """
    rootdirs = []

    for _ in range(3):
        dirname = os.path.basename(root)
        if dirname.startswith(parentdir_prefix):
            return {"version": dirname[len(parentdir_prefix):],
                    "full-revisionid": None,
                    "dirty": False, "error": None, "date": None}
        rootdirs.append(root)
        root = os.path.dirname(root)  # up a level

    if verbose:
        print("Tried directories %%s but none started with prefix %%s" %%
              (str(rootdirs), parentdir_prefix))
    raise NotThisMethod("rootdir doesn't start with parentdir_prefix")


@register_vcs_handler("git", "get_keywords")
def git_get_keywords(versionfile_abs: str) -> Dict[str, str]:
    """Extract version information from the given file."""
    # the code embedded in _version.py can just fetch the value of these
    # keywords. When used from setup.py, we don't want to import _version.py,
    # so we do it with a regexp instead. This function is not used from
    # _version.py.
    keywords: Dict[str, str] = {}
    try:
        with open(versionfile_abs, "r") as fobj:
            for line in fobj:
                if line.strip().startswith("git_refnames ="):
                    mo = re.search(r'=\s*"(.*)"', line)
                    if mo:
                        keywords["refnames"] = mo.group(1)
                if line.strip().startswith("git_full ="):
                    mo = re.search(r'=\s*"(.*)"', line)
                    if mo:
                        keywords["full"] = mo.group(1)
                if line.strip().startswith("git_date ="):
                    mo = re.search(r'=\s*"(.*)"', line)
                    if mo:
                        keywords["date"] = mo.group(1)
    except OSError:
        pass
    return keywords


@register_vcs_handler("git", "keywords")
def git_versions_from_keywords(
    keywords: Dict[str, str],
    tag_prefix: str,
    verbose: bool,
) -> Dict[str, Any]:
    """Get version information from git keywords."""
    if "refnames" not in keywords:
        raise NotThisMethod("Short version file found")
    date = keywords.get("date")
    if date is not None:
        # Use only the last line.  Previous lines may contain GPG signature
        # information.
        date = date.splitlines()[-1]

        # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant
        # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601
        # -like" string, which we must then edit to make compliant), because
        # it's been around since git-1.5.3, and it's too difficult to
        # discover which version we're using, or to work around using an
        # older one.
        date = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
    refnames = keywords["refnames"].strip()
    if refnames.startswith("$Format"):
        if verbose:
            print("keywords are unexpanded, not using")
        raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
    refs = {r.strip() for r in refnames.strip("()").split(",")}
    # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
    # just "foo-1.0". If we see a "tag: " prefix, prefer those.
    TAG = "tag: "
    tags = {r[len(TAG):] for r in refs if r.startswith(TAG)}
    if not tags:
        # Either we're using git < 1.8.3, or there really are no tags. We use
        # a heuristic: assume all version tags have a digit. The old git %%d
        # expansion behaves like git log --decorate=short and strips out the
        # refs/heads/ and refs/tags/ prefixes that would let us distinguish
        # between branches and tags. By ignoring refnames without digits, we
        # filter out many common branch names like "release" and
        # "stabilization", as well as "HEAD" and "master".
        tags = {r for r in refs if re.search(r'\d', r)}
        if verbose:
            print("discarding '%%s', no digits" %% ",".join(refs - tags))
    if verbose:
        print("likely tags: %%s" %% ",".join(sorted(tags)))
    for ref in sorted(tags):
        # sorting will prefer e.g. "2.0" over "2.0rc1"
        if ref.startswith(tag_prefix):
            r = ref[len(tag_prefix):]
            # Filter out refs that exactly match prefix or that don't start
            # with a number once the prefix is stripped (mostly a concern
            # when prefix is '')
            if not re.match(r'\d', r):
                continue
            if verbose:
                print("picking %%s" %% r)
            return {"version": r,
                    "full-revisionid": keywords["full"].strip(),
                    "dirty": False, "error": None,
                    "date": date}
    # no suitable tags, so version is "0+unknown", but full hex is still there
    if verbose:
        print("no suitable tags, using unknown + full revision id")
    return {"version": "0+unknown",
            "full-revisionid": keywords["full"].strip(),
            "dirty": False, "error": "no suitable tags", "date": None}


@register_vcs_handler("git", "pieces_from_vcs")
def git_pieces_from_vcs(
    tag_prefix: str,
    root: str,
    verbose: bool,
    runner: Callable = run_command
) -> Dict[str, Any]:
    """Get version from 'git describe' in the root of the source tree.

    This only gets called if the git-archive 'subst' keywords were *not*
    expanded, and _version.py hasn't already been rewritten with a short
    version string, meaning we're inside a checked out source tree.
    """
    GITS = ["git"]
    if sys.platform == "win32":
        GITS = ["git.cmd", "git.exe"]

    # GIT_DIR can interfere with correct operation of Versioneer.
    # It may be intended to be passed to the Versioneer-versioned project,
    # but that should not change where we get our version from.
    env = os.environ.copy()
    env.pop("GIT_DIR", None)
    runner = functools.partial(runner, env=env)

    _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root,
                   hide_stderr=not verbose)
    if rc != 0:
        if verbose:
            print("Directory %%s not under git control" %% root)
        raise NotThisMethod("'git rev-parse --git-dir' returned error")

    # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
    # if there isn't one, this yields HEX[-dirty] (no NUM)
    describe_out, rc = runner(GITS, [
        "describe", "--tags", "--dirty", "--always", "--long",
        "--match", f"{tag_prefix}[[:digit:]]*"
    ], cwd=root)
    # --long was added in git-1.5.5
    if describe_out is None:
        raise NotThisMethod("'git describe' failed")
    describe_out = describe_out.strip()
    full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root)
    if full_out is None:
        raise NotThisMethod("'git rev-parse' failed")
    full_out = full_out.strip()

    pieces: Dict[str, Any] = {}
    pieces["long"] = full_out
    pieces["short"] = full_out[:7]  # maybe improved later
    pieces["error"] = None

    branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"],
                             cwd=root)
    # --abbrev-ref was added in git-1.6.3
    if rc != 0 or branch_name is None:
        raise NotThisMethod("'git rev-parse --abbrev-ref' returned error")
    branch_name = branch_name.strip()

    if branch_name == "HEAD":
        # If we aren't exactly on a branch, pick a branch which represents
        # the current commit. If all else fails, we are on a branchless
        # commit.
        branches, rc = runner(GITS, ["branch", "--contains"], cwd=root)
        # --contains was added in git-1.5.4
        if rc != 0 or branches is None:
            raise NotThisMethod("'git branch --contains' returned error")
        branches = branches.split("\n")

        # Remove the first line if we're running detached
        if "(" in branches[0]:
            branches.pop(0)

        # Strip off the leading "* " from the list of branches.
        branches = [branch[2:] for branch in branches]
        if "master" in branches:
            branch_name = "master"
        elif not branches:
            branch_name = None
        else:
            # Pick the first branch that is returned. Good or bad.
            branch_name = branches[0]

    pieces["branch"] = branch_name

    # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
    # TAG might have hyphens.
    git_describe = describe_out

    # look for -dirty suffix
    dirty = git_describe.endswith("-dirty")
    pieces["dirty"] = dirty
    if dirty:
        git_describe = git_describe[:git_describe.rindex("-dirty")]

    # now we have TAG-NUM-gHEX or HEX

    if "-" in git_describe:
        # TAG-NUM-gHEX
        mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
        if not mo:
            # unparsable. Maybe git-describe is misbehaving?
            pieces["error"] = ("unable to parse git-describe output: '%%s'"
                               %% describe_out)
            return pieces

        # tag
        full_tag = mo.group(1)
        if not full_tag.startswith(tag_prefix):
            if verbose:
                fmt = "tag '%%s' doesn't start with prefix '%%s'"
                print(fmt %% (full_tag, tag_prefix))
            pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'"
                               %% (full_tag, tag_prefix))
            return pieces
        pieces["closest-tag"] = full_tag[len(tag_prefix):]

        # distance: number of commits since tag
        pieces["distance"] = int(mo.group(2))

        # commit: short hex revision ID
        pieces["short"] = mo.group(3)

    else:
        # HEX: no tags
        pieces["closest-tag"] = None
        out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root)
        pieces["distance"] = len(out.split())  # total number of commits

    # commit date: see ISO-8601 comment in git_versions_from_keywords()
    date = runner(GITS, ["show", "-s", "--format=%%ci", "HEAD"], cwd=root)[0].strip()
    # Use only the last line.  Previous lines may contain GPG signature
    # information.
    date = date.splitlines()[-1]
    pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1)

    return pieces


def plus_or_dot(pieces: Dict[str, Any]) -> str:
    """Return a + if we don't already have one, else return a ."""
    if "+" in pieces.get("closest-tag", ""):
        return "."
    return "+"


def render_pep440(pieces: Dict[str, Any]) -> str:
    """Build up version string, with post-release "local version identifier".

    Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you
    get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty

    Exceptions:
    1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty]
    """
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"] or pieces["dirty"]:
            rendered += plus_or_dot(pieces)
            rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"])
            if pieces["dirty"]:
                rendered += ".dirty"
    else:
        # exception #1
        rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"],
                                          pieces["short"])
        if pieces["dirty"]:
            rendered += ".dirty"
    return rendered


def render_pep440_branch(pieces: Dict[str, Any]) -> str:
    """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] .

    The ".dev0" means not master branch. Note that .dev0 sorts backwards
    (a feature branch will appear "older" than the master branch).

    Exceptions:
    1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty]
    """
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"] or pieces["dirty"]:
            if pieces["branch"] != "master":
                rendered += ".dev0"
            rendered += plus_or_dot(pieces)
            rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"])
            if pieces["dirty"]:
                rendered += ".dirty"
    else:
        # exception #1
        rendered = "0"
        if pieces["branch"] != "master":
            rendered += ".dev0"
        rendered += "+untagged.%%d.g%%s" %% (pieces["distance"],
                                          pieces["short"])
        if pieces["dirty"]:
            rendered += ".dirty"
    return rendered


def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]:
    """Split pep440 version string at the post-release segment.

    Returns the release segments before the post-release and the
    post-release version number (or -1 if no post-release segment is present).
    """
    vc = str.split(ver, ".post")
    return vc[0], int(vc[1] or 0) if len(vc) == 2 else None


def render_pep440_pre(pieces: Dict[str, Any]) -> str:
    """TAG[.postN.devDISTANCE] -- No -dirty.

    Exceptions:
    1: no tags. 0.post0.devDISTANCE
    """
    if pieces["closest-tag"]:
        if pieces["distance"]:
            # update the post release segment
            tag_version, post_version = pep440_split_post(pieces["closest-tag"])
            rendered = tag_version
            if post_version is not None:
                rendered += ".post%%d.dev%%d" %% (post_version + 1, pieces["distance"])
            else:
                rendered += ".post0.dev%%d" %% (pieces["distance"])
        else:
            # no commits, use the tag as the version
            rendered = pieces["closest-tag"]
    else:
        # exception #1
        rendered = "0.post0.dev%%d" %% pieces["distance"]
    return rendered


def render_pep440_post(pieces: Dict[str, Any]) -> str:
    """TAG[.postDISTANCE[.dev0]+gHEX] .

    The ".dev0" means dirty. Note that .dev0 sorts backwards
    (a dirty tree will appear "older" than the corresponding clean one),
    but you shouldn't be releasing software with -dirty anyways.

    Exceptions:
    1: no tags. 0.postDISTANCE[.dev0]
    """
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"] or pieces["dirty"]:
            rendered += ".post%%d" %% pieces["distance"]
            if pieces["dirty"]:
                rendered += ".dev0"
            rendered += plus_or_dot(pieces)
            rendered += "g%%s" %% pieces["short"]
    else:
        # exception #1
        rendered = "0.post%%d" %% pieces["distance"]
        if pieces["dirty"]:
            rendered += ".dev0"
        rendered += "+g%%s" %% pieces["short"]
    return rendered


def render_pep440_post_branch(pieces: Dict[str, Any]) -> str:
    """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] .

    The ".dev0" means not master branch.

    Exceptions:
    1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty]
    """
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"] or pieces["dirty"]:
            rendered += ".post%%d" %% pieces["distance"]
            if pieces["branch"] != "master":
                rendered += ".dev0"
            rendered += plus_or_dot(pieces)
            rendered += "g%%s" %% pieces["short"]
            if pieces["dirty"]:
                rendered += ".dirty"
    else:
        # exception #1
        rendered = "0.post%%d" %% pieces["distance"]
        if pieces["branch"] != "master":
            rendered += ".dev0"
        rendered += "+g%%s" %% pieces["short"]
        if pieces["dirty"]:
            rendered += ".dirty"
    return rendered


def render_pep440_old(pieces: Dict[str, Any]) -> str:
    """TAG[.postDISTANCE[.dev0]] .

    The ".dev0" means dirty.

    Exceptions:
    1: no tags. 0.postDISTANCE[.dev0]
    """
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"] or pieces["dirty"]:
            rendered += ".post%%d" %% pieces["distance"]
            if pieces["dirty"]:
                rendered += ".dev0"
    else:
        # exception #1
        rendered = "0.post%%d" %% pieces["distance"]
        if pieces["dirty"]:
            rendered += ".dev0"
    return rendered


def render_git_describe(pieces: Dict[str, Any]) -> str:
    """TAG[-DISTANCE-gHEX][-dirty].

    Like 'git describe --tags --dirty --always'.

    Exceptions:
    1: no tags. HEX[-dirty]  (note: no 'g' prefix)
    """
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"]:
            rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"])
    else:
        # exception #1
        rendered = pieces["short"]
    if pieces["dirty"]:
        rendered += "-dirty"
    return rendered


def render_git_describe_long(pieces: Dict[str, Any]) -> str:
    """TAG-DISTANCE-gHEX[-dirty].

    Like 'git describe --tags --dirty --always -long'.
    The distance/hash is unconditional.

    Exceptions:
    1: no tags. HEX[-dirty]  (note: no 'g' prefix)
    """
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"])
    else:
        # exception #1
        rendered = pieces["short"]
    if pieces["dirty"]:
        rendered += "-dirty"
    return rendered


def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]:
    """Render the given version pieces into the requested style."""
    if pieces["error"]:
        return {"version": "unknown",
                "full-revisionid": pieces.get("long"),
                "dirty": None,
                "error": pieces["error"],
                "date": None}

    if not style or style == "default":
        style = "pep440"  # the default

    if style == "pep440":
        rendered = render_pep440(pieces)
    elif style == "pep440-branch":
        rendered = render_pep440_branch(pieces)
    elif style == "pep440-pre":
        rendered = render_pep440_pre(pieces)
    elif style == "pep440-post":
        rendered = render_pep440_post(pieces)
    elif style == "pep440-post-branch":
        rendered = render_pep440_post_branch(pieces)
    elif style == "pep440-old":
        rendered = render_pep440_old(pieces)
    elif style == "git-describe":
        rendered = render_git_describe(pieces)
    elif style == "git-describe-long":
        rendered = render_git_describe_long(pieces)
    else:
        raise ValueError("unknown style '%%s'" %% style)

    return {"version": rendered, "full-revisionid": pieces["long"],
            "dirty": pieces["dirty"], "error": None,
            "date": pieces.get("date")}


def get_versions() -> Dict[str, Any]:
    """Get version information or return default if unable to do so."""
    # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
    # __file__, we can work backwards from there to the root. Some
    # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which
    # case we can only use expanded keywords.

    cfg = get_config()
    verbose = cfg.verbose

    try:
        return git_versions_from_keywords(get_keywords(), cfg.tag_prefix,
                                          verbose)
    except NotThisMethod:
        pass

    try:
        root = os.path.realpath(__file__)
        # versionfile_source is the relative path from the top of the source
        # tree (where the .git directory might live) to this file. Invert
        # this to find the root from __file__.
        for _ in cfg.versionfile_source.split('/'):
            root = os.path.dirname(root)
    except NameError:
        return {"version": "0+unknown", "full-revisionid": None,
                "dirty": None,
                "error": "unable to find root of source tree",
                "date": None}

    try:
        pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose)
        return render(pieces, cfg.style)
    except NotThisMethod:
        pass

    try:
        if cfg.parentdir_prefix:
            return versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
    except NotThisMethod:
        pass

    return {"version": "0+unknown", "full-revisionid": None,
            "dirty": None,
            "error": "unable to compute version", "date": None}
'''


@register_vcs_handler("git", "get_keywords")
def git_get_keywords(versionfile_abs: str) -> Dict[str, str]:
    """Scrape the git keyword assignments out of a version file.

    The file is scanned line by line for ``git_refnames =``, ``git_full =``
    and ``git_date =`` assignments; the double-quoted value of each one found
    is stored under "refnames", "full" and "date" respectively. A file that
    cannot be opened or read yields whatever was collected so far (possibly
    an empty dict).
    """
    # When used from setup.py we don't want to import _version.py, so the
    # values are pulled out with a regexp instead. The code embedded in
    # _version.py reads the keywords directly and never calls this function.
    assignments = (
        ("git_refnames =", "refnames"),
        ("git_full =", "full"),
        ("git_date =", "date"),
    )
    found: Dict[str, str] = {}
    try:
        with open(versionfile_abs, "r") as version_file:
            for raw_line in version_file:
                stripped = raw_line.strip()
                for marker, key in assignments:
                    if not stripped.startswith(marker):
                        continue
                    value = re.search(r'=\s*"(.*)"', raw_line)
                    if value:
                        found[key] = value.group(1)
    except OSError:
        # Best effort: an unreadable file simply contributes no keywords.
        pass
    return found


@register_vcs_handler("git", "keywords")
def git_versions_from_keywords(
    keywords: Dict[str, str],
    tag_prefix: str,
    verbose: bool,
) -> Dict[str, Any]:
    """Derive version information from expanded git-archive keywords.

    ``keywords`` must hold "refnames" and "full" (and optionally "date").
    The first ref, in sorted order, that starts with ``tag_prefix`` and is
    followed by a digit becomes the version. When no ref qualifies the
    version is "0+unknown" with the error "no suitable tags".

    Raises NotThisMethod when "refnames" is missing or still holds the
    unexpanded ``$Format`` placeholder.
    """
    if "refnames" not in keywords:
        raise NotThisMethod("Short version file found")

    date = keywords.get("date")
    if date is not None:
        # Use only the last line.  Previous lines may contain GPG signature
        # information.
        stamp = date.splitlines()[-1]

        # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant
        # datestamp. However we prefer "%ci" (which expands to an "ISO-8601
        # -like" string, which we must then edit to make compliant), because
        # it's been around since git-1.5.3, and it's too difficult to
        # discover which version we're using, or to work around using an
        # older one.
        date = stamp.strip().replace(" ", "T", 1).replace(" ", "", 1)

    refnames = keywords["refnames"].strip()
    if refnames.startswith("$Format"):
        if verbose:
            print("keywords are unexpanded, not using")
        raise NotThisMethod("unexpanded keywords, not a git-archive tarball")

    refs = {name.strip() for name in refnames.strip("()").split(",")}

    # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
    # just "foo-1.0". If we see a "tag: " prefix, prefer those.
    tag_marker = "tag: "
    tags = {name[len(tag_marker) :] for name in refs if name.startswith(tag_marker)}
    if not tags:
        # Either we're using git < 1.8.3, or there really are no tags. We use
        # a heuristic: assume all version tags have a digit. The old git %d
        # expansion behaves like git log --decorate=short and strips out the
        # refs/heads/ and refs/tags/ prefixes that would let us distinguish
        # between branches and tags. By ignoring refnames without digits, we
        # filter out many common branch names like "release" and
        # "stabilization", as well as "HEAD" and "master".
        tags = {name for name in refs if re.search(r"\d", name)}
        if verbose:
            print("discarding '%s', no digits" % ",".join(refs - tags))
    if verbose:
        print("likely tags: %s" % ",".join(sorted(tags)))

    full_revision = None
    # sorting will prefer e.g. "2.0" over "2.0rc1"
    for candidate in sorted(tags):
        if not candidate.startswith(tag_prefix):
            continue
        version = candidate[len(tag_prefix) :]
        # Skip refs that exactly match the prefix or that don't start with
        # a number once the prefix is stripped (mostly a concern when the
        # prefix is '').
        if re.match(r"\d", version):
            if verbose:
                print("picking %s" % version)
            full_revision = keywords["full"].strip()
            return {
                "version": version,
                "full-revisionid": full_revision,
                "dirty": False,
                "error": None,
                "date": date,
            }

    # no suitable tags, so version is "0+unknown", but full hex is still there
    if verbose:
        print("no suitable tags, using unknown + full revision id")
    full_revision = keywords["full"].strip()
    return {
        "version": "0+unknown",
        "full-revisionid": full_revision,
        "dirty": False,
        "error": "no suitable tags",
        "date": None,
    }


@register_vcs_handler("git", "pieces_from_vcs")
def git_pieces_from_vcs(
    tag_prefix: str, root: str, verbose: bool, runner: Callable = run_command
) -> Dict[str, Any]:
    """Get version from 'git describe' in the root of the source tree.

    This only gets called if the git-archive 'subst' keywords were *not*
    expanded, and _version.py hasn't already been rewritten with a short
    version string, meaning we're inside a checked out source tree.

    Args:
        tag_prefix: Prefix that version tags carry; it is used in the
            ``git describe --match`` pattern and stripped from the tag.
        root: Directory in which every git command is run.
        verbose: When true, print diagnostics and do not hide git's stderr
            for the initial ``rev-parse --git-dir`` probe.
        runner: Callable used to execute git. It is invoked as
            ``runner(commands, args, cwd=..., env=...)`` (plus
            ``hide_stderr`` for the probe) and must return a
            ``(stdout, returncode)`` pair; ``stdout`` is expected to be
            ``None`` when the command failed.

    Returns:
        A dict with the keys "long", "short", "error", "branch", "dirty",
        "closest-tag", "distance" and "date". "date" is ``None`` when the
        commit date could not be obtained. If the describe output can't be
        parsed, or the tag lacks ``tag_prefix``, "error" holds a message and
        the dict is returned early without the tag, distance and date
        entries.

    Raises:
        NotThisMethod: If the ``rev-parse --git-dir`` probe fails or any git
            command whose output is required for the version fails.
    """
    GITS = ["git"]
    if sys.platform == "win32":
        GITS = ["git.cmd", "git.exe"]

    # GIT_DIR can interfere with correct operation of Versioneer.
    # It may be intended to be passed to the Versioneer-versioned project,
    # but that should not change where we get our version from.
    env = os.environ.copy()
    env.pop("GIT_DIR", None)
    runner = functools.partial(runner, env=env)

    _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=not verbose)
    if rc != 0:
        if verbose:
            print("Directory %s not under git control" % root)
        raise NotThisMethod("'git rev-parse --git-dir' returned error")

    # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
    # if there isn't one, this yields HEX[-dirty] (no NUM)
    describe_out, rc = runner(
        GITS,
        [
            "describe",
            "--tags",
            "--dirty",
            "--always",
            "--long",
            "--match",
            f"{tag_prefix}[[:digit:]]*",
        ],
        cwd=root,
    )
    # --long was added in git-1.5.5
    if describe_out is None:
        raise NotThisMethod("'git describe' failed")
    describe_out = describe_out.strip()
    full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root)
    if full_out is None:
        raise NotThisMethod("'git rev-parse' failed")
    full_out = full_out.strip()

    pieces: Dict[str, Any] = {}
    pieces["long"] = full_out
    pieces["short"] = full_out[:7]  # maybe improved later
    pieces["error"] = None

    branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], cwd=root)
    # --abbrev-ref was added in git-1.6.3
    if rc != 0 or branch_name is None:
        raise NotThisMethod("'git rev-parse --abbrev-ref' returned error")
    branch_name = branch_name.strip()

    if branch_name == "HEAD":
        # If we aren't exactly on a branch, pick a branch which represents
        # the current commit. If all else fails, we are on a branchless
        # commit.
        branches, rc = runner(GITS, ["branch", "--contains"], cwd=root)
        # --contains was added in git-1.5.4
        if rc != 0 or branches is None:
            raise NotThisMethod("'git branch --contains' returned error")
        branches = branches.split("\n")

        # Remove the first line if we're running detached
        if "(" in branches[0]:
            branches.pop(0)

        # Strip off the leading "* " from the list of branches.
        branches = [branch[2:] for branch in branches]
        if "master" in branches:
            branch_name = "master"
        elif not branches:
            branch_name = None
        else:
            # Pick the first branch that is returned. Good or bad.
            branch_name = branches[0]

    pieces["branch"] = branch_name

    # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
    # TAG might have hyphens.
    git_describe = describe_out

    # look for -dirty suffix
    dirty = git_describe.endswith("-dirty")
    pieces["dirty"] = dirty
    if dirty:
        git_describe = git_describe[: git_describe.rindex("-dirty")]

    # now we have TAG-NUM-gHEX or HEX

    if "-" in git_describe:
        # TAG-NUM-gHEX
        mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe)
        if not mo:
            # unparsable. Maybe git-describe is misbehaving?
            pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out
            return pieces

        # tag
        full_tag = mo.group(1)
        if not full_tag.startswith(tag_prefix):
            if verbose:
                fmt = "tag '%s' doesn't start with prefix '%s'"
                print(fmt % (full_tag, tag_prefix))
            pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % (
                full_tag,
                tag_prefix,
            )
            return pieces
        pieces["closest-tag"] = full_tag[len(tag_prefix) :]

        # distance: number of commits since tag
        pieces["distance"] = int(mo.group(2))

        # commit: short hex revision ID
        pieces["short"] = mo.group(3)

    else:
        # HEX: no tags
        pieces["closest-tag"] = None
        out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root)
        # The distance is required to render a version, so a failed rev-list
        # is reported like every other failed git call in this function
        # instead of crashing on ``None.split()``.
        if out is None:
            raise NotThisMethod("'git rev-list' failed")
        pieces["distance"] = len(out.split())  # total number of commits

    # commit date: see ISO-8601 comment in git_versions_from_keywords()
    date_out, rc = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)
    # The date is optional metadata, so a failed or empty 'git show' leaves
    # it as None rather than discarding the version computed above.
    date_lines = date_out.strip().splitlines() if date_out else []
    if date_lines:
        # Use only the last line.  Previous lines may contain GPG signature
        # information.
        date = date_lines[-1]
        pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
    else:
        pieces["date"] = None

    return pieces


def do_vcs_install(versionfile_source: str, ipy: Optional[str]) -> None:
    """Perform the git-specific part of installing Versioneer.

    Makes sure ``.gitattributes`` in the current directory carries an
    ``export-subst`` entry for ``versionfile_source`` (appending one when it
    is missing), then stages the touched files with ``git add``:
    ``versionfile_source``, ``ipy`` when given, this versioneer file unless
    ``VERSIONEER_PEP518`` is defined at module level, and ``.gitattributes``
    when it was modified.
    """
    git_commands = ["git.cmd", "git.exe"] if sys.platform == "win32" else ["git"]

    to_stage = [versionfile_source]
    if ipy:
        to_stage.append(ipy)
    if "VERSIONEER_PEP518" not in globals():
        try:
            this_file = __file__
            # Stage the source file, not a compiled artefact.
            if this_file.endswith((".pyc", ".pyo")):
                this_file = os.path.splitext(this_file)[0] + ".py"
            versioneer_file = os.path.relpath(this_file)
        except NameError:
            versioneer_file = "versioneer.py"
        to_stage.append(versioneer_file)

    already_marked = False
    try:
        with open(".gitattributes", "r") as attributes:
            # An entry counts only if the line starts with the version file
            # path and lists export-subst among its attributes.
            already_marked = any(
                entry.strip().startswith(versionfile_source)
                and "export-subst" in entry.strip().split()[1:]
                for entry in attributes
            )
    except OSError:
        # Missing or unreadable .gitattributes: treat as "not marked".
        pass

    if not already_marked:
        with open(".gitattributes", "a+") as attributes:
            attributes.write(f"{versionfile_source} export-subst\n")
        to_stage.append(".gitattributes")

    run_command(git_commands, ["add", "--"] + to_stage)


def versions_from_parentdir(
    parentdir_prefix: str,
    root: str,
    verbose: bool,
) -> Dict[str, Any]:
    """Infer the version from the name of an enclosing directory.

    Source tarballs conventionally unpack into a directory named after the
    project and its version. ``root`` and its two nearest ancestors are
    checked in that order; the first whose base name starts with
    ``parentdir_prefix`` supplies the version (the remainder of the name).

    Raises NotThisMethod when none of the three directories matches.
    """
    # root itself, its parent, and its grandparent
    candidates = [root]
    while len(candidates) < 3:
        candidates.append(os.path.dirname(candidates[-1]))

    for candidate in candidates:
        name = os.path.basename(candidate)
        if not name.startswith(parentdir_prefix):
            continue
        return {
            "version": name[len(parentdir_prefix) :],
            "full-revisionid": None,
            "dirty": False,
            "error": None,
            "date": None,
        }

    if verbose:
        message = "Tried directories %s but none started with prefix %s"
        print(message % (str(candidates), parentdir_prefix))
    raise NotThisMethod("rootdir doesn't start with parentdir_prefix")


# Template for the short, static form of _version.py. The single "%s"
# placeholder is filled with a JSON document by write_to_version_file(), and
# versions_from_file() locates that document again via the
# "version_json = '''" / "'''  # END VERSION_JSON" markers, so the markers
# must stay in sync with the regular expressions used there.
SHORT_VERSION_PY = """
# This file was generated by 'versioneer.py' (0.29) from
# revision-control system data, or from the parent directory name of an
# unpacked source archive. Distribution tarballs contain a pre-generated copy
# of this file.

import json

version_json = '''
%s
'''  # END VERSION_JSON


def get_versions():
    return json.loads(version_json)
"""


def versions_from_file(filename: str) -> Dict[str, Any]:
    """Load the version dict embedded in a generated _version.py.

    Raises NotThisMethod when the file cannot be read or when it holds no
    ``version_json`` block.
    """
    try:
        with open(filename) as handle:
            text = handle.read()
    except OSError:
        raise NotThisMethod("unable to read _version.py")

    # The JSON block may follow an LF or a CRLF line ending; the LF form is
    # tried first.
    patterns = (
        r"version_json = '''\n(.*)'''  # END VERSION_JSON",
        r"version_json = '''\r\n(.*)'''  # END VERSION_JSON",
    )
    for pattern in patterns:
        match = re.search(pattern, text, re.M | re.S)
        if match:
            return json.loads(match.group(1))
    raise NotThisMethod("no version_json in _version.py")


def write_to_version_file(filename: str, versions: Dict[str, Any]) -> None:
    """Overwrite `filename` with a static _version.py holding `versions`.

    The dict is serialized as sorted, indented JSON and substituted into
    SHORT_VERSION_PY; the new version string is then reported on stdout.
    """
    payload = json.dumps(
        versions,
        sort_keys=True,
        indent=1,
        separators=(",", ": "),
    )
    with open(filename, "w") as out:
        out.write(SHORT_VERSION_PY % payload)

    print("set %s to '%s'" % (filename, versions["version"]))


def plus_or_dot(pieces: Dict[str, Any]) -> str:
    """Return the separator that opens or extends the local version part.

    Returns "." when the closest tag already contains a "+" (a local version
    segment exists and must be extended), otherwise "+".

    A missing "closest-tag" key and a "closest-tag" of None are both treated
    as "no tag": the pieces dict stores None when no tag was found, and a
    plain ``.get(key, "")`` default would not cover that value.
    """
    closest_tag = pieces.get("closest-tag") or ""
    return "." if "+" in closest_tag else "+"


def render_pep440(pieces: Dict[str, Any]) -> str:
    """Render TAG[+DISTANCE.gHEX[.dirty]] (post-release "local version").

    A tagged build that has been dirtied renders as TAG+0.gHEX.dirty.

    Exceptions:
    1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty]
    """
    tag = pieces["closest-tag"]
    if not tag:
        # exception #1
        untagged = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"])
        if pieces["dirty"]:
            untagged += ".dirty"
        return untagged

    # Exactly on a clean tag: the tag itself is the version.
    if not (pieces["distance"] or pieces["dirty"]):
        return tag

    chunks = [tag, plus_or_dot(pieces)]
    chunks.append("%d.g%s" % (pieces["distance"], pieces["short"]))
    if pieces["dirty"]:
        chunks.append(".dirty")
    return "".join(chunks)


def render_pep440_branch(pieces: Dict[str, Any]) -> str:
    """Render TAG[[.dev0]+DISTANCE.gHEX[.dirty]] .

    ".dev0" marks a build from a branch other than master. Because .dev0
    sorts backwards, a feature branch appears "older" than master.

    Exceptions:
    1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty]
    """
    tag = pieces["closest-tag"]
    if not tag:
        # exception #1
        chunks = ["0"]
        if pieces["branch"] != "master":
            chunks.append(".dev0")
        chunks.append("+untagged.%d.g%s" % (pieces["distance"], pieces["short"]))
        if pieces["dirty"]:
            chunks.append(".dirty")
        return "".join(chunks)

    # Exactly on a clean tag: the tag itself is the version.
    if not (pieces["distance"] or pieces["dirty"]):
        return tag

    chunks = [tag]
    if pieces["branch"] != "master":
        chunks.append(".dev0")
    chunks.append(plus_or_dot(pieces))
    chunks.append("%d.g%s" % (pieces["distance"], pieces["short"]))
    if pieces["dirty"]:
        chunks.append(".dirty")
    return "".join(chunks)


def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]:
    """Split a PEP 440 version string at its ".post" segment.

    Returns a ``(release, post)`` pair. ``release`` is the text before the
    first ".post". ``post`` is the post-release number (0 when ".post" has no
    digits after it), or None when the string does not contain exactly one
    ".post" marker.
    """
    segments = ver.split(".post")
    release = segments[0]
    if len(segments) != 2:
        return release, None
    return release, int(segments[1] or 0)


def render_pep440_pre(pieces: Dict[str, Any]) -> str:
    """Render TAG[.postN.devDISTANCE] -- the dirty flag is never shown.

    Exceptions:
    1: no tags. 0.post0.devDISTANCE
    """
    tag = pieces["closest-tag"]
    if not tag:
        # exception #1
        return "0.post0.dev%d" % pieces["distance"]

    if not pieces["distance"]:
        # no commits, use the tag as the version
        return tag

    # Commits exist past the tag: bump (or introduce) the post-release
    # segment and append the distance as a .dev number.
    base, post = pep440_split_post(tag)
    if post is None:
        return base + ".post0.dev%d" % (pieces["distance"])
    return base + ".post%d.dev%d" % (post + 1, pieces["distance"])


def render_pep440_post(pieces: Dict[str, Any]) -> str:
    """Render TAG[.postDISTANCE[.dev0]+gHEX] .

    ".dev0" marks a dirty tree. Because .dev0 sorts backwards, a dirty tree
    appears "older" than the matching clean one, but dirty trees should not
    be released anyway.

    Exceptions:
    1: no tags. 0.postDISTANCE[.dev0]
    """
    tag = pieces["closest-tag"]
    if not tag:
        # exception #1
        chunks = ["0.post%d" % pieces["distance"]]
        if pieces["dirty"]:
            chunks.append(".dev0")
        chunks.append("+g%s" % pieces["short"])
        return "".join(chunks)

    # Exactly on a clean tag: the tag itself is the version.
    if not (pieces["distance"] or pieces["dirty"]):
        return tag

    chunks = [tag, ".post%d" % pieces["distance"]]
    if pieces["dirty"]:
        chunks.append(".dev0")
    chunks.append(plus_or_dot(pieces))
    chunks.append("g%s" % pieces["short"])
    return "".join(chunks)


def render_pep440_post_branch(pieces: Dict[str, Any]) -> str:
    """Render TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] .

    ".dev0" marks a build from a branch other than master.

    Exceptions:
    1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty]
    """
    tag = pieces["closest-tag"]
    if not tag:
        # exception #1
        chunks = ["0.post%d" % pieces["distance"]]
        if pieces["branch"] != "master":
            chunks.append(".dev0")
        chunks.append("+g%s" % pieces["short"])
        if pieces["dirty"]:
            chunks.append(".dirty")
        return "".join(chunks)

    # Exactly on a clean tag: the tag itself is the version.
    if not (pieces["distance"] or pieces["dirty"]):
        return tag

    chunks = [tag, ".post%d" % pieces["distance"]]
    if pieces["branch"] != "master":
        chunks.append(".dev0")
    chunks.append(plus_or_dot(pieces))
    chunks.append("g%s" % pieces["short"])
    if pieces["dirty"]:
        chunks.append(".dirty")
    return "".join(chunks)


def render_pep440_old(pieces: Dict[str, Any]) -> str:
    """Render TAG[.postDISTANCE[.dev0]] .

    ".dev0" marks a dirty tree.

    Exceptions:
    1: no tags. 0.postDISTANCE[.dev0]
    """
    tag = pieces["closest-tag"]
    if tag and not (pieces["distance"] or pieces["dirty"]):
        # Exactly on a clean tag: the tag itself is the version.
        return tag

    # With no tag at all (exception #1) the base is the literal "0".
    base = tag if tag else "0"
    version = base + ".post%d" % pieces["distance"]
    if pieces["dirty"]:
        version += ".dev0"
    return version


def render_git_describe(pieces: Dict[str, Any]) -> str:
    """Render TAG[-DISTANCE-gHEX][-dirty].

    Mirrors 'git describe --tags --dirty --always'.

    Exceptions:
    1: no tags. HEX[-dirty]  (note: no 'g' prefix)
    """
    tag = pieces["closest-tag"]
    if not tag:
        # exception #1
        described = pieces["short"]
    elif pieces["distance"]:
        described = tag + "-%d-g%s" % (pieces["distance"], pieces["short"])
    else:
        described = tag

    if pieces["dirty"]:
        described += "-dirty"
    return described


def render_git_describe_long(pieces: Dict[str, Any]) -> str:
    """Render TAG-DISTANCE-gHEX[-dirty].

    Mirrors 'git describe --tags --dirty --always --long': the distance and
    hash are always included when a tag exists.

    Exceptions:
    1: no tags. HEX[-dirty]  (note: no 'g' prefix)
    """
    tag = pieces["closest-tag"]
    if tag:
        described = tag + "-%d-g%s" % (pieces["distance"], pieces["short"])
    else:
        # exception #1
        described = pieces["short"]

    return described + "-dirty" if pieces["dirty"] else described


def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]:
    """Turn version pieces into a version dict using the requested style.

    When ``pieces["error"]`` is set, the result carries version "unknown"
    and that error. Otherwise the style-specific renderer builds the version
    string; an empty style or "default" selects "pep440".

    Raises ValueError for a style name that is not recognized.
    """
    if pieces["error"]:
        return {
            "version": "unknown",
            "full-revisionid": pieces.get("long"),
            "dirty": None,
            "error": pieces["error"],
            "date": None,
        }

    # A missing style, or "default", means pep440.
    chosen = "pep440" if (not style or style == "default") else style

    renderers = {
        "pep440": render_pep440,
        "pep440-branch": render_pep440_branch,
        "pep440-pre": render_pep440_pre,
        "pep440-post": render_pep440_post,
        "pep440-post-branch": render_pep440_post_branch,
        "pep440-old": render_pep440_old,
        "git-describe": render_git_describe,
        "git-describe-long": render_git_describe_long,
    }
    renderer = renderers.get(chosen)
    if renderer is None:
        raise ValueError("unknown style '%s'" % chosen)

    version = renderer(pieces)
    return {
        "version": version,
        "full-revisionid": pieces["long"],
        "dirty": pieces["dirty"],
        "error": None,
        "date": pieces.get("date"),
    }


class VersioneerBadRootError(Exception):
    """Signal that the project root is unknown or lacks required files."""


def get_versions(verbose: bool = False) -> Dict[str, Any]:
    """Compute the project version from the first source that yields one.

    Sources are tried in this order: expanded VCS keywords in _version.py,
    a static version_json block in _version.py, the VCS itself (rendered
    with the configured style), and finally the parent directory name.

    When every source fails, the returned dict has version "0+unknown",
    an "error" message, and None for "full-revisionid", "dirty" and "date".
    """
    # see the discussion in cmdclass.py:get_cmdclass()
    sys.modules.pop("versioneer", None)

    root = get_root()
    cfg = get_config_from_root(root)

    assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg"
    handlers = HANDLERS.get(cfg.VCS)
    assert handlers, "unrecognized VCS '%s'" % cfg.VCS
    verbose = verbose or bool(cfg.verbose)  # `bool()` used to avoid `None`
    assert (
        cfg.versionfile_source is not None
    ), "please set versioneer.versionfile_source"
    assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix"

    versionfile_abs = os.path.join(root, cfg.versionfile_source)

    # extract version from first of: _version.py, VCS command (e.g. 'git
    # describe'), parentdir. This is meant to work for developers using a
    # source checkout, for users of a tarball created by 'setup.py sdist',
    # and for users of a tarball/zipball created by 'git archive' or github's
    # download-from-tag feature or the equivalent in other VCSes.

    # 1) keywords expanded into _version.py (e.g. by 'git archive')
    keywords_getter = handlers.get("get_keywords")
    keywords_parser = handlers.get("keywords")
    if keywords_getter and keywords_parser:
        try:
            keywords = keywords_getter(versionfile_abs)
            found = keywords_parser(keywords, cfg.tag_prefix, verbose)
        except NotThisMethod:
            pass
        else:
            if verbose:
                print("got version from expanded keyword %s" % found)
            return found

    # 2) static version_json block in _version.py
    try:
        found = versions_from_file(versionfile_abs)
    except NotThisMethod:
        pass
    else:
        if verbose:
            print("got version from file %s %s" % (versionfile_abs, found))
        return found

    # 3) ask the VCS directly
    vcs_reader = handlers.get("pieces_from_vcs")
    if vcs_reader:
        try:
            pieces = vcs_reader(cfg.tag_prefix, root, verbose)
            found = render(pieces, cfg.style)
        except NotThisMethod:
            pass
        else:
            if verbose:
                print("got version from VCS %s" % found)
            return found

    # 4) name of the parent directory
    if cfg.parentdir_prefix:
        try:
            found = versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
        except NotThisMethod:
            pass
        else:
            if verbose:
                print("got version from parentdir %s" % found)
            return found

    if verbose:
        print("unable to compute version")

    return {
        "version": "0+unknown",
        "full-revisionid": None,
        "dirty": None,
        "error": "unable to compute version",
        "date": None,
    }


def get_version() -> str:
    """Return only the version string computed by get_versions()."""
    versions = get_versions()
    return versions["version"]


def get_cmdclass(cmdclass: Optional[Dict[str, Any]] = None):
    """Get the custom setuptools subclasses used by Versioneer.

    If the package uses a different cmdclass (e.g. one from numpy), it
    should be provided as an argument.

    Args:
        cmdclass: optional mapping of command name to command class to build
            on. It is copied, never modified in place; any "build_py",
            "build_ext", "egg_info" or "sdist" entry it contains is used as
            the base class for the corresponding Versioneer command.

    Returns:
        A dict of command classes with "version", "build_py", "build_ext",
        "egg_info" and "sdist" set to Versioneer's subclasses, plus
        "build_exe" / "py2exe" when cx_Freeze / py2exe are already present in
        sys.modules. When cx_Freeze is loaded, the "build_py" entry is
        removed again.
    """
    if "versioneer" in sys.modules:
        del sys.modules["versioneer"]
        # this fixes the "python setup.py develop" case (also 'install' and
        # 'easy_install .'), in which subdependencies of the main project are
        # built (using setup.py bdist_egg) in the same python process. Assume
        # a main project A and a dependency B, which use different versions
        # of Versioneer. A's setup.py imports A's Versioneer, leaving it in
        # sys.modules by the time B's setup.py is executed, causing B to run
        # with the wrong versioneer. Setuptools wraps the sub-dep builds in a
        # sandbox that restores sys.modules to its pre-build state, so the
        # parent is protected against the child's "import versioneer". By
        # removing ourselves from sys.modules here, before the child build
        # happens, we protect the child from the parent's versioneer too.
        # Also see https://github.com/python-versioneer/python-versioneer/issues/52

    # Work on a copy so the caller's mapping is left untouched.
    cmds = {} if cmdclass is None else cmdclass.copy()

    # we add "version" to setuptools
    from setuptools import Command

    class cmd_version(Command):
        description = "report generated version string"
        user_options: List[Tuple[str, str, str]] = []
        boolean_options: List[str] = []

        def initialize_options(self) -> None:
            pass

        def finalize_options(self) -> None:
            pass

        def run(self) -> None:
            # Print the computed version dict, one field per line.
            vers = get_versions(verbose=True)
            print("Version: %s" % vers["version"])
            print(" full-revisionid: %s" % vers.get("full-revisionid"))
            print(" dirty: %s" % vers.get("dirty"))
            print(" date: %s" % vers.get("date"))
            if vers["error"]:
                print(" error: %s" % vers["error"])

    cmds["version"] = cmd_version

    # we override "build_py" in setuptools
    #
    # most invocation pathways end up running build_py:
    #  distutils/build -> build_py
    #  distutils/install -> distutils/build ->..
    #  setuptools/bdist_wheel -> distutils/install ->..
    #  setuptools/bdist_egg -> distutils/install_lib -> build_py
    #  setuptools/install -> bdist_egg ->..
    #  setuptools/develop -> ?
    #  pip install:
    #   copies source tree to a tempdir before running egg_info/etc
    #   if .git isn't copied too, 'git describe' will fail
    #   then does setup.py bdist_wheel, or sometimes setup.py install
    #  setup.py egg_info -> ?

    # pip install -e . and setuptools/editable_wheel will invoke build_py
    # but the build_py command is not expected to copy any files.

    # we override different "build_py" commands for both environments
    if "build_py" in cmds:
        _build_py: Any = cmds["build_py"]
    else:
        from setuptools.command.build_py import build_py as _build_py

    class cmd_build_py(_build_py):
        def run(self) -> None:
            root = get_root()
            cfg = get_config_from_root(root)
            # Compute the versions before the base command runs.
            versions = get_versions()
            _build_py.run(self)
            if getattr(self, "editable_mode", False):
                # During editable installs `.py` and data files are
                # not copied to build_lib
                return
            # now locate _version.py in the new build/ directory and replace
            # it with an updated value
            if cfg.versionfile_build:
                target_versionfile = os.path.join(self.build_lib, cfg.versionfile_build)
                print("UPDATING %s" % target_versionfile)
                write_to_version_file(target_versionfile, versions)

    cmds["build_py"] = cmd_build_py

    if "build_ext" in cmds:
        _build_ext: Any = cmds["build_ext"]
    else:
        from setuptools.command.build_ext import build_ext as _build_ext

    class cmd_build_ext(_build_ext):
        def run(self) -> None:
            root = get_root()
            cfg = get_config_from_root(root)
            # Compute the versions before the base command runs.
            versions = get_versions()
            _build_ext.run(self)
            if self.inplace:
                # build_ext --inplace will only build extensions in
                # build/lib<..> dir with no _version.py to write to.
                # As in place builds will already have a _version.py
                # in the module dir, we do not need to write one.
                return
            # now locate _version.py in the new build/ directory and replace
            # it with an updated value
            if not cfg.versionfile_build:
                return
            target_versionfile = os.path.join(self.build_lib, cfg.versionfile_build)
            if not os.path.exists(target_versionfile):
                print(
                    f"Warning: {target_versionfile} does not exist, skipping "
                    "version update. This can happen if you are running build_ext "
                    "without first running build_py."
                )
                return
            print("UPDATING %s" % target_versionfile)
            write_to_version_file(target_versionfile, versions)

    cmds["build_ext"] = cmd_build_ext

    if "cx_Freeze" in sys.modules:  # cx_freeze enabled?
        from cx_Freeze.dist import build_exe as _build_exe  # type: ignore

        # nczeczulin reports that py2exe won't like the pep440-style string
        # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g.
        # setup(console=[{
        #   "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION
        #   "product_version": versioneer.get_version(),
        #   ...

        class cmd_build_exe(_build_exe):
            def run(self) -> None:
                root = get_root()
                cfg = get_config_from_root(root)
                versions = get_versions()
                # Temporarily replace the source _version.py with a static
                # one for the duration of the build.
                target_versionfile = cfg.versionfile_source
                print("UPDATING %s" % target_versionfile)
                write_to_version_file(target_versionfile, versions)

                _build_exe.run(self)
                # Afterwards, put the long-form template back in place.
                os.unlink(target_versionfile)
                with open(cfg.versionfile_source, "w") as f:
                    LONG = LONG_VERSION_PY[cfg.VCS]
                    f.write(
                        LONG
                        % {
                            "DOLLAR": "$",
                            "STYLE": cfg.style,
                            "TAG_PREFIX": cfg.tag_prefix,
                            "PARENTDIR_PREFIX": cfg.parentdir_prefix,
                            "VERSIONFILE_SOURCE": cfg.versionfile_source,
                        }
                    )

        cmds["build_exe"] = cmd_build_exe
        del cmds["build_py"]

    if "py2exe" in sys.modules:  # py2exe enabled?
        try:
            from py2exe.setuptools_buildexe import py2exe as _py2exe  # type: ignore
        except ImportError:
            from py2exe.distutils_buildexe import py2exe as _py2exe  # type: ignore

        class cmd_py2exe(_py2exe):
            def run(self) -> None:
                root = get_root()
                cfg = get_config_from_root(root)
                versions = get_versions()
                # Temporarily replace the source _version.py with a static
                # one for the duration of the build.
                target_versionfile = cfg.versionfile_source
                print("UPDATING %s" % target_versionfile)
                write_to_version_file(target_versionfile, versions)

                _py2exe.run(self)
                # Afterwards, put the long-form template back in place.
                os.unlink(target_versionfile)
                with open(cfg.versionfile_source, "w") as f:
                    LONG = LONG_VERSION_PY[cfg.VCS]
                    f.write(
                        LONG
                        % {
                            "DOLLAR": "$",
                            "STYLE": cfg.style,
                            "TAG_PREFIX": cfg.tag_prefix,
                            "PARENTDIR_PREFIX": cfg.parentdir_prefix,
                            "VERSIONFILE_SOURCE": cfg.versionfile_source,
                        }
                    )

        cmds["py2exe"] = cmd_py2exe

    # sdist farms its file list building out to egg_info
    if "egg_info" in cmds:
        _egg_info: Any = cmds["egg_info"]
    else:
        from setuptools.command.egg_info import egg_info as _egg_info

    class cmd_egg_info(_egg_info):
        def find_sources(self) -> None:
            # egg_info.find_sources builds the manifest list and writes it
            # in one shot
            super().find_sources()

            # Modify the filelist and normalize it
            root = get_root()
            cfg = get_config_from_root(root)
            self.filelist.append("versioneer.py")
            if cfg.versionfile_source:
                # There are rare cases where versionfile_source might not be
                # included by default, so we must be explicit
                self.filelist.append(cfg.versionfile_source)
            self.filelist.sort()
            self.filelist.remove_duplicates()

            # The write method is hidden in the manifest_maker instance that
            # generated the filelist and was thrown away
            # We will instead replicate their final normalization (to unicode,
            # and POSIX-style paths)
            from setuptools import unicode_utils

            normalized = [
                unicode_utils.filesys_decode(f).replace(os.sep, "/")
                for f in self.filelist.files
            ]

            # Rewrite SOURCES.txt with the amended, normalized file list.
            manifest_filename = os.path.join(self.egg_info, "SOURCES.txt")
            with open(manifest_filename, "w") as fobj:
                fobj.write("\n".join(normalized))

    cmds["egg_info"] = cmd_egg_info

    # we override different "sdist" commands for both environments
    if "sdist" in cmds:
        _sdist: Any = cmds["sdist"]
    else:
        from setuptools.command.sdist import sdist as _sdist

    class cmd_sdist(_sdist):
        def run(self) -> None:
            versions = get_versions()
            # Stashed on the instance for make_release_tree() below.
            self._versioneer_generated_versions = versions
            # unless we update this, the command will keep using the old
            # version
            self.distribution.metadata.version = versions["version"]
            return _sdist.run(self)

        def make_release_tree(self, base_dir: str, files: List[str]) -> None:
            root = get_root()
            cfg = get_config_from_root(root)
            _sdist.make_release_tree(self, base_dir, files)
            # now locate _version.py in the new base_dir directory
            # (remembering that it may be a hardlink) and replace it with an
            # updated value
            target_versionfile = os.path.join(base_dir, cfg.versionfile_source)
            print("UPDATING %s" % target_versionfile)
            write_to_version_file(
                target_versionfile, self._versioneer_generated_versions
            )

    cmds["sdist"] = cmd_sdist

    return cmds


# Guidance printed to stderr by do_setup() when the Versioneer configuration
# cannot be loaded from setup.cfg.
CONFIG_ERROR = """
setup.cfg is missing the necessary Versioneer configuration. You need
a section like:

 [versioneer]
 VCS = git
 style = pep440
 versionfile_source = src/myproject/_version.py
 versionfile_build = myproject/_version.py
 tag_prefix =
 parentdir_prefix = myproject-

You will also need to edit your setup.py to use the results:

 import versioneer
 setup(version=versioneer.get_version(),
       cmdclass=versioneer.get_cmdclass(), ...)

Please read the docstring in ./versioneer.py for configuration instructions,
edit setup.cfg, and re-run the installer or 'python versioneer.py setup'.
"""

# Commented-out [versioneer] section that do_setup() appends to setup.cfg
# when the file or the section is missing.
SAMPLE_CONFIG = """
# See the docstring in versioneer.py for instructions. Note that you must
# re-run 'versioneer.py setup' after changing this section, and commit the
# resulting files.

[versioneer]
#VCS = git
#style = pep440
#versionfile_source =
#versionfile_build =
#tag_prefix =
#parentdir_prefix =

"""

# Older __init__.py boilerplate; do_setup() replaces it with INIT_PY_SNIPPET
# when found.
OLD_SNIPPET = """
from ._version import get_versions
__version__ = get_versions()['version']
del get_versions
"""

# Current __init__.py boilerplate; {0} is formatted with the version module's
# name (the basename of versionfile_source without its extension).
INIT_PY_SNIPPET = """
from . import {0}
__version__ = {0}.get_versions()['version']
"""


def do_setup() -> int:
    """Install Versioneer's VCS-independent pieces into the project.

    Writes the long-form _version.py, hooks it into the package's
    __init__.py when that file exists, and then hands over to
    do_vcs_install() for the VCS-specific part.

    Returns 0 on success, or 1 when the configuration cannot be loaded
    (guidance is printed to stderr in that case).
    """
    root = get_root()
    try:
        cfg = get_config_from_root(root)
    except (OSError, configparser.NoSectionError, configparser.NoOptionError) as exc:
        # A missing file or section gets the commented-out sample appended;
        # a missing option only gets the explanatory message.
        if isinstance(exc, (OSError, configparser.NoSectionError)):
            print("Adding sample versioneer config to setup.cfg", file=sys.stderr)
            with open(os.path.join(root, "setup.cfg"), "a") as cfg_file:
                cfg_file.write(SAMPLE_CONFIG)
        print(CONFIG_ERROR, file=sys.stderr)
        return 1

    print(" creating %s" % cfg.versionfile_source)
    with open(cfg.versionfile_source, "w") as version_file:
        template = LONG_VERSION_PY[cfg.VCS]
        substitutions = {
            "DOLLAR": "$",
            "STYLE": cfg.style,
            "TAG_PREFIX": cfg.tag_prefix,
            "PARENTDIR_PREFIX": cfg.parentdir_prefix,
            "VERSIONFILE_SOURCE": cfg.versionfile_source,
        }
        version_file.write(template % substitutions)

    init_py = os.path.join(os.path.dirname(cfg.versionfile_source), "__init__.py")
    maybe_ipy: Optional[str]
    if not os.path.exists(init_py):
        print(" %s doesn't exist, ok" % init_py)
        maybe_ipy = None
    else:
        maybe_ipy = init_py
        try:
            with open(init_py, "r") as reader:
                existing = reader.read()
        except OSError:
            existing = ""
        module = os.path.splitext(os.path.basename(cfg.versionfile_source))[0]
        snippet = INIT_PY_SNIPPET.format(module)
        if OLD_SNIPPET in existing:
            print(" replacing boilerplate in %s" % init_py)
            with open(init_py, "w") as writer:
                writer.write(existing.replace(OLD_SNIPPET, snippet))
        elif snippet in existing:
            print(" %s unmodified" % init_py)
        else:
            print(" appending to %s" % init_py)
            with open(init_py, "a") as writer:
                writer.write(snippet)

    # Make VCS-specific changes. For git, this means creating/changing
    # .gitattributes to mark _version.py for export-subst keyword
    # substitution.
    do_vcs_install(cfg.versionfile_source, maybe_ipy)
    return 0


def scan_setup_py() -> int:
    """Check ./setup.py for the hooks Versioneer expects.

    Looks for the versioneer import and the get_version() / get_cmdclass()
    calls, and for obsolete ``versioneer.VCS`` / ``versioneer.versionfile_source``
    references. Returns the number of problem categories found (0, 1 or 2),
    printing advice for each.
    """
    required = {
        "import versioneer": "import",
        "versioneer.get_cmdclass()": "cmdclass",
        "versioneer.get_version()": "get_version",
    }
    obsolete = ("versioneer.VCS", "versioneer.versionfile_source")

    seen = set()
    has_setters = False
    with open("setup.py", "r") as f:
        for line in f:
            seen.update(tag for needle, tag in required.items() if needle in line)
            if any(marker in line for marker in obsolete):
                has_setters = True

    problems = 0
    if len(seen) != len(required):
        advice = (
            "",
            "Your setup.py appears to be missing some important items",
            "(but I might be wrong). Please make sure it has something",
            "roughly like the following:",
            "",
            " import versioneer",
            " setup( version=versioneer.get_version(),",
            "        cmdclass=versioneer.get_cmdclass(),  ...)",
            "",
        )
        for text in advice:
            print(text)
        problems += 1
    if has_setters:
        advice = (
            "You should remove lines like 'versioneer.VCS = ' and",
            "'versioneer.versionfile_source = ' . This configuration",
            "now lives in setup.cfg, and should be removed from setup.py",
            "",
        )
        for text in advice:
            print(text)
        problems += 1
    return problems


def setup_command() -> NoReturn:
    """Run the Versioneer setup steps, then exit with status 0 or 1.

    The exit status is 1 when do_setup() or scan_setup_py() reports any
    problem, and 0 otherwise.
    """
    problems = do_setup() + scan_setup_py()
    sys.exit(1 if problems else 0)


# Script entry point: dispatch on the first command-line argument;
# `python versioneer.py setup` runs setup_command().
if __name__ == "__main__":
    cmd = sys.argv[1]
    if cmd == "setup":
        setup_command()